1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "sched-int.h"
54 #include "target-def.h"
55 #include "debug.h"
56 #include "langhooks.h"
57 #include "df.h"
58 #include "intl.h"
59 #include "libfuncs.h"
60 #include "params.h"
61 #include "opts.h"
62 #include "dumpfile.h"
63 #include "gimple-expr.h"
64 #include "builtins.h"
65
66 /* Forward definitions of types. */
67 typedef struct minipool_node Mnode;
68 typedef struct minipool_fixup Mfix;
69
70 void (*arm_lang_output_object_attributes_hook)(void);
71
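/* Holds the up-to-four immediate values into which a constant is split;
   returned by optimal_immediate_sequence (see the forward declarations
   below).  */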
72 struct four_ints
73 {
74 int i[4];
75 };
76
77 /* Forward function declarations. */
78 static bool arm_const_not_ok_for_debug_p (rtx);
79 static bool arm_lra_p (void);
80 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
81 static int arm_compute_static_chain_stack_bytes (void);
82 static arm_stack_offsets *arm_get_frame_offsets (void);
83 static void arm_add_gc_roots (void);
84 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
85 HOST_WIDE_INT, rtx, rtx, int, int);
86 static unsigned bit_count (unsigned long);
87 static int arm_address_register_rtx_p (rtx, int);
88 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
89 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
90 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
91 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
92 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
93 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
94 inline static int thumb1_index_register_rtx_p (rtx, int);
95 static int thumb_far_jump_used_p (void);
96 static bool thumb_force_lr_save (void);
97 static unsigned arm_size_return_regs (void);
98 static bool arm_assemble_integer (rtx, unsigned int, int);
99 static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
100 static void arm_print_operand (FILE *, rtx, int);
101 static void arm_print_operand_address (FILE *, rtx);
102 static bool arm_print_operand_punct_valid_p (unsigned char code);
103 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
104 static arm_cc get_arm_condition_code (rtx);
105 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
106 static const char *output_multi_immediate (rtx *, const char *, const char *,
107 int, HOST_WIDE_INT);
108 static const char *shift_op (rtx, HOST_WIDE_INT *);
109 static struct machine_function *arm_init_machine_status (void);
110 static void thumb_exit (FILE *, int);
111 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
112 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
113 static Mnode *add_minipool_forward_ref (Mfix *);
114 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
115 static Mnode *add_minipool_backward_ref (Mfix *);
116 static void assign_minipool_offsets (Mfix *);
117 static void arm_print_value (FILE *, rtx);
118 static void dump_minipool (rtx);
119 static int arm_barrier_cost (rtx);
120 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
121 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
122 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
123 rtx);
124 static void arm_reorg (void);
125 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
126 static unsigned long arm_compute_save_reg0_reg12_mask (void);
127 static unsigned long arm_compute_save_reg_mask (void);
128 static unsigned long arm_isr_value (tree);
129 static unsigned long arm_compute_func_type (void);
130 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
131 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
132 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
133 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
134 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
135 #endif
136 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
137 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
138 static int arm_comp_type_attributes (const_tree, const_tree);
139 static void arm_set_default_type_attributes (tree);
140 static int arm_adjust_cost (rtx, rtx, rtx, int);
141 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
142 static int optimal_immediate_sequence (enum rtx_code code,
143 unsigned HOST_WIDE_INT val,
144 struct four_ints *return_sequence);
145 static int optimal_immediate_sequence_1 (enum rtx_code code,
146 unsigned HOST_WIDE_INT val,
147 struct four_ints *return_sequence,
148 int i);
149 static int arm_get_strip_length (int);
150 static bool arm_function_ok_for_sibcall (tree, tree);
151 static enum machine_mode arm_promote_function_mode (const_tree,
152 enum machine_mode, int *,
153 const_tree, int);
154 static bool arm_return_in_memory (const_tree, const_tree);
155 static rtx arm_function_value (const_tree, const_tree, bool);
156 static rtx arm_libcall_value_1 (enum machine_mode);
157 static rtx arm_libcall_value (enum machine_mode, const_rtx);
158 static bool arm_function_value_regno_p (const unsigned int);
159 static void arm_internal_label (FILE *, const char *, unsigned long);
160 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
161 tree);
162 static bool arm_have_conditional_execution (void);
163 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
164 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
165 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
166 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
167 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
168 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
169 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
170 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
171 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
172 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
173 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
174 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
175 static void arm_init_builtins (void);
176 static void arm_init_iwmmxt_builtins (void);
177 static rtx safe_vector_operand (rtx, enum machine_mode);
178 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
179 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
180 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
181 static tree arm_builtin_decl (unsigned, bool);
182 static void emit_constant_insn (rtx cond, rtx pattern);
183 static rtx emit_set_insn (rtx, rtx);
184 static rtx emit_multi_reg_push (unsigned long, unsigned long);
185 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
186 tree, bool);
187 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
188 const_tree, bool);
189 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
190 const_tree, bool);
191 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
192 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
193 const_tree);
194 static rtx aapcs_libcall_value (enum machine_mode);
195 static int aapcs_select_return_coproc (const_tree, const_tree);
196
197 #ifdef OBJECT_FORMAT_ELF
198 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
199 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
200 #endif
201 #ifndef ARM_PE
202 static void arm_encode_section_info (tree, rtx, int);
203 #endif
204
205 static void arm_file_end (void);
206 static void arm_file_start (void);
207
208 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 enum machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 static void arm_asm_init_sections (void);
223 #endif
224 static rtx arm_dwarf_register_span (rtx);
225
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
241 static bool arm_cannot_copy_insn_p (rtx);
242 static int arm_issue_rate (void);
243 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
244 static bool arm_output_addr_const_extra (FILE *, rtx);
245 static bool arm_allocate_stack_slots_for_args (void);
246 static bool arm_warn_func_return (tree);
247 static const char *arm_invalid_parameter_type (const_tree t);
248 static const char *arm_invalid_return_type (const_tree t);
249 static tree arm_promoted_type (const_tree t);
250 static tree arm_convert_to_type (tree type, tree expr);
251 static bool arm_scalar_mode_supported_p (enum machine_mode);
252 static bool arm_frame_pointer_required (void);
253 static bool arm_can_eliminate (const int, const int);
254 static void arm_asm_trampoline_template (FILE *);
255 static void arm_trampoline_init (rtx, tree, rtx);
256 static rtx arm_trampoline_adjust_address (rtx);
257 static rtx arm_pic_static_addr (rtx orig, rtx reg);
258 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
259 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
260 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
261 static bool arm_array_mode_supported_p (enum machine_mode,
262 unsigned HOST_WIDE_INT);
263 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
264 static bool arm_class_likely_spilled_p (reg_class_t);
265 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
266 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
267 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
268 const_tree type,
269 int misalignment,
270 bool is_packed);
271 static void arm_conditional_register_usage (void);
272 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
273 static unsigned int arm_autovectorize_vector_sizes (void);
274 static int arm_default_branch_cost (bool, bool);
275 static int arm_cortex_a5_branch_cost (bool, bool);
276 static int arm_cortex_m_branch_cost (bool, bool);
277
278 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
279 const unsigned char *sel);
280
281 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
282 tree vectype,
283 int misalign ATTRIBUTE_UNUSED);
284 static unsigned arm_add_stmt_cost (void *data, int count,
285 enum vect_cost_for_stmt kind,
286 struct _stmt_vec_info *stmt_info,
287 int misalign,
288 enum vect_cost_model_location where);
289
290 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
291 bool op0_preserve_value);
292 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
293 \f
294 /* Table of machine attributes. */
295 static const struct attribute_spec arm_attribute_table[] =
296 {
297 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
298 affects_type_identity } */
299 /* Function calls made to this symbol must be done indirectly, because
300 it may lie outside of the 26 bit addressing range of a normal function
301 call. */
302 { "long_call", 0, 0, false, true, true, NULL, false },
303 /* Whereas these functions are always known to reside within the 26 bit
304 addressing range. */
305 { "short_call", 0, 0, false, true, true, NULL, false },
306 /* Specify the procedure call conventions for a function. */
307 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
308 false },
309 /* Interrupt Service Routines have special prologue and epilogue requirements. */
310 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
311 false },
312 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
313 false },
314 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
315 false },
316 #ifdef ARM_PE
317 /* ARM/PE has three new attributes:
318 interfacearm - ?
319 dllexport - for exporting a function/variable that will live in a dll
320 dllimport - for importing a function/variable from a dll
321
322 Microsoft allows multiple declspecs in one __declspec, separating
323 them with spaces. We do NOT support this. Instead, use __declspec
324 multiple times.
325 */
326 { "dllimport", 0, 0, true, false, false, NULL, false },
327 { "dllexport", 0, 0, true, false, false, NULL, false },
328 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
329 false },
330 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
331 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
332 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
333 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
334 false },
335 #endif
336 { NULL, 0, 0, false, false, false, NULL, false }
337 };
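/* For illustration, a minimal sketch (hypothetical declarations, not part
   of this file) of how user code attaches the attributes defined above:

       void far_func (void) __attribute__ ((long_call));
       void handler (void) __attribute__ ((interrupt ("IRQ")));
       double vadd (double, double) __attribute__ ((pcs ("aapcs-vfp")));

   The handler field of each entry (e.g. arm_handle_isr_attribute) is the
   routine that checks such uses when the attribute is processed.  */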
338 \f
339 /* Initialize the GCC target structure. */
340 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 #undef TARGET_MERGE_DECL_ATTRIBUTES
342 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
343 #endif
344
345 #undef TARGET_LEGITIMIZE_ADDRESS
346 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
347
348 #undef TARGET_LRA_P
349 #define TARGET_LRA_P arm_lra_p
350
351 #undef TARGET_ATTRIBUTE_TABLE
352 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
353
354 #undef TARGET_ASM_FILE_START
355 #define TARGET_ASM_FILE_START arm_file_start
356 #undef TARGET_ASM_FILE_END
357 #define TARGET_ASM_FILE_END arm_file_end
358
359 #undef TARGET_ASM_ALIGNED_SI_OP
360 #define TARGET_ASM_ALIGNED_SI_OP NULL
361 #undef TARGET_ASM_INTEGER
362 #define TARGET_ASM_INTEGER arm_assemble_integer
363
364 #undef TARGET_PRINT_OPERAND
365 #define TARGET_PRINT_OPERAND arm_print_operand
366 #undef TARGET_PRINT_OPERAND_ADDRESS
367 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
368 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
369 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
370
371 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
372 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
373
374 #undef TARGET_ASM_FUNCTION_PROLOGUE
375 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
376
377 #undef TARGET_ASM_FUNCTION_EPILOGUE
378 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
379
380 #undef TARGET_OPTION_OVERRIDE
381 #define TARGET_OPTION_OVERRIDE arm_option_override
382
383 #undef TARGET_COMP_TYPE_ATTRIBUTES
384 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
385
386 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
387 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
388
389 #undef TARGET_SCHED_ADJUST_COST
390 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
391
392 #undef TARGET_SCHED_REORDER
393 #define TARGET_SCHED_REORDER arm_sched_reorder
394
395 #undef TARGET_REGISTER_MOVE_COST
396 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
397
398 #undef TARGET_MEMORY_MOVE_COST
399 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
400
401 #undef TARGET_ENCODE_SECTION_INFO
402 #ifdef ARM_PE
403 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
404 #else
405 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
406 #endif
407
408 #undef TARGET_STRIP_NAME_ENCODING
409 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
410
411 #undef TARGET_ASM_INTERNAL_LABEL
412 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
413
414 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
415 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
416
417 #undef TARGET_FUNCTION_VALUE
418 #define TARGET_FUNCTION_VALUE arm_function_value
419
420 #undef TARGET_LIBCALL_VALUE
421 #define TARGET_LIBCALL_VALUE arm_libcall_value
422
423 #undef TARGET_FUNCTION_VALUE_REGNO_P
424 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
425
426 #undef TARGET_ASM_OUTPUT_MI_THUNK
427 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
428 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
429 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
430
431 #undef TARGET_RTX_COSTS
432 #define TARGET_RTX_COSTS arm_rtx_costs
433 #undef TARGET_ADDRESS_COST
434 #define TARGET_ADDRESS_COST arm_address_cost
435
436 #undef TARGET_SHIFT_TRUNCATION_MASK
437 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
438 #undef TARGET_VECTOR_MODE_SUPPORTED_P
439 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
440 #undef TARGET_ARRAY_MODE_SUPPORTED_P
441 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
442 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
443 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
444 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
445 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
446 arm_autovectorize_vector_sizes
447
448 #undef TARGET_MACHINE_DEPENDENT_REORG
449 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
450
451 #undef TARGET_INIT_BUILTINS
452 #define TARGET_INIT_BUILTINS arm_init_builtins
453 #undef TARGET_EXPAND_BUILTIN
454 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
455 #undef TARGET_BUILTIN_DECL
456 #define TARGET_BUILTIN_DECL arm_builtin_decl
457
458 #undef TARGET_INIT_LIBFUNCS
459 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
460
461 #undef TARGET_PROMOTE_FUNCTION_MODE
462 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
463 #undef TARGET_PROMOTE_PROTOTYPES
464 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
465 #undef TARGET_PASS_BY_REFERENCE
466 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
467 #undef TARGET_ARG_PARTIAL_BYTES
468 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
469 #undef TARGET_FUNCTION_ARG
470 #define TARGET_FUNCTION_ARG arm_function_arg
471 #undef TARGET_FUNCTION_ARG_ADVANCE
472 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
473 #undef TARGET_FUNCTION_ARG_BOUNDARY
474 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
475
476 #undef TARGET_SETUP_INCOMING_VARARGS
477 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
478
479 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
480 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
481
482 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
483 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
484 #undef TARGET_TRAMPOLINE_INIT
485 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
486 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
487 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
488
489 #undef TARGET_WARN_FUNC_RETURN
490 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
491
492 #undef TARGET_DEFAULT_SHORT_ENUMS
493 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
494
495 #undef TARGET_ALIGN_ANON_BITFIELD
496 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
497
498 #undef TARGET_NARROW_VOLATILE_BITFIELD
499 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
500
501 #undef TARGET_CXX_GUARD_TYPE
502 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
503
504 #undef TARGET_CXX_GUARD_MASK_BIT
505 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
506
507 #undef TARGET_CXX_GET_COOKIE_SIZE
508 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
509
510 #undef TARGET_CXX_COOKIE_HAS_SIZE
511 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
512
513 #undef TARGET_CXX_CDTOR_RETURNS_THIS
514 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
515
516 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
517 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
518
519 #undef TARGET_CXX_USE_AEABI_ATEXIT
520 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
521
522 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
523 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
524 arm_cxx_determine_class_data_visibility
525
526 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
527 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
528
529 #undef TARGET_RETURN_IN_MSB
530 #define TARGET_RETURN_IN_MSB arm_return_in_msb
531
532 #undef TARGET_RETURN_IN_MEMORY
533 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
534
535 #undef TARGET_MUST_PASS_IN_STACK
536 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
537
538 #if ARM_UNWIND_INFO
539 #undef TARGET_ASM_UNWIND_EMIT
540 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
541
542 /* EABI unwinding tables use a different format for the typeinfo tables. */
543 #undef TARGET_ASM_TTYPE
544 #define TARGET_ASM_TTYPE arm_output_ttype
545
546 #undef TARGET_ARM_EABI_UNWINDER
547 #define TARGET_ARM_EABI_UNWINDER true
548
549 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
550 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
551
552 #undef TARGET_ASM_INIT_SECTIONS
553 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
554 #endif /* ARM_UNWIND_INFO */
555
556 #undef TARGET_DWARF_REGISTER_SPAN
557 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
558
559 #undef TARGET_CANNOT_COPY_INSN_P
560 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
561
562 #ifdef HAVE_AS_TLS
563 #undef TARGET_HAVE_TLS
564 #define TARGET_HAVE_TLS true
565 #endif
566
567 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
568 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
569
570 #undef TARGET_LEGITIMATE_CONSTANT_P
571 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
572
573 #undef TARGET_CANNOT_FORCE_CONST_MEM
574 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
575
576 #undef TARGET_MAX_ANCHOR_OFFSET
577 #define TARGET_MAX_ANCHOR_OFFSET 4095
578
579 /* The minimum is set such that the total size of the block
580 for a particular anchor is -4088 + 1 + 4095 bytes, which is
581 divisible by eight, ensuring natural spacing of anchors. */
582 #undef TARGET_MIN_ANCHOR_OFFSET
583 #define TARGET_MIN_ANCHOR_OFFSET -4088
584
585 #undef TARGET_SCHED_ISSUE_RATE
586 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
587
588 #undef TARGET_MANGLE_TYPE
589 #define TARGET_MANGLE_TYPE arm_mangle_type
590
591 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
592 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
593
594 #undef TARGET_BUILD_BUILTIN_VA_LIST
595 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
596 #undef TARGET_EXPAND_BUILTIN_VA_START
597 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
598 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
599 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
600
601 #ifdef HAVE_AS_TLS
602 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
603 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
604 #endif
605
606 #undef TARGET_LEGITIMATE_ADDRESS_P
607 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
608
609 #undef TARGET_PREFERRED_RELOAD_CLASS
610 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
611
612 #undef TARGET_INVALID_PARAMETER_TYPE
613 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
614
615 #undef TARGET_INVALID_RETURN_TYPE
616 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
617
618 #undef TARGET_PROMOTED_TYPE
619 #define TARGET_PROMOTED_TYPE arm_promoted_type
620
621 #undef TARGET_CONVERT_TO_TYPE
622 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
623
624 #undef TARGET_SCALAR_MODE_SUPPORTED_P
625 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
626
627 #undef TARGET_FRAME_POINTER_REQUIRED
628 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
629
630 #undef TARGET_CAN_ELIMINATE
631 #define TARGET_CAN_ELIMINATE arm_can_eliminate
632
633 #undef TARGET_CONDITIONAL_REGISTER_USAGE
634 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
635
636 #undef TARGET_CLASS_LIKELY_SPILLED_P
637 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
638
639 #undef TARGET_VECTORIZE_BUILTINS
640 #define TARGET_VECTORIZE_BUILTINS
641
642 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
643 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
644 arm_builtin_vectorized_function
645
646 #undef TARGET_VECTOR_ALIGNMENT
647 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
648
649 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
650 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
651 arm_vector_alignment_reachable
652
653 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
654 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
655 arm_builtin_support_vector_misalignment
656
657 #undef TARGET_PREFERRED_RENAME_CLASS
658 #define TARGET_PREFERRED_RENAME_CLASS \
659 arm_preferred_rename_class
660
661 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
662 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
663 arm_vectorize_vec_perm_const_ok
664
665 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
666 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
667 arm_builtin_vectorization_cost
668 #undef TARGET_VECTORIZE_ADD_STMT_COST
669 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
670
671 #undef TARGET_CANONICALIZE_COMPARISON
672 #define TARGET_CANONICALIZE_COMPARISON \
673 arm_canonicalize_comparison
674
675 #undef TARGET_ASAN_SHADOW_OFFSET
676 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
677
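/* Maximum number of instructions that may be conditionalised inside a
   single Thumb-2 IT block: presumably 1 when -mrestrict-it is in effect
   (only the single-instruction form preferred by ARMv8 is emitted) and
   the architectural maximum of 4 otherwise.  */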
678 #undef MAX_INSN_PER_IT_BLOCK
679 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
680
681 #undef TARGET_CAN_USE_DOLOOP_P
682 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
683
684 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
685 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
686
687 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
688 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
689
690 struct gcc_target targetm = TARGET_INITIALIZER;
691 \f
692 /* Obstack for minipool constant handling. */
693 static struct obstack minipool_obstack;
694 static char * minipool_startobj;
695
696 /* The maximum number of insns skipped which
697 will be conditionalised if possible. */
698 static int max_insns_skipped = 5;
699
700 extern FILE * asm_out_file;
701
702 /* True if we are currently building a constant table. */
703 int making_const_table;
704
705 /* The processor for which instructions should be scheduled. */
706 enum processor_type arm_tune = arm_none;
707
708 /* The current tuning set. */
709 const struct tune_params *current_tune;
710
711 /* Which floating point hardware to schedule for. */
712 int arm_fpu_attr;
713
714 /* Which floating point hardware to use.  */
715 const struct arm_fpu_desc *arm_fpu_desc;
716
717 /* Used for Thumb call_via trampolines. */
718 rtx thumb_call_via_label[14];
719 static int thumb_call_reg_needed;
720
721 /* Bit values used to identify processor capabilities. */
722 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
723 #define FL_ARCH3M (1 << 1) /* Extended multiply */
724 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
725 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
726 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
727 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
728 #define FL_THUMB (1 << 6) /* Thumb aware */
729 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
730 #define FL_STRONG (1 << 8) /* StrongARM */
731 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
732 #define FL_XSCALE (1 << 10) /* XScale */
733 /* spare (1 << 11) */
734 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
735 media instructions. */
736 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
737 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
738 Note: ARM6 & 7 derivatives only. */
739 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
740 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
741 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
742 profile. */
743 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
744 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
745 #define FL_NEON (1 << 20) /* Neon instructions. */
746 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
747 architecture. */
748 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
749 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
750 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
751 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
752
753 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
754 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
755
756 /* Flags that only affect tuning, not available instructions.  */
757 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
758 | FL_CO_PROC)
759
760 #define FL_FOR_ARCH2 FL_NOTM
761 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
762 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
763 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
764 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
765 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
766 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
767 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
768 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
769 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
770 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
771 #define FL_FOR_ARCH6J FL_FOR_ARCH6
772 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
773 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
774 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
775 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
776 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
777 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
778 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
779 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
780 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
781 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
782 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
783 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
784
785 /* The bits in this mask specify which
786 instructions we are allowed to generate. */
787 static unsigned long insn_flags = 0;
788
789 /* The bits in this mask specify which instruction scheduling options should
790 be used. */
791 static unsigned long tune_flags = 0;
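/* For illustration, a rough sketch (not a verbatim copy of the option
   handling later in this file) of how the FL_* bits are consumed: the
   flags of the selected CPU or architecture are copied into insn_flags
   and tune_flags, and the feature variables declared below are derived
   from them, along the lines of:

       insn_flags   = flags_of_selected_cpu;     (e.g. FL_FOR_ARCH7A)
       arm_arch4    = (insn_flags & FL_ARCH4) != 0;
       arm_arch5e   = (insn_flags & FL_ARCH5E) != 0;
       arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;

   so that arm.md and the rest of this file can test simple ints.  */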
792
793 /* The highest ARM architecture version supported by the
794 target. */
795 enum base_architecture arm_base_arch = BASE_ARCH_0;
796
797 /* The following are used in the arm.md file as equivalents to bits
798 in the above two flag variables. */
799
800 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
801 int arm_arch3m = 0;
802
803 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
804 int arm_arch4 = 0;
805
806 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
807 int arm_arch4t = 0;
808
809 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
810 int arm_arch5 = 0;
811
812 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
813 int arm_arch5e = 0;
814
815 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
816 int arm_arch6 = 0;
817
818 /* Nonzero if this chip supports the ARM 6K extensions. */
819 int arm_arch6k = 0;
820
821 /* Nonzero if instructions present in ARMv6-M can be used. */
822 int arm_arch6m = 0;
823
824 /* Nonzero if this chip supports the ARM 7 extensions. */
825 int arm_arch7 = 0;
826
827 /* Nonzero if instructions not present in the 'M' profile can be used. */
828 int arm_arch_notm = 0;
829
830 /* Nonzero if instructions present in ARMv7E-M can be used. */
831 int arm_arch7em = 0;
832
833 /* Nonzero if instructions present in ARMv8 can be used. */
834 int arm_arch8 = 0;
835
836 /* Nonzero if this chip can benefit from load scheduling. */
837 int arm_ld_sched = 0;
838
839 /* Nonzero if this chip is a StrongARM. */
840 int arm_tune_strongarm = 0;
841
842 /* Nonzero if this chip supports Intel Wireless MMX technology. */
843 int arm_arch_iwmmxt = 0;
844
845 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
846 int arm_arch_iwmmxt2 = 0;
847
848 /* Nonzero if this chip is an XScale. */
849 int arm_arch_xscale = 0;
850
851 /* Nonzero if tuning for XScale.  */
852 int arm_tune_xscale = 0;
853
854 /* Nonzero if we want to tune for stores that access the write-buffer.
855 This typically means an ARM6 or ARM7 with MMU or MPU. */
856 int arm_tune_wbuf = 0;
857
858 /* Nonzero if tuning for Cortex-A9. */
859 int arm_tune_cortex_a9 = 0;
860
861 /* Nonzero if generating Thumb instructions. */
862 int thumb_code = 0;
863
864 /* Nonzero if generating Thumb-1 instructions. */
865 int thumb1_code = 0;
866
867 /* Nonzero if we should define __THUMB_INTERWORK__ in the
868    preprocessor.
869    XXX This is a bit of a hack; it's intended to help work around
870    problems in GLD, which doesn't understand that armv5t code is
871    interworking clean.  */
872 int arm_cpp_interwork = 0;
873
874 /* Nonzero if chip supports Thumb 2. */
875 int arm_arch_thumb2;
876
877 /* Nonzero if chip supports integer division instruction. */
878 int arm_arch_arm_hwdiv;
879 int arm_arch_thumb_hwdiv;
880
881 /* Nonzero if we should use Neon to handle 64-bit operations rather
882    than core registers.  */
883 int prefer_neon_for_64bits = 0;
884
885 /* Nonzero if we shouldn't use literal pools. */
886 bool arm_disable_literal_pool = false;
887
888 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
889 we must report the mode of the memory reference from
890 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
891 enum machine_mode output_memory_reference_mode;
892
893 /* The register number to be used for the PIC offset register. */
894 unsigned arm_pic_register = INVALID_REGNUM;
895
896 enum arm_pcs arm_pcs_default;
897
898 /* For an explanation of these variables, see final_prescan_insn below. */
899 int arm_ccfsm_state;
900 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
901 enum arm_cond_code arm_current_cc;
902
903 rtx arm_target_insn;
904 int arm_target_label;
905 /* The number of conditionally executed insns, including the current insn. */
906 int arm_condexec_count = 0;
907 /* A bitmask specifying the patterns for the IT block.
908 Zero means do not output an IT block before this insn. */
909 int arm_condexec_mask = 0;
910 /* The number of bits used in arm_condexec_mask. */
911 int arm_condexec_masklen = 0;
912
913 /* Nonzero if chip supports the ARMv8 CRC instructions. */
914 int arm_arch_crc = 0;
915
916 /* The condition codes of the ARM, and the inverse function. */
917 static const char * const arm_condition_codes[] =
918 {
919 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
920 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
921 };
922
923 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
924 int arm_regs_in_sequence[] =
925 {
926 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
927 };
928
929 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
930 #define streq(string1, string2) (strcmp (string1, string2) == 0)
931
932 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
933 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
934 | (1 << PIC_OFFSET_TABLE_REGNUM)))
935 \f
936 /* Initialization code. */
937
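/* An entry in the processor/architecture tables: ties a -mcpu or -march
   name to its processor_type, architecture name, base architecture,
   FL_* capability flags and tuning parameters.  */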
938 struct processors
939 {
940 const char *const name;
941 enum processor_type core;
942 const char *arch;
943 enum base_architecture base_arch;
944 const unsigned long flags;
945 const struct tune_params *const tune;
946 };
947
948
949 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
950 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
951 prefetch_slots, \
952 l1_size, \
953 l1_line_size
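/* For illustration: these macros simply expand to the three prefetch
   fields of a tune_params initializer (prefetch slots, L1 cache size,
   L1 line size).  A hedged example of the expansion:

       ARM_PREFETCH_NOT_BENEFICIAL             ->  0, -1, -1
       ARM_PREFETCH_BENEFICIAL (4, 32768, 64)  ->  4, 32768, 64

   where -1 presumably acts as an "unknown / not beneficial" sentinel.  */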
954
955 /* arm generic vectorizer costs. */
956 static const
957 struct cpu_vec_costs arm_default_vec_cost = {
958 1, /* scalar_stmt_cost. */
959 1, /* scalar load_cost. */
960 1, /* scalar_store_cost. */
961 1, /* vec_stmt_cost. */
962 1, /* vec_to_scalar_cost. */
963 1, /* scalar_to_vec_cost. */
964 1, /* vec_align_load_cost. */
965 1, /* vec_unalign_load_cost. */
966 1, /* vec_unalign_store_cost. */
967 1, /* vec_store_cost. */
968 3, /* cond_taken_branch_cost. */
969 1, /* cond_not_taken_branch_cost. */
970 };
971
972 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
973 #include "aarch-cost-tables.h"
974
975
976
977 const struct cpu_cost_table cortexa9_extra_costs =
978 {
979 /* ALU */
980 {
981 0, /* arith. */
982 0, /* logical. */
983 0, /* shift. */
984 COSTS_N_INSNS (1), /* shift_reg. */
985 COSTS_N_INSNS (1), /* arith_shift. */
986 COSTS_N_INSNS (2), /* arith_shift_reg. */
987 0, /* log_shift. */
988 COSTS_N_INSNS (1), /* log_shift_reg. */
989 COSTS_N_INSNS (1), /* extend. */
990 COSTS_N_INSNS (2), /* extend_arith. */
991 COSTS_N_INSNS (1), /* bfi. */
992 COSTS_N_INSNS (1), /* bfx. */
993 0, /* clz. */
994 0, /* rev. */
995 0, /* non_exec. */
996 true /* non_exec_costs_exec. */
997 },
998 {
999 /* MULT SImode */
1000 {
1001 COSTS_N_INSNS (3), /* simple. */
1002 COSTS_N_INSNS (3), /* flag_setting. */
1003 COSTS_N_INSNS (2), /* extend. */
1004 COSTS_N_INSNS (3), /* add. */
1005 COSTS_N_INSNS (2), /* extend_add. */
1006 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1007 },
1008 /* MULT DImode */
1009 {
1010 0, /* simple (N/A). */
1011 0, /* flag_setting (N/A). */
1012 COSTS_N_INSNS (4), /* extend. */
1013 0, /* add (N/A). */
1014 COSTS_N_INSNS (4), /* extend_add. */
1015 0 /* idiv (N/A). */
1016 }
1017 },
1018 /* LD/ST */
1019 {
1020 COSTS_N_INSNS (2), /* load. */
1021 COSTS_N_INSNS (2), /* load_sign_extend. */
1022 COSTS_N_INSNS (2), /* ldrd. */
1023 COSTS_N_INSNS (2), /* ldm_1st. */
1024 1, /* ldm_regs_per_insn_1st. */
1025 2, /* ldm_regs_per_insn_subsequent. */
1026 COSTS_N_INSNS (5), /* loadf. */
1027 COSTS_N_INSNS (5), /* loadd. */
1028 COSTS_N_INSNS (1), /* load_unaligned. */
1029 COSTS_N_INSNS (2), /* store. */
1030 COSTS_N_INSNS (2), /* strd. */
1031 COSTS_N_INSNS (2), /* stm_1st. */
1032 1, /* stm_regs_per_insn_1st. */
1033 2, /* stm_regs_per_insn_subsequent. */
1034 COSTS_N_INSNS (1), /* storef. */
1035 COSTS_N_INSNS (1), /* stored. */
1036 COSTS_N_INSNS (1) /* store_unaligned. */
1037 },
1038 {
1039 /* FP SFmode */
1040 {
1041 COSTS_N_INSNS (14), /* div. */
1042 COSTS_N_INSNS (4), /* mult. */
1043 COSTS_N_INSNS (7), /* mult_addsub. */
1044 COSTS_N_INSNS (30), /* fma. */
1045 COSTS_N_INSNS (3), /* addsub. */
1046 COSTS_N_INSNS (1), /* fpconst. */
1047 COSTS_N_INSNS (1), /* neg. */
1048 COSTS_N_INSNS (3), /* compare. */
1049 COSTS_N_INSNS (3), /* widen. */
1050 COSTS_N_INSNS (3), /* narrow. */
1051 COSTS_N_INSNS (3), /* toint. */
1052 COSTS_N_INSNS (3), /* fromint. */
1053 COSTS_N_INSNS (3) /* roundint. */
1054 },
1055 /* FP DFmode */
1056 {
1057 COSTS_N_INSNS (24), /* div. */
1058 COSTS_N_INSNS (5), /* mult. */
1059 COSTS_N_INSNS (8), /* mult_addsub. */
1060 COSTS_N_INSNS (30), /* fma. */
1061 COSTS_N_INSNS (3), /* addsub. */
1062 COSTS_N_INSNS (1), /* fpconst. */
1063 COSTS_N_INSNS (1), /* neg. */
1064 COSTS_N_INSNS (3), /* compare. */
1065 COSTS_N_INSNS (3), /* widen. */
1066 COSTS_N_INSNS (3), /* narrow. */
1067 COSTS_N_INSNS (3), /* toint. */
1068 COSTS_N_INSNS (3), /* fromint. */
1069 COSTS_N_INSNS (3) /* roundint. */
1070 }
1071 },
1072 /* Vector */
1073 {
1074 COSTS_N_INSNS (1) /* alu. */
1075 }
1076 };
1077
1078 const struct cpu_cost_table cortexa8_extra_costs =
1079 {
1080 /* ALU */
1081 {
1082 0, /* arith. */
1083 0, /* logical. */
1084 COSTS_N_INSNS (1), /* shift. */
1085 0, /* shift_reg. */
1086 COSTS_N_INSNS (1), /* arith_shift. */
1087 0, /* arith_shift_reg. */
1088 COSTS_N_INSNS (1), /* log_shift. */
1089 0, /* log_shift_reg. */
1090 0, /* extend. */
1091 0, /* extend_arith. */
1092 0, /* bfi. */
1093 0, /* bfx. */
1094 0, /* clz. */
1095 0, /* rev. */
1096 0, /* non_exec. */
1097 true /* non_exec_costs_exec. */
1098 },
1099 {
1100 /* MULT SImode */
1101 {
1102 COSTS_N_INSNS (1), /* simple. */
1103 COSTS_N_INSNS (1), /* flag_setting. */
1104 COSTS_N_INSNS (1), /* extend. */
1105 COSTS_N_INSNS (1), /* add. */
1106 COSTS_N_INSNS (1), /* extend_add. */
1107 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1108 },
1109 /* MULT DImode */
1110 {
1111 0, /* simple (N/A). */
1112 0, /* flag_setting (N/A). */
1113 COSTS_N_INSNS (2), /* extend. */
1114 0, /* add (N/A). */
1115 COSTS_N_INSNS (2), /* extend_add. */
1116 0 /* idiv (N/A). */
1117 }
1118 },
1119 /* LD/ST */
1120 {
1121 COSTS_N_INSNS (1), /* load. */
1122 COSTS_N_INSNS (1), /* load_sign_extend. */
1123 COSTS_N_INSNS (1), /* ldrd. */
1124 COSTS_N_INSNS (1), /* ldm_1st. */
1125 1, /* ldm_regs_per_insn_1st. */
1126 2, /* ldm_regs_per_insn_subsequent. */
1127 COSTS_N_INSNS (1), /* loadf. */
1128 COSTS_N_INSNS (1), /* loadd. */
1129 COSTS_N_INSNS (1), /* load_unaligned. */
1130 COSTS_N_INSNS (1), /* store. */
1131 COSTS_N_INSNS (1), /* strd. */
1132 COSTS_N_INSNS (1), /* stm_1st. */
1133 1, /* stm_regs_per_insn_1st. */
1134 2, /* stm_regs_per_insn_subsequent. */
1135 COSTS_N_INSNS (1), /* storef. */
1136 COSTS_N_INSNS (1), /* stored. */
1137 COSTS_N_INSNS (1) /* store_unaligned. */
1138 },
1139 {
1140 /* FP SFmode */
1141 {
1142 COSTS_N_INSNS (36), /* div. */
1143 COSTS_N_INSNS (11), /* mult. */
1144 COSTS_N_INSNS (20), /* mult_addsub. */
1145 COSTS_N_INSNS (30), /* fma. */
1146 COSTS_N_INSNS (9), /* addsub. */
1147 COSTS_N_INSNS (3), /* fpconst. */
1148 COSTS_N_INSNS (3), /* neg. */
1149 COSTS_N_INSNS (6), /* compare. */
1150 COSTS_N_INSNS (4), /* widen. */
1151 COSTS_N_INSNS (4), /* narrow. */
1152 COSTS_N_INSNS (8), /* toint. */
1153 COSTS_N_INSNS (8), /* fromint. */
1154 COSTS_N_INSNS (8) /* roundint. */
1155 },
1156 /* FP DFmode */
1157 {
1158 COSTS_N_INSNS (64), /* div. */
1159 COSTS_N_INSNS (16), /* mult. */
1160 COSTS_N_INSNS (25), /* mult_addsub. */
1161 COSTS_N_INSNS (30), /* fma. */
1162 COSTS_N_INSNS (9), /* addsub. */
1163 COSTS_N_INSNS (3), /* fpconst. */
1164 COSTS_N_INSNS (3), /* neg. */
1165 COSTS_N_INSNS (6), /* compare. */
1166 COSTS_N_INSNS (6), /* widen. */
1167 COSTS_N_INSNS (6), /* narrow. */
1168 COSTS_N_INSNS (8), /* toint. */
1169 COSTS_N_INSNS (8), /* fromint. */
1170 COSTS_N_INSNS (8) /* roundint. */
1171 }
1172 },
1173 /* Vector */
1174 {
1175 COSTS_N_INSNS (1) /* alu. */
1176 }
1177 };
1178
1179 const struct cpu_cost_table cortexa5_extra_costs =
1180 {
1181 /* ALU */
1182 {
1183 0, /* arith. */
1184 0, /* logical. */
1185 COSTS_N_INSNS (1), /* shift. */
1186 COSTS_N_INSNS (1), /* shift_reg. */
1187 COSTS_N_INSNS (1), /* arith_shift. */
1188 COSTS_N_INSNS (1), /* arith_shift_reg. */
1189 COSTS_N_INSNS (1), /* log_shift. */
1190 COSTS_N_INSNS (1), /* log_shift_reg. */
1191 COSTS_N_INSNS (1), /* extend. */
1192 COSTS_N_INSNS (1), /* extend_arith. */
1193 COSTS_N_INSNS (1), /* bfi. */
1194 COSTS_N_INSNS (1), /* bfx. */
1195 COSTS_N_INSNS (1), /* clz. */
1196 COSTS_N_INSNS (1), /* rev. */
1197 0, /* non_exec. */
1198 true /* non_exec_costs_exec. */
1199 },
1200
1201 {
1202 /* MULT SImode */
1203 {
1204 0, /* simple. */
1205 COSTS_N_INSNS (1), /* flag_setting. */
1206 COSTS_N_INSNS (1), /* extend. */
1207 COSTS_N_INSNS (1), /* add. */
1208 COSTS_N_INSNS (1), /* extend_add. */
1209 COSTS_N_INSNS (7) /* idiv. */
1210 },
1211 /* MULT DImode */
1212 {
1213 0, /* simple (N/A). */
1214 0, /* flag_setting (N/A). */
1215 COSTS_N_INSNS (1), /* extend. */
1216 0, /* add. */
1217 COSTS_N_INSNS (2), /* extend_add. */
1218 0 /* idiv (N/A). */
1219 }
1220 },
1221 /* LD/ST */
1222 {
1223 COSTS_N_INSNS (1), /* load. */
1224 COSTS_N_INSNS (1), /* load_sign_extend. */
1225 COSTS_N_INSNS (6), /* ldrd. */
1226 COSTS_N_INSNS (1), /* ldm_1st. */
1227 1, /* ldm_regs_per_insn_1st. */
1228 2, /* ldm_regs_per_insn_subsequent. */
1229 COSTS_N_INSNS (2), /* loadf. */
1230 COSTS_N_INSNS (4), /* loadd. */
1231 COSTS_N_INSNS (1), /* load_unaligned. */
1232 COSTS_N_INSNS (1), /* store. */
1233 COSTS_N_INSNS (3), /* strd. */
1234 COSTS_N_INSNS (1), /* stm_1st. */
1235 1, /* stm_regs_per_insn_1st. */
1236 2, /* stm_regs_per_insn_subsequent. */
1237 COSTS_N_INSNS (2), /* storef. */
1238 COSTS_N_INSNS (2), /* stored. */
1239 COSTS_N_INSNS (1) /* store_unaligned. */
1240 },
1241 {
1242 /* FP SFmode */
1243 {
1244 COSTS_N_INSNS (15), /* div. */
1245 COSTS_N_INSNS (3), /* mult. */
1246 COSTS_N_INSNS (7), /* mult_addsub. */
1247 COSTS_N_INSNS (7), /* fma. */
1248 COSTS_N_INSNS (3), /* addsub. */
1249 COSTS_N_INSNS (3), /* fpconst. */
1250 COSTS_N_INSNS (3), /* neg. */
1251 COSTS_N_INSNS (3), /* compare. */
1252 COSTS_N_INSNS (3), /* widen. */
1253 COSTS_N_INSNS (3), /* narrow. */
1254 COSTS_N_INSNS (3), /* toint. */
1255 COSTS_N_INSNS (3), /* fromint. */
1256 COSTS_N_INSNS (3) /* roundint. */
1257 },
1258 /* FP DFmode */
1259 {
1260 COSTS_N_INSNS (30), /* div. */
1261 COSTS_N_INSNS (6), /* mult. */
1262 COSTS_N_INSNS (10), /* mult_addsub. */
1263 COSTS_N_INSNS (7), /* fma. */
1264 COSTS_N_INSNS (3), /* addsub. */
1265 COSTS_N_INSNS (3), /* fpconst. */
1266 COSTS_N_INSNS (3), /* neg. */
1267 COSTS_N_INSNS (3), /* compare. */
1268 COSTS_N_INSNS (3), /* widen. */
1269 COSTS_N_INSNS (3), /* narrow. */
1270 COSTS_N_INSNS (3), /* toint. */
1271 COSTS_N_INSNS (3), /* fromint. */
1272 COSTS_N_INSNS (3) /* roundint. */
1273 }
1274 },
1275 /* Vector */
1276 {
1277 COSTS_N_INSNS (1) /* alu. */
1278 }
1279 };
1280
1281
1282 const struct cpu_cost_table cortexa7_extra_costs =
1283 {
1284 /* ALU */
1285 {
1286 0, /* arith. */
1287 0, /* logical. */
1288 COSTS_N_INSNS (1), /* shift. */
1289 COSTS_N_INSNS (1), /* shift_reg. */
1290 COSTS_N_INSNS (1), /* arith_shift. */
1291 COSTS_N_INSNS (1), /* arith_shift_reg. */
1292 COSTS_N_INSNS (1), /* log_shift. */
1293 COSTS_N_INSNS (1), /* log_shift_reg. */
1294 COSTS_N_INSNS (1), /* extend. */
1295 COSTS_N_INSNS (1), /* extend_arith. */
1296 COSTS_N_INSNS (1), /* bfi. */
1297 COSTS_N_INSNS (1), /* bfx. */
1298 COSTS_N_INSNS (1), /* clz. */
1299 COSTS_N_INSNS (1), /* rev. */
1300 0, /* non_exec. */
1301 true /* non_exec_costs_exec. */
1302 },
1303
1304 {
1305 /* MULT SImode */
1306 {
1307 0, /* simple. */
1308 COSTS_N_INSNS (1), /* flag_setting. */
1309 COSTS_N_INSNS (1), /* extend. */
1310 COSTS_N_INSNS (1), /* add. */
1311 COSTS_N_INSNS (1), /* extend_add. */
1312 COSTS_N_INSNS (7) /* idiv. */
1313 },
1314 /* MULT DImode */
1315 {
1316 0, /* simple (N/A). */
1317 0, /* flag_setting (N/A). */
1318 COSTS_N_INSNS (1), /* extend. */
1319 0, /* add. */
1320 COSTS_N_INSNS (2), /* extend_add. */
1321 0 /* idiv (N/A). */
1322 }
1323 },
1324 /* LD/ST */
1325 {
1326 COSTS_N_INSNS (1), /* load. */
1327 COSTS_N_INSNS (1), /* load_sign_extend. */
1328 COSTS_N_INSNS (3), /* ldrd. */
1329 COSTS_N_INSNS (1), /* ldm_1st. */
1330 1, /* ldm_regs_per_insn_1st. */
1331 2, /* ldm_regs_per_insn_subsequent. */
1332 COSTS_N_INSNS (2), /* loadf. */
1333 COSTS_N_INSNS (2), /* loadd. */
1334 COSTS_N_INSNS (1), /* load_unaligned. */
1335 COSTS_N_INSNS (1), /* store. */
1336 COSTS_N_INSNS (3), /* strd. */
1337 COSTS_N_INSNS (1), /* stm_1st. */
1338 1, /* stm_regs_per_insn_1st. */
1339 2, /* stm_regs_per_insn_subsequent. */
1340 COSTS_N_INSNS (2), /* storef. */
1341 COSTS_N_INSNS (2), /* stored. */
1342 COSTS_N_INSNS (1) /* store_unaligned. */
1343 },
1344 {
1345 /* FP SFmode */
1346 {
1347 COSTS_N_INSNS (15), /* div. */
1348 COSTS_N_INSNS (3), /* mult. */
1349 COSTS_N_INSNS (7), /* mult_addsub. */
1350 COSTS_N_INSNS (7), /* fma. */
1351 COSTS_N_INSNS (3), /* addsub. */
1352 COSTS_N_INSNS (3), /* fpconst. */
1353 COSTS_N_INSNS (3), /* neg. */
1354 COSTS_N_INSNS (3), /* compare. */
1355 COSTS_N_INSNS (3), /* widen. */
1356 COSTS_N_INSNS (3), /* narrow. */
1357 COSTS_N_INSNS (3), /* toint. */
1358 COSTS_N_INSNS (3), /* fromint. */
1359 COSTS_N_INSNS (3) /* roundint. */
1360 },
1361 /* FP DFmode */
1362 {
1363 COSTS_N_INSNS (30), /* div. */
1364 COSTS_N_INSNS (6), /* mult. */
1365 COSTS_N_INSNS (10), /* mult_addsub. */
1366 COSTS_N_INSNS (7), /* fma. */
1367 COSTS_N_INSNS (3), /* addsub. */
1368 COSTS_N_INSNS (3), /* fpconst. */
1369 COSTS_N_INSNS (3), /* neg. */
1370 COSTS_N_INSNS (3), /* compare. */
1371 COSTS_N_INSNS (3), /* widen. */
1372 COSTS_N_INSNS (3), /* narrow. */
1373 COSTS_N_INSNS (3), /* toint. */
1374 COSTS_N_INSNS (3), /* fromint. */
1375 COSTS_N_INSNS (3) /* roundint. */
1376 }
1377 },
1378 /* Vector */
1379 {
1380 COSTS_N_INSNS (1) /* alu. */
1381 }
1382 };
1383
1384 const struct cpu_cost_table cortexa12_extra_costs =
1385 {
1386 /* ALU */
1387 {
1388 0, /* arith. */
1389 0, /* logical. */
1390 0, /* shift. */
1391 COSTS_N_INSNS (1), /* shift_reg. */
1392 COSTS_N_INSNS (1), /* arith_shift. */
1393 COSTS_N_INSNS (1), /* arith_shift_reg. */
1394 COSTS_N_INSNS (1), /* log_shift. */
1395 COSTS_N_INSNS (1), /* log_shift_reg. */
1396 0, /* extend. */
1397 COSTS_N_INSNS (1), /* extend_arith. */
1398 0, /* bfi. */
1399 COSTS_N_INSNS (1), /* bfx. */
1400 COSTS_N_INSNS (1), /* clz. */
1401 COSTS_N_INSNS (1), /* rev. */
1402 0, /* non_exec. */
1403 true /* non_exec_costs_exec. */
1404 },
1405 /* MULT SImode */
1406 {
1407 {
1408 COSTS_N_INSNS (2), /* simple. */
1409 COSTS_N_INSNS (3), /* flag_setting. */
1410 COSTS_N_INSNS (2), /* extend. */
1411 COSTS_N_INSNS (3), /* add. */
1412 COSTS_N_INSNS (2), /* extend_add. */
1413 COSTS_N_INSNS (18) /* idiv. */
1414 },
1415 /* MULT DImode */
1416 {
1417 0, /* simple (N/A). */
1418 0, /* flag_setting (N/A). */
1419 COSTS_N_INSNS (3), /* extend. */
1420 0, /* add (N/A). */
1421 COSTS_N_INSNS (3), /* extend_add. */
1422 0 /* idiv (N/A). */
1423 }
1424 },
1425 /* LD/ST */
1426 {
1427 COSTS_N_INSNS (3), /* load. */
1428 COSTS_N_INSNS (3), /* load_sign_extend. */
1429 COSTS_N_INSNS (3), /* ldrd. */
1430 COSTS_N_INSNS (3), /* ldm_1st. */
1431 1, /* ldm_regs_per_insn_1st. */
1432 2, /* ldm_regs_per_insn_subsequent. */
1433 COSTS_N_INSNS (3), /* loadf. */
1434 COSTS_N_INSNS (3), /* loadd. */
1435 0, /* load_unaligned. */
1436 0, /* store. */
1437 0, /* strd. */
1438 0, /* stm_1st. */
1439 1, /* stm_regs_per_insn_1st. */
1440 2, /* stm_regs_per_insn_subsequent. */
1441 COSTS_N_INSNS (2), /* storef. */
1442 COSTS_N_INSNS (2), /* stored. */
1443 0 /* store_unaligned. */
1444 },
1445 {
1446 /* FP SFmode */
1447 {
1448 COSTS_N_INSNS (17), /* div. */
1449 COSTS_N_INSNS (4), /* mult. */
1450 COSTS_N_INSNS (8), /* mult_addsub. */
1451 COSTS_N_INSNS (8), /* fma. */
1452 COSTS_N_INSNS (4), /* addsub. */
1453 COSTS_N_INSNS (2), /* fpconst. */
1454 COSTS_N_INSNS (2), /* neg. */
1455 COSTS_N_INSNS (2), /* compare. */
1456 COSTS_N_INSNS (4), /* widen. */
1457 COSTS_N_INSNS (4), /* narrow. */
1458 COSTS_N_INSNS (4), /* toint. */
1459 COSTS_N_INSNS (4), /* fromint. */
1460 COSTS_N_INSNS (4) /* roundint. */
1461 },
1462 /* FP DFmode */
1463 {
1464 COSTS_N_INSNS (31), /* div. */
1465 COSTS_N_INSNS (4), /* mult. */
1466 COSTS_N_INSNS (8), /* mult_addsub. */
1467 COSTS_N_INSNS (8), /* fma. */
1468 COSTS_N_INSNS (4), /* addsub. */
1469 COSTS_N_INSNS (2), /* fpconst. */
1470 COSTS_N_INSNS (2), /* neg. */
1471 COSTS_N_INSNS (2), /* compare. */
1472 COSTS_N_INSNS (4), /* widen. */
1473 COSTS_N_INSNS (4), /* narrow. */
1474 COSTS_N_INSNS (4), /* toint. */
1475 COSTS_N_INSNS (4), /* fromint. */
1476 COSTS_N_INSNS (4) /* roundint. */
1477 }
1478 },
1479 /* Vector */
1480 {
1481 COSTS_N_INSNS (1) /* alu. */
1482 }
1483 };
1484
1485 const struct cpu_cost_table cortexa15_extra_costs =
1486 {
1487 /* ALU */
1488 {
1489 0, /* arith. */
1490 0, /* logical. */
1491 0, /* shift. */
1492 0, /* shift_reg. */
1493 COSTS_N_INSNS (1), /* arith_shift. */
1494 COSTS_N_INSNS (1), /* arith_shift_reg. */
1495 COSTS_N_INSNS (1), /* log_shift. */
1496 COSTS_N_INSNS (1), /* log_shift_reg. */
1497 0, /* extend. */
1498 COSTS_N_INSNS (1), /* extend_arith. */
1499 COSTS_N_INSNS (1), /* bfi. */
1500 0, /* bfx. */
1501 0, /* clz. */
1502 0, /* rev. */
1503 0, /* non_exec. */
1504 true /* non_exec_costs_exec. */
1505 },
1506 /* MULT SImode */
1507 {
1508 {
1509 COSTS_N_INSNS (2), /* simple. */
1510 COSTS_N_INSNS (3), /* flag_setting. */
1511 COSTS_N_INSNS (2), /* extend. */
1512 COSTS_N_INSNS (2), /* add. */
1513 COSTS_N_INSNS (2), /* extend_add. */
1514 COSTS_N_INSNS (18) /* idiv. */
1515 },
1516 /* MULT DImode */
1517 {
1518 0, /* simple (N/A). */
1519 0, /* flag_setting (N/A). */
1520 COSTS_N_INSNS (3), /* extend. */
1521 0, /* add (N/A). */
1522 COSTS_N_INSNS (3), /* extend_add. */
1523 0 /* idiv (N/A). */
1524 }
1525 },
1526 /* LD/ST */
1527 {
1528 COSTS_N_INSNS (3), /* load. */
1529 COSTS_N_INSNS (3), /* load_sign_extend. */
1530 COSTS_N_INSNS (3), /* ldrd. */
1531 COSTS_N_INSNS (4), /* ldm_1st. */
1532 1, /* ldm_regs_per_insn_1st. */
1533 2, /* ldm_regs_per_insn_subsequent. */
1534 COSTS_N_INSNS (4), /* loadf. */
1535 COSTS_N_INSNS (4), /* loadd. */
1536 0, /* load_unaligned. */
1537 0, /* store. */
1538 0, /* strd. */
1539 COSTS_N_INSNS (1), /* stm_1st. */
1540 1, /* stm_regs_per_insn_1st. */
1541 2, /* stm_regs_per_insn_subsequent. */
1542 0, /* storef. */
1543 0, /* stored. */
1544 0 /* store_unaligned. */
1545 },
1546 {
1547 /* FP SFmode */
1548 {
1549 COSTS_N_INSNS (17), /* div. */
1550 COSTS_N_INSNS (4), /* mult. */
1551 COSTS_N_INSNS (8), /* mult_addsub. */
1552 COSTS_N_INSNS (8), /* fma. */
1553 COSTS_N_INSNS (4), /* addsub. */
1554 COSTS_N_INSNS (2), /* fpconst. */
1555 COSTS_N_INSNS (2), /* neg. */
1556 COSTS_N_INSNS (5), /* compare. */
1557 COSTS_N_INSNS (4), /* widen. */
1558 COSTS_N_INSNS (4), /* narrow. */
1559 COSTS_N_INSNS (4), /* toint. */
1560 COSTS_N_INSNS (4), /* fromint. */
1561 COSTS_N_INSNS (4) /* roundint. */
1562 },
1563 /* FP DFmode */
1564 {
1565 COSTS_N_INSNS (31), /* div. */
1566 COSTS_N_INSNS (4), /* mult. */
1567 COSTS_N_INSNS (8), /* mult_addsub. */
1568 COSTS_N_INSNS (8), /* fma. */
1569 COSTS_N_INSNS (4), /* addsub. */
1570 COSTS_N_INSNS (2), /* fpconst. */
1571 COSTS_N_INSNS (2), /* neg. */
1572 COSTS_N_INSNS (2), /* compare. */
1573 COSTS_N_INSNS (4), /* widen. */
1574 COSTS_N_INSNS (4), /* narrow. */
1575 COSTS_N_INSNS (4), /* toint. */
1576 COSTS_N_INSNS (4), /* fromint. */
1577 COSTS_N_INSNS (4) /* roundint. */
1578 }
1579 },
1580 /* Vector */
1581 {
1582 COSTS_N_INSNS (1) /* alu. */
1583 }
1584 };
1585
1586 const struct cpu_cost_table v7m_extra_costs =
1587 {
1588 /* ALU */
1589 {
1590 0, /* arith. */
1591 0, /* logical. */
1592 0, /* shift. */
1593 0, /* shift_reg. */
1594 0, /* arith_shift. */
1595 COSTS_N_INSNS (1), /* arith_shift_reg. */
1596 0, /* log_shift. */
1597 COSTS_N_INSNS (1), /* log_shift_reg. */
1598 0, /* extend. */
1599 COSTS_N_INSNS (1), /* extend_arith. */
1600 0, /* bfi. */
1601 0, /* bfx. */
1602 0, /* clz. */
1603 0, /* rev. */
1604 COSTS_N_INSNS (1), /* non_exec. */
1605 false /* non_exec_costs_exec. */
1606 },
1607 {
1608 /* MULT SImode */
1609 {
1610 COSTS_N_INSNS (1), /* simple. */
1611 COSTS_N_INSNS (1), /* flag_setting. */
1612 COSTS_N_INSNS (2), /* extend. */
1613 COSTS_N_INSNS (1), /* add. */
1614 COSTS_N_INSNS (3), /* extend_add. */
1615 COSTS_N_INSNS (8) /* idiv. */
1616 },
1617 /* MULT DImode */
1618 {
1619 0, /* simple (N/A). */
1620 0, /* flag_setting (N/A). */
1621 COSTS_N_INSNS (2), /* extend. */
1622 0, /* add (N/A). */
1623 COSTS_N_INSNS (3), /* extend_add. */
1624 0 /* idiv (N/A). */
1625 }
1626 },
1627 /* LD/ST */
1628 {
1629 COSTS_N_INSNS (2), /* load. */
1630 0, /* load_sign_extend. */
1631 COSTS_N_INSNS (3), /* ldrd. */
1632 COSTS_N_INSNS (2), /* ldm_1st. */
1633 1, /* ldm_regs_per_insn_1st. */
1634 1, /* ldm_regs_per_insn_subsequent. */
1635 COSTS_N_INSNS (2), /* loadf. */
1636 COSTS_N_INSNS (3), /* loadd. */
1637 COSTS_N_INSNS (1), /* load_unaligned. */
1638 COSTS_N_INSNS (2), /* store. */
1639 COSTS_N_INSNS (3), /* strd. */
1640 COSTS_N_INSNS (2), /* stm_1st. */
1641 1, /* stm_regs_per_insn_1st. */
1642 1, /* stm_regs_per_insn_subsequent. */
1643 COSTS_N_INSNS (2), /* storef. */
1644 COSTS_N_INSNS (3), /* stored. */
1645 COSTS_N_INSNS (1) /* store_unaligned. */
1646 },
1647 {
1648 /* FP SFmode */
1649 {
1650 COSTS_N_INSNS (7), /* div. */
1651 COSTS_N_INSNS (2), /* mult. */
1652 COSTS_N_INSNS (5), /* mult_addsub. */
1653 COSTS_N_INSNS (3), /* fma. */
1654 COSTS_N_INSNS (1), /* addsub. */
1655 0, /* fpconst. */
1656 0, /* neg. */
1657 0, /* compare. */
1658 0, /* widen. */
1659 0, /* narrow. */
1660 0, /* toint. */
1661 0, /* fromint. */
1662 0 /* roundint. */
1663 },
1664 /* FP DFmode */
1665 {
1666 COSTS_N_INSNS (15), /* div. */
1667 COSTS_N_INSNS (5), /* mult. */
1668 COSTS_N_INSNS (7), /* mult_addsub. */
1669 COSTS_N_INSNS (7), /* fma. */
1670 COSTS_N_INSNS (3), /* addsub. */
1671 0, /* fpconst. */
1672 0, /* neg. */
1673 0, /* compare. */
1674 0, /* widen. */
1675 0, /* narrow. */
1676 0, /* toint. */
1677 0, /* fromint. */
1678 0 /* roundint. */
1679 }
1680 },
1681 /* Vector */
1682 {
1683 COSTS_N_INSNS (1) /* alu. */
1684 }
1685 };
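/* For reference (a reading aid, not part of the cost model itself):
   COSTS_N_INSNS (N) is GCC's generic cost macro from rtl.h and scales N
   by the cost of one simple instruction, so a 0 entry in the tables
   above means "no extra cost beyond the base instruction" rather than a
   free operation.  For example, the v7-m SFmode divide is modelled as
   seven instructions' worth of extra cost.  */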
1686
1687 const struct tune_params arm_slowmul_tune =
1688 {
1689 arm_slowmul_rtx_costs,
1690 NULL,
1691 NULL, /* Sched adj cost. */
1692 3, /* Constant limit. */
1693 5, /* Max cond insns. */
1694 ARM_PREFETCH_NOT_BENEFICIAL,
1695 true, /* Prefer constant pool. */
1696 arm_default_branch_cost,
1697 false, /* Prefer LDRD/STRD. */
1698 {true, true}, /* Prefer non short circuit. */
1699 &arm_default_vec_cost, /* Vectorizer costs. */
1700 false, /* Prefer Neon for 64-bits bitops. */
1701 false, false /* Prefer 32-bit encodings. */
1702 };
1703
1704 const struct tune_params arm_fastmul_tune =
1705 {
1706 arm_fastmul_rtx_costs,
1707 NULL,
1708 NULL, /* Sched adj cost. */
1709 1, /* Constant limit. */
1710 5, /* Max cond insns. */
1711 ARM_PREFETCH_NOT_BENEFICIAL,
1712 true, /* Prefer constant pool. */
1713 arm_default_branch_cost,
1714 false, /* Prefer LDRD/STRD. */
1715 {true, true}, /* Prefer non short circuit. */
1716 &arm_default_vec_cost, /* Vectorizer costs. */
1717 false, /* Prefer Neon for 64-bits bitops. */
1718 false, false /* Prefer 32-bit encodings. */
1719 };
1720
1721 /* StrongARM has early execution of branches, so a sequence that is worth
1722 skipping is shorter. Set max_insns_skipped to a lower value. */
1723
1724 const struct tune_params arm_strongarm_tune =
1725 {
1726 arm_fastmul_rtx_costs,
1727 NULL,
1728 NULL, /* Sched adj cost. */
1729 1, /* Constant limit. */
1730 3, /* Max cond insns. */
1731 ARM_PREFETCH_NOT_BENEFICIAL,
1732 true, /* Prefer constant pool. */
1733 arm_default_branch_cost,
1734 false, /* Prefer LDRD/STRD. */
1735 {true, true}, /* Prefer non short circuit. */
1736 &arm_default_vec_cost, /* Vectorizer costs. */
1737 false, /* Prefer Neon for 64-bits bitops. */
1738 false, false /* Prefer 32-bit encodings. */
1739 };
1740
1741 const struct tune_params arm_xscale_tune =
1742 {
1743 arm_xscale_rtx_costs,
1744 NULL,
1745 xscale_sched_adjust_cost,
1746 2, /* Constant limit. */
1747 3, /* Max cond insns. */
1748 ARM_PREFETCH_NOT_BENEFICIAL,
1749 true, /* Prefer constant pool. */
1750 arm_default_branch_cost,
1751 false, /* Prefer LDRD/STRD. */
1752 {true, true}, /* Prefer non short circuit. */
1753 &arm_default_vec_cost, /* Vectorizer costs. */
1754 false, /* Prefer Neon for 64-bits bitops. */
1755 false, false /* Prefer 32-bit encodings. */
1756 };
1757
1758 const struct tune_params arm_9e_tune =
1759 {
1760 arm_9e_rtx_costs,
1761 NULL,
1762 NULL, /* Sched adj cost. */
1763 1, /* Constant limit. */
1764 5, /* Max cond insns. */
1765 ARM_PREFETCH_NOT_BENEFICIAL,
1766 true, /* Prefer constant pool. */
1767 arm_default_branch_cost,
1768 false, /* Prefer LDRD/STRD. */
1769 {true, true}, /* Prefer non short circuit. */
1770 &arm_default_vec_cost, /* Vectorizer costs. */
1771 false, /* Prefer Neon for 64-bits bitops. */
1772 false, false /* Prefer 32-bit encodings. */
1773 };
1774
1775 const struct tune_params arm_v6t2_tune =
1776 {
1777 arm_9e_rtx_costs,
1778 NULL,
1779 NULL, /* Sched adj cost. */
1780 1, /* Constant limit. */
1781 5, /* Max cond insns. */
1782 ARM_PREFETCH_NOT_BENEFICIAL,
1783 false, /* Prefer constant pool. */
1784 arm_default_branch_cost,
1785 false, /* Prefer LDRD/STRD. */
1786 {true, true}, /* Prefer non short circuit. */
1787 &arm_default_vec_cost, /* Vectorizer costs. */
1788 false, /* Prefer Neon for 64-bits bitops. */
1789 false, false /* Prefer 32-bit encodings. */
1790 };
1791
1792 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1793 const struct tune_params arm_cortex_tune =
1794 {
1795 arm_9e_rtx_costs,
1796 &generic_extra_costs,
1797 NULL, /* Sched adj cost. */
1798 1, /* Constant limit. */
1799 5, /* Max cond insns. */
1800 ARM_PREFETCH_NOT_BENEFICIAL,
1801 false, /* Prefer constant pool. */
1802 arm_default_branch_cost,
1803 false, /* Prefer LDRD/STRD. */
1804 {true, true}, /* Prefer non short circuit. */
1805 &arm_default_vec_cost, /* Vectorizer costs. */
1806 false, /* Prefer Neon for 64-bits bitops. */
1807 false, false /* Prefer 32-bit encodings. */
1808 };
1809
1810 const struct tune_params arm_cortex_a8_tune =
1811 {
1812 arm_9e_rtx_costs,
1813 &cortexa8_extra_costs,
1814 NULL, /* Sched adj cost. */
1815 1, /* Constant limit. */
1816 5, /* Max cond insns. */
1817 ARM_PREFETCH_NOT_BENEFICIAL,
1818 false, /* Prefer constant pool. */
1819 arm_default_branch_cost,
1820 false, /* Prefer LDRD/STRD. */
1821 {true, true}, /* Prefer non short circuit. */
1822 &arm_default_vec_cost, /* Vectorizer costs. */
1823 false, /* Prefer Neon for 64-bits bitops. */
1824 false, false /* Prefer 32-bit encodings. */
1825 };
1826
1827 const struct tune_params arm_cortex_a7_tune =
1828 {
1829 arm_9e_rtx_costs,
1830 &cortexa7_extra_costs,
1831 NULL, /* Sched adj cost. */
1832 1, /* Constant limit. */
1833 5, /* Max cond insns. */
1834 ARM_PREFETCH_NOT_BENEFICIAL,
1835 false, /* Prefer constant pool. */
1836 arm_default_branch_cost,
1837 false, /* Prefer LDRD/STRD. */
1838 {true, true}, /* Prefer non short circuit. */
1839 &arm_default_vec_cost, /* Vectorizer costs. */
1840 false, /* Prefer Neon for 64-bits bitops. */
1841 false, false /* Prefer 32-bit encodings. */
1842 };
1843
1844 const struct tune_params arm_cortex_a15_tune =
1845 {
1846 arm_9e_rtx_costs,
1847 &cortexa15_extra_costs,
1848 NULL, /* Sched adj cost. */
1849 1, /* Constant limit. */
1850 2, /* Max cond insns. */
1851 ARM_PREFETCH_NOT_BENEFICIAL,
1852 false, /* Prefer constant pool. */
1853 arm_default_branch_cost,
1854 true, /* Prefer LDRD/STRD. */
1855 {true, true}, /* Prefer non short circuit. */
1856 &arm_default_vec_cost, /* Vectorizer costs. */
1857 false, /* Prefer Neon for 64-bits bitops. */
1858 true, true /* Prefer 32-bit encodings. */
1859 };
1860
1861 const struct tune_params arm_cortex_a53_tune =
1862 {
1863 arm_9e_rtx_costs,
1864 &cortexa53_extra_costs,
1865 NULL, /* Scheduler cost adjustment. */
1866 1, /* Constant limit. */
1867 5, /* Max cond insns. */
1868 ARM_PREFETCH_NOT_BENEFICIAL,
1869 false, /* Prefer constant pool. */
1870 arm_default_branch_cost,
1871 false, /* Prefer LDRD/STRD. */
1872 {true, true}, /* Prefer non short circuit. */
1873 &arm_default_vec_cost, /* Vectorizer costs. */
1874 false, /* Prefer Neon for 64-bits bitops. */
1875 false, false /* Prefer 32-bit encodings. */
1876 };
1877
1878 const struct tune_params arm_cortex_a57_tune =
1879 {
1880 arm_9e_rtx_costs,
1881 &cortexa57_extra_costs,
1882 NULL, /* Scheduler cost adjustment. */
1883 1, /* Constant limit. */
1884 2, /* Max cond insns. */
1885 ARM_PREFETCH_NOT_BENEFICIAL,
1886 false, /* Prefer constant pool. */
1887 arm_default_branch_cost,
1888 true, /* Prefer LDRD/STRD. */
1889 {true, true}, /* Prefer non short circuit. */
1890 &arm_default_vec_cost, /* Vectorizer costs. */
1891 false, /* Prefer Neon for 64-bits bitops. */
1892 true, true /* Prefer 32-bit encodings. */
1893 };
1894
1895 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1896 less appealing. Set max_insns_skipped to a low value. */
1897
1898 const struct tune_params arm_cortex_a5_tune =
1899 {
1900 arm_9e_rtx_costs,
1901 &cortexa5_extra_costs,
1902 NULL, /* Sched adj cost. */
1903 1, /* Constant limit. */
1904 1, /* Max cond insns. */
1905 ARM_PREFETCH_NOT_BENEFICIAL,
1906 false, /* Prefer constant pool. */
1907 arm_cortex_a5_branch_cost,
1908 false, /* Prefer LDRD/STRD. */
1909 {false, false}, /* Prefer non short circuit. */
1910 &arm_default_vec_cost, /* Vectorizer costs. */
1911 false, /* Prefer Neon for 64-bits bitops. */
1912 false, false /* Prefer 32-bit encodings. */
1913 };
1914
1915 const struct tune_params arm_cortex_a9_tune =
1916 {
1917 arm_9e_rtx_costs,
1918 &cortexa9_extra_costs,
1919 cortex_a9_sched_adjust_cost,
1920 1, /* Constant limit. */
1921 5, /* Max cond insns. */
1922 ARM_PREFETCH_BENEFICIAL(4,32,32),
1923 false, /* Prefer constant pool. */
1924 arm_default_branch_cost,
1925 false, /* Prefer LDRD/STRD. */
1926 {true, true}, /* Prefer non short circuit. */
1927 &arm_default_vec_cost, /* Vectorizer costs. */
1928 false, /* Prefer Neon for 64-bits bitops. */
1929 false, false /* Prefer 32-bit encodings. */
1930 };
1931
1932 const struct tune_params arm_cortex_a12_tune =
1933 {
1934 arm_9e_rtx_costs,
1935 &cortexa12_extra_costs,
1936 NULL, /* Sched adj cost. */
1937 1, /* Constant limit. */
1938 5, /* Max cond insns. */
1939 ARM_PREFETCH_BENEFICIAL(4,32,32),
1940 false, /* Prefer constant pool. */
1941 arm_default_branch_cost,
1942 true, /* Prefer LDRD/STRD. */
1943 {true, true}, /* Prefer non short circuit. */
1944 &arm_default_vec_cost, /* Vectorizer costs. */
1945 false, /* Prefer Neon for 64-bits bitops. */
1946 false, false /* Prefer 32-bit encodings. */
1947 };
1948
1949 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
1950 cycle to execute each. An LDR from the constant pool also takes two cycles
1951 to execute, but mildly increases pipelining opportunity (consecutive
1952 loads/stores can be pipelined together, saving one cycle), and may also
1953 improve icache utilisation. Hence we prefer the constant pool for such
1954 processors. */
1955
1956 const struct tune_params arm_v7m_tune =
1957 {
1958 arm_9e_rtx_costs,
1959 &v7m_extra_costs,
1960 NULL, /* Sched adj cost. */
1961 1, /* Constant limit. */
1962 2, /* Max cond insns. */
1963 ARM_PREFETCH_NOT_BENEFICIAL,
1964 true, /* Prefer constant pool. */
1965 arm_cortex_m_branch_cost,
1966 false, /* Prefer LDRD/STRD. */
1967 {false, false}, /* Prefer non short circuit. */
1968 &arm_default_vec_cost, /* Vectorizer costs. */
1969 false, /* Prefer Neon for 64-bits bitops. */
1970 false, false /* Prefer 32-bit encodings. */
1971 };
1972
1973 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1974 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1975 const struct tune_params arm_v6m_tune =
1976 {
1977 arm_9e_rtx_costs,
1978 NULL,
1979 NULL, /* Sched adj cost. */
1980 1, /* Constant limit. */
1981 5, /* Max cond insns. */
1982 ARM_PREFETCH_NOT_BENEFICIAL,
1983 false, /* Prefer constant pool. */
1984 arm_default_branch_cost,
1985 false, /* Prefer LDRD/STRD. */
1986 {false, false}, /* Prefer non short circuit. */
1987 &arm_default_vec_cost, /* Vectorizer costs. */
1988 false, /* Prefer Neon for 64-bits bitops. */
1989 false, false /* Prefer 32-bit encodings. */
1990 };
1991
1992 const struct tune_params arm_fa726te_tune =
1993 {
1994 arm_9e_rtx_costs,
1995 NULL,
1996 fa726te_sched_adjust_cost,
1997 1, /* Constant limit. */
1998 5, /* Max cond insns. */
1999 ARM_PREFETCH_NOT_BENEFICIAL,
2000 true, /* Prefer constant pool. */
2001 arm_default_branch_cost,
2002 false, /* Prefer LDRD/STRD. */
2003 {true, true}, /* Prefer non short circuit. */
2004 &arm_default_vec_cost, /* Vectorizer costs. */
2005 false, /* Prefer Neon for 64-bits bitops. */
2006 false, false /* Prefer 32-bit encodings. */
2007 };
2008
2009
2010 /* Not all of these give usefully different compilation alternatives,
2011 but there is no simple way of generalizing them. */
2012 static const struct processors all_cores[] =
2013 {
2014 /* ARM Cores */
2015 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2016 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2017 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2018 #include "arm-cores.def"
2019 #undef ARM_CORE
2020 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2021 };
2022
2023 static const struct processors all_architectures[] =
2024 {
2025 /* ARM Architectures */
2026 /* We don't specify tuning costs here as they will be figured out
2027 from the core. */
2028
2029 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2030 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2031 #include "arm-arches.def"
2032 #undef ARM_ARCH
2033 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2034 };
2035
2036
2037 /* These are populated as commandline arguments are processed, or NULL
2038 if not specified. */
2039 static const struct processors *arm_selected_arch;
2040 static const struct processors *arm_selected_cpu;
2041 static const struct processors *arm_selected_tune;
2042
2043 /* The name of the preprocessor macro to define for this architecture. */
2044
2045 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2046
2047 /* Available values for -mfpu=. */
2048
2049 static const struct arm_fpu_desc all_fpus[] =
2050 {
2051 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2052 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2053 #include "arm-fpus.def"
2054 #undef ARM_FPU
2055 };
2056
2057
2058 /* Supported TLS relocations. */
2059
2060 enum tls_reloc {
2061 TLS_GD32,
2062 TLS_LDM32,
2063 TLS_LDO32,
2064 TLS_IE32,
2065 TLS_LE32,
2066 TLS_DESCSEQ /* GNU scheme */
2067 };
2068
2069 /* The maximum number of insns to be used when loading a constant. */
2070 inline static int
2071 arm_constant_limit (bool size_p)
2072 {
2073 return size_p ? 1 : current_tune->constant_limit;
2074 }
2075
2076 /* Emit an insn that's a simple single-set. Both the operands must be known
2077 to be valid. */
2078 inline static rtx
2079 emit_set_insn (rtx x, rtx y)
2080 {
2081 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2082 }
2083
2084 /* Return the number of bits set in VALUE. */
2085 static unsigned
2086 bit_count (unsigned long value)
2087 {
2088 unsigned long count = 0;
2089
2090 while (value)
2091 {
2092 count++;
2093 value &= value - 1; /* Clear the least-significant set bit. */
2094 }
2095
2096 return count;
2097 }
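/* For illustration: bit_count (0x29) sees VALUE go 0x29 -> 0x28 -> 0x20
   -> 0 and returns 3; the "value &= value - 1" step clears exactly one
   set bit per iteration (Kernighan's trick), so the loop runs once per
   set bit rather than once per bit position.  */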
2098
2099 typedef struct
2100 {
2101 enum machine_mode mode;
2102 const char *name;
2103 } arm_fixed_mode_set;
2104
2105 /* A small helper for setting fixed-point optab libfuncs. */
2106
2107 static void
2108 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
2109 const char *funcname, const char *modename,
2110 int num_suffix)
2111 {
2112 char buffer[50];
2113
2114 if (num_suffix == 0)
2115 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2116 else
2117 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2118
2119 set_optab_libfunc (optable, mode, buffer);
2120 }
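/* As a sketch of the names this produces: a call such as
     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3);
   registers the libcall "__gnu_addqq3" for QQmode addition, matching the
   __gnu_-prefixed fixed-point helpers that the loops further down set up
   for every fixed-point mode.  */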
2121
2122 static void
2123 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
2124 enum machine_mode from, const char *funcname,
2125 const char *toname, const char *fromname)
2126 {
2127 char buffer[50];
2128 const char *maybe_suffix_2 = "";
2129
2130 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2131 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2132 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2133 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2134 maybe_suffix_2 = "2";
2135
2136 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2137 maybe_suffix_2);
2138
2139 set_conv_libfunc (optable, to, from, buffer);
2140 }
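/* Likewise for conversions, the name is assembled as
   "__gnu_" + funcname + fromname + toname, plus a trailing "2" whenever
   fixed-bit.h would use the two-suffix spelling.  For instance (a
   sketch), a fract_optab conversion from HQmode to SQmode is registered
   as "__gnu_fracthqsq2".  */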
2141
2142 /* Set up library functions unique to ARM. */
2143
2144 static void
2145 arm_init_libfuncs (void)
2146 {
2147 /* For Linux, we have access to kernel support for atomic operations. */
2148 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2149 init_sync_libfuncs (2 * UNITS_PER_WORD);
2150
2151 /* There are no special library functions unless we are using the
2152 ARM BPABI. */
2153 if (!TARGET_BPABI)
2154 return;
2155
2156 /* The functions below are described in Section 4 of the "Run-Time
2157 ABI for the ARM architecture", Version 1.0. */
2158
2159 /* Double-precision floating-point arithmetic. Table 2. */
2160 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2161 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2162 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2163 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2164 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2165
2166 /* Double-precision comparisons. Table 3. */
2167 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2168 set_optab_libfunc (ne_optab, DFmode, NULL);
2169 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2170 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2171 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2172 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2173 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2174
2175 /* Single-precision floating-point arithmetic. Table 4. */
2176 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2177 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2178 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2179 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2180 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2181
2182 /* Single-precision comparisons. Table 5. */
2183 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2184 set_optab_libfunc (ne_optab, SFmode, NULL);
2185 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2186 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2187 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2188 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2189 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2190
2191 /* Floating-point to integer conversions. Table 6. */
2192 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2193 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2194 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2195 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2196 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2197 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2198 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2199 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2200
2201 /* Conversions between floating types. Table 7. */
2202 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2203 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2204
2205 /* Integer to floating-point conversions. Table 8. */
2206 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2207 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2208 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2209 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2210 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2211 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2212 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2213 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2214
2215 /* Long long. Table 9. */
2216 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2217 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2218 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2219 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2220 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2221 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2222 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2223 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2224
2225 /* Integer (32/32->32) division. \S 4.3.1. */
2226 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2227 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2228
2229 /* The divmod functions are designed so that they can be used for
2230 plain division, even though they return both the quotient and the
2231 remainder. The quotient is returned in the usual location (i.e.,
2232 r0 for SImode, {r0, r1} for DImode), just as would be expected
2233 for an ordinary division routine. Because the AAPCS calling
2234 conventions specify that all of { r0, r1, r2, r3 } are
2235 call-clobbered registers, there is no need to tell the compiler
2236 explicitly that those registers are clobbered by these
2237 routines. */
2238 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2239 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
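/* Concretely (a sketch, not normative ABI text): for
     long long q = a / b;
   the compiler can simply call __aeabi_ldivmod and take the quotient
   from {r0, r1}, ignoring the remainder returned in {r2, r3}.  */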
2240
2241 /* For SImode division the ABI provides div-without-mod routines,
2242 which are faster. */
2243 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2244 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2245
2246 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2247 divmod libcalls instead. */
2248 set_optab_libfunc (smod_optab, DImode, NULL);
2249 set_optab_libfunc (umod_optab, DImode, NULL);
2250 set_optab_libfunc (smod_optab, SImode, NULL);
2251 set_optab_libfunc (umod_optab, SImode, NULL);
2252
2253 /* Half-precision float operations. The compiler handles all operations
2254 with NULL libfuncs by converting to SFmode. */
2255 switch (arm_fp16_format)
2256 {
2257 case ARM_FP16_FORMAT_IEEE:
2258 case ARM_FP16_FORMAT_ALTERNATIVE:
2259
2260 /* Conversions. */
2261 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2262 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2263 ? "__gnu_f2h_ieee"
2264 : "__gnu_f2h_alternative"));
2265 set_conv_libfunc (sext_optab, SFmode, HFmode,
2266 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2267 ? "__gnu_h2f_ieee"
2268 : "__gnu_h2f_alternative"));
2269
2270 /* Arithmetic. */
2271 set_optab_libfunc (add_optab, HFmode, NULL);
2272 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2273 set_optab_libfunc (smul_optab, HFmode, NULL);
2274 set_optab_libfunc (neg_optab, HFmode, NULL);
2275 set_optab_libfunc (sub_optab, HFmode, NULL);
2276
2277 /* Comparisons. */
2278 set_optab_libfunc (eq_optab, HFmode, NULL);
2279 set_optab_libfunc (ne_optab, HFmode, NULL);
2280 set_optab_libfunc (lt_optab, HFmode, NULL);
2281 set_optab_libfunc (le_optab, HFmode, NULL);
2282 set_optab_libfunc (ge_optab, HFmode, NULL);
2283 set_optab_libfunc (gt_optab, HFmode, NULL);
2284 set_optab_libfunc (unord_optab, HFmode, NULL);
2285 break;
2286
2287 default:
2288 break;
2289 }
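/* As an illustration (assuming the IEEE __fp16 format and no
   half-precision hardware conversions): for
     __fp16 a, b;  __fp16 c = a + b;
   the operands are widened with __gnu_h2f_ieee, the addition is done in
   SFmode, and the result is narrowed again with __gnu_f2h_ieee on the
   store, precisely because the HFmode optabs above are left as NULL.  */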
2290
2291 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2292 {
2293 const arm_fixed_mode_set fixed_arith_modes[] =
2294 {
2295 { QQmode, "qq" },
2296 { UQQmode, "uqq" },
2297 { HQmode, "hq" },
2298 { UHQmode, "uhq" },
2299 { SQmode, "sq" },
2300 { USQmode, "usq" },
2301 { DQmode, "dq" },
2302 { UDQmode, "udq" },
2303 { TQmode, "tq" },
2304 { UTQmode, "utq" },
2305 { HAmode, "ha" },
2306 { UHAmode, "uha" },
2307 { SAmode, "sa" },
2308 { USAmode, "usa" },
2309 { DAmode, "da" },
2310 { UDAmode, "uda" },
2311 { TAmode, "ta" },
2312 { UTAmode, "uta" }
2313 };
2314 const arm_fixed_mode_set fixed_conv_modes[] =
2315 {
2316 { QQmode, "qq" },
2317 { UQQmode, "uqq" },
2318 { HQmode, "hq" },
2319 { UHQmode, "uhq" },
2320 { SQmode, "sq" },
2321 { USQmode, "usq" },
2322 { DQmode, "dq" },
2323 { UDQmode, "udq" },
2324 { TQmode, "tq" },
2325 { UTQmode, "utq" },
2326 { HAmode, "ha" },
2327 { UHAmode, "uha" },
2328 { SAmode, "sa" },
2329 { USAmode, "usa" },
2330 { DAmode, "da" },
2331 { UDAmode, "uda" },
2332 { TAmode, "ta" },
2333 { UTAmode, "uta" },
2334 { QImode, "qi" },
2335 { HImode, "hi" },
2336 { SImode, "si" },
2337 { DImode, "di" },
2338 { TImode, "ti" },
2339 { SFmode, "sf" },
2340 { DFmode, "df" }
2341 };
2342 unsigned int i, j;
2343
2344 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2345 {
2346 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2347 "add", fixed_arith_modes[i].name, 3);
2348 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2349 "ssadd", fixed_arith_modes[i].name, 3);
2350 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2351 "usadd", fixed_arith_modes[i].name, 3);
2352 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2353 "sub", fixed_arith_modes[i].name, 3);
2354 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2355 "sssub", fixed_arith_modes[i].name, 3);
2356 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2357 "ussub", fixed_arith_modes[i].name, 3);
2358 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2359 "mul", fixed_arith_modes[i].name, 3);
2360 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2361 "ssmul", fixed_arith_modes[i].name, 3);
2362 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2363 "usmul", fixed_arith_modes[i].name, 3);
2364 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2365 "div", fixed_arith_modes[i].name, 3);
2366 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2367 "udiv", fixed_arith_modes[i].name, 3);
2368 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2369 "ssdiv", fixed_arith_modes[i].name, 3);
2370 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2371 "usdiv", fixed_arith_modes[i].name, 3);
2372 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2373 "neg", fixed_arith_modes[i].name, 2);
2374 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2375 "ssneg", fixed_arith_modes[i].name, 2);
2376 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2377 "usneg", fixed_arith_modes[i].name, 2);
2378 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2379 "ashl", fixed_arith_modes[i].name, 3);
2380 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2381 "ashr", fixed_arith_modes[i].name, 3);
2382 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2383 "lshr", fixed_arith_modes[i].name, 3);
2384 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2385 "ssashl", fixed_arith_modes[i].name, 3);
2386 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2387 "usashl", fixed_arith_modes[i].name, 3);
2388 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2389 "cmp", fixed_arith_modes[i].name, 2);
2390 }
2391
2392 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2393 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2394 {
2395 if (i == j
2396 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2397 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2398 continue;
2399
2400 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2401 fixed_conv_modes[j].mode, "fract",
2402 fixed_conv_modes[i].name,
2403 fixed_conv_modes[j].name);
2404 arm_set_fixed_conv_libfunc (satfract_optab,
2405 fixed_conv_modes[i].mode,
2406 fixed_conv_modes[j].mode, "satfract",
2407 fixed_conv_modes[i].name,
2408 fixed_conv_modes[j].name);
2409 arm_set_fixed_conv_libfunc (fractuns_optab,
2410 fixed_conv_modes[i].mode,
2411 fixed_conv_modes[j].mode, "fractuns",
2412 fixed_conv_modes[i].name,
2413 fixed_conv_modes[j].name);
2414 arm_set_fixed_conv_libfunc (satfractuns_optab,
2415 fixed_conv_modes[i].mode,
2416 fixed_conv_modes[j].mode, "satfractuns",
2417 fixed_conv_modes[i].name,
2418 fixed_conv_modes[j].name);
2419 }
2420 }
2421
2422 if (TARGET_AAPCS_BASED)
2423 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2424 }
2425
2426 /* On AAPCS systems, this is the "struct __va_list". */
2427 static GTY(()) tree va_list_type;
2428
2429 /* Return the type to use as __builtin_va_list. */
2430 static tree
2431 arm_build_builtin_va_list (void)
2432 {
2433 tree va_list_name;
2434 tree ap_field;
2435
2436 if (!TARGET_AAPCS_BASED)
2437 return std_build_builtin_va_list ();
2438
2439 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2440 defined as:
2441
2442 struct __va_list
2443 {
2444 void *__ap;
2445 };
2446
2447 The C Library ABI further reinforces this definition in \S
2448 4.1.
2449
2450 We must follow this definition exactly. The structure tag
2451 name is visible in C++ mangled names, and thus forms a part
2452 of the ABI. The field name may be used by people who
2453 #include <stdarg.h>. */
2454 /* Create the type. */
2455 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2456 /* Give it the required name. */
2457 va_list_name = build_decl (BUILTINS_LOCATION,
2458 TYPE_DECL,
2459 get_identifier ("__va_list"),
2460 va_list_type);
2461 DECL_ARTIFICIAL (va_list_name) = 1;
2462 TYPE_NAME (va_list_type) = va_list_name;
2463 TYPE_STUB_DECL (va_list_type) = va_list_name;
2464 /* Create the __ap field. */
2465 ap_field = build_decl (BUILTINS_LOCATION,
2466 FIELD_DECL,
2467 get_identifier ("__ap"),
2468 ptr_type_node);
2469 DECL_ARTIFICIAL (ap_field) = 1;
2470 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2471 TYPE_FIELDS (va_list_type) = ap_field;
2472 /* Compute its layout. */
2473 layout_type (va_list_type);
2474
2475 return va_list_type;
2476 }
2477
2478 /* Return an expression of type "void *" pointing to the next
2479 available argument in a variable-argument list. VALIST is the
2480 user-level va_list object, of type __builtin_va_list. */
2481 static tree
2482 arm_extract_valist_ptr (tree valist)
2483 {
2484 if (TREE_TYPE (valist) == error_mark_node)
2485 return error_mark_node;
2486
2487 /* On an AAPCS target, the pointer is stored within "struct
2488 va_list". */
2489 if (TARGET_AAPCS_BASED)
2490 {
2491 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2492 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2493 valist, ap_field, NULL_TREE);
2494 }
2495
2496 return valist;
2497 }
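/* For example, on an AAPCS target a user-level "va_list ap;" is the
   one-field structure built above, and this routine rewrites VALIST into
   the tree for "ap.__ap", i.e. the underlying void * cursor that the
   generic std_* va_arg machinery then advances.  */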
2498
2499 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2500 static void
2501 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2502 {
2503 valist = arm_extract_valist_ptr (valist);
2504 std_expand_builtin_va_start (valist, nextarg);
2505 }
2506
2507 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2508 static tree
2509 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2510 gimple_seq *post_p)
2511 {
2512 valist = arm_extract_valist_ptr (valist);
2513 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2514 }
2515
2516 /* Fix up any incompatible options that the user has specified. */
2517 static void
2518 arm_option_override (void)
2519 {
2520 if (global_options_set.x_arm_arch_option)
2521 arm_selected_arch = &all_architectures[arm_arch_option];
2522
2523 if (global_options_set.x_arm_cpu_option)
2524 {
2525 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2526 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2527 }
2528
2529 if (global_options_set.x_arm_tune_option)
2530 arm_selected_tune = &all_cores[(int) arm_tune_option];
2531
2532 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2533 SUBTARGET_OVERRIDE_OPTIONS;
2534 #endif
2535
2536 if (arm_selected_arch)
2537 {
2538 if (arm_selected_cpu)
2539 {
2540 /* Check for conflict between mcpu and march. */
2541 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2542 {
2543 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2544 arm_selected_cpu->name, arm_selected_arch->name);
2545 /* -march wins for code generation.
2546 -mcpu wins for default tuning. */
2547 if (!arm_selected_tune)
2548 arm_selected_tune = arm_selected_cpu;
2549
2550 arm_selected_cpu = arm_selected_arch;
2551 }
2552 else
2553 /* -mcpu wins. */
2554 arm_selected_arch = NULL;
2555 }
2556 else
2557 /* Pick a CPU based on the architecture. */
2558 arm_selected_cpu = arm_selected_arch;
2559 }
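/* A worked example (hypothetical command line): with
   -march=armv5t -mcpu=arm7tdmi the capability flags differ by more than
   FL_TUNE, so the warning above fires; armv5t then drives code
   generation while arm7tdmi is retained (via arm_selected_tune) for
   tuning, exactly as the "-march wins / -mcpu wins" note describes.  */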
2560
2561 /* If the user did not specify a processor, choose one for them. */
2562 if (!arm_selected_cpu)
2563 {
2564 const struct processors * sel;
2565 unsigned int sought;
2566
2567 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2568 if (!arm_selected_cpu->name)
2569 {
2570 #ifdef SUBTARGET_CPU_DEFAULT
2571 /* Use the subtarget default CPU if none was specified by
2572 configure. */
2573 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2574 #endif
2575 /* Default to ARM6. */
2576 if (!arm_selected_cpu->name)
2577 arm_selected_cpu = &all_cores[arm6];
2578 }
2579
2580 sel = arm_selected_cpu;
2581 insn_flags = sel->flags;
2582
2583 /* Now check to see if the user has specified some command line
2584 switches that require certain abilities from the cpu. */
2585 sought = 0;
2586
2587 if (TARGET_INTERWORK || TARGET_THUMB)
2588 {
2589 sought |= (FL_THUMB | FL_MODE32);
2590
2591 /* There are no ARM processors that support both APCS-26 and
2592 interworking. Therefore we force FL_MODE26 to be removed
2593 from insn_flags here (if it was set), so that the search
2594 below will always be able to find a compatible processor. */
2595 insn_flags &= ~FL_MODE26;
2596 }
2597
2598 if (sought != 0 && ((sought & insn_flags) != sought))
2599 {
2600 /* Try to locate a CPU type that supports all of the abilities
2601 of the default CPU, plus the extra abilities requested by
2602 the user. */
2603 for (sel = all_cores; sel->name != NULL; sel++)
2604 if ((sel->flags & sought) == (sought | insn_flags))
2605 break;
2606
2607 if (sel->name == NULL)
2608 {
2609 unsigned current_bit_count = 0;
2610 const struct processors * best_fit = NULL;
2611
2612 /* Ideally we would like to issue an error message here
2613 saying that it was not possible to find a CPU compatible
2614 with the default CPU, but which also supports the command
2615 line options specified by the programmer, and so they
2616 ought to use the -mcpu=<name> command line option to
2617 override the default CPU type.
2618
2619 If we cannot find a cpu that has both the
2620 characteristics of the default cpu and the given
2621 command line options we scan the array again looking
2622 for a best match. */
2623 for (sel = all_cores; sel->name != NULL; sel++)
2624 if ((sel->flags & sought) == sought)
2625 {
2626 unsigned count;
2627
2628 count = bit_count (sel->flags & insn_flags);
2629
2630 if (count >= current_bit_count)
2631 {
2632 best_fit = sel;
2633 current_bit_count = count;
2634 }
2635 }
2636
2637 gcc_assert (best_fit);
2638 sel = best_fit;
2639 }
2640
2641 arm_selected_cpu = sel;
2642 }
2643 }
2644
2645 gcc_assert (arm_selected_cpu);
2646 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2647 if (!arm_selected_tune)
2648 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2649
2650 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2651 insn_flags = arm_selected_cpu->flags;
2652 arm_base_arch = arm_selected_cpu->base_arch;
2653
2654 arm_tune = arm_selected_tune->core;
2655 tune_flags = arm_selected_tune->flags;
2656 current_tune = arm_selected_tune->tune;
2657
2658 /* Make sure that the processor choice does not conflict with any of the
2659 other command line choices. */
2660 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2661 error ("target CPU does not support ARM mode");
2662
2663 /* BPABI targets use linker tricks to allow interworking on cores
2664 without thumb support. */
2665 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2666 {
2667 warning (0, "target CPU does not support interworking" );
2668 target_flags &= ~MASK_INTERWORK;
2669 }
2670
2671 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2672 {
2673 warning (0, "target CPU does not support THUMB instructions");
2674 target_flags &= ~MASK_THUMB;
2675 }
2676
2677 if (TARGET_APCS_FRAME && TARGET_THUMB)
2678 {
2679 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2680 target_flags &= ~MASK_APCS_FRAME;
2681 }
2682
2683 /* Callee super interworking implies thumb interworking. Adding
2684 this to the flags here simplifies the logic elsewhere. */
2685 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2686 target_flags |= MASK_INTERWORK;
2687
2688 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2689 from here where no function is being compiled currently. */
2690 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2691 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2692
2693 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2694 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2695
2696 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2697 {
2698 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2699 target_flags |= MASK_APCS_FRAME;
2700 }
2701
2702 if (TARGET_POKE_FUNCTION_NAME)
2703 target_flags |= MASK_APCS_FRAME;
2704
2705 if (TARGET_APCS_REENT && flag_pic)
2706 error ("-fpic and -mapcs-reent are incompatible");
2707
2708 if (TARGET_APCS_REENT)
2709 warning (0, "APCS reentrant code not supported. Ignored");
2710
2711 /* If this target is normally configured to use APCS frames, warn if they
2712 are turned off and debugging is turned on. */
2713 if (TARGET_ARM
2714 && write_symbols != NO_DEBUG
2715 && !TARGET_APCS_FRAME
2716 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2717 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2718
2719 if (TARGET_APCS_FLOAT)
2720 warning (0, "passing floating point arguments in fp regs not yet supported");
2721
2722 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2723 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2724 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2725 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2726 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2727 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2728 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2729 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2730 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2731 arm_arch6m = arm_arch6 && !arm_arch_notm;
2732 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2733 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2734 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2735 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2736 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2737
2738 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2739 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2740 thumb_code = TARGET_ARM == 0;
2741 thumb1_code = TARGET_THUMB1 != 0;
2742 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2743 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2744 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2745 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2746 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2747 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2748 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2749 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2750 if (arm_restrict_it == 2)
2751 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2752
2753 if (!TARGET_THUMB2)
2754 arm_restrict_it = 0;
2755
2756 /* If we are not using the default (ARM mode) section anchor offset
2757 ranges, then set the correct ranges now. */
2758 if (TARGET_THUMB1)
2759 {
2760 /* Thumb-1 LDR instructions cannot have negative offsets.
2761 Permissible positive offset ranges are 5-bit (for byte loads),
2762 6-bit (for halfword loads), or 7-bit (for word loads).
2763 Empirical results suggest a 7-bit anchor range gives the best
2764 overall code size. */
2765 targetm.min_anchor_offset = 0;
2766 targetm.max_anchor_offset = 127;
2767 }
2768 else if (TARGET_THUMB2)
2769 {
2770 /* The minimum is set such that the total size of the block
2771 for a particular anchor is 248 + 1 + 4095 bytes, which is
2772 divisible by eight, ensuring natural spacing of anchors. */
2773 targetm.min_anchor_offset = -248;
2774 targetm.max_anchor_offset = 4095;
2775 }
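/* Checking the arithmetic: 248 + 1 + 4095 = 4344 = 8 * 543, so the span
   covered by one anchor is a whole number of 8-byte units and
   consecutive anchors stay naturally aligned.  */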
2776
2777 /* V5 code we generate is completely interworking capable, so we turn off
2778 TARGET_INTERWORK here to avoid many tests later on. */
2779
2780 /* XXX However, we must pass the right pre-processor defines to CPP
2781 or GLD can get confused. This is a hack. */
2782 if (TARGET_INTERWORK)
2783 arm_cpp_interwork = 1;
2784
2785 if (arm_arch5)
2786 target_flags &= ~MASK_INTERWORK;
2787
2788 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2789 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2790
2791 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2792 error ("iwmmxt abi requires an iwmmxt capable cpu");
2793
2794 if (!global_options_set.x_arm_fpu_index)
2795 {
2796 const char *target_fpu_name;
2797 bool ok;
2798
2799 #ifdef FPUTYPE_DEFAULT
2800 target_fpu_name = FPUTYPE_DEFAULT;
2801 #else
2802 target_fpu_name = "vfp";
2803 #endif
2804
2805 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2806 CL_TARGET);
2807 gcc_assert (ok);
2808 }
2809
2810 arm_fpu_desc = &all_fpus[arm_fpu_index];
2811
2812 switch (arm_fpu_desc->model)
2813 {
2814 case ARM_FP_MODEL_VFP:
2815 arm_fpu_attr = FPU_VFP;
2816 break;
2817
2818 default:
2819 gcc_unreachable();
2820 }
2821
2822 if (TARGET_AAPCS_BASED)
2823 {
2824 if (TARGET_CALLER_INTERWORKING)
2825 error ("AAPCS does not support -mcaller-super-interworking");
2826 else
2827 if (TARGET_CALLEE_INTERWORKING)
2828 error ("AAPCS does not support -mcallee-super-interworking");
2829 }
2830
2831 /* iWMMXt and NEON are incompatible. */
2832 if (TARGET_IWMMXT && TARGET_NEON)
2833 error ("iWMMXt and NEON are incompatible");
2834
2835 /* iWMMXt unsupported under Thumb mode. */
2836 if (TARGET_THUMB && TARGET_IWMMXT)
2837 error ("iWMMXt unsupported under Thumb mode");
2838
2839 /* __fp16 support currently assumes the core has ldrh. */
2840 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2841 sorry ("__fp16 and no ldrh");
2842
2843 /* If soft-float is specified then don't use FPU. */
2844 if (TARGET_SOFT_FLOAT)
2845 arm_fpu_attr = FPU_NONE;
2846
2847 if (TARGET_AAPCS_BASED)
2848 {
2849 if (arm_abi == ARM_ABI_IWMMXT)
2850 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2851 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2852 && TARGET_HARD_FLOAT
2853 && TARGET_VFP)
2854 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2855 else
2856 arm_pcs_default = ARM_PCS_AAPCS;
2857 }
2858 else
2859 {
2860 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2861 sorry ("-mfloat-abi=hard and VFP");
2862
2863 if (arm_abi == ARM_ABI_APCS)
2864 arm_pcs_default = ARM_PCS_APCS;
2865 else
2866 arm_pcs_default = ARM_PCS_ATPCS;
2867 }
2868
2869 /* For arm2/3 there is no need to do any scheduling if we are doing
2870 software floating-point. */
2871 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2872 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2873
2874 /* Use the cp15 method if it is available. */
2875 if (target_thread_pointer == TP_AUTO)
2876 {
2877 if (arm_arch6k && !TARGET_THUMB1)
2878 target_thread_pointer = TP_CP15;
2879 else
2880 target_thread_pointer = TP_SOFT;
2881 }
2882
2883 if (TARGET_HARD_TP && TARGET_THUMB1)
2884 error ("can not use -mtp=cp15 with 16-bit Thumb");
2885
2886 /* Override the default structure alignment for AAPCS ABI. */
2887 if (!global_options_set.x_arm_structure_size_boundary)
2888 {
2889 if (TARGET_AAPCS_BASED)
2890 arm_structure_size_boundary = 8;
2891 }
2892 else
2893 {
2894 if (arm_structure_size_boundary != 8
2895 && arm_structure_size_boundary != 32
2896 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2897 {
2898 if (ARM_DOUBLEWORD_ALIGN)
2899 warning (0,
2900 "structure size boundary can only be set to 8, 32 or 64");
2901 else
2902 warning (0, "structure size boundary can only be set to 8 or 32");
2903 arm_structure_size_boundary
2904 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2905 }
2906 }
2907
2908 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2909 {
2910 error ("RTP PIC is incompatible with Thumb");
2911 flag_pic = 0;
2912 }
2913
2914 /* If stack checking is disabled, we can use r10 as the PIC register,
2915 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2916 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2917 {
2918 if (TARGET_VXWORKS_RTP)
2919 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2920 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2921 }
2922
2923 if (flag_pic && TARGET_VXWORKS_RTP)
2924 arm_pic_register = 9;
2925
2926 if (arm_pic_register_string != NULL)
2927 {
2928 int pic_register = decode_reg_name (arm_pic_register_string);
2929
2930 if (!flag_pic)
2931 warning (0, "-mpic-register= is useless without -fpic");
2932
2933 /* Prevent the user from choosing an obviously stupid PIC register. */
2934 else if (pic_register < 0 || call_used_regs[pic_register]
2935 || pic_register == HARD_FRAME_POINTER_REGNUM
2936 || pic_register == STACK_POINTER_REGNUM
2937 || pic_register >= PC_REGNUM
2938 || (TARGET_VXWORKS_RTP
2939 && (unsigned int) pic_register != arm_pic_register))
2940 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2941 else
2942 arm_pic_register = pic_register;
2943 }
2944
2945 if (TARGET_VXWORKS_RTP
2946 && !global_options_set.x_arm_pic_data_is_text_relative)
2947 arm_pic_data_is_text_relative = 0;
2948
2949 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2950 if (fix_cm3_ldrd == 2)
2951 {
2952 if (arm_selected_cpu->core == cortexm3)
2953 fix_cm3_ldrd = 1;
2954 else
2955 fix_cm3_ldrd = 0;
2956 }
2957
2958 /* Enable -munaligned-access by default for
2959 - all ARMv6 architecture-based processors
2960 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2961 - ARMv8 architecture-based processors.
2962
2963 Disable -munaligned-access by default for
2964 - all pre-ARMv6 architecture-based processors
2965 - ARMv6-M architecture-based processors. */
2966
2967 if (unaligned_access == 2)
2968 {
2969 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2970 unaligned_access = 1;
2971 else
2972 unaligned_access = 0;
2973 }
2974 else if (unaligned_access == 1
2975 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2976 {
2977 warning (0, "target CPU does not support unaligned accesses");
2978 unaligned_access = 0;
2979 }
2980
2981 if (TARGET_THUMB1 && flag_schedule_insns)
2982 {
2983 /* Don't warn since it's on by default in -O2. */
2984 flag_schedule_insns = 0;
2985 }
2986
2987 if (optimize_size)
2988 {
2989 /* If optimizing for size, bump the number of instructions that we
2990 are prepared to conditionally execute (even on a StrongARM). */
2991 max_insns_skipped = 6;
2992
2993 /* For THUMB2, we limit the conditional sequence to one IT block. */
2994 if (TARGET_THUMB2)
2995 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
2996 }
2997 else
2998 max_insns_skipped = current_tune->max_insns_skipped;
2999
3000 /* Hot/Cold partitioning is not currently supported, since we can't
3001 handle literal pool placement in that case. */
3002 if (flag_reorder_blocks_and_partition)
3003 {
3004 inform (input_location,
3005 "-freorder-blocks-and-partition not supported on this architecture");
3006 flag_reorder_blocks_and_partition = 0;
3007 flag_reorder_blocks = 1;
3008 }
3009
3010 if (flag_pic)
3011 /* Hoisting PIC address calculations more aggressively provides a small,
3012 but measurable, size reduction for PIC code. Therefore, we decrease
3013 the bar for unrestricted expression hoisting to the cost of PIC address
3014 calculation, which is 2 instructions. */
3015 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3016 global_options.x_param_values,
3017 global_options_set.x_param_values);
3018
3019 /* ARM EABI defaults to strict volatile bitfields. */
3020 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3021 && abi_version_at_least(2))
3022 flag_strict_volatile_bitfields = 1;
3023
3024 /* Enable software prefetching at -O3 for CPUs that have prefetch, when we have
3025 deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3026 if (flag_prefetch_loop_arrays < 0
3027 && HAVE_prefetch
3028 && optimize >= 3
3029 && current_tune->num_prefetch_slots > 0)
3030 flag_prefetch_loop_arrays = 1;
3031
3032 /* Set up parameters to be used in the prefetching algorithm. Do not override the
3033 defaults unless we are tuning for a core we have researched values for. */
3034 if (current_tune->num_prefetch_slots > 0)
3035 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3036 current_tune->num_prefetch_slots,
3037 global_options.x_param_values,
3038 global_options_set.x_param_values);
3039 if (current_tune->l1_cache_line_size >= 0)
3040 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3041 current_tune->l1_cache_line_size,
3042 global_options.x_param_values,
3043 global_options_set.x_param_values);
3044 if (current_tune->l1_cache_size >= 0)
3045 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3046 current_tune->l1_cache_size,
3047 global_options.x_param_values,
3048 global_options_set.x_param_values);
3049
3050 /* Use Neon rather than core registers to perform 64-bit
3051 operations. */
3052 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3053 if (use_neon_for_64bits == 1)
3054 prefer_neon_for_64bits = true;
3055
3056 /* Use the alternative scheduling-pressure algorithm by default. */
3057 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3058 global_options.x_param_values,
3059 global_options_set.x_param_values);
3060
3061 /* Disable shrink-wrap when optimizing function for size, since it tends to
3062 generate additional returns. */
3063 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3064 flag_shrink_wrap = false;
3065 /* TBD: Dwarf info for apcs frame is not handled yet. */
3066 if (TARGET_APCS_FRAME)
3067 flag_shrink_wrap = false;
3068
3069 /* We only support -mslow-flash-data on armv7-m targets. */
3070 if (target_slow_flash_data
3071 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3072 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3073 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3074
3075 /* Currently, for slow flash data, we just disable literal pools. */
3076 if (target_slow_flash_data)
3077 arm_disable_literal_pool = true;
3078
3079 /* Register global variables with the garbage collector. */
3080 arm_add_gc_roots ();
3081 }
3082
3083 static void
3084 arm_add_gc_roots (void)
3085 {
3086 gcc_obstack_init(&minipool_obstack);
3087 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3088 }
3089 \f
3090 /* A table of known ARM exception types.
3091 For use with the interrupt function attribute. */
3092
3093 typedef struct
3094 {
3095 const char *const arg;
3096 const unsigned long return_value;
3097 }
3098 isr_attribute_arg;
3099
3100 static const isr_attribute_arg isr_attribute_args [] =
3101 {
3102 { "IRQ", ARM_FT_ISR },
3103 { "irq", ARM_FT_ISR },
3104 { "FIQ", ARM_FT_FIQ },
3105 { "fiq", ARM_FT_FIQ },
3106 { "ABORT", ARM_FT_ISR },
3107 { "abort", ARM_FT_ISR },
3108 { "ABORT", ARM_FT_ISR },
3109 { "abort", ARM_FT_ISR },
3110 { "UNDEF", ARM_FT_EXCEPTION },
3111 { "undef", ARM_FT_EXCEPTION },
3112 { "SWI", ARM_FT_EXCEPTION },
3113 { "swi", ARM_FT_EXCEPTION },
3114 { NULL, ARM_FT_NORMAL }
3115 };
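/* Typical usage (a sketch): an IRQ handler can be declared as
     void handler (void) __attribute__ ((interrupt ("IRQ")));
   The string argument is compared verbatim against the table above by
   arm_isr_value below, which is why both upper- and lower-case spellings
   are listed; with no argument at all the handler defaults to IRQ.  */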
3116
3117 /* Returns the (interrupt) function type of the current
3118 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3119
3120 static unsigned long
3121 arm_isr_value (tree argument)
3122 {
3123 const isr_attribute_arg * ptr;
3124 const char * arg;
3125
3126 if (!arm_arch_notm)
3127 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3128
3129 /* No argument - default to IRQ. */
3130 if (argument == NULL_TREE)
3131 return ARM_FT_ISR;
3132
3133 /* Get the value of the argument. */
3134 if (TREE_VALUE (argument) == NULL_TREE
3135 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3136 return ARM_FT_UNKNOWN;
3137
3138 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3139
3140 /* Check it against the list of known arguments. */
3141 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3142 if (streq (arg, ptr->arg))
3143 return ptr->return_value;
3144
3145 /* An unrecognized interrupt type. */
3146 return ARM_FT_UNKNOWN;
3147 }
3148
3149 /* Computes the type of the current function. */
3150
3151 static unsigned long
3152 arm_compute_func_type (void)
3153 {
3154 unsigned long type = ARM_FT_UNKNOWN;
3155 tree a;
3156 tree attr;
3157
3158 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3159
3160 /* Decide if the current function is volatile. Such functions
3161 never return, and many memory cycles can be saved by not storing
3162 register values that will never be needed again. This optimization
3163 was added to speed up context switching in a kernel application. */
3164 if (optimize > 0
3165 && (TREE_NOTHROW (current_function_decl)
3166 || !(flag_unwind_tables
3167 || (flag_exceptions
3168 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3169 && TREE_THIS_VOLATILE (current_function_decl))
3170 type |= ARM_FT_VOLATILE;
3171
3172 if (cfun->static_chain_decl != NULL)
3173 type |= ARM_FT_NESTED;
3174
3175 attr = DECL_ATTRIBUTES (current_function_decl);
3176
3177 a = lookup_attribute ("naked", attr);
3178 if (a != NULL_TREE)
3179 type |= ARM_FT_NAKED;
3180
3181 a = lookup_attribute ("isr", attr);
3182 if (a == NULL_TREE)
3183 a = lookup_attribute ("interrupt", attr);
3184
3185 if (a == NULL_TREE)
3186 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3187 else
3188 type |= arm_isr_value (TREE_VALUE (a));
3189
3190 return type;
3191 }
3192
3193 /* Returns the type of the current function. */
3194
3195 unsigned long
3196 arm_current_func_type (void)
3197 {
3198 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3199 cfun->machine->func_type = arm_compute_func_type ();
3200
3201 return cfun->machine->func_type;
3202 }
3203
3204 bool
3205 arm_allocate_stack_slots_for_args (void)
3206 {
3207 /* Naked functions should not allocate stack slots for arguments. */
3208 return !IS_NAKED (arm_current_func_type ());
3209 }
3210
3211 static bool
3212 arm_warn_func_return (tree decl)
3213 {
3214 /* Naked functions are implemented entirely in assembly, including the
3215 return sequence, so suppress warnings about this. */
3216 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3217 }
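/* For instance, a routine written entirely in inline asm and declared
     void start (void) __attribute__ ((naked));
   supplies its own return sequence, so warning that control reaches the
   end of the function without a return would only be noise.  */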
3218
3219 \f
3220 /* Output assembler code for a block containing the constant parts
3221 of a trampoline, leaving space for the variable parts.
3222
3223 On the ARM, (if r8 is the static chain regnum, and remembering that
3224 referencing pc adds an offset of 8) the trampoline looks like:
3225 ldr r8, [pc, #0]
3226 ldr pc, [pc]
3227 .word static chain value
3228 .word function's address
3229 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3230
3231 static void
3232 arm_asm_trampoline_template (FILE *f)
3233 {
3234 if (TARGET_ARM)
3235 {
3236 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3237 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3238 }
3239 else if (TARGET_THUMB2)
3240 {
3241 /* The Thumb-2 trampoline is similar to the arm implementation.
3242 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3243 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3244 STATIC_CHAIN_REGNUM, PC_REGNUM);
3245 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3246 }
3247 else
3248 {
3249 ASM_OUTPUT_ALIGN (f, 2);
3250 fprintf (f, "\t.code\t16\n");
3251 fprintf (f, ".Ltrampoline_start:\n");
3252 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3253 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3254 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3255 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3256 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3257 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3258 }
3259 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3260 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3261 }
3262
3263 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3264
3265 static void
3266 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3267 {
3268 rtx fnaddr, mem, a_tramp;
3269
3270 emit_block_move (m_tramp, assemble_trampoline_template (),
3271 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3272
3273 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3274 emit_move_insn (mem, chain_value);
3275
3276 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3277 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3278 emit_move_insn (mem, fnaddr);
3279
3280 a_tramp = XEXP (m_tramp, 0);
3281 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3282 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3283 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3284 }
3285
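/* A hedged sketch (not compiled) of what a 32-bit trampoline looks like
after arm_trampoline_init has filled in the two variable words.  The struct
and field names are only illustrative; the offsets follow the
adjust_address calls above.  */
#if 0
struct arm32_trampoline_layout
{
  unsigned int insns[2];      /* The template: load static chain, load pc.  */
  unsigned int chain_value;   /* Offset 8: the static chain pointer.  */
  unsigned int func_addr;     /* Offset 12: the target function's address.  */
};
#endif
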
3286 /* Thumb trampolines should be entered in thumb mode, so set
3287 the bottom bit of the address. */
3288
3289 static rtx
3290 arm_trampoline_adjust_address (rtx addr)
3291 {
3292 if (TARGET_THUMB)
3293 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3294 NULL, 0, OPTAB_LIB_WIDEN);
3295 return addr;
3296 }
3297 \f
3298 /* Return 1 if it is possible to return using a single instruction.
3299 If SIBLING is non-null, this is a test for a return before a sibling
3300 call. SIBLING is the call insn, so we can examine its register usage. */
3301
3302 int
3303 use_return_insn (int iscond, rtx sibling)
3304 {
3305 int regno;
3306 unsigned int func_type;
3307 unsigned long saved_int_regs;
3308 unsigned HOST_WIDE_INT stack_adjust;
3309 arm_stack_offsets *offsets;
3310
3311 /* Never use a return instruction before reload has run. */
3312 if (!reload_completed)
3313 return 0;
3314
3315 func_type = arm_current_func_type ();
3316
3317 /* Naked, volatile and stack alignment functions need special
3318 consideration. */
3319 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3320 return 0;
3321
3322 /* So do interrupt functions that use the frame pointer and Thumb
3323 interrupt functions. */
3324 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3325 return 0;
3326
3327 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3328 && !optimize_function_for_size_p (cfun))
3329 return 0;
3330
3331 offsets = arm_get_frame_offsets ();
3332 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3333
3334 /* As do variadic functions. */
3335 if (crtl->args.pretend_args_size
3336 || cfun->machine->uses_anonymous_args
3337 /* Or if the function calls __builtin_eh_return () */
3338 || crtl->calls_eh_return
3339 /* Or if the function calls alloca */
3340 || cfun->calls_alloca
3341 /* Or if there is a stack adjustment. However, if the stack pointer
3342 is saved on the stack, we can use a pre-incrementing stack load. */
3343 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3344 && stack_adjust == 4)))
3345 return 0;
3346
3347 saved_int_regs = offsets->saved_regs_mask;
3348
3349 /* Unfortunately, the insn
3350
3351 ldmib sp, {..., sp, ...}
3352
3353 triggers a bug on most SA-110 based devices, such that the stack
3354 pointer won't be correctly restored if the instruction takes a
3355 page fault. We work around this problem by popping r3 along with
3356 the other registers, since that is never slower than executing
3357 another instruction.
3358
3359 We test for !arm_arch5 here, because code for any architecture
3360 less than this could potentially be run on one of the buggy
3361 chips. */
3362 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3363 {
3364 /* Validate that r3 is a call-clobbered register (always true in
3365 the default abi) ... */
3366 if (!call_used_regs[3])
3367 return 0;
3368
3369 /* ... that it isn't being used for a return value ... */
3370 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3371 return 0;
3372
3373 /* ... or for a tail-call argument ... */
3374 if (sibling)
3375 {
3376 gcc_assert (CALL_P (sibling));
3377
3378 if (find_regno_fusage (sibling, USE, 3))
3379 return 0;
3380 }
3381
3382 /* ... and that there are no call-saved registers in r0-r2
3383 (always true in the default ABI). */
3384 if (saved_int_regs & 0x7)
3385 return 0;
3386 }
3387
3388 /* Can't be done if interworking with Thumb, and any registers have been
3389 stacked. */
3390 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3391 return 0;
3392
3393 /* On StrongARM, conditional returns are expensive if they aren't
3394 taken and multiple registers have been stacked. */
3395 if (iscond && arm_tune_strongarm)
3396 {
3397 /* Conditional return when just the LR is stored is a simple
3398 conditional-load instruction; that's not expensive. */
3399 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3400 return 0;
3401
3402 if (flag_pic
3403 && arm_pic_register != INVALID_REGNUM
3404 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3405 return 0;
3406 }
3407
3408 /* If there are saved registers but the LR isn't saved, then we need
3409 two instructions for the return. */
3410 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3411 return 0;
3412
3413 /* Can't be done if any of the VFP regs are pushed,
3414 since this also requires an insn. */
3415 if (TARGET_HARD_FLOAT && TARGET_VFP)
3416 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3417 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3418 return 0;
3419
3420 if (TARGET_REALLY_IWMMXT)
3421 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3422 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3423 return 0;
3424
3425 return 1;
3426 }
3427
3428 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3429 shrink-wrapping if possible. This is the case if we need to emit a
3430 prologue, which we can test by looking at the offsets. */
3431 bool
3432 use_simple_return_p (void)
3433 {
3434 arm_stack_offsets *offsets;
3435
3436 offsets = arm_get_frame_offsets ();
3437 return offsets->outgoing_args != 0;
3438 }
3439
3440 /* Return TRUE if int I is a valid immediate ARM constant. */
3441
3442 int
3443 const_ok_for_arm (HOST_WIDE_INT i)
3444 {
3445 int lowbit;
3446
3447 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3448 be all zero, or all one. */
3449 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3450 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3451 != ((~(unsigned HOST_WIDE_INT) 0)
3452 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3453 return FALSE;
3454
3455 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3456
3457 /* Fast return for 0 and small values. We must do this for zero, since
3458 the code below can't handle that one case. */
3459 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3460 return TRUE;
3461
3462 /* Get the number of trailing zeros. */
3463 lowbit = ffs((int) i) - 1;
3464
3465 /* Only even shifts are allowed in ARM mode so round down to the
3466 nearest even number. */
3467 if (TARGET_ARM)
3468 lowbit &= ~1;
3469
3470 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3471 return TRUE;
3472
3473 if (TARGET_ARM)
3474 {
3475 /* Allow rotated constants in ARM mode. */
3476 if (lowbit <= 4
3477 && ((i & ~0xc000003f) == 0
3478 || (i & ~0xf000000f) == 0
3479 || (i & ~0xfc000003) == 0))
3480 return TRUE;
3481 }
3482 else
3483 {
3484 HOST_WIDE_INT v;
3485
3486 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3487 v = i & 0xff;
3488 v |= v << 16;
3489 if (i == v || i == (v | (v << 8)))
3490 return TRUE;
3491
3492 /* Allow repeated pattern 0xXY00XY00. */
3493 v = i & 0xff00;
3494 v |= v << 16;
3495 if (i == v)
3496 return TRUE;
3497 }
3498
3499 return FALSE;
3500 }
3501
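/* Worked examples (a hedged sketch, not compiled) of the checks above.
The expected results are noted per call; the Thumb-2 outcomes rely on the
replicated-pattern cases, the ARM outcomes on even rotations.  */
#if 0
static void
const_ok_for_arm_examples (void)
{
  const_ok_for_arm (0x000000ff);  /* TRUE: plain 8-bit value.  */
  const_ok_for_arm (0xff000000);  /* TRUE: 0xff rotated right by 8.  */
  const_ok_for_arm (0xc000003f);  /* TRUE in ARM mode: 0xff rotated by 2.  */
  const_ok_for_arm (0x00ff00ff);  /* TRUE in Thumb-2: replicated 0x00XY00XY.  */
  const_ok_for_arm (0x00000101);  /* FALSE: needs two separate immediates.  */
}
#endif
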
3502 /* Return true if I is a valid constant for the operation CODE. */
3503 int
3504 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3505 {
3506 if (const_ok_for_arm (i))
3507 return 1;
3508
3509 switch (code)
3510 {
3511 case SET:
3512 /* See if we can use movw. */
3513 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3514 return 1;
3515 else
3516 /* Otherwise, try mvn. */
3517 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3518
3519 case PLUS:
3520 /* See if we can use addw or subw. */
3521 if (TARGET_THUMB2
3522 && ((i & 0xfffff000) == 0
3523 || ((-i) & 0xfffff000) == 0))
3524 return 1;
3525 /* else fall through. */
3526
3527 case COMPARE:
3528 case EQ:
3529 case NE:
3530 case GT:
3531 case LE:
3532 case LT:
3533 case GE:
3534 case GEU:
3535 case LTU:
3536 case GTU:
3537 case LEU:
3538 case UNORDERED:
3539 case ORDERED:
3540 case UNEQ:
3541 case UNGE:
3542 case UNLT:
3543 case UNGT:
3544 case UNLE:
3545 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3546
3547 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3548 case XOR:
3549 return 0;
3550
3551 case IOR:
3552 if (TARGET_THUMB2)
3553 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3554 return 0;
3555
3556 case AND:
3557 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3558
3559 default:
3560 gcc_unreachable ();
3561 }
3562 }
3563
3564 /* Return true if I is a valid di mode constant for the operation CODE. */
3565 int
3566 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3567 {
3568 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3569 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3570 rtx hi = GEN_INT (hi_val);
3571 rtx lo = GEN_INT (lo_val);
3572
3573 if (TARGET_THUMB1)
3574 return 0;
3575
3576 switch (code)
3577 {
3578 case AND:
3579 case IOR:
3580 case XOR:
3581 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3582 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3583 case PLUS:
3584 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3585
3586 default:
3587 return 0;
3588 }
3589 }
3590
3591 /* Emit a sequence of insns to handle a large constant.
3592 CODE is the code of the operation required, it can be any of SET, PLUS,
3593 IOR, AND, XOR, MINUS;
3594 MODE is the mode in which the operation is being performed;
3595 VAL is the integer to operate on;
3596 SOURCE is the other operand (a register, or a null-pointer for SET);
3597 SUBTARGETS means it is safe to create scratch registers if that will
3598 either produce a simpler sequence, or we will want to cse the values.
3599 Return value is the number of insns emitted. */
3600
3601 /* ??? Tweak this for thumb2. */
3602 int
3603 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3604 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3605 {
3606 rtx cond;
3607
3608 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3609 cond = COND_EXEC_TEST (PATTERN (insn));
3610 else
3611 cond = NULL_RTX;
3612
3613 if (subtargets || code == SET
3614 || (REG_P (target) && REG_P (source)
3615 && REGNO (target) != REGNO (source)))
3616 {
3617 /* After arm_reorg has been called, we can't fix up expensive
3618 constants by pushing them into memory so we must synthesize
3619 them in-line, regardless of the cost. This is only likely to
3620 be more costly on chips that have load delay slots and we are
3621 compiling without running the scheduler (so no splitting
3622 occurred before the final instruction emission).
3623
3624 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3625 */
3626 if (!cfun->machine->after_arm_reorg
3627 && !cond
3628 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3629 1, 0)
3630 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3631 + (code != SET))))
3632 {
3633 if (code == SET)
3634 {
3635 /* Currently SET is the only monadic value for CODE, all
3636 the rest are dyadic. */
3637 if (TARGET_USE_MOVT)
3638 arm_emit_movpair (target, GEN_INT (val));
3639 else
3640 emit_set_insn (target, GEN_INT (val));
3641
3642 return 1;
3643 }
3644 else
3645 {
3646 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3647
3648 if (TARGET_USE_MOVT)
3649 arm_emit_movpair (temp, GEN_INT (val));
3650 else
3651 emit_set_insn (temp, GEN_INT (val));
3652
3653 /* For MINUS, the value is subtracted from, since we never
3654 have subtraction of a constant. */
3655 if (code == MINUS)
3656 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3657 else
3658 emit_set_insn (target,
3659 gen_rtx_fmt_ee (code, mode, source, temp));
3660 return 2;
3661 }
3662 }
3663 }
3664
3665 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3666 1);
3667 }
3668
3669 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3670 ARM/THUMB2 immediates and add up to VAL.
3671 The function's return value gives the number of insns required. */
3672 static int
3673 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3674 struct four_ints *return_sequence)
3675 {
3676 int best_consecutive_zeros = 0;
3677 int i;
3678 int best_start = 0;
3679 int insns1, insns2;
3680 struct four_ints tmp_sequence;
3681
3682 /* If we aren't targeting ARM, the best place to start is always at
3683 the bottom, otherwise look more closely. */
3684 if (TARGET_ARM)
3685 {
3686 for (i = 0; i < 32; i += 2)
3687 {
3688 int consecutive_zeros = 0;
3689
3690 if (!(val & (3 << i)))
3691 {
3692 while ((i < 32) && !(val & (3 << i)))
3693 {
3694 consecutive_zeros += 2;
3695 i += 2;
3696 }
3697 if (consecutive_zeros > best_consecutive_zeros)
3698 {
3699 best_consecutive_zeros = consecutive_zeros;
3700 best_start = i - consecutive_zeros;
3701 }
3702 i -= 2;
3703 }
3704 }
3705 }
3706
3707 /* So long as it won't require any more insns to do so, it's
3708 desirable to emit a small constant (in bits 0...9) in the last
3709 insn. This way there is more chance that it can be combined with
3710 a later addressing insn to form a pre-indexed load or store
3711 operation. Consider:
3712
3713 *((volatile int *)0xe0000100) = 1;
3714 *((volatile int *)0xe0000110) = 2;
3715
3716 We want this to wind up as:
3717
3718 mov rA, #0xe0000000
3719 mov rB, #1
3720 str rB, [rA, #0x100]
3721 mov rB, #2
3722 str rB, [rA, #0x110]
3723
3724 rather than having to synthesize both large constants from scratch.
3725
3726 Therefore, we calculate how many insns would be required to emit
3727 the constant starting from `best_start', and also starting from
3728 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3729 yield a shorter sequence, we may as well use zero. */
3730 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3731 if (best_start != 0
3732 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3733 {
3734 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3735 if (insns2 <= insns1)
3736 {
3737 *return_sequence = tmp_sequence;
3738 insns1 = insns2;
3739 }
3740 }
3741
3742 return insns1;
3743 }
3744
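/* A hedged, host-side sketch of the greedy idea used above: peel off one
8-bit, 2-bit-aligned chunk per insn until nothing is left.  It is not the
compiler's exact algorithm (no wrap-around, no best_start search), but for
0xe0000100 it yields the chunks 0xe0000000 and 0x00000100; the small
trailing chunk is what the comment above hopes to fold into a later
addressing insn.  */
#if 0
static int
count_arm_chunks (unsigned int val)
{
  int insns = 0;
  while (val)
    {
      /* Lowest set bit, rounded down to an even position.  */
      int low = __builtin_ctz (val) & ~1;
      /* The 8-bit window starting there is one immediate.  */
      val &= ~(0xffu << low);
      insns++;
    }
  return insns;
}
#endif
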
3745 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3746 static int
3747 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3748 struct four_ints *return_sequence, int i)
3749 {
3750 int remainder = val & 0xffffffff;
3751 int insns = 0;
3752
3753 /* Try and find a way of doing the job in either two or three
3754 instructions.
3755
3756 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3757 location. We start at position I. This may be the MSB, or
3758 optimal_immediate_sequence may have positioned it at the largest block
3759 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3760 wrapping around to the top of the word when we drop off the bottom.
3761 In the worst case this code should produce no more than four insns.
3762
3763 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3764 constants, shifted to any arbitrary location. We should always start
3765 at the MSB. */
3766 do
3767 {
3768 int end;
3769 unsigned int b1, b2, b3, b4;
3770 unsigned HOST_WIDE_INT result;
3771 int loc;
3772
3773 gcc_assert (insns < 4);
3774
3775 if (i <= 0)
3776 i += 32;
3777
3778 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3779 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3780 {
3781 loc = i;
3782 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3783 /* We can use addw/subw for the last 12 bits. */
3784 result = remainder;
3785 else
3786 {
3787 /* Use an 8-bit shifted/rotated immediate. */
3788 end = i - 8;
3789 if (end < 0)
3790 end += 32;
3791 result = remainder & ((0x0ff << end)
3792 | ((i < end) ? (0xff >> (32 - end))
3793 : 0));
3794 i -= 8;
3795 }
3796 }
3797 else
3798 {
3799 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3800 arbitrary shifts. */
3801 i -= TARGET_ARM ? 2 : 1;
3802 continue;
3803 }
3804
3805 /* Next, see if we can do a better job with a thumb2 replicated
3806 constant.
3807
3808 We do it this way around to catch the cases like 0x01F001E0 where
3809 two 8-bit immediates would work, but a replicated constant would
3810 make it worse.
3811
3812 TODO: 16-bit constants that don't clear all the bits, but still win.
3813 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3814 if (TARGET_THUMB2)
3815 {
3816 b1 = (remainder & 0xff000000) >> 24;
3817 b2 = (remainder & 0x00ff0000) >> 16;
3818 b3 = (remainder & 0x0000ff00) >> 8;
3819 b4 = remainder & 0xff;
3820
3821 if (loc > 24)
3822 {
3823 /* The 8-bit immediate already found clears b1 (and maybe b2),
3824 but must leave b3 and b4 alone. */
3825
3826 /* First try to find a 32-bit replicated constant that clears
3827 almost everything. We can assume that we can't do it in one,
3828 or else we wouldn't be here. */
3829 unsigned int tmp = b1 & b2 & b3 & b4;
3830 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3831 + (tmp << 24);
3832 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3833 + (tmp == b3) + (tmp == b4);
3834 if (tmp
3835 && (matching_bytes >= 3
3836 || (matching_bytes == 2
3837 && const_ok_for_op (remainder & ~tmp2, code))))
3838 {
3839 /* At least 3 of the bytes match, and the fourth has at
3840 least as many bits set, or two of the bytes match
3841 and it will only require one more insn to finish. */
3842 result = tmp2;
3843 i = tmp != b1 ? 32
3844 : tmp != b2 ? 24
3845 : tmp != b3 ? 16
3846 : 8;
3847 }
3848
3849 /* Second, try to find a 16-bit replicated constant that can
3850 leave three of the bytes clear. If b2 or b4 is already
3851 zero, then we can. If the 8-bit from above would not
3852 clear b2 anyway, then we still win. */
3853 else if (b1 == b3 && (!b2 || !b4
3854 || (remainder & 0x00ff0000 & ~result)))
3855 {
3856 result = remainder & 0xff00ff00;
3857 i = 24;
3858 }
3859 }
3860 else if (loc > 16)
3861 {
3862 /* The 8-bit immediate already found clears b2 (and maybe b3)
3863 and we don't get here unless b1 is already clear, but it will
3864 leave b4 unchanged. */
3865
3866 /* If we can clear b2 and b4 at once, then we win, since the
3867 8-bits couldn't possibly reach that far. */
3868 if (b2 == b4)
3869 {
3870 result = remainder & 0x00ff00ff;
3871 i = 16;
3872 }
3873 }
3874 }
3875
3876 return_sequence->i[insns++] = result;
3877 remainder &= ~result;
3878
3879 if (code == SET || code == MINUS)
3880 code = PLUS;
3881 }
3882 while (remainder);
3883
3884 return insns;
3885 }
3886
3887 /* Emit an instruction with the indicated PATTERN. If COND is
3888 non-NULL, conditionalize the execution of the instruction on COND
3889 being true. */
3890
3891 static void
3892 emit_constant_insn (rtx cond, rtx pattern)
3893 {
3894 if (cond)
3895 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3896 emit_insn (pattern);
3897 }
3898
3899 /* As above, but extra parameter GENERATE which, if clear, suppresses
3900 RTL generation. */
3901
3902 static int
3903 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3904 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3905 int generate)
3906 {
3907 int can_invert = 0;
3908 int can_negate = 0;
3909 int final_invert = 0;
3910 int i;
3911 int set_sign_bit_copies = 0;
3912 int clear_sign_bit_copies = 0;
3913 int clear_zero_bit_copies = 0;
3914 int set_zero_bit_copies = 0;
3915 int insns = 0, neg_insns, inv_insns;
3916 unsigned HOST_WIDE_INT temp1, temp2;
3917 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3918 struct four_ints *immediates;
3919 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3920
3921 /* Find out which operations are safe for a given CODE. Also do a quick
3922 check for degenerate cases; these can occur when DImode operations
3923 are split. */
3924 switch (code)
3925 {
3926 case SET:
3927 can_invert = 1;
3928 break;
3929
3930 case PLUS:
3931 can_negate = 1;
3932 break;
3933
3934 case IOR:
3935 if (remainder == 0xffffffff)
3936 {
3937 if (generate)
3938 emit_constant_insn (cond,
3939 gen_rtx_SET (VOIDmode, target,
3940 GEN_INT (ARM_SIGN_EXTEND (val))));
3941 return 1;
3942 }
3943
3944 if (remainder == 0)
3945 {
3946 if (reload_completed && rtx_equal_p (target, source))
3947 return 0;
3948
3949 if (generate)
3950 emit_constant_insn (cond,
3951 gen_rtx_SET (VOIDmode, target, source));
3952 return 1;
3953 }
3954 break;
3955
3956 case AND:
3957 if (remainder == 0)
3958 {
3959 if (generate)
3960 emit_constant_insn (cond,
3961 gen_rtx_SET (VOIDmode, target, const0_rtx));
3962 return 1;
3963 }
3964 if (remainder == 0xffffffff)
3965 {
3966 if (reload_completed && rtx_equal_p (target, source))
3967 return 0;
3968 if (generate)
3969 emit_constant_insn (cond,
3970 gen_rtx_SET (VOIDmode, target, source));
3971 return 1;
3972 }
3973 can_invert = 1;
3974 break;
3975
3976 case XOR:
3977 if (remainder == 0)
3978 {
3979 if (reload_completed && rtx_equal_p (target, source))
3980 return 0;
3981 if (generate)
3982 emit_constant_insn (cond,
3983 gen_rtx_SET (VOIDmode, target, source));
3984 return 1;
3985 }
3986
3987 if (remainder == 0xffffffff)
3988 {
3989 if (generate)
3990 emit_constant_insn (cond,
3991 gen_rtx_SET (VOIDmode, target,
3992 gen_rtx_NOT (mode, source)));
3993 return 1;
3994 }
3995 final_invert = 1;
3996 break;
3997
3998 case MINUS:
3999 /* We treat MINUS as (val - source), since (source - val) is always
4000 passed as (source + (-val)). */
4001 if (remainder == 0)
4002 {
4003 if (generate)
4004 emit_constant_insn (cond,
4005 gen_rtx_SET (VOIDmode, target,
4006 gen_rtx_NEG (mode, source)));
4007 return 1;
4008 }
4009 if (const_ok_for_arm (val))
4010 {
4011 if (generate)
4012 emit_constant_insn (cond,
4013 gen_rtx_SET (VOIDmode, target,
4014 gen_rtx_MINUS (mode, GEN_INT (val),
4015 source)));
4016 return 1;
4017 }
4018
4019 break;
4020
4021 default:
4022 gcc_unreachable ();
4023 }
4024
4025 /* If we can do it in one insn get out quickly. */
4026 if (const_ok_for_op (val, code))
4027 {
4028 if (generate)
4029 emit_constant_insn (cond,
4030 gen_rtx_SET (VOIDmode, target,
4031 (source
4032 ? gen_rtx_fmt_ee (code, mode, source,
4033 GEN_INT (val))
4034 : GEN_INT (val))));
4035 return 1;
4036 }
4037
4038 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4039 insn. */
4040 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4041 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4042 {
4043 if (generate)
4044 {
4045 if (mode == SImode && i == 16)
4046 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4047 smaller insn. */
4048 emit_constant_insn (cond,
4049 gen_zero_extendhisi2
4050 (target, gen_lowpart (HImode, source)));
4051 else
4052 /* Extz only supports SImode, but we can coerce the operands
4053 into that mode. */
4054 emit_constant_insn (cond,
4055 gen_extzv_t2 (gen_lowpart (SImode, target),
4056 gen_lowpart (SImode, source),
4057 GEN_INT (i), const0_rtx));
4058 }
4059
4060 return 1;
4061 }
4062
4063 /* Calculate a few attributes that may be useful for specific
4064 optimizations. */
4065 /* Count number of leading zeros. */
4066 for (i = 31; i >= 0; i--)
4067 {
4068 if ((remainder & (1 << i)) == 0)
4069 clear_sign_bit_copies++;
4070 else
4071 break;
4072 }
4073
4074 /* Count number of leading 1's. */
4075 for (i = 31; i >= 0; i--)
4076 {
4077 if ((remainder & (1 << i)) != 0)
4078 set_sign_bit_copies++;
4079 else
4080 break;
4081 }
4082
4083 /* Count number of trailing zero's. */
4084 for (i = 0; i <= 31; i++)
4085 {
4086 if ((remainder & (1 << i)) == 0)
4087 clear_zero_bit_copies++;
4088 else
4089 break;
4090 }
4091
4092 /* Count number of trailing 1's. */
4093 for (i = 0; i <= 31; i++)
4094 {
4095 if ((remainder & (1 << i)) != 0)
4096 set_zero_bit_copies++;
4097 else
4098 break;
4099 }
4100
4101 switch (code)
4102 {
4103 case SET:
4104 /* See if we can do this by sign_extending a constant that is known
4105 to be negative. This is a good way of doing it, since the shift
4106 may well merge into a subsequent insn. */
4107 if (set_sign_bit_copies > 1)
4108 {
4109 if (const_ok_for_arm
4110 (temp1 = ARM_SIGN_EXTEND (remainder
4111 << (set_sign_bit_copies - 1))))
4112 {
4113 if (generate)
4114 {
4115 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4116 emit_constant_insn (cond,
4117 gen_rtx_SET (VOIDmode, new_src,
4118 GEN_INT (temp1)));
4119 emit_constant_insn (cond,
4120 gen_ashrsi3 (target, new_src,
4121 GEN_INT (set_sign_bit_copies - 1)));
4122 }
4123 return 2;
4124 }
4125 /* For an inverted constant, we will need to set the low bits,
4126 these will be shifted out of harm's way. */
4127 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4128 if (const_ok_for_arm (~temp1))
4129 {
4130 if (generate)
4131 {
4132 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4133 emit_constant_insn (cond,
4134 gen_rtx_SET (VOIDmode, new_src,
4135 GEN_INT (temp1)));
4136 emit_constant_insn (cond,
4137 gen_ashrsi3 (target, new_src,
4138 GEN_INT (set_sign_bit_copies - 1)));
4139 }
4140 return 2;
4141 }
4142 }
4143
4144 /* See if we can calculate the value as the difference between two
4145 valid immediates. */
4146 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4147 {
4148 int topshift = clear_sign_bit_copies & ~1;
4149
4150 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4151 & (0xff000000 >> topshift));
4152
4153 /* If temp1 is zero, then that means the 9 most significant
4154 bits of remainder were 1 and we've caused it to overflow.
4155 When topshift is 0 we don't need to do anything since we
4156 can borrow from 'bit 32'. */
4157 if (temp1 == 0 && topshift != 0)
4158 temp1 = 0x80000000 >> (topshift - 1);
4159
4160 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4161
4162 if (const_ok_for_arm (temp2))
4163 {
4164 if (generate)
4165 {
4166 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4167 emit_constant_insn (cond,
4168 gen_rtx_SET (VOIDmode, new_src,
4169 GEN_INT (temp1)));
4170 emit_constant_insn (cond,
4171 gen_addsi3 (target, new_src,
4172 GEN_INT (-temp2)));
4173 }
4174
4175 return 2;
4176 }
4177 }
4178
4179 /* See if we can generate this by setting the bottom (or the top)
4180 16 bits, and then shifting these into the other half of the
4181 word. We only look for the simplest cases; to do more would cost
4182 too much. Be careful, however, not to generate this when the
4183 alternative would take fewer insns. */
4184 if (val & 0xffff0000)
4185 {
4186 temp1 = remainder & 0xffff0000;
4187 temp2 = remainder & 0x0000ffff;
4188
4189 /* Overlaps outside this range are best done using other methods. */
4190 for (i = 9; i < 24; i++)
4191 {
4192 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4193 && !const_ok_for_arm (temp2))
4194 {
4195 rtx new_src = (subtargets
4196 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4197 : target);
4198 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4199 source, subtargets, generate);
4200 source = new_src;
4201 if (generate)
4202 emit_constant_insn
4203 (cond,
4204 gen_rtx_SET
4205 (VOIDmode, target,
4206 gen_rtx_IOR (mode,
4207 gen_rtx_ASHIFT (mode, source,
4208 GEN_INT (i)),
4209 source)));
4210 return insns + 1;
4211 }
4212 }
4213
4214 /* Don't duplicate cases already considered. */
4215 for (i = 17; i < 24; i++)
4216 {
4217 if (((temp1 | (temp1 >> i)) == remainder)
4218 && !const_ok_for_arm (temp1))
4219 {
4220 rtx new_src = (subtargets
4221 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4222 : target);
4223 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4224 source, subtargets, generate);
4225 source = new_src;
4226 if (generate)
4227 emit_constant_insn
4228 (cond,
4229 gen_rtx_SET (VOIDmode, target,
4230 gen_rtx_IOR
4231 (mode,
4232 gen_rtx_LSHIFTRT (mode, source,
4233 GEN_INT (i)),
4234 source)));
4235 return insns + 1;
4236 }
4237 }
4238 }
4239 break;
4240
4241 case IOR:
4242 case XOR:
4243 /* If we have IOR or XOR, and the constant can be loaded in a
4244 single instruction, and we can find a temporary to put it in,
4245 then this can be done in two instructions instead of 3-4. */
4246 if (subtargets
4247 /* TARGET can't be NULL if SUBTARGETS is 0 */
4248 || (reload_completed && !reg_mentioned_p (target, source)))
4249 {
4250 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4251 {
4252 if (generate)
4253 {
4254 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4255
4256 emit_constant_insn (cond,
4257 gen_rtx_SET (VOIDmode, sub,
4258 GEN_INT (val)));
4259 emit_constant_insn (cond,
4260 gen_rtx_SET (VOIDmode, target,
4261 gen_rtx_fmt_ee (code, mode,
4262 source, sub)));
4263 }
4264 return 2;
4265 }
4266 }
4267
4268 if (code == XOR)
4269 break;
4270
4271 /* Convert
4272 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4273 with the remaining bits 0, e.g. 0xfff00000) into
4274 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4275
4276 This can be done in 2 instructions by using shifts with mov or mvn.
4277 E.g. for
4278 x = x | 0xfff00000;
4279 we generate:
4280 mvn r0, r0, asl #12
4281 mvn r0, r0, lsr #12 */
4282 if (set_sign_bit_copies > 8
4283 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4284 {
4285 if (generate)
4286 {
4287 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4288 rtx shift = GEN_INT (set_sign_bit_copies);
4289
4290 emit_constant_insn
4291 (cond,
4292 gen_rtx_SET (VOIDmode, sub,
4293 gen_rtx_NOT (mode,
4294 gen_rtx_ASHIFT (mode,
4295 source,
4296 shift))));
4297 emit_constant_insn
4298 (cond,
4299 gen_rtx_SET (VOIDmode, target,
4300 gen_rtx_NOT (mode,
4301 gen_rtx_LSHIFTRT (mode, sub,
4302 shift))));
4303 }
4304 return 2;
4305 }
4306
4307 /* Convert
4308 x = y | constant (which has set_zero_bit_copies trailing 1s)
4309 to
4310 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4311
4312 E.g. for r0 = r0 | 0xfff we generate:
4313 mvn r0, r0, lsr #12
4314 mvn r0, r0, asl #12
4315
4316 */
4317 if (set_zero_bit_copies > 8
4318 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4319 {
4320 if (generate)
4321 {
4322 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4323 rtx shift = GEN_INT (set_zero_bit_copies);
4324
4325 emit_constant_insn
4326 (cond,
4327 gen_rtx_SET (VOIDmode, sub,
4328 gen_rtx_NOT (mode,
4329 gen_rtx_LSHIFTRT (mode,
4330 source,
4331 shift))));
4332 emit_constant_insn
4333 (cond,
4334 gen_rtx_SET (VOIDmode, target,
4335 gen_rtx_NOT (mode,
4336 gen_rtx_ASHIFT (mode, sub,
4337 shift))));
4338 }
4339 return 2;
4340 }
4341
4342 /* This will never be reached for Thumb-2 because orn is a valid
4343 instruction. This is for Thumb-1 and the 32-bit ARM cases.
4344
4345 x = y | constant (such that ~constant is a valid constant)
4346 Transform this to
4347 x = ~(~y & ~constant).
4348 */
4349 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4350 {
4351 if (generate)
4352 {
4353 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4354 emit_constant_insn (cond,
4355 gen_rtx_SET (VOIDmode, sub,
4356 gen_rtx_NOT (mode, source)));
4357 source = sub;
4358 if (subtargets)
4359 sub = gen_reg_rtx (mode);
4360 emit_constant_insn (cond,
4361 gen_rtx_SET (VOIDmode, sub,
4362 gen_rtx_AND (mode, source,
4363 GEN_INT (temp1))));
4364 emit_constant_insn (cond,
4365 gen_rtx_SET (VOIDmode, target,
4366 gen_rtx_NOT (mode, sub)));
4367 }
4368 return 3;
4369 }
4370 break;
4371
4372 case AND:
4373 /* See if two shifts will do 2 or more insns' worth of work. */
4374 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4375 {
4376 HOST_WIDE_INT shift_mask = ((0xffffffff
4377 << (32 - clear_sign_bit_copies))
4378 & 0xffffffff);
4379
4380 if ((remainder | shift_mask) != 0xffffffff)
4381 {
4382 if (generate)
4383 {
4384 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4385 insns = arm_gen_constant (AND, mode, cond,
4386 remainder | shift_mask,
4387 new_src, source, subtargets, 1);
4388 source = new_src;
4389 }
4390 else
4391 {
4392 rtx targ = subtargets ? NULL_RTX : target;
4393 insns = arm_gen_constant (AND, mode, cond,
4394 remainder | shift_mask,
4395 targ, source, subtargets, 0);
4396 }
4397 }
4398
4399 if (generate)
4400 {
4401 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4402 rtx shift = GEN_INT (clear_sign_bit_copies);
4403
4404 emit_insn (gen_ashlsi3 (new_src, source, shift));
4405 emit_insn (gen_lshrsi3 (target, new_src, shift));
4406 }
4407
4408 return insns + 2;
4409 }
4410
4411 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4412 {
4413 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4414
4415 if ((remainder | shift_mask) != 0xffffffff)
4416 {
4417 if (generate)
4418 {
4419 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4420
4421 insns = arm_gen_constant (AND, mode, cond,
4422 remainder | shift_mask,
4423 new_src, source, subtargets, 1);
4424 source = new_src;
4425 }
4426 else
4427 {
4428 rtx targ = subtargets ? NULL_RTX : target;
4429
4430 insns = arm_gen_constant (AND, mode, cond,
4431 remainder | shift_mask,
4432 targ, source, subtargets, 0);
4433 }
4434 }
4435
4436 if (generate)
4437 {
4438 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4439 rtx shift = GEN_INT (clear_zero_bit_copies);
4440
4441 emit_insn (gen_lshrsi3 (new_src, source, shift));
4442 emit_insn (gen_ashlsi3 (target, new_src, shift));
4443 }
4444
4445 return insns + 2;
4446 }
4447
4448 break;
4449
4450 default:
4451 break;
4452 }
4453
4454 /* Calculate what the instruction sequences would be if we generated it
4455 normally, negated, or inverted. */
4456 if (code == AND)
4457 /* AND cannot be split into multiple insns, so invert and use BIC. */
4458 insns = 99;
4459 else
4460 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4461
4462 if (can_negate)
4463 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4464 &neg_immediates);
4465 else
4466 neg_insns = 99;
4467
4468 if (can_invert || final_invert)
4469 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4470 &inv_immediates);
4471 else
4472 inv_insns = 99;
4473
4474 immediates = &pos_immediates;
4475
4476 /* Is the negated immediate sequence more efficient? */
4477 if (neg_insns < insns && neg_insns <= inv_insns)
4478 {
4479 insns = neg_insns;
4480 immediates = &neg_immediates;
4481 }
4482 else
4483 can_negate = 0;
4484
4485 /* Is the inverted immediate sequence more efficient?
4486 We must allow for an extra NOT instruction for XOR operations, although
4487 there is some chance that the final 'mvn' will get optimized later. */
4488 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4489 {
4490 insns = inv_insns;
4491 immediates = &inv_immediates;
4492 }
4493 else
4494 {
4495 can_invert = 0;
4496 final_invert = 0;
4497 }
4498
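/* For example (illustrative): setting a register to 0xffff00ff in ARM mode
needs three 8-bit chunks directly, but its inverse 0x0000ff00 needs only
one, so the inverted sequence wins and a single MVN is emitted.  */
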
4499 /* Now output the chosen sequence as instructions. */
4500 if (generate)
4501 {
4502 for (i = 0; i < insns; i++)
4503 {
4504 rtx new_src, temp1_rtx;
4505
4506 temp1 = immediates->i[i];
4507
4508 if (code == SET || code == MINUS)
4509 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4510 else if ((final_invert || i < (insns - 1)) && subtargets)
4511 new_src = gen_reg_rtx (mode);
4512 else
4513 new_src = target;
4514
4515 if (can_invert)
4516 temp1 = ~temp1;
4517 else if (can_negate)
4518 temp1 = -temp1;
4519
4520 temp1 = trunc_int_for_mode (temp1, mode);
4521 temp1_rtx = GEN_INT (temp1);
4522
4523 if (code == SET)
4524 ;
4525 else if (code == MINUS)
4526 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4527 else
4528 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4529
4530 emit_constant_insn (cond,
4531 gen_rtx_SET (VOIDmode, new_src,
4532 temp1_rtx));
4533 source = new_src;
4534
4535 if (code == SET)
4536 {
4537 can_negate = can_invert;
4538 can_invert = 0;
4539 code = PLUS;
4540 }
4541 else if (code == MINUS)
4542 code = PLUS;
4543 }
4544 }
4545
4546 if (final_invert)
4547 {
4548 if (generate)
4549 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4550 gen_rtx_NOT (mode, source)));
4551 insns++;
4552 }
4553
4554 return insns;
4555 }
4556
4557 /* Canonicalize a comparison so that we are more likely to recognize it.
4558 This can be done for a few constant compares, where we can make the
4559 immediate value easier to load. */
4560
4561 static void
4562 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4563 bool op0_preserve_value)
4564 {
4565 enum machine_mode mode;
4566 unsigned HOST_WIDE_INT i, maxval;
4567
4568 mode = GET_MODE (*op0);
4569 if (mode == VOIDmode)
4570 mode = GET_MODE (*op1);
4571
4572 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4573
4574 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4575 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4576 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4577 for GTU/LEU in Thumb mode. */
4578 if (mode == DImode)
4579 {
4580 rtx tem;
4581
4582 if (*code == GT || *code == LE
4583 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4584 {
4585 /* Missing comparison. First try to use an available
4586 comparison. */
4587 if (CONST_INT_P (*op1))
4588 {
4589 i = INTVAL (*op1);
4590 switch (*code)
4591 {
4592 case GT:
4593 case LE:
4594 if (i != maxval
4595 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4596 {
4597 *op1 = GEN_INT (i + 1);
4598 *code = *code == GT ? GE : LT;
4599 return;
4600 }
4601 break;
4602 case GTU:
4603 case LEU:
4604 if (i != ~((unsigned HOST_WIDE_INT) 0)
4605 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4606 {
4607 *op1 = GEN_INT (i + 1);
4608 *code = *code == GTU ? GEU : LTU;
4609 return;
4610 }
4611 break;
4612 default:
4613 gcc_unreachable ();
4614 }
4615 }
4616
4617 /* If that did not work, reverse the condition. */
4618 if (!op0_preserve_value)
4619 {
4620 tem = *op0;
4621 *op0 = *op1;
4622 *op1 = tem;
4623 *code = (int)swap_condition ((enum rtx_code)*code);
4624 }
4625 }
4626 return;
4627 }
4628
4629 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4630 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4631 to facilitate possible combining with a cmp into 'ands'. */
4632 if (mode == SImode
4633 && GET_CODE (*op0) == ZERO_EXTEND
4634 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4635 && GET_MODE (XEXP (*op0, 0)) == QImode
4636 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4637 && subreg_lowpart_p (XEXP (*op0, 0))
4638 && *op1 == const0_rtx)
4639 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4640 GEN_INT (255));
4641
4642 /* Comparisons smaller than DImode. Only adjust comparisons against
4643 an out-of-range constant. */
4644 if (!CONST_INT_P (*op1)
4645 || const_ok_for_arm (INTVAL (*op1))
4646 || const_ok_for_arm (- INTVAL (*op1)))
4647 return;
4648
4649 i = INTVAL (*op1);
4650
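/* For example (illustrative): a GT comparison against 0x00ffffff, which is
not a valid immediate, becomes a GE comparison against 0x01000000, which
is.  */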
4651 switch (*code)
4652 {
4653 case EQ:
4654 case NE:
4655 return;
4656
4657 case GT:
4658 case LE:
4659 if (i != maxval
4660 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4661 {
4662 *op1 = GEN_INT (i + 1);
4663 *code = *code == GT ? GE : LT;
4664 return;
4665 }
4666 break;
4667
4668 case GE:
4669 case LT:
4670 if (i != ~maxval
4671 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4672 {
4673 *op1 = GEN_INT (i - 1);
4674 *code = *code == GE ? GT : LE;
4675 return;
4676 }
4677 break;
4678
4679 case GTU:
4680 case LEU:
4681 if (i != ~((unsigned HOST_WIDE_INT) 0)
4682 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4683 {
4684 *op1 = GEN_INT (i + 1);
4685 *code = *code == GTU ? GEU : LTU;
4686 return;
4687 }
4688 break;
4689
4690 case GEU:
4691 case LTU:
4692 if (i != 0
4693 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4694 {
4695 *op1 = GEN_INT (i - 1);
4696 *code = *code == GEU ? GTU : LEU;
4697 return;
4698 }
4699 break;
4700
4701 default:
4702 gcc_unreachable ();
4703 }
4704 }
4705
4706
4707 /* Define how to find the value returned by a function. */
4708
4709 static rtx
4710 arm_function_value(const_tree type, const_tree func,
4711 bool outgoing ATTRIBUTE_UNUSED)
4712 {
4713 enum machine_mode mode;
4714 int unsignedp ATTRIBUTE_UNUSED;
4715 rtx r ATTRIBUTE_UNUSED;
4716
4717 mode = TYPE_MODE (type);
4718
4719 if (TARGET_AAPCS_BASED)
4720 return aapcs_allocate_return_reg (mode, type, func);
4721
4722 /* Promote integer types. */
4723 if (INTEGRAL_TYPE_P (type))
4724 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4725
4726 /* Promote small structs returned in a register to full-word size
4727 for big-endian AAPCS. */
4728 if (arm_return_in_msb (type))
4729 {
4730 HOST_WIDE_INT size = int_size_in_bytes (type);
4731 if (size % UNITS_PER_WORD != 0)
4732 {
4733 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4734 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4735 }
4736 }
4737
4738 return arm_libcall_value_1 (mode);
4739 }
4740
4741 /* libcall hashtable helpers. */
4742
4743 struct libcall_hasher : typed_noop_remove <rtx_def>
4744 {
4745 typedef rtx_def value_type;
4746 typedef rtx_def compare_type;
4747 static inline hashval_t hash (const value_type *);
4748 static inline bool equal (const value_type *, const compare_type *);
4749 static inline void remove (value_type *);
4750 };
4751
4752 inline bool
4753 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4754 {
4755 return rtx_equal_p (p1, p2);
4756 }
4757
4758 inline hashval_t
4759 libcall_hasher::hash (const value_type *p1)
4760 {
4761 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4762 }
4763
4764 typedef hash_table<libcall_hasher> libcall_table_type;
4765
4766 static void
4767 add_libcall (libcall_table_type *htab, rtx libcall)
4768 {
4769 *htab->find_slot (libcall, INSERT) = libcall;
4770 }
4771
4772 static bool
4773 arm_libcall_uses_aapcs_base (const_rtx libcall)
4774 {
4775 static bool init_done = false;
4776 static libcall_table_type *libcall_htab = NULL;
4777
4778 if (!init_done)
4779 {
4780 init_done = true;
4781
4782 libcall_htab = new libcall_table_type (31);
4783 add_libcall (libcall_htab,
4784 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4785 add_libcall (libcall_htab,
4786 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4787 add_libcall (libcall_htab,
4788 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4789 add_libcall (libcall_htab,
4790 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4791
4792 add_libcall (libcall_htab,
4793 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4794 add_libcall (libcall_htab,
4795 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4796 add_libcall (libcall_htab,
4797 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4798 add_libcall (libcall_htab,
4799 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4800
4801 add_libcall (libcall_htab,
4802 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4803 add_libcall (libcall_htab,
4804 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4805 add_libcall (libcall_htab,
4806 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4807 add_libcall (libcall_htab,
4808 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4809 add_libcall (libcall_htab,
4810 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4811 add_libcall (libcall_htab,
4812 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4813 add_libcall (libcall_htab,
4814 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4815 add_libcall (libcall_htab,
4816 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4817
4818 /* Values from double-precision helper functions are returned in core
4819 registers if the selected core only supports single-precision
4820 arithmetic, even if we are using the hard-float ABI. The same is
4821 true for single-precision helpers, but we will never be using the
4822 hard-float ABI on a CPU which doesn't support single-precision
4823 operations in hardware. */
4824 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4825 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4826 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4827 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4828 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4829 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4830 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4831 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4832 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4833 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4834 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4835 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4836 SFmode));
4837 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4838 DFmode));
4839 }
4840
4841 return libcall && libcall_htab->find (libcall) != NULL;
4842 }
4843
4844 static rtx
4845 arm_libcall_value_1 (enum machine_mode mode)
4846 {
4847 if (TARGET_AAPCS_BASED)
4848 return aapcs_libcall_value (mode);
4849 else if (TARGET_IWMMXT_ABI
4850 && arm_vector_mode_supported_p (mode))
4851 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4852 else
4853 return gen_rtx_REG (mode, ARG_REGISTER (1));
4854 }
4855
4856 /* Define how to find the value returned by a library function
4857 assuming the value has mode MODE. */
4858
4859 static rtx
4860 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4861 {
4862 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4863 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4864 {
4865 /* The following libcalls return their result in integer registers,
4866 even though they return a floating point value. */
4867 if (arm_libcall_uses_aapcs_base (libcall))
4868 return gen_rtx_REG (mode, ARG_REGISTER(1));
4869
4870 }
4871
4872 return arm_libcall_value_1 (mode);
4873 }
4874
4875 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4876
4877 static bool
4878 arm_function_value_regno_p (const unsigned int regno)
4879 {
4880 if (regno == ARG_REGISTER (1)
4881 || (TARGET_32BIT
4882 && TARGET_AAPCS_BASED
4883 && TARGET_VFP
4884 && TARGET_HARD_FLOAT
4885 && regno == FIRST_VFP_REGNUM)
4886 || (TARGET_IWMMXT_ABI
4887 && regno == FIRST_IWMMXT_REGNUM))
4888 return true;
4889
4890 return false;
4891 }
4892
4893 /* Determine the amount of memory needed to store the possible return
4894 registers of an untyped call. */
4895 int
4896 arm_apply_result_size (void)
4897 {
4898 int size = 16;
4899
4900 if (TARGET_32BIT)
4901 {
4902 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4903 size += 32;
4904 if (TARGET_IWMMXT_ABI)
4905 size += 8;
4906 }
4907
4908 return size;
4909 }
4910
4911 /* Decide whether TYPE should be returned in memory (true)
4912 or in a register (false). FNTYPE is the type of the function making
4913 the call. */
4914 static bool
4915 arm_return_in_memory (const_tree type, const_tree fntype)
4916 {
4917 HOST_WIDE_INT size;
4918
4919 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4920
4921 if (TARGET_AAPCS_BASED)
4922 {
4923 /* Simple, non-aggregate types (i.e. not including vectors and
4924 complex) are always returned in a register (or registers).
4925 We don't care about which register here, so we can short-cut
4926 some of the detail. */
4927 if (!AGGREGATE_TYPE_P (type)
4928 && TREE_CODE (type) != VECTOR_TYPE
4929 && TREE_CODE (type) != COMPLEX_TYPE)
4930 return false;
4931
4932 /* Any return value that is no larger than one word can be
4933 returned in r0. */
4934 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4935 return false;
4936
4937 /* Check any available co-processors to see if they accept the
4938 type as a register candidate (VFP, for example, can return
4939 some aggregates in consecutive registers). These aren't
4940 available if the call is variadic. */
4941 if (aapcs_select_return_coproc (type, fntype) >= 0)
4942 return false;
4943
4944 /* Vector values should be returned using ARM registers, not
4945 memory (unless they're over 16 bytes, which will break since
4946 we only have four call-clobbered registers to play with). */
4947 if (TREE_CODE (type) == VECTOR_TYPE)
4948 return (size < 0 || size > (4 * UNITS_PER_WORD));
4949
4950 /* The rest go in memory. */
4951 return true;
4952 }
4953
4954 if (TREE_CODE (type) == VECTOR_TYPE)
4955 return (size < 0 || size > (4 * UNITS_PER_WORD));
4956
4957 if (!AGGREGATE_TYPE_P (type)
4958 && (TREE_CODE (type) != VECTOR_TYPE))
4959 /* All simple types are returned in registers. */
4960 return false;
4961
4962 if (arm_abi != ARM_ABI_APCS)
4963 {
4964 /* ATPCS and later return aggregate types in memory only if they are
4965 larger than a word (or are variable size). */
4966 return (size < 0 || size > UNITS_PER_WORD);
4967 }
4968
4969 /* For the arm-wince targets we choose to be compatible with Microsoft's
4970 ARM and Thumb compilers, which always return aggregates in memory. */
4971 #ifndef ARM_WINCE
4972 /* All structures/unions bigger than one word are returned in memory.
4973 Also catch the case where int_size_in_bytes returns -1. In this case
4974 the aggregate is either huge or of variable size, and in either case
4975 we will want to return it via memory and not in a register. */
4976 if (size < 0 || size > UNITS_PER_WORD)
4977 return true;
4978
4979 if (TREE_CODE (type) == RECORD_TYPE)
4980 {
4981 tree field;
4982
4983 /* For a struct the APCS says that we only return in a register
4984 if the type is 'integer like' and every addressable element
4985 has an offset of zero. For practical purposes this means
4986 that the structure can have at most one non bit-field element
4987 and that this element must be the first one in the structure. */
4988
4989 /* Find the first field, ignoring non FIELD_DECL things which will
4990 have been created by C++. */
4991 for (field = TYPE_FIELDS (type);
4992 field && TREE_CODE (field) != FIELD_DECL;
4993 field = DECL_CHAIN (field))
4994 continue;
4995
4996 if (field == NULL)
4997 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4998
4999 /* Check that the first field is valid for returning in a register. */
5000
5001 /* ... Floats are not allowed */
5002 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5003 return true;
5004
5005 /* ... Aggregates that are not themselves valid for returning in
5006 a register are not allowed. */
5007 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5008 return true;
5009
5010 /* Now check the remaining fields, if any. Only bitfields are allowed,
5011 since they are not addressable. */
5012 for (field = DECL_CHAIN (field);
5013 field;
5014 field = DECL_CHAIN (field))
5015 {
5016 if (TREE_CODE (field) != FIELD_DECL)
5017 continue;
5018
5019 if (!DECL_BIT_FIELD_TYPE (field))
5020 return true;
5021 }
5022
5023 return false;
5024 }
5025
5026 if (TREE_CODE (type) == UNION_TYPE)
5027 {
5028 tree field;
5029
5030 /* Unions can be returned in registers if every element is
5031 integral, or can be returned in an integer register. */
5032 for (field = TYPE_FIELDS (type);
5033 field;
5034 field = DECL_CHAIN (field))
5035 {
5036 if (TREE_CODE (field) != FIELD_DECL)
5037 continue;
5038
5039 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5040 return true;
5041
5042 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5043 return true;
5044 }
5045
5046 return false;
5047 }
5048 #endif /* not ARM_WINCE */
5049
5050 /* Return all other types in memory. */
5051 return true;
5052 }
5053
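/* Illustrative examples (a hedged sketch, not compiled) of the AAPCS
decisions above.  Whether the third case stays out of memory depends on the
co-processor (e.g. VFP) accepting it under the ABI in use.  */
#if 0
struct s1 { char c; };               /* At most one word: returned in r0.  */
struct s2 { int a; int b; int c; };  /* Larger than a word and not a
                                        co-processor candidate: memory.  */
struct s3 { double x; double y; };   /* May be returned in VFP registers
                                        when the hard-float variant accepts
                                        it; otherwise memory.  */
#endif
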
5054 const struct pcs_attribute_arg
5055 {
5056 const char *arg;
5057 enum arm_pcs value;
5058 } pcs_attribute_args[] =
5059 {
5060 {"aapcs", ARM_PCS_AAPCS},
5061 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5062 #if 0
5063 /* We could recognize these, but changes would be needed elsewhere
5064 * to implement them. */
5065 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5066 {"atpcs", ARM_PCS_ATPCS},
5067 {"apcs", ARM_PCS_APCS},
5068 #endif
5069 {NULL, ARM_PCS_UNKNOWN}
5070 };
5071
5072 static enum arm_pcs
5073 arm_pcs_from_attribute (tree attr)
5074 {
5075 const struct pcs_attribute_arg *ptr;
5076 const char *arg;
5077
5078 /* Get the value of the argument. */
5079 if (TREE_VALUE (attr) == NULL_TREE
5080 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5081 return ARM_PCS_UNKNOWN;
5082
5083 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5084
5085 /* Check it against the list of known arguments. */
5086 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5087 if (streq (arg, ptr->arg))
5088 return ptr->value;
5089
5090 /* An unrecognized PCS variant. */
5091 return ARM_PCS_UNKNOWN;
5092 }
5093
5094 /* Get the PCS variant to use for this call. TYPE is the function's type
5095 specification, DECL is the specific declaration. DECL may be null if
5096 the call could be indirect or if this is a library call. */
5097 static enum arm_pcs
5098 arm_get_pcs_model (const_tree type, const_tree decl)
5099 {
5100 bool user_convention = false;
5101 enum arm_pcs user_pcs = arm_pcs_default;
5102 tree attr;
5103
5104 gcc_assert (type);
5105
5106 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5107 if (attr)
5108 {
5109 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5110 user_convention = true;
5111 }
5112
5113 if (TARGET_AAPCS_BASED)
5114 {
5115 /* Detect varargs functions. These always use the base rules
5116 (no argument is ever a candidate for a co-processor
5117 register). */
5118 bool base_rules = stdarg_p (type);
5119
5120 if (user_convention)
5121 {
5122 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5123 sorry ("non-AAPCS derived PCS variant");
5124 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5125 error ("variadic functions must use the base AAPCS variant");
5126 }
5127
5128 if (base_rules)
5129 return ARM_PCS_AAPCS;
5130 else if (user_convention)
5131 return user_pcs;
5132 else if (decl && flag_unit_at_a_time)
5133 {
5134 /* Local functions never leak outside this compilation unit,
5135 so we are free to use whatever conventions are
5136 appropriate. */
5137 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5138 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5139 if (i && i->local)
5140 return ARM_PCS_AAPCS_LOCAL;
5141 }
5142 }
5143 else if (user_convention && user_pcs != arm_pcs_default)
5144 sorry ("PCS variant");
5145
5146 /* For everything else we use the target's default. */
5147 return arm_pcs_default;
5148 }
5149
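/* An illustrative use (sketch) of the "pcs" attribute handled above: the
declaration below forces the base AAPCS variant for this call even when the
VFP variant is the default.  */
#if 0
double base_aapcs_call (double) __attribute__ ((pcs ("aapcs")));
#endif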
5150
5151 static void
5152 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5153 const_tree fntype ATTRIBUTE_UNUSED,
5154 rtx libcall ATTRIBUTE_UNUSED,
5155 const_tree fndecl ATTRIBUTE_UNUSED)
5156 {
5157 /* Record the unallocated VFP registers. */
5158 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5159 pcum->aapcs_vfp_reg_alloc = 0;
5160 }
5161
5162 /* Walk down the type tree of TYPE counting consecutive base elements.
5163 If *MODEP is VOIDmode, then set it to the first valid floating point
5164 type. If a non-floating point type is found, or if a floating point
5165 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5166 otherwise return the count in the sub-tree. */
5167 static int
5168 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5169 {
5170 enum machine_mode mode;
5171 HOST_WIDE_INT size;
5172
5173 switch (TREE_CODE (type))
5174 {
5175 case REAL_TYPE:
5176 mode = TYPE_MODE (type);
5177 if (mode != DFmode && mode != SFmode)
5178 return -1;
5179
5180 if (*modep == VOIDmode)
5181 *modep = mode;
5182
5183 if (*modep == mode)
5184 return 1;
5185
5186 break;
5187
5188 case COMPLEX_TYPE:
5189 mode = TYPE_MODE (TREE_TYPE (type));
5190 if (mode != DFmode && mode != SFmode)
5191 return -1;
5192
5193 if (*modep == VOIDmode)
5194 *modep = mode;
5195
5196 if (*modep == mode)
5197 return 2;
5198
5199 break;
5200
5201 case VECTOR_TYPE:
5202 /* Use V2SImode and V4SImode as representatives of all 64-bit
5203 and 128-bit vector types, whether or not those modes are
5204 supported with the present options. */
5205 size = int_size_in_bytes (type);
5206 switch (size)
5207 {
5208 case 8:
5209 mode = V2SImode;
5210 break;
5211 case 16:
5212 mode = V4SImode;
5213 break;
5214 default:
5215 return -1;
5216 }
5217
5218 if (*modep == VOIDmode)
5219 *modep = mode;
5220
5221 /* Vector modes are considered to be opaque: two vectors are
5222 equivalent for the purposes of being homogeneous aggregates
5223 if they are the same size. */
5224 if (*modep == mode)
5225 return 1;
5226
5227 break;
5228
5229 case ARRAY_TYPE:
5230 {
5231 int count;
5232 tree index = TYPE_DOMAIN (type);
5233
5234 /* Can't handle incomplete types nor sizes that are not
5235 fixed. */
5236 if (!COMPLETE_TYPE_P (type)
5237 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5238 return -1;
5239
5240 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5241 if (count == -1
5242 || !index
5243 || !TYPE_MAX_VALUE (index)
5244 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5245 || !TYPE_MIN_VALUE (index)
5246 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5247 || count < 0)
5248 return -1;
5249
5250 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5251 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5252
5253 /* There must be no padding. */
5254 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5255 return -1;
5256
5257 return count;
5258 }
5259
5260 case RECORD_TYPE:
5261 {
5262 int count = 0;
5263 int sub_count;
5264 tree field;
5265
5266 /* Can't handle incomplete types nor sizes that are not
5267 fixed. */
5268 if (!COMPLETE_TYPE_P (type)
5269 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5270 return -1;
5271
5272 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5273 {
5274 if (TREE_CODE (field) != FIELD_DECL)
5275 continue;
5276
5277 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5278 if (sub_count < 0)
5279 return -1;
5280 count += sub_count;
5281 }
5282
5283 /* There must be no padding. */
5284 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5285 return -1;
5286
5287 return count;
5288 }
5289
5290 case UNION_TYPE:
5291 case QUAL_UNION_TYPE:
5292 {
5293 /* These aren't very interesting except in a degenerate case. */
5294 int count = 0;
5295 int sub_count;
5296 tree field;
5297
5298 /* Can't handle incomplete types nor sizes that are not
5299 fixed. */
5300 if (!COMPLETE_TYPE_P (type)
5301 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5302 return -1;
5303
5304 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5305 {
5306 if (TREE_CODE (field) != FIELD_DECL)
5307 continue;
5308
5309 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5310 if (sub_count < 0)
5311 return -1;
5312 count = count > sub_count ? count : sub_count;
5313 }
5314
5315 /* There must be no padding. */
5316 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5317 return -1;
5318
5319 return count;
5320 }
5321
5322 default:
5323 break;
5324 }
5325
5326 return -1;
5327 }
5328
5329 /* Return true if PCS_VARIANT should use VFP registers. */
5330 static bool
5331 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5332 {
5333 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5334 {
5335 static bool seen_thumb1_vfp = false;
5336
5337 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5338 {
5339 sorry ("Thumb-1 hard-float VFP ABI");
5340 /* sorry() is not immediately fatal, so only display this once. */
5341 seen_thumb1_vfp = true;
5342 }
5343
5344 return true;
5345 }
5346
5347 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5348 return false;
5349
5350 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
5351 && (TARGET_VFP_DOUBLE || !is_double));
5352 }
5353
5354 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5355 suitable for passing or returning in VFP registers for the PCS
5356 variant selected. If it is, then *BASE_MODE is updated to contain
5357 a machine mode describing each element of the argument's type and
5358 *COUNT to hold the number of such elements. */
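/* For instance (illustrative only): a '__complex__ double' argument has
   class MODE_COMPLEX_FLOAT, so *BASE_MODE is set to DFmode and *COUNT to
   2; a struct of four 'float' members sets *BASE_MODE to SFmode and
   *COUNT to 4.  Aggregates with more than four elements are rejected.  */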
5359 static bool
5360 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5361 enum machine_mode mode, const_tree type,
5362 enum machine_mode *base_mode, int *count)
5363 {
5364 enum machine_mode new_mode = VOIDmode;
5365
5366 /* If we have the type information, prefer that to working things
5367 out from the mode. */
5368 if (type)
5369 {
5370 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5371
5372 if (ag_count > 0 && ag_count <= 4)
5373 *count = ag_count;
5374 else
5375 return false;
5376 }
5377 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5378 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5379 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5380 {
5381 *count = 1;
5382 new_mode = mode;
5383 }
5384 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5385 {
5386 *count = 2;
5387 new_mode = (mode == DCmode ? DFmode : SFmode);
5388 }
5389 else
5390 return false;
5391
5392
5393 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5394 return false;
5395
5396 *base_mode = new_mode;
5397 return true;
5398 }
5399
5400 static bool
5401 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5402 enum machine_mode mode, const_tree type)
5403 {
5404 int count ATTRIBUTE_UNUSED;
5405 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5406
5407 if (!use_vfp_abi (pcs_variant, false))
5408 return false;
5409 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5410 &ag_mode, &count);
5411 }
5412
5413 static bool
5414 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5415 const_tree type)
5416 {
5417 if (!use_vfp_abi (pcum->pcs_variant, false))
5418 return false;
5419
5420 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5421 &pcum->aapcs_vfp_rmode,
5422 &pcum->aapcs_vfp_rcount);
5423 }
5424
5425 static bool
5426 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5427 const_tree type ATTRIBUTE_UNUSED)
5428 {
5429 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5430 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5431 int regno;
5432
5433 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5434 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5435 {
5436 pcum->aapcs_vfp_reg_alloc = mask << regno;
5437 if (mode == BLKmode
5438 || (mode == TImode && ! TARGET_NEON)
5439 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5440 {
5441 int i;
5442 int rcount = pcum->aapcs_vfp_rcount;
5443 int rshift = shift;
5444 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5445 rtx par;
5446 if (!TARGET_NEON)
5447 {
5448 /* Avoid using unsupported vector modes. */
5449 if (rmode == V2SImode)
5450 rmode = DImode;
5451 else if (rmode == V4SImode)
5452 {
5453 rmode = DImode;
5454 rcount *= 2;
5455 rshift /= 2;
5456 }
5457 }
5458 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5459 for (i = 0; i < rcount; i++)
5460 {
5461 rtx tmp = gen_rtx_REG (rmode,
5462 FIRST_VFP_REGNUM + regno + i * rshift);
5463 tmp = gen_rtx_EXPR_LIST
5464 (VOIDmode, tmp,
5465 GEN_INT (i * GET_MODE_SIZE (rmode)));
5466 XVECEXP (par, 0, i) = tmp;
5467 }
5468
5469 pcum->aapcs_reg = par;
5470 }
5471 else
5472 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5473 return true;
5474 }
5475 return false;
5476 }
5477
5478 static rtx
5479 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
5480 enum machine_mode mode,
5481 const_tree type ATTRIBUTE_UNUSED)
5482 {
5483 if (!use_vfp_abi (pcs_variant, false))
5484 return NULL;
5485
5486 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5487 {
5488 int count;
5489 enum machine_mode ag_mode;
5490 int i;
5491 rtx par;
5492 int shift;
5493
5494 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5495 &ag_mode, &count);
5496
5497 if (!TARGET_NEON)
5498 {
5499 if (ag_mode == V2SImode)
5500 ag_mode = DImode;
5501 else if (ag_mode == V4SImode)
5502 {
5503 ag_mode = DImode;
5504 count *= 2;
5505 }
5506 }
5507 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
5508 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5509 for (i = 0; i < count; i++)
5510 {
5511 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5512 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5513 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5514 XVECEXP (par, 0, i) = tmp;
5515 }
5516
5517 return par;
5518 }
5519
5520 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5521 }
5522
5523 static void
5524 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
5525 enum machine_mode mode ATTRIBUTE_UNUSED,
5526 const_tree type ATTRIBUTE_UNUSED)
5527 {
5528 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5529 pcum->aapcs_vfp_reg_alloc = 0;
5530 return;
5531 }
5532
5533 #define AAPCS_CP(X) \
5534 { \
5535 aapcs_ ## X ## _cum_init, \
5536 aapcs_ ## X ## _is_call_candidate, \
5537 aapcs_ ## X ## _allocate, \
5538 aapcs_ ## X ## _is_return_candidate, \
5539 aapcs_ ## X ## _allocate_return_reg, \
5540 aapcs_ ## X ## _advance \
5541 }
5542
5543 /* Table of co-processors that can be used to pass arguments in
5544 registers. Ideally no argument should be a candidate for more than
5545 one co-processor table entry, but the table is processed in order
5546 and stops after the first match. If that entry then fails to put
5547 the argument into a co-processor register, the argument will go on
5548 the stack. */
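/* For example (illustrative): under the VFP PCS variant a 'double'
   argument matches the vfp entry below and, while a suitable register
   pair remains free, is allocated to d0-d7; once allocation fails for a
   slot, later candidates for that slot are placed on the stack.  */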
5549 static struct
5550 {
5551 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5552 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5553
5554 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5555 BLKmode) is a candidate for this co-processor's registers; this
5556 function should ignore any position-dependent state in
5557 CUMULATIVE_ARGS and only use call-type dependent information. */
5558 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5559
5560 /* Return true if the argument does get a co-processor register; it
5561 should set aapcs_reg to an RTX of the register allocated as is
5562 required for a return from FUNCTION_ARG. */
5563 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5564
5565 /* Return true if a result of mode MODE (or type TYPE if MODE is
5566 BLKmode) can be returned in this co-processor's registers. */
5567 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5568
5569 /* Allocate and return an RTX element to hold the return value of a
5570 call; this routine must not fail and will only be called if
5571 is_return_candidate returned true with the same parameters. */
5572 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5573
5574 /* Finish processing this argument and prepare to start processing
5575 the next one. */
5576 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5577 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5578 {
5579 AAPCS_CP(vfp)
5580 };
5581
5582 #undef AAPCS_CP
5583
5584 static int
5585 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5586 const_tree type)
5587 {
5588 int i;
5589
5590 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5591 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5592 return i;
5593
5594 return -1;
5595 }
5596
5597 static int
5598 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5599 {
5600 /* We aren't passed a decl, so we can't check that a call is local.
5601 However, it isn't clear that that would be a win anyway, since it
5602 might limit some tail-calling opportunities. */
5603 enum arm_pcs pcs_variant;
5604
5605 if (fntype)
5606 {
5607 const_tree fndecl = NULL_TREE;
5608
5609 if (TREE_CODE (fntype) == FUNCTION_DECL)
5610 {
5611 fndecl = fntype;
5612 fntype = TREE_TYPE (fntype);
5613 }
5614
5615 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5616 }
5617 else
5618 pcs_variant = arm_pcs_default;
5619
5620 if (pcs_variant != ARM_PCS_AAPCS)
5621 {
5622 int i;
5623
5624 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5625 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5626 TYPE_MODE (type),
5627 type))
5628 return i;
5629 }
5630 return -1;
5631 }
5632
5633 static rtx
5634 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5635 const_tree fntype)
5636 {
5637 /* We aren't passed a decl, so we can't check that a call is local.
5638 However, it isn't clear that that would be a win anyway, since it
5639 might limit some tail-calling opportunities. */
5640 enum arm_pcs pcs_variant;
5641 int unsignedp ATTRIBUTE_UNUSED;
5642
5643 if (fntype)
5644 {
5645 const_tree fndecl = NULL_TREE;
5646
5647 if (TREE_CODE (fntype) == FUNCTION_DECL)
5648 {
5649 fndecl = fntype;
5650 fntype = TREE_TYPE (fntype);
5651 }
5652
5653 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5654 }
5655 else
5656 pcs_variant = arm_pcs_default;
5657
5658 /* Promote integer types. */
5659 if (type && INTEGRAL_TYPE_P (type))
5660 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5661
5662 if (pcs_variant != ARM_PCS_AAPCS)
5663 {
5664 int i;
5665
5666 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5667 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5668 type))
5669 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5670 mode, type);
5671 }
5672
5673 /* Promote small structs returned in a register to full-word size
5674 for big-endian AAPCS. */
5675 if (type && arm_return_in_msb (type))
5676 {
5677 HOST_WIDE_INT size = int_size_in_bytes (type);
5678 if (size % UNITS_PER_WORD != 0)
5679 {
5680 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5681 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5682 }
5683 }
5684
5685 return gen_rtx_REG (mode, R0_REGNUM);
5686 }
5687
5688 static rtx
5689 aapcs_libcall_value (enum machine_mode mode)
5690 {
5691 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5692 && GET_MODE_SIZE (mode) <= 4)
5693 mode = SImode;
5694
5695 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5696 }
5697
5698 /* Lay out a function argument using the AAPCS rules. The rule
5699 numbers referred to here are those in the AAPCS. */
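/* Worked example (illustrative only): with one 'int' already in r0
   (aapcs_ncrn == 1), a following 'double' is rounded up to an even
   register number by rule C3 (ncrn becomes 2) and placed in r2/r3 by
   rule C4.  An argument that does not fit entirely in the remaining
   core registers is split between registers and the stack by rule C5,
   but only while no earlier argument has already spilled to the
   stack.  */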
5700 static void
5701 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5702 const_tree type, bool named)
5703 {
5704 int nregs, nregs2;
5705 int ncrn;
5706
5707 /* We only need to do this once per argument. */
5708 if (pcum->aapcs_arg_processed)
5709 return;
5710
5711 pcum->aapcs_arg_processed = true;
5712
5713 /* Special case: if named is false then we are handling an incoming
5714 anonymous argument which is on the stack. */
5715 if (!named)
5716 return;
5717
5718 /* Is this a potential co-processor register candidate? */
5719 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5720 {
5721 int slot = aapcs_select_call_coproc (pcum, mode, type);
5722 pcum->aapcs_cprc_slot = slot;
5723
5724 /* We don't have to apply any of the rules from part B of the
5725 preparation phase, these are handled elsewhere in the
5726 compiler. */
5727
5728 if (slot >= 0)
5729 {
5730 /* A Co-processor register candidate goes either in its own
5731 class of registers or on the stack. */
5732 if (!pcum->aapcs_cprc_failed[slot])
5733 {
5734 /* C1.cp - Try to allocate the argument to co-processor
5735 registers. */
5736 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5737 return;
5738
5739 /* C2.cp - Put the argument on the stack and note that we
5740 can't assign any more candidates in this slot. We also
5741 need to note that we have allocated stack space, so that
5742 we won't later try to split a non-cprc candidate between
5743 core registers and the stack. */
5744 pcum->aapcs_cprc_failed[slot] = true;
5745 pcum->can_split = false;
5746 }
5747
5748 /* We didn't get a register, so this argument goes on the
5749 stack. */
5750 gcc_assert (pcum->can_split == false);
5751 return;
5752 }
5753 }
5754
5755 /* C3 - For double-word aligned arguments, round the NCRN up to the
5756 next even number. */
5757 ncrn = pcum->aapcs_ncrn;
5758 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5759 ncrn++;
5760
5761 nregs = ARM_NUM_REGS2 (mode, type);
5762
5763 /* Sigh, this test should really assert that nregs > 0, but a GCC
5764 extension allows empty structs and then gives them empty size; it
5765 then allows such a structure to be passed by value. For some of
5766 the code below we have to pretend that such an argument has
5767 non-zero size so that we 'locate' it correctly either in
5768 registers or on the stack. */
5769 gcc_assert (nregs >= 0);
5770
5771 nregs2 = nregs ? nregs : 1;
5772
5773 /* C4 - Argument fits entirely in core registers. */
5774 if (ncrn + nregs2 <= NUM_ARG_REGS)
5775 {
5776 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5777 pcum->aapcs_next_ncrn = ncrn + nregs;
5778 return;
5779 }
5780
5781 /* C5 - Some core registers left and there are no arguments already
5782 on the stack: split this argument between the remaining core
5783 registers and the stack. */
5784 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5785 {
5786 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5787 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5788 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5789 return;
5790 }
5791
5792 /* C6 - NCRN is set to 4. */
5793 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5794
5795 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5796 return;
5797 }
5798
5799 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5800 for a call to a function whose data type is FNTYPE.
5801 For a library call, FNTYPE is NULL. */
5802 void
5803 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5804 rtx libname,
5805 tree fndecl ATTRIBUTE_UNUSED)
5806 {
5807 /* Long call handling. */
5808 if (fntype)
5809 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5810 else
5811 pcum->pcs_variant = arm_pcs_default;
5812
5813 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5814 {
5815 if (arm_libcall_uses_aapcs_base (libname))
5816 pcum->pcs_variant = ARM_PCS_AAPCS;
5817
5818 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5819 pcum->aapcs_reg = NULL_RTX;
5820 pcum->aapcs_partial = 0;
5821 pcum->aapcs_arg_processed = false;
5822 pcum->aapcs_cprc_slot = -1;
5823 pcum->can_split = true;
5824
5825 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5826 {
5827 int i;
5828
5829 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5830 {
5831 pcum->aapcs_cprc_failed[i] = false;
5832 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5833 }
5834 }
5835 return;
5836 }
5837
5838 /* Legacy ABIs */
5839
5840 /* On the ARM, the offset starts at 0. */
5841 pcum->nregs = 0;
5842 pcum->iwmmxt_nregs = 0;
5843 pcum->can_split = true;
5844
5845 /* Varargs vectors are treated the same as long long.
5846 named_count avoids having to change the way arm handles 'named'. */
5847 pcum->named_count = 0;
5848 pcum->nargs = 0;
5849
5850 if (TARGET_REALLY_IWMMXT && fntype)
5851 {
5852 tree fn_arg;
5853
5854 for (fn_arg = TYPE_ARG_TYPES (fntype);
5855 fn_arg;
5856 fn_arg = TREE_CHAIN (fn_arg))
5857 pcum->named_count += 1;
5858
5859 if (! pcum->named_count)
5860 pcum->named_count = INT_MAX;
5861 }
5862 }
5863
5864 /* Return true if we use LRA instead of reload pass. */
5865 static bool
5866 arm_lra_p (void)
5867 {
5868 return arm_lra_flag;
5869 }
5870
5871 /* Return true if mode/type need doubleword alignment. */
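/* For example (illustrative): DImode and DFmode values, and any type
   declared with __attribute__ ((aligned (8))), need doubleword
   alignment when PARM_BOUNDARY is 32 bits.  */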
5872 static bool
5873 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5874 {
5875 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5876 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5877 }
5878
5879
5880 /* Determine where to put an argument to a function.
5881 Value is zero to push the argument on the stack,
5882 or a hard register in which to store the argument.
5883
5884 MODE is the argument's machine mode.
5885 TYPE is the data type of the argument (as a tree).
5886 This is null for libcalls where that information may
5887 not be available.
5888 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5889 the preceding args and about the function being called.
5890 NAMED is nonzero if this argument is a named parameter
5891 (otherwise it is an extra parameter matching an ellipsis).
5892
5893 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5894 other arguments are passed on the stack. If (NAMED == 0) (which happens
5895 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5896 defined), say it is passed on the stack (function_prologue will
5897 indeed make it pass on the stack if necessary). */
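/* Illustrative example (not from the original sources): for a call to
   "int f (int a, long long b, int c)" on an AAPCS target, 'a' goes in
   r0, 'b' is double-word aligned into r2/r3 (leaving r1 unused), and
   'c', finding no core registers left, goes on the stack.  */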
5898
5899 static rtx
5900 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5901 const_tree type, bool named)
5902 {
5903 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5904 int nregs;
5905
5906 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5907 a call insn (op3 of a call_value insn). */
5908 if (mode == VOIDmode)
5909 return const0_rtx;
5910
5911 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5912 {
5913 aapcs_layout_arg (pcum, mode, type, named);
5914 return pcum->aapcs_reg;
5915 }
5916
5917 /* Varargs vectors are treated the same as long long.
5918 named_count avoids having to change the way arm handles 'named'. */
5919 if (TARGET_IWMMXT_ABI
5920 && arm_vector_mode_supported_p (mode)
5921 && pcum->named_count > pcum->nargs + 1)
5922 {
5923 if (pcum->iwmmxt_nregs <= 9)
5924 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5925 else
5926 {
5927 pcum->can_split = false;
5928 return NULL_RTX;
5929 }
5930 }
5931
5932 /* Put doubleword aligned quantities in even register pairs. */
5933 if (pcum->nregs & 1
5934 && ARM_DOUBLEWORD_ALIGN
5935 && arm_needs_doubleword_align (mode, type))
5936 pcum->nregs++;
5937
5938 /* Only allow splitting an arg between regs and memory if all preceding
5939 args were allocated to regs. For args passed by reference we only count
5940 the reference pointer. */
5941 if (pcum->can_split)
5942 nregs = 1;
5943 else
5944 nregs = ARM_NUM_REGS2 (mode, type);
5945
5946 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5947 return NULL_RTX;
5948
5949 return gen_rtx_REG (mode, pcum->nregs);
5950 }
5951
5952 static unsigned int
5953 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5954 {
5955 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5956 ? DOUBLEWORD_ALIGNMENT
5957 : PARM_BOUNDARY);
5958 }
5959
5960 static int
5961 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5962 tree type, bool named)
5963 {
5964 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5965 int nregs = pcum->nregs;
5966
5967 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5968 {
5969 aapcs_layout_arg (pcum, mode, type, named);
5970 return pcum->aapcs_partial;
5971 }
5972
5973 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5974 return 0;
5975
5976 if (NUM_ARG_REGS > nregs
5977 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5978 && pcum->can_split)
5979 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5980
5981 return 0;
5982 }
5983
5984 /* Update the data in PCUM to advance over an argument
5985 of mode MODE and data type TYPE.
5986 (TYPE is null for libcalls where that information may not be available.) */
5987
5988 static void
5989 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5990 const_tree type, bool named)
5991 {
5992 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5993
5994 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5995 {
5996 aapcs_layout_arg (pcum, mode, type, named);
5997
5998 if (pcum->aapcs_cprc_slot >= 0)
5999 {
6000 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6001 type);
6002 pcum->aapcs_cprc_slot = -1;
6003 }
6004
6005 /* Generic stuff. */
6006 pcum->aapcs_arg_processed = false;
6007 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6008 pcum->aapcs_reg = NULL_RTX;
6009 pcum->aapcs_partial = 0;
6010 }
6011 else
6012 {
6013 pcum->nargs += 1;
6014 if (arm_vector_mode_supported_p (mode)
6015 && pcum->named_count > pcum->nargs
6016 && TARGET_IWMMXT_ABI)
6017 pcum->iwmmxt_nregs += 1;
6018 else
6019 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6020 }
6021 }
6022
6023 /* Variable sized types are passed by reference. This is a GCC
6024 extension to the ARM ABI. */
6025
6026 static bool
6027 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6028 enum machine_mode mode ATTRIBUTE_UNUSED,
6029 const_tree type, bool named ATTRIBUTE_UNUSED)
6030 {
6031 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6032 }
6033 \f
6034 /* Encode the current state of the #pragma [no_]long_calls. */
6035 typedef enum
6036 {
6037 OFF, /* No #pragma [no_]long_calls is in effect. */
6038 LONG, /* #pragma long_calls is in effect. */
6039 SHORT /* #pragma no_long_calls is in effect. */
6040 } arm_pragma_enum;
6041
6042 static arm_pragma_enum arm_pragma_long_calls = OFF;
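/* Usage sketch (illustrative only): in user code

     #pragma long_calls
     void far_func (void);
     #pragma long_calls_off

   selects LONG and then OFF respectively, while "#pragma no_long_calls"
   selects SHORT.  */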
6043
6044 void
6045 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6046 {
6047 arm_pragma_long_calls = LONG;
6048 }
6049
6050 void
6051 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6052 {
6053 arm_pragma_long_calls = SHORT;
6054 }
6055
6056 void
6057 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6058 {
6059 arm_pragma_long_calls = OFF;
6060 }
6061 \f
6062 /* Handle an attribute requiring a FUNCTION_DECL;
6063 arguments as in struct attribute_spec.handler. */
6064 static tree
6065 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6066 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6067 {
6068 if (TREE_CODE (*node) != FUNCTION_DECL)
6069 {
6070 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6071 name);
6072 *no_add_attrs = true;
6073 }
6074
6075 return NULL_TREE;
6076 }
6077
6078 /* Handle an "interrupt" or "isr" attribute;
6079 arguments as in struct attribute_spec.handler. */
6080 static tree
6081 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6082 bool *no_add_attrs)
6083 {
6084 if (DECL_P (*node))
6085 {
6086 if (TREE_CODE (*node) != FUNCTION_DECL)
6087 {
6088 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6089 name);
6090 *no_add_attrs = true;
6091 }
6092 /* FIXME: the argument, if any, is checked for type attributes;
6093 should it be checked for decl ones? */
6094 }
6095 else
6096 {
6097 if (TREE_CODE (*node) == FUNCTION_TYPE
6098 || TREE_CODE (*node) == METHOD_TYPE)
6099 {
6100 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6101 {
6102 warning (OPT_Wattributes, "%qE attribute ignored",
6103 name);
6104 *no_add_attrs = true;
6105 }
6106 }
6107 else if (TREE_CODE (*node) == POINTER_TYPE
6108 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6109 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6110 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6111 {
6112 *node = build_variant_type_copy (*node);
6113 TREE_TYPE (*node) = build_type_attribute_variant
6114 (TREE_TYPE (*node),
6115 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6116 *no_add_attrs = true;
6117 }
6118 else
6119 {
6120 /* Possibly pass this attribute on from the type to a decl. */
6121 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6122 | (int) ATTR_FLAG_FUNCTION_NEXT
6123 | (int) ATTR_FLAG_ARRAY_NEXT))
6124 {
6125 *no_add_attrs = true;
6126 return tree_cons (name, args, NULL_TREE);
6127 }
6128 else
6129 {
6130 warning (OPT_Wattributes, "%qE attribute ignored",
6131 name);
6132 }
6133 }
6134 }
6135
6136 return NULL_TREE;
6137 }
6138
6139 /* Handle a "pcs" attribute; arguments as in struct
6140 attribute_spec.handler. */
6141 static tree
6142 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6143 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6144 {
6145 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6146 {
6147 warning (OPT_Wattributes, "%qE attribute ignored", name);
6148 *no_add_attrs = true;
6149 }
6150 return NULL_TREE;
6151 }
6152
6153 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6154 /* Handle the "notshared" attribute. This attribute is another way of
6155 requesting hidden visibility. ARM's compiler supports
6156 "__declspec(notshared)"; we support the same thing via an
6157 attribute. */
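/* Usage sketch (illustrative only):

     class __attribute__ ((notshared)) C { virtual void f (); };

   is intended to behave like ARM's "__declspec(notshared)", giving the
   type's associated data (such as its vtable and type_info) hidden
   visibility.  */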
6158
6159 static tree
6160 arm_handle_notshared_attribute (tree *node,
6161 tree name ATTRIBUTE_UNUSED,
6162 tree args ATTRIBUTE_UNUSED,
6163 int flags ATTRIBUTE_UNUSED,
6164 bool *no_add_attrs)
6165 {
6166 tree decl = TYPE_NAME (*node);
6167
6168 if (decl)
6169 {
6170 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6171 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6172 *no_add_attrs = false;
6173 }
6174 return NULL_TREE;
6175 }
6176 #endif
6177
6178 /* Return 0 if the attributes for two types are incompatible, 1 if they
6179 are compatible, and 2 if they are nearly compatible (which causes a
6180 warning to be generated). */
6181 static int
6182 arm_comp_type_attributes (const_tree type1, const_tree type2)
6183 {
6184 int l1, l2, s1, s2;
6185
6186 /* Check for mismatch of non-default calling convention. */
6187 if (TREE_CODE (type1) != FUNCTION_TYPE)
6188 return 1;
6189
6190 /* Check for mismatched call attributes. */
6191 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6192 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6193 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6194 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6195
6196 /* Only bother to check if an attribute is defined. */
6197 if (l1 | l2 | s1 | s2)
6198 {
6199 /* If one type has an attribute, the other must have the same attribute. */
6200 if ((l1 != l2) || (s1 != s2))
6201 return 0;
6202
6203 /* Disallow mixed attributes. */
6204 if ((l1 & s2) || (l2 & s1))
6205 return 0;
6206 }
6207
6208 /* Check for mismatched ISR attribute. */
6209 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6210 if (! l1)
6211 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6212 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6213 if (! l2)
6214 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6215 if (l1 != l2)
6216 return 0;
6217
6218 return 1;
6219 }
6220
6221 /* Assigns default attributes to newly defined type. This is used to
6222 set short_call/long_call attributes for function types of
6223 functions defined inside corresponding #pragma scopes. */
6224 static void
6225 arm_set_default_type_attributes (tree type)
6226 {
6227 /* Add __attribute__ ((long_call)) to all functions, when
6228 inside #pragma long_calls or __attribute__ ((short_call)),
6229 when inside #pragma no_long_calls. */
6230 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6231 {
6232 tree type_attr_list, attr_name;
6233 type_attr_list = TYPE_ATTRIBUTES (type);
6234
6235 if (arm_pragma_long_calls == LONG)
6236 attr_name = get_identifier ("long_call");
6237 else if (arm_pragma_long_calls == SHORT)
6238 attr_name = get_identifier ("short_call");
6239 else
6240 return;
6241
6242 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6243 TYPE_ATTRIBUTES (type) = type_attr_list;
6244 }
6245 }
6246 \f
6247 /* Return true if DECL is known to be linked into section SECTION. */
6248
6249 static bool
6250 arm_function_in_section_p (tree decl, section *section)
6251 {
6252 /* We can only be certain about functions defined in the same
6253 compilation unit. */
6254 if (!TREE_STATIC (decl))
6255 return false;
6256
6257 /* Make sure that SYMBOL always binds to the definition in this
6258 compilation unit. */
6259 if (!targetm.binds_local_p (decl))
6260 return false;
6261
6262 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6263 if (!DECL_SECTION_NAME (decl))
6264 {
6265 /* Make sure that we will not create a unique section for DECL. */
6266 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6267 return false;
6268 }
6269
6270 return function_section (decl) == section;
6271 }
6272
6273 /* Return nonzero if a 32-bit "long_call" should be generated for
6274 a call from the current function to DECL. We generate a long_call
6275 if the function:
6276
6277 a. has an __attribute__ ((long_call))
6278 or b. is within the scope of a #pragma long_calls
6279 or c. the -mlong-calls command line switch has been specified
6280
6281 However we do not generate a long call if the function:
6282
6283 d. has an __attribute__ ((short_call))
6284 or e. is inside the scope of a #pragma no_long_calls
6285 or f. is defined in the same section as the current function. */
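/* Usage sketch (illustrative only):

     extern void far_away (void) __attribute__ ((long_call));
     extern void near_by (void) __attribute__ ((short_call));

   cover cases (a) and (d); -mlong-calls on the command line covers
   case (c).  */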
6286
6287 bool
6288 arm_is_long_call_p (tree decl)
6289 {
6290 tree attrs;
6291
6292 if (!decl)
6293 return TARGET_LONG_CALLS;
6294
6295 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6296 if (lookup_attribute ("short_call", attrs))
6297 return false;
6298
6299 /* For "f", be conservative, and only cater for cases in which the
6300 whole of the current function is placed in the same section. */
6301 if (!flag_reorder_blocks_and_partition
6302 && TREE_CODE (decl) == FUNCTION_DECL
6303 && arm_function_in_section_p (decl, current_function_section ()))
6304 return false;
6305
6306 if (lookup_attribute ("long_call", attrs))
6307 return true;
6308
6309 return TARGET_LONG_CALLS;
6310 }
6311
6312 /* Return nonzero if it is ok to make a tail-call to DECL. */
6313 static bool
6314 arm_function_ok_for_sibcall (tree decl, tree exp)
6315 {
6316 unsigned long func_type;
6317
6318 if (cfun->machine->sibcall_blocked)
6319 return false;
6320
6321 /* Never tailcall something if we are generating code for Thumb-1. */
6322 if (TARGET_THUMB1)
6323 return false;
6324
6325 /* The PIC register is live on entry to VxWorks PLT entries, so we
6326 must make the call before restoring the PIC register. */
6327 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6328 return false;
6329
6330 /* If we are interworking and the function is not declared static
6331 then we can't tail-call it unless we know that it exists in this
6332 compilation unit (since it might be a Thumb routine). */
6333 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6334 && !TREE_ASM_WRITTEN (decl))
6335 return false;
6336
6337 func_type = arm_current_func_type ();
6338 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6339 if (IS_INTERRUPT (func_type))
6340 return false;
6341
6342 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6343 {
6344 /* Check that the return value locations are the same. For
6345 example that we aren't returning a value from the sibling in
6346 a VFP register but then need to transfer it to a core
6347 register. */
6348 rtx a, b;
6349
6350 a = arm_function_value (TREE_TYPE (exp), decl, false);
6351 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6352 cfun->decl, false);
6353 if (!rtx_equal_p (a, b))
6354 return false;
6355 }
6356
6357 /* Never tailcall if function may be called with a misaligned SP. */
6358 if (IS_STACKALIGN (func_type))
6359 return false;
6360
6361 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6362 references should become a NOP. Don't convert such calls into
6363 sibling calls. */
6364 if (TARGET_AAPCS_BASED
6365 && arm_abi == ARM_ABI_AAPCS
6366 && decl
6367 && DECL_WEAK (decl))
6368 return false;
6369
6370 /* Everything else is ok. */
6371 return true;
6372 }
6373
6374 \f
6375 /* Addressing mode support functions. */
6376
6377 /* Return nonzero if X is a legitimate immediate operand when compiling
6378 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6379 int
6380 legitimate_pic_operand_p (rtx x)
6381 {
6382 if (GET_CODE (x) == SYMBOL_REF
6383 || (GET_CODE (x) == CONST
6384 && GET_CODE (XEXP (x, 0)) == PLUS
6385 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6386 return 0;
6387
6388 return 1;
6389 }
6390
6391 /* Record that the current function needs a PIC register. Initialize
6392 cfun->machine->pic_reg if we have not already done so. */
6393
6394 static void
6395 require_pic_register (void)
6396 {
6397 /* A lot of the logic here is made obscure by the fact that this
6398 routine gets called as part of the rtx cost estimation process.
6399 We don't want those calls to affect any assumptions about the real
6400 function; and further, we can't call entry_of_function() until we
6401 start the real expansion process. */
6402 if (!crtl->uses_pic_offset_table)
6403 {
6404 gcc_assert (can_create_pseudo_p ());
6405 if (arm_pic_register != INVALID_REGNUM
6406 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6407 {
6408 if (!cfun->machine->pic_reg)
6409 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6410
6411 /* Play games to avoid marking the function as needing pic
6412 if we are being called as part of the cost-estimation
6413 process. */
6414 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6415 crtl->uses_pic_offset_table = 1;
6416 }
6417 else
6418 {
6419 rtx seq, insn;
6420
6421 if (!cfun->machine->pic_reg)
6422 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6423
6424 /* Play games to avoid marking the function as needing pic
6425 if we are being called as part of the cost-estimation
6426 process. */
6427 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6428 {
6429 crtl->uses_pic_offset_table = 1;
6430 start_sequence ();
6431
6432 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6433 && arm_pic_register > LAST_LO_REGNUM)
6434 emit_move_insn (cfun->machine->pic_reg,
6435 gen_rtx_REG (Pmode, arm_pic_register));
6436 else
6437 arm_load_pic_register (0UL);
6438
6439 seq = get_insns ();
6440 end_sequence ();
6441
6442 for (insn = seq; insn; insn = NEXT_INSN (insn))
6443 if (INSN_P (insn))
6444 INSN_LOCATION (insn) = prologue_location;
6445
6446 /* We can be called during expansion of PHI nodes, where
6447 we can't yet emit instructions directly in the final
6448 insn stream. Queue the insns on the entry edge, they will
6449 be committed after everything else is expanded. */
6450 insert_insn_on_edge (seq,
6451 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6452 }
6453 }
6454 }
6455 }
6456
6457 rtx
6458 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6459 {
6460 if (GET_CODE (orig) == SYMBOL_REF
6461 || GET_CODE (orig) == LABEL_REF)
6462 {
6463 rtx insn;
6464
6465 if (reg == 0)
6466 {
6467 gcc_assert (can_create_pseudo_p ());
6468 reg = gen_reg_rtx (Pmode);
6469 }
6470
6471 /* VxWorks does not impose a fixed gap between segments; the run-time
6472 gap can be different from the object-file gap. We therefore can't
6473 use GOTOFF unless we are absolutely sure that the symbol is in the
6474 same segment as the GOT. Unfortunately, the flexibility of linker
6475 scripts means that we can't be sure of that in general, so assume
6476 that GOTOFF is never valid on VxWorks. */
6477 if ((GET_CODE (orig) == LABEL_REF
6478 || (GET_CODE (orig) == SYMBOL_REF
6479 && SYMBOL_REF_LOCAL_P (orig)))
6480 && NEED_GOT_RELOC
6481 && arm_pic_data_is_text_relative)
6482 insn = arm_pic_static_addr (orig, reg);
6483 else
6484 {
6485 rtx pat;
6486 rtx mem;
6487
6488 /* If this function doesn't have a pic register, create one now. */
6489 require_pic_register ();
6490
6491 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6492
6493 /* Make the MEM as close to a constant as possible. */
6494 mem = SET_SRC (pat);
6495 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6496 MEM_READONLY_P (mem) = 1;
6497 MEM_NOTRAP_P (mem) = 1;
6498
6499 insn = emit_insn (pat);
6500 }
6501
6502 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6503 by loop. */
6504 set_unique_reg_note (insn, REG_EQUAL, orig);
6505
6506 return reg;
6507 }
6508 else if (GET_CODE (orig) == CONST)
6509 {
6510 rtx base, offset;
6511
6512 if (GET_CODE (XEXP (orig, 0)) == PLUS
6513 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6514 return orig;
6515
6516 /* Handle the case where we have: const (UNSPEC_TLS). */
6517 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6518 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6519 return orig;
6520
6521 /* Handle the case where we have:
6522 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6523 CONST_INT. */
6524 if (GET_CODE (XEXP (orig, 0)) == PLUS
6525 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6526 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6527 {
6528 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6529 return orig;
6530 }
6531
6532 if (reg == 0)
6533 {
6534 gcc_assert (can_create_pseudo_p ());
6535 reg = gen_reg_rtx (Pmode);
6536 }
6537
6538 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6539
6540 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6541 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6542 base == reg ? 0 : reg);
6543
6544 if (CONST_INT_P (offset))
6545 {
6546 /* The base register doesn't really matter, we only want to
6547 test the index for the appropriate mode. */
6548 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6549 {
6550 gcc_assert (can_create_pseudo_p ());
6551 offset = force_reg (Pmode, offset);
6552 }
6553
6554 if (CONST_INT_P (offset))
6555 return plus_constant (Pmode, base, INTVAL (offset));
6556 }
6557
6558 if (GET_MODE_SIZE (mode) > 4
6559 && (GET_MODE_CLASS (mode) == MODE_INT
6560 || TARGET_SOFT_FLOAT))
6561 {
6562 emit_insn (gen_addsi3 (reg, base, offset));
6563 return reg;
6564 }
6565
6566 return gen_rtx_PLUS (Pmode, base, offset);
6567 }
6568
6569 return orig;
6570 }
6571
6572
6573 /* Find a spare register to use during the prolog of a function. */
6574
6575 static int
6576 thumb_find_work_register (unsigned long pushed_regs_mask)
6577 {
6578 int reg;
6579
6580 /* Check the argument registers first as these are call-used. The
6581 register allocation order means that sometimes r3 might be used
6582 but earlier argument registers might not, so check them all. */
6583 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6584 if (!df_regs_ever_live_p (reg))
6585 return reg;
6586
6587 /* Before going on to check the call-saved registers we can try a couple
6588 more ways of deducing that r3 is available. The first is when we are
6589 pushing anonymous arguments onto the stack and we have fewer than 4
6590 registers' worth of fixed arguments (*). In this case r3 will be part of
6591 the variable argument list and so we can be sure that it will be
6592 pushed right at the start of the function. Hence it will be available
6593 for the rest of the prologue.
6594 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6595 if (cfun->machine->uses_anonymous_args
6596 && crtl->args.pretend_args_size > 0)
6597 return LAST_ARG_REGNUM;
6598
6599 /* The other case is when we have fixed arguments but fewer than 4 registers'
6600 worth. In this case r3 might be used in the body of the function, but
6601 it is not being used to convey an argument into the function. In theory
6602 we could just check crtl->args.size to see how many bytes are
6603 being passed in argument registers, but it seems that it is unreliable.
6604 Sometimes it will have the value 0 when in fact arguments are being
6605 passed. (See testcase execute/20021111-1.c for an example). So we also
6606 check the args_info.nregs field as well. The problem with this field is
6607 that it makes no allowances for arguments that are passed to the
6608 function but which are not used. Hence we could miss an opportunity
6609 when a function has an unused argument in r3. But it is better to be
6610 safe than to be sorry. */
6611 if (! cfun->machine->uses_anonymous_args
6612 && crtl->args.size >= 0
6613 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6614 && (TARGET_AAPCS_BASED
6615 ? crtl->args.info.aapcs_ncrn < 4
6616 : crtl->args.info.nregs < 4))
6617 return LAST_ARG_REGNUM;
6618
6619 /* Otherwise look for a call-saved register that is going to be pushed. */
6620 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6621 if (pushed_regs_mask & (1 << reg))
6622 return reg;
6623
6624 if (TARGET_THUMB2)
6625 {
6626 /* Thumb-2 can use high regs. */
6627 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6628 if (pushed_regs_mask & (1 << reg))
6629 return reg;
6630 }
6631 /* Something went wrong - thumb_compute_save_reg_mask()
6632 should have arranged for a suitable register to be pushed. */
6633 gcc_unreachable ();
6634 }
6635
6636 static GTY(()) int pic_labelno;
6637
6638 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6639 low register. */
6640
6641 void
6642 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6643 {
6644 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6645
6646 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6647 return;
6648
6649 gcc_assert (flag_pic);
6650
6651 pic_reg = cfun->machine->pic_reg;
6652 if (TARGET_VXWORKS_RTP)
6653 {
6654 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6655 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6656 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6657
6658 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6659
6660 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6661 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6662 }
6663 else
6664 {
6665 /* We use an UNSPEC rather than a LABEL_REF because this label
6666 never appears in the code stream. */
6667
6668 labelno = GEN_INT (pic_labelno++);
6669 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6670 l1 = gen_rtx_CONST (VOIDmode, l1);
6671
6672 /* On the ARM the PC register contains 'dot + 8' at the time of the
6673 addition, on the Thumb it is 'dot + 4'. */
6674 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6675 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6676 UNSPEC_GOTSYM_OFF);
6677 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6678
6679 if (TARGET_32BIT)
6680 {
6681 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6682 }
6683 else /* TARGET_THUMB1 */
6684 {
6685 if (arm_pic_register != INVALID_REGNUM
6686 && REGNO (pic_reg) > LAST_LO_REGNUM)
6687 {
6688 /* We will have pushed the pic register, so we should always be
6689 able to find a work register. */
6690 pic_tmp = gen_rtx_REG (SImode,
6691 thumb_find_work_register (saved_regs));
6692 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6693 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6694 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6695 }
6696 else if (arm_pic_register != INVALID_REGNUM
6697 && arm_pic_register > LAST_LO_REGNUM
6698 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6699 {
6700 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6701 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6702 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6703 }
6704 else
6705 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6706 }
6707 }
6708
6709 /* Need to emit this whether or not we obey regdecls,
6710 since setjmp/longjmp can cause life info to screw up. */
6711 emit_use (pic_reg);
6712 }
6713
6714 /* Generate code to load the address of a static var when flag_pic is set. */
6715 static rtx
6716 arm_pic_static_addr (rtx orig, rtx reg)
6717 {
6718 rtx l1, labelno, offset_rtx, insn;
6719
6720 gcc_assert (flag_pic);
6721
6722 /* We use an UNSPEC rather than a LABEL_REF because this label
6723 never appears in the code stream. */
6724 labelno = GEN_INT (pic_labelno++);
6725 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6726 l1 = gen_rtx_CONST (VOIDmode, l1);
6727
6728 /* On the ARM the PC register contains 'dot + 8' at the time of the
6729 addition, on the Thumb it is 'dot + 4'. */
6730 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6731 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6732 UNSPEC_SYMBOL_OFFSET);
6733 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6734
6735 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6736 return insn;
6737 }
6738
6739 /* Return nonzero if X is valid as an ARM state addressing register. */
6740 static int
6741 arm_address_register_rtx_p (rtx x, int strict_p)
6742 {
6743 int regno;
6744
6745 if (!REG_P (x))
6746 return 0;
6747
6748 regno = REGNO (x);
6749
6750 if (strict_p)
6751 return ARM_REGNO_OK_FOR_BASE_P (regno);
6752
6753 return (regno <= LAST_ARM_REGNUM
6754 || regno >= FIRST_PSEUDO_REGISTER
6755 || regno == FRAME_POINTER_REGNUM
6756 || regno == ARG_POINTER_REGNUM);
6757 }
6758
6759 /* Return TRUE if this rtx is the difference of a symbol and a label,
6760 and will reduce to a PC-relative relocation in the object file.
6761 Expressions like this can be left alone when generating PIC, rather
6762 than forced through the GOT. */
6763 static int
6764 pcrel_constant_p (rtx x)
6765 {
6766 if (GET_CODE (x) == MINUS)
6767 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6768
6769 return FALSE;
6770 }
6771
6772 /* Return true if X will surely end up in an index register after next
6773 splitting pass. */
6774 static bool
6775 will_be_in_index_register (const_rtx x)
6776 {
6777 /* arm.md: calculate_pic_address will split this into a register. */
6778 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6779 }
6780
6781 /* Return nonzero if X is a valid ARM state address operand. */
6782 int
6783 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6784 int strict_p)
6785 {
6786 bool use_ldrd;
6787 enum rtx_code code = GET_CODE (x);
6788
6789 if (arm_address_register_rtx_p (x, strict_p))
6790 return 1;
6791
6792 use_ldrd = (TARGET_LDRD
6793 && (mode == DImode
6794 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6795
6796 if (code == POST_INC || code == PRE_DEC
6797 || ((code == PRE_INC || code == POST_DEC)
6798 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6799 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6800
6801 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6802 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6803 && GET_CODE (XEXP (x, 1)) == PLUS
6804 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6805 {
6806 rtx addend = XEXP (XEXP (x, 1), 1);
6807
6808 /* Don't allow ldrd post increment by register because it's hard
6809 to fixup invalid register choices. */
6810 if (use_ldrd
6811 && GET_CODE (x) == POST_MODIFY
6812 && REG_P (addend))
6813 return 0;
6814
6815 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6816 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6817 }
6818
6819 /* After reload constants split into minipools will have addresses
6820 from a LABEL_REF. */
6821 else if (reload_completed
6822 && (code == LABEL_REF
6823 || (code == CONST
6824 && GET_CODE (XEXP (x, 0)) == PLUS
6825 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6826 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6827 return 1;
6828
6829 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6830 return 0;
6831
6832 else if (code == PLUS)
6833 {
6834 rtx xop0 = XEXP (x, 0);
6835 rtx xop1 = XEXP (x, 1);
6836
6837 return ((arm_address_register_rtx_p (xop0, strict_p)
6838 && ((CONST_INT_P (xop1)
6839 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6840 || (!strict_p && will_be_in_index_register (xop1))))
6841 || (arm_address_register_rtx_p (xop1, strict_p)
6842 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6843 }
6844
6845 #if 0
6846 /* Reload currently can't handle MINUS, so disable this for now */
6847 else if (GET_CODE (x) == MINUS)
6848 {
6849 rtx xop0 = XEXP (x, 0);
6850 rtx xop1 = XEXP (x, 1);
6851
6852 return (arm_address_register_rtx_p (xop0, strict_p)
6853 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6854 }
6855 #endif
6856
6857 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6858 && code == SYMBOL_REF
6859 && CONSTANT_POOL_ADDRESS_P (x)
6860 && ! (flag_pic
6861 && symbol_mentioned_p (get_pool_constant (x))
6862 && ! pcrel_constant_p (get_pool_constant (x))))
6863 return 1;
6864
6865 return 0;
6866 }
6867
6868 /* Return nonzero if X is a valid Thumb-2 address operand. */
6869 static int
6870 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6871 {
6872 bool use_ldrd;
6873 enum rtx_code code = GET_CODE (x);
6874
6875 if (arm_address_register_rtx_p (x, strict_p))
6876 return 1;
6877
6878 use_ldrd = (TARGET_LDRD
6879 && (mode == DImode
6880 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6881
6882 if (code == POST_INC || code == PRE_DEC
6883 || ((code == PRE_INC || code == POST_DEC)
6884 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6885 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6886
6887 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6888 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6889 && GET_CODE (XEXP (x, 1)) == PLUS
6890 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6891 {
6892 /* Thumb-2 only has autoincrement by constant. */
6893 rtx addend = XEXP (XEXP (x, 1), 1);
6894 HOST_WIDE_INT offset;
6895
6896 if (!CONST_INT_P (addend))
6897 return 0;
6898
6899 offset = INTVAL (addend);
6900 if (GET_MODE_SIZE (mode) <= 4)
6901 return (offset > -256 && offset < 256);
6902
6903 return (use_ldrd && offset > -1024 && offset < 1024
6904 && (offset & 3) == 0);
6905 }
6906
6907 /* After reload constants split into minipools will have addresses
6908 from a LABEL_REF. */
6909 else if (reload_completed
6910 && (code == LABEL_REF
6911 || (code == CONST
6912 && GET_CODE (XEXP (x, 0)) == PLUS
6913 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6914 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6915 return 1;
6916
6917 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6918 return 0;
6919
6920 else if (code == PLUS)
6921 {
6922 rtx xop0 = XEXP (x, 0);
6923 rtx xop1 = XEXP (x, 1);
6924
6925 return ((arm_address_register_rtx_p (xop0, strict_p)
6926 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6927 || (!strict_p && will_be_in_index_register (xop1))))
6928 || (arm_address_register_rtx_p (xop1, strict_p)
6929 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6930 }
6931
6932 /* Normally we can assign constant values to target registers without
6933 the help of a constant pool. But there are cases where we have to use
6934 the constant pool, for example:
6935 1) assigning a label to a register.
6936 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6937
6938 Constant pool access in format:
6939 (set (reg r0) (mem (symbol_ref (".LC0"))))
6940 will cause the use of literal pool (later in function arm_reorg).
6941 So here we mark such format as an invalid format, then the compiler
6942 will adjust it into:
6943 (set (reg r0) (symbol_ref (".LC0")))
6944 (set (reg r0) (mem (reg r0))).
6945 No extra register is required, and (mem (reg r0)) won't cause the use
6946 of literal pools. */
6947 else if (arm_disable_literal_pool && code == SYMBOL_REF
6948 && CONSTANT_POOL_ADDRESS_P (x))
6949 return 0;
6950
6951 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6952 && code == SYMBOL_REF
6953 && CONSTANT_POOL_ADDRESS_P (x)
6954 && ! (flag_pic
6955 && symbol_mentioned_p (get_pool_constant (x))
6956 && ! pcrel_constant_p (get_pool_constant (x))))
6957 return 1;
6958
6959 return 0;
6960 }
6961
6962 /* Return nonzero if INDEX is valid for an address index operand in
6963 ARM state. */
6964 static int
6965 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6966 int strict_p)
6967 {
6968 HOST_WIDE_INT range;
6969 enum rtx_code code = GET_CODE (index);
6970
6971 /* Standard coprocessor addressing modes. */
6972 if (TARGET_HARD_FLOAT
6973 && TARGET_VFP
6974 && (mode == SFmode || mode == DFmode))
6975 return (code == CONST_INT && INTVAL (index) < 1024
6976 && INTVAL (index) > -1024
6977 && (INTVAL (index) & 3) == 0);
6978
6979 /* For quad modes, we restrict the constant offset to be slightly less
6980 than what the instruction format permits. We do this because for
6981 quad mode moves, we will actually decompose them into two separate
6982 double-mode reads or writes. INDEX must therefore be a valid
6983 (double-mode) offset and so should INDEX+8. */
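/* For example (illustrative): the double-word form below accepts
   offsets up to 1020, so for a quad-word access both INDEX and INDEX+8
   must be at most 1020, i.e. INDEX at most 1012; hence the "< 1016"
   test here (offsets are multiples of 4).  */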
6984 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6985 return (code == CONST_INT
6986 && INTVAL (index) < 1016
6987 && INTVAL (index) > -1024
6988 && (INTVAL (index) & 3) == 0);
6989
6990 /* We have no such constraint on double mode offsets, so we permit the
6991 full range of the instruction format. */
6992 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6993 return (code == CONST_INT
6994 && INTVAL (index) < 1024
6995 && INTVAL (index) > -1024
6996 && (INTVAL (index) & 3) == 0);
6997
6998 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6999 return (code == CONST_INT
7000 && INTVAL (index) < 1024
7001 && INTVAL (index) > -1024
7002 && (INTVAL (index) & 3) == 0);
7003
7004 if (arm_address_register_rtx_p (index, strict_p)
7005 && (GET_MODE_SIZE (mode) <= 4))
7006 return 1;
7007
7008 if (mode == DImode || mode == DFmode)
7009 {
7010 if (code == CONST_INT)
7011 {
7012 HOST_WIDE_INT val = INTVAL (index);
7013
7014 if (TARGET_LDRD)
7015 return val > -256 && val < 256;
7016 else
7017 return val > -4096 && val < 4092;
7018 }
7019
7020 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7021 }
7022
7023 if (GET_MODE_SIZE (mode) <= 4
7024 && ! (arm_arch4
7025 && (mode == HImode
7026 || mode == HFmode
7027 || (mode == QImode && outer == SIGN_EXTEND))))
7028 {
7029 if (code == MULT)
7030 {
7031 rtx xiop0 = XEXP (index, 0);
7032 rtx xiop1 = XEXP (index, 1);
7033
7034 return ((arm_address_register_rtx_p (xiop0, strict_p)
7035 && power_of_two_operand (xiop1, SImode))
7036 || (arm_address_register_rtx_p (xiop1, strict_p)
7037 && power_of_two_operand (xiop0, SImode)));
7038 }
7039 else if (code == LSHIFTRT || code == ASHIFTRT
7040 || code == ASHIFT || code == ROTATERT)
7041 {
7042 rtx op = XEXP (index, 1);
7043
7044 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7045 && CONST_INT_P (op)
7046 && INTVAL (op) > 0
7047 && INTVAL (op) <= 31);
7048 }
7049 }
7050
7051 /* For ARM v4 we may be doing a sign-extend operation during the
7052 load. */
7053 if (arm_arch4)
7054 {
7055 if (mode == HImode
7056 || mode == HFmode
7057 || (outer == SIGN_EXTEND && mode == QImode))
7058 range = 256;
7059 else
7060 range = 4096;
7061 }
7062 else
7063 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7064
7065 return (code == CONST_INT
7066 && INTVAL (index) < range
7067 && INTVAL (index) > -range);
7068 }
7069
7070 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
7071 index operand, i.e. 1, 2, 4 or 8. */
7072 static bool
7073 thumb2_index_mul_operand (rtx op)
7074 {
7075 HOST_WIDE_INT val;
7076
7077 if (!CONST_INT_P (op))
7078 return false;
7079
7080 val = INTVAL(op);
7081 return (val == 1 || val == 2 || val == 4 || val == 8);
7082 }
7083
7084 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7085 static int
7086 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
7087 {
7088 enum rtx_code code = GET_CODE (index);
7089
7090 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7091 /* Standard coprocessor addressing modes. */
7092 if (TARGET_HARD_FLOAT
7093 && TARGET_VFP
7094 && (mode == SFmode || mode == DFmode))
7095 return (code == CONST_INT && INTVAL (index) < 1024
7096 /* Thumb-2 allows only > -256 index range for its core register
7097 load/stores. Since we allow SF/DF in core registers, we have
7098 to use the intersection between -256~4096 (core) and -1024~1024
7099 (coprocessor). */
7100 && INTVAL (index) > -256
7101 && (INTVAL (index) & 3) == 0);
7102
7103 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7104 {
7105 /* For DImode assume values will usually live in core regs
7106 and only allow LDRD addressing modes. */
7107 if (!TARGET_LDRD || mode != DImode)
7108 return (code == CONST_INT
7109 && INTVAL (index) < 1024
7110 && INTVAL (index) > -1024
7111 && (INTVAL (index) & 3) == 0);
7112 }
7113
7114 /* For quad modes, we restrict the constant offset to be slightly less
7115 than what the instruction format permits. We do this because for
7116 quad mode moves, we will actually decompose them into two separate
7117 double-mode reads or writes. INDEX must therefore be a valid
7118 (double-mode) offset and so should INDEX+8. */
7119 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7120 return (code == CONST_INT
7121 && INTVAL (index) < 1016
7122 && INTVAL (index) > -1024
7123 && (INTVAL (index) & 3) == 0);
7124
7125 /* We have no such constraint on double mode offsets, so we permit the
7126 full range of the instruction format. */
7127 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7128 return (code == CONST_INT
7129 && INTVAL (index) < 1024
7130 && INTVAL (index) > -1024
7131 && (INTVAL (index) & 3) == 0);
7132
7133 if (arm_address_register_rtx_p (index, strict_p)
7134 && (GET_MODE_SIZE (mode) <= 4))
7135 return 1;
7136
7137 if (mode == DImode || mode == DFmode)
7138 {
7139 if (code == CONST_INT)
7140 {
7141 HOST_WIDE_INT val = INTVAL (index);
7142 /* ??? Can we assume ldrd for thumb2? */
7143 /* Thumb-2 ldrd only has reg+const addressing modes. */
7144 /* ldrd supports offsets of +-1020.
7145 However the ldr fallback does not. */
7146 return val > -256 && val < 256 && (val & 3) == 0;
7147 }
7148 else
7149 return 0;
7150 }
7151
7152 if (code == MULT)
7153 {
7154 rtx xiop0 = XEXP (index, 0);
7155 rtx xiop1 = XEXP (index, 1);
7156
7157 return ((arm_address_register_rtx_p (xiop0, strict_p)
7158 && thumb2_index_mul_operand (xiop1))
7159 || (arm_address_register_rtx_p (xiop1, strict_p)
7160 && thumb2_index_mul_operand (xiop0)));
7161 }
7162 else if (code == ASHIFT)
7163 {
7164 rtx op = XEXP (index, 1);
7165
7166 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7167 && CONST_INT_P (op)
7168 && INTVAL (op) > 0
7169 && INTVAL (op) <= 3);
7170 }
7171
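  /* Plain core-register addressing: Thumb-2 accepts a 12-bit positive or an
     8-bit negative immediate offset, hence the asymmetric range below.  */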
7172 return (code == CONST_INT
7173 && INTVAL (index) < 4096
7174 && INTVAL (index) > -256);
7175 }
7176
7177 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7178 static int
7179 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
7180 {
7181 int regno;
7182
7183 if (!REG_P (x))
7184 return 0;
7185
7186 regno = REGNO (x);
7187
7188 if (strict_p)
7189 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7190
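  /* In the non-strict case accept any lo register, any pseudo and the (soft)
     frame pointer for any mode; the stack pointer, the virtual registers, the
     hard frame pointer and the argument pointer are accepted only for
     word-sized or larger accesses.  */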
7191 return (regno <= LAST_LO_REGNUM
7192 || regno > LAST_VIRTUAL_REGISTER
7193 || regno == FRAME_POINTER_REGNUM
7194 || (GET_MODE_SIZE (mode) >= 4
7195 && (regno == STACK_POINTER_REGNUM
7196 || regno >= FIRST_PSEUDO_REGISTER
7197 || x == hard_frame_pointer_rtx
7198 || x == arg_pointer_rtx)));
7199 }
7200
7201 /* Return nonzero if x is a legitimate index register. This is the case
7202 for any base register that can access a QImode object. */
7203 inline static int
7204 thumb1_index_register_rtx_p (rtx x, int strict_p)
7205 {
7206 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7207 }
7208
7209 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7210
7211 The AP may be eliminated to either the SP or the FP, so we use the
7212 least common denominator, e.g. SImode, and offsets from 0 to 64.
7213
7214 ??? Verify whether the above is the right approach.
7215
7216 ??? Also, the FP may be eliminated to the SP, so perhaps that
7217 needs special handling also.
7218
7219 ??? Look at how the mips16 port solves this problem. It probably uses
7220 better ways to solve some of these problems.
7221
7222 Although it is not incorrect, we don't accept QImode and HImode
7223 addresses based on the frame pointer or arg pointer until the
7224 reload pass starts. This is so that eliminating such addresses
7225 into stack based ones won't produce impossible code. */
7226 int
7227 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
7228 {
7229 /* ??? Not clear if this is right. Experiment. */
7230 if (GET_MODE_SIZE (mode) < 4
7231 && !(reload_in_progress || reload_completed)
7232 && (reg_mentioned_p (frame_pointer_rtx, x)
7233 || reg_mentioned_p (arg_pointer_rtx, x)
7234 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7235 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7236 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7237 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7238 return 0;
7239
7240 /* Accept any base register. SP only in SImode or larger. */
7241 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7242 return 1;
7243
7244 /* This is PC relative data before arm_reorg runs. */
7245 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7246 && GET_CODE (x) == SYMBOL_REF
7247 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7248 return 1;
7249
7250 /* This is PC relative data after arm_reorg runs. */
7251 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7252 && reload_completed
7253 && (GET_CODE (x) == LABEL_REF
7254 || (GET_CODE (x) == CONST
7255 && GET_CODE (XEXP (x, 0)) == PLUS
7256 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7257 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7258 return 1;
7259
7260 /* Post-inc indexing only supported for SImode and larger. */
7261 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7262 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7263 return 1;
7264
7265 else if (GET_CODE (x) == PLUS)
7266 {
7267 /* REG+REG address can be any two index registers. */
7268 /* We disallow FRAME+REG addressing since we know that FRAME
7269 will be replaced with STACK, and SP relative addressing only
7270 permits SP+OFFSET. */
7271 if (GET_MODE_SIZE (mode) <= 4
7272 && XEXP (x, 0) != frame_pointer_rtx
7273 && XEXP (x, 1) != frame_pointer_rtx
7274 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7275 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7276 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7277 return 1;
7278
7279 /* REG+const has 5-7 bit offset for non-SP registers. */
7280 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7281 || XEXP (x, 0) == arg_pointer_rtx)
7282 && CONST_INT_P (XEXP (x, 1))
7283 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7284 return 1;
7285
7286 /* REG+const has 10-bit offset for SP, but only SImode and
7287 larger is supported. */
7288 /* ??? Should probably check for DI/DFmode overflow here
7289 just like GO_IF_LEGITIMATE_OFFSET does. */
7290 else if (REG_P (XEXP (x, 0))
7291 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7292 && GET_MODE_SIZE (mode) >= 4
7293 && CONST_INT_P (XEXP (x, 1))
7294 && INTVAL (XEXP (x, 1)) >= 0
7295 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7296 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7297 return 1;
7298
7299 else if (REG_P (XEXP (x, 0))
7300 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7301 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7302 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7303 && REGNO (XEXP (x, 0))
7304 <= LAST_VIRTUAL_POINTER_REGISTER))
7305 && GET_MODE_SIZE (mode) >= 4
7306 && CONST_INT_P (XEXP (x, 1))
7307 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7308 return 1;
7309 }
7310
7311 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7312 && GET_MODE_SIZE (mode) == 4
7313 && GET_CODE (x) == SYMBOL_REF
7314 && CONSTANT_POOL_ADDRESS_P (x)
7315 && ! (flag_pic
7316 && symbol_mentioned_p (get_pool_constant (x))
7317 && ! pcrel_constant_p (get_pool_constant (x))))
7318 return 1;
7319
7320 return 0;
7321 }
7322
7323 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7324 instruction of mode MODE. */
7325 int
7326 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7327 {
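  /* These ranges correspond to the 5-bit immediate offset of the Thumb-1
     ldrb/ldrh/ldr encodings, scaled by the access size: 0-31 for bytes,
     0-62 (even) for halfwords and 0-124 (word-aligned) for words; larger
     modes must also keep their final word within reach.  */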
7328 switch (GET_MODE_SIZE (mode))
7329 {
7330 case 1:
7331 return val >= 0 && val < 32;
7332
7333 case 2:
7334 return val >= 0 && val < 64 && (val & 1) == 0;
7335
7336 default:
7337 return (val >= 0
7338 && (val + GET_MODE_SIZE (mode)) <= 128
7339 && (val & 3) == 0);
7340 }
7341 }
7342
7343 bool
7344 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7345 {
7346 if (TARGET_ARM)
7347 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7348 else if (TARGET_THUMB2)
7349 return thumb2_legitimate_address_p (mode, x, strict_p);
7350 else /* if (TARGET_THUMB1) */
7351 return thumb1_legitimate_address_p (mode, x, strict_p);
7352 }
7353
7354 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7355
7356 Given an rtx X being reloaded into a reg required to be
7357 in class CLASS, return the class of reg to actually use.
7358 In general this is just CLASS, but for the Thumb core registers and
7359 immediate constants we prefer a LO_REGS class or a subset. */
7360
7361 static reg_class_t
7362 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7363 {
7364 if (TARGET_32BIT)
7365 return rclass;
7366 else
7367 {
7368 if (rclass == GENERAL_REGS)
7369 return LO_REGS;
7370 else
7371 return rclass;
7372 }
7373 }
7374
7375 /* Build the SYMBOL_REF for __tls_get_addr. */
7376
7377 static GTY(()) rtx tls_get_addr_libfunc;
7378
7379 static rtx
7380 get_tls_get_addr (void)
7381 {
7382 if (!tls_get_addr_libfunc)
7383 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7384 return tls_get_addr_libfunc;
7385 }
7386
7387 rtx
7388 arm_load_tp (rtx target)
7389 {
7390 if (!target)
7391 target = gen_reg_rtx (SImode);
7392
7393 if (TARGET_HARD_TP)
7394 {
7395 /* Can return in any reg. */
7396 emit_insn (gen_load_tp_hard (target));
7397 }
7398 else
7399 {
7400 /* Always returned in r0. Immediately copy the result into a pseudo,
7401 otherwise other uses of r0 (e.g. setting up function arguments) may
7402 clobber the value. */
7403
7404 rtx tmp;
7405
7406 emit_insn (gen_load_tp_soft ());
7407
7408 tmp = gen_rtx_REG (SImode, 0);
7409 emit_move_insn (target, tmp);
7410 }
7411 return target;
7412 }
7413
7414 static rtx
7415 load_tls_operand (rtx x, rtx reg)
7416 {
7417 rtx tmp;
7418
7419 if (reg == NULL_RTX)
7420 reg = gen_reg_rtx (SImode);
7421
7422 tmp = gen_rtx_CONST (SImode, x);
7423
7424 emit_move_insn (reg, tmp);
7425
7426 return reg;
7427 }
7428
7429 static rtx
7430 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7431 {
7432 rtx insns, label, labelno, sum;
7433
7434 gcc_assert (reloc != TLS_DESCSEQ);
7435 start_sequence ();
7436
7437 labelno = GEN_INT (pic_labelno++);
7438 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7439 label = gen_rtx_CONST (VOIDmode, label);
7440
7441 sum = gen_rtx_UNSPEC (Pmode,
7442 gen_rtvec (4, x, GEN_INT (reloc), label,
7443 GEN_INT (TARGET_ARM ? 8 : 4)),
7444 UNSPEC_TLS);
7445 reg = load_tls_operand (sum, reg);
7446
7447 if (TARGET_ARM)
7448 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7449 else
7450 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7451
7452 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7453 LCT_PURE, /* LCT_CONST? */
7454 Pmode, 1, reg, Pmode);
7455
7456 insns = get_insns ();
7457 end_sequence ();
7458
7459 return insns;
7460 }
7461
7462 static rtx
7463 arm_tls_descseq_addr (rtx x, rtx reg)
7464 {
7465 rtx labelno = GEN_INT (pic_labelno++);
7466 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7467 rtx sum = gen_rtx_UNSPEC (Pmode,
7468 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7469 gen_rtx_CONST (VOIDmode, label),
7470 GEN_INT (!TARGET_ARM)),
7471 UNSPEC_TLS);
7472 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7473
7474 emit_insn (gen_tlscall (x, labelno));
7475 if (!reg)
7476 reg = gen_reg_rtx (SImode);
7477 else
7478 gcc_assert (REGNO (reg) != 0);
7479
7480 emit_move_insn (reg, reg0);
7481
7482 return reg;
7483 }
7484
7485 rtx
7486 legitimize_tls_address (rtx x, rtx reg)
7487 {
7488 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7489 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7490
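  /* Dispatch on the TLS access model: global- and local-dynamic go through
     __tls_get_addr (or the TLS descriptor sequence when TARGET_GNU2_TLS),
     initial-exec loads the thread-relative offset from the GOT and adds the
     thread pointer, and local-exec adds a link-time constant offset to the
     thread pointer.  */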
7491 switch (model)
7492 {
7493 case TLS_MODEL_GLOBAL_DYNAMIC:
7494 if (TARGET_GNU2_TLS)
7495 {
7496 reg = arm_tls_descseq_addr (x, reg);
7497
7498 tp = arm_load_tp (NULL_RTX);
7499
7500 dest = gen_rtx_PLUS (Pmode, tp, reg);
7501 }
7502 else
7503 {
7504 /* Original scheme */
7505 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7506 dest = gen_reg_rtx (Pmode);
7507 emit_libcall_block (insns, dest, ret, x);
7508 }
7509 return dest;
7510
7511 case TLS_MODEL_LOCAL_DYNAMIC:
7512 if (TARGET_GNU2_TLS)
7513 {
7514 reg = arm_tls_descseq_addr (x, reg);
7515
7516 tp = arm_load_tp (NULL_RTX);
7517
7518 dest = gen_rtx_PLUS (Pmode, tp, reg);
7519 }
7520 else
7521 {
7522 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7523
7524 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7525 share the LDM result with other LD model accesses. */
7526 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7527 UNSPEC_TLS);
7528 dest = gen_reg_rtx (Pmode);
7529 emit_libcall_block (insns, dest, ret, eqv);
7530
7531 /* Load the addend. */
7532 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7533 GEN_INT (TLS_LDO32)),
7534 UNSPEC_TLS);
7535 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7536 dest = gen_rtx_PLUS (Pmode, dest, addend);
7537 }
7538 return dest;
7539
7540 case TLS_MODEL_INITIAL_EXEC:
7541 labelno = GEN_INT (pic_labelno++);
7542 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7543 label = gen_rtx_CONST (VOIDmode, label);
7544 sum = gen_rtx_UNSPEC (Pmode,
7545 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7546 GEN_INT (TARGET_ARM ? 8 : 4)),
7547 UNSPEC_TLS);
7548 reg = load_tls_operand (sum, reg);
7549
7550 if (TARGET_ARM)
7551 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7552 else if (TARGET_THUMB2)
7553 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7554 else
7555 {
7556 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7557 emit_move_insn (reg, gen_const_mem (SImode, reg));
7558 }
7559
7560 tp = arm_load_tp (NULL_RTX);
7561
7562 return gen_rtx_PLUS (Pmode, tp, reg);
7563
7564 case TLS_MODEL_LOCAL_EXEC:
7565 tp = arm_load_tp (NULL_RTX);
7566
7567 reg = gen_rtx_UNSPEC (Pmode,
7568 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7569 UNSPEC_TLS);
7570 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7571
7572 return gen_rtx_PLUS (Pmode, tp, reg);
7573
7574 default:
7575 abort ();
7576 }
7577 }
7578
7579 /* Try machine-dependent ways of modifying an illegitimate address
7580 to be legitimate. If we find one, return the new, valid address. */
7581 rtx
7582 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7583 {
7584 if (arm_tls_referenced_p (x))
7585 {
7586 rtx addend = NULL;
7587
7588 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7589 {
7590 addend = XEXP (XEXP (x, 0), 1);
7591 x = XEXP (XEXP (x, 0), 0);
7592 }
7593
7594 if (GET_CODE (x) != SYMBOL_REF)
7595 return x;
7596
7597 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7598
7599 x = legitimize_tls_address (x, NULL_RTX);
7600
7601 if (addend)
7602 {
7603 x = gen_rtx_PLUS (SImode, x, addend);
7604 orig_x = x;
7605 }
7606 else
7607 return x;
7608 }
7609
7610 if (!TARGET_ARM)
7611 {
7612 /* TODO: legitimize_address for Thumb2. */
7613 if (TARGET_THUMB2)
7614 return x;
7615 return thumb_legitimize_address (x, orig_x, mode);
7616 }
7617
7618 if (GET_CODE (x) == PLUS)
7619 {
7620 rtx xop0 = XEXP (x, 0);
7621 rtx xop1 = XEXP (x, 1);
7622
7623 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7624 xop0 = force_reg (SImode, xop0);
7625
7626 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7627 && !symbol_mentioned_p (xop1))
7628 xop1 = force_reg (SImode, xop1);
7629
7630 if (ARM_BASE_REGISTER_RTX_P (xop0)
7631 && CONST_INT_P (xop1))
7632 {
7633 HOST_WIDE_INT n, low_n;
7634 rtx base_reg, val;
7635 n = INTVAL (xop1);
7636
7637 /* VFP addressing modes actually allow greater offsets, but for
7638 now we just stick with the lowest common denominator. */
7639 if (mode == DImode
7640 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7641 {
7642 low_n = n & 0x0f;
7643 n &= ~0x0f;
7644 if (low_n > 4)
7645 {
7646 n += 16;
7647 low_n -= 16;
7648 }
7649 }
7650 else
7651 {
7652 low_n = ((mode) == TImode ? 0
7653 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7654 n -= low_n;
7655 }
7656
7657 base_reg = gen_reg_rtx (SImode);
7658 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7659 emit_move_insn (base_reg, val);
7660 x = plus_constant (Pmode, base_reg, low_n);
7661 }
7662 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7663 x = gen_rtx_PLUS (SImode, xop0, xop1);
7664 }
7665
7666 /* XXX We don't allow MINUS any more -- see comment in
7667 arm_legitimate_address_outer_p (). */
7668 else if (GET_CODE (x) == MINUS)
7669 {
7670 rtx xop0 = XEXP (x, 0);
7671 rtx xop1 = XEXP (x, 1);
7672
7673 if (CONSTANT_P (xop0))
7674 xop0 = force_reg (SImode, xop0);
7675
7676 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7677 xop1 = force_reg (SImode, xop1);
7678
7679 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7680 x = gen_rtx_MINUS (SImode, xop0, xop1);
7681 }
7682
7683 /* Make sure to take full advantage of the pre-indexed addressing mode
7684 with absolute addresses, which often allows the base register to
7685 be factored out across multiple adjacent memory references, and might
7686 even allow the mini pool to be avoided entirely. */
7687 else if (CONST_INT_P (x) && optimize > 0)
7688 {
7689 unsigned int bits;
7690 HOST_WIDE_INT mask, base, index;
7691 rtx base_reg;
7692
7693 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7694 use an 8-bit index. So let's use a 12-bit index for SImode only and
7695 hope that arm_gen_constant will enable ldrb to use more bits. */
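      /* Illustrative example: a SImode access to address 0x12345 gives
         mask = 0xfff, base = 0x12000 and index = 0x345; since
         bit_count (0x12000) == 2 does not exceed (32 - 12) / 2 == 10, the
         base is kept as-is and the access becomes base_reg + 0x345.  */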
7696 bits = (mode == SImode) ? 12 : 8;
7697 mask = (1 << bits) - 1;
7698 base = INTVAL (x) & ~mask;
7699 index = INTVAL (x) & mask;
7700 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7701 {
7702 /* It'll most probably be more efficient to generate the base
7703 with more bits set and use a negative index instead. */
7704 base |= mask;
7705 index -= mask;
7706 }
7707 base_reg = force_reg (SImode, GEN_INT (base));
7708 x = plus_constant (Pmode, base_reg, index);
7709 }
7710
7711 if (flag_pic)
7712 {
7713 /* We need to find and carefully transform any SYMBOL and LABEL
7714 references; so go back to the original address expression. */
7715 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7716
7717 if (new_x != orig_x)
7718 x = new_x;
7719 }
7720
7721 return x;
7722 }
7723
7724
7725 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7726 to be legitimate. If we find one, return the new, valid address. */
7727 rtx
7728 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7729 {
7730 if (GET_CODE (x) == PLUS
7731 && CONST_INT_P (XEXP (x, 1))
7732 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7733 || INTVAL (XEXP (x, 1)) < 0))
7734 {
7735 rtx xop0 = XEXP (x, 0);
7736 rtx xop1 = XEXP (x, 1);
7737 HOST_WIDE_INT offset = INTVAL (xop1);
7738
7739 /* Try and fold the offset into a biasing of the base register and
7740 then offsetting that. Don't do this when optimizing for space
7741 since it can cause too many CSEs. */
7742 if (optimize_size && offset >= 0
7743 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7744 {
7745 HOST_WIDE_INT delta;
7746
7747 if (offset >= 256)
7748 delta = offset - (256 - GET_MODE_SIZE (mode));
7749 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7750 delta = 31 * GET_MODE_SIZE (mode);
7751 else
7752 delta = offset & (~31 * GET_MODE_SIZE (mode));
7753
7754 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7755 NULL_RTX);
7756 x = plus_constant (Pmode, xop0, delta);
7757 }
7758 else if (offset < 0 && offset > -256)
7759 /* Small negative offsets are best done with a subtract before the
7760 dereference; forcing these into a register normally takes two
7761 instructions. */
7762 x = force_operand (x, NULL_RTX);
7763 else
7764 {
7765 /* For the remaining cases, force the constant into a register. */
7766 xop1 = force_reg (SImode, xop1);
7767 x = gen_rtx_PLUS (SImode, xop0, xop1);
7768 }
7769 }
7770 else if (GET_CODE (x) == PLUS
7771 && s_register_operand (XEXP (x, 1), SImode)
7772 && !s_register_operand (XEXP (x, 0), SImode))
7773 {
7774 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7775
7776 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7777 }
7778
7779 if (flag_pic)
7780 {
7781 /* We need to find and carefully transform any SYMBOL and LABEL
7782 references; so go back to the original address expression. */
7783 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7784
7785 if (new_x != orig_x)
7786 x = new_x;
7787 }
7788
7789 return x;
7790 }
7791
7792 bool
7793 arm_legitimize_reload_address (rtx *p,
7794 enum machine_mode mode,
7795 int opnum, int type,
7796 int ind_levels ATTRIBUTE_UNUSED)
7797 {
7798 /* We must recognize output that we have already generated ourselves. */
7799 if (GET_CODE (*p) == PLUS
7800 && GET_CODE (XEXP (*p, 0)) == PLUS
7801 && REG_P (XEXP (XEXP (*p, 0), 0))
7802 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7803 && CONST_INT_P (XEXP (*p, 1)))
7804 {
7805 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7806 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7807 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7808 return true;
7809 }
7810
7811 if (GET_CODE (*p) == PLUS
7812 && REG_P (XEXP (*p, 0))
7813 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7814 /* If the base register is equivalent to a constant, let the generic
7815 code handle it. Otherwise we will run into problems if a future
7816 reload pass decides to rematerialize the constant. */
7817 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7818 && CONST_INT_P (XEXP (*p, 1)))
7819 {
7820 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7821 HOST_WIDE_INT low, high;
7822
7823 /* Detect coprocessor load/stores. */
7824 bool coproc_p = ((TARGET_HARD_FLOAT
7825 && TARGET_VFP
7826 && (mode == SFmode || mode == DFmode))
7827 || (TARGET_REALLY_IWMMXT
7828 && VALID_IWMMXT_REG_MODE (mode))
7829 || (TARGET_NEON
7830 && (VALID_NEON_DREG_MODE (mode)
7831 || VALID_NEON_QREG_MODE (mode))));
7832
7833 /* For some conditions, bail out when the lower two bits of the offset are nonzero. */
7834 if ((val & 0x3) != 0
7835 /* Coprocessor load/store indexes are 8 bits with '00' appended. */
7836 && (coproc_p
7837 /* For DI, and DF under soft-float: */
7838 || ((mode == DImode || mode == DFmode)
7839 /* Without ldrd, we use stm/ldm, which does not
7840 fare well with unaligned bits. */
7841 && (! TARGET_LDRD
7842 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7843 || TARGET_THUMB2))))
7844 return false;
7845
7846 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7847 of which the (reg+high) gets turned into a reload add insn,
7848 we try to decompose the index into high/low values that can often
7849 also lead to better reload CSE.
7850 For example:
7851 ldr r0, [r2, #4100] // Offset too large
7852 ldr r1, [r2, #4104] // Offset too large
7853
7854 is best reloaded as:
7855 add t1, r2, #4096
7856 ldr r0, [t1, #4]
7857 add t2, r2, #4096
7858 ldr r1, [t2, #8]
7859
7860 which post-reload CSE can simplify in most cases to eliminate the
7861 second add instruction:
7862 add t1, r2, #4096
7863 ldr r0, [t1, #4]
7864 ldr r1, [t1, #8]
7865
7866 The idea here is that we want to split out the bits of the constant
7867 as a mask, rather than as subtracting the maximum offset that the
7868 respective type of load/store used can handle.
7869
7870 A negative low part can still be worth using even if the overall
7871 offset is positive; sometimes this leads to an immediate that can be
7872 constructed with fewer instructions.
7873 For example:
7874 ldr r0, [r2, #0x3FFFFC]
7875
7876 This is best reloaded as:
7877 add t1, r2, #0x400000
7878 ldr r0, [t1, #-4]
7879
7880 The trick for spotting this for a load insn with N bits of offset
7881 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7882 negative offset that makes bit N and all the bits below it become
7883 zero in the remainder part.
7884
7885 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7886 to the sign-magnitude addressing (i.e. a separate +/- bit, or one's
7887 complement) used by most ARM load/store instructions. */
7888
7889 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7890 (((VAL) & ((1 << (N)) - 1)) \
7891 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7892 : 0)
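 /* Worked example: SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12) evaluates to
    ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000 = 0xFFC - 0x1000 = -4, leaving
    a high part of 0x400000, exactly the reload shown in the comment
    above.  */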
7893
7894 if (coproc_p)
7895 {
7896 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7897
7898 /* NEON quad-word load/stores are made of two double-word accesses,
7899 so the valid index range is reduced by 8. Treat as 9-bit range if
7900 we go over it. */
7901 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7902 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7903 }
7904 else if (GET_MODE_SIZE (mode) == 8)
7905 {
7906 if (TARGET_LDRD)
7907 low = (TARGET_THUMB2
7908 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7909 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7910 else
7911 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7912 to access doublewords. The supported load/store offsets are
7913 -8, -4, and 4, which we try to produce here. */
7914 low = ((val & 0xf) ^ 0x8) - 0x8;
7915 }
7916 else if (GET_MODE_SIZE (mode) < 8)
7917 {
7918 /* NEON element load/stores do not have an offset. */
7919 if (TARGET_NEON_FP16 && mode == HFmode)
7920 return false;
7921
7922 if (TARGET_THUMB2)
7923 {
7924 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7925 Try the wider 12-bit range first, and re-try if the result
7926 is out of range. */
7927 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7928 if (low < -255)
7929 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7930 }
7931 else
7932 {
7933 if (mode == HImode || mode == HFmode)
7934 {
7935 if (arm_arch4)
7936 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7937 else
7938 {
7939 /* The storehi/movhi_bytes fallbacks can use only
7940 [-4094,+4094] of the full ldrb/strb index range. */
7941 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7942 if (low == 4095 || low == -4095)
7943 return false;
7944 }
7945 }
7946 else
7947 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7948 }
7949 }
7950 else
7951 return false;
7952
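      /* The XOR/subtract idiom below sign-extends VAL - LOW from 32 bits so
         that HIGH stays canonical when HOST_WIDE_INT is wider than 32 bits.  */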
7953 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7954 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7955 - (unsigned HOST_WIDE_INT) 0x80000000);
7956 /* Check for overflow or zero */
7957 if (low == 0 || high == 0 || (high + low != val))
7958 return false;
7959
7960 /* Reload the high part into a base reg; leave the low part
7961 in the mem.
7962 Note that replacing this gen_rtx_PLUS with plus_constant is
7963 wrong in this case because we rely on the
7964 (plus (plus reg c1) c2) structure being preserved so that
7965 XEXP (*p, 0) in push_reload below uses the correct term. */
7966 *p = gen_rtx_PLUS (GET_MODE (*p),
7967 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7968 GEN_INT (high)),
7969 GEN_INT (low));
7970 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7971 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7972 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7973 return true;
7974 }
7975
7976 return false;
7977 }
7978
7979 rtx
7980 thumb_legitimize_reload_address (rtx *x_p,
7981 enum machine_mode mode,
7982 int opnum, int type,
7983 int ind_levels ATTRIBUTE_UNUSED)
7984 {
7985 rtx x = *x_p;
7986
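  /* A sub-word access off the stack pointer whose offset cannot be encoded
     directly must have the whole address reloaded into a base register.  */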
7987 if (GET_CODE (x) == PLUS
7988 && GET_MODE_SIZE (mode) < 4
7989 && REG_P (XEXP (x, 0))
7990 && XEXP (x, 0) == stack_pointer_rtx
7991 && CONST_INT_P (XEXP (x, 1))
7992 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7993 {
7994 rtx orig_x = x;
7995
7996 x = copy_rtx (x);
7997 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7998 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7999 return x;
8000 }
8001
8002 /* If both registers are hi-regs, then it's better to reload the
8003 entire expression rather than each register individually. That
8004 only requires one reload register rather than two. */
8005 if (GET_CODE (x) == PLUS
8006 && REG_P (XEXP (x, 0))
8007 && REG_P (XEXP (x, 1))
8008 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8009 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8010 {
8011 rtx orig_x = x;
8012
8013 x = copy_rtx (x);
8014 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8015 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8016 return x;
8017 }
8018
8019 return NULL;
8020 }
8021
8022 /* Test for various thread-local symbols. */
8023
8024 /* Helper for arm_tls_referenced_p. */
8025
8026 static int
8027 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
8028 {
8029 if (GET_CODE (*x) == SYMBOL_REF)
8030 return SYMBOL_REF_TLS_MODEL (*x) != 0;
8031
8032 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8033 TLS offsets, not real symbol references. */
8034 if (GET_CODE (*x) == UNSPEC
8035 && XINT (*x, 1) == UNSPEC_TLS)
8036 return -1;
8037
8038 return 0;
8039 }
8040
8041 /* Return TRUE if X contains any TLS symbol references. */
8042
8043 bool
8044 arm_tls_referenced_p (rtx x)
8045 {
8046 if (! TARGET_HAVE_TLS)
8047 return false;
8048
8049 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
8050 }
8051
8052 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8053
8054 On the ARM, allow any integer (invalid ones are removed later by insn
8055 patterns), nice doubles and symbol_refs which refer to the function's
8056 constant pool XXX.
8057
8058 When generating pic allow anything. */
8059
8060 static bool
8061 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
8062 {
8063 /* At present, we have no support for Neon structure constants, so forbid
8064 them here. It might be possible to handle simple cases like 0 and -1
8065 in future. */
8066 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8067 return false;
8068
8069 return flag_pic || !label_mentioned_p (x);
8070 }
8071
8072 static bool
8073 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8074 {
8075 return (CONST_INT_P (x)
8076 || CONST_DOUBLE_P (x)
8077 || CONSTANT_ADDRESS_P (x)
8078 || flag_pic);
8079 }
8080
8081 static bool
8082 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
8083 {
8084 return (!arm_cannot_force_const_mem (mode, x)
8085 && (TARGET_32BIT
8086 ? arm_legitimate_constant_p_1 (mode, x)
8087 : thumb_legitimate_constant_p (mode, x)));
8088 }
8089
8090 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8091
8092 static bool
8093 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8094 {
8095 rtx base, offset;
8096
8097 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8098 {
8099 split_const (x, &base, &offset);
8100 if (GET_CODE (base) == SYMBOL_REF
8101 && !offset_within_block_p (base, INTVAL (offset)))
8102 return true;
8103 }
8104 return arm_tls_referenced_p (x);
8105 }
8106 \f
8107 #define REG_OR_SUBREG_REG(X) \
8108 (REG_P (X) \
8109 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8110
8111 #define REG_OR_SUBREG_RTX(X) \
8112 (REG_P (X) ? (X) : SUBREG_REG (X))
8113
8114 static inline int
8115 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8116 {
8117 enum machine_mode mode = GET_MODE (x);
8118 int total, words;
8119
8120 switch (code)
8121 {
8122 case ASHIFT:
8123 case ASHIFTRT:
8124 case LSHIFTRT:
8125 case ROTATERT:
8126 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8127
8128 case PLUS:
8129 case MINUS:
8130 case COMPARE:
8131 case NEG:
8132 case NOT:
8133 return COSTS_N_INSNS (1);
8134
8135 case MULT:
8136 if (CONST_INT_P (XEXP (x, 1)))
8137 {
8138 int cycles = 0;
8139 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8140
8141 while (i)
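            /* Cost model for an early-terminating multiply: roughly one extra
               cycle for every two significant bits of the constant
               multiplier.  */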
8142 {
8143 i >>= 2;
8144 cycles++;
8145 }
8146 return COSTS_N_INSNS (2) + cycles;
8147 }
8148 return COSTS_N_INSNS (1) + 16;
8149
8150 case SET:
8151 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8152 the mode. */
8153 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8154 return (COSTS_N_INSNS (words)
8155 + 4 * ((MEM_P (SET_SRC (x)))
8156 + MEM_P (SET_DEST (x))));
8157
8158 case CONST_INT:
8159 if (outer == SET)
8160 {
8161 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8162 return 0;
8163 if (thumb_shiftable_const (INTVAL (x)))
8164 return COSTS_N_INSNS (2);
8165 return COSTS_N_INSNS (3);
8166 }
8167 else if ((outer == PLUS || outer == COMPARE)
8168 && INTVAL (x) < 256 && INTVAL (x) > -256)
8169 return 0;
8170 else if ((outer == IOR || outer == XOR || outer == AND)
8171 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8172 return COSTS_N_INSNS (1);
8173 else if (outer == AND)
8174 {
8175 int i;
8176 /* This duplicates the tests in the andsi3 expander. */
8177 for (i = 9; i <= 31; i++)
8178 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8179 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8180 return COSTS_N_INSNS (2);
8181 }
8182 else if (outer == ASHIFT || outer == ASHIFTRT
8183 || outer == LSHIFTRT)
8184 return 0;
8185 return COSTS_N_INSNS (2);
8186
8187 case CONST:
8188 case CONST_DOUBLE:
8189 case LABEL_REF:
8190 case SYMBOL_REF:
8191 return COSTS_N_INSNS (3);
8192
8193 case UDIV:
8194 case UMOD:
8195 case DIV:
8196 case MOD:
8197 return 100;
8198
8199 case TRUNCATE:
8200 return 99;
8201
8202 case AND:
8203 case XOR:
8204 case IOR:
8205 /* XXX guess. */
8206 return 8;
8207
8208 case MEM:
8209 /* XXX another guess. */
8210 /* Memory costs quite a lot for the first word, but subsequent words
8211 load at the equivalent of a single insn each. */
8212 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8213 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8214 ? 4 : 0));
8215
8216 case IF_THEN_ELSE:
8217 /* XXX a guess. */
8218 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8219 return 14;
8220 return 2;
8221
8222 case SIGN_EXTEND:
8223 case ZERO_EXTEND:
8224 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8225 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8226
8227 if (mode == SImode)
8228 return total;
8229
8230 if (arm_arch6)
8231 return total + COSTS_N_INSNS (1);
8232
8233 /* Assume a two-shift sequence. Increase the cost slightly so
8234 we prefer actual shifts over an extend operation. */
8235 return total + 1 + COSTS_N_INSNS (2);
8236
8237 default:
8238 return 99;
8239 }
8240 }
8241
8242 static inline bool
8243 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8244 {
8245 enum machine_mode mode = GET_MODE (x);
8246 enum rtx_code subcode;
8247 rtx operand;
8248 enum rtx_code code = GET_CODE (x);
8249 *total = 0;
8250
8251 switch (code)
8252 {
8253 case MEM:
8254 /* Memory costs quite a lot for the first word, but subsequent words
8255 load at the equivalent of a single insn each. */
8256 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8257 return true;
8258
8259 case DIV:
8260 case MOD:
8261 case UDIV:
8262 case UMOD:
8263 if (TARGET_HARD_FLOAT && mode == SFmode)
8264 *total = COSTS_N_INSNS (2);
8265 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8266 *total = COSTS_N_INSNS (4);
8267 else
8268 *total = COSTS_N_INSNS (20);
8269 return false;
8270
8271 case ROTATE:
8272 if (REG_P (XEXP (x, 1)))
8273 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8274 else if (!CONST_INT_P (XEXP (x, 1)))
8275 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8276
8277 /* Fall through */
8278 case ROTATERT:
8279 if (mode != SImode)
8280 {
8281 *total += COSTS_N_INSNS (4);
8282 return true;
8283 }
8284
8285 /* Fall through */
8286 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8287 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8288 if (mode == DImode)
8289 {
8290 *total += COSTS_N_INSNS (3);
8291 return true;
8292 }
8293
8294 *total += COSTS_N_INSNS (1);
8295 /* Increase the cost of complex shifts because they aren't any faster,
8296 and reduce dual issue opportunities. */
8297 if (arm_tune_cortex_a9
8298 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8299 ++*total;
8300
8301 return true;
8302
8303 case MINUS:
8304 if (mode == DImode)
8305 {
8306 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8307 if (CONST_INT_P (XEXP (x, 0))
8308 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8309 {
8310 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8311 return true;
8312 }
8313
8314 if (CONST_INT_P (XEXP (x, 1))
8315 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8316 {
8317 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8318 return true;
8319 }
8320
8321 return false;
8322 }
8323
8324 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8325 {
8326 if (TARGET_HARD_FLOAT
8327 && (mode == SFmode
8328 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8329 {
8330 *total = COSTS_N_INSNS (1);
8331 if (CONST_DOUBLE_P (XEXP (x, 0))
8332 && arm_const_double_rtx (XEXP (x, 0)))
8333 {
8334 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8335 return true;
8336 }
8337
8338 if (CONST_DOUBLE_P (XEXP (x, 1))
8339 && arm_const_double_rtx (XEXP (x, 1)))
8340 {
8341 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8342 return true;
8343 }
8344
8345 return false;
8346 }
8347 *total = COSTS_N_INSNS (20);
8348 return false;
8349 }
8350
8351 *total = COSTS_N_INSNS (1);
8352 if (CONST_INT_P (XEXP (x, 0))
8353 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8354 {
8355 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8356 return true;
8357 }
8358
8359 subcode = GET_CODE (XEXP (x, 1));
8360 if (subcode == ASHIFT || subcode == ASHIFTRT
8361 || subcode == LSHIFTRT
8362 || subcode == ROTATE || subcode == ROTATERT)
8363 {
8364 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8365 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8366 return true;
8367 }
8368
8369 /* A shift as a part of RSB costs no more than RSB itself. */
8370 if (GET_CODE (XEXP (x, 0)) == MULT
8371 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8372 {
8373 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8374 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8375 return true;
8376 }
8377
8378 if (subcode == MULT
8379 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8380 {
8381 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8382 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8383 return true;
8384 }
8385
8386 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8387 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8388 {
8389 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8390 if (REG_P (XEXP (XEXP (x, 1), 0))
8391 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8392 *total += COSTS_N_INSNS (1);
8393
8394 return true;
8395 }
8396
8397 /* Fall through */
8398
8399 case PLUS:
8400 if (code == PLUS && arm_arch6 && mode == SImode
8401 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8402 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8403 {
8404 *total = COSTS_N_INSNS (1);
8405 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8406 0, speed);
8407 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8408 return true;
8409 }
8410
8411 /* MLA: All arguments must be registers. We filter out
8412 multiplication by a power of two, so that we fall through to
8413 the code below. */
8414 if (GET_CODE (XEXP (x, 0)) == MULT
8415 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8416 {
8417 /* The cost comes from the cost of the multiply. */
8418 return false;
8419 }
8420
8421 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8422 {
8423 if (TARGET_HARD_FLOAT
8424 && (mode == SFmode
8425 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8426 {
8427 *total = COSTS_N_INSNS (1);
8428 if (CONST_DOUBLE_P (XEXP (x, 1))
8429 && arm_const_double_rtx (XEXP (x, 1)))
8430 {
8431 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8432 return true;
8433 }
8434
8435 return false;
8436 }
8437
8438 *total = COSTS_N_INSNS (20);
8439 return false;
8440 }
8441
8442 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8443 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8444 {
8445 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8446 if (REG_P (XEXP (XEXP (x, 0), 0))
8447 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8448 *total += COSTS_N_INSNS (1);
8449 return true;
8450 }
8451
8452 /* Fall through */
8453
8454 case AND: case XOR: case IOR:
8455
8456 /* Normally the frame registers will be split into reg+const during
8457 reload, so it is a bad idea to combine them with other instructions,
8458 since then they might not be moved outside of loops. As a compromise
8459 we allow integration with ops that have a constant as their second
8460 operand. */
8461 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8462 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8463 && !CONST_INT_P (XEXP (x, 1)))
8464 *total = COSTS_N_INSNS (1);
8465
8466 if (mode == DImode)
8467 {
8468 *total += COSTS_N_INSNS (2);
8469 if (CONST_INT_P (XEXP (x, 1))
8470 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8471 {
8472 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8473 return true;
8474 }
8475
8476 return false;
8477 }
8478
8479 *total += COSTS_N_INSNS (1);
8480 if (CONST_INT_P (XEXP (x, 1))
8481 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8482 {
8483 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8484 return true;
8485 }
8486 subcode = GET_CODE (XEXP (x, 0));
8487 if (subcode == ASHIFT || subcode == ASHIFTRT
8488 || subcode == LSHIFTRT
8489 || subcode == ROTATE || subcode == ROTATERT)
8490 {
8491 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8492 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8493 return true;
8494 }
8495
8496 if (subcode == MULT
8497 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8498 {
8499 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8500 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8501 return true;
8502 }
8503
8504 if (subcode == UMIN || subcode == UMAX
8505 || subcode == SMIN || subcode == SMAX)
8506 {
8507 *total = COSTS_N_INSNS (3);
8508 return true;
8509 }
8510
8511 return false;
8512
8513 case MULT:
8514 /* This should have been handled by the CPU specific routines. */
8515 gcc_unreachable ();
8516
8517 case TRUNCATE:
8518 if (arm_arch3m && mode == SImode
8519 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8520 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8521 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8522 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8523 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8524 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8525 {
8526 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8527 return true;
8528 }
8529 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8530 return false;
8531
8532 case NEG:
8533 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8534 {
8535 if (TARGET_HARD_FLOAT
8536 && (mode == SFmode
8537 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8538 {
8539 *total = COSTS_N_INSNS (1);
8540 return false;
8541 }
8542 *total = COSTS_N_INSNS (2);
8543 return false;
8544 }
8545
8546 /* Fall through */
8547 case NOT:
8548 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8549 if (mode == SImode && code == NOT)
8550 {
8551 subcode = GET_CODE (XEXP (x, 0));
8552 if (subcode == ASHIFT || subcode == ASHIFTRT
8553 || subcode == LSHIFTRT
8554 || subcode == ROTATE || subcode == ROTATERT
8555 || (subcode == MULT
8556 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8557 {
8558 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8559 /* Register shifts cost an extra cycle. */
8560 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8561 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8562 subcode, 1, speed);
8563 return true;
8564 }
8565 }
8566
8567 return false;
8568
8569 case IF_THEN_ELSE:
8570 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8571 {
8572 *total = COSTS_N_INSNS (4);
8573 return true;
8574 }
8575
8576 operand = XEXP (x, 0);
8577
8578 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8579 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8580 && REG_P (XEXP (operand, 0))
8581 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8582 *total += COSTS_N_INSNS (1);
8583 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8584 + rtx_cost (XEXP (x, 2), code, 2, speed));
8585 return true;
8586
8587 case NE:
8588 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8589 {
8590 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8591 return true;
8592 }
8593 goto scc_insn;
8594
8595 case GE:
8596 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8597 && mode == SImode && XEXP (x, 1) == const0_rtx)
8598 {
8599 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8600 return true;
8601 }
8602 goto scc_insn;
8603
8604 case LT:
8605 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8606 && mode == SImode && XEXP (x, 1) == const0_rtx)
8607 {
8608 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8609 return true;
8610 }
8611 goto scc_insn;
8612
8613 case EQ:
8614 case GT:
8615 case LE:
8616 case GEU:
8617 case LTU:
8618 case GTU:
8619 case LEU:
8620 case UNORDERED:
8621 case ORDERED:
8622 case UNEQ:
8623 case UNGE:
8624 case UNLT:
8625 case UNGT:
8626 case UNLE:
8627 scc_insn:
8628 /* SCC insns. If the comparison has already been performed, they
8629 cost 2 instructions. Otherwise they need an additional comparison
8630 before them. */
8631 *total = COSTS_N_INSNS (2);
8632 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8633 {
8634 return true;
8635 }
8636
8637 /* Fall through */
8638 case COMPARE:
8639 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8640 {
8641 *total = 0;
8642 return true;
8643 }
8644
8645 *total += COSTS_N_INSNS (1);
8646 if (CONST_INT_P (XEXP (x, 1))
8647 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8648 {
8649 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8650 return true;
8651 }
8652
8653 subcode = GET_CODE (XEXP (x, 0));
8654 if (subcode == ASHIFT || subcode == ASHIFTRT
8655 || subcode == LSHIFTRT
8656 || subcode == ROTATE || subcode == ROTATERT)
8657 {
8658 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8659 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8660 return true;
8661 }
8662
8663 if (subcode == MULT
8664 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8665 {
8666 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8667 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8668 return true;
8669 }
8670
8671 return false;
8672
8673 case UMIN:
8674 case UMAX:
8675 case SMIN:
8676 case SMAX:
8677 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8678 if (!CONST_INT_P (XEXP (x, 1))
8679 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8680 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8681 return true;
8682
8683 case ABS:
8684 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8685 {
8686 if (TARGET_HARD_FLOAT
8687 && (mode == SFmode
8688 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8689 {
8690 *total = COSTS_N_INSNS (1);
8691 return false;
8692 }
8693 *total = COSTS_N_INSNS (20);
8694 return false;
8695 }
8696 *total = COSTS_N_INSNS (1);
8697 if (mode == DImode)
8698 *total += COSTS_N_INSNS (3);
8699 return false;
8700
8701 case SIGN_EXTEND:
8702 case ZERO_EXTEND:
8703 *total = 0;
8704 if (GET_MODE_CLASS (mode) == MODE_INT)
8705 {
8706 rtx op = XEXP (x, 0);
8707 enum machine_mode opmode = GET_MODE (op);
8708
8709 if (mode == DImode)
8710 *total += COSTS_N_INSNS (1);
8711
8712 if (opmode != SImode)
8713 {
8714 if (MEM_P (op))
8715 {
8716 /* If !arm_arch4, we use one of the extendhisi2_mem
8717 or movhi_bytes patterns for HImode. For a QImode
8718 sign extension, we first zero-extend from memory
8719 and then perform a shift sequence. */
8720 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8721 *total += COSTS_N_INSNS (2);
8722 }
8723 else if (arm_arch6)
8724 *total += COSTS_N_INSNS (1);
8725
8726 /* We don't have the necessary insn, so we need to perform some
8727 other operation. */
8728 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8729 /* An and with constant 255. */
8730 *total += COSTS_N_INSNS (1);
8731 else
8732 /* A shift sequence. Increase costs slightly to avoid
8733 combining two shifts into an extend operation. */
8734 *total += COSTS_N_INSNS (2) + 1;
8735 }
8736
8737 return false;
8738 }
8739
8740 switch (GET_MODE (XEXP (x, 0)))
8741 {
8742 case V8QImode:
8743 case V4HImode:
8744 case V2SImode:
8745 case V4QImode:
8746 case V2HImode:
8747 *total = COSTS_N_INSNS (1);
8748 return false;
8749
8750 default:
8751 gcc_unreachable ();
8752 }
8753 gcc_unreachable ();
8754
8755 case ZERO_EXTRACT:
8756 case SIGN_EXTRACT:
8757 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8758 return true;
8759
8760 case CONST_INT:
8761 if (const_ok_for_arm (INTVAL (x))
8762 || const_ok_for_arm (~INTVAL (x)))
8763 *total = COSTS_N_INSNS (1);
8764 else
8765 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8766 INTVAL (x), NULL_RTX,
8767 NULL_RTX, 0, 0));
8768 return true;
8769
8770 case CONST:
8771 case LABEL_REF:
8772 case SYMBOL_REF:
8773 *total = COSTS_N_INSNS (3);
8774 return true;
8775
8776 case HIGH:
8777 *total = COSTS_N_INSNS (1);
8778 return true;
8779
8780 case LO_SUM:
8781 *total = COSTS_N_INSNS (1);
8782 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8783 return true;
8784
8785 case CONST_DOUBLE:
8786 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8787 && (mode == SFmode || !TARGET_VFP_SINGLE))
8788 *total = COSTS_N_INSNS (1);
8789 else
8790 *total = COSTS_N_INSNS (4);
8791 return true;
8792
8793 case SET:
8794 /* The vec_extract patterns accept memory operands that require an
8795 address reload. Account for the cost of that reload to give the
8796 auto-inc-dec pass an incentive to try to replace them. */
8797 if (TARGET_NEON && MEM_P (SET_DEST (x))
8798 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8799 {
8800 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8801 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8802 *total += COSTS_N_INSNS (1);
8803 return true;
8804 }
8805 /* Likewise for the vec_set patterns. */
8806 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8807 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8808 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8809 {
8810 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8811 *total = rtx_cost (mem, code, 0, speed);
8812 if (!neon_vector_mem_operand (mem, 2, true))
8813 *total += COSTS_N_INSNS (1);
8814 return true;
8815 }
8816 return false;
8817
8818 case UNSPEC:
8819 /* We give this the same cost as a memory access so that it can
8820 be hoisted out of loops. */
8821 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8822 {
8823 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8824 }
8825 return true;
8826
8827 case CONST_VECTOR:
8828 if (TARGET_NEON
8829 && TARGET_HARD_FLOAT
8830 && outer == SET
8831 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8832 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8833 *total = COSTS_N_INSNS (1);
8834 else
8835 *total = COSTS_N_INSNS (4);
8836 return true;
8837
8838 default:
8839 *total = COSTS_N_INSNS (4);
8840 return false;
8841 }
8842 }
8843
8844 /* Estimate the size cost of Thumb-1 instructions.
8845 For now most of the code is copied from thumb1_rtx_costs. We need more
8846 fine-grained tuning when we have more related test cases. */
8847 static inline int
8848 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8849 {
8850 enum machine_mode mode = GET_MODE (x);
8851 int words;
8852
8853 switch (code)
8854 {
8855 case ASHIFT:
8856 case ASHIFTRT:
8857 case LSHIFTRT:
8858 case ROTATERT:
8859 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8860
8861 case PLUS:
8862 case MINUS:
8863 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
8864 shiftsub1 patterns generated during RTL expansion, especially for the
8865 expansion of multiplication. */
8866 if ((GET_CODE (XEXP (x, 0)) == MULT
8867 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8868 || (GET_CODE (XEXP (x, 1)) == MULT
8869 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8870 return COSTS_N_INSNS (2);
8871 /* Deliberately fall through for normal RTX. */
8872 case COMPARE:
8873 case NEG:
8874 case NOT:
8875 return COSTS_N_INSNS (1);
8876
8877 case MULT:
8878 if (CONST_INT_P (XEXP (x, 1)))
8879 {
8880 /* The Thumb-1 mul instruction can't operate on a constant; we must
8881 load it into a register first. */
8882 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8883 return COSTS_N_INSNS (1) + const_size;
8884 }
8885 return COSTS_N_INSNS (1);
8886
8887 case SET:
8888 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8889 the mode. */
8890 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8891 return (COSTS_N_INSNS (words)
8892 + 4 * ((MEM_P (SET_SRC (x)))
8893 + MEM_P (SET_DEST (x))));
8894
8895 case CONST_INT:
8896 if (outer == SET)
8897 {
8898 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8899 return COSTS_N_INSNS (1);
8900 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8901 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8902 return COSTS_N_INSNS (2);
8903 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8904 if (thumb_shiftable_const (INTVAL (x)))
8905 return COSTS_N_INSNS (2);
8906 return COSTS_N_INSNS (3);
8907 }
8908 else if ((outer == PLUS || outer == COMPARE)
8909 && INTVAL (x) < 256 && INTVAL (x) > -256)
8910 return 0;
8911 else if ((outer == IOR || outer == XOR || outer == AND)
8912 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8913 return COSTS_N_INSNS (1);
8914 else if (outer == AND)
8915 {
8916 int i;
8917 /* This duplicates the tests in the andsi3 expander. */
8918 for (i = 9; i <= 31; i++)
8919 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8920 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8921 return COSTS_N_INSNS (2);
8922 }
8923 else if (outer == ASHIFT || outer == ASHIFTRT
8924 || outer == LSHIFTRT)
8925 return 0;
8926 return COSTS_N_INSNS (2);
8927
8928 case CONST:
8929 case CONST_DOUBLE:
8930 case LABEL_REF:
8931 case SYMBOL_REF:
8932 return COSTS_N_INSNS (3);
8933
8934 case UDIV:
8935 case UMOD:
8936 case DIV:
8937 case MOD:
8938 return 100;
8939
8940 case TRUNCATE:
8941 return 99;
8942
8943 case AND:
8944 case XOR:
8945 case IOR:
8946 /* XXX guess. */
8947 return 8;
8948
8949 case MEM:
8950 /* XXX another guess. */
8951 /* Memory costs quite a lot for the first word, but subsequent words
8952 load at the equivalent of a single insn each. */
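/* For instance (assuming UNITS_PER_WORD == 4), a DImode access would be
   costed at 10 + 4 * ((8 - 1) / 4) = 14, with a further 4 added when the
   address is a constant-pool reference.  */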
8953 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8954 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8955 ? 4 : 0));
8956
8957 case IF_THEN_ELSE:
8958 /* XXX a guess. */
8959 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8960 return 14;
8961 return 2;
8962
8963 case ZERO_EXTEND:
8964 /* XXX still guessing. */
8965 switch (GET_MODE (XEXP (x, 0)))
8966 {
8967 case QImode:
8968 return (1 + (mode == DImode ? 4 : 0)
8969 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8970
8971 case HImode:
8972 return (4 + (mode == DImode ? 4 : 0)
8973 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8974
8975 case SImode:
8976 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8977
8978 default:
8979 return 99;
8980 }
8981
8982 default:
8983 return 99;
8984 }
8985 }
8986
8987 /* RTX costs when optimizing for size. */
8988 static bool
8989 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8990 int *total)
8991 {
8992 enum machine_mode mode = GET_MODE (x);
8993 if (TARGET_THUMB1)
8994 {
8995 *total = thumb1_size_rtx_costs (x, code, outer_code);
8996 return true;
8997 }
8998
8999 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9000 switch (code)
9001 {
9002 case MEM:
9003 /* A memory access costs 1 insn if the mode is small, or the address is
9004 a single register, otherwise it costs one insn per word. */
9005 if (REG_P (XEXP (x, 0)))
9006 *total = COSTS_N_INSNS (1);
9007 else if (flag_pic
9008 && GET_CODE (XEXP (x, 0)) == PLUS
9009 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9010 /* This will be split into two instructions.
9011 See arm.md:calculate_pic_address. */
9012 *total = COSTS_N_INSNS (2);
9013 else
9014 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9015 return true;
9016
9017 case DIV:
9018 case MOD:
9019 case UDIV:
9020 case UMOD:
9021 /* Needs a libcall, so it costs about this. */
9022 *total = COSTS_N_INSNS (2);
9023 return false;
9024
9025 case ROTATE:
9026 if (mode == SImode && REG_P (XEXP (x, 1)))
9027 {
9028 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9029 return true;
9030 }
9031 /* Fall through */
9032 case ROTATERT:
9033 case ASHIFT:
9034 case LSHIFTRT:
9035 case ASHIFTRT:
9036 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9037 {
9038 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9039 return true;
9040 }
9041 else if (mode == SImode)
9042 {
9043 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9044 /* Slightly disparage register shifts, but not by much. */
9045 if (!CONST_INT_P (XEXP (x, 1)))
9046 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9047 return true;
9048 }
9049
9050 /* Needs a libcall. */
9051 *total = COSTS_N_INSNS (2);
9052 return false;
9053
9054 case MINUS:
9055 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9056 && (mode == SFmode || !TARGET_VFP_SINGLE))
9057 {
9058 *total = COSTS_N_INSNS (1);
9059 return false;
9060 }
9061
9062 if (mode == SImode)
9063 {
9064 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9065 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9066
9067 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9068 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9069 || subcode1 == ROTATE || subcode1 == ROTATERT
9070 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9071 || subcode1 == ASHIFTRT)
9072 {
9073 /* It's just the cost of the two operands. */
9074 *total = 0;
9075 return false;
9076 }
9077
9078 *total = COSTS_N_INSNS (1);
9079 return false;
9080 }
9081
9082 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9083 return false;
9084
9085 case PLUS:
9086 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9087 && (mode == SFmode || !TARGET_VFP_SINGLE))
9088 {
9089 *total = COSTS_N_INSNS (1);
9090 return false;
9091 }
9092
9093 /* A shift as part of an ADD costs nothing. */
9094 if (GET_CODE (XEXP (x, 0)) == MULT
9095 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9096 {
9097 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9098 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9099 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9100 return true;
9101 }
9102
9103 /* Fall through */
9104 case AND: case XOR: case IOR:
9105 if (mode == SImode)
9106 {
9107 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9108
9109 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9110 || subcode == LSHIFTRT || subcode == ASHIFTRT
9111 || (code == AND && subcode == NOT))
9112 {
9113 /* It's just the cost of the two operands. */
9114 *total = 0;
9115 return false;
9116 }
9117 }
9118
9119 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9120 return false;
9121
9122 case MULT:
9123 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9124 return false;
9125
9126 case NEG:
9127 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9128 && (mode == SFmode || !TARGET_VFP_SINGLE))
9129 {
9130 *total = COSTS_N_INSNS (1);
9131 return false;
9132 }
9133
9134 /* Fall through */
9135 case NOT:
9136 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9137
9138 return false;
9139
9140 case IF_THEN_ELSE:
9141 *total = 0;
9142 return false;
9143
9144 case COMPARE:
9145 if (cc_register (XEXP (x, 0), VOIDmode))
9146 *total = 0;
9147 else
9148 *total = COSTS_N_INSNS (1);
9149 return false;
9150
9151 case ABS:
9152 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9153 && (mode == SFmode || !TARGET_VFP_SINGLE))
9154 *total = COSTS_N_INSNS (1);
9155 else
9156 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9157 return false;
9158
9159 case SIGN_EXTEND:
9160 case ZERO_EXTEND:
9161 return arm_rtx_costs_1 (x, outer_code, total, 0);
9162
9163 case CONST_INT:
9164 if (const_ok_for_arm (INTVAL (x)))
9165 /* A multiplication by a constant requires another instruction
9166 to load the constant to a register. */
9167 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9168 ? 1 : 0);
9169 else if (const_ok_for_arm (~INTVAL (x)))
9170 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9171 else if (const_ok_for_arm (-INTVAL (x)))
9172 {
9173 if (outer_code == COMPARE || outer_code == PLUS
9174 || outer_code == MINUS)
9175 *total = 0;
9176 else
9177 *total = COSTS_N_INSNS (1);
9178 }
9179 else
9180 *total = COSTS_N_INSNS (2);
9181 return true;
9182
9183 case CONST:
9184 case LABEL_REF:
9185 case SYMBOL_REF:
9186 *total = COSTS_N_INSNS (2);
9187 return true;
9188
9189 case CONST_DOUBLE:
9190 *total = COSTS_N_INSNS (4);
9191 return true;
9192
9193 case CONST_VECTOR:
9194 if (TARGET_NEON
9195 && TARGET_HARD_FLOAT
9196 && outer_code == SET
9197 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9198 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9199 *total = COSTS_N_INSNS (1);
9200 else
9201 *total = COSTS_N_INSNS (4);
9202 return true;
9203
9204 case HIGH:
9205 case LO_SUM:
9206 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9207 cost of these slightly. */
9208 *total = COSTS_N_INSNS (1) + 1;
9209 return true;
9210
9211 case SET:
9212 return false;
9213
9214 default:
9215 if (mode != VOIDmode)
9216 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9217 else
9218 *total = COSTS_N_INSNS (4); /* Who knows? */
9219 return false;
9220 }
9221 }
9222
9223 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9224 operand, then return the operand that is being shifted. If the shift
9225 is not by a constant amount, set *SHIFT_REG to the rtx holding the shift amount.
9226 Return NULL if OP is not a shifter operand. */
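/* For example: given (mult:SI (reg:SI r1) (const_int 4)), a multiply by
   four is a shift left by two, so this returns r1; given
   (ashift:SI (reg:SI r1) (reg:SI r2)), it returns r1 and sets *SHIFT_REG
   to r2.  The register names are purely illustrative.  */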
9227 static rtx
9228 shifter_op_p (rtx op, rtx *shift_reg)
9229 {
9230 enum rtx_code code = GET_CODE (op);
9231
9232 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9233 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9234 return XEXP (op, 0);
9235 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9236 return XEXP (op, 0);
9237 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9238 || code == ASHIFTRT)
9239 {
9240 if (!CONST_INT_P (XEXP (op, 1)))
9241 *shift_reg = XEXP (op, 1);
9242 return XEXP (op, 0);
9243 }
9244
9245 return NULL;
9246 }
9247
9248 static bool
9249 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9250 {
9251 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9252 gcc_assert (GET_CODE (x) == UNSPEC);
9253
9254 switch (XINT (x, 1))
9255 {
9256 case UNSPEC_UNALIGNED_LOAD:
9257 /* We can only do unaligned loads into the integer unit, and we can't
9258 use LDM or LDRD. */
9259 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9260 if (speed_p)
9261 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9262 + extra_cost->ldst.load_unaligned);
9263
9264 #ifdef NOT_YET
9265 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9266 ADDR_SPACE_GENERIC, speed_p);
9267 #endif
9268 return true;
9269
9270 case UNSPEC_UNALIGNED_STORE:
9271 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9272 if (speed_p)
9273 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9274 + extra_cost->ldst.store_unaligned);
9275
9276 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9277 #ifdef NOT_YET
9278 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9279 ADDR_SPACE_GENERIC, speed_p);
9280 #endif
9281 return true;
9282
9283 case UNSPEC_VRINTZ:
9284 case UNSPEC_VRINTP:
9285 case UNSPEC_VRINTM:
9286 case UNSPEC_VRINTR:
9287 case UNSPEC_VRINTX:
9288 case UNSPEC_VRINTA:
9289 *cost = COSTS_N_INSNS (1);
9290 if (speed_p)
9291 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9292
9293 return true;
9294 default:
9295 *cost = COSTS_N_INSNS (2);
9296 break;
9297 }
9298 return false;
9299 }
9300
9301 /* Cost of a libcall. We assume one insn per argument, an amount for the
9302 call (one insn for -Os) and then one for processing the result. */
9303 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
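/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (2 + 18) = COSTS_N_INSNS (20)
   when optimizing for speed, and COSTS_N_INSNS (4) when optimizing for size.  */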
9304
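/* Helper for the narrow-mode (QImode/HImode) PLUS and MINUS cases below.
   If operand IDX of X is a left shift (the only shift form usable in the
   narrow modes), account for an arithmetic operation with a shifted
   operand (using the shift-by-register cost when the shift amount is not
   a constant), add the costs of the shifted operand and of the other
   operand, and return a final cost from the enclosing function; otherwise
   fall through to the code following the macro use.  */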
9305 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9306 do \
9307 { \
9308 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9309 if (shift_op != NULL \
9310 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9311 { \
9312 if (shift_reg) \
9313 { \
9314 if (speed_p) \
9315 *cost += extra_cost->alu.arith_shift_reg; \
9316 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9317 } \
9318 else if (speed_p) \
9319 *cost += extra_cost->alu.arith_shift; \
9320 \
9321 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9322 + rtx_cost (XEXP (x, 1 - IDX), \
9323 OP, 1, speed_p)); \
9324 return true; \
9325 } \
9326 } \
9327 while (0);
9328
9329 /* RTX costs. Make an estimate of the cost of executing the operation
9330 X, which is contained within an operation with code OUTER_CODE.
9331 SPEED_P indicates whether the cost desired is the performance cost,
9332 or the size cost. The estimate is stored in COST and the return
9333 value is TRUE if the cost calculation is final, or FALSE if the
9334 caller should recurse through the operands of X to add additional
9335 costs.
9336
9337 We currently make no attempt to model the size savings of Thumb-2
9338 16-bit instructions. At the normal points in compilation where
9339 this code is called we have no measure of whether the condition
9340 flags are live or not, and thus no realistic way to determine what
9341 the size will eventually be. */
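/* Note that returning FALSE is not a failure: it simply tells the generic
   rtx_cost machinery to add the recursively computed costs of X's operands
   to the value already stored in *COST.  */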
9342 static bool
9343 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9344 const struct cpu_cost_table *extra_cost,
9345 int *cost, bool speed_p)
9346 {
9347 enum machine_mode mode = GET_MODE (x);
9348
9349 if (TARGET_THUMB1)
9350 {
9351 if (speed_p)
9352 *cost = thumb1_rtx_costs (x, code, outer_code);
9353 else
9354 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9355 return true;
9356 }
9357
9358 switch (code)
9359 {
9360 case SET:
9361 *cost = 0;
9362 /* SET RTXs don't have a mode so we get it from the destination. */
9363 mode = GET_MODE (SET_DEST (x));
9364
9365 if (REG_P (SET_SRC (x))
9366 && REG_P (SET_DEST (x)))
9367 {
9368 /* Assume that most copies can be done with a single insn,
9369 unless we don't have HW FP, in which case everything
9370 larger than word mode will require two insns. */
9371 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9372 && GET_MODE_SIZE (mode) > 4)
9373 || mode == DImode)
9374 ? 2 : 1);
9375 /* Conditional register moves can be encoded
9376 in 16 bits in Thumb mode. */
9377 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9378 *cost >>= 1;
9379
9380 return true;
9381 }
9382
9383 if (CONST_INT_P (SET_SRC (x)))
9384 {
9385 /* Handle CONST_INT here, since the value doesn't have a mode
9386 and we would otherwise be unable to work out the true cost. */
9387 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9388 outer_code = SET;
9389 /* Slightly lower the cost of setting a core reg to a constant.
9390 This helps break up chains and allows for better scheduling. */
9391 if (REG_P (SET_DEST (x))
9392 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9393 *cost -= 1;
9394 x = SET_SRC (x);
9395 /* Immediate moves with an immediate in the range [0, 255] can be
9396 encoded in 16 bits in Thumb mode. */
9397 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9398 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9399 *cost >>= 1;
9400 goto const_int_cost;
9401 }
9402
9403 return false;
9404
9405 case MEM:
9406 /* A memory access costs 1 insn if the mode is small, or the address is
9407 a single register, otherwise it costs one insn per word. */
9408 if (REG_P (XEXP (x, 0)))
9409 *cost = COSTS_N_INSNS (1);
9410 else if (flag_pic
9411 && GET_CODE (XEXP (x, 0)) == PLUS
9412 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9413 /* This will be split into two instructions.
9414 See arm.md:calculate_pic_address. */
9415 *cost = COSTS_N_INSNS (2);
9416 else
9417 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9418
9419 /* For speed optimizations, add the costs of the address and
9420 accessing memory. */
9421 if (speed_p)
9422 #ifdef NOT_YET
9423 *cost += (extra_cost->ldst.load
9424 + arm_address_cost (XEXP (x, 0), mode,
9425 ADDR_SPACE_GENERIC, speed_p));
9426 #else
9427 *cost += extra_cost->ldst.load;
9428 #endif
9429 return true;
9430
9431 case PARALLEL:
9432 {
9433 /* Calculations of LDM costs are complex. We assume an initial cost
9434 (ldm_1st) which will load the number of registers mentioned in
9435 ldm_regs_per_insn_1st registers; then each additional
9436 ldm_regs_per_insn_subsequent registers cost one more insn. The
9437 formula for N regs is thus:
9438
9439 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9440 + ldm_regs_per_insn_subsequent - 1)
9441 / ldm_regs_per_insn_subsequent).
9442
9443 Additional costs may also be added for addressing. A similar
9444 formula is used for STM. */
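/* As a worked example with purely illustrative table values, say
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2:
   an LDM of five registers would then cost
   ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (2).  */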
9445
9446 bool is_ldm = load_multiple_operation (x, SImode);
9447 bool is_stm = store_multiple_operation (x, SImode);
9448
9449 *cost = COSTS_N_INSNS (1);
9450
9451 if (is_ldm || is_stm)
9452 {
9453 if (speed_p)
9454 {
9455 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9456 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9457 ? extra_cost->ldst.ldm_regs_per_insn_1st
9458 : extra_cost->ldst.stm_regs_per_insn_1st;
9459 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9460 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9461 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9462
9463 *cost += regs_per_insn_1st
9464 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9465 + regs_per_insn_sub - 1)
9466 / regs_per_insn_sub);
9467 return true;
9468 }
9469
9470 }
9471 return false;
9472 }
9473 case DIV:
9474 case UDIV:
9475 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9476 && (mode == SFmode || !TARGET_VFP_SINGLE))
9477 *cost = COSTS_N_INSNS (speed_p
9478 ? extra_cost->fp[mode != SFmode].div : 1);
9479 else if (mode == SImode && TARGET_IDIV)
9480 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9481 else
9482 *cost = LIBCALL_COST (2);
9483 return false; /* All arguments must be in registers. */
9484
9485 case MOD:
9486 case UMOD:
9487 *cost = LIBCALL_COST (2);
9488 return false; /* All arguments must be in registers. */
9489
9490 case ROTATE:
9491 if (mode == SImode && REG_P (XEXP (x, 1)))
9492 {
9493 *cost = (COSTS_N_INSNS (2)
9494 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9495 if (speed_p)
9496 *cost += extra_cost->alu.shift_reg;
9497 return true;
9498 }
9499 /* Fall through */
9500 case ROTATERT:
9501 case ASHIFT:
9502 case LSHIFTRT:
9503 case ASHIFTRT:
9504 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9505 {
9506 *cost = (COSTS_N_INSNS (3)
9507 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9508 if (speed_p)
9509 *cost += 2 * extra_cost->alu.shift;
9510 return true;
9511 }
9512 else if (mode == SImode)
9513 {
9514 *cost = (COSTS_N_INSNS (1)
9515 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9516 /* Slightly disparage register shifts at -Os, but not by much. */
9517 if (!CONST_INT_P (XEXP (x, 1)))
9518 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9519 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9520 return true;
9521 }
9522 else if (GET_MODE_CLASS (mode) == MODE_INT
9523 && GET_MODE_SIZE (mode) < 4)
9524 {
9525 if (code == ASHIFT)
9526 {
9527 *cost = (COSTS_N_INSNS (1)
9528 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9529 /* Slightly disparage register shifts at -Os, but not by
9530 much. */
9531 if (!CONST_INT_P (XEXP (x, 1)))
9532 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9533 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9534 }
9535 else if (code == LSHIFTRT || code == ASHIFTRT)
9536 {
9537 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9538 {
9539 /* Can use SBFX/UBFX. */
9540 *cost = COSTS_N_INSNS (1);
9541 if (speed_p)
9542 *cost += extra_cost->alu.bfx;
9543 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9544 }
9545 else
9546 {
9547 *cost = COSTS_N_INSNS (2);
9548 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9549 if (speed_p)
9550 {
9551 if (CONST_INT_P (XEXP (x, 1)))
9552 *cost += 2 * extra_cost->alu.shift;
9553 else
9554 *cost += (extra_cost->alu.shift
9555 + extra_cost->alu.shift_reg);
9556 }
9557 else
9558 /* Slightly disparage register shifts. */
9559 *cost += !CONST_INT_P (XEXP (x, 1));
9560 }
9561 }
9562 else /* Rotates. */
9563 {
9564 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9565 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9566 if (speed_p)
9567 {
9568 if (CONST_INT_P (XEXP (x, 1)))
9569 *cost += (2 * extra_cost->alu.shift
9570 + extra_cost->alu.log_shift);
9571 else
9572 *cost += (extra_cost->alu.shift
9573 + extra_cost->alu.shift_reg
9574 + extra_cost->alu.log_shift_reg);
9575 }
9576 }
9577 return true;
9578 }
9579
9580 *cost = LIBCALL_COST (2);
9581 return false;
9582
9583 case BSWAP:
9584 if (arm_arch6)
9585 {
9586 if (mode == SImode)
9587 {
9588 *cost = COSTS_N_INSNS (1);
9589 if (speed_p)
9590 *cost += extra_cost->alu.rev;
9591
9592 return false;
9593 }
9594 }
9595 else
9596 {
9597 /* No rev instruction available. Look at arm_legacy_rev
9598 and thumb_legacy_rev for the form of RTL used then. */
9599 if (TARGET_THUMB)
9600 {
9601 *cost = COSTS_N_INSNS (10);
9602
9603 if (speed_p)
9604 {
9605 *cost += 6 * extra_cost->alu.shift;
9606 *cost += 3 * extra_cost->alu.logical;
9607 }
9608 }
9609 else
9610 {
9611 *cost = COSTS_N_INSNS (5);
9612
9613 if (speed_p)
9614 {
9615 *cost += 2 * extra_cost->alu.shift;
9616 *cost += extra_cost->alu.arith_shift;
9617 *cost += 2 * extra_cost->alu.logical;
9618 }
9619 }
9620 return true;
9621 }
9622 return false;
9623
9624 case MINUS:
9625 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9626 && (mode == SFmode || !TARGET_VFP_SINGLE))
9627 {
9628 *cost = COSTS_N_INSNS (1);
9629 if (GET_CODE (XEXP (x, 0)) == MULT
9630 || GET_CODE (XEXP (x, 1)) == MULT)
9631 {
9632 rtx mul_op0, mul_op1, sub_op;
9633
9634 if (speed_p)
9635 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9636
9637 if (GET_CODE (XEXP (x, 0)) == MULT)
9638 {
9639 mul_op0 = XEXP (XEXP (x, 0), 0);
9640 mul_op1 = XEXP (XEXP (x, 0), 1);
9641 sub_op = XEXP (x, 1);
9642 }
9643 else
9644 {
9645 mul_op0 = XEXP (XEXP (x, 1), 0);
9646 mul_op1 = XEXP (XEXP (x, 1), 1);
9647 sub_op = XEXP (x, 0);
9648 }
9649
9650 /* The first operand of the multiply may be optionally
9651 negated. */
9652 if (GET_CODE (mul_op0) == NEG)
9653 mul_op0 = XEXP (mul_op0, 0);
9654
9655 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9656 + rtx_cost (mul_op1, code, 0, speed_p)
9657 + rtx_cost (sub_op, code, 0, speed_p));
9658
9659 return true;
9660 }
9661
9662 if (speed_p)
9663 *cost += extra_cost->fp[mode != SFmode].addsub;
9664 return false;
9665 }
9666
9667 if (mode == SImode)
9668 {
9669 rtx shift_by_reg = NULL;
9670 rtx shift_op;
9671 rtx non_shift_op;
9672
9673 *cost = COSTS_N_INSNS (1);
9674
9675 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9676 if (shift_op == NULL)
9677 {
9678 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9679 non_shift_op = XEXP (x, 0);
9680 }
9681 else
9682 non_shift_op = XEXP (x, 1);
9683
9684 if (shift_op != NULL)
9685 {
9686 if (shift_by_reg != NULL)
9687 {
9688 if (speed_p)
9689 *cost += extra_cost->alu.arith_shift_reg;
9690 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9691 }
9692 else if (speed_p)
9693 *cost += extra_cost->alu.arith_shift;
9694
9695 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9696 + rtx_cost (non_shift_op, code, 0, speed_p));
9697 return true;
9698 }
9699
9700 if (arm_arch_thumb2
9701 && GET_CODE (XEXP (x, 1)) == MULT)
9702 {
9703 /* MLS. */
9704 if (speed_p)
9705 *cost += extra_cost->mult[0].add;
9706 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9707 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9708 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9709 return true;
9710 }
9711
9712 if (CONST_INT_P (XEXP (x, 0)))
9713 {
9714 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9715 INTVAL (XEXP (x, 0)), NULL_RTX,
9716 NULL_RTX, 1, 0);
9717 *cost = COSTS_N_INSNS (insns);
9718 if (speed_p)
9719 *cost += insns * extra_cost->alu.arith;
9720 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9721 return true;
9722 }
9723
9724 return false;
9725 }
9726
9727 if (GET_MODE_CLASS (mode) == MODE_INT
9728 && GET_MODE_SIZE (mode) < 4)
9729 {
9730 rtx shift_op, shift_reg;
9731 shift_reg = NULL;
9732
9733 /* We check both sides of the MINUS for shifter operands since,
9734 unlike PLUS, it's not commutative. */
9735
9736 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9737 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9738
9739 /* Slightly disparage, as we might need to widen the result. */
9740 *cost = 1 + COSTS_N_INSNS (1);
9741 if (speed_p)
9742 *cost += extra_cost->alu.arith;
9743
9744 if (CONST_INT_P (XEXP (x, 0)))
9745 {
9746 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9747 return true;
9748 }
9749
9750 return false;
9751 }
9752
9753 if (mode == DImode)
9754 {
9755 *cost = COSTS_N_INSNS (2);
9756
9757 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9758 {
9759 rtx op1 = XEXP (x, 1);
9760
9761 if (speed_p)
9762 *cost += 2 * extra_cost->alu.arith;
9763
9764 if (GET_CODE (op1) == ZERO_EXTEND)
9765 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9766 else
9767 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9768 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9769 0, speed_p);
9770 return true;
9771 }
9772 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9773 {
9774 if (speed_p)
9775 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9776 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9777 0, speed_p)
9778 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9779 return true;
9780 }
9781 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9782 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9783 {
9784 if (speed_p)
9785 *cost += (extra_cost->alu.arith
9786 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9787 ? extra_cost->alu.arith
9788 : extra_cost->alu.arith_shift));
9789 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9790 + rtx_cost (XEXP (XEXP (x, 1), 0),
9791 GET_CODE (XEXP (x, 1)), 0, speed_p));
9792 return true;
9793 }
9794
9795 if (speed_p)
9796 *cost += 2 * extra_cost->alu.arith;
9797 return false;
9798 }
9799
9800 /* Vector mode? */
9801
9802 *cost = LIBCALL_COST (2);
9803 return false;
9804
9805 case PLUS:
9806 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9807 && (mode == SFmode || !TARGET_VFP_SINGLE))
9808 {
9809 *cost = COSTS_N_INSNS (1);
9810 if (GET_CODE (XEXP (x, 0)) == MULT)
9811 {
9812 rtx mul_op0, mul_op1, add_op;
9813
9814 if (speed_p)
9815 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9816
9817 mul_op0 = XEXP (XEXP (x, 0), 0);
9818 mul_op1 = XEXP (XEXP (x, 0), 1);
9819 add_op = XEXP (x, 1);
9820
9821 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9822 + rtx_cost (mul_op1, code, 0, speed_p)
9823 + rtx_cost (add_op, code, 0, speed_p));
9824
9825 return true;
9826 }
9827
9828 if (speed_p)
9829 *cost += extra_cost->fp[mode != SFmode].addsub;
9830 return false;
9831 }
9832 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9833 {
9834 *cost = LIBCALL_COST (2);
9835 return false;
9836 }
9837
9838 /* Narrow modes can be synthesized in SImode, but the range
9839 of useful sub-operations is limited. Check for shift operations
9840 on one of the operands. Only left shifts can be used in the
9841 narrow modes. */
9842 if (GET_MODE_CLASS (mode) == MODE_INT
9843 && GET_MODE_SIZE (mode) < 4)
9844 {
9845 rtx shift_op, shift_reg;
9846 shift_reg = NULL;
9847
9848 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9849
9850 if (CONST_INT_P (XEXP (x, 1)))
9851 {
9852 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9853 INTVAL (XEXP (x, 1)), NULL_RTX,
9854 NULL_RTX, 1, 0);
9855 *cost = COSTS_N_INSNS (insns);
9856 if (speed_p)
9857 *cost += insns * extra_cost->alu.arith;
9858 /* Slightly penalize a narrow operation as the result may
9859 need widening. */
9860 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9861 return true;
9862 }
9863
9864 /* Slightly penalize a narrow operation as the result may
9865 need widening. */
9866 *cost = 1 + COSTS_N_INSNS (1);
9867 if (speed_p)
9868 *cost += extra_cost->alu.arith;
9869
9870 return false;
9871 }
9872
9873 if (mode == SImode)
9874 {
9875 rtx shift_op, shift_reg;
9876
9877 *cost = COSTS_N_INSNS (1);
9878 if (TARGET_INT_SIMD
9879 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9880 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9881 {
9882 /* UXTA[BH] or SXTA[BH]. */
9883 if (speed_p)
9884 *cost += extra_cost->alu.extend_arith;
9885 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9886 speed_p)
9887 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9888 return true;
9889 }
9890
9891 shift_reg = NULL;
9892 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9893 if (shift_op != NULL)
9894 {
9895 if (shift_reg)
9896 {
9897 if (speed_p)
9898 *cost += extra_cost->alu.arith_shift_reg;
9899 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9900 }
9901 else if (speed_p)
9902 *cost += extra_cost->alu.arith_shift;
9903
9904 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9905 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9906 return true;
9907 }
9908 if (GET_CODE (XEXP (x, 0)) == MULT)
9909 {
9910 rtx mul_op = XEXP (x, 0);
9911
9912 *cost = COSTS_N_INSNS (1);
9913
9914 if (TARGET_DSP_MULTIPLY
9915 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9916 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9917 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9918 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9919 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9920 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9921 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9922 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9923 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9924 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9925 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9926 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9927 == 16))))))
9928 {
9929 /* SMLA[BT][BT]. */
9930 if (speed_p)
9931 *cost += extra_cost->mult[0].extend_add;
9932 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9933 SIGN_EXTEND, 0, speed_p)
9934 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9935 SIGN_EXTEND, 0, speed_p)
9936 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9937 return true;
9938 }
9939
9940 if (speed_p)
9941 *cost += extra_cost->mult[0].add;
9942 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9943 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9944 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9945 return true;
9946 }
9947 if (CONST_INT_P (XEXP (x, 1)))
9948 {
9949 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9950 INTVAL (XEXP (x, 1)), NULL_RTX,
9951 NULL_RTX, 1, 0);
9952 *cost = COSTS_N_INSNS (insns);
9953 if (speed_p)
9954 *cost += insns * extra_cost->alu.arith;
9955 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9956 return true;
9957 }
9958 return false;
9959 }
9960
9961 if (mode == DImode)
9962 {
9963 if (arm_arch3m
9964 && GET_CODE (XEXP (x, 0)) == MULT
9965 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9966 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9967 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9968 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9969 {
9970 *cost = COSTS_N_INSNS (1);
9971 if (speed_p)
9972 *cost += extra_cost->mult[1].extend_add;
9973 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9974 ZERO_EXTEND, 0, speed_p)
9975 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9976 ZERO_EXTEND, 0, speed_p)
9977 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9978 return true;
9979 }
9980
9981 *cost = COSTS_N_INSNS (2);
9982
9983 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9984 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9985 {
9986 if (speed_p)
9987 *cost += (extra_cost->alu.arith
9988 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9989 ? extra_cost->alu.arith
9990 : extra_cost->alu.arith_shift));
9991
9992 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9993 speed_p)
9994 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9995 return true;
9996 }
9997
9998 if (speed_p)
9999 *cost += 2 * extra_cost->alu.arith;
10000 return false;
10001 }
10002
10003 /* Vector mode? */
10004 *cost = LIBCALL_COST (2);
10005 return false;
10006 case IOR:
10007 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10008 {
10009 *cost = COSTS_N_INSNS (1);
10010 if (speed_p)
10011 *cost += extra_cost->alu.rev;
10012
10013 return true;
10014 }
10015 /* Fall through. */
10016 case AND: case XOR:
10017 if (mode == SImode)
10018 {
10019 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10020 rtx op0 = XEXP (x, 0);
10021 rtx shift_op, shift_reg;
10022
10023 *cost = COSTS_N_INSNS (1);
10024
10025 if (subcode == NOT
10026 && (code == AND
10027 || (code == IOR && TARGET_THUMB2)))
10028 op0 = XEXP (op0, 0);
10029
10030 shift_reg = NULL;
10031 shift_op = shifter_op_p (op0, &shift_reg);
10032 if (shift_op != NULL)
10033 {
10034 if (shift_reg)
10035 {
10036 if (speed_p)
10037 *cost += extra_cost->alu.log_shift_reg;
10038 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10039 }
10040 else if (speed_p)
10041 *cost += extra_cost->alu.log_shift;
10042
10043 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10044 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10045 return true;
10046 }
10047
10048 if (CONST_INT_P (XEXP (x, 1)))
10049 {
10050 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10051 INTVAL (XEXP (x, 1)), NULL_RTX,
10052 NULL_RTX, 1, 0);
10053
10054 *cost = COSTS_N_INSNS (insns);
10055 if (speed_p)
10056 *cost += insns * extra_cost->alu.logical;
10057 *cost += rtx_cost (op0, code, 0, speed_p);
10058 return true;
10059 }
10060
10061 if (speed_p)
10062 *cost += extra_cost->alu.logical;
10063 *cost += (rtx_cost (op0, code, 0, speed_p)
10064 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10065 return true;
10066 }
10067
10068 if (mode == DImode)
10069 {
10070 rtx op0 = XEXP (x, 0);
10071 enum rtx_code subcode = GET_CODE (op0);
10072
10073 *cost = COSTS_N_INSNS (2);
10074
10075 if (subcode == NOT
10076 && (code == AND
10077 || (code == IOR && TARGET_THUMB2)))
10078 op0 = XEXP (op0, 0);
10079
10080 if (GET_CODE (op0) == ZERO_EXTEND)
10081 {
10082 if (speed_p)
10083 *cost += 2 * extra_cost->alu.logical;
10084
10085 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10086 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10087 return true;
10088 }
10089 else if (GET_CODE (op0) == SIGN_EXTEND)
10090 {
10091 if (speed_p)
10092 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10093
10094 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10095 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10096 return true;
10097 }
10098
10099 if (speed_p)
10100 *cost += 2 * extra_cost->alu.logical;
10101
10102 return true;
10103 }
10104 /* Vector mode? */
10105
10106 *cost = LIBCALL_COST (2);
10107 return false;
10108
10109 case MULT:
10110 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10111 && (mode == SFmode || !TARGET_VFP_SINGLE))
10112 {
10113 rtx op0 = XEXP (x, 0);
10114
10115 *cost = COSTS_N_INSNS (1);
10116
10117 if (GET_CODE (op0) == NEG)
10118 op0 = XEXP (op0, 0);
10119
10120 if (speed_p)
10121 *cost += extra_cost->fp[mode != SFmode].mult;
10122
10123 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10124 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10125 return true;
10126 }
10127 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10128 {
10129 *cost = LIBCALL_COST (2);
10130 return false;
10131 }
10132
10133 if (mode == SImode)
10134 {
10135 *cost = COSTS_N_INSNS (1);
10136 if (TARGET_DSP_MULTIPLY
10137 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10138 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10139 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10140 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10141 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10142 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10143 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10144 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10145 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10146 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10147 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10148 && (INTVAL (XEXP (XEXP (x, 1), 1))
10149 == 16))))))
10150 {
10151 /* SMUL[TB][TB]. */
10152 if (speed_p)
10153 *cost += extra_cost->mult[0].extend;
10154 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10155 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10156 return true;
10157 }
10158 if (speed_p)
10159 *cost += extra_cost->mult[0].simple;
10160 return false;
10161 }
10162
10163 if (mode == DImode)
10164 {
10165 if (arm_arch3m
10166 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10167 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10168 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10169 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10170 {
10171 *cost = COSTS_N_INSNS (1);
10172 if (speed_p)
10173 *cost += extra_cost->mult[1].extend;
10174 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10175 ZERO_EXTEND, 0, speed_p)
10176 + rtx_cost (XEXP (XEXP (x, 1), 0),
10177 ZERO_EXTEND, 0, speed_p));
10178 return true;
10179 }
10180
10181 *cost = LIBCALL_COST (2);
10182 return false;
10183 }
10184
10185 /* Vector mode? */
10186 *cost = LIBCALL_COST (2);
10187 return false;
10188
10189 case NEG:
10190 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10191 && (mode == SFmode || !TARGET_VFP_SINGLE))
10192 {
10193 *cost = COSTS_N_INSNS (1);
10194 if (speed_p)
10195 *cost += extra_cost->fp[mode != SFmode].neg;
10196
10197 return false;
10198 }
10199 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10200 {
10201 *cost = LIBCALL_COST (1);
10202 return false;
10203 }
10204
10205 if (mode == SImode)
10206 {
10207 if (GET_CODE (XEXP (x, 0)) == ABS)
10208 {
10209 *cost = COSTS_N_INSNS (2);
10210 /* Assume the non-flag-changing variant. */
10211 if (speed_p)
10212 *cost += (extra_cost->alu.log_shift
10213 + extra_cost->alu.arith_shift);
10214 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10215 return true;
10216 }
10217
10218 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10219 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10220 {
10221 *cost = COSTS_N_INSNS (2);
10222 /* No extra cost for MOV imm and MVN imm. */
10223 /* If the comparison op is using the flags, there's no further
10224 cost, otherwise we need to add the cost of the comparison. */
10225 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10226 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10227 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10228 {
10229 *cost += (COSTS_N_INSNS (1)
10230 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10231 speed_p)
10232 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10233 speed_p));
10234 if (speed_p)
10235 *cost += extra_cost->alu.arith;
10236 }
10237 return true;
10238 }
10239 *cost = COSTS_N_INSNS (1);
10240 if (speed_p)
10241 *cost += extra_cost->alu.arith;
10242 return false;
10243 }
10244
10245 if (GET_MODE_CLASS (mode) == MODE_INT
10246 && GET_MODE_SIZE (mode) < 4)
10247 {
10248 /* Slightly disparage, as we might need an extend operation. */
10249 *cost = 1 + COSTS_N_INSNS (1);
10250 if (speed_p)
10251 *cost += extra_cost->alu.arith;
10252 return false;
10253 }
10254
10255 if (mode == DImode)
10256 {
10257 *cost = COSTS_N_INSNS (2);
10258 if (speed_p)
10259 *cost += 2 * extra_cost->alu.arith;
10260 return false;
10261 }
10262
10263 /* Vector mode? */
10264 *cost = LIBCALL_COST (1);
10265 return false;
10266
10267 case NOT:
10268 if (mode == SImode)
10269 {
10270 rtx shift_op;
10271 rtx shift_reg = NULL;
10272
10273 *cost = COSTS_N_INSNS (1);
10274 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10275
10276 if (shift_op)
10277 {
10278 if (shift_reg != NULL)
10279 {
10280 if (speed_p)
10281 *cost += extra_cost->alu.log_shift_reg;
10282 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10283 }
10284 else if (speed_p)
10285 *cost += extra_cost->alu.log_shift;
10286 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10287 return true;
10288 }
10289
10290 if (speed_p)
10291 *cost += extra_cost->alu.logical;
10292 return false;
10293 }
10294 if (mode == DImode)
10295 {
10296 *cost = COSTS_N_INSNS (2);
10297 return false;
10298 }
10299
10300 /* Vector mode? */
10301
10302 *cost += LIBCALL_COST (1);
10303 return false;
10304
10305 case IF_THEN_ELSE:
10306 {
10307 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10308 {
10309 *cost = COSTS_N_INSNS (4);
10310 return true;
10311 }
10312 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10313 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10314
10315 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10316 /* Assume that if one arm of the if_then_else is a register,
10317 that it will be tied with the result and eliminate the
10318 conditional insn. */
10319 if (REG_P (XEXP (x, 1)))
10320 *cost += op2cost;
10321 else if (REG_P (XEXP (x, 2)))
10322 *cost += op1cost;
10323 else
10324 {
10325 if (speed_p)
10326 {
10327 if (extra_cost->alu.non_exec_costs_exec)
10328 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10329 else
10330 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10331 }
10332 else
10333 *cost += op1cost + op2cost;
10334 }
10335 }
10336 return true;
10337
10338 case COMPARE:
10339 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10340 *cost = 0;
10341 else
10342 {
10343 enum machine_mode op0mode;
10344 /* We'll mostly assume that the cost of a compare is the cost of the
10345 LHS. However, there are some notable exceptions. */
10346
10347 /* Floating point compares are never done as side-effects. */
10348 op0mode = GET_MODE (XEXP (x, 0));
10349 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10350 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10351 {
10352 *cost = COSTS_N_INSNS (1);
10353 if (speed_p)
10354 *cost += extra_cost->fp[op0mode != SFmode].compare;
10355
10356 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10357 {
10358 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10359 return true;
10360 }
10361
10362 return false;
10363 }
10364 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10365 {
10366 *cost = LIBCALL_COST (2);
10367 return false;
10368 }
10369
10370 /* DImode compares normally take two insns. */
10371 if (op0mode == DImode)
10372 {
10373 *cost = COSTS_N_INSNS (2);
10374 if (speed_p)
10375 *cost += 2 * extra_cost->alu.arith;
10376 return false;
10377 }
10378
10379 if (op0mode == SImode)
10380 {
10381 rtx shift_op;
10382 rtx shift_reg;
10383
10384 if (XEXP (x, 1) == const0_rtx
10385 && !(REG_P (XEXP (x, 0))
10386 || (GET_CODE (XEXP (x, 0)) == SUBREG
10387 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10388 {
10389 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10390
10391 /* Multiply operations that set the flags are often
10392 significantly more expensive. */
10393 if (speed_p
10394 && GET_CODE (XEXP (x, 0)) == MULT
10395 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10396 *cost += extra_cost->mult[0].flag_setting;
10397
10398 if (speed_p
10399 && GET_CODE (XEXP (x, 0)) == PLUS
10400 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10401 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10402 0), 1), mode))
10403 *cost += extra_cost->mult[0].flag_setting;
10404 return true;
10405 }
10406
10407 shift_reg = NULL;
10408 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10409 if (shift_op != NULL)
10410 {
10411 *cost = COSTS_N_INSNS (1);
10412 if (shift_reg != NULL)
10413 {
10414 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10415 if (speed_p)
10416 *cost += extra_cost->alu.arith_shift_reg;
10417 }
10418 else if (speed_p)
10419 *cost += extra_cost->alu.arith_shift;
10420 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10421 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10422 return true;
10423 }
10424
10425 *cost = COSTS_N_INSNS (1);
10426 if (speed_p)
10427 *cost += extra_cost->alu.arith;
10428 if (CONST_INT_P (XEXP (x, 1))
10429 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10430 {
10431 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10432 return true;
10433 }
10434 return false;
10435 }
10436
10437 /* Vector mode? */
10438
10439 *cost = LIBCALL_COST (2);
10440 return false;
10441 }
10442 return true;
10443
10444 case EQ:
10445 case NE:
10446 case LT:
10447 case LE:
10448 case GT:
10449 case GE:
10450 case LTU:
10451 case LEU:
10452 case GEU:
10453 case GTU:
10454 case ORDERED:
10455 case UNORDERED:
10456 case UNEQ:
10457 case UNLE:
10458 case UNLT:
10459 case UNGE:
10460 case UNGT:
10461 case LTGT:
10462 if (outer_code == SET)
10463 {
10464 /* Is it a store-flag operation? */
10465 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10466 && XEXP (x, 1) == const0_rtx)
10467 {
10468 /* Thumb also needs an IT insn. */
10469 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10470 return true;
10471 }
10472 if (XEXP (x, 1) == const0_rtx)
10473 {
10474 switch (code)
10475 {
10476 case LT:
10477 /* LSR Rd, Rn, #31. */
10478 *cost = COSTS_N_INSNS (1);
10479 if (speed_p)
10480 *cost += extra_cost->alu.shift;
10481 break;
10482
10483 case EQ:
10484 /* RSBS T1, Rn, #0
10485 ADC Rd, Rn, T1. */
10486
10487 case NE:
10488 /* SUBS T1, Rn, #1
10489 SBC Rd, Rn, T1. */
10490 *cost = COSTS_N_INSNS (2);
10491 break;
10492
10493 case LE:
10494 /* RSBS T1, Rn, Rn, LSR #31
10495 ADC Rd, Rn, T1. */
10496 *cost = COSTS_N_INSNS (2);
10497 if (speed_p)
10498 *cost += extra_cost->alu.arith_shift;
10499 break;
10500
10501 case GT:
10502 /* RSB Rd, Rn, Rn, ASR #1
10503 LSR Rd, Rd, #31. */
10504 *cost = COSTS_N_INSNS (2);
10505 if (speed_p)
10506 *cost += (extra_cost->alu.arith_shift
10507 + extra_cost->alu.shift);
10508 break;
10509
10510 case GE:
10511 /* ASR Rd, Rn, #31
10512 ADD Rd, Rn, #1. */
10513 *cost = COSTS_N_INSNS (2);
10514 if (speed_p)
10515 *cost += extra_cost->alu.shift;
10516 break;
10517
10518 default:
10519 /* Remaining cases are either meaningless or would take
10520 three insns anyway. */
10521 *cost = COSTS_N_INSNS (3);
10522 break;
10523 }
10524 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10525 return true;
10526 }
10527 else
10528 {
10529 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10530 if (CONST_INT_P (XEXP (x, 1))
10531 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10532 {
10533 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10534 return true;
10535 }
10536
10537 return false;
10538 }
10539 }
10540 /* Not directly inside a set. If it involves the condition code
10541 register it must be the condition for a branch, cond_exec or
10542 I_T_E operation. Since the comparison is performed elsewhere
10543 this is just the control part which has no additional
10544 cost. */
10545 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10546 && XEXP (x, 1) == const0_rtx)
10547 {
10548 *cost = 0;
10549 return true;
10550 }
10551 return false;
10552
10553 case ABS:
10554 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10555 && (mode == SFmode || !TARGET_VFP_SINGLE))
10556 {
10557 *cost = COSTS_N_INSNS (1);
10558 if (speed_p)
10559 *cost += extra_cost->fp[mode != SFmode].neg;
10560
10561 return false;
10562 }
10563 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10564 {
10565 *cost = LIBCALL_COST (1);
10566 return false;
10567 }
10568
10569 if (mode == SImode)
10570 {
10571 *cost = COSTS_N_INSNS (1);
10572 if (speed_p)
10573 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10574 return false;
10575 }
10576 /* Vector mode? */
10577 *cost = LIBCALL_COST (1);
10578 return false;
10579
10580 case SIGN_EXTEND:
10581 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10582 && MEM_P (XEXP (x, 0)))
10583 {
10584 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10585
10586 if (mode == DImode)
10587 *cost += COSTS_N_INSNS (1);
10588
10589 if (!speed_p)
10590 return true;
10591
10592 if (GET_MODE (XEXP (x, 0)) == SImode)
10593 *cost += extra_cost->ldst.load;
10594 else
10595 *cost += extra_cost->ldst.load_sign_extend;
10596
10597 if (mode == DImode)
10598 *cost += extra_cost->alu.shift;
10599
10600 return true;
10601 }
10602
10603 /* Widening from less than 32 bits requires an extend operation. */
10604 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10605 {
10606 /* We have SXTB/SXTH. */
10607 *cost = COSTS_N_INSNS (1);
10608 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10609 if (speed_p)
10610 *cost += extra_cost->alu.extend;
10611 }
10612 else if (GET_MODE (XEXP (x, 0)) != SImode)
10613 {
10614 /* Needs two shifts. */
10615 *cost = COSTS_N_INSNS (2);
10616 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10617 if (speed_p)
10618 *cost += 2 * extra_cost->alu.shift;
10619 }
10620
10621 /* Widening beyond 32 bits requires one more insn. */
10622 if (mode == DImode)
10623 {
10624 *cost += COSTS_N_INSNS (1);
10625 if (speed_p)
10626 *cost += extra_cost->alu.shift;
10627 }
10628
10629 return true;
10630
10631 case ZERO_EXTEND:
10632 if ((arm_arch4
10633 || GET_MODE (XEXP (x, 0)) == SImode
10634 || GET_MODE (XEXP (x, 0)) == QImode)
10635 && MEM_P (XEXP (x, 0)))
10636 {
10637 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10638
10639 if (mode == DImode)
10640 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10641
10642 return true;
10643 }
10644
10645 /* Widening from less than 32 bits requires an extend operation. */
10646 if (GET_MODE (XEXP (x, 0)) == QImode)
10647 {
10648 /* UXTB can be a shorter instruction in Thumb2, but it might
10649 be slower than the AND Rd, Rn, #255 alternative. When
10650 optimizing for speed it should never be slower to use
10651 AND, and we don't really model 16-bit vs 32-bit insns
10652 here. */
10653 *cost = COSTS_N_INSNS (1);
10654 if (speed_p)
10655 *cost += extra_cost->alu.logical;
10656 }
10657 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10658 {
10659 /* We have UXTB/UXTH. */
10660 *cost = COSTS_N_INSNS (1);
10661 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10662 if (speed_p)
10663 *cost += extra_cost->alu.extend;
10664 }
10665 else if (GET_MODE (XEXP (x, 0)) != SImode)
10666 {
10667 /* Needs two shifts. It's marginally preferable to use
10668 shifts rather than two BIC instructions as the second
10669 shift may merge with a subsequent insn as a shifter
10670 op. */
10671 *cost = COSTS_N_INSNS (2);
10672 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10673 if (speed_p)
10674 *cost += 2 * extra_cost->alu.shift;
10675 }
10676 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10677 *cost = COSTS_N_INSNS (1);
10678
10679 /* Widening beyond 32 bits requires one more insn. */
10680 if (mode == DImode)
10681 {
10682 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10683 }
10684
10685 return true;
10686
10687 case CONST_INT:
10688 *cost = 0;
10689 /* CONST_INT has no mode, so we cannot tell for sure how many
10690 insns are really going to be needed. The best we can do is
10691 look at the value passed. If it fits in SImode, then assume
10692 that's the mode it will be used for. Otherwise assume it
10693 will be used in DImode. */
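/* In the DImode case the constant is costed below as two independent
   SImode immediates, one synthesized for the low word and one for the
   high word.  */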
10694 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10695 mode = SImode;
10696 else
10697 mode = DImode;
10698
10699 /* Avoid blowing up in arm_gen_constant (). */
10700 if (!(outer_code == PLUS
10701 || outer_code == AND
10702 || outer_code == IOR
10703 || outer_code == XOR
10704 || outer_code == MINUS))
10705 outer_code = SET;
10706
10707 const_int_cost:
10708 if (mode == SImode)
10709 {
10710 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10711 INTVAL (x), NULL, NULL,
10712 0, 0));
10713 /* Extra costs? */
10714 }
10715 else
10716 {
10717 *cost += COSTS_N_INSNS (arm_gen_constant
10718 (outer_code, SImode, NULL,
10719 trunc_int_for_mode (INTVAL (x), SImode),
10720 NULL, NULL, 0, 0)
10721 + arm_gen_constant (outer_code, SImode, NULL,
10722 INTVAL (x) >> 32, NULL,
10723 NULL, 0, 0));
10724 /* Extra costs? */
10725 }
10726
10727 return true;
10728
10729 case CONST:
10730 case LABEL_REF:
10731 case SYMBOL_REF:
10732 if (speed_p)
10733 {
10734 if (arm_arch_thumb2 && !flag_pic)
10735 *cost = COSTS_N_INSNS (2);
10736 else
10737 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10738 }
10739 else
10740 *cost = COSTS_N_INSNS (2);
10741
10742 if (flag_pic)
10743 {
10744 *cost += COSTS_N_INSNS (1);
10745 if (speed_p)
10746 *cost += extra_cost->alu.arith;
10747 }
10748
10749 return true;
10750
10751 case CONST_FIXED:
10752 *cost = COSTS_N_INSNS (4);
10753 /* Fixme. */
10754 return true;
10755
10756 case CONST_DOUBLE:
10757 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10758 && (mode == SFmode || !TARGET_VFP_SINGLE))
10759 {
10760 if (vfp3_const_double_rtx (x))
10761 {
10762 *cost = COSTS_N_INSNS (1);
10763 if (speed_p)
10764 *cost += extra_cost->fp[mode == DFmode].fpconst;
10765 return true;
10766 }
10767
10768 if (speed_p)
10769 {
10770 *cost = COSTS_N_INSNS (1);
10771 if (mode == DFmode)
10772 *cost += extra_cost->ldst.loadd;
10773 else
10774 *cost += extra_cost->ldst.loadf;
10775 }
10776 else
10777 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10778
10779 return true;
10780 }
10781 *cost = COSTS_N_INSNS (4);
10782 return true;
10783
10784 case CONST_VECTOR:
10785 /* Fixme. */
10786 if (TARGET_NEON
10787 && TARGET_HARD_FLOAT
10788 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10789 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10790 *cost = COSTS_N_INSNS (1);
10791 else
10792 *cost = COSTS_N_INSNS (4);
10793 return true;
10794
10795 case HIGH:
10796 case LO_SUM:
10797 *cost = COSTS_N_INSNS (1);
10798 /* When optimizing for size, we prefer constant pool entries to
10799 MOVW/MOVT pairs, so bump the cost of these slightly. */
10800 if (!speed_p)
10801 *cost += 1;
10802 return true;
10803
10804 case CLZ:
10805 *cost = COSTS_N_INSNS (1);
10806 if (speed_p)
10807 *cost += extra_cost->alu.clz;
10808 return false;
10809
10810 case SMIN:
10811 if (XEXP (x, 1) == const0_rtx)
10812 {
10813 *cost = COSTS_N_INSNS (1);
10814 if (speed_p)
10815 *cost += extra_cost->alu.log_shift;
10816 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10817 return true;
10818 }
10819 /* Fall through. */
10820 case SMAX:
10821 case UMIN:
10822 case UMAX:
10823 *cost = COSTS_N_INSNS (2);
10824 return false;
10825
10826 case TRUNCATE:
10827 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10828 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10829 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10830 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10831 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10832 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10833 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10834 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10835 == ZERO_EXTEND))))
10836 {
10837 *cost = COSTS_N_INSNS (1);
10838 if (speed_p)
10839 *cost += extra_cost->mult[1].extend;
10840 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10841 speed_p)
10842 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10843 0, speed_p));
10844 return true;
10845 }
10846 *cost = LIBCALL_COST (1);
10847 return false;
10848
10849 case UNSPEC:
10850 return arm_unspec_cost (x, outer_code, speed_p, cost);
10851
10852 case PC:
10853 /* Reading the PC is like reading any other register. Writing it
10854 is more expensive, but we take that into account elsewhere. */
10855 *cost = 0;
10856 return true;
10857
10858 case ZERO_EXTRACT:
10859 /* TODO: Simple zero_extract of bottom bits using AND. */
10860 /* Fall through. */
10861 case SIGN_EXTRACT:
10862 if (arm_arch6
10863 && mode == SImode
10864 && CONST_INT_P (XEXP (x, 1))
10865 && CONST_INT_P (XEXP (x, 2)))
10866 {
10867 *cost = COSTS_N_INSNS (1);
10868 if (speed_p)
10869 *cost += extra_cost->alu.bfx;
10870 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10871 return true;
10872 }
10873 /* Without UBFX/SBFX, need to resort to shift operations. */
10874 *cost = COSTS_N_INSNS (2);
10875 if (speed_p)
10876 *cost += 2 * extra_cost->alu.shift;
10877 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10878 return true;
10879
10880 case FLOAT_EXTEND:
10881 if (TARGET_HARD_FLOAT)
10882 {
10883 *cost = COSTS_N_INSNS (1);
10884 if (speed_p)
10885 *cost += extra_cost->fp[mode == DFmode].widen;
10886 if (!TARGET_FPU_ARMV8
10887 && GET_MODE (XEXP (x, 0)) == HFmode)
10888 {
10889 /* Pre v8, widening HF->DF is a two-step process, first
10890 widening to SFmode. */
10891 *cost += COSTS_N_INSNS (1);
10892 if (speed_p)
10893 *cost += extra_cost->fp[0].widen;
10894 }
10895 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10896 return true;
10897 }
10898
10899 *cost = LIBCALL_COST (1);
10900 return false;
10901
10902 case FLOAT_TRUNCATE:
10903 if (TARGET_HARD_FLOAT)
10904 {
10905 *cost = COSTS_N_INSNS (1);
10906 if (speed_p)
10907 *cost += extra_cost->fp[mode == DFmode].narrow;
10908 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10909 return true;
10910 /* Vector modes? */
10911 }
10912 *cost = LIBCALL_COST (1);
10913 return false;
10914
10915 case FMA:
10916 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10917 {
10918 rtx op0 = XEXP (x, 0);
10919 rtx op1 = XEXP (x, 1);
10920 rtx op2 = XEXP (x, 2);
10921
10922 *cost = COSTS_N_INSNS (1);
10923
10924 /* vfms or vfnma. */
10925 if (GET_CODE (op0) == NEG)
10926 op0 = XEXP (op0, 0);
10927
10928 /* vfnms or vfnma. */
10929 if (GET_CODE (op2) == NEG)
10930 op2 = XEXP (op2, 0);
10931
10932 *cost += rtx_cost (op0, FMA, 0, speed_p);
10933 *cost += rtx_cost (op1, FMA, 1, speed_p);
10934 *cost += rtx_cost (op2, FMA, 2, speed_p);
10935
10936 if (speed_p)
10937 *cost += extra_cost->fp[mode == DFmode].fma;
10938
10939 return true;
10940 }
10941
10942 *cost = LIBCALL_COST (3);
10943 return false;
10944
10945 case FIX:
10946 case UNSIGNED_FIX:
10947 if (TARGET_HARD_FLOAT)
10948 {
10949 if (GET_MODE_CLASS (mode) == MODE_INT)
10950 {
10951 *cost = COSTS_N_INSNS (1);
10952 if (speed_p)
10953 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10954 /* Strip off the 'cost' of rounding towards zero. */
10955 if (GET_CODE (XEXP (x, 0)) == FIX)
10956 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10957 else
10958 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10959 /* ??? Increase the cost to deal with transferring from
10960 FP -> CORE registers? */
10961 return true;
10962 }
10963 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10964 && TARGET_FPU_ARMV8)
10965 {
10966 *cost = COSTS_N_INSNS (1);
10967 if (speed_p)
10968 *cost += extra_cost->fp[mode == DFmode].roundint;
10969 return false;
10970 }
10971 /* Vector costs? */
10972 }
10973 *cost = LIBCALL_COST (1);
10974 return false;
10975
10976 case FLOAT:
10977 case UNSIGNED_FLOAT:
10978 if (TARGET_HARD_FLOAT)
10979 {
10980 /* ??? Increase the cost to deal with transferring from CORE
10981 -> FP registers? */
10982 *cost = COSTS_N_INSNS (1);
10983 if (speed_p)
10984 *cost += extra_cost->fp[mode == DFmode].fromint;
10985 return false;
10986 }
10987 *cost = LIBCALL_COST (1);
10988 return false;
10989
10990 case CALL:
10991 *cost = COSTS_N_INSNS (1);
10992 return true;
10993
10994 case ASM_OPERANDS:
10995 {
10996 /* Just a guess: count the instructions in the asm template plus one
10997 insn per input operand, always with a minimum of COSTS_N_INSNS (1)
10998 (see PR60663). */
10999 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11000 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11001
11002 *cost = COSTS_N_INSNS (asm_length + num_operands);
11003 return true;
11004 }
11005 default:
11006 if (mode != VOIDmode)
11007 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11008 else
11009 *cost = COSTS_N_INSNS (4); /* Who knows? */
11010 return false;
11011 }
11012 }
11013
11014 #undef HANDLE_NARROW_SHIFT_ARITH
11015
11016 /* RTX costs. Dispatch to the size or speed cost function appropriate for the current tuning. */
11017 static bool
11018 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11019 int *total, bool speed)
11020 {
11021 bool result;
11022
11023 if (TARGET_OLD_RTX_COSTS
11024 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11025 {
11026 /* Old way. (Deprecated.) */
11027 if (!speed)
11028 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11029 (enum rtx_code) outer_code, total);
11030 else
11031 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11032 (enum rtx_code) outer_code, total,
11033 speed);
11034 }
11035 else
11036 {
11037 /* New way. */
11038 if (current_tune->insn_extra_cost)
11039 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11040 (enum rtx_code) outer_code,
11041 current_tune->insn_extra_cost,
11042 total, speed);
11043 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11044 && current_tune->insn_extra_cost == NULL. */
11045 else
11046 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11047 (enum rtx_code) outer_code,
11048 &generic_extra_costs, total, speed);
11049 }
11050
11051 if (dump_file && (dump_flags & TDF_DETAILS))
11052 {
11053 print_rtl_single (dump_file, x);
11054 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11055 *total, result ? "final" : "partial");
11056 }
11057 return result;
11058 }
11059
11060 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11061 supported on any "slowmul" cores, so it can be ignored. */
11062
11063 static bool
11064 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11065 int *total, bool speed)
11066 {
11067 enum machine_mode mode = GET_MODE (x);
11068
11069 if (TARGET_THUMB)
11070 {
11071 *total = thumb1_rtx_costs (x, code, outer_code);
11072 return true;
11073 }
11074
11075 switch (code)
11076 {
11077 case MULT:
11078 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11079 || mode == DImode)
11080 {
11081 *total = COSTS_N_INSNS (20);
11082 return false;
11083 }
11084
11085 if (CONST_INT_P (XEXP (x, 1)))
11086 {
11087 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11088 & (unsigned HOST_WIDE_INT) 0xffffffff);
11089 int cost, const_ok = const_ok_for_arm (i);
11090 int j, booth_unit_size;
11091
11092 /* Tune as appropriate. */
11093 cost = const_ok ? 4 : 8;
11094 booth_unit_size = 2;
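/* Each iteration below models one 2-bit Booth step, so a constant with N
   significant bits adds roughly N/2 to COST (e.g. an 8-bit constant adds 4). */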
11095 for (j = 0; i && j < 32; j += booth_unit_size)
11096 {
11097 i >>= booth_unit_size;
11098 cost++;
11099 }
11100
11101 *total = COSTS_N_INSNS (cost);
11102 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11103 return true;
11104 }
11105
11106 *total = COSTS_N_INSNS (20);
11107 return false;
11108
11109 default:
11110 return arm_rtx_costs_1 (x, outer_code, total, speed);
11111 }
11112 }
11113
11114
11115 /* RTX cost for cores with a fast multiply unit (M variants). */
11116
11117 static bool
11118 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11119 int *total, bool speed)
11120 {
11121 enum machine_mode mode = GET_MODE (x);
11122
11123 if (TARGET_THUMB1)
11124 {
11125 *total = thumb1_rtx_costs (x, code, outer_code);
11126 return true;
11127 }
11128
11129 /* ??? should thumb2 use different costs? */
11130 switch (code)
11131 {
11132 case MULT:
11133 /* There is no point basing this on the tuning, since it is always the
11134 fast variant if it exists at all. */
11135 if (mode == DImode
11136 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11137 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11138 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11139 {
11140 *total = COSTS_N_INSNS (2);
11141 return false;
11142 }
11143
11144
11145 if (mode == DImode)
11146 {
11147 *total = COSTS_N_INSNS (5);
11148 return false;
11149 }
11150
11151 if (CONST_INT_P (XEXP (x, 1)))
11152 {
11153 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11154 & (unsigned HOST_WIDE_INT) 0xffffffff);
11155 int cost, const_ok = const_ok_for_arm (i);
11156 int j, booth_unit_size;
11157
11158 /* Tune as appropriate. */
11159 cost = const_ok ? 4 : 8;
11160 booth_unit_size = 8;
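/* Here each step covers an 8-bit chunk, so even a full 32-bit constant
   adds at most 4 to COST. */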
11161 for (j = 0; i && j < 32; j += booth_unit_size)
11162 {
11163 i >>= booth_unit_size;
11164 cost++;
11165 }
11166
11167 *total = COSTS_N_INSNS (cost);
11168 return false;
11169 }
11170
11171 if (mode == SImode)
11172 {
11173 *total = COSTS_N_INSNS (4);
11174 return false;
11175 }
11176
11177 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11178 {
11179 if (TARGET_HARD_FLOAT
11180 && (mode == SFmode
11181 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11182 {
11183 *total = COSTS_N_INSNS (1);
11184 return false;
11185 }
11186 }
11187
11188 /* Requires a lib call */
11189 *total = COSTS_N_INSNS (20);
11190 return false;
11191
11192 default:
11193 return arm_rtx_costs_1 (x, outer_code, total, speed);
11194 }
11195 }
11196
11197
11198 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11199 so it can be ignored. */
11200
11201 static bool
11202 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11203 int *total, bool speed)
11204 {
11205 enum machine_mode mode = GET_MODE (x);
11206
11207 if (TARGET_THUMB)
11208 {
11209 *total = thumb1_rtx_costs (x, code, outer_code);
11210 return true;
11211 }
11212
11213 switch (code)
11214 {
11215 case COMPARE:
11216 if (GET_CODE (XEXP (x, 0)) != MULT)
11217 return arm_rtx_costs_1 (x, outer_code, total, speed);
11218
11219 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11220 will stall until the multiplication is complete. */
11221 *total = COSTS_N_INSNS (3);
11222 return false;
11223
11224 case MULT:
11225 /* There is no point basing this on the tuning, since it is always the
11226 fast variant if it exists at all. */
11227 if (mode == DImode
11228 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11229 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11230 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11231 {
11232 *total = COSTS_N_INSNS (2);
11233 return false;
11234 }
11235
11236
11237 if (mode == DImode)
11238 {
11239 *total = COSTS_N_INSNS (5);
11240 return false;
11241 }
11242
11243 if (CONST_INT_P (XEXP (x, 1)))
11244 {
11245 /* If operand 1 is a constant we can more accurately
11246 calculate the cost of the multiply. The multiplier can
11247 retire 15 bits on the first cycle and a further 12 on the
11248 second. We do, of course, have to load the constant into
11249 a register first. */
11250 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11251 /* There's a general overhead of one cycle. */
11252 int cost = 1;
11253 unsigned HOST_WIDE_INT masked_const;
11254
11255 if (i & 0x80000000)
11256 i = ~i;
11257
11258 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11259
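/* Per the note above: bits 0-14 retire in the first cycle; if any bits
   above bit 14 are set a second cycle is needed, and if any bits above
   bit 26 are set a third cycle is needed. */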
11260 masked_const = i & 0xffff8000;
11261 if (masked_const != 0)
11262 {
11263 cost++;
11264 masked_const = i & 0xf8000000;
11265 if (masked_const != 0)
11266 cost++;
11267 }
11268 *total = COSTS_N_INSNS (cost);
11269 return false;
11270 }
11271
11272 if (mode == SImode)
11273 {
11274 *total = COSTS_N_INSNS (3);
11275 return false;
11276 }
11277
11278 /* Requires a lib call */
11279 *total = COSTS_N_INSNS (20);
11280 return false;
11281
11282 default:
11283 return arm_rtx_costs_1 (x, outer_code, total, speed);
11284 }
11285 }
11286
11287
11288 /* RTX costs for 9e (and later) cores. */
11289
11290 static bool
11291 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11292 int *total, bool speed)
11293 {
11294 enum machine_mode mode = GET_MODE (x);
11295
11296 if (TARGET_THUMB1)
11297 {
11298 switch (code)
11299 {
11300 case MULT:
11301 *total = COSTS_N_INSNS (3);
11302 return true;
11303
11304 default:
11305 *total = thumb1_rtx_costs (x, code, outer_code);
11306 return true;
11307 }
11308 }
11309
11310 switch (code)
11311 {
11312 case MULT:
11313 /* There is no point basing this on the tuning, since it is always the
11314 fast variant if it exists at all. */
11315 if (mode == DImode
11316 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11317 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11318 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11319 {
11320 *total = COSTS_N_INSNS (2);
11321 return false;
11322 }
11323
11324
11325 if (mode == DImode)
11326 {
11327 *total = COSTS_N_INSNS (5);
11328 return false;
11329 }
11330
11331 if (mode == SImode)
11332 {
11333 *total = COSTS_N_INSNS (2);
11334 return false;
11335 }
11336
11337 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11338 {
11339 if (TARGET_HARD_FLOAT
11340 && (mode == SFmode
11341 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11342 {
11343 *total = COSTS_N_INSNS (1);
11344 return false;
11345 }
11346 }
11347
11348 *total = COSTS_N_INSNS (20);
11349 return false;
11350
11351 default:
11352 return arm_rtx_costs_1 (x, outer_code, total, speed);
11353 }
11354 }
11355 /* All address computations that can be done are free, but rtx cost returns
11356 the same for practically all of them. So we weight the different types
11357 of address here in the order (most preferred first):
11358 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11359 static inline int
11360 arm_arm_address_cost (rtx x)
11361 {
11362 enum rtx_code c = GET_CODE (x);
11363
11364 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11365 return 0;
11366 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11367 return 10;
11368
11369 if (c == PLUS)
11370 {
11371 if (CONST_INT_P (XEXP (x, 1)))
11372 return 2;
11373
11374 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11375 return 3;
11376
11377 return 4;
11378 }
11379
11380 return 6;
11381 }
11382
11383 static inline int
11384 arm_thumb_address_cost (rtx x)
11385 {
11386 enum rtx_code c = GET_CODE (x);
11387
11388 if (c == REG)
11389 return 1;
11390 if (c == PLUS
11391 && REG_P (XEXP (x, 0))
11392 && CONST_INT_P (XEXP (x, 1)))
11393 return 1;
11394
11395 return 2;
11396 }
11397
11398 static int
11399 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11400 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11401 {
11402 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11403 }
11404
11405 /* Adjust cost hook for XScale. */
11406 static bool
11407 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11408 {
11409 /* Some true dependencies can have a higher cost depending
11410 on precisely how certain input operands are used. */
11411 if (REG_NOTE_KIND(link) == 0
11412 && recog_memoized (insn) >= 0
11413 && recog_memoized (dep) >= 0)
11414 {
11415 int shift_opnum = get_attr_shift (insn);
11416 enum attr_type attr_type = get_attr_type (dep);
11417
11418 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11419 operand for INSN. If we have a shifted input operand and the
11420 instruction we depend on is another ALU instruction, then we may
11421 have to account for an additional stall. */
11422 if (shift_opnum != 0
11423 && (attr_type == TYPE_ALU_SHIFT_IMM
11424 || attr_type == TYPE_ALUS_SHIFT_IMM
11425 || attr_type == TYPE_LOGIC_SHIFT_IMM
11426 || attr_type == TYPE_LOGICS_SHIFT_IMM
11427 || attr_type == TYPE_ALU_SHIFT_REG
11428 || attr_type == TYPE_ALUS_SHIFT_REG
11429 || attr_type == TYPE_LOGIC_SHIFT_REG
11430 || attr_type == TYPE_LOGICS_SHIFT_REG
11431 || attr_type == TYPE_MOV_SHIFT
11432 || attr_type == TYPE_MVN_SHIFT
11433 || attr_type == TYPE_MOV_SHIFT_REG
11434 || attr_type == TYPE_MVN_SHIFT_REG))
11435 {
11436 rtx shifted_operand;
11437 int opno;
11438
11439 /* Get the shifted operand. */
11440 extract_insn (insn);
11441 shifted_operand = recog_data.operand[shift_opnum];
11442
11443 /* Iterate over all the operands in DEP. If we write an operand
11444 that overlaps with SHIFTED_OPERAND, then we have to increase the
11445 cost of this dependency. */
11446 extract_insn (dep);
11447 preprocess_constraints (dep);
11448 for (opno = 0; opno < recog_data.n_operands; opno++)
11449 {
11450 /* We can ignore strict inputs. */
11451 if (recog_data.operand_type[opno] == OP_IN)
11452 continue;
11453
11454 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11455 shifted_operand))
11456 {
11457 *cost = 2;
11458 return false;
11459 }
11460 }
11461 }
11462 }
11463 return true;
11464 }
11465
11466 /* Adjust cost hook for Cortex A9. */
11467 static bool
11468 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11469 {
11470 switch (REG_NOTE_KIND (link))
11471 {
11472 case REG_DEP_ANTI:
11473 *cost = 0;
11474 return false;
11475
11476 case REG_DEP_TRUE:
11477 case REG_DEP_OUTPUT:
11478 if (recog_memoized (insn) >= 0
11479 && recog_memoized (dep) >= 0)
11480 {
11481 if (GET_CODE (PATTERN (insn)) == SET)
11482 {
11483 if (GET_MODE_CLASS
11484 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11485 || GET_MODE_CLASS
11486 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11487 {
11488 enum attr_type attr_type_insn = get_attr_type (insn);
11489 enum attr_type attr_type_dep = get_attr_type (dep);
11490
11491 /* By default all dependencies of the form
11492 s0 = s0 <op> s1
11493 s0 = s0 <op> s2
11494 have an extra latency of 1 cycle because
11495 of the input and output dependency in this
11496 case. However this gets modeled as a true
11497 dependency, hence all these checks. */
11498 if (REG_P (SET_DEST (PATTERN (insn)))
11499 && REG_P (SET_DEST (PATTERN (dep)))
11500 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11501 SET_DEST (PATTERN (dep))))
11502 {
11503 /* FMACS is a special case where the dependent
11504 instruction can be issued 3 cycles before
11505 the normal latency in case of an output
11506 dependency. */
11507 if ((attr_type_insn == TYPE_FMACS
11508 || attr_type_insn == TYPE_FMACD)
11509 && (attr_type_dep == TYPE_FMACS
11510 || attr_type_dep == TYPE_FMACD))
11511 {
11512 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11513 *cost = insn_default_latency (dep) - 3;
11514 else
11515 *cost = insn_default_latency (dep);
11516 return false;
11517 }
11518 else
11519 {
11520 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11521 *cost = insn_default_latency (dep) + 1;
11522 else
11523 *cost = insn_default_latency (dep);
11524 }
11525 return false;
11526 }
11527 }
11528 }
11529 }
11530 break;
11531
11532 default:
11533 gcc_unreachable ();
11534 }
11535
11536 return true;
11537 }
11538
11539 /* Adjust cost hook for FA726TE. */
11540 static bool
11541 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11542 {
11543 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11544 followed by a predicated one) has a penalty of 3. */
11545 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11546 && recog_memoized (insn) >= 0
11547 && recog_memoized (dep) >= 0
11548 && get_attr_conds (dep) == CONDS_SET)
11549 {
11550 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11551 if (get_attr_conds (insn) == CONDS_USE
11552 && get_attr_type (insn) != TYPE_BRANCH)
11553 {
11554 *cost = 3;
11555 return false;
11556 }
11557
11558 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11559 || get_attr_conds (insn) == CONDS_USE)
11560 {
11561 *cost = 0;
11562 return false;
11563 }
11564 }
11565
11566 return true;
11567 }
11568
11569 /* Implement TARGET_REGISTER_MOVE_COST.
11570
11571 Moves between VFP_REGS and GENERAL_REGS are a single insn, but such
11572 a move is typically more expensive than a single memory access. We set
11573 the cost to less than two memory accesses so that floating
11574 point to integer conversion does not go through memory. */
11575
11576 int
11577 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11578 reg_class_t from, reg_class_t to)
11579 {
11580 if (TARGET_32BIT)
11581 {
11582 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11583 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11584 return 15;
11585 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11586 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11587 return 4;
11588 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11589 return 20;
11590 else
11591 return 2;
11592 }
11593 else
11594 {
11595 if (from == HI_REGS || to == HI_REGS)
11596 return 4;
11597 else
11598 return 2;
11599 }
11600 }
11601
11602 /* Implement TARGET_MEMORY_MOVE_COST. */
11603
11604 int
11605 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11606 bool in ATTRIBUTE_UNUSED)
11607 {
11608 if (TARGET_32BIT)
11609 return 10;
11610 else
11611 {
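/* Thumb-1: sub-word moves are costed at 8; otherwise the cost scales with
   the mode size, e.g. an 8-byte DImode move costs 16 through LO_REGS and
   32 through other classes. */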
11612 if (GET_MODE_SIZE (mode) < 4)
11613 return 8;
11614 else
11615 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11616 }
11617 }
11618
11619 /* Vectorizer cost model implementation. */
11620
11621 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11622 static int
11623 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11624 tree vectype,
11625 int misalign ATTRIBUTE_UNUSED)
11626 {
11627 unsigned elements;
11628
11629 switch (type_of_cost)
11630 {
11631 case scalar_stmt:
11632 return current_tune->vec_costs->scalar_stmt_cost;
11633
11634 case scalar_load:
11635 return current_tune->vec_costs->scalar_load_cost;
11636
11637 case scalar_store:
11638 return current_tune->vec_costs->scalar_store_cost;
11639
11640 case vector_stmt:
11641 return current_tune->vec_costs->vec_stmt_cost;
11642
11643 case vector_load:
11644 return current_tune->vec_costs->vec_align_load_cost;
11645
11646 case vector_store:
11647 return current_tune->vec_costs->vec_store_cost;
11648
11649 case vec_to_scalar:
11650 return current_tune->vec_costs->vec_to_scalar_cost;
11651
11652 case scalar_to_vec:
11653 return current_tune->vec_costs->scalar_to_vec_cost;
11654
11655 case unaligned_load:
11656 return current_tune->vec_costs->vec_unalign_load_cost;
11657
11658 case unaligned_store:
11659 return current_tune->vec_costs->vec_unalign_store_cost;
11660
11661 case cond_branch_taken:
11662 return current_tune->vec_costs->cond_taken_branch_cost;
11663
11664 case cond_branch_not_taken:
11665 return current_tune->vec_costs->cond_not_taken_branch_cost;
11666
11667 case vec_perm:
11668 case vec_promote_demote:
11669 return current_tune->vec_costs->vec_stmt_cost;
11670
11671 case vec_construct:
11672 elements = TYPE_VECTOR_SUBPARTS (vectype);
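/* E.g. constructing a 4-lane vector is costed as 4/2 + 1 = 3. */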
11673 return elements / 2 + 1;
11674
11675 default:
11676 gcc_unreachable ();
11677 }
11678 }
11679
11680 /* Implement targetm.vectorize.add_stmt_cost. */
11681
11682 static unsigned
11683 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11684 struct _stmt_vec_info *stmt_info, int misalign,
11685 enum vect_cost_model_location where)
11686 {
11687 unsigned *cost = (unsigned *) data;
11688 unsigned retval = 0;
11689
11690 if (flag_vect_cost_model)
11691 {
11692 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11693 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11694
11695 /* Statements in an inner loop relative to the loop being
11696 vectorized are weighted more heavily. The value here is
11697 arbitrary and could potentially be improved with analysis. */
11698 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11699 count *= 50; /* FIXME. */
11700
11701 retval = (unsigned) (count * stmt_cost);
11702 cost[where] += retval;
11703 }
11704
11705 return retval;
11706 }
11707
11708 /* Return true if and only if this insn can dual-issue only as older. */
11709 static bool
11710 cortexa7_older_only (rtx insn)
11711 {
11712 if (recog_memoized (insn) < 0)
11713 return false;
11714
11715 switch (get_attr_type (insn))
11716 {
11717 case TYPE_ALU_DSP_REG:
11718 case TYPE_ALU_SREG:
11719 case TYPE_ALUS_SREG:
11720 case TYPE_LOGIC_REG:
11721 case TYPE_LOGICS_REG:
11722 case TYPE_ADC_REG:
11723 case TYPE_ADCS_REG:
11724 case TYPE_ADR:
11725 case TYPE_BFM:
11726 case TYPE_REV:
11727 case TYPE_MVN_REG:
11728 case TYPE_SHIFT_IMM:
11729 case TYPE_SHIFT_REG:
11730 case TYPE_LOAD_BYTE:
11731 case TYPE_LOAD1:
11732 case TYPE_STORE1:
11733 case TYPE_FFARITHS:
11734 case TYPE_FADDS:
11735 case TYPE_FFARITHD:
11736 case TYPE_FADDD:
11737 case TYPE_FMOV:
11738 case TYPE_F_CVT:
11739 case TYPE_FCMPS:
11740 case TYPE_FCMPD:
11741 case TYPE_FCONSTS:
11742 case TYPE_FCONSTD:
11743 case TYPE_FMULS:
11744 case TYPE_FMACS:
11745 case TYPE_FMULD:
11746 case TYPE_FMACD:
11747 case TYPE_FDIVS:
11748 case TYPE_FDIVD:
11749 case TYPE_F_MRC:
11750 case TYPE_F_MRRC:
11751 case TYPE_F_FLAG:
11752 case TYPE_F_LOADS:
11753 case TYPE_F_STORES:
11754 return true;
11755 default:
11756 return false;
11757 }
11758 }
11759
11760 /* Return true if and only if this insn can dual-issue as younger. */
11761 static bool
11762 cortexa7_younger (FILE *file, int verbose, rtx insn)
11763 {
11764 if (recog_memoized (insn) < 0)
11765 {
11766 if (verbose > 5)
11767 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11768 return false;
11769 }
11770
11771 switch (get_attr_type (insn))
11772 {
11773 case TYPE_ALU_IMM:
11774 case TYPE_ALUS_IMM:
11775 case TYPE_LOGIC_IMM:
11776 case TYPE_LOGICS_IMM:
11777 case TYPE_EXTEND:
11778 case TYPE_MVN_IMM:
11779 case TYPE_MOV_IMM:
11780 case TYPE_MOV_REG:
11781 case TYPE_MOV_SHIFT:
11782 case TYPE_MOV_SHIFT_REG:
11783 case TYPE_BRANCH:
11784 case TYPE_CALL:
11785 return true;
11786 default:
11787 return false;
11788 }
11789 }
11790
11791
11792 /* Look for an instruction that can dual issue only as an older
11793 instruction, and move it in front of any instructions that can
11794 dual-issue as younger, while preserving the relative order of all
11795 other instructions in the ready list. This is a heuristic to help
11796 dual-issue in later cycles, by postponing issue of more flexible
11797 instructions. This heuristic may affect dual issue opportunities
11798 in the current cycle. */
11799 static void
11800 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11801 int clock)
11802 {
11803 int i;
11804 int first_older_only = -1, first_younger = -1;
11805
11806 if (verbose > 5)
11807 fprintf (file,
11808 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11809 clock,
11810 *n_readyp);
11811
11812 /* Traverse the ready list from the head (the instruction to issue
11813 first), looking for the first instruction that can issue as
11814 younger and the first instruction that can dual-issue only as
11815 older. */
11816 for (i = *n_readyp - 1; i >= 0; i--)
11817 {
11818 rtx insn = ready[i];
11819 if (cortexa7_older_only (insn))
11820 {
11821 first_older_only = i;
11822 if (verbose > 5)
11823 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11824 break;
11825 }
11826 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11827 first_younger = i;
11828 }
11829
11830 /* Nothing to reorder: either no younger insn was found, or an insn that
11831 can dual-issue only as older already appears before any insn that can
11832 dual-issue as younger. */
11833 if (first_younger == -1)
11834 {
11835 if (verbose > 5)
11836 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11837 return;
11838 }
11839
11840 /* Nothing to reorder because no older-only insn in the ready list. */
11841 if (first_older_only == -1)
11842 {
11843 if (verbose > 5)
11844 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11845 return;
11846 }
11847
11848 /* Move first_older_only insn before first_younger. */
11849 if (verbose > 5)
11850 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11851 INSN_UID(ready [first_older_only]),
11852 INSN_UID(ready [first_younger]));
11853 rtx first_older_only_insn = ready [first_older_only];
11854 for (i = first_older_only; i < first_younger; i++)
11855 {
11856 ready[i] = ready[i+1];
11857 }
11858
11859 ready[i] = first_older_only_insn;
11860 return;
11861 }
11862
11863 /* Implement TARGET_SCHED_REORDER. */
11864 static int
11865 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11866 int clock)
11867 {
11868 switch (arm_tune)
11869 {
11870 case cortexa7:
11871 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11872 break;
11873 default:
11874 /* Do nothing for other cores. */
11875 break;
11876 }
11877
11878 return arm_issue_rate ();
11879 }
11880
11881 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11882 It corrects the value of COST based on the relationship between
11883 INSN and DEP through the dependence LINK. It returns the new
11884 value. There is a per-core adjust_cost hook to adjust scheduler costs
11885 and the per-core hook can choose to completely override the generic
11886 adjust_cost function. Only put bits of code into arm_adjust_cost that
11887 are common across all cores. */
11888 static int
11889 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11890 {
11891 rtx i_pat, d_pat;
11892
11893 /* When generating Thumb-1 code, we want to place flag-setting operations
11894 close to a conditional branch which depends on them, so that we can
11895 omit the comparison. */
11896 if (TARGET_THUMB1
11897 && REG_NOTE_KIND (link) == 0
11898 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11899 && recog_memoized (dep) >= 0
11900 && get_attr_conds (dep) == CONDS_SET)
11901 return 0;
11902
11903 if (current_tune->sched_adjust_cost != NULL)
11904 {
11905 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11906 return cost;
11907 }
11908
11909 /* XXX Is this strictly true? */
11910 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11911 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11912 return 0;
11913
11914 /* Call insns don't incur a stall, even if they follow a load. */
11915 if (REG_NOTE_KIND (link) == 0
11916 && CALL_P (insn))
11917 return 1;
11918
11919 if ((i_pat = single_set (insn)) != NULL
11920 && MEM_P (SET_SRC (i_pat))
11921 && (d_pat = single_set (dep)) != NULL
11922 && MEM_P (SET_DEST (d_pat)))
11923 {
11924 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11925 /* This is a load after a store; there is no conflict if the load reads
11926 from a cached area. Assume that loads from the stack and from the
11927 constant pool are cached, and that others will miss. This is a
11928 hack. */
11929
11930 if ((GET_CODE (src_mem) == SYMBOL_REF
11931 && CONSTANT_POOL_ADDRESS_P (src_mem))
11932 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11933 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11934 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11935 return 1;
11936 }
11937
11938 return cost;
11939 }
11940
11941 int
11942 arm_max_conditional_execute (void)
11943 {
11944 return max_insns_skipped;
11945 }
11946
11947 static int
11948 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11949 {
11950 if (TARGET_32BIT)
11951 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11952 else
11953 return (optimize > 0) ? 2 : 0;
11954 }
11955
11956 static int
11957 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11958 {
11959 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11960 }
11961
11962 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11963 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11964 sequences of non-executed instructions in IT blocks probably take the same
11965 amount of time as executed instructions (and the IT instruction itself takes
11966 space in icache). This function was experimentally determined to give good
11967 results on a popular embedded benchmark. */
11968
11969 static int
11970 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11971 {
11972 return (TARGET_32BIT && speed_p) ? 1
11973 : arm_default_branch_cost (speed_p, predictable_p);
11974 }
11975
11976 static bool fp_consts_inited = false;
11977
11978 static REAL_VALUE_TYPE value_fp0;
11979
11980 static void
11981 init_fp_table (void)
11982 {
11983 REAL_VALUE_TYPE r;
11984
11985 r = REAL_VALUE_ATOF ("0", DFmode);
11986 value_fp0 = r;
11987 fp_consts_inited = true;
11988 }
11989
11990 /* Return TRUE if rtx X is a valid immediate FP constant. */
11991 int
11992 arm_const_double_rtx (rtx x)
11993 {
11994 REAL_VALUE_TYPE r;
11995
11996 if (!fp_consts_inited)
11997 init_fp_table ();
11998
11999 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12000 if (REAL_VALUE_MINUS_ZERO (r))
12001 return 0;
12002
12003 if (REAL_VALUES_EQUAL (r, value_fp0))
12004 return 1;
12005
12006 return 0;
12007 }
12008
12009 /* VFPv3 has a fairly wide range of representable immediates, formed from
12010 "quarter-precision" floating-point values. These can be evaluated using this
12011 formula (with ^ for exponentiation):
12012
12013 -1^s * n * 2^-r
12014
12015 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12016 16 <= n <= 31 and 0 <= r <= 7.
12017
12018 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12019
12020 - A (most-significant) is the sign bit.
12021 - BCD are the exponent (encoded as r XOR 3).
12022 - EFGH are the mantissa (encoded as n - 16).
12023 */
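/* For example, 1.0 = 16 * 2^-4, i.e. s = 0, n = 16 and r = 4, which encodes
   as (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70; that is the index
   returned by vfp3_const_double_index below for a CONST_DOUBLE of 1.0. */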
12024
12025 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12026 fconst[sd] instruction, or -1 if X isn't suitable. */
12027 static int
12028 vfp3_const_double_index (rtx x)
12029 {
12030 REAL_VALUE_TYPE r, m;
12031 int sign, exponent;
12032 unsigned HOST_WIDE_INT mantissa, mant_hi;
12033 unsigned HOST_WIDE_INT mask;
12034 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12035 bool fail;
12036
12037 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12038 return -1;
12039
12040 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12041
12042 /* We can't represent these things, so detect them first. */
12043 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12044 return -1;
12045
12046 /* Extract sign, exponent and mantissa. */
12047 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12048 r = real_value_abs (&r);
12049 exponent = REAL_EXP (&r);
12050 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12051 highest (sign) bit, with a fixed binary point at bit point_pos.
12052 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12053 bits for the mantissa, this may fail (low bits would be lost). */
12054 real_ldexp (&m, &r, point_pos - exponent);
12055 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12056 mantissa = w.elt (0);
12057 mant_hi = w.elt (1);
12058
12059 /* If there are bits set in the low part of the mantissa, we can't
12060 represent this value. */
12061 if (mantissa != 0)
12062 return -1;
12063
12064 /* Now make it so that mantissa contains the most-significant bits, and move
12065 the point_pos to indicate that the least-significant bits have been
12066 discarded. */
12067 point_pos -= HOST_BITS_PER_WIDE_INT;
12068 mantissa = mant_hi;
12069
12070 /* We can permit four significant bits of mantissa only, plus a high bit
12071 which is always 1. */
12072 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12073 if ((mantissa & mask) != 0)
12074 return -1;
12075
12076 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12077 mantissa >>= point_pos - 5;
12078
12079 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12080 floating-point immediate zero with Neon using an integer-zero load, but
12081 that case is handled elsewhere.) */
12082 if (mantissa == 0)
12083 return -1;
12084
12085 gcc_assert (mantissa >= 16 && mantissa <= 31);
12086
12087 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12088 normalized significands are in the range [1, 2). (Our mantissa is shifted
12089 left 4 places at this point relative to normalized IEEE754 values). GCC
12090 internally uses [0.5, 1) (see real.c), so the exponent returned from
12091 REAL_EXP must be altered. */
12092 exponent = 5 - exponent;
12093
12094 if (exponent < 0 || exponent > 7)
12095 return -1;
12096
12097 /* Sign, mantissa and exponent are now in the correct form to plug into the
12098 formula described in the comment above. */
12099 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12100 }
12101
12102 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12103 int
12104 vfp3_const_double_rtx (rtx x)
12105 {
12106 if (!TARGET_VFP3)
12107 return 0;
12108
12109 return vfp3_const_double_index (x) != -1;
12110 }
12111
12112 /* Recognize immediates which can be used in various Neon instructions. Legal
12113 immediates are described by the following table (for VMVN variants, the
12114 bitwise inverse of the constant shown is recognized. In either case, VMOV
12115 is output and the correct instruction to use for a given constant is chosen
12116 by the assembler). The constant shown is replicated across all elements of
12117 the destination vector.
12118
12119 insn elems variant constant (binary)
12120 ---- ----- ------- -----------------
12121 vmov i32 0 00000000 00000000 00000000 abcdefgh
12122 vmov i32 1 00000000 00000000 abcdefgh 00000000
12123 vmov i32 2 00000000 abcdefgh 00000000 00000000
12124 vmov i32 3 abcdefgh 00000000 00000000 00000000
12125 vmov i16 4 00000000 abcdefgh
12126 vmov i16 5 abcdefgh 00000000
12127 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12128 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12129 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12130 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12131 vmvn i16 10 00000000 abcdefgh
12132 vmvn i16 11 abcdefgh 00000000
12133 vmov i32 12 00000000 00000000 abcdefgh 11111111
12134 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12135 vmov i32 14 00000000 abcdefgh 11111111 11111111
12136 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12137 vmov i8 16 abcdefgh
12138 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12139 eeeeeeee ffffffff gggggggg hhhhhhhh
12140 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12141 vmov f32 19 00000000 00000000 00000000 00000000
12142
12143 For case 18, B = !b. Representable values are exactly those accepted by
12144 vfp3_const_double_index, but are output as floating-point numbers rather
12145 than indices.
12146
12147 For case 19, we will change it to vmov.i32 when assembling.
12148
12149 Variants 0-5 (inclusive) may also be used as immediates for the second
12150 operand of VORR/VBIC instructions.
12151
12152 The INVERSE argument causes the bitwise inverse of the given operand to be
12153 recognized instead (used for recognizing legal immediates for the VAND/VORN
12154 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12155 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12156 output, rather than the real insns vbic/vorr).
12157
12158 INVERSE makes no difference to the recognition of float vectors.
12159
12160 The return value is the variant of immediate as shown in the above table, or
12161 -1 if the given value doesn't match any of the listed patterns.
12162 */
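/* As an illustration, a V4SImode CONST_VECTOR whose elements are all
   0x0000ab00 splats to the per-element byte pattern {00, ab, 00, 00} and
   matches variant 1 above, so *ELEMENTWIDTH is set to 32 and *MODCONST to
   GEN_INT (0x0000ab00). */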
12163 static int
12164 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
12165 rtx *modconst, int *elementwidth)
12166 {
12167 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12168 matches = 1; \
12169 for (i = 0; i < idx; i += (STRIDE)) \
12170 if (!(TEST)) \
12171 matches = 0; \
12172 if (matches) \
12173 { \
12174 immtype = (CLASS); \
12175 elsize = (ELSIZE); \
12176 break; \
12177 }
12178
12179 unsigned int i, elsize = 0, idx = 0, n_elts;
12180 unsigned int innersize;
12181 unsigned char bytes[16];
12182 int immtype = -1, matches;
12183 unsigned int invmask = inverse ? 0xff : 0;
12184 bool vector = GET_CODE (op) == CONST_VECTOR;
12185
12186 if (vector)
12187 {
12188 n_elts = CONST_VECTOR_NUNITS (op);
12189 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12190 }
12191 else
12192 {
12193 n_elts = 1;
12194 if (mode == VOIDmode)
12195 mode = DImode;
12196 innersize = GET_MODE_SIZE (mode);
12197 }
12198
12199 /* Vectors of float constants. */
12200 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12201 {
12202 rtx el0 = CONST_VECTOR_ELT (op, 0);
12203 REAL_VALUE_TYPE r0;
12204
12205 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12206 return -1;
12207
12208 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12209
12210 for (i = 1; i < n_elts; i++)
12211 {
12212 rtx elt = CONST_VECTOR_ELT (op, i);
12213 REAL_VALUE_TYPE re;
12214
12215 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12216
12217 if (!REAL_VALUES_EQUAL (r0, re))
12218 return -1;
12219 }
12220
12221 if (modconst)
12222 *modconst = CONST_VECTOR_ELT (op, 0);
12223
12224 if (elementwidth)
12225 *elementwidth = 0;
12226
12227 if (el0 == CONST0_RTX (GET_MODE (el0)))
12228 return 19;
12229 else
12230 return 18;
12231 }
12232
12233 /* Splat vector constant out into a byte vector. */
12234 for (i = 0; i < n_elts; i++)
12235 {
12236 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12237 unsigned HOST_WIDE_INT elpart;
12238 unsigned int part, parts;
12239
12240 if (CONST_INT_P (el))
12241 {
12242 elpart = INTVAL (el);
12243 parts = 1;
12244 }
12245 else if (CONST_DOUBLE_P (el))
12246 {
12247 elpart = CONST_DOUBLE_LOW (el);
12248 parts = 2;
12249 }
12250 else
12251 gcc_unreachable ();
12252
12253 for (part = 0; part < parts; part++)
12254 {
12255 unsigned int byte;
12256 for (byte = 0; byte < innersize; byte++)
12257 {
12258 bytes[idx++] = (elpart & 0xff) ^ invmask;
12259 elpart >>= BITS_PER_UNIT;
12260 }
12261 if (CONST_DOUBLE_P (el))
12262 elpart = CONST_DOUBLE_HIGH (el);
12263 }
12264 }
12265
12266 /* Sanity check. */
12267 gcc_assert (idx == GET_MODE_SIZE (mode));
12268
12269 do
12270 {
12271 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12272 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12273
12274 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12275 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12276
12277 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12278 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12279
12280 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12281 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12282
12283 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12284
12285 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12286
12287 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12288 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12289
12290 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12291 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12292
12293 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12294 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12295
12296 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12297 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12298
12299 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12300
12301 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12302
12303 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12304 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12305
12306 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12307 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12308
12309 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12310 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12311
12312 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12313 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12314
12315 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12316
12317 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12318 && bytes[i] == bytes[(i + 8) % idx]);
12319 }
12320 while (0);
12321
12322 if (immtype == -1)
12323 return -1;
12324
12325 if (elementwidth)
12326 *elementwidth = elsize;
12327
12328 if (modconst)
12329 {
12330 unsigned HOST_WIDE_INT imm = 0;
12331
12332 /* Un-invert bytes of recognized vector, if necessary. */
12333 if (invmask != 0)
12334 for (i = 0; i < idx; i++)
12335 bytes[i] ^= invmask;
12336
12337 if (immtype == 17)
12338 {
12339 /* FIXME: Broken on 32-bit H_W_I hosts. */
12340 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12341
12342 for (i = 0; i < 8; i++)
12343 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12344 << (i * BITS_PER_UNIT);
12345
12346 *modconst = GEN_INT (imm);
12347 }
12348 else
12349 {
12350 unsigned HOST_WIDE_INT imm = 0;
12351
12352 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12353 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12354
12355 *modconst = GEN_INT (imm);
12356 }
12357 }
12358
12359 return immtype;
12360 #undef CHECK
12361 }
12362
12363 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12364 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12365 float elements), and a modified constant (whatever should be output for a
12366 VMOV) in *MODCONST. */
12367
12368 int
12369 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12370 rtx *modconst, int *elementwidth)
12371 {
12372 rtx tmpconst;
12373 int tmpwidth;
12374 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12375
12376 if (retval == -1)
12377 return 0;
12378
12379 if (modconst)
12380 *modconst = tmpconst;
12381
12382 if (elementwidth)
12383 *elementwidth = tmpwidth;
12384
12385 return 1;
12386 }
12387
12388 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12389 the immediate is valid, write a constant suitable for using as an operand
12390 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12391 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12392
12393 int
12394 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12395 rtx *modconst, int *elementwidth)
12396 {
12397 rtx tmpconst;
12398 int tmpwidth;
12399 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12400
12401 if (retval < 0 || retval > 5)
12402 return 0;
12403
12404 if (modconst)
12405 *modconst = tmpconst;
12406
12407 if (elementwidth)
12408 *elementwidth = tmpwidth;
12409
12410 return 1;
12411 }
12412
12413 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12414 the immediate is valid, write a constant suitable for using as an operand
12415 to VSHR/VSHL to *MODCONST and the corresponding element width to
12416 *ELEMENTWIDTH. ISLEFTSHIFT selects between a left shift and a right
12417 shift, since they have different immediate ranges. */
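/* For example, with 16-bit elements (V8HImode) a VSHL immediate of 0-15 is
   accepted, whereas a VSHR immediate must lie in the range 1-16. */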
12418
12419 int
12420 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12421 rtx *modconst, int *elementwidth,
12422 bool isleftshift)
12423 {
12424 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12425 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12426 unsigned HOST_WIDE_INT last_elt = 0;
12427 unsigned HOST_WIDE_INT maxshift;
12428
12429 /* All vector elements must hold the same shift-count constant. */
12430 for (i = 0; i < n_elts; i++)
12431 {
12432 rtx el = CONST_VECTOR_ELT (op, i);
12433 unsigned HOST_WIDE_INT elpart;
12434
12435 if (CONST_INT_P (el))
12436 elpart = INTVAL (el);
12437 else if (CONST_DOUBLE_P (el))
12438 return 0;
12439 else
12440 gcc_unreachable ();
12441
12442 if (i != 0 && elpart != last_elt)
12443 return 0;
12444
12445 last_elt = elpart;
12446 }
12447
12448 /* Shift less than element size. */
12449 maxshift = innersize * 8;
12450
12451 if (isleftshift)
12452 {
12453 /* Left shift immediate value can be from 0 to <size>-1. */
12454 if (last_elt >= maxshift)
12455 return 0;
12456 }
12457 else
12458 {
12459 /* Right shift immediate value can be from 1 to <size>. */
12460 if (last_elt == 0 || last_elt > maxshift)
12461 return 0;
12462 }
12463
12464 if (elementwidth)
12465 *elementwidth = innersize * 8;
12466
12467 if (modconst)
12468 *modconst = CONST_VECTOR_ELT (op, 0);
12469
12470 return 1;
12471 }
12472
12473 /* Return a string suitable for output of Neon immediate logic operation
12474 MNEM. */
12475
12476 char *
12477 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12478 int inverse, int quad)
12479 {
12480 int width, is_valid;
12481 static char templ[40];
12482
12483 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12484
12485 gcc_assert (is_valid != 0);
12486
12487 if (quad)
12488 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12489 else
12490 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12491
12492 return templ;
12493 }
12494
12495 /* Return a string suitable for output of Neon immediate shift operation
12496 (VSHR or VSHL) MNEM. */
12497
12498 char *
12499 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12500 enum machine_mode mode, int quad,
12501 bool isleftshift)
12502 {
12503 int width, is_valid;
12504 static char templ[40];
12505
12506 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12507 gcc_assert (is_valid != 0);
12508
12509 if (quad)
12510 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12511 else
12512 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12513
12514 return templ;
12515 }
12516
12517 /* Output a sequence of pairwise operations to implement a reduction.
12518 NOTE: We do "too much work" here, because pairwise operations work on two
12519 registers' worth of operands in one go. Unfortunately we don't think those
12520 extra calculations can be exploited to do the full operation in fewer steps.
12521 Although all vector elements of the result but the first are ignored, we
12522 actually calculate the same result in each of the elements. An alternative
12523 such as initially loading a vector with zero to use as each of the second
12524 operands would use up an additional register and take an extra instruction,
12525 for no particular gain. */
12526
12527 void
12528 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12529 rtx (*reduc) (rtx, rtx, rtx))
12530 {
12531 enum machine_mode inner = GET_MODE_INNER (mode);
12532 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12533 rtx tmpsum = op1;
12534
12535 for (i = parts / 2; i >= 1; i /= 2)
12536 {
12537 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12538 emit_insn (reduc (dest, tmpsum, tmpsum));
12539 tmpsum = dest;
12540 }
12541 }
12542
12543 /* If VALS is a vector constant that can be loaded into a register
12544 using VDUP, generate instructions to do so and return an RTX to
12545 assign to the register. Otherwise return NULL_RTX. */
12546
12547 static rtx
12548 neon_vdup_constant (rtx vals)
12549 {
12550 enum machine_mode mode = GET_MODE (vals);
12551 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12552 int n_elts = GET_MODE_NUNITS (mode);
12553 bool all_same = true;
12554 rtx x;
12555 int i;
12556
12557 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12558 return NULL_RTX;
12559
12560 for (i = 0; i < n_elts; ++i)
12561 {
12562 x = XVECEXP (vals, 0, i);
12563 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12564 all_same = false;
12565 }
12566
12567 if (!all_same)
12568 /* The elements are not all the same. We could handle repeating
12569 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12570 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12571 vdup.i16). */
12572 return NULL_RTX;
12573
12574 /* We can load this constant by using VDUP and a constant in a
12575 single ARM register. This will be cheaper than a vector
12576 load. */
12577
12578 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12579 return gen_rtx_VEC_DUPLICATE (mode, x);
12580 }
12581
12582 /* Generate code to load VALS, which is a PARALLEL containing only
12583 constants (for vec_init) or CONST_VECTOR, efficiently into a
12584 register. Returns an RTX to copy into the register, or NULL_RTX
12585 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12586
12587 rtx
12588 neon_make_constant (rtx vals)
12589 {
12590 enum machine_mode mode = GET_MODE (vals);
12591 rtx target;
12592 rtx const_vec = NULL_RTX;
12593 int n_elts = GET_MODE_NUNITS (mode);
12594 int n_const = 0;
12595 int i;
12596
12597 if (GET_CODE (vals) == CONST_VECTOR)
12598 const_vec = vals;
12599 else if (GET_CODE (vals) == PARALLEL)
12600 {
12601 /* A CONST_VECTOR must contain only CONST_INTs and
12602 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12603 Only store valid constants in a CONST_VECTOR. */
12604 for (i = 0; i < n_elts; ++i)
12605 {
12606 rtx x = XVECEXP (vals, 0, i);
12607 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12608 n_const++;
12609 }
12610 if (n_const == n_elts)
12611 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12612 }
12613 else
12614 gcc_unreachable ();
12615
12616 if (const_vec != NULL
12617 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12618 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12619 return const_vec;
12620 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12621 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12622 pipeline cycle; creating the constant takes one or two ARM
12623 pipeline cycles. */
12624 return target;
12625 else if (const_vec != NULL_RTX)
12626 /* Load from constant pool. On Cortex-A8 this takes two cycles
12627 (for either double or quad vectors). We cannot take advantage
12628 of single-cycle VLD1 because we need a PC-relative addressing
12629 mode. */
12630 return const_vec;
12631 else
12632 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12633 We cannot construct an initializer. */
12634 return NULL_RTX;
12635 }
12636
12637 /* Initialize vector TARGET to VALS. */
12638
12639 void
12640 neon_expand_vector_init (rtx target, rtx vals)
12641 {
12642 enum machine_mode mode = GET_MODE (target);
12643 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12644 int n_elts = GET_MODE_NUNITS (mode);
12645 int n_var = 0, one_var = -1;
12646 bool all_same = true;
12647 rtx x, mem;
12648 int i;
12649
12650 for (i = 0; i < n_elts; ++i)
12651 {
12652 x = XVECEXP (vals, 0, i);
12653 if (!CONSTANT_P (x))
12654 ++n_var, one_var = i;
12655
12656 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12657 all_same = false;
12658 }
12659
12660 if (n_var == 0)
12661 {
12662 rtx constant = neon_make_constant (vals);
12663 if (constant != NULL_RTX)
12664 {
12665 emit_move_insn (target, constant);
12666 return;
12667 }
12668 }
12669
12670 /* Splat a single non-constant element if we can. */
12671 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12672 {
12673 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12674 emit_insn (gen_rtx_SET (VOIDmode, target,
12675 gen_rtx_VEC_DUPLICATE (mode, x)));
12676 return;
12677 }
12678
12679 /* One field is non-constant. Load constant then overwrite varying
12680 field. This is more efficient than using the stack. */
12681 if (n_var == 1)
12682 {
12683 rtx copy = copy_rtx (vals);
12684 rtx index = GEN_INT (one_var);
12685
12686 /* Load constant part of vector, substitute neighboring value for
12687 varying element. */
12688 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12689 neon_expand_vector_init (target, copy);
12690
12691 /* Insert variable. */
12692 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12693 switch (mode)
12694 {
12695 case V8QImode:
12696 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12697 break;
12698 case V16QImode:
12699 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12700 break;
12701 case V4HImode:
12702 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12703 break;
12704 case V8HImode:
12705 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12706 break;
12707 case V2SImode:
12708 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12709 break;
12710 case V4SImode:
12711 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12712 break;
12713 case V2SFmode:
12714 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12715 break;
12716 case V4SFmode:
12717 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12718 break;
12719 case V2DImode:
12720 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12721 break;
12722 default:
12723 gcc_unreachable ();
12724 }
12725 return;
12726 }
12727
12728 /* Construct the vector in memory one field at a time
12729 and load the whole vector. */
12730 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12731 for (i = 0; i < n_elts; i++)
12732 emit_move_insn (adjust_address_nv (mem, inner_mode,
12733 i * GET_MODE_SIZE (inner_mode)),
12734 XVECEXP (vals, 0, i));
12735 emit_move_insn (target, mem);
12736 }
12737
12738 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12739 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12740 reported source locations are bogus. */
12741
12742 static void
12743 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12744 const char *err)
12745 {
12746 HOST_WIDE_INT lane;
12747
12748 gcc_assert (CONST_INT_P (operand));
12749
12750 lane = INTVAL (operand);
12751
12752 if (lane < low || lane >= high)
12753 error (err);
12754 }
12755
12756 /* Bounds-check lanes. */
12757
12758 void
12759 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12760 {
12761 bounds_check (operand, low, high, "lane out of range");
12762 }
12763
12764 /* Bounds-check constants. */
12765
12766 void
12767 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12768 {
12769 bounds_check (operand, low, high, "constant out of range");
12770 }
12771
12772 HOST_WIDE_INT
12773 neon_element_bits (enum machine_mode mode)
12774 {
12775 if (mode == DImode)
12776 return GET_MODE_BITSIZE (mode);
12777 else
12778 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12779 }
12780
12781 \f
12782 /* Predicates for `match_operand' and `match_operator'. */
12783
12784 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12785 WB is true if full writeback address modes are allowed and is false
12786 if limited writeback address modes (POST_INC and PRE_DEC) are
12787 allowed. */
12788
12789 int
12790 arm_coproc_mem_operand (rtx op, bool wb)
12791 {
12792 rtx ind;
12793
12794 /* Reject eliminable registers. */
12795 if (! (reload_in_progress || reload_completed || lra_in_progress)
12796 && ( reg_mentioned_p (frame_pointer_rtx, op)
12797 || reg_mentioned_p (arg_pointer_rtx, op)
12798 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12799 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12800 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12801 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12802 return FALSE;
12803
12804 /* Constants are converted into offsets from labels. */
12805 if (!MEM_P (op))
12806 return FALSE;
12807
12808 ind = XEXP (op, 0);
12809
12810 if (reload_completed
12811 && (GET_CODE (ind) == LABEL_REF
12812 || (GET_CODE (ind) == CONST
12813 && GET_CODE (XEXP (ind, 0)) == PLUS
12814 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12815 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12816 return TRUE;
12817
12818 /* Match: (mem (reg)). */
12819 if (REG_P (ind))
12820 return arm_address_register_rtx_p (ind, 0);
12821
12822 /* Auto-increment addressing modes. POST_INC and PRE_DEC are
12823 acceptable in any case (subject to verification by
12824 arm_address_register_rtx_p). We need WB to be true to accept
12825 PRE_INC and POST_DEC. */
12826 if (GET_CODE (ind) == POST_INC
12827 || GET_CODE (ind) == PRE_DEC
12828 || (wb
12829 && (GET_CODE (ind) == PRE_INC
12830 || GET_CODE (ind) == POST_DEC)))
12831 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12832
12833 if (wb
12834 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12835 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12836 && GET_CODE (XEXP (ind, 1)) == PLUS
12837 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12838 ind = XEXP (ind, 1);
12839
12840 /* Match:
12841 (plus (reg)
12842 (const)). */
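/* I.e. a word-aligned offset in the range [-1020, 1020], the immediate
   range of vldr/vstr. */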
12843 if (GET_CODE (ind) == PLUS
12844 && REG_P (XEXP (ind, 0))
12845 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12846 && CONST_INT_P (XEXP (ind, 1))
12847 && INTVAL (XEXP (ind, 1)) > -1024
12848 && INTVAL (XEXP (ind, 1)) < 1024
12849 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12850 return TRUE;
12851
12852 return FALSE;
12853 }
12854
12855 /* Return TRUE if OP is a memory operand which we can load or store a vector
12856 to/from. TYPE is one of the following values:
12857 0 - Vector load/store (vldr)
12858 1 - Core registers (ldm)
12859 2 - Element/structure loads (vld1)
12860 */
12861 int
12862 neon_vector_mem_operand (rtx op, int type, bool strict)
12863 {
12864 rtx ind;
12865
12866 /* Reject eliminable registers. */
12867 if (! (reload_in_progress || reload_completed)
12868 && ( reg_mentioned_p (frame_pointer_rtx, op)
12869 || reg_mentioned_p (arg_pointer_rtx, op)
12870 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12871 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12872 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12873 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12874 return !strict;
12875
12876 /* Constants are converted into offsets from labels. */
12877 if (!MEM_P (op))
12878 return FALSE;
12879
12880 ind = XEXP (op, 0);
12881
12882 if (reload_completed
12883 && (GET_CODE (ind) == LABEL_REF
12884 || (GET_CODE (ind) == CONST
12885 && GET_CODE (XEXP (ind, 0)) == PLUS
12886 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12887 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12888 return TRUE;
12889
12890 /* Match: (mem (reg)). */
12891 if (REG_P (ind))
12892 return arm_address_register_rtx_p (ind, 0);
12893
12894 /* Allow post-increment with Neon registers. */
12895 if ((type != 1 && GET_CODE (ind) == POST_INC)
12896 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12897 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12898
12899 /* Allow post-increment by register for VLDn. */
12900 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12901 && GET_CODE (XEXP (ind, 1)) == PLUS
12902 && REG_P (XEXP (XEXP (ind, 1), 1)))
12903 return true;
12904
12905 /* Match:
12906 (plus (reg)
12907 (const)). */
12908 if (type == 0
12909 && GET_CODE (ind) == PLUS
12910 && REG_P (XEXP (ind, 0))
12911 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12912 && CONST_INT_P (XEXP (ind, 1))
12913 && INTVAL (XEXP (ind, 1)) > -1024
12914 /* For quad modes, we restrict the constant offset to be slightly less
12915 than what the instruction format permits. We have no such constraint
12916 on double mode offsets. (This must match arm_legitimate_index_p.) */
12917 && (INTVAL (XEXP (ind, 1))
12918 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12919 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12920 return TRUE;
12921
12922 return FALSE;
12923 }
12924
12925 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12926 type. */
12927 int
12928 neon_struct_mem_operand (rtx op)
12929 {
12930 rtx ind;
12931
12932 /* Reject eliminable registers. */
12933 if (! (reload_in_progress || reload_completed)
12934 && ( reg_mentioned_p (frame_pointer_rtx, op)
12935 || reg_mentioned_p (arg_pointer_rtx, op)
12936 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12937 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12938 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12939 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12940 return FALSE;
12941
12942 /* Constants are converted into offsets from labels. */
12943 if (!MEM_P (op))
12944 return FALSE;
12945
12946 ind = XEXP (op, 0);
12947
12948 if (reload_completed
12949 && (GET_CODE (ind) == LABEL_REF
12950 || (GET_CODE (ind) == CONST
12951 && GET_CODE (XEXP (ind, 0)) == PLUS
12952 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12953 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12954 return TRUE;
12955
12956 /* Match: (mem (reg)). */
12957 if (REG_P (ind))
12958 return arm_address_register_rtx_p (ind, 0);
12959
12960 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12961 if (GET_CODE (ind) == POST_INC
12962 || GET_CODE (ind) == PRE_DEC)
12963 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12964
12965 return FALSE;
12966 }
12967
12968 /* Return true if X is a register that will be eliminated later on. */
12969 int
12970 arm_eliminable_register (rtx x)
12971 {
12972 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12973 || REGNO (x) == ARG_POINTER_REGNUM
12974 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12975 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12976 }
12977
12978 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12979 coprocessor registers. Otherwise return NO_REGS. */
12980
12981 enum reg_class
12982 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12983 {
12984 if (mode == HFmode)
12985 {
12986 if (!TARGET_NEON_FP16)
12987 return GENERAL_REGS;
12988 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12989 return NO_REGS;
12990 return GENERAL_REGS;
12991 }
12992
12993 /* The neon move patterns handle all legitimate vector and struct
12994 addresses. */
12995 if (TARGET_NEON
12996 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12997 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12998 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12999 || VALID_NEON_STRUCT_MODE (mode)))
13000 return NO_REGS;
13001
13002 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13003 return NO_REGS;
13004
13005 return GENERAL_REGS;
13006 }
13007
13008 /* Values which must be returned in the most-significant end of the return
13009 register. */
13010
13011 static bool
13012 arm_return_in_msb (const_tree valtype)
13013 {
13014 return (TARGET_AAPCS_BASED
13015 && BYTES_BIG_ENDIAN
13016 && (AGGREGATE_TYPE_P (valtype)
13017 || TREE_CODE (valtype) == COMPLEX_TYPE
13018 || FIXED_POINT_TYPE_P (valtype)));
13019 }
13020
13021 /* Return TRUE if X references a SYMBOL_REF. */
13022 int
13023 symbol_mentioned_p (rtx x)
13024 {
13025 const char * fmt;
13026 int i;
13027
13028 if (GET_CODE (x) == SYMBOL_REF)
13029 return 1;
13030
13031 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13032 are constant offsets, not symbols. */
13033 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13034 return 0;
13035
13036 fmt = GET_RTX_FORMAT (GET_CODE (x));
13037
13038 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13039 {
13040 if (fmt[i] == 'E')
13041 {
13042 int j;
13043
13044 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13045 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13046 return 1;
13047 }
13048 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13049 return 1;
13050 }
13051
13052 return 0;
13053 }
13054
13055 /* Return TRUE if X references a LABEL_REF. */
13056 int
13057 label_mentioned_p (rtx x)
13058 {
13059 const char * fmt;
13060 int i;
13061
13062 if (GET_CODE (x) == LABEL_REF)
13063 return 1;
13064
13065 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13066 instruction, but they are constant offsets, not symbols. */
13067 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13068 return 0;
13069
13070 fmt = GET_RTX_FORMAT (GET_CODE (x));
13071 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13072 {
13073 if (fmt[i] == 'E')
13074 {
13075 int j;
13076
13077 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13078 if (label_mentioned_p (XVECEXP (x, i, j)))
13079 return 1;
13080 }
13081 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13082 return 1;
13083 }
13084
13085 return 0;
13086 }
13087
13088 int
13089 tls_mentioned_p (rtx x)
13090 {
13091 switch (GET_CODE (x))
13092 {
13093 case CONST:
13094 return tls_mentioned_p (XEXP (x, 0));
13095
13096 case UNSPEC:
13097 if (XINT (x, 1) == UNSPEC_TLS)
13098 return 1;
13099
13100 default:
13101 return 0;
13102 }
13103 }
13104
13105 /* Must not copy any rtx that uses a pc-relative address. */
13106
13107 static int
13108 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
13109 {
13110 if (GET_CODE (*x) == UNSPEC
13111 && (XINT (*x, 1) == UNSPEC_PIC_BASE
13112 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
13113 return 1;
13114 return 0;
13115 }
13116
13117 static bool
13118 arm_cannot_copy_insn_p (rtx insn)
13119 {
13120 /* The tls call insn cannot be copied, as it is paired with a data
13121 word. */
13122 if (recog_memoized (insn) == CODE_FOR_tlscall)
13123 return true;
13124
13125 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
13126 }
13127
13128 enum rtx_code
13129 minmax_code (rtx x)
13130 {
13131 enum rtx_code code = GET_CODE (x);
13132
13133 switch (code)
13134 {
13135 case SMAX:
13136 return GE;
13137 case SMIN:
13138 return LE;
13139 case UMIN:
13140 return LEU;
13141 case UMAX:
13142 return GEU;
13143 default:
13144 gcc_unreachable ();
13145 }
13146 }
13147
13148 /* Match pair of min/max operators that can be implemented via usat/ssat. */
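/* For example, the bound pair [0, 255] matches USAT with *MASK set to 8,
   and [-128, 127] matches SSAT with *MASK set to 8 (illustrative values
   derived from the checks below).  */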
13149
13150 bool
13151 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13152 int *mask, bool *signed_sat)
13153 {
13154 /* The high bound must be a power of two minus one. */
13155 int log = exact_log2 (INTVAL (hi_bound) + 1);
13156 if (log == -1)
13157 return false;
13158
13159 /* The low bound is either zero (for usat) or one less than the
13160 negation of the high bound (for ssat). */
13161 if (INTVAL (lo_bound) == 0)
13162 {
13163 if (mask)
13164 *mask = log;
13165 if (signed_sat)
13166 *signed_sat = false;
13167
13168 return true;
13169 }
13170
13171 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13172 {
13173 if (mask)
13174 *mask = log + 1;
13175 if (signed_sat)
13176 *signed_sat = true;
13177
13178 return true;
13179 }
13180
13181 return false;
13182 }
13183
13184 /* Return 1 if memory locations are adjacent. */
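/* (Roughly: both refs use the same base register and their word offsets
   differ by exactly 4, e.g. [rn] and [rn, #4]; further target-specific
   restrictions are applied below.)  */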
13185 int
13186 adjacent_mem_locations (rtx a, rtx b)
13187 {
13188 /* We don't guarantee to preserve the order of these memory refs. */
13189 if (volatile_refs_p (a) || volatile_refs_p (b))
13190 return 0;
13191
13192 if ((REG_P (XEXP (a, 0))
13193 || (GET_CODE (XEXP (a, 0)) == PLUS
13194 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13195 && (REG_P (XEXP (b, 0))
13196 || (GET_CODE (XEXP (b, 0)) == PLUS
13197 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13198 {
13199 HOST_WIDE_INT val0 = 0, val1 = 0;
13200 rtx reg0, reg1;
13201 int val_diff;
13202
13203 if (GET_CODE (XEXP (a, 0)) == PLUS)
13204 {
13205 reg0 = XEXP (XEXP (a, 0), 0);
13206 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13207 }
13208 else
13209 reg0 = XEXP (a, 0);
13210
13211 if (GET_CODE (XEXP (b, 0)) == PLUS)
13212 {
13213 reg1 = XEXP (XEXP (b, 0), 0);
13214 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13215 }
13216 else
13217 reg1 = XEXP (b, 0);
13218
13219 /* Don't accept any offset that will require multiple
13220 instructions to handle, since this would cause the
13221 arith_adjacentmem pattern to output an overlong sequence. */
13222 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13223 return 0;
13224
13225 /* Don't allow an eliminable register: register elimination can make
13226 the offset too large. */
13227 if (arm_eliminable_register (reg0))
13228 return 0;
13229
13230 val_diff = val1 - val0;
13231
13232 if (arm_ld_sched)
13233 {
13234 /* If the target has load delay slots, then there's no benefit
13235 to using an ldm instruction unless the offset is zero and
13236 we are optimizing for size. */
13237 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13238 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13239 && (val_diff == 4 || val_diff == -4));
13240 }
13241
13242 return ((REGNO (reg0) == REGNO (reg1))
13243 && (val_diff == 4 || val_diff == -4));
13244 }
13245
13246 return 0;
13247 }
13248
13249 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13250 for load operations, false for store operations. CONSECUTIVE is true
13251 if the register numbers in the operation must be consecutive in the register
13252 bank. RETURN_PC is true if the value is to be loaded into the PC.
13253 The pattern we are trying to match for load is:
13254 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13255 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13256 :
13257 :
13258 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13259 ]
13260 where
13261 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13262 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13263 3. If consecutive is TRUE, then for kth register being loaded,
13264 REGNO (R_dk) = REGNO (R_d0) + k.
13265 The pattern for store is similar. */
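/* For instance, a two-register LDM with write-back would be expected to
   look like
   (parallel [(set (reg rb) (plus (reg rb) (const_int 8)))
   (set (reg r1) (mem (reg rb)))
   (set (reg r2) (mem (plus (reg rb) (const_int 4))))])
   (a sketch only; register numbers are illustrative).  */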
13266 bool
13267 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
13268 bool consecutive, bool return_pc)
13269 {
13270 HOST_WIDE_INT count = XVECLEN (op, 0);
13271 rtx reg, mem, addr;
13272 unsigned regno;
13273 unsigned first_regno;
13274 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13275 rtx elt;
13276 bool addr_reg_in_reglist = false;
13277 bool update = false;
13278 int reg_increment;
13279 int offset_adj;
13280 int regs_per_val;
13281
13282 /* If not in SImode, then registers must be consecutive
13283 (e.g., VLDM instructions for DFmode). */
13284 gcc_assert ((mode == SImode) || consecutive);
13285 /* Setting return_pc for stores is illegal. */
13286 gcc_assert (!return_pc || load);
13287
13288 /* Set up the increments and the regs per val based on the mode. */
13289 reg_increment = GET_MODE_SIZE (mode);
13290 regs_per_val = reg_increment / 4;
13291 offset_adj = return_pc ? 1 : 0;
13292
13293 if (count <= 1
13294 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13295 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13296 return false;
13297
13298 /* Check if this is a write-back. */
13299 elt = XVECEXP (op, 0, offset_adj);
13300 if (GET_CODE (SET_SRC (elt)) == PLUS)
13301 {
13302 i++;
13303 base = 1;
13304 update = true;
13305
13306 /* The offset adjustment must be the number of registers being
13307 popped times the size of a single register. */
13308 if (!REG_P (SET_DEST (elt))
13309 || !REG_P (XEXP (SET_SRC (elt), 0))
13310 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13311 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13312 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13313 ((count - 1 - offset_adj) * reg_increment))
13314 return false;
13315 }
13316
13317 i = i + offset_adj;
13318 base = base + offset_adj;
13319 /* Perform a quick check so we don't blow up below. If only one reg is
13320 loaded, success depends on the type: VLDM can load just one register,
13321 but LDM must load at least two. */
13322 if ((count <= i) && (mode == SImode))
13323 return false;
13324
13325 elt = XVECEXP (op, 0, i - 1);
13326 if (GET_CODE (elt) != SET)
13327 return false;
13328
13329 if (load)
13330 {
13331 reg = SET_DEST (elt);
13332 mem = SET_SRC (elt);
13333 }
13334 else
13335 {
13336 reg = SET_SRC (elt);
13337 mem = SET_DEST (elt);
13338 }
13339
13340 if (!REG_P (reg) || !MEM_P (mem))
13341 return false;
13342
13343 regno = REGNO (reg);
13344 first_regno = regno;
13345 addr = XEXP (mem, 0);
13346 if (GET_CODE (addr) == PLUS)
13347 {
13348 if (!CONST_INT_P (XEXP (addr, 1)))
13349 return false;
13350
13351 offset = INTVAL (XEXP (addr, 1));
13352 addr = XEXP (addr, 0);
13353 }
13354
13355 if (!REG_P (addr))
13356 return false;
13357
13358 /* Don't allow SP to be loaded unless it is also the base register. It
13359 guarantees that SP is reset correctly when an LDM instruction
13360 is interrupted. Otherwise, we might end up with a corrupt stack. */
13361 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13362 return false;
13363
13364 for (; i < count; i++)
13365 {
13366 elt = XVECEXP (op, 0, i);
13367 if (GET_CODE (elt) != SET)
13368 return false;
13369
13370 if (load)
13371 {
13372 reg = SET_DEST (elt);
13373 mem = SET_SRC (elt);
13374 }
13375 else
13376 {
13377 reg = SET_SRC (elt);
13378 mem = SET_DEST (elt);
13379 }
13380
13381 if (!REG_P (reg)
13382 || GET_MODE (reg) != mode
13383 || REGNO (reg) <= regno
13384 || (consecutive
13385 && (REGNO (reg) !=
13386 (unsigned int) (first_regno + regs_per_val * (i - base))))
13387 /* Don't allow SP to be loaded unless it is also the base register. It
13388 guarantees that SP is reset correctly when an LDM instruction
13389 is interrupted. Otherwise, we might end up with a corrupt stack. */
13390 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13391 || !MEM_P (mem)
13392 || GET_MODE (mem) != mode
13393 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13394 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13395 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13396 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13397 offset + (i - base) * reg_increment))
13398 && (!REG_P (XEXP (mem, 0))
13399 || offset + (i - base) * reg_increment != 0)))
13400 return false;
13401
13402 regno = REGNO (reg);
13403 if (regno == REGNO (addr))
13404 addr_reg_in_reglist = true;
13405 }
13406
13407 if (load)
13408 {
13409 if (update && addr_reg_in_reglist)
13410 return false;
13411
13412 /* For Thumb-1, the address register is always modified, either by write-back
13413 or by an explicit load. If the pattern does not describe an update,
13414 then the address register must be in the list of loaded registers. */
13415 if (TARGET_THUMB1)
13416 return update || addr_reg_in_reglist;
13417 }
13418
13419 return true;
13420 }
13421
13422 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13423 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13424 instruction. ADD_OFFSET is nonzero if the base address register needs
13425 to be modified with an add instruction before we can use it. */
13426
13427 static bool
13428 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13429 int nops, HOST_WIDE_INT add_offset)
13430 {
13431 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13432 if the offset isn't small enough. The reason 2 ldrs are faster
13433 is that these ARMs are able to do more than one cache access
13434 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13435 whilst the ARM8 has a double bandwidth cache. This means that
13436 these cores can do both an instruction fetch and a data fetch in
13437 a single cycle, so the trick of calculating the address into a
13438 scratch register (one of the result regs) and then doing a load
13439 multiple actually becomes slower (and no smaller in code size).
13440 That is the transformation
13441
13442 ldr rd1, [rbase + offset]
13443 ldr rd2, [rbase + offset + 4]
13444
13445 to
13446
13447 add rd1, rbase, offset
13448 ldmia rd1, {rd1, rd2}
13449
13450 produces worse code -- '3 cycles + any stalls on rd2' instead of
13451 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13452 access per cycle, the first sequence could never complete in less
13453 than 6 cycles, whereas the ldm sequence would only take 5 and
13454 would make better use of sequential accesses if not hitting the
13455 cache.
13456
13457 We cheat here and test 'arm_ld_sched' which we currently know to
13458 only be true for the ARM8, ARM9 and StrongARM. If this ever
13459 changes, then the test below needs to be reworked. */
13460 if (nops == 2 && arm_ld_sched && add_offset != 0)
13461 return false;
13462
13463 /* XScale has load-store double instructions, but they have stricter
13464 alignment requirements than load-store multiple, so we cannot
13465 use them.
13466
13467 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13468 the pipeline until completion.
13469
13470 NREGS CYCLES
13471 1 3
13472 2 4
13473 3 5
13474 4 6
13475
13476 An ldr instruction takes 1-3 cycles, but does not block the
13477 pipeline.
13478
13479 NREGS CYCLES
13480 1 1-3
13481 2 2-6
13482 3 3-9
13483 4 4-12
13484
13485 Best case ldr will always win. However, the more ldr instructions
13486 we issue, the less likely we are to be able to schedule them well.
13487 Using ldr instructions also increases code size.
13488
13489 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13490 for counts of 3 or 4 regs. */
13491 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13492 return false;
13493 return true;
13494 }
13495
13496 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13497 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13498 an array ORDER that describes the sequence in which to access the
13499 offsets so that they come out in ascending order. In this sequence, each
13500 offset must be larger by exactly 4 than the previous one. ORDER[0]
13501 must have been filled in with the lowest offset by the caller.
13502 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13503 we use to verify that ORDER produces an ascending order of registers.
13504 Return true if it was possible to construct such an order, false if
13505 not. */
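/* E.g. for UNSORTED_OFFSETS {8, 4, 12, 0} with ORDER[0] preset to 3, the
   resulting ORDER is {3, 1, 0, 2}; an illustrative case in which the
   offsets form a contiguous run spaced by 4.  */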
13506
13507 static bool
13508 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13509 int *unsorted_regs)
13510 {
13511 int i;
13512 for (i = 1; i < nops; i++)
13513 {
13514 int j;
13515
13516 order[i] = order[i - 1];
13517 for (j = 0; j < nops; j++)
13518 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13519 {
13520 /* We must find exactly one offset that is higher than the
13521 previous one by 4. */
13522 if (order[i] != order[i - 1])
13523 return false;
13524 order[i] = j;
13525 }
13526 if (order[i] == order[i - 1])
13527 return false;
13528 /* The register numbers must be ascending. */
13529 if (unsorted_regs != NULL
13530 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13531 return false;
13532 }
13533 return true;
13534 }
13535
13536 /* Used to determine in a peephole whether a sequence of load
13537 instructions can be changed into a load-multiple instruction.
13538 NOPS is the number of separate load instructions we are examining. The
13539 first NOPS entries in OPERANDS are the destination registers, the
13540 next NOPS entries are memory operands. If this function is
13541 successful, *BASE is set to the common base register of the memory
13542 accesses; *LOAD_OFFSET is set to the first memory location's offset
13543 from that base register.
13544 REGS is an array filled in with the destination register numbers.
13545 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13546 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13547 the sequence of registers in REGS matches the loads from ascending memory
13548 locations, and the function verifies that the register numbers are
13549 themselves ascending. If CHECK_REGS is false, the register numbers
13550 are stored in the order they are found in the operands. */
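/* (Returns 0 on failure; otherwise a nonzero "ldm_case" code identifying
   the addressing variant, as assigned at the end of the function.)  */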
13551 static int
13552 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13553 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13554 {
13555 int unsorted_regs[MAX_LDM_STM_OPS];
13556 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13557 int order[MAX_LDM_STM_OPS];
13558 rtx base_reg_rtx = NULL;
13559 int base_reg = -1;
13560 int i, ldm_case;
13561
13562 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13563 easily extended if required. */
13564 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13565
13566 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13567
13568 /* Loop over the operands and check that the memory references are
13569 suitable (i.e. immediate offsets from the same base register). At
13570 the same time, extract the target register, and the memory
13571 offsets. */
13572 for (i = 0; i < nops; i++)
13573 {
13574 rtx reg;
13575 rtx offset;
13576
13577 /* Convert a subreg of a mem into the mem itself. */
13578 if (GET_CODE (operands[nops + i]) == SUBREG)
13579 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13580
13581 gcc_assert (MEM_P (operands[nops + i]));
13582
13583 /* Don't reorder volatile memory references; it doesn't seem worth
13584 looking for the case where the order is ok anyway. */
13585 if (MEM_VOLATILE_P (operands[nops + i]))
13586 return 0;
13587
13588 offset = const0_rtx;
13589
13590 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13591 || (GET_CODE (reg) == SUBREG
13592 && REG_P (reg = SUBREG_REG (reg))))
13593 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13594 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13595 || (GET_CODE (reg) == SUBREG
13596 && REG_P (reg = SUBREG_REG (reg))))
13597 && (CONST_INT_P (offset
13598 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13599 {
13600 if (i == 0)
13601 {
13602 base_reg = REGNO (reg);
13603 base_reg_rtx = reg;
13604 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13605 return 0;
13606 }
13607 else if (base_reg != (int) REGNO (reg))
13608 /* Not addressed from the same base register. */
13609 return 0;
13610
13611 unsorted_regs[i] = (REG_P (operands[i])
13612 ? REGNO (operands[i])
13613 : REGNO (SUBREG_REG (operands[i])));
13614
13615 /* If it isn't an integer register, or if it overwrites the
13616 base register but isn't the last insn in the list, then
13617 we can't do this. */
13618 if (unsorted_regs[i] < 0
13619 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13620 || unsorted_regs[i] > 14
13621 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13622 return 0;
13623
13624 /* Don't allow SP to be loaded unless it is also the base
13625 register. It guarantees that SP is reset correctly when
13626 an LDM instruction is interrupted. Otherwise, we might
13627 end up with a corrupt stack. */
13628 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13629 return 0;
13630
13631 unsorted_offsets[i] = INTVAL (offset);
13632 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13633 order[0] = i;
13634 }
13635 else
13636 /* Not a suitable memory address. */
13637 return 0;
13638 }
13639
13640 /* All the useful information has now been extracted from the
13641 operands into unsorted_regs and unsorted_offsets; additionally,
13642 order[0] has been set to the lowest offset in the list. Sort
13643 the offsets into order, verifying that they are adjacent, and
13644 check that the register numbers are ascending. */
13645 if (!compute_offset_order (nops, unsorted_offsets, order,
13646 check_regs ? unsorted_regs : NULL))
13647 return 0;
13648
13649 if (saved_order)
13650 memcpy (saved_order, order, sizeof order);
13651
13652 if (base)
13653 {
13654 *base = base_reg;
13655
13656 for (i = 0; i < nops; i++)
13657 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13658
13659 *load_offset = unsorted_offsets[order[0]];
13660 }
13661
13662 if (TARGET_THUMB1
13663 && !peep2_reg_dead_p (nops, base_reg_rtx))
13664 return 0;
13665
13666 if (unsorted_offsets[order[0]] == 0)
13667 ldm_case = 1; /* ldmia */
13668 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13669 ldm_case = 2; /* ldmib */
13670 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13671 ldm_case = 3; /* ldmda */
13672 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13673 ldm_case = 4; /* ldmdb */
13674 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13675 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13676 ldm_case = 5;
13677 else
13678 return 0;
13679
13680 if (!multiple_operation_profitable_p (false, nops,
13681 ldm_case == 5
13682 ? unsorted_offsets[order[0]] : 0))
13683 return 0;
13684
13685 return ldm_case;
13686 }
13687
13688 /* Used to determine in a peephole whether a sequence of store instructions can
13689 be changed into a store-multiple instruction.
13690 NOPS is the number of separate store instructions we are examining.
13691 NOPS_TOTAL is the total number of instructions recognized by the peephole
13692 pattern.
13693 The first NOPS entries in OPERANDS are the source registers, the next
13694 NOPS entries are memory operands. If this function is successful, *BASE is
13695 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13696 to the first memory location's offset from that base register. REGS is an
13697 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13698 likewise filled with the corresponding rtx's.
13699 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13700 numbers to an ascending order of stores.
13701 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13702 from ascending memory locations, and the function verifies that the register
13703 numbers are themselves ascending. If CHECK_REGS is false, the register
13704 numbers are stored in the order they are found in the operands. */
13705 static int
13706 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13707 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13708 HOST_WIDE_INT *load_offset, bool check_regs)
13709 {
13710 int unsorted_regs[MAX_LDM_STM_OPS];
13711 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13712 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13713 int order[MAX_LDM_STM_OPS];
13714 int base_reg = -1;
13715 rtx base_reg_rtx = NULL;
13716 int i, stm_case;
13717
13718 /* Write-back of the base register is currently only supported for Thumb-1. */
13719 int base_writeback = TARGET_THUMB1;
13720
13721 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13722 easily extended if required. */
13723 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13724
13725 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13726
13727 /* Loop over the operands and check that the memory references are
13728 suitable (i.e. immediate offsets from the same base register). At
13729 the same time, extract the target register, and the memory
13730 offsets. */
13731 for (i = 0; i < nops; i++)
13732 {
13733 rtx reg;
13734 rtx offset;
13735
13736 /* Convert a subreg of a mem into the mem itself. */
13737 if (GET_CODE (operands[nops + i]) == SUBREG)
13738 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13739
13740 gcc_assert (MEM_P (operands[nops + i]));
13741
13742 /* Don't reorder volatile memory references; it doesn't seem worth
13743 looking for the case where the order is ok anyway. */
13744 if (MEM_VOLATILE_P (operands[nops + i]))
13745 return 0;
13746
13747 offset = const0_rtx;
13748
13749 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13750 || (GET_CODE (reg) == SUBREG
13751 && REG_P (reg = SUBREG_REG (reg))))
13752 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13753 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13754 || (GET_CODE (reg) == SUBREG
13755 && REG_P (reg = SUBREG_REG (reg))))
13756 && (CONST_INT_P (offset
13757 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13758 {
13759 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13760 ? operands[i] : SUBREG_REG (operands[i]));
13761 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13762
13763 if (i == 0)
13764 {
13765 base_reg = REGNO (reg);
13766 base_reg_rtx = reg;
13767 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13768 return 0;
13769 }
13770 else if (base_reg != (int) REGNO (reg))
13771 /* Not addressed from the same base register. */
13772 return 0;
13773
13774 /* If it isn't an integer register, then we can't do this. */
13775 if (unsorted_regs[i] < 0
13776 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13777 /* The effects are unpredictable if the base register is
13778 both updated and stored. */
13779 || (base_writeback && unsorted_regs[i] == base_reg)
13780 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13781 || unsorted_regs[i] > 14)
13782 return 0;
13783
13784 unsorted_offsets[i] = INTVAL (offset);
13785 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13786 order[0] = i;
13787 }
13788 else
13789 /* Not a suitable memory address. */
13790 return 0;
13791 }
13792
13793 /* All the useful information has now been extracted from the
13794 operands into unsorted_regs and unsorted_offsets; additionally,
13795 order[0] has been set to the lowest offset in the list. Sort
13796 the offsets into order, verifying that they are adjacent, and
13797 check that the register numbers are ascending. */
13798 if (!compute_offset_order (nops, unsorted_offsets, order,
13799 check_regs ? unsorted_regs : NULL))
13800 return 0;
13801
13802 if (saved_order)
13803 memcpy (saved_order, order, sizeof order);
13804
13805 if (base)
13806 {
13807 *base = base_reg;
13808
13809 for (i = 0; i < nops; i++)
13810 {
13811 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13812 if (reg_rtxs)
13813 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13814 }
13815
13816 *load_offset = unsorted_offsets[order[0]];
13817 }
13818
13819 if (TARGET_THUMB1
13820 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13821 return 0;
13822
13823 if (unsorted_offsets[order[0]] == 0)
13824 stm_case = 1; /* stmia */
13825 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13826 stm_case = 2; /* stmib */
13827 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13828 stm_case = 3; /* stmda */
13829 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13830 stm_case = 4; /* stmdb */
13831 else
13832 return 0;
13833
13834 if (!multiple_operation_profitable_p (false, nops, 0))
13835 return 0;
13836
13837 return stm_case;
13838 }
13839 \f
13840 /* Routines for use in generating RTL. */
13841
13842 /* Generate a load-multiple instruction. COUNT is the number of loads in
13843 the instruction; REGS and MEMS are arrays containing the operands.
13844 BASEREG is the base register to be used in addressing the memory operands.
13845 WBACK_OFFSET, if nonzero, is the amount by which the instruction should
13846 update the base register. */
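/* E.g. for COUNT == 2 and WBACK_OFFSET == 8 the generated pattern is,
   schematically,
   (parallel [(set basereg (plus basereg (const_int 8)))
   (set (reg REGS[0]) MEMS[0])
   (set (reg REGS[1]) MEMS[1])])
   (an illustrative sketch of the write-back case handled below).  */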
13847
13848 static rtx
13849 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13850 HOST_WIDE_INT wback_offset)
13851 {
13852 int i = 0, j;
13853 rtx result;
13854
13855 if (!multiple_operation_profitable_p (false, count, 0))
13856 {
13857 rtx seq;
13858
13859 start_sequence ();
13860
13861 for (i = 0; i < count; i++)
13862 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13863
13864 if (wback_offset != 0)
13865 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13866
13867 seq = get_insns ();
13868 end_sequence ();
13869
13870 return seq;
13871 }
13872
13873 result = gen_rtx_PARALLEL (VOIDmode,
13874 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13875 if (wback_offset != 0)
13876 {
13877 XVECEXP (result, 0, 0)
13878 = gen_rtx_SET (VOIDmode, basereg,
13879 plus_constant (Pmode, basereg, wback_offset));
13880 i = 1;
13881 count++;
13882 }
13883
13884 for (j = 0; i < count; i++, j++)
13885 XVECEXP (result, 0, i)
13886 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13887
13888 return result;
13889 }
13890
13891 /* Generate a store-multiple instruction. COUNT is the number of stores in
13892 the instruction; REGS and MEMS are arrays containing the operands.
13893 BASEREG is the base register to be used in addressing the memory operands.
13894 WBACK_OFFSET, if nonzero, is the amount by which the instruction should
13895 update the base register. */
13896
13897 static rtx
13898 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13899 HOST_WIDE_INT wback_offset)
13900 {
13901 int i = 0, j;
13902 rtx result;
13903
13904 if (GET_CODE (basereg) == PLUS)
13905 basereg = XEXP (basereg, 0);
13906
13907 if (!multiple_operation_profitable_p (false, count, 0))
13908 {
13909 rtx seq;
13910
13911 start_sequence ();
13912
13913 for (i = 0; i < count; i++)
13914 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13915
13916 if (wback_offset != 0)
13917 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13918
13919 seq = get_insns ();
13920 end_sequence ();
13921
13922 return seq;
13923 }
13924
13925 result = gen_rtx_PARALLEL (VOIDmode,
13926 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13927 if (wback_offset != 0)
13928 {
13929 XVECEXP (result, 0, 0)
13930 = gen_rtx_SET (VOIDmode, basereg,
13931 plus_constant (Pmode, basereg, wback_offset));
13932 i = 1;
13933 count++;
13934 }
13935
13936 for (j = 0; i < count; i++, j++)
13937 XVECEXP (result, 0, i)
13938 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13939
13940 return result;
13941 }
13942
13943 /* Generate either a load-multiple or a store-multiple instruction. This
13944 function can be used in situations where we can start with a single MEM
13945 rtx and adjust its address upwards.
13946 COUNT is the number of operations in the instruction, not counting a
13947 possible update of the base register. REGS is an array containing the
13948 register operands.
13949 BASEREG is the base register to be used in addressing the memory operands,
13950 which are constructed from BASEMEM.
13951 WRITE_BACK specifies whether the generated instruction should include an
13952 update of the base register.
13953 OFFSETP is used to pass an offset to and from this function; this offset
13954 is not used when constructing the address (instead BASEMEM should have an
13955 appropriate offset in its address), it is used only for setting
13956 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13957
13958 static rtx
13959 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13960 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13961 {
13962 rtx mems[MAX_LDM_STM_OPS];
13963 HOST_WIDE_INT offset = *offsetp;
13964 int i;
13965
13966 gcc_assert (count <= MAX_LDM_STM_OPS);
13967
13968 if (GET_CODE (basereg) == PLUS)
13969 basereg = XEXP (basereg, 0);
13970
13971 for (i = 0; i < count; i++)
13972 {
13973 rtx addr = plus_constant (Pmode, basereg, i * 4);
13974 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13975 offset += 4;
13976 }
13977
13978 if (write_back)
13979 *offsetp = offset;
13980
13981 if (is_load)
13982 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13983 write_back ? 4 * count : 0);
13984 else
13985 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13986 write_back ? 4 * count : 0);
13987 }
13988
13989 rtx
13990 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13991 rtx basemem, HOST_WIDE_INT *offsetp)
13992 {
13993 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13994 offsetp);
13995 }
13996
13997 rtx
13998 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13999 rtx basemem, HOST_WIDE_INT *offsetp)
14000 {
14001 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14002 offsetp);
14003 }
14004
14005 /* Called from a peephole2 expander to turn a sequence of loads into an
14006 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14007 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14008 is true if we can reorder the registers because they are subsequently used
14009 commutatively.
14010 Returns true iff we could generate a new instruction. */
14011
14012 bool
14013 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14014 {
14015 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14016 rtx mems[MAX_LDM_STM_OPS];
14017 int i, j, base_reg;
14018 rtx base_reg_rtx;
14019 HOST_WIDE_INT offset;
14020 int write_back = FALSE;
14021 int ldm_case;
14022 rtx addr;
14023
14024 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14025 &base_reg, &offset, !sort_regs);
14026
14027 if (ldm_case == 0)
14028 return false;
14029
14030 if (sort_regs)
14031 for (i = 0; i < nops - 1; i++)
14032 for (j = i + 1; j < nops; j++)
14033 if (regs[i] > regs[j])
14034 {
14035 int t = regs[i];
14036 regs[i] = regs[j];
14037 regs[j] = t;
14038 }
14039 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14040
14041 if (TARGET_THUMB1)
14042 {
14043 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14044 gcc_assert (ldm_case == 1 || ldm_case == 5);
14045 write_back = TRUE;
14046 }
14047
14048 if (ldm_case == 5)
14049 {
14050 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14051 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14052 offset = 0;
14053 if (!TARGET_THUMB1)
14054 {
14055 base_reg = regs[0];
14056 base_reg_rtx = newbase;
14057 }
14058 }
14059
14060 for (i = 0; i < nops; i++)
14061 {
14062 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14063 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14064 SImode, addr, 0);
14065 }
14066 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14067 write_back ? offset + i * 4 : 0));
14068 return true;
14069 }
14070
14071 /* Called from a peephole2 expander to turn a sequence of stores into an
14072 STM instruction. OPERANDS are the operands found by the peephole matcher;
14073 NOPS indicates how many separate stores we are trying to combine.
14074 Returns true iff we could generate a new instruction. */
14075
14076 bool
14077 gen_stm_seq (rtx *operands, int nops)
14078 {
14079 int i;
14080 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14081 rtx mems[MAX_LDM_STM_OPS];
14082 int base_reg;
14083 rtx base_reg_rtx;
14084 HOST_WIDE_INT offset;
14085 int write_back = FALSE;
14086 int stm_case;
14087 rtx addr;
14088 bool base_reg_dies;
14089
14090 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14091 mem_order, &base_reg, &offset, true);
14092
14093 if (stm_case == 0)
14094 return false;
14095
14096 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14097
14098 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14099 if (TARGET_THUMB1)
14100 {
14101 gcc_assert (base_reg_dies);
14102 write_back = TRUE;
14103 }
14104
14105 if (stm_case == 5)
14106 {
14107 gcc_assert (base_reg_dies);
14108 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14109 offset = 0;
14110 }
14111
14112 addr = plus_constant (Pmode, base_reg_rtx, offset);
14113
14114 for (i = 0; i < nops; i++)
14115 {
14116 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14117 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14118 SImode, addr, 0);
14119 }
14120 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14121 write_back ? offset + i * 4 : 0));
14122 return true;
14123 }
14124
14125 /* Called from a peephole2 expander to turn a sequence of stores that are
14126 preceded by constant loads into an STM instruction. OPERANDS are the
14127 operands found by the peephole matcher; NOPS indicates how many
14128 separate stores we are trying to combine; there are 2 * NOPS
14129 instructions in the peephole.
14130 Returns true iff we could generate a new instruction. */
14131
14132 bool
14133 gen_const_stm_seq (rtx *operands, int nops)
14134 {
14135 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14136 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14137 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14138 rtx mems[MAX_LDM_STM_OPS];
14139 int base_reg;
14140 rtx base_reg_rtx;
14141 HOST_WIDE_INT offset;
14142 int write_back = FALSE;
14143 int stm_case;
14144 rtx addr;
14145 bool base_reg_dies;
14146 int i, j;
14147 HARD_REG_SET allocated;
14148
14149 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14150 mem_order, &base_reg, &offset, false);
14151
14152 if (stm_case == 0)
14153 return false;
14154
14155 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14156
14157 /* If the same register is used more than once, try to find a free
14158 register. */
14159 CLEAR_HARD_REG_SET (allocated);
14160 for (i = 0; i < nops; i++)
14161 {
14162 for (j = i + 1; j < nops; j++)
14163 if (regs[i] == regs[j])
14164 {
14165 rtx t = peep2_find_free_register (0, nops * 2,
14166 TARGET_THUMB1 ? "l" : "r",
14167 SImode, &allocated);
14168 if (t == NULL_RTX)
14169 return false;
14170 reg_rtxs[i] = t;
14171 regs[i] = REGNO (t);
14172 }
14173 }
14174
14175 /* Compute an ordering that maps the register numbers to an ascending
14176 sequence. */
14177 reg_order[0] = 0;
14178 for (i = 0; i < nops; i++)
14179 if (regs[i] < regs[reg_order[0]])
14180 reg_order[0] = i;
14181
14182 for (i = 1; i < nops; i++)
14183 {
14184 int this_order = reg_order[i - 1];
14185 for (j = 0; j < nops; j++)
14186 if (regs[j] > regs[reg_order[i - 1]]
14187 && (this_order == reg_order[i - 1]
14188 || regs[j] < regs[this_order]))
14189 this_order = j;
14190 reg_order[i] = this_order;
14191 }
14192
14193 /* Ensure that registers that must be live after the instruction end
14194 up with the correct value. */
14195 for (i = 0; i < nops; i++)
14196 {
14197 int this_order = reg_order[i];
14198 if ((this_order != mem_order[i]
14199 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14200 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14201 return false;
14202 }
14203
14204 /* Load the constants. */
14205 for (i = 0; i < nops; i++)
14206 {
14207 rtx op = operands[2 * nops + mem_order[i]];
14208 sorted_regs[i] = regs[reg_order[i]];
14209 emit_move_insn (reg_rtxs[reg_order[i]], op);
14210 }
14211
14212 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14213
14214 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14215 if (TARGET_THUMB1)
14216 {
14217 gcc_assert (base_reg_dies);
14218 write_back = TRUE;
14219 }
14220
14221 if (stm_case == 5)
14222 {
14223 gcc_assert (base_reg_dies);
14224 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14225 offset = 0;
14226 }
14227
14228 addr = plus_constant (Pmode, base_reg_rtx, offset);
14229
14230 for (i = 0; i < nops; i++)
14231 {
14232 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14233 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14234 SImode, addr, 0);
14235 }
14236 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14237 write_back ? offset + i * 4 : 0));
14238 return true;
14239 }
14240
14241 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14242 unaligned copies on processors which support unaligned semantics for those
14243 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14244 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14245 An interleave factor of 1 (the minimum) will perform no interleaving.
14246 Load/store multiple are used for aligned addresses where possible. */
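/* E.g. an interleave factor of 2 emits load/load/store/store for each
   8-byte block, keeping two words in flight, whereas a factor of 1 emits
   a plain load/store per word (a sketch of the intent only).  */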
14247
14248 static void
14249 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14250 HOST_WIDE_INT length,
14251 unsigned int interleave_factor)
14252 {
14253 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14254 int *regnos = XALLOCAVEC (int, interleave_factor);
14255 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14256 HOST_WIDE_INT i, j;
14257 HOST_WIDE_INT remaining = length, words;
14258 rtx halfword_tmp = NULL, byte_tmp = NULL;
14259 rtx dst, src;
14260 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14261 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14262 HOST_WIDE_INT srcoffset, dstoffset;
14263 HOST_WIDE_INT src_autoinc, dst_autoinc;
14264 rtx mem, addr;
14265
14266 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14267
14268 /* Use hard registers if we have aligned source or destination so we can use
14269 load/store multiple with contiguous registers. */
14270 if (dst_aligned || src_aligned)
14271 for (i = 0; i < interleave_factor; i++)
14272 regs[i] = gen_rtx_REG (SImode, i);
14273 else
14274 for (i = 0; i < interleave_factor; i++)
14275 regs[i] = gen_reg_rtx (SImode);
14276
14277 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14278 src = copy_addr_to_reg (XEXP (srcbase, 0));
14279
14280 srcoffset = dstoffset = 0;
14281
14282 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14283 For copying the last bytes we want to subtract this offset again. */
14284 src_autoinc = dst_autoinc = 0;
14285
14286 for (i = 0; i < interleave_factor; i++)
14287 regnos[i] = i;
14288
14289 /* Copy BLOCK_SIZE_BYTES chunks. */
14290
14291 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14292 {
14293 /* Load words. */
14294 if (src_aligned && interleave_factor > 1)
14295 {
14296 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14297 TRUE, srcbase, &srcoffset));
14298 src_autoinc += UNITS_PER_WORD * interleave_factor;
14299 }
14300 else
14301 {
14302 for (j = 0; j < interleave_factor; j++)
14303 {
14304 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14305 - src_autoinc));
14306 mem = adjust_automodify_address (srcbase, SImode, addr,
14307 srcoffset + j * UNITS_PER_WORD);
14308 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14309 }
14310 srcoffset += block_size_bytes;
14311 }
14312
14313 /* Store words. */
14314 if (dst_aligned && interleave_factor > 1)
14315 {
14316 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14317 TRUE, dstbase, &dstoffset));
14318 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14319 }
14320 else
14321 {
14322 for (j = 0; j < interleave_factor; j++)
14323 {
14324 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14325 - dst_autoinc));
14326 mem = adjust_automodify_address (dstbase, SImode, addr,
14327 dstoffset + j * UNITS_PER_WORD);
14328 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14329 }
14330 dstoffset += block_size_bytes;
14331 }
14332
14333 remaining -= block_size_bytes;
14334 }
14335
14336 /* Copy any whole words left (note these aren't interleaved with any
14337 subsequent halfword/byte load/stores in the interests of simplicity). */
14338
14339 words = remaining / UNITS_PER_WORD;
14340
14341 gcc_assert (words < interleave_factor);
14342
14343 if (src_aligned && words > 1)
14344 {
14345 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14346 &srcoffset));
14347 src_autoinc += UNITS_PER_WORD * words;
14348 }
14349 else
14350 {
14351 for (j = 0; j < words; j++)
14352 {
14353 addr = plus_constant (Pmode, src,
14354 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14355 mem = adjust_automodify_address (srcbase, SImode, addr,
14356 srcoffset + j * UNITS_PER_WORD);
14357 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14358 }
14359 srcoffset += words * UNITS_PER_WORD;
14360 }
14361
14362 if (dst_aligned && words > 1)
14363 {
14364 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14365 &dstoffset));
14366 dst_autoinc += words * UNITS_PER_WORD;
14367 }
14368 else
14369 {
14370 for (j = 0; j < words; j++)
14371 {
14372 addr = plus_constant (Pmode, dst,
14373 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14374 mem = adjust_automodify_address (dstbase, SImode, addr,
14375 dstoffset + j * UNITS_PER_WORD);
14376 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14377 }
14378 dstoffset += words * UNITS_PER_WORD;
14379 }
14380
14381 remaining -= words * UNITS_PER_WORD;
14382
14383 gcc_assert (remaining < 4);
14384
14385 /* Copy a halfword if necessary. */
14386
14387 if (remaining >= 2)
14388 {
14389 halfword_tmp = gen_reg_rtx (SImode);
14390
14391 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14392 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14393 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14394
14395 /* Either write out immediately, or delay until we've loaded the last
14396 byte, depending on interleave factor. */
14397 if (interleave_factor == 1)
14398 {
14399 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14400 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14401 emit_insn (gen_unaligned_storehi (mem,
14402 gen_lowpart (HImode, halfword_tmp)));
14403 halfword_tmp = NULL;
14404 dstoffset += 2;
14405 }
14406
14407 remaining -= 2;
14408 srcoffset += 2;
14409 }
14410
14411 gcc_assert (remaining < 2);
14412
14413 /* Copy last byte. */
14414
14415 if ((remaining & 1) != 0)
14416 {
14417 byte_tmp = gen_reg_rtx (SImode);
14418
14419 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14420 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14421 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14422
14423 if (interleave_factor == 1)
14424 {
14425 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14426 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14427 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14428 byte_tmp = NULL;
14429 dstoffset++;
14430 }
14431
14432 remaining--;
14433 srcoffset++;
14434 }
14435
14436 /* Store last halfword if we haven't done so already. */
14437
14438 if (halfword_tmp)
14439 {
14440 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14441 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14442 emit_insn (gen_unaligned_storehi (mem,
14443 gen_lowpart (HImode, halfword_tmp)));
14444 dstoffset += 2;
14445 }
14446
14447 /* Likewise for last byte. */
14448
14449 if (byte_tmp)
14450 {
14451 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14452 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14453 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14454 dstoffset++;
14455 }
14456
14457 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14458 }
14459
14460 /* From mips_adjust_block_mem:
14461
14462 Helper function for doing a loop-based block operation on memory
14463 reference MEM. Each iteration of the loop will operate on LENGTH
14464 bytes of MEM.
14465
14466 Create a new base register for use within the loop and point it to
14467 the start of MEM. Create a new memory reference that uses this
14468 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14469
14470 static void
14471 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14472 rtx *loop_mem)
14473 {
14474 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14475
14476 /* Although the new mem does not refer to a known location,
14477 it does keep up to LENGTH bytes of alignment. */
14478 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14479 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14480 }
14481
14482 /* From mips_block_move_loop:
14483
14484 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14485 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14486 the memory regions do not overlap. */
14487
14488 static void
14489 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14490 unsigned int interleave_factor,
14491 HOST_WIDE_INT bytes_per_iter)
14492 {
14493 rtx label, src_reg, dest_reg, final_src, test;
14494 HOST_WIDE_INT leftover;
14495
14496 leftover = length % bytes_per_iter;
14497 length -= leftover;
14498
14499 /* Create registers and memory references for use within the loop. */
14500 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14501 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14502
14503 /* Calculate the value that SRC_REG should have after the last iteration of
14504 the loop. */
14505 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14506 0, 0, OPTAB_WIDEN);
14507
14508 /* Emit the start of the loop. */
14509 label = gen_label_rtx ();
14510 emit_label (label);
14511
14512 /* Emit the loop body. */
14513 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14514 interleave_factor);
14515
14516 /* Move on to the next block. */
14517 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14518 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14519
14520 /* Emit the loop condition. */
14521 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14522 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14523
14524 /* Mop up any left-over bytes. */
14525 if (leftover)
14526 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14527 }
14528
14529 /* Emit a block move when either the source or destination is unaligned (not
14530 aligned to a four-byte boundary). This may need further tuning depending on
14531 core type, optimize_size setting, etc. */
14532
14533 static int
14534 arm_movmemqi_unaligned (rtx *operands)
14535 {
14536 HOST_WIDE_INT length = INTVAL (operands[2]);
14537
14538 if (optimize_size)
14539 {
14540 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14541 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14542 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14543 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14544 or dst_aligned though: allow more interleaving in those cases since the
14545 resulting code can be smaller. */
14546 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14547 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14548
14549 if (length > 12)
14550 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14551 interleave_factor, bytes_per_iter);
14552 else
14553 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14554 interleave_factor);
14555 }
14556 else
14557 {
14558 /* Note that the loop created by arm_block_move_unaligned_loop may be
14559 subject to loop unrolling, which makes tuning this condition a little
14560 redundant. */
14561 if (length > 32)
14562 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14563 else
14564 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14565 }
14566
14567 return 1;
14568 }
14569
14570 int
14571 arm_gen_movmemqi (rtx *operands)
14572 {
14573 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14574 HOST_WIDE_INT srcoffset, dstoffset;
14575 int i;
14576 rtx src, dst, srcbase, dstbase;
14577 rtx part_bytes_reg = NULL;
14578 rtx mem;
14579
14580 if (!CONST_INT_P (operands[2])
14581 || !CONST_INT_P (operands[3])
14582 || INTVAL (operands[2]) > 64)
14583 return 0;
14584
14585 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14586 return arm_movmemqi_unaligned (operands);
14587
14588 if (INTVAL (operands[3]) & 3)
14589 return 0;
14590
14591 dstbase = operands[0];
14592 srcbase = operands[1];
14593
14594 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14595 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14596
14597 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14598 out_words_to_go = INTVAL (operands[2]) / 4;
14599 last_bytes = INTVAL (operands[2]) & 3;
14600 dstoffset = srcoffset = 0;
14601
14602 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14603 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14604
14605 for (i = 0; in_words_to_go >= 2; i+=4)
14606 {
14607 if (in_words_to_go > 4)
14608 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14609 TRUE, srcbase, &srcoffset));
14610 else
14611 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14612 src, FALSE, srcbase,
14613 &srcoffset));
14614
14615 if (out_words_to_go)
14616 {
14617 if (out_words_to_go > 4)
14618 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14619 TRUE, dstbase, &dstoffset));
14620 else if (out_words_to_go != 1)
14621 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14622 out_words_to_go, dst,
14623 (last_bytes == 0
14624 ? FALSE : TRUE),
14625 dstbase, &dstoffset));
14626 else
14627 {
14628 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14629 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14630 if (last_bytes != 0)
14631 {
14632 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14633 dstoffset += 4;
14634 }
14635 }
14636 }
14637
14638 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14639 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14640 }
14641
14642 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14643 if (out_words_to_go)
14644 {
14645 rtx sreg;
14646
14647 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14648 sreg = copy_to_reg (mem);
14649
14650 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14651 emit_move_insn (mem, sreg);
14652 in_words_to_go--;
14653
14654 gcc_assert (!in_words_to_go); /* Sanity check */
14655 }
14656
14657 if (in_words_to_go)
14658 {
14659 gcc_assert (in_words_to_go > 0);
14660
14661 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14662 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14663 }
14664
14665 gcc_assert (!last_bytes || part_bytes_reg);
14666
14667 if (BYTES_BIG_ENDIAN && last_bytes)
14668 {
14669 rtx tmp = gen_reg_rtx (SImode);
14670
14671 /* The bytes we want are in the top end of the word. */
14672 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14673 GEN_INT (8 * (4 - last_bytes))));
14674 part_bytes_reg = tmp;
14675
14676 while (last_bytes)
14677 {
14678 mem = adjust_automodify_address (dstbase, QImode,
14679 plus_constant (Pmode, dst,
14680 last_bytes - 1),
14681 dstoffset + last_bytes - 1);
14682 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14683
14684 if (--last_bytes)
14685 {
14686 tmp = gen_reg_rtx (SImode);
14687 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14688 part_bytes_reg = tmp;
14689 }
14690 }
14691
14692 }
14693 else
14694 {
14695 if (last_bytes > 1)
14696 {
14697 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14698 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14699 last_bytes -= 2;
14700 if (last_bytes)
14701 {
14702 rtx tmp = gen_reg_rtx (SImode);
14703 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14704 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14705 part_bytes_reg = tmp;
14706 dstoffset += 2;
14707 }
14708 }
14709
14710 if (last_bytes)
14711 {
14712 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14713 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14714 }
14715 }
14716
14717 return 1;
14718 }
14719
14720 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14721 by mode size. */
14722 inline static rtx
14723 next_consecutive_mem (rtx mem)
14724 {
14725 enum machine_mode mode = GET_MODE (mem);
14726 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14727 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14728
14729 return adjust_automodify_address (mem, mode, addr, offset);
14730 }
14731
14732 /* Copy using LDRD/STRD instructions whenever possible.
14733 Returns true upon success. */
14734 bool
14735 gen_movmem_ldrd_strd (rtx *operands)
14736 {
14737 unsigned HOST_WIDE_INT len;
14738 HOST_WIDE_INT align;
14739 rtx src, dst, base;
14740 rtx reg0;
14741 bool src_aligned, dst_aligned;
14742 bool src_volatile, dst_volatile;
14743
14744 gcc_assert (CONST_INT_P (operands[2]));
14745 gcc_assert (CONST_INT_P (operands[3]));
14746
14747 len = UINTVAL (operands[2]);
14748 if (len > 64)
14749 return false;
14750
14751 /* Maximum alignment we can assume for both src and dst buffers. */
14752 align = INTVAL (operands[3]);
14753
14754 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14755 return false;
14756
14757 /* Place src and dst addresses in registers
14758 and update the corresponding mem rtx. */
14759 dst = operands[0];
14760 dst_volatile = MEM_VOLATILE_P (dst);
14761 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14762 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14763 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14764
14765 src = operands[1];
14766 src_volatile = MEM_VOLATILE_P (src);
14767 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14768 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14769 src = adjust_automodify_address (src, VOIDmode, base, 0);
14770
14771 if (!unaligned_access && !(src_aligned && dst_aligned))
14772 return false;
14773
14774 if (src_volatile || dst_volatile)
14775 return false;
14776
14777 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14778 if (!(dst_aligned || src_aligned))
14779 return arm_gen_movmemqi (operands);
14780
14781 src = adjust_address (src, DImode, 0);
14782 dst = adjust_address (dst, DImode, 0);
14783 while (len >= 8)
14784 {
14785 len -= 8;
14786 reg0 = gen_reg_rtx (DImode);
14787 if (src_aligned)
14788 emit_move_insn (reg0, src);
14789 else
14790 emit_insn (gen_unaligned_loaddi (reg0, src));
14791
14792 if (dst_aligned)
14793 emit_move_insn (dst, reg0);
14794 else
14795 emit_insn (gen_unaligned_storedi (dst, reg0));
14796
14797 src = next_consecutive_mem (src);
14798 dst = next_consecutive_mem (dst);
14799 }
14800
14801 gcc_assert (len < 8);
14802 if (len >= 4)
14803 {
14804       /* At least a word but less than a double-word remains; copy one word.  */
14805 reg0 = gen_reg_rtx (SImode);
14806 src = adjust_address (src, SImode, 0);
14807 dst = adjust_address (dst, SImode, 0);
14808 if (src_aligned)
14809 emit_move_insn (reg0, src);
14810 else
14811 emit_insn (gen_unaligned_loadsi (reg0, src));
14812
14813 if (dst_aligned)
14814 emit_move_insn (dst, reg0);
14815 else
14816 emit_insn (gen_unaligned_storesi (dst, reg0));
14817
14818 src = next_consecutive_mem (src);
14819 dst = next_consecutive_mem (dst);
14820 len -= 4;
14821 }
14822
14823 if (len == 0)
14824 return true;
14825
14826 /* Copy the remaining bytes. */
14827 if (len >= 2)
14828 {
14829 dst = adjust_address (dst, HImode, 0);
14830 src = adjust_address (src, HImode, 0);
14831 reg0 = gen_reg_rtx (SImode);
14832 if (src_aligned)
14833 emit_insn (gen_zero_extendhisi2 (reg0, src));
14834 else
14835 emit_insn (gen_unaligned_loadhiu (reg0, src));
14836
14837 if (dst_aligned)
14838 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14839 else
14840 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14841
14842 src = next_consecutive_mem (src);
14843 dst = next_consecutive_mem (dst);
14844 if (len == 2)
14845 return true;
14846 }
14847
14848 dst = adjust_address (dst, QImode, 0);
14849 src = adjust_address (src, QImode, 0);
14850 reg0 = gen_reg_rtx (QImode);
14851 emit_move_insn (reg0, src);
14852 emit_move_insn (dst, reg0);
14853 return true;
14854 }
14855
14856 /* Select a dominance comparison mode if possible for a test of the general
14857 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14858 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14859 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14860 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14861 In all cases OP will be either EQ or NE, but we don't need to know which
14862 here. If we are unable to support a dominance comparison we return
14863 CC mode. This will then fail to match for the RTL expressions that
14864 generate this call. */
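/* Illustrative example (not in the original sources): for a source test such
   as (a == 0 || b == 0), X and Y are both EQ comparisons and COND_OR is
   DOM_CC_X_OR_Y, so we return CC_DEQmode; the two comparisons can then be
   folded into a compare followed by a conditional compare (cmp / cmpne).  */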
14865 enum machine_mode
14866 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14867 {
14868 enum rtx_code cond1, cond2;
14869 int swapped = 0;
14870
14871 /* Currently we will probably get the wrong result if the individual
14872 comparisons are not simple. This also ensures that it is safe to
14873 reverse a comparison if necessary. */
14874 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14875 != CCmode)
14876 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14877 != CCmode))
14878 return CCmode;
14879
14880 /* The if_then_else variant of this tests the second condition if the
14881 first passes, but is true if the first fails. Reverse the first
14882 condition to get a true "inclusive-or" expression. */
14883 if (cond_or == DOM_CC_NX_OR_Y)
14884 cond1 = reverse_condition (cond1);
14885
14886 /* If the comparisons are not equal, and one doesn't dominate the other,
14887 then we can't do this. */
14888 if (cond1 != cond2
14889 && !comparison_dominates_p (cond1, cond2)
14890 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14891 return CCmode;
14892
14893 if (swapped)
14894 {
14895 enum rtx_code temp = cond1;
14896 cond1 = cond2;
14897 cond2 = temp;
14898 }
14899
14900 switch (cond1)
14901 {
14902 case EQ:
14903 if (cond_or == DOM_CC_X_AND_Y)
14904 return CC_DEQmode;
14905
14906 switch (cond2)
14907 {
14908 case EQ: return CC_DEQmode;
14909 case LE: return CC_DLEmode;
14910 case LEU: return CC_DLEUmode;
14911 case GE: return CC_DGEmode;
14912 case GEU: return CC_DGEUmode;
14913 default: gcc_unreachable ();
14914 }
14915
14916 case LT:
14917 if (cond_or == DOM_CC_X_AND_Y)
14918 return CC_DLTmode;
14919
14920 switch (cond2)
14921 {
14922 case LT:
14923 return CC_DLTmode;
14924 case LE:
14925 return CC_DLEmode;
14926 case NE:
14927 return CC_DNEmode;
14928 default:
14929 gcc_unreachable ();
14930 }
14931
14932 case GT:
14933 if (cond_or == DOM_CC_X_AND_Y)
14934 return CC_DGTmode;
14935
14936 switch (cond2)
14937 {
14938 case GT:
14939 return CC_DGTmode;
14940 case GE:
14941 return CC_DGEmode;
14942 case NE:
14943 return CC_DNEmode;
14944 default:
14945 gcc_unreachable ();
14946 }
14947
14948 case LTU:
14949 if (cond_or == DOM_CC_X_AND_Y)
14950 return CC_DLTUmode;
14951
14952 switch (cond2)
14953 {
14954 case LTU:
14955 return CC_DLTUmode;
14956 case LEU:
14957 return CC_DLEUmode;
14958 case NE:
14959 return CC_DNEmode;
14960 default:
14961 gcc_unreachable ();
14962 }
14963
14964 case GTU:
14965 if (cond_or == DOM_CC_X_AND_Y)
14966 return CC_DGTUmode;
14967
14968 switch (cond2)
14969 {
14970 case GTU:
14971 return CC_DGTUmode;
14972 case GEU:
14973 return CC_DGEUmode;
14974 case NE:
14975 return CC_DNEmode;
14976 default:
14977 gcc_unreachable ();
14978 }
14979
14980 /* The remaining cases only occur when both comparisons are the
14981 same. */
14982 case NE:
14983 gcc_assert (cond1 == cond2);
14984 return CC_DNEmode;
14985
14986 case LE:
14987 gcc_assert (cond1 == cond2);
14988 return CC_DLEmode;
14989
14990 case GE:
14991 gcc_assert (cond1 == cond2);
14992 return CC_DGEmode;
14993
14994 case LEU:
14995 gcc_assert (cond1 == cond2);
14996 return CC_DLEUmode;
14997
14998 case GEU:
14999 gcc_assert (cond1 == cond2);
15000 return CC_DGEUmode;
15001
15002 default:
15003 gcc_unreachable ();
15004 }
15005 }
15006
15007 enum machine_mode
15008 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15009 {
15010 /* All floating point compares return CCFP if it is an equality
15011 comparison, and CCFPE otherwise. */
15012 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15013 {
15014 switch (op)
15015 {
15016 case EQ:
15017 case NE:
15018 case UNORDERED:
15019 case ORDERED:
15020 case UNLT:
15021 case UNLE:
15022 case UNGT:
15023 case UNGE:
15024 case UNEQ:
15025 case LTGT:
15026 return CCFPmode;
15027
15028 case LT:
15029 case LE:
15030 case GT:
15031 case GE:
15032 return CCFPEmode;
15033
15034 default:
15035 gcc_unreachable ();
15036 }
15037 }
15038
15039 /* A compare with a shifted operand. Because of canonicalization, the
15040 comparison will have to be swapped when we emit the assembler. */
15041 if (GET_MODE (y) == SImode
15042 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15043 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15044 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15045 || GET_CODE (x) == ROTATERT))
15046 return CC_SWPmode;
15047
15048 /* This operation is performed swapped, but since we only rely on the Z
15049 flag we don't need an additional mode. */
15050 if (GET_MODE (y) == SImode
15051 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15052 && GET_CODE (x) == NEG
15053 && (op == EQ || op == NE))
15054 return CC_Zmode;
15055
15056 /* This is a special case that is used by combine to allow a
15057 comparison of a shifted byte load to be split into a zero-extend
15058 followed by a comparison of the shifted integer (only valid for
15059 equalities and unsigned inequalities). */
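  /* Illustrative RTL shape (not in the original sources):
       X = (ashift:SI (subreg:SI (mem:QI addr) 0) (const_int 24))
       Y = (const_int 0x41000000)
     which corresponds to testing the loaded byte against 0x41.  */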
15060 if (GET_MODE (x) == SImode
15061 && GET_CODE (x) == ASHIFT
15062 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15063 && GET_CODE (XEXP (x, 0)) == SUBREG
15064 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15065 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15066 && (op == EQ || op == NE
15067 || op == GEU || op == GTU || op == LTU || op == LEU)
15068 && CONST_INT_P (y))
15069 return CC_Zmode;
15070
15071 /* A construct for a conditional compare, if the false arm contains
15072 0, then both conditions must be true, otherwise either condition
15073 must be true. Not all conditions are possible, so CCmode is
15074 returned if it can't be done. */
15075 if (GET_CODE (x) == IF_THEN_ELSE
15076 && (XEXP (x, 2) == const0_rtx
15077 || XEXP (x, 2) == const1_rtx)
15078 && COMPARISON_P (XEXP (x, 0))
15079 && COMPARISON_P (XEXP (x, 1)))
15080 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15081 INTVAL (XEXP (x, 2)));
15082
15083 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15084 if (GET_CODE (x) == AND
15085 && (op == EQ || op == NE)
15086 && COMPARISON_P (XEXP (x, 0))
15087 && COMPARISON_P (XEXP (x, 1)))
15088 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15089 DOM_CC_X_AND_Y);
15090
15091 if (GET_CODE (x) == IOR
15092 && (op == EQ || op == NE)
15093 && COMPARISON_P (XEXP (x, 0))
15094 && COMPARISON_P (XEXP (x, 1)))
15095 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15096 DOM_CC_X_OR_Y);
15097
15098 /* An operation (on Thumb) where we want to test for a single bit.
15099 This is done by shifting that bit up into the top bit of a
15100 scratch register; we can then branch on the sign bit. */
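  /* Illustrative example (not in the original sources): testing bit 5 of r0
     can be done with "lsls r3, r0, #26"; the N flag then holds the bit and
     a bmi/bpl branch selects on it.  */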
15101 if (TARGET_THUMB1
15102 && GET_MODE (x) == SImode
15103 && (op == EQ || op == NE)
15104 && GET_CODE (x) == ZERO_EXTRACT
15105 && XEXP (x, 1) == const1_rtx)
15106 return CC_Nmode;
15107
15108   /* For an operation that sets the condition codes as a side-effect, the
15109 V flag is not set correctly, so we can only use comparisons where
15110 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15111 instead.) */
15112 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15113 if (GET_MODE (x) == SImode
15114 && y == const0_rtx
15115 && (op == EQ || op == NE || op == LT || op == GE)
15116 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15117 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15118 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15119 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15120 || GET_CODE (x) == LSHIFTRT
15121 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15122 || GET_CODE (x) == ROTATERT
15123 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15124 return CC_NOOVmode;
15125
15126 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15127 return CC_Zmode;
15128
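  /* An unsigned comparison of (a + b) against one of the addends only needs
     the carry flag, e.g. the wraparound check "a + b < a"; CC_Cmode records
     that only C is valid.  */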
15129 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15130 && GET_CODE (x) == PLUS
15131 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15132 return CC_Cmode;
15133
15134 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15135 {
15136 switch (op)
15137 {
15138 case EQ:
15139 case NE:
15140 /* A DImode comparison against zero can be implemented by
15141 or'ing the two halves together. */
15142 if (y == const0_rtx)
15143 return CC_Zmode;
15144
15145 /* We can do an equality test in three Thumb instructions. */
15146 if (!TARGET_32BIT)
15147 return CC_Zmode;
15148
15149 /* FALLTHROUGH */
15150
15151 case LTU:
15152 case LEU:
15153 case GTU:
15154 case GEU:
15155 /* DImode unsigned comparisons can be implemented by cmp +
15156 cmpeq without a scratch register. Not worth doing in
15157 Thumb-2. */
15158 if (TARGET_32BIT)
15159 return CC_CZmode;
15160
15161 /* FALLTHROUGH */
15162
15163 case LT:
15164 case LE:
15165 case GT:
15166 case GE:
15167 /* DImode signed and unsigned comparisons can be implemented
15168 by cmp + sbcs with a scratch register, but that does not
15169 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15170 gcc_assert (op != EQ && op != NE);
15171 return CC_NCVmode;
15172
15173 default:
15174 gcc_unreachable ();
15175 }
15176 }
15177
15178 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15179 return GET_MODE (x);
15180
15181 return CCmode;
15182 }
15183
15184 /* X and Y are two things to compare using CODE. Emit the compare insn and
15185 return the rtx for register 0 in the proper mode. FP means this is a
15186 floating point compare: I don't think that it is needed on the arm. */
15187 rtx
15188 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15189 {
15190 enum machine_mode mode;
15191 rtx cc_reg;
15192 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15193
15194 /* We might have X as a constant, Y as a register because of the predicates
15195 used for cmpdi. If so, force X to a register here. */
15196 if (dimode_comparison && !REG_P (x))
15197 x = force_reg (DImode, x);
15198
15199 mode = SELECT_CC_MODE (code, x, y);
15200 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15201
15202 if (dimode_comparison
15203 && mode != CC_CZmode)
15204 {
15205 rtx clobber, set;
15206
15207 /* To compare two non-zero values for equality, XOR them and
15208 then compare against zero. Not used for ARM mode; there
15209 CC_CZmode is cheaper. */
15210 if (mode == CC_Zmode && y != const0_rtx)
15211 {
15212 gcc_assert (!reload_completed);
15213 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15214 y = const0_rtx;
15215 }
15216
15217 /* A scratch register is required. */
15218 if (reload_completed)
15219 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15220 else
15221 scratch = gen_rtx_SCRATCH (SImode);
15222
15223 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15224 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15225 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15226 }
15227 else
15228 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15229
15230 return cc_reg;
15231 }
15232
15233 /* Generate a sequence of insns that will generate the correct return
15234 address mask depending on the physical architecture that the program
15235 is running on. */
15236 rtx
15237 arm_gen_return_addr_mask (void)
15238 {
15239 rtx reg = gen_reg_rtx (Pmode);
15240
15241 emit_insn (gen_return_addr_mask (reg));
15242 return reg;
15243 }
15244
15245 void
15246 arm_reload_in_hi (rtx *operands)
15247 {
15248 rtx ref = operands[1];
15249 rtx base, scratch;
15250 HOST_WIDE_INT offset = 0;
15251
15252 if (GET_CODE (ref) == SUBREG)
15253 {
15254 offset = SUBREG_BYTE (ref);
15255 ref = SUBREG_REG (ref);
15256 }
15257
15258 if (REG_P (ref))
15259 {
15260 /* We have a pseudo which has been spilt onto the stack; there
15261 are two cases here: the first where there is a simple
15262 stack-slot replacement and a second where the stack-slot is
15263 out of range, or is used as a subreg. */
15264 if (reg_equiv_mem (REGNO (ref)))
15265 {
15266 ref = reg_equiv_mem (REGNO (ref));
15267 base = find_replacement (&XEXP (ref, 0));
15268 }
15269 else
15270 /* The slot is out of range, or was dressed up in a SUBREG. */
15271 base = reg_equiv_address (REGNO (ref));
15272 }
15273 else
15274 base = find_replacement (&XEXP (ref, 0));
15275
15276 /* Handle the case where the address is too complex to be offset by 1. */
15277 if (GET_CODE (base) == MINUS
15278 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15279 {
15280 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15281
15282 emit_set_insn (base_plus, base);
15283 base = base_plus;
15284 }
15285 else if (GET_CODE (base) == PLUS)
15286 {
15287 /* The addend must be CONST_INT, or we would have dealt with it above. */
15288 HOST_WIDE_INT hi, lo;
15289
15290 offset += INTVAL (XEXP (base, 1));
15291 base = XEXP (base, 0);
15292
15293 /* Rework the address into a legal sequence of insns. */
15294 /* Valid range for lo is -4095 -> 4095 */
15295 lo = (offset >= 0
15296 ? (offset & 0xfff)
15297 : -((-offset) & 0xfff));
15298
15299 /* Corner case, if lo is the max offset then we would be out of range
15300 once we have added the additional 1 below, so bump the msb into the
15301 pre-loading insn(s). */
15302 if (lo == 4095)
15303 lo &= 0x7ff;
15304
15305 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15306 ^ (HOST_WIDE_INT) 0x80000000)
15307 - (HOST_WIDE_INT) 0x80000000);
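      /* Illustrative worked example (not in the original sources): for
	 offset = -5000, lo = -(5000 & 0xfff) = -904, and the expression above
	 sign-extends offset - lo = -4096 from 32 bits, giving hi = -4096;
	 hi + lo == -5000, as the assertion below requires.  */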
15308
15309 gcc_assert (hi + lo == offset);
15310
15311 if (hi != 0)
15312 {
15313 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15314
15315 /* Get the base address; addsi3 knows how to handle constants
15316 that require more than one insn. */
15317 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15318 base = base_plus;
15319 offset = lo;
15320 }
15321 }
15322
15323 /* Operands[2] may overlap operands[0] (though it won't overlap
15324 operands[1]), that's why we asked for a DImode reg -- so we can
15325 use the bit that does not overlap. */
15326 if (REGNO (operands[2]) == REGNO (operands[0]))
15327 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15328 else
15329 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15330
15331 emit_insn (gen_zero_extendqisi2 (scratch,
15332 gen_rtx_MEM (QImode,
15333 plus_constant (Pmode, base,
15334 offset))));
15335 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15336 gen_rtx_MEM (QImode,
15337 plus_constant (Pmode, base,
15338 offset + 1))));
15339 if (!BYTES_BIG_ENDIAN)
15340 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15341 gen_rtx_IOR (SImode,
15342 gen_rtx_ASHIFT
15343 (SImode,
15344 gen_rtx_SUBREG (SImode, operands[0], 0),
15345 GEN_INT (8)),
15346 scratch));
15347 else
15348 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15349 gen_rtx_IOR (SImode,
15350 gen_rtx_ASHIFT (SImode, scratch,
15351 GEN_INT (8)),
15352 gen_rtx_SUBREG (SImode, operands[0], 0)));
15353 }
15354
15355 /* Handle storing a half-word to memory during reload by synthesizing as two
15356 byte stores. Take care not to clobber the input values until after we
15357 have moved them somewhere safe. This code assumes that if the DImode
15358 scratch in operands[2] overlaps either the input value or output address
15359 in some way, then that value must die in this insn (we absolutely need
15360 two scratch registers for some corner cases). */
15361 void
15362 arm_reload_out_hi (rtx *operands)
15363 {
15364 rtx ref = operands[0];
15365 rtx outval = operands[1];
15366 rtx base, scratch;
15367 HOST_WIDE_INT offset = 0;
15368
15369 if (GET_CODE (ref) == SUBREG)
15370 {
15371 offset = SUBREG_BYTE (ref);
15372 ref = SUBREG_REG (ref);
15373 }
15374
15375 if (REG_P (ref))
15376 {
15377 /* We have a pseudo which has been spilt onto the stack; there
15378 are two cases here: the first where there is a simple
15379 stack-slot replacement and a second where the stack-slot is
15380 out of range, or is used as a subreg. */
15381 if (reg_equiv_mem (REGNO (ref)))
15382 {
15383 ref = reg_equiv_mem (REGNO (ref));
15384 base = find_replacement (&XEXP (ref, 0));
15385 }
15386 else
15387 /* The slot is out of range, or was dressed up in a SUBREG. */
15388 base = reg_equiv_address (REGNO (ref));
15389 }
15390 else
15391 base = find_replacement (&XEXP (ref, 0));
15392
15393 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15394
15395 /* Handle the case where the address is too complex to be offset by 1. */
15396 if (GET_CODE (base) == MINUS
15397 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15398 {
15399 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15400
15401 /* Be careful not to destroy OUTVAL. */
15402 if (reg_overlap_mentioned_p (base_plus, outval))
15403 {
15404 /* Updating base_plus might destroy outval, see if we can
15405 swap the scratch and base_plus. */
15406 if (!reg_overlap_mentioned_p (scratch, outval))
15407 {
15408 rtx tmp = scratch;
15409 scratch = base_plus;
15410 base_plus = tmp;
15411 }
15412 else
15413 {
15414 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15415
15416 /* Be conservative and copy OUTVAL into the scratch now,
15417 this should only be necessary if outval is a subreg
15418 of something larger than a word. */
15419 /* XXX Might this clobber base? I can't see how it can,
15420 since scratch is known to overlap with OUTVAL, and
15421 must be wider than a word. */
15422 emit_insn (gen_movhi (scratch_hi, outval));
15423 outval = scratch_hi;
15424 }
15425 }
15426
15427 emit_set_insn (base_plus, base);
15428 base = base_plus;
15429 }
15430 else if (GET_CODE (base) == PLUS)
15431 {
15432 /* The addend must be CONST_INT, or we would have dealt with it above. */
15433 HOST_WIDE_INT hi, lo;
15434
15435 offset += INTVAL (XEXP (base, 1));
15436 base = XEXP (base, 0);
15437
15438 /* Rework the address into a legal sequence of insns. */
15439 /* Valid range for lo is -4095 -> 4095 */
15440 lo = (offset >= 0
15441 ? (offset & 0xfff)
15442 : -((-offset) & 0xfff));
15443
15444 /* Corner case, if lo is the max offset then we would be out of range
15445 once we have added the additional 1 below, so bump the msb into the
15446 pre-loading insn(s). */
15447 if (lo == 4095)
15448 lo &= 0x7ff;
15449
15450 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15451 ^ (HOST_WIDE_INT) 0x80000000)
15452 - (HOST_WIDE_INT) 0x80000000);
15453
15454 gcc_assert (hi + lo == offset);
15455
15456 if (hi != 0)
15457 {
15458 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15459
15460 /* Be careful not to destroy OUTVAL. */
15461 if (reg_overlap_mentioned_p (base_plus, outval))
15462 {
15463 /* Updating base_plus might destroy outval, see if we
15464 can swap the scratch and base_plus. */
15465 if (!reg_overlap_mentioned_p (scratch, outval))
15466 {
15467 rtx tmp = scratch;
15468 scratch = base_plus;
15469 base_plus = tmp;
15470 }
15471 else
15472 {
15473 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15474
15475 /* Be conservative and copy outval into scratch now,
15476 this should only be necessary if outval is a
15477 subreg of something larger than a word. */
15478 /* XXX Might this clobber base? I can't see how it
15479 can, since scratch is known to overlap with
15480 outval. */
15481 emit_insn (gen_movhi (scratch_hi, outval));
15482 outval = scratch_hi;
15483 }
15484 }
15485
15486 /* Get the base address; addsi3 knows how to handle constants
15487 that require more than one insn. */
15488 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15489 base = base_plus;
15490 offset = lo;
15491 }
15492 }
15493
15494 if (BYTES_BIG_ENDIAN)
15495 {
15496 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15497 plus_constant (Pmode, base,
15498 offset + 1)),
15499 gen_lowpart (QImode, outval)));
15500 emit_insn (gen_lshrsi3 (scratch,
15501 gen_rtx_SUBREG (SImode, outval, 0),
15502 GEN_INT (8)));
15503 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15504 offset)),
15505 gen_lowpart (QImode, scratch)));
15506 }
15507 else
15508 {
15509 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15510 offset)),
15511 gen_lowpart (QImode, outval)));
15512 emit_insn (gen_lshrsi3 (scratch,
15513 gen_rtx_SUBREG (SImode, outval, 0),
15514 GEN_INT (8)));
15515 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15516 plus_constant (Pmode, base,
15517 offset + 1)),
15518 gen_lowpart (QImode, scratch)));
15519 }
15520 }
15521
15522 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15523 (padded to the size of a word) should be passed in a register. */
15524
15525 static bool
15526 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15527 {
15528 if (TARGET_AAPCS_BASED)
15529 return must_pass_in_stack_var_size (mode, type);
15530 else
15531 return must_pass_in_stack_var_size_or_pad (mode, type);
15532 }
15533
15534
15535 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15536 Return true if an argument passed on the stack should be padded upwards,
15537 i.e. if the least-significant byte has useful data.
15538 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15539    aggregate types are placed at the lowest memory address.  */
15540
15541 bool
15542 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15543 {
15544 if (!TARGET_AAPCS_BASED)
15545 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15546
15547 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15548 return false;
15549
15550 return true;
15551 }
15552
15553
15554 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15555 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15556 register has useful data, and return the opposite if the most
15557 significant byte does. */
15558
15559 bool
15560 arm_pad_reg_upward (enum machine_mode mode,
15561 tree type, int first ATTRIBUTE_UNUSED)
15562 {
15563 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15564 {
15565 /* For AAPCS, small aggregates, small fixed-point types,
15566 and small complex types are always padded upwards. */
15567 if (type)
15568 {
15569 if ((AGGREGATE_TYPE_P (type)
15570 || TREE_CODE (type) == COMPLEX_TYPE
15571 || FIXED_POINT_TYPE_P (type))
15572 && int_size_in_bytes (type) <= 4)
15573 return true;
15574 }
15575 else
15576 {
15577 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15578 && GET_MODE_SIZE (mode) <= 4)
15579 return true;
15580 }
15581 }
15582
15583 /* Otherwise, use default padding. */
15584 return !BYTES_BIG_ENDIAN;
15585 }
15586
15587 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15588 assuming that the address in the base register is word aligned. */
15589 bool
15590 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15591 {
15592 HOST_WIDE_INT max_offset;
15593
15594   /* Offset must be a multiple of 4 in Thumb-2 mode.  */
15595 if (TARGET_THUMB2 && ((offset & 3) != 0))
15596 return false;
15597
15598 if (TARGET_THUMB2)
15599 max_offset = 1020;
15600 else if (TARGET_ARM)
15601 max_offset = 255;
15602 else
15603 return false;
15604
15605 return ((offset <= max_offset) && (offset >= -max_offset));
15606 }
15607
15608 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15609 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15610 Assumes that the address in the base register RN is word aligned. Pattern
15611 guarantees that both memory accesses use the same base register,
15612 the offsets are constants within the range, and the gap between the offsets is 4.
15613    If reload is complete then check that the registers are legal.  WBACK indicates whether
15614 address is updated. LOAD indicates whether memory access is load or store. */
15615 bool
15616 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15617 bool wback, bool load)
15618 {
15619 unsigned int t, t2, n;
15620
15621 if (!reload_completed)
15622 return true;
15623
15624 if (!offset_ok_for_ldrd_strd (offset))
15625 return false;
15626
15627 t = REGNO (rt);
15628 t2 = REGNO (rt2);
15629 n = REGNO (rn);
15630
15631 if ((TARGET_THUMB2)
15632 && ((wback && (n == t || n == t2))
15633 || (t == SP_REGNUM)
15634 || (t == PC_REGNUM)
15635 || (t2 == SP_REGNUM)
15636 || (t2 == PC_REGNUM)
15637 || (!load && (n == PC_REGNUM))
15638 || (load && (t == t2))
15639 /* Triggers Cortex-M3 LDRD errata. */
15640 || (!wback && load && fix_cm3_ldrd && (n == t))))
15641 return false;
15642
15643 if ((TARGET_ARM)
15644 && ((wback && (n == t || n == t2))
15645 || (t2 == PC_REGNUM)
15646 || (t % 2 != 0) /* First destination register is not even. */
15647 || (t2 != t + 1)
15648 /* PC can be used as base register (for offset addressing only),
15649          but it is deprecated.  */
15650 || (n == PC_REGNUM)))
15651 return false;
15652
15653 return true;
15654 }
15655
15656 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15657 operand MEM's address contains an immediate offset from the base
15658 register and has no side effects, in which case it sets BASE and
15659 OFFSET accordingly. */
15660 static bool
15661 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15662 {
15663 rtx addr;
15664
15665 gcc_assert (base != NULL && offset != NULL);
15666
15667 /* TODO: Handle more general memory operand patterns, such as
15668 PRE_DEC and PRE_INC. */
15669
15670 if (side_effects_p (mem))
15671 return false;
15672
15673 /* Can't deal with subregs. */
15674 if (GET_CODE (mem) == SUBREG)
15675 return false;
15676
15677 gcc_assert (MEM_P (mem));
15678
15679 *offset = const0_rtx;
15680
15681 addr = XEXP (mem, 0);
15682
15683 /* If addr isn't valid for DImode, then we can't handle it. */
15684 if (!arm_legitimate_address_p (DImode, addr,
15685 reload_in_progress || reload_completed))
15686 return false;
15687
15688 if (REG_P (addr))
15689 {
15690 *base = addr;
15691 return true;
15692 }
15693 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15694 {
15695 *base = XEXP (addr, 0);
15696 *offset = XEXP (addr, 1);
15697 return (REG_P (*base) && CONST_INT_P (*offset));
15698 }
15699
15700 return false;
15701 }
15702
15703 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15704
15705 /* Called from a peephole2 to replace two word-size accesses with a
15706 single LDRD/STRD instruction. Returns true iff we can generate a
15707 new instruction sequence. That is, both accesses use the same base
15708 register and the gap between constant offsets is 4. This function
15709 may reorder its operands to match ldrd/strd RTL templates.
15710 OPERANDS are the operands found by the peephole matcher;
15711 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15712    corresponding memory operands.  LOAD indicates whether the access
15713 is load or store. CONST_STORE indicates a store of constant
15714 integer values held in OPERANDS[4,5] and assumes that the pattern
15715 is of length 4 insn, for the purpose of checking dead registers.
15716 COMMUTE indicates that register operands may be reordered. */
15717 bool
15718 gen_operands_ldrd_strd (rtx *operands, bool load,
15719 bool const_store, bool commute)
15720 {
15721 int nops = 2;
15722 HOST_WIDE_INT offsets[2], offset;
15723 rtx base = NULL_RTX;
15724 rtx cur_base, cur_offset, tmp;
15725 int i, gap;
15726 HARD_REG_SET regset;
15727
15728 gcc_assert (!const_store || !load);
15729 /* Check that the memory references are immediate offsets from the
15730 same base register. Extract the base register, the destination
15731 registers, and the corresponding memory offsets. */
15732 for (i = 0; i < nops; i++)
15733 {
15734 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15735 return false;
15736
15737 if (i == 0)
15738 base = cur_base;
15739 else if (REGNO (base) != REGNO (cur_base))
15740 return false;
15741
15742 offsets[i] = INTVAL (cur_offset);
15743 if (GET_CODE (operands[i]) == SUBREG)
15744 {
15745 tmp = SUBREG_REG (operands[i]);
15746 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15747 operands[i] = tmp;
15748 }
15749 }
15750
15751 /* Make sure there is no dependency between the individual loads. */
15752 if (load && REGNO (operands[0]) == REGNO (base))
15753 return false; /* RAW */
15754
15755 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15756 return false; /* WAW */
15757
15758 /* If the same input register is used in both stores
15759 when storing different constants, try to find a free register.
15760 For example, the code
15761 mov r0, 0
15762 str r0, [r2]
15763 mov r0, 1
15764 str r0, [r2, #4]
15765 can be transformed into
15766 mov r1, 0
15767 strd r1, r0, [r2]
15768 in Thumb mode assuming that r1 is free. */
15769 if (const_store
15770 && REGNO (operands[0]) == REGNO (operands[1])
15771 && INTVAL (operands[4]) != INTVAL (operands[5]))
15772 {
15773 if (TARGET_THUMB2)
15774 {
15775 CLEAR_HARD_REG_SET (regset);
15776 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15777 if (tmp == NULL_RTX)
15778 return false;
15779
15780 /* Use the new register in the first load to ensure that
15781 if the original input register is not dead after peephole,
15782 then it will have the correct constant value. */
15783 operands[0] = tmp;
15784 }
15785 else if (TARGET_ARM)
15786 {
15787 return false;
15788 int regno = REGNO (operands[0]);
15789 if (!peep2_reg_dead_p (4, operands[0]))
15790 {
15791 /* When the input register is even and is not dead after the
15792 pattern, it has to hold the second constant but we cannot
15793 form a legal STRD in ARM mode with this register as the second
15794 register. */
15795 if (regno % 2 == 0)
15796 return false;
15797
15798 /* Is regno-1 free? */
15799 SET_HARD_REG_SET (regset);
15800 CLEAR_HARD_REG_BIT(regset, regno - 1);
15801 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15802 if (tmp == NULL_RTX)
15803 return false;
15804
15805 operands[0] = tmp;
15806 }
15807 else
15808 {
15809 /* Find a DImode register. */
15810 CLEAR_HARD_REG_SET (regset);
15811 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15812 if (tmp != NULL_RTX)
15813 {
15814 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15815 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15816 }
15817 else
15818 {
15819 /* Can we use the input register to form a DI register? */
15820 SET_HARD_REG_SET (regset);
15821 CLEAR_HARD_REG_BIT(regset,
15822 regno % 2 == 0 ? regno + 1 : regno - 1);
15823 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15824 if (tmp == NULL_RTX)
15825 return false;
15826 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15827 }
15828 }
15829
15830 gcc_assert (operands[0] != NULL_RTX);
15831 gcc_assert (operands[1] != NULL_RTX);
15832 gcc_assert (REGNO (operands[0]) % 2 == 0);
15833 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15834 }
15835 }
15836
15837 /* Make sure the instructions are ordered with lower memory access first. */
15838 if (offsets[0] > offsets[1])
15839 {
15840 gap = offsets[0] - offsets[1];
15841 offset = offsets[1];
15842
15843 /* Swap the instructions such that lower memory is accessed first. */
15844 SWAP_RTX (operands[0], operands[1]);
15845 SWAP_RTX (operands[2], operands[3]);
15846 if (const_store)
15847 SWAP_RTX (operands[4], operands[5]);
15848 }
15849 else
15850 {
15851 gap = offsets[1] - offsets[0];
15852 offset = offsets[0];
15853 }
15854
15855 /* Make sure accesses are to consecutive memory locations. */
15856 if (gap != 4)
15857 return false;
15858
15859 /* Make sure we generate legal instructions. */
15860 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15861 false, load))
15862 return true;
15863
15864 /* In Thumb state, where registers are almost unconstrained, there
15865      is little hope of fixing it.  */
15866 if (TARGET_THUMB2)
15867 return false;
15868
15869 if (load && commute)
15870 {
15871 /* Try reordering registers. */
15872 SWAP_RTX (operands[0], operands[1]);
15873 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15874 false, load))
15875 return true;
15876 }
15877
15878 if (const_store)
15879 {
15880 /* If input registers are dead after this pattern, they can be
15881 reordered or replaced by other registers that are free in the
15882 current pattern. */
15883 if (!peep2_reg_dead_p (4, operands[0])
15884 || !peep2_reg_dead_p (4, operands[1]))
15885 return false;
15886
15887 /* Try to reorder the input registers. */
15888 /* For example, the code
15889 mov r0, 0
15890 mov r1, 1
15891 str r1, [r2]
15892 str r0, [r2, #4]
15893 can be transformed into
15894 mov r1, 0
15895 mov r0, 1
15896 strd r0, [r2]
15897 */
15898 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15899 false, false))
15900 {
15901 SWAP_RTX (operands[0], operands[1]);
15902 return true;
15903 }
15904
15905 /* Try to find a free DI register. */
15906 CLEAR_HARD_REG_SET (regset);
15907 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15908 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15909 while (true)
15910 {
15911 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15912 if (tmp == NULL_RTX)
15913 return false;
15914
15915 /* DREG must be an even-numbered register in DImode.
15916 Split it into SI registers. */
15917 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15918 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15919 gcc_assert (operands[0] != NULL_RTX);
15920 gcc_assert (operands[1] != NULL_RTX);
15921 gcc_assert (REGNO (operands[0]) % 2 == 0);
15922 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15923
15924 return (operands_ok_ldrd_strd (operands[0], operands[1],
15925 base, offset,
15926 false, load));
15927 }
15928 }
15929
15930 return false;
15931 }
15932 #undef SWAP_RTX
15933
15934
15935
15936 \f
15937 /* Print a symbolic form of X to the debug file, F. */
15938 static void
15939 arm_print_value (FILE *f, rtx x)
15940 {
15941 switch (GET_CODE (x))
15942 {
15943 case CONST_INT:
15944 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15945 return;
15946
15947 case CONST_DOUBLE:
15948 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15949 return;
15950
15951 case CONST_VECTOR:
15952 {
15953 int i;
15954
15955 fprintf (f, "<");
15956 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15957 {
15958 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15959 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15960 fputc (',', f);
15961 }
15962 fprintf (f, ">");
15963 }
15964 return;
15965
15966 case CONST_STRING:
15967 fprintf (f, "\"%s\"", XSTR (x, 0));
15968 return;
15969
15970 case SYMBOL_REF:
15971 fprintf (f, "`%s'", XSTR (x, 0));
15972 return;
15973
15974 case LABEL_REF:
15975 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15976 return;
15977
15978 case CONST:
15979 arm_print_value (f, XEXP (x, 0));
15980 return;
15981
15982 case PLUS:
15983 arm_print_value (f, XEXP (x, 0));
15984 fprintf (f, "+");
15985 arm_print_value (f, XEXP (x, 1));
15986 return;
15987
15988 case PC:
15989 fprintf (f, "pc");
15990 return;
15991
15992 default:
15993 fprintf (f, "????");
15994 return;
15995 }
15996 }
15997 \f
15998 /* Routines for manipulation of the constant pool. */
15999
16000 /* Arm instructions cannot load a large constant directly into a
16001 register; they have to come from a pc relative load. The constant
16002 must therefore be placed in the addressable range of the pc
16003 relative load. Depending on the precise pc relative load
16004 instruction the range is somewhere between 256 bytes and 4k. This
16005 means that we often have to dump a constant inside a function, and
16006 generate code to branch around it.
16007
16008 It is important to minimize this, since the branches will slow
16009 things down and make the code larger.
16010
16011 Normally we can hide the table after an existing unconditional
16012 branch so that there is no interruption of the flow, but in the
16013 worst case the code looks like this:
16014
16015 ldr rn, L1
16016 ...
16017 b L2
16018 align
16019 L1: .long value
16020 L2:
16021 ...
16022
16023 ldr rn, L3
16024 ...
16025 b L4
16026 align
16027 L3: .long value
16028 L4:
16029 ...
16030
16031 We fix this by performing a scan after scheduling, which notices
16032 which instructions need to have their operands fetched from the
16033 constant table and builds the table.
16034
16035 The algorithm starts by building a table of all the constants that
16036 need fixing up and all the natural barriers in the function (places
16037 where a constant table can be dropped without breaking the flow).
16038 For each fixup we note how far the pc-relative replacement will be
16039 able to reach and the offset of the instruction into the function.
16040
16041 Having built the table we then group the fixes together to form
16042 tables that are as large as possible (subject to addressing
16043 constraints) and emit each table of constants after the last
16044 barrier that is within range of all the instructions in the group.
16045 If a group does not contain a barrier, then we forcibly create one
16046 by inserting a jump instruction into the flow. Once the table has
16047 been inserted, the insns are then modified to reference the
16048 relevant entry in the pool.
16049
16050 Possible enhancements to the algorithm (not implemented) are:
16051
16052 1) For some processors and object formats, there may be benefit in
16053 aligning the pools to the start of cache lines; this alignment
16054 would need to be taken into account when calculating addressability
16055 of a pool. */
16056
16057 /* These typedefs are located at the start of this file, so that
16058 they can be used in the prototypes there. This comment is to
16059 remind readers of that fact so that the following structures
16060 can be understood more easily.
16061
16062 typedef struct minipool_node Mnode;
16063 typedef struct minipool_fixup Mfix; */
16064
16065 struct minipool_node
16066 {
16067 /* Doubly linked chain of entries. */
16068 Mnode * next;
16069 Mnode * prev;
16070   /* The maximum offset into the code at which this entry can be placed.  While
16071 pushing fixes for forward references, all entries are sorted in order
16072 of increasing max_address. */
16073 HOST_WIDE_INT max_address;
16074 /* Similarly for an entry inserted for a backwards ref. */
16075 HOST_WIDE_INT min_address;
16076 /* The number of fixes referencing this entry. This can become zero
16077 if we "unpush" an entry. In this case we ignore the entry when we
16078 come to emit the code. */
16079 int refcount;
16080 /* The offset from the start of the minipool. */
16081 HOST_WIDE_INT offset;
16082   /* The value in the table.  */
16083 rtx value;
16084 /* The mode of value. */
16085 enum machine_mode mode;
16086 /* The size of the value. With iWMMXt enabled
16087      sizes > 4 also imply an alignment of 8 bytes.  */
16088 int fix_size;
16089 };
16090
16091 struct minipool_fixup
16092 {
16093 Mfix * next;
16094 rtx insn;
16095 HOST_WIDE_INT address;
16096 rtx * loc;
16097 enum machine_mode mode;
16098 int fix_size;
16099 rtx value;
16100 Mnode * minipool;
16101 HOST_WIDE_INT forwards;
16102 HOST_WIDE_INT backwards;
16103 };
16104
16105 /* Fixes less than a word need padding out to a word boundary. */
16106 #define MINIPOOL_FIX_SIZE(mode) \
16107 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
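/* For example, an HImode fix (2 bytes) still occupies 4 bytes in the pool,
   while a DImode or DFmode fix occupies 8.  */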
16108
16109 static Mnode * minipool_vector_head;
16110 static Mnode * minipool_vector_tail;
16111 static rtx minipool_vector_label;
16112 static int minipool_pad;
16113
16114 /* The linked list of all minipool fixes required for this function. */
16115 Mfix * minipool_fix_head;
16116 Mfix * minipool_fix_tail;
16117 /* The fix entry for the current minipool, once it has been placed. */
16118 Mfix * minipool_barrier;
16119
16120 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16121 #define JUMP_TABLES_IN_TEXT_SECTION 0
16122 #endif
16123
16124 static HOST_WIDE_INT
16125 get_jump_table_size (rtx_jump_table_data *insn)
16126 {
16127   /* ADDR_VECs only take room if read-only data goes into the text
16128 section. */
16129 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16130 {
16131 rtx body = PATTERN (insn);
16132 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16133 HOST_WIDE_INT size;
16134 HOST_WIDE_INT modesize;
16135
16136 modesize = GET_MODE_SIZE (GET_MODE (body));
16137 size = modesize * XVECLEN (body, elt);
16138 switch (modesize)
16139 {
16140 case 1:
16141 /* Round up size of TBB table to a halfword boundary. */
16142 size = (size + 1) & ~(HOST_WIDE_INT)1;
16143 break;
16144 case 2:
16145 /* No padding necessary for TBH. */
16146 break;
16147 case 4:
16148 /* Add two bytes for alignment on Thumb. */
16149 if (TARGET_THUMB)
16150 size += 2;
16151 break;
16152 default:
16153 gcc_unreachable ();
16154 }
16155 return size;
16156 }
16157
16158 return 0;
16159 }
16160
16161 /* Return the maximum amount of padding that will be inserted before
16162 label LABEL. */
16163
16164 static HOST_WIDE_INT
16165 get_label_padding (rtx label)
16166 {
16167 HOST_WIDE_INT align, min_insn_size;
16168
16169 align = 1 << label_to_alignment (label);
16170 min_insn_size = TARGET_THUMB ? 2 : 4;
16171 return align > min_insn_size ? align - min_insn_size : 0;
16172 }
16173
16174 /* Move a minipool fix MP from its current location to before MAX_MP.
16175 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16176 constraints may need updating. */
16177 static Mnode *
16178 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16179 HOST_WIDE_INT max_address)
16180 {
16181 /* The code below assumes these are different. */
16182 gcc_assert (mp != max_mp);
16183
16184 if (max_mp == NULL)
16185 {
16186 if (max_address < mp->max_address)
16187 mp->max_address = max_address;
16188 }
16189 else
16190 {
16191 if (max_address > max_mp->max_address - mp->fix_size)
16192 mp->max_address = max_mp->max_address - mp->fix_size;
16193 else
16194 mp->max_address = max_address;
16195
16196 /* Unlink MP from its current position. Since max_mp is non-null,
16197 mp->prev must be non-null. */
16198 mp->prev->next = mp->next;
16199 if (mp->next != NULL)
16200 mp->next->prev = mp->prev;
16201 else
16202 minipool_vector_tail = mp->prev;
16203
16204 /* Re-insert it before MAX_MP. */
16205 mp->next = max_mp;
16206 mp->prev = max_mp->prev;
16207 max_mp->prev = mp;
16208
16209 if (mp->prev != NULL)
16210 mp->prev->next = mp;
16211 else
16212 minipool_vector_head = mp;
16213 }
16214
16215 /* Save the new entry. */
16216 max_mp = mp;
16217
16218 /* Scan over the preceding entries and adjust their addresses as
16219 required. */
16220 while (mp->prev != NULL
16221 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16222 {
16223 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16224 mp = mp->prev;
16225 }
16226
16227 return max_mp;
16228 }
16229
16230 /* Add a constant to the minipool for a forward reference. Returns the
16231 node added or NULL if the constant will not fit in this pool. */
16232 static Mnode *
16233 add_minipool_forward_ref (Mfix *fix)
16234 {
16235 /* If set, max_mp is the first pool_entry that has a lower
16236 constraint than the one we are trying to add. */
16237 Mnode * max_mp = NULL;
16238 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16239 Mnode * mp;
16240
16241 /* If the minipool starts before the end of FIX->INSN then this FIX
16242      cannot be placed into the current pool.  Furthermore, adding the
16243 new constant pool entry may cause the pool to start FIX_SIZE bytes
16244 earlier. */
16245 if (minipool_vector_head &&
16246 (fix->address + get_attr_length (fix->insn)
16247 >= minipool_vector_head->max_address - fix->fix_size))
16248 return NULL;
16249
16250 /* Scan the pool to see if a constant with the same value has
16251 already been added. While we are doing this, also note the
16252 location where we must insert the constant if it doesn't already
16253 exist. */
16254 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16255 {
16256 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16257 && fix->mode == mp->mode
16258 && (!LABEL_P (fix->value)
16259 || (CODE_LABEL_NUMBER (fix->value)
16260 == CODE_LABEL_NUMBER (mp->value)))
16261 && rtx_equal_p (fix->value, mp->value))
16262 {
16263 /* More than one fix references this entry. */
16264 mp->refcount++;
16265 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16266 }
16267
16268 /* Note the insertion point if necessary. */
16269 if (max_mp == NULL
16270 && mp->max_address > max_address)
16271 max_mp = mp;
16272
16273       /* If we are inserting an 8-byte aligned quantity and
16274 we have not already found an insertion point, then
16275 make sure that all such 8-byte aligned quantities are
16276 placed at the start of the pool. */
16277 if (ARM_DOUBLEWORD_ALIGN
16278 && max_mp == NULL
16279 && fix->fix_size >= 8
16280 && mp->fix_size < 8)
16281 {
16282 max_mp = mp;
16283 max_address = mp->max_address;
16284 }
16285 }
16286
16287 /* The value is not currently in the minipool, so we need to create
16288 a new entry for it. If MAX_MP is NULL, the entry will be put on
16289 the end of the list since the placement is less constrained than
16290 any existing entry. Otherwise, we insert the new fix before
16291 MAX_MP and, if necessary, adjust the constraints on the other
16292 entries. */
16293 mp = XNEW (Mnode);
16294 mp->fix_size = fix->fix_size;
16295 mp->mode = fix->mode;
16296 mp->value = fix->value;
16297 mp->refcount = 1;
16298 /* Not yet required for a backwards ref. */
16299 mp->min_address = -65536;
16300
16301 if (max_mp == NULL)
16302 {
16303 mp->max_address = max_address;
16304 mp->next = NULL;
16305 mp->prev = minipool_vector_tail;
16306
16307 if (mp->prev == NULL)
16308 {
16309 minipool_vector_head = mp;
16310 minipool_vector_label = gen_label_rtx ();
16311 }
16312 else
16313 mp->prev->next = mp;
16314
16315 minipool_vector_tail = mp;
16316 }
16317 else
16318 {
16319 if (max_address > max_mp->max_address - mp->fix_size)
16320 mp->max_address = max_mp->max_address - mp->fix_size;
16321 else
16322 mp->max_address = max_address;
16323
16324 mp->next = max_mp;
16325 mp->prev = max_mp->prev;
16326 max_mp->prev = mp;
16327 if (mp->prev != NULL)
16328 mp->prev->next = mp;
16329 else
16330 minipool_vector_head = mp;
16331 }
16332
16333 /* Save the new entry. */
16334 max_mp = mp;
16335
16336 /* Scan over the preceding entries and adjust their addresses as
16337 required. */
16338 while (mp->prev != NULL
16339 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16340 {
16341 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16342 mp = mp->prev;
16343 }
16344
16345 return max_mp;
16346 }
16347
16348 static Mnode *
16349 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16350 HOST_WIDE_INT min_address)
16351 {
16352 HOST_WIDE_INT offset;
16353
16354 /* The code below assumes these are different. */
16355 gcc_assert (mp != min_mp);
16356
16357 if (min_mp == NULL)
16358 {
16359 if (min_address > mp->min_address)
16360 mp->min_address = min_address;
16361 }
16362 else
16363 {
16364 /* We will adjust this below if it is too loose. */
16365 mp->min_address = min_address;
16366
16367 /* Unlink MP from its current position. Since min_mp is non-null,
16368 mp->next must be non-null. */
16369 mp->next->prev = mp->prev;
16370 if (mp->prev != NULL)
16371 mp->prev->next = mp->next;
16372 else
16373 minipool_vector_head = mp->next;
16374
16375 /* Reinsert it after MIN_MP. */
16376 mp->prev = min_mp;
16377 mp->next = min_mp->next;
16378 min_mp->next = mp;
16379 if (mp->next != NULL)
16380 mp->next->prev = mp;
16381 else
16382 minipool_vector_tail = mp;
16383 }
16384
16385 min_mp = mp;
16386
16387 offset = 0;
16388 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16389 {
16390 mp->offset = offset;
16391 if (mp->refcount > 0)
16392 offset += mp->fix_size;
16393
16394 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16395 mp->next->min_address = mp->min_address + mp->fix_size;
16396 }
16397
16398 return min_mp;
16399 }
16400
16401 /* Add a constant to the minipool for a backward reference. Returns the
16402 node added or NULL if the constant will not fit in this pool.
16403
16404 Note that the code for insertion for a backwards reference can be
16405 somewhat confusing because the calculated offsets for each fix do
16406 not take into account the size of the pool (which is still under
16407 construction). */
16408 static Mnode *
16409 add_minipool_backward_ref (Mfix *fix)
16410 {
16411 /* If set, min_mp is the last pool_entry that has a lower constraint
16412 than the one we are trying to add. */
16413 Mnode *min_mp = NULL;
16414 /* This can be negative, since it is only a constraint. */
16415 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16416 Mnode *mp;
16417
16418 /* If we can't reach the current pool from this insn, or if we can't
16419 insert this entry at the end of the pool without pushing other
16420 fixes out of range, then we don't try. This ensures that we
16421 can't fail later on. */
16422 if (min_address >= minipool_barrier->address
16423 || (minipool_vector_tail->min_address + fix->fix_size
16424 >= minipool_barrier->address))
16425 return NULL;
16426
16427 /* Scan the pool to see if a constant with the same value has
16428 already been added. While we are doing this, also note the
16429 location where we must insert the constant if it doesn't already
16430 exist. */
16431 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16432 {
16433 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16434 && fix->mode == mp->mode
16435 && (!LABEL_P (fix->value)
16436 || (CODE_LABEL_NUMBER (fix->value)
16437 == CODE_LABEL_NUMBER (mp->value)))
16438 && rtx_equal_p (fix->value, mp->value)
16439 /* Check that there is enough slack to move this entry to the
16440 end of the table (this is conservative). */
16441 && (mp->max_address
16442 > (minipool_barrier->address
16443 + minipool_vector_tail->offset
16444 + minipool_vector_tail->fix_size)))
16445 {
16446 mp->refcount++;
16447 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16448 }
16449
16450 if (min_mp != NULL)
16451 mp->min_address += fix->fix_size;
16452 else
16453 {
16454 /* Note the insertion point if necessary. */
16455 if (mp->min_address < min_address)
16456 {
16457 /* For now, we do not allow the insertion of nodes requiring
16458 8-byte alignment anywhere except at the start of the pool. */
16459 if (ARM_DOUBLEWORD_ALIGN
16460 && fix->fix_size >= 8 && mp->fix_size < 8)
16461 return NULL;
16462 else
16463 min_mp = mp;
16464 }
16465 else if (mp->max_address
16466 < minipool_barrier->address + mp->offset + fix->fix_size)
16467 {
16468 /* Inserting before this entry would push the fix beyond
16469 its maximum address (which can happen if we have
16470 re-located a forwards fix); force the new fix to come
16471 after it. */
16472 if (ARM_DOUBLEWORD_ALIGN
16473 && fix->fix_size >= 8 && mp->fix_size < 8)
16474 return NULL;
16475 else
16476 {
16477 min_mp = mp;
16478 min_address = mp->min_address + fix->fix_size;
16479 }
16480 }
16481 /* Do not insert a non-8-byte aligned quantity before 8-byte
16482 aligned quantities. */
16483 else if (ARM_DOUBLEWORD_ALIGN
16484 && fix->fix_size < 8
16485 && mp->fix_size >= 8)
16486 {
16487 min_mp = mp;
16488 min_address = mp->min_address + fix->fix_size;
16489 }
16490 }
16491 }
16492
16493 /* We need to create a new entry. */
16494 mp = XNEW (Mnode);
16495 mp->fix_size = fix->fix_size;
16496 mp->mode = fix->mode;
16497 mp->value = fix->value;
16498 mp->refcount = 1;
16499 mp->max_address = minipool_barrier->address + 65536;
16500
16501 mp->min_address = min_address;
16502
16503 if (min_mp == NULL)
16504 {
16505 mp->prev = NULL;
16506 mp->next = minipool_vector_head;
16507
16508 if (mp->next == NULL)
16509 {
16510 minipool_vector_tail = mp;
16511 minipool_vector_label = gen_label_rtx ();
16512 }
16513 else
16514 mp->next->prev = mp;
16515
16516 minipool_vector_head = mp;
16517 }
16518 else
16519 {
16520 mp->next = min_mp->next;
16521 mp->prev = min_mp;
16522 min_mp->next = mp;
16523
16524 if (mp->next != NULL)
16525 mp->next->prev = mp;
16526 else
16527 minipool_vector_tail = mp;
16528 }
16529
16530 /* Save the new entry. */
16531 min_mp = mp;
16532
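/* Step back to the predecessor, if there is one, so that the scan
   below also fixes up the offset of the entry just inserted.  */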
16533 if (mp->prev)
16534 mp = mp->prev;
16535 else
16536 mp->offset = 0;
16537
16538 /* Scan over the following entries and adjust their offsets. */
16539 while (mp->next != NULL)
16540 {
16541 if (mp->next->min_address < mp->min_address + mp->fix_size)
16542 mp->next->min_address = mp->min_address + mp->fix_size;
16543
16544 if (mp->refcount)
16545 mp->next->offset = mp->offset + mp->fix_size;
16546 else
16547 mp->next->offset = mp->offset;
16548
16549 mp = mp->next;
16550 }
16551
16552 return min_mp;
16553 }
16554
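/* Record BARRIER as the barrier after which the current minipool will
   be emitted, and assign each entry its byte offset within the pool;
   entries whose refcount has dropped to zero occupy no space.  */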
16555 static void
16556 assign_minipool_offsets (Mfix *barrier)
16557 {
16558 HOST_WIDE_INT offset = 0;
16559 Mnode *mp;
16560
16561 minipool_barrier = barrier;
16562
16563 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16564 {
16565 mp->offset = offset;
16566
16567 if (mp->refcount > 0)
16568 offset += mp->fix_size;
16569 }
16570 }
16571
16572 /* Output the literal table. */
16573 static void
16574 dump_minipool (rtx scan)
16575 {
16576 Mnode * mp;
16577 Mnode * nmp;
16578 int align64 = 0;
16579
16580 if (ARM_DOUBLEWORD_ALIGN)
16581 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16582 if (mp->refcount > 0 && mp->fix_size >= 8)
16583 {
16584 align64 = 1;
16585 break;
16586 }
16587
16588 if (dump_file)
16589 fprintf (dump_file,
16590 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16591 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16592
16593 scan = emit_label_after (gen_label_rtx (), scan);
16594 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16595 scan = emit_label_after (minipool_vector_label, scan);
16596
16597 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16598 {
16599 if (mp->refcount > 0)
16600 {
16601 if (dump_file)
16602 {
16603 fprintf (dump_file,
16604 ";; Offset %u, min %ld, max %ld ",
16605 (unsigned) mp->offset, (unsigned long) mp->min_address,
16606 (unsigned long) mp->max_address);
16607 arm_print_value (dump_file, mp->value);
16608 fputc ('\n', dump_file);
16609 }
16610
16611 switch (mp->fix_size)
16612 {
16613 #ifdef HAVE_consttable_1
16614 case 1:
16615 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16616 break;
16617
16618 #endif
16619 #ifdef HAVE_consttable_2
16620 case 2:
16621 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16622 break;
16623
16624 #endif
16625 #ifdef HAVE_consttable_4
16626 case 4:
16627 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16628 break;
16629
16630 #endif
16631 #ifdef HAVE_consttable_8
16632 case 8:
16633 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16634 break;
16635
16636 #endif
16637 #ifdef HAVE_consttable_16
16638 case 16:
16639 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16640 break;
16641
16642 #endif
16643 default:
16644 gcc_unreachable ();
16645 }
16646 }
16647
16648 nmp = mp->next;
16649 free (mp);
16650 }
16651
16652 minipool_vector_head = minipool_vector_tail = NULL;
16653 scan = emit_insn_after (gen_consttable_end (), scan);
16654 scan = emit_barrier_after (scan);
16655 }
16656
16657 /* Return the cost of forcibly inserting a barrier after INSN. */
16658 static int
16659 arm_barrier_cost (rtx insn)
16660 {
16661 /* Basing the location of the pool on the loop depth is preferable,
16662 but at the moment, the basic block information seems to be
16663 corrupted by this stage of the compilation. */
16664 int base_cost = 50;
16665 rtx next = next_nonnote_insn (insn);
16666
16667 if (next != NULL && LABEL_P (next))
16668 base_cost -= 20;
16669
16670 switch (GET_CODE (insn))
16671 {
16672 case CODE_LABEL:
16673 /* It will always be better to place the table before the label, rather
16674 than after it. */
16675 return 50;
16676
16677 case INSN:
16678 case CALL_INSN:
16679 return base_cost;
16680
16681 case JUMP_INSN:
16682 return base_cost - 10;
16683
16684 default:
16685 return base_cost + 10;
16686 }
16687 }
16688
16689 /* Find the best place in the insn stream in the range
16690 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16691 Create the barrier by inserting a jump and add a new fix entry for
16692 it. */
16693 static Mfix *
16694 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16695 {
16696 HOST_WIDE_INT count = 0;
16697 rtx barrier;
16698 rtx from = fix->insn;
16699 /* The instruction after which we will insert the jump. */
16700 rtx selected = NULL;
16701 int selected_cost;
16702 /* The address at which the jump instruction will be placed. */
16703 HOST_WIDE_INT selected_address;
16704 Mfix * new_fix;
16705 HOST_WIDE_INT max_count = max_address - fix->address;
16706 rtx label = gen_label_rtx ();
16707
16708 selected_cost = arm_barrier_cost (from);
16709 selected_address = fix->address;
16710
16711 while (from && count < max_count)
16712 {
16713 rtx_jump_table_data *tmp;
16714 int new_cost;
16715
16716 /* This code shouldn't have been called if there was a natural barrier
16717 within range. */
16718 gcc_assert (!BARRIER_P (from));
16719
16720 /* Count the length of this insn. This must stay in sync with the
16721 code that pushes minipool fixes. */
16722 if (LABEL_P (from))
16723 count += get_label_padding (from);
16724 else
16725 count += get_attr_length (from);
16726
16727 /* If there is a jump table, add its length. */
16728 if (tablejump_p (from, NULL, &tmp))
16729 {
16730 count += get_jump_table_size (tmp);
16731
16732 /* Jump tables aren't in a basic block, so base the cost on
16733 the dispatch insn. If we select this location, we will
16734 still put the pool after the table. */
16735 new_cost = arm_barrier_cost (from);
16736
16737 if (count < max_count
16738 && (!selected || new_cost <= selected_cost))
16739 {
16740 selected = tmp;
16741 selected_cost = new_cost;
16742 selected_address = fix->address + count;
16743 }
16744
16745 /* Continue after the dispatch table. */
16746 from = NEXT_INSN (tmp);
16747 continue;
16748 }
16749
16750 new_cost = arm_barrier_cost (from);
16751
16752 if (count < max_count
16753 && (!selected || new_cost <= selected_cost))
16754 {
16755 selected = from;
16756 selected_cost = new_cost;
16757 selected_address = fix->address + count;
16758 }
16759
16760 from = NEXT_INSN (from);
16761 }
16762
16763 /* Make sure that we found a place to insert the jump. */
16764 gcc_assert (selected);
16765
16766 /* Make sure we do not split a call and its corresponding
16767 CALL_ARG_LOCATION note. */
16768 if (CALL_P (selected))
16769 {
16770 rtx next = NEXT_INSN (selected);
16771 if (next && NOTE_P (next)
16772 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16773 selected = next;
16774 }
16775
16776 /* Create a new JUMP_INSN that branches around a barrier. */
16777 from = emit_jump_insn_after (gen_jump (label), selected);
16778 JUMP_LABEL (from) = label;
16779 barrier = emit_barrier_after (from);
16780 emit_label_after (label, barrier);
16781
16782 /* Create a minipool barrier entry for the new barrier. */
16783 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16784 new_fix->insn = barrier;
16785 new_fix->address = selected_address;
16786 new_fix->next = fix->next;
16787 fix->next = new_fix;
16788
16789 return new_fix;
16790 }
16791
16792 /* Record that there is a natural barrier in the insn stream at
16793 ADDRESS. */
16794 static void
16795 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16796 {
16797 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16798
16799 fix->insn = insn;
16800 fix->address = address;
16801
16802 fix->next = NULL;
16803 if (minipool_fix_head != NULL)
16804 minipool_fix_tail->next = fix;
16805 else
16806 minipool_fix_head = fix;
16807
16808 minipool_fix_tail = fix;
16809 }
16810
16811 /* Record INSN, which will need fixing up to load a value from the
16812 minipool. ADDRESS is the offset of the insn since the start of the
16813 function; LOC is a pointer to the part of the insn which requires
16814 fixing; VALUE is the constant that must be loaded, which is of type
16815 MODE. */
16816 static void
16817 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16818 enum machine_mode mode, rtx value)
16819 {
16820 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16821
16822 fix->insn = insn;
16823 fix->address = address;
16824 fix->loc = loc;
16825 fix->mode = mode;
16826 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16827 fix->value = value;
16828 fix->forwards = get_attr_pool_range (insn);
16829 fix->backwards = get_attr_neg_pool_range (insn);
16830 fix->minipool = NULL;
16831
16832 /* If an insn doesn't have a range defined for it, then it isn't
16833 expecting to be reworked by this code. Better to stop now than
16834 to generate duff assembly code. */
16835 gcc_assert (fix->forwards || fix->backwards);
16836
16837 /* If an entry requires 8-byte alignment then assume all constant pools
16838 require 4 bytes of padding. Trying to do this later on a per-pool
16839 basis is awkward because existing pool entries have to be modified. */
16840 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16841 minipool_pad = 4;
16842
16843 if (dump_file)
16844 {
16845 fprintf (dump_file,
16846 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16847 GET_MODE_NAME (mode),
16848 INSN_UID (insn), (unsigned long) address,
16849 -1 * (long)fix->backwards, (long)fix->forwards);
16850 arm_print_value (dump_file, fix->value);
16851 fprintf (dump_file, "\n");
16852 }
16853
16854 /* Add it to the chain of fixes. */
16855 fix->next = NULL;
16856
16857 if (minipool_fix_head != NULL)
16858 minipool_fix_tail->next = fix;
16859 else
16860 minipool_fix_head = fix;
16861
16862 minipool_fix_tail = fix;
16863 }
16864
16865 /* Return the maximum allowed cost, in number of insns, of synthesizing
16866 a 64-bit constant inline. Returns 99 if we always want to synthesize
16867 the value rather than place it in a literal pool. */
16868 int
16869 arm_max_const_double_inline_cost ()
16870 {
16871 /* Let the value get synthesized to avoid the use of literal pools. */
16872 if (arm_disable_literal_pool)
16873 return 99;
16874
16875 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16876 }
16877
16878 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16879 Returns the number of insns needed, or 99 if we don't know how to
16880 do it. */
16881 int
16882 arm_const_double_inline_cost (rtx val)
16883 {
16884 rtx lowpart, highpart;
16885 enum machine_mode mode;
16886
16887 mode = GET_MODE (val);
16888
16889 if (mode == VOIDmode)
16890 mode = DImode;
16891
16892 gcc_assert (GET_MODE_SIZE (mode) == 8);
16893
16894 lowpart = gen_lowpart (SImode, val);
16895 highpart = gen_highpart_mode (SImode, mode, val);
16896
16897 gcc_assert (CONST_INT_P (lowpart));
16898 gcc_assert (CONST_INT_P (highpart));
16899
16900 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16901 NULL_RTX, NULL_RTX, 0, 0)
16902 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16903 NULL_RTX, NULL_RTX, 0, 0));
16904 }
16905
16906 /* Return true if it is worthwhile to split a 64-bit constant into two
16907 32-bit operations. This is the case if optimizing for size, or
16908 if we have load delay slots, or if one 32-bit part can be done with
16909 a single data operation. */
16910 bool
16911 arm_const_double_by_parts (rtx val)
16912 {
16913 enum machine_mode mode = GET_MODE (val);
16914 rtx part;
16915
16916 if (optimize_size || arm_ld_sched)
16917 return true;
16918
16919 if (mode == VOIDmode)
16920 mode = DImode;
16921
16922 part = gen_highpart_mode (SImode, mode, val);
16923
16924 gcc_assert (CONST_INT_P (part));
16925
16926 if (const_ok_for_arm (INTVAL (part))
16927 || const_ok_for_arm (~INTVAL (part)))
16928 return true;
16929
16930 part = gen_lowpart (SImode, val);
16931
16932 gcc_assert (CONST_INT_P (part));
16933
16934 if (const_ok_for_arm (INTVAL (part))
16935 || const_ok_for_arm (~INTVAL (part)))
16936 return true;
16937
16938 return false;
16939 }
16940
16941 /* Return true if it is possible to inline both the high and low parts
16942 of a 64-bit constant into 32-bit data processing instructions. */
16943 bool
16944 arm_const_double_by_immediates (rtx val)
16945 {
16946 enum machine_mode mode = GET_MODE (val);
16947 rtx part;
16948
16949 if (mode == VOIDmode)
16950 mode = DImode;
16951
16952 part = gen_highpart_mode (SImode, mode, val);
16953
16954 gcc_assert (CONST_INT_P (part));
16955
16956 if (!const_ok_for_arm (INTVAL (part)))
16957 return false;
16958
16959 part = gen_lowpart (SImode, val);
16960
16961 gcc_assert (CONST_INT_P (part));
16962
16963 if (!const_ok_for_arm (INTVAL (part)))
16964 return false;
16965
16966 return true;
16967 }
16968
16969 /* Scan INSN and note any of its operands that need fixing.
16970 If DO_PUSHES is false we do not actually push any of the fixups
16971 needed. */
16972 static void
16973 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16974 {
16975 int opno;
16976
16977 extract_insn (insn);
16978
16979 if (!constrain_operands (1))
16980 fatal_insn_not_found (insn);
16981
16982 if (recog_data.n_alternatives == 0)
16983 return;
16984
16985 /* Fill in recog_op_alt with information about the constraints of
16986 this insn. */
16987 preprocess_constraints (insn);
16988
16989 const operand_alternative *op_alt = which_op_alt ();
16990 for (opno = 0; opno < recog_data.n_operands; opno++)
16991 {
16992 /* Things we need to fix can only occur in inputs. */
16993 if (recog_data.operand_type[opno] != OP_IN)
16994 continue;
16995
16996 /* If this alternative is a memory reference, then any mention
16997 of constants in this alternative is really to fool reload
16998 into allowing us to accept one there. We need to fix them up
16999 now so that we output the right code. */
17000 if (op_alt[opno].memory_ok)
17001 {
17002 rtx op = recog_data.operand[opno];
17003
17004 if (CONSTANT_P (op))
17005 {
17006 if (do_pushes)
17007 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17008 recog_data.operand_mode[opno], op);
17009 }
17010 else if (MEM_P (op)
17011 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17012 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17013 {
17014 if (do_pushes)
17015 {
17016 rtx cop = avoid_constant_pool_reference (op);
17017
17018 /* Casting the address of something to a mode narrower
17019 than a word can cause avoid_constant_pool_reference()
17020 to return the pool reference itself. That's no good to
17021 us here. Let's just hope that we can use the
17022 constant pool value directly. */
17023 if (op == cop)
17024 cop = get_pool_constant (XEXP (op, 0));
17025
17026 push_minipool_fix (insn, address,
17027 recog_data.operand_loc[opno],
17028 recog_data.operand_mode[opno], cop);
17029 }
17030
17031 }
17032 }
17033 }
17034
17035 return;
17036 }
17037
17038 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17039 be useful in the next conditional jump insn. */
17040
17041 static void
17042 thumb1_reorg (void)
17043 {
17044 basic_block bb;
17045
17046 FOR_EACH_BB_FN (bb, cfun)
17047 {
17048 rtx dest, src;
17049 rtx pat, op0, set = NULL;
17050 rtx prev, insn = BB_END (bb);
17051 bool insn_clobbered = false;
17052
17053 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17054 insn = PREV_INSN (insn);
17055
17056 /* Find the last cbranchsi4_insn in basic block BB. */
17057 if (insn == BB_HEAD (bb)
17058 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17059 continue;
17060
17061 /* Get the register with which we are comparing. */
17062 pat = PATTERN (insn);
17063 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17064
17065 /* Find the first flag setting insn before INSN in basic block BB. */
17066 gcc_assert (insn != BB_HEAD (bb));
17067 for (prev = PREV_INSN (insn);
17068 (!insn_clobbered
17069 && prev != BB_HEAD (bb)
17070 && (NOTE_P (prev)
17071 || DEBUG_INSN_P (prev)
17072 || ((set = single_set (prev)) != NULL
17073 && get_attr_conds (prev) == CONDS_NOCOND)));
17074 prev = PREV_INSN (prev))
17075 {
17076 if (reg_set_p (op0, prev))
17077 insn_clobbered = true;
17078 }
17079
17080 /* Skip if op0 is clobbered by insn other than prev. */
17081 if (insn_clobbered)
17082 continue;
17083
17084 if (!set)
17085 continue;
17086
17087 dest = SET_DEST (set);
17088 src = SET_SRC (set);
17089 if (!low_register_operand (dest, SImode)
17090 || !low_register_operand (src, SImode))
17091 continue;
17092
17093 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17094 in INSN. Both src and dest of the move insn are checked. */
17095 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17096 {
17097 dest = copy_rtx (dest);
17098 src = copy_rtx (src);
17099 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17100 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17101 INSN_CODE (prev) = -1;
17102 /* Set test register in INSN to dest. */
17103 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17104 INSN_CODE (insn) = -1;
17105 }
17106 }
17107 }
17108
17109 /* Convert instructions to their cc-clobbering variant if possible, since
17110 that allows us to use smaller encodings. */
17111
17112 static void
17113 thumb2_reorg (void)
17114 {
17115 basic_block bb;
17116 regset_head live;
17117
17118 INIT_REG_SET (&live);
17119
17120 /* We are freeing block_for_insn in the toplev to keep compatibility
17121 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17122 compute_bb_for_insn ();
17123 df_analyze ();
17124
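/* SKIP: leave the insn unchanged; CONV: rewrite it to also clobber the
   condition codes, enabling the short flag-setting encoding; SWAP_CONV:
   likewise, but first swap the two commutative source operands.  */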
17125 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17126
17127 FOR_EACH_BB_FN (bb, cfun)
17128 {
17129 if (current_tune->disparage_flag_setting_t16_encodings
17130 && optimize_bb_for_speed_p (bb))
17131 continue;
17132
17133 rtx insn;
17134 Convert_Action action = SKIP;
17135 Convert_Action action_for_partial_flag_setting
17136 = (current_tune->disparage_partial_flag_setting_t16_encodings
17137 && optimize_bb_for_speed_p (bb))
17138 ? SKIP : CONV;
17139
17140 COPY_REG_SET (&live, DF_LR_OUT (bb));
17141 df_simulate_initialize_backwards (bb, &live);
17142 FOR_BB_INSNS_REVERSE (bb, insn)
17143 {
17144 if (NONJUMP_INSN_P (insn)
17145 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17146 && GET_CODE (PATTERN (insn)) == SET)
17147 {
17148 action = SKIP;
17149 rtx pat = PATTERN (insn);
17150 rtx dst = XEXP (pat, 0);
17151 rtx src = XEXP (pat, 1);
17152 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17153
17154 if (!OBJECT_P (src))
17155 op0 = XEXP (src, 0);
17156
17157 if (BINARY_P (src))
17158 op1 = XEXP (src, 1);
17159
17160 if (low_register_operand (dst, SImode))
17161 {
17162 switch (GET_CODE (src))
17163 {
17164 case PLUS:
17165 /* Adding two registers and storing the result
17166 in the first source is already a 16-bit
17167 operation. */
17168 if (rtx_equal_p (dst, op0)
17169 && register_operand (op1, SImode))
17170 break;
17171
17172 if (low_register_operand (op0, SImode))
17173 {
17174 /* ADDS <Rd>,<Rn>,<Rm> */
17175 if (low_register_operand (op1, SImode))
17176 action = CONV;
17177 /* ADDS <Rdn>,#<imm8> */
17178 /* SUBS <Rdn>,#<imm8> */
17179 else if (rtx_equal_p (dst, op0)
17180 && CONST_INT_P (op1)
17181 && IN_RANGE (INTVAL (op1), -255, 255))
17182 action = CONV;
17183 /* ADDS <Rd>,<Rn>,#<imm3> */
17184 /* SUBS <Rd>,<Rn>,#<imm3> */
17185 else if (CONST_INT_P (op1)
17186 && IN_RANGE (INTVAL (op1), -7, 7))
17187 action = CONV;
17188 }
17189 /* ADCS <Rd>, <Rn> */
17190 else if (GET_CODE (XEXP (src, 0)) == PLUS
17191 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17192 && low_register_operand (XEXP (XEXP (src, 0), 1),
17193 SImode)
17194 && COMPARISON_P (op1)
17195 && cc_register (XEXP (op1, 0), VOIDmode)
17196 && maybe_get_arm_condition_code (op1) == ARM_CS
17197 && XEXP (op1, 1) == const0_rtx)
17198 action = CONV;
17199 break;
17200
17201 case MINUS:
17202 /* RSBS <Rd>,<Rn>,#0
17203 Not handled here: see NEG below. */
17204 /* SUBS <Rd>,<Rn>,#<imm3>
17205 SUBS <Rdn>,#<imm8>
17206 Not handled here: see PLUS above. */
17207 /* SUBS <Rd>,<Rn>,<Rm> */
17208 if (low_register_operand (op0, SImode)
17209 && low_register_operand (op1, SImode))
17210 action = CONV;
17211 break;
17212
17213 case MULT:
17214 /* MULS <Rdm>,<Rn>,<Rdm>
17215 As an exception to the rule, this is only used
17216 when optimizing for size since MULS is slow on all
17217 known implementations. We do not even want to use
17218 MULS in cold code, if optimizing for speed, so we
17219 test the global flag here. */
17220 if (!optimize_size)
17221 break;
17222 /* else fall through. */
17223 case AND:
17224 case IOR:
17225 case XOR:
17226 /* ANDS <Rdn>,<Rm> */
17227 if (rtx_equal_p (dst, op0)
17228 && low_register_operand (op1, SImode))
17229 action = action_for_partial_flag_setting;
17230 else if (rtx_equal_p (dst, op1)
17231 && low_register_operand (op0, SImode))
17232 action = action_for_partial_flag_setting == SKIP
17233 ? SKIP : SWAP_CONV;
17234 break;
17235
17236 case ASHIFTRT:
17237 case ASHIFT:
17238 case LSHIFTRT:
17239 /* ASRS <Rdn>,<Rm> */
17240 /* LSRS <Rdn>,<Rm> */
17241 /* LSLS <Rdn>,<Rm> */
17242 if (rtx_equal_p (dst, op0)
17243 && low_register_operand (op1, SImode))
17244 action = action_for_partial_flag_setting;
17245 /* ASRS <Rd>,<Rm>,#<imm5> */
17246 /* LSRS <Rd>,<Rm>,#<imm5> */
17247 /* LSLS <Rd>,<Rm>,#<imm5> */
17248 else if (low_register_operand (op0, SImode)
17249 && CONST_INT_P (op1)
17250 && IN_RANGE (INTVAL (op1), 0, 31))
17251 action = action_for_partial_flag_setting;
17252 break;
17253
17254 case ROTATERT:
17255 /* RORS <Rdn>,<Rm> */
17256 if (rtx_equal_p (dst, op0)
17257 && low_register_operand (op1, SImode))
17258 action = action_for_partial_flag_setting;
17259 break;
17260
17261 case NOT:
17262 /* MVNS <Rd>,<Rm> */
17263 if (low_register_operand (op0, SImode))
17264 action = action_for_partial_flag_setting;
17265 break;
17266
17267 case NEG:
17268 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17269 if (low_register_operand (op0, SImode))
17270 action = CONV;
17271 break;
17272
17273 case CONST_INT:
17274 /* MOVS <Rd>,#<imm8> */
17275 if (CONST_INT_P (src)
17276 && IN_RANGE (INTVAL (src), 0, 255))
17277 action = action_for_partial_flag_setting;
17278 break;
17279
17280 case REG:
17281 /* MOVS and MOV<c> with registers have different
17282 encodings, so are not relevant here. */
17283 break;
17284
17285 default:
17286 break;
17287 }
17288 }
17289
17290 if (action != SKIP)
17291 {
17292 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17293 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17294 rtvec vec;
17295
17296 if (action == SWAP_CONV)
17297 {
17298 src = copy_rtx (src);
17299 XEXP (src, 0) = op1;
17300 XEXP (src, 1) = op0;
17301 pat = gen_rtx_SET (VOIDmode, dst, src);
17302 vec = gen_rtvec (2, pat, clobber);
17303 }
17304 else /* action == CONV */
17305 vec = gen_rtvec (2, pat, clobber);
17306
17307 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17308 INSN_CODE (insn) = -1;
17309 }
17310 }
17311
17312 if (NONDEBUG_INSN_P (insn))
17313 df_simulate_one_insn_backwards (bb, insn, &live);
17314 }
17315 }
17316
17317 CLEAR_REG_SET (&live);
17318 }
17319
17320 /* GCC puts the pool in the wrong place for ARM, since we can only
17321 load addresses a limited distance around the pc. We do some
17322 special munging to move the constant pool values to the correct
17323 point in the code. */
17324 static void
17325 arm_reorg (void)
17326 {
17327 rtx insn;
17328 HOST_WIDE_INT address = 0;
17329 Mfix * fix;
17330
17331 if (TARGET_THUMB1)
17332 thumb1_reorg ();
17333 else if (TARGET_THUMB2)
17334 thumb2_reorg ();
17335
17336 /* Ensure all insns that must be split have been split at this point.
17337 Otherwise, the pool placement code below may compute incorrect
17338 insn lengths. Note that when optimizing, all insns have already
17339 been split at this point. */
17340 if (!optimize)
17341 split_all_insns_noflow ();
17342
17343 minipool_fix_head = minipool_fix_tail = NULL;
17344
17345 /* The first insn must always be a note, or the code below won't
17346 scan it properly. */
17347 insn = get_insns ();
17348 gcc_assert (NOTE_P (insn));
17349 minipool_pad = 0;
17350
17351 /* Scan all the insns and record the operands that will need fixing. */
17352 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17353 {
17354 if (BARRIER_P (insn))
17355 push_minipool_barrier (insn, address);
17356 else if (INSN_P (insn))
17357 {
17358 rtx_jump_table_data *table;
17359
17360 note_invalid_constants (insn, address, true);
17361 address += get_attr_length (insn);
17362
17363 /* If the insn is a vector jump, add the size of the table
17364 and skip the table. */
17365 if (tablejump_p (insn, NULL, &table))
17366 {
17367 address += get_jump_table_size (table);
17368 insn = table;
17369 }
17370 }
17371 else if (LABEL_P (insn))
17372 /* Add the worst-case padding due to alignment. We don't add
17373 the _current_ padding because the minipool insertions
17374 themselves might change it. */
17375 address += get_label_padding (insn);
17376 }
17377
17378 fix = minipool_fix_head;
17379
17380 /* Now scan the fixups and perform the required changes. */
17381 while (fix)
17382 {
17383 Mfix * ftmp;
17384 Mfix * fdel;
17385 Mfix * last_added_fix;
17386 Mfix * last_barrier = NULL;
17387 Mfix * this_fix;
17388
17389 /* Skip any further barriers before the next fix. */
17390 while (fix && BARRIER_P (fix->insn))
17391 fix = fix->next;
17392
17393 /* No more fixes. */
17394 if (fix == NULL)
17395 break;
17396
17397 last_added_fix = NULL;
17398
17399 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17400 {
17401 if (BARRIER_P (ftmp->insn))
17402 {
17403 if (ftmp->address >= minipool_vector_head->max_address)
17404 break;
17405
17406 last_barrier = ftmp;
17407 }
17408 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17409 break;
17410
17411 last_added_fix = ftmp; /* Keep track of the last fix added. */
17412 }
17413
17414 /* If we found a barrier, drop back to that; any fixes that we
17415 could have reached but come after the barrier will now go in
17416 the next mini-pool. */
17417 if (last_barrier != NULL)
17418 {
17419 /* Reduce the refcount for those fixes that won't go into this
17420 pool after all. */
17421 for (fdel = last_barrier->next;
17422 fdel && fdel != ftmp;
17423 fdel = fdel->next)
17424 {
17425 fdel->minipool->refcount--;
17426 fdel->minipool = NULL;
17427 }
17428
17429 ftmp = last_barrier;
17430 }
17431 else
17432 {
17433 /* ftmp is the first fix that we can't fit into this pool and
17434 there are no natural barriers that we could use. Insert a
17435 new barrier in the code somewhere between the previous
17436 fix and this one, and arrange to jump around it. */
17437 HOST_WIDE_INT max_address;
17438
17439 /* The last item on the list of fixes must be a barrier, so
17440 we can never run off the end of the list of fixes without
17441 last_barrier being set. */
17442 gcc_assert (ftmp);
17443
17444 max_address = minipool_vector_head->max_address;
17445 /* Check that there isn't another fix that is in range that
17446 we couldn't fit into this pool because the pool was
17447 already too large: we need to put the pool before such an
17448 instruction. The pool itself may come just after the
17449 fix because create_fix_barrier also allows space for a
17450 jump instruction. */
17451 if (ftmp->address < max_address)
17452 max_address = ftmp->address + 1;
17453
17454 last_barrier = create_fix_barrier (last_added_fix, max_address);
17455 }
17456
17457 assign_minipool_offsets (last_barrier);
17458
17459 while (ftmp)
17460 {
17461 if (!BARRIER_P (ftmp->insn)
17462 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17463 == NULL))
17464 break;
17465
17466 ftmp = ftmp->next;
17467 }
17468
17469 /* Scan over the fixes we have identified for this pool, fixing them
17470 up and adding the constants to the pool itself. */
17471 for (this_fix = fix; this_fix && ftmp != this_fix;
17472 this_fix = this_fix->next)
17473 if (!BARRIER_P (this_fix->insn))
17474 {
17475 rtx addr
17476 = plus_constant (Pmode,
17477 gen_rtx_LABEL_REF (VOIDmode,
17478 minipool_vector_label),
17479 this_fix->minipool->offset);
17480 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17481 }
17482
17483 dump_minipool (last_barrier->insn);
17484 fix = ftmp;
17485 }
17486
17487 /* From now on we must synthesize any constants that we can't handle
17488 directly. This can happen if the RTL gets split during final
17489 instruction generation. */
17490 cfun->machine->after_arm_reorg = 1;
17491
17492 /* Free the minipool memory. */
17493 obstack_free (&minipool_obstack, minipool_startobj);
17494 }
17495 \f
17496 /* Routines to output assembly language. */
17497
17498 /* If the rtx is the correct value then return the string of the number.
17499 In this way we can ensure that valid double constants are generated even
17500 when cross compiling. */
17501 const char *
17502 fp_immediate_constant (rtx x)
17503 {
17504 REAL_VALUE_TYPE r;
17505
17506 if (!fp_consts_inited)
17507 init_fp_table ();
17508
17509 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17510
17511 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17512 return "0";
17513 }
17514
17515 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17516 static const char *
17517 fp_const_from_val (REAL_VALUE_TYPE *r)
17518 {
17519 if (!fp_consts_inited)
17520 init_fp_table ();
17521
17522 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17523 return "0";
17524 }
17525
17526 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17527 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
17528 is in the list, UPDATE is true iff the list contains an explicit
17529 update of the base register. */
17530 void
17531 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17532 bool update)
17533 {
17534 int i;
17535 char pattern[100];
17536 int offset;
17537 const char *conditional;
17538 int num_saves = XVECLEN (operands[0], 0);
17539 unsigned int regno;
17540 unsigned int regno_base = REGNO (operands[1]);
17541
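/* The popped registers follow the optional SP-update and return-PC
   elements at the start of the PARALLEL, so skip over those.  */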
17542 offset = 0;
17543 offset += update ? 1 : 0;
17544 offset += return_pc ? 1 : 0;
17545
17546 /* Is the base register in the list? */
17547 for (i = offset; i < num_saves; i++)
17548 {
17549 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17550 /* If SP is in the list, then the base register must be SP. */
17551 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17552 /* If base register is in the list, there must be no explicit update. */
17553 if (regno == regno_base)
17554 gcc_assert (!update);
17555 }
17556
17557 conditional = reverse ? "%?%D0" : "%?%d0";
17558 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17559 {
17560 /* Output pop (not ldmfd) because it has a shorter encoding. */
17561 gcc_assert (update);
17562 sprintf (pattern, "pop%s\t{", conditional);
17563 }
17564 else
17565 {
17566 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17567 It's just a convention; their semantics are identical. */
17568 if (regno_base == SP_REGNUM)
17569 sprintf (pattern, "ldm%sfd\t", conditional);
17570 else if (TARGET_UNIFIED_ASM)
17571 sprintf (pattern, "ldmia%s\t", conditional);
17572 else
17573 sprintf (pattern, "ldm%sia\t", conditional);
17574
17575 strcat (pattern, reg_names[regno_base]);
17576 if (update)
17577 strcat (pattern, "!, {");
17578 else
17579 strcat (pattern, ", {");
17580 }
17581
17582 /* Output the first destination register. */
17583 strcat (pattern,
17584 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17585
17586 /* Output the rest of the destination registers. */
17587 for (i = offset + 1; i < num_saves; i++)
17588 {
17589 strcat (pattern, ", ");
17590 strcat (pattern,
17591 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17592 }
17593
17594 strcat (pattern, "}");
17595
17596 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17597 strcat (pattern, "^");
17598
17599 output_asm_insn (pattern, &cond);
17600 }
17601
17602
17603 /* Output the assembly for a store multiple. */
17604
17605 const char *
17606 vfp_output_fstmd (rtx * operands)
17607 {
17608 char pattern[100];
17609 int p;
17610 int base;
17611 int i;
17612
17613 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17614 p = strlen (pattern);
17615
17616 gcc_assert (REG_P (operands[1]));
17617
17618 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17619 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17620 {
17621 p += sprintf (&pattern[p], ", d%d", base + i);
17622 }
17623 strcpy (&pattern[p], "}");
17624
17625 output_asm_insn (pattern, operands);
17626 return "";
17627 }
17628
17629
17630 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17631 number of bytes pushed. */
17632
17633 static int
17634 vfp_emit_fstmd (int base_reg, int count)
17635 {
17636 rtx par;
17637 rtx dwarf;
17638 rtx tmp, reg;
17639 int i;
17640
17641 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17642 register pairs are stored by a store multiple insn. We avoid this
17643 by pushing an extra pair. */
17644 if (count == 2 && !arm_arch6)
17645 {
17646 if (base_reg == LAST_VFP_REGNUM - 3)
17647 base_reg -= 2;
17648 count++;
17649 }
17650
17651 /* FSTMD may not store more than 16 doubleword registers at once. Split
17652 larger stores into multiple parts (up to a maximum of two, in
17653 practice). */
17654 if (count > 16)
17655 {
17656 int saved;
17657 /* NOTE: base_reg is an internal register number, so each D register
17658 counts as 2. */
17659 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17660 saved += vfp_emit_fstmd (base_reg, 16);
17661 return saved;
17662 }
17663
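/* Build the store-multiple pattern and, alongside it, the sequence that
   will be attached as a REG_FRAME_RELATED_EXPR note, describing the stack
   adjustment and each individual register save for the DWARF frame and
   unwind information.  */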
17664 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17665 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17666
17667 reg = gen_rtx_REG (DFmode, base_reg);
17668 base_reg += 2;
17669
17670 XVECEXP (par, 0, 0)
17671 = gen_rtx_SET (VOIDmode,
17672 gen_frame_mem
17673 (BLKmode,
17674 gen_rtx_PRE_MODIFY (Pmode,
17675 stack_pointer_rtx,
17676 plus_constant
17677 (Pmode, stack_pointer_rtx,
17678 - (count * 8)))
17679 ),
17680 gen_rtx_UNSPEC (BLKmode,
17681 gen_rtvec (1, reg),
17682 UNSPEC_PUSH_MULT));
17683
17684 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17685 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17686 RTX_FRAME_RELATED_P (tmp) = 1;
17687 XVECEXP (dwarf, 0, 0) = tmp;
17688
17689 tmp = gen_rtx_SET (VOIDmode,
17690 gen_frame_mem (DFmode, stack_pointer_rtx),
17691 reg);
17692 RTX_FRAME_RELATED_P (tmp) = 1;
17693 XVECEXP (dwarf, 0, 1) = tmp;
17694
17695 for (i = 1; i < count; i++)
17696 {
17697 reg = gen_rtx_REG (DFmode, base_reg);
17698 base_reg += 2;
17699 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17700
17701 tmp = gen_rtx_SET (VOIDmode,
17702 gen_frame_mem (DFmode,
17703 plus_constant (Pmode,
17704 stack_pointer_rtx,
17705 i * 8)),
17706 reg);
17707 RTX_FRAME_RELATED_P (tmp) = 1;
17708 XVECEXP (dwarf, 0, i + 1) = tmp;
17709 }
17710
17711 par = emit_insn (par);
17712 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17713 RTX_FRAME_RELATED_P (par) = 1;
17714
17715 return count * 8;
17716 }
17717
17718 /* Emit a call instruction with pattern PAT. ADDR is the address of
17719 the call target. */
17720
17721 void
17722 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17723 {
17724 rtx insn;
17725
17726 insn = emit_call_insn (pat);
17727
17728 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17729 If the call might use such an entry, add a use of the PIC register
17730 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17731 if (TARGET_VXWORKS_RTP
17732 && flag_pic
17733 && !sibcall
17734 && GET_CODE (addr) == SYMBOL_REF
17735 && (SYMBOL_REF_DECL (addr)
17736 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17737 : !SYMBOL_REF_LOCAL_P (addr)))
17738 {
17739 require_pic_register ();
17740 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17741 }
17742
17743 if (TARGET_AAPCS_BASED)
17744 {
17745 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17746 linker. We need to add an IP clobber to allow setting
17747 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17748 is not needed since it's a fixed register. */
17749 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17750 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17751 }
17752 }
17753
17754 /* Output a 'call' insn. */
17755 const char *
17756 output_call (rtx *operands)
17757 {
17758 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17759
17760 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17761 if (REGNO (operands[0]) == LR_REGNUM)
17762 {
17763 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17764 output_asm_insn ("mov%?\t%0, %|lr", operands);
17765 }
17766
17767 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17768
17769 if (TARGET_INTERWORK || arm_arch4t)
17770 output_asm_insn ("bx%?\t%0", operands);
17771 else
17772 output_asm_insn ("mov%?\t%|pc, %0", operands);
17773
17774 return "";
17775 }
17776
17777 /* Output a 'call' insn that is a reference in memory. This is
17778 disabled for ARMv5; we prefer a blx instead because otherwise
17779 there's a significant performance overhead. */
17780 const char *
17781 output_call_mem (rtx *operands)
17782 {
17783 gcc_assert (!arm_arch5);
17784 if (TARGET_INTERWORK)
17785 {
17786 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17787 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17788 output_asm_insn ("bx%?\t%|ip", operands);
17789 }
17790 else if (regno_use_in (LR_REGNUM, operands[0]))
17791 {
17792 /* LR is used in the memory address. We load the address in the
17793 first instruction. It's safe to use IP as the target of the
17794 load since the call will kill it anyway. */
17795 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17796 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17797 if (arm_arch4t)
17798 output_asm_insn ("bx%?\t%|ip", operands);
17799 else
17800 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17801 }
17802 else
17803 {
17804 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17805 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17806 }
17807
17808 return "";
17809 }
17810
17811
17812 /* Output a move from arm registers to arm registers of a long double
17813 OPERANDS[0] is the destination.
17814 OPERANDS[1] is the source. */
17815 const char *
17816 output_mov_long_double_arm_from_arm (rtx *operands)
17817 {
17818 /* We have to be careful here because the two might overlap. */
17819 int dest_start = REGNO (operands[0]);
17820 int src_start = REGNO (operands[1]);
17821 rtx ops[2];
17822 int i;
17823
17824 if (dest_start < src_start)
17825 {
17826 for (i = 0; i < 3; i++)
17827 {
17828 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17829 ops[1] = gen_rtx_REG (SImode, src_start + i);
17830 output_asm_insn ("mov%?\t%0, %1", ops);
17831 }
17832 }
17833 else
17834 {
17835 for (i = 2; i >= 0; i--)
17836 {
17837 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17838 ops[1] = gen_rtx_REG (SImode, src_start + i);
17839 output_asm_insn ("mov%?\t%0, %1", ops);
17840 }
17841 }
17842
17843 return "";
17844 }
17845
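/* Emit RTL to load the 32-bit value SRC into register DEST as a pair of
   moves (typically a MOVW/MOVT sequence).  For a constant, the write of
   the high halfword is omitted when it would be zero; for anything else
   a HIGH/LO_SUM pair is emitted.  */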
17846 void
17847 arm_emit_movpair (rtx dest, rtx src)
17848 {
17849 /* If the src is an immediate, simplify it. */
17850 if (CONST_INT_P (src))
17851 {
17852 HOST_WIDE_INT val = INTVAL (src);
17853 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17854 if ((val >> 16) & 0x0000ffff)
17855 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17856 GEN_INT (16)),
17857 GEN_INT ((val >> 16) & 0x0000ffff));
17858 return;
17859 }
17860 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17861 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17862 }
17863
17864 /* Output a move between double words. It must be REG<-MEM
17865 or MEM<-REG. */
17866 const char *
17867 output_move_double (rtx *operands, bool emit, int *count)
17868 {
17869 enum rtx_code code0 = GET_CODE (operands[0]);
17870 enum rtx_code code1 = GET_CODE (operands[1]);
17871 rtx otherops[3];
17872 if (count)
17873 *count = 1;
17874
17875 /* The only case when this might happen is when
17876 you are looking at the length of a DImode instruction
17877 that has an invalid constant in it. */
17878 if (code0 == REG && code1 != MEM)
17879 {
17880 gcc_assert (!emit);
17881 *count = 2;
17882 return "";
17883 }
17884
17885 if (code0 == REG)
17886 {
17887 unsigned int reg0 = REGNO (operands[0]);
17888
17889 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17890
17891 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17892
17893 switch (GET_CODE (XEXP (operands[1], 0)))
17894 {
17895 case REG:
17896
17897 if (emit)
17898 {
17899 if (TARGET_LDRD
17900 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17901 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17902 else
17903 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17904 }
17905 break;
17906
17907 case PRE_INC:
17908 gcc_assert (TARGET_LDRD);
17909 if (emit)
17910 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17911 break;
17912
17913 case PRE_DEC:
17914 if (emit)
17915 {
17916 if (TARGET_LDRD)
17917 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17918 else
17919 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17920 }
17921 break;
17922
17923 case POST_INC:
17924 if (emit)
17925 {
17926 if (TARGET_LDRD)
17927 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17928 else
17929 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17930 }
17931 break;
17932
17933 case POST_DEC:
17934 gcc_assert (TARGET_LDRD);
17935 if (emit)
17936 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17937 break;
17938
17939 case PRE_MODIFY:
17940 case POST_MODIFY:
17941 /* Autoincrement addressing modes should never have overlapping
17942 base and destination registers, and overlapping index registers
17943 are already prohibited, so this doesn't need to worry about
17944 fix_cm3_ldrd. */
17945 otherops[0] = operands[0];
17946 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17947 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17948
17949 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17950 {
17951 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17952 {
17953 /* Registers overlap so split out the increment. */
17954 if (emit)
17955 {
17956 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17957 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17958 }
17959 if (count)
17960 *count = 2;
17961 }
17962 else
17963 {
17964 /* Use a single insn if we can.
17965 FIXME: IWMMXT allows offsets larger than ldrd can
17966 handle, fix these up with a pair of ldr. */
17967 if (TARGET_THUMB2
17968 || !CONST_INT_P (otherops[2])
17969 || (INTVAL (otherops[2]) > -256
17970 && INTVAL (otherops[2]) < 256))
17971 {
17972 if (emit)
17973 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17974 }
17975 else
17976 {
17977 if (emit)
17978 {
17979 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17980 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17981 }
17982 if (count)
17983 *count = 2;
17984
17985 }
17986 }
17987 }
17988 else
17989 {
17990 /* Use a single insn if we can.
17991 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17992 fix these up with a pair of ldr. */
17993 if (TARGET_THUMB2
17994 || !CONST_INT_P (otherops[2])
17995 || (INTVAL (otherops[2]) > -256
17996 && INTVAL (otherops[2]) < 256))
17997 {
17998 if (emit)
17999 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18000 }
18001 else
18002 {
18003 if (emit)
18004 {
18005 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18006 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18007 }
18008 if (count)
18009 *count = 2;
18010 }
18011 }
18012 break;
18013
18014 case LABEL_REF:
18015 case CONST:
18016 /* We might be able to use ldrd %0, %1 here. However the range is
18017 different to ldr/adr, and it is broken on some ARMv7-M
18018 implementations. */
18019 /* Use the second register of the pair to avoid problematic
18020 overlap. */
18021 otherops[1] = operands[1];
18022 if (emit)
18023 output_asm_insn ("adr%?\t%0, %1", otherops);
18024 operands[1] = otherops[0];
18025 if (emit)
18026 {
18027 if (TARGET_LDRD)
18028 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18029 else
18030 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18031 }
18032
18033 if (count)
18034 *count = 2;
18035 break;
18036
18037 /* ??? This needs checking for thumb2. */
18038 default:
18039 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18040 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18041 {
18042 otherops[0] = operands[0];
18043 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18044 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18045
18046 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18047 {
18048 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18049 {
18050 switch ((int) INTVAL (otherops[2]))
18051 {
18052 case -8:
18053 if (emit)
18054 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18055 return "";
18056 case -4:
18057 if (TARGET_THUMB2)
18058 break;
18059 if (emit)
18060 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18061 return "";
18062 case 4:
18063 if (TARGET_THUMB2)
18064 break;
18065 if (emit)
18066 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18067 return "";
18068 }
18069 }
18070 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18071 operands[1] = otherops[0];
18072 if (TARGET_LDRD
18073 && (REG_P (otherops[2])
18074 || TARGET_THUMB2
18075 || (CONST_INT_P (otherops[2])
18076 && INTVAL (otherops[2]) > -256
18077 && INTVAL (otherops[2]) < 256)))
18078 {
18079 if (reg_overlap_mentioned_p (operands[0],
18080 otherops[2]))
18081 {
18082 rtx tmp;
18083 /* Swap base and index registers over to
18084 avoid a conflict. */
18085 tmp = otherops[1];
18086 otherops[1] = otherops[2];
18087 otherops[2] = tmp;
18088 }
18089 /* If both registers conflict, it will usually
18090 have been fixed by a splitter. */
18091 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18092 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18093 {
18094 if (emit)
18095 {
18096 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18097 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18098 }
18099 if (count)
18100 *count = 2;
18101 }
18102 else
18103 {
18104 otherops[0] = operands[0];
18105 if (emit)
18106 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18107 }
18108 return "";
18109 }
18110
18111 if (CONST_INT_P (otherops[2]))
18112 {
18113 if (emit)
18114 {
18115 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18116 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18117 else
18118 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18119 }
18120 }
18121 else
18122 {
18123 if (emit)
18124 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18125 }
18126 }
18127 else
18128 {
18129 if (emit)
18130 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18131 }
18132
18133 if (count)
18134 *count = 2;
18135
18136 if (TARGET_LDRD)
18137 return "ldr%(d%)\t%0, [%1]";
18138
18139 return "ldm%(ia%)\t%1, %M0";
18140 }
18141 else
18142 {
18143 otherops[1] = adjust_address (operands[1], SImode, 4);
18144 /* Take care of overlapping base/data reg. */
18145 if (reg_mentioned_p (operands[0], operands[1]))
18146 {
18147 if (emit)
18148 {
18149 output_asm_insn ("ldr%?\t%0, %1", otherops);
18150 output_asm_insn ("ldr%?\t%0, %1", operands);
18151 }
18152 if (count)
18153 *count = 2;
18154
18155 }
18156 else
18157 {
18158 if (emit)
18159 {
18160 output_asm_insn ("ldr%?\t%0, %1", operands);
18161 output_asm_insn ("ldr%?\t%0, %1", otherops);
18162 }
18163 if (count)
18164 *count = 2;
18165 }
18166 }
18167 }
18168 }
18169 else
18170 {
18171 /* Constraints should ensure this. */
18172 gcc_assert (code0 == MEM && code1 == REG);
18173 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18174 || (TARGET_ARM && TARGET_LDRD));
18175
18176 switch (GET_CODE (XEXP (operands[0], 0)))
18177 {
18178 case REG:
18179 if (emit)
18180 {
18181 if (TARGET_LDRD)
18182 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18183 else
18184 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18185 }
18186 break;
18187
18188 case PRE_INC:
18189 gcc_assert (TARGET_LDRD);
18190 if (emit)
18191 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18192 break;
18193
18194 case PRE_DEC:
18195 if (emit)
18196 {
18197 if (TARGET_LDRD)
18198 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18199 else
18200 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18201 }
18202 break;
18203
18204 case POST_INC:
18205 if (emit)
18206 {
18207 if (TARGET_LDRD)
18208 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18209 else
18210 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18211 }
18212 break;
18213
18214 case POST_DEC:
18215 gcc_assert (TARGET_LDRD);
18216 if (emit)
18217 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18218 break;
18219
18220 case PRE_MODIFY:
18221 case POST_MODIFY:
18222 otherops[0] = operands[1];
18223 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18224 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18225
18226 /* IWMMXT allows offsets larger than ldrd can handle,
18227 fix these up with a pair of ldr. */
18228 if (!TARGET_THUMB2
18229 && CONST_INT_P (otherops[2])
18230 && (INTVAL(otherops[2]) <= -256
18231 || INTVAL(otherops[2]) >= 256))
18232 {
18233 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18234 {
18235 if (emit)
18236 {
18237 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18238 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18239 }
18240 if (count)
18241 *count = 2;
18242 }
18243 else
18244 {
18245 if (emit)
18246 {
18247 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18248 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18249 }
18250 if (count)
18251 *count = 2;
18252 }
18253 }
18254 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18255 {
18256 if (emit)
18257 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18258 }
18259 else
18260 {
18261 if (emit)
18262 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18263 }
18264 break;
18265
18266 case PLUS:
18267 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18268 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18269 {
18270 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18271 {
18272 case -8:
18273 if (emit)
18274 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18275 return "";
18276
18277 case -4:
18278 if (TARGET_THUMB2)
18279 break;
18280 if (emit)
18281 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18282 return "";
18283
18284 case 4:
18285 if (TARGET_THUMB2)
18286 break;
18287 if (emit)
18288 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18289 return "";
18290 }
18291 }
18292 if (TARGET_LDRD
18293 && (REG_P (otherops[2])
18294 || TARGET_THUMB2
18295 || (CONST_INT_P (otherops[2])
18296 && INTVAL (otherops[2]) > -256
18297 && INTVAL (otherops[2]) < 256)))
18298 {
18299 otherops[0] = operands[1];
18300 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18301 if (emit)
18302 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18303 return "";
18304 }
18305 /* Fall through */
18306
18307 default:
18308 otherops[0] = adjust_address (operands[0], SImode, 4);
18309 otherops[1] = operands[1];
18310 if (emit)
18311 {
18312 output_asm_insn ("str%?\t%1, %0", operands);
18313 output_asm_insn ("str%?\t%H1, %0", otherops);
18314 }
18315 if (count)
18316 *count = 2;
18317 }
18318 }
18319
18320 return "";
18321 }
18322
18323 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18324 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18325
18326 const char *
18327 output_move_quad (rtx *operands)
18328 {
18329 if (REG_P (operands[0]))
18330 {
18331 /* Load, or reg->reg move. */
18332
18333 if (MEM_P (operands[1]))
18334 {
18335 switch (GET_CODE (XEXP (operands[1], 0)))
18336 {
18337 case REG:
18338 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18339 break;
18340
18341 case LABEL_REF:
18342 case CONST:
18343 output_asm_insn ("adr%?\t%0, %1", operands);
18344 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18345 break;
18346
18347 default:
18348 gcc_unreachable ();
18349 }
18350 }
18351 else
18352 {
18353 rtx ops[2];
18354 int dest, src, i;
18355
18356 gcc_assert (REG_P (operands[1]));
18357
18358 dest = REGNO (operands[0]);
18359 src = REGNO (operands[1]);
18360
18361 /* This seems pretty dumb, but hopefully GCC won't try to do it
18362 very often. */
18363 if (dest < src)
18364 for (i = 0; i < 4; i++)
18365 {
18366 ops[0] = gen_rtx_REG (SImode, dest + i);
18367 ops[1] = gen_rtx_REG (SImode, src + i);
18368 output_asm_insn ("mov%?\t%0, %1", ops);
18369 }
18370 else
18371 for (i = 3; i >= 0; i--)
18372 {
18373 ops[0] = gen_rtx_REG (SImode, dest + i);
18374 ops[1] = gen_rtx_REG (SImode, src + i);
18375 output_asm_insn ("mov%?\t%0, %1", ops);
18376 }
18377 }
18378 }
18379 else
18380 {
18381 gcc_assert (MEM_P (operands[0]));
18382 gcc_assert (REG_P (operands[1]));
18383 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18384
18385 switch (GET_CODE (XEXP (operands[0], 0)))
18386 {
18387 case REG:
18388 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18389 break;
18390
18391 default:
18392 gcc_unreachable ();
18393 }
18394 }
18395
18396 return "";
18397 }
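
/* As a rough illustration of the register-to-register case above (core
   register numbers chosen arbitrarily): moving a quad-word value from
   r2-r5 into r4-r7 has dest > src, so the moves are emitted in descending
   order,

       mov r7, r5
       mov r6, r4
       mov r5, r3
       mov r4, r2

   ensuring that no source register is overwritten before it has been
   read; an ascending sequence would clobber r4 and r5 too early.  */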
18398
18399 /* Output a VFP load or store instruction. */
18400
18401 const char *
18402 output_move_vfp (rtx *operands)
18403 {
18404 rtx reg, mem, addr, ops[2];
18405 int load = REG_P (operands[0]);
18406 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18407 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18408 const char *templ;
18409 char buff[50];
18410 enum machine_mode mode;
18411
18412 reg = operands[!load];
18413 mem = operands[load];
18414
18415 mode = GET_MODE (reg);
18416
18417 gcc_assert (REG_P (reg));
18418 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18419 gcc_assert (mode == SFmode
18420 || mode == DFmode
18421 || mode == SImode
18422 || mode == DImode
18423 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18424 gcc_assert (MEM_P (mem));
18425
18426 addr = XEXP (mem, 0);
18427
18428 switch (GET_CODE (addr))
18429 {
18430 case PRE_DEC:
18431 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18432 ops[0] = XEXP (addr, 0);
18433 ops[1] = reg;
18434 break;
18435
18436 case POST_INC:
18437 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18438 ops[0] = XEXP (addr, 0);
18439 ops[1] = reg;
18440 break;
18441
18442 default:
18443 templ = "f%s%c%%?\t%%%s0, %%1%s";
18444 ops[0] = reg;
18445 ops[1] = mem;
18446 break;
18447 }
18448
18449 sprintf (buff, templ,
18450 load ? "ld" : "st",
18451 dp ? 'd' : 's',
18452 dp ? "P" : "",
18453 integer_p ? "\t%@ int" : "");
18454 output_asm_insn (buff, ops);
18455
18456 return "";
18457 }
18458
18459 /* Output a Neon double-word or quad-word load or store, or a load
18460 or store for larger structure modes.
18461
18462 WARNING: The ordering of elements is weird in big-endian mode,
18463 because the EABI requires that vectors stored in memory appear
18464 as though they were stored by a VSTM instruction.
18465 GCC RTL defines element ordering based on in-memory order.
18466 This can be different from the architectural ordering of elements
18467 within a NEON register. The intrinsics defined in arm_neon.h use the
18468 NEON register element ordering, not the GCC RTL element ordering.
18469
18470 For example, the in-memory ordering of a quadword vector with 16-bit
18471 elements, when stored from register pair {d0,d1} in big-endian mode,
18472 will be (lowest address first, d0[N] is NEON register element N):
18473
18474 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18475
18476 When necessary, quadword registers (dN, dN+1) are moved to ARM
18477 registers from rN in the order:
18478
18479 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18480
18481 So that STM/LDM can be used on vectors in ARM registers, and the
18482 same memory layout will result as if VSTM/VLDM were used.
18483
18484 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18485 possible, which allows use of appropriate alignment tags.
18486 Note that the choice of "64" is independent of the actual vector
18487 element size; this size simply ensures that the behavior is
18488 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18489
18490 Due to limitations of those instructions, use of VST1.64/VLD1.64
18491 is not possible if:
18492 - the address contains PRE_DEC, or
18493 - the mode refers to more than 4 double-word registers
18494
18495 In those cases, it would be possible to replace VSTM/VLDM by a
18496 sequence of instructions; this is not currently implemented since
18497 this is not certain to actually improve performance. */
18498
18499 const char *
18500 output_move_neon (rtx *operands)
18501 {
18502 rtx reg, mem, addr, ops[2];
18503 int regno, nregs, load = REG_P (operands[0]);
18504 const char *templ;
18505 char buff[50];
18506 enum machine_mode mode;
18507
18508 reg = operands[!load];
18509 mem = operands[load];
18510
18511 mode = GET_MODE (reg);
18512
18513 gcc_assert (REG_P (reg));
18514 regno = REGNO (reg);
18515 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18516 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18517 || NEON_REGNO_OK_FOR_QUAD (regno));
18518 gcc_assert (VALID_NEON_DREG_MODE (mode)
18519 || VALID_NEON_QREG_MODE (mode)
18520 || VALID_NEON_STRUCT_MODE (mode));
18521 gcc_assert (MEM_P (mem));
18522
18523 addr = XEXP (mem, 0);
18524
18525 /* Strip off const from addresses like (const (plus (...))). */
18526 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18527 addr = XEXP (addr, 0);
18528
18529 switch (GET_CODE (addr))
18530 {
18531 case POST_INC:
18532 /* We have to use vldm / vstm for too-large modes. */
18533 if (nregs > 4)
18534 {
18535 templ = "v%smia%%?\t%%0!, %%h1";
18536 ops[0] = XEXP (addr, 0);
18537 }
18538 else
18539 {
18540 templ = "v%s1.64\t%%h1, %%A0";
18541 ops[0] = mem;
18542 }
18543 ops[1] = reg;
18544 break;
18545
18546 case PRE_DEC:
18547 /* We have to use vldm / vstm in this case, since there is no
18548 pre-decrement form of the vld1 / vst1 instructions. */
18549 templ = "v%smdb%%?\t%%0!, %%h1";
18550 ops[0] = XEXP (addr, 0);
18551 ops[1] = reg;
18552 break;
18553
18554 case POST_MODIFY:
18555 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18556 gcc_unreachable ();
18557
18558 case LABEL_REF:
18559 case PLUS:
18560 {
18561 int i;
18562 int overlap = -1;
18563 for (i = 0; i < nregs; i++)
18564 {
18565 /* We're only using DImode here because it's a convenient size. */
18566 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18567 ops[1] = adjust_address (mem, DImode, 8 * i);
18568 if (reg_overlap_mentioned_p (ops[0], mem))
18569 {
18570 gcc_assert (overlap == -1);
18571 overlap = i;
18572 }
18573 else
18574 {
18575 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18576 output_asm_insn (buff, ops);
18577 }
18578 }
18579 if (overlap != -1)
18580 {
18581 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18582 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18583 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18584 output_asm_insn (buff, ops);
18585 }
18586
18587 return "";
18588 }
18589
18590 default:
18591 /* We have to use vldm / vstm for too-large modes. */
18592 if (nregs > 4)
18593 templ = "v%smia%%?\t%%m0, %%h1";
18594 else
18595 templ = "v%s1.64\t%%h1, %%A0";
18596
18597 ops[0] = mem;
18598 ops[1] = reg;
18599 }
18600
18601 sprintf (buff, templ, load ? "ld" : "st");
18602 output_asm_insn (buff, ops);
18603
18604 return "";
18605 }
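
/* For instance (illustrative only), a plain quad-word vector such as a
   V4SI value occupies two D registers, so nregs above is 2 and the
   VLD1.64/VST1.64 form can be used when the addressing mode allows,
   whereas an XImode structure spans eight D registers, so nregs is 8 > 4
   and the VLDM/VSTM form is required.  */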
18606
18607 /* Compute and return the length of neon_mov<mode>, where <mode> is
18608 one of VSTRUCT modes: EI, OI, CI or XI. */
18609 int
18610 arm_attr_length_move_neon (rtx insn)
18611 {
18612 rtx reg, mem, addr;
18613 int load;
18614 enum machine_mode mode;
18615
18616 extract_insn_cached (insn);
18617
18618 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18619 {
18620 mode = GET_MODE (recog_data.operand[0]);
18621 switch (mode)
18622 {
18623 case EImode:
18624 case OImode:
18625 return 8;
18626 case CImode:
18627 return 12;
18628 case XImode:
18629 return 16;
18630 default:
18631 gcc_unreachable ();
18632 }
18633 }
18634
18635 load = REG_P (recog_data.operand[0]);
18636 reg = recog_data.operand[!load];
18637 mem = recog_data.operand[load];
18638
18639 gcc_assert (MEM_P (mem));
18640
18641 mode = GET_MODE (reg);
18642 addr = XEXP (mem, 0);
18643
18644 /* Strip off const from addresses like (const (plus (...))). */
18645 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18646 addr = XEXP (addr, 0);
18647
18648 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18649 {
18650 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18651 return insns * 4;
18652 }
18653 else
18654 return 4;
18655 }
18656
18657 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18658 return zero. */
18659
18660 int
18661 arm_address_offset_is_imm (rtx insn)
18662 {
18663 rtx mem, addr;
18664
18665 extract_insn_cached (insn);
18666
18667 if (REG_P (recog_data.operand[0]))
18668 return 0;
18669
18670 mem = recog_data.operand[0];
18671
18672 gcc_assert (MEM_P (mem));
18673
18674 addr = XEXP (mem, 0);
18675
18676 if (REG_P (addr)
18677 || (GET_CODE (addr) == PLUS
18678 && REG_P (XEXP (addr, 0))
18679 && CONST_INT_P (XEXP (addr, 1))))
18680 return 1;
18681 else
18682 return 0;
18683 }
18684
18685 /* Output an ADD r, s, #n where n may be too big for one instruction.
18686 If adding zero to one register, output nothing. */
18687 const char *
18688 output_add_immediate (rtx *operands)
18689 {
18690 HOST_WIDE_INT n = INTVAL (operands[2]);
18691
18692 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18693 {
18694 if (n < 0)
18695 output_multi_immediate (operands,
18696 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18697 -n);
18698 else
18699 output_multi_immediate (operands,
18700 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18701 n);
18702 }
18703
18704 return "";
18705 }
18706
18707 /* Output a multiple immediate operation.
18708 OPERANDS is the vector of operands referred to in the output patterns.
18709 INSTR1 is the output pattern to use for the first constant.
18710 INSTR2 is the output pattern to use for subsequent constants.
18711 IMMED_OP is the index of the constant slot in OPERANDS.
18712 N is the constant value. */
18713 static const char *
18714 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18715 int immed_op, HOST_WIDE_INT n)
18716 {
18717 #if HOST_BITS_PER_WIDE_INT > 32
18718 n &= 0xffffffff;
18719 #endif
18720
18721 if (n == 0)
18722 {
18723 /* Quick and easy output. */
18724 operands[immed_op] = const0_rtx;
18725 output_asm_insn (instr1, operands);
18726 }
18727 else
18728 {
18729 int i;
18730 const char * instr = instr1;
18731
18732 /* Note that n is never zero here (which would give no output). */
18733 for (i = 0; i < 32; i += 2)
18734 {
18735 if (n & (3 << i))
18736 {
18737 operands[immed_op] = GEN_INT (n & (255 << i));
18738 output_asm_insn (instr, operands);
18739 instr = instr2;
18740 i += 6;
18741 }
18742 }
18743 }
18744
18745 return "";
18746 }
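
/* As a worked example (register names purely illustrative, condition
   suffixes omitted): for output_add_immediate with an out-of-range
   constant such as 0x101, the loop above finds the non-zero chunk 0x01
   at bit 0 and the chunk 0x100 at bit 8, so the emitted sequence is

       add r0, r1, #1
       add r0, r0, #256

   Each chunk spans at most eight contiguous bits starting at an even bit
   position, and is therefore a valid ARM immediate.  */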
18747
18748 /* Return the name of a shifter operation. */
18749 static const char *
18750 arm_shift_nmem(enum rtx_code code)
18751 {
18752 switch (code)
18753 {
18754 case ASHIFT:
18755 return ARM_LSL_NAME;
18756
18757 case ASHIFTRT:
18758 return "asr";
18759
18760 case LSHIFTRT:
18761 return "lsr";
18762
18763 case ROTATERT:
18764 return "ror";
18765
18766 default:
18767 abort();
18768 }
18769 }
18770
18771 /* Return the appropriate ARM instruction for the operation code.
18772 The returned result should not be overwritten. OP is the rtx of the
18773 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18774 was shifted. */
18775 const char *
18776 arithmetic_instr (rtx op, int shift_first_arg)
18777 {
18778 switch (GET_CODE (op))
18779 {
18780 case PLUS:
18781 return "add";
18782
18783 case MINUS:
18784 return shift_first_arg ? "rsb" : "sub";
18785
18786 case IOR:
18787 return "orr";
18788
18789 case XOR:
18790 return "eor";
18791
18792 case AND:
18793 return "and";
18794
18795 case ASHIFT:
18796 case ASHIFTRT:
18797 case LSHIFTRT:
18798 case ROTATERT:
18799 return arm_shift_nmem(GET_CODE(op));
18800
18801 default:
18802 gcc_unreachable ();
18803 }
18804 }
18805
18806 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18807 for the operation code. The returned result should not be overwritten.
18808 OP is the rtx of the shift.
18809 On exit, *AMOUNTP will be -1 if the shift is by a register, or the constant
18810 shift amount if the shift is by a constant. */
18811 static const char *
18812 shift_op (rtx op, HOST_WIDE_INT *amountp)
18813 {
18814 const char * mnem;
18815 enum rtx_code code = GET_CODE (op);
18816
18817 switch (code)
18818 {
18819 case ROTATE:
18820 if (!CONST_INT_P (XEXP (op, 1)))
18821 {
18822 output_operand_lossage ("invalid shift operand");
18823 return NULL;
18824 }
18825
18826 code = ROTATERT;
18827 *amountp = 32 - INTVAL (XEXP (op, 1));
18828 mnem = "ror";
18829 break;
18830
18831 case ASHIFT:
18832 case ASHIFTRT:
18833 case LSHIFTRT:
18834 case ROTATERT:
18835 mnem = arm_shift_nmem(code);
18836 if (CONST_INT_P (XEXP (op, 1)))
18837 {
18838 *amountp = INTVAL (XEXP (op, 1));
18839 }
18840 else if (REG_P (XEXP (op, 1)))
18841 {
18842 *amountp = -1;
18843 return mnem;
18844 }
18845 else
18846 {
18847 output_operand_lossage ("invalid shift operand");
18848 return NULL;
18849 }
18850 break;
18851
18852 case MULT:
18853 /* We never have to worry about the amount being other than a
18854 power of 2, since this case can never be reloaded from a reg. */
18855 if (!CONST_INT_P (XEXP (op, 1)))
18856 {
18857 output_operand_lossage ("invalid shift operand");
18858 return NULL;
18859 }
18860
18861 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18862
18863 /* Amount must be a power of two. */
18864 if (*amountp & (*amountp - 1))
18865 {
18866 output_operand_lossage ("invalid shift operand");
18867 return NULL;
18868 }
18869
18870 *amountp = int_log2 (*amountp);
18871 return ARM_LSL_NAME;
18872
18873 default:
18874 output_operand_lossage ("invalid shift operand");
18875 return NULL;
18876 }
18877
18878 /* This is not 100% correct, but follows from the desire to merge
18879 multiplication by a power of 2 with the recognizer for a
18880 shift. >=32 is not a valid shift for "lsl", so we must try to
18881 output a shift that produces the correct arithmetical result.
18882 Using lsr #32 is identical except for the fact that the carry bit
18883 is not set correctly if we set the flags; but we never use the
18884 carry bit from such an operation, so we can ignore that. */
18885 if (code == ROTATERT)
18886 /* Rotate is just modulo 32. */
18887 *amountp &= 31;
18888 else if (*amountp != (*amountp & 31))
18889 {
18890 if (code == ASHIFT)
18891 mnem = "lsr";
18892 *amountp = 32;
18893 }
18894
18895 /* Shifts of 0 are no-ops. */
18896 if (*amountp == 0)
18897 return NULL;
18898
18899 return mnem;
18900 }
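
/* Two illustrative cases for shift_op: a (rotate x 10) is rewritten as a
   rotate-right by 32 - 10 = 22, yielding "ror" with *AMOUNTP = 22, and a
   (mult x 8) is treated as a left shift, yielding ARM_LSL_NAME with
   *AMOUNTP = int_log2 (8) = 3.  */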
18901
18902 /* Obtain the shift count from POWER, which must be a power of two. */
18903
18904 static HOST_WIDE_INT
18905 int_log2 (HOST_WIDE_INT power)
18906 {
18907 HOST_WIDE_INT shift = 0;
18908
18909 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18910 {
18911 gcc_assert (shift <= 31);
18912 shift++;
18913 }
18914
18915 return shift;
18916 }
18917
18918 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18919 because /bin/as is horribly restrictive. The judgement about
18920 whether or not each character is 'printable' (and can be output as
18921 is) or not (and must be printed with an octal escape) must be made
18922 with reference to the *host* character set -- the situation is
18923 similar to that discussed in the comments above pp_c_char in
18924 c-pretty-print.c. */
18925
18926 #define MAX_ASCII_LEN 51
18927
18928 void
18929 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18930 {
18931 int i;
18932 int len_so_far = 0;
18933
18934 fputs ("\t.ascii\t\"", stream);
18935
18936 for (i = 0; i < len; i++)
18937 {
18938 int c = p[i];
18939
18940 if (len_so_far >= MAX_ASCII_LEN)
18941 {
18942 fputs ("\"\n\t.ascii\t\"", stream);
18943 len_so_far = 0;
18944 }
18945
18946 if (ISPRINT (c))
18947 {
18948 if (c == '\\' || c == '\"')
18949 {
18950 putc ('\\', stream);
18951 len_so_far++;
18952 }
18953 putc (c, stream);
18954 len_so_far++;
18955 }
18956 else
18957 {
18958 fprintf (stream, "\\%03o", c);
18959 len_so_far += 4;
18960 }
18961 }
18962
18963 fputs ("\"\n", stream);
18964 }
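
/* For illustration, an input of the four bytes 'a', '"', 'b' and newline
   would be emitted as

       .ascii "a\"b\012"

   since the double quote is backslash-escaped and the newline, being
   non-printable, becomes a three-digit octal escape.  */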
18965 \f
18966 /* Compute the register save mask for registers 0 through 12
18967 inclusive. This code is used by arm_compute_save_reg_mask. */
18968
18969 static unsigned long
18970 arm_compute_save_reg0_reg12_mask (void)
18971 {
18972 unsigned long func_type = arm_current_func_type ();
18973 unsigned long save_reg_mask = 0;
18974 unsigned int reg;
18975
18976 if (IS_INTERRUPT (func_type))
18977 {
18978 unsigned int max_reg;
18979 /* Interrupt functions must not corrupt any registers,
18980 even call clobbered ones. If this is a leaf function
18981 we can just examine the registers used by the RTL, but
18982 otherwise we have to assume that whatever function is
18983 called might clobber anything, and so we have to save
18984 all the call-clobbered registers as well. */
18985 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18986 /* FIQ handlers have registers r8 - r12 banked, so
18987 we only need to check r0 - r7. Normal ISRs only
18988 bank r14 and r15, so we must check up to r12.
18989 r13 is the stack pointer which is always preserved,
18990 so we do not need to consider it here. */
18991 max_reg = 7;
18992 else
18993 max_reg = 12;
18994
18995 for (reg = 0; reg <= max_reg; reg++)
18996 if (df_regs_ever_live_p (reg)
18997 || (! crtl->is_leaf && call_used_regs[reg]))
18998 save_reg_mask |= (1 << reg);
18999
19000 /* Also save the pic base register if necessary. */
19001 if (flag_pic
19002 && !TARGET_SINGLE_PIC_BASE
19003 && arm_pic_register != INVALID_REGNUM
19004 && crtl->uses_pic_offset_table)
19005 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19006 }
19007 else if (IS_VOLATILE(func_type))
19008 {
19009 /* For noreturn functions we historically omitted register saves
19010 altogether. However this really messes up debugging. As a
19011 compromise save just the frame pointers. Combined with the link
19012 register saved elsewhere this should be sufficient to get
19013 a backtrace. */
19014 if (frame_pointer_needed)
19015 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19016 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19017 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19018 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19019 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19020 }
19021 else
19022 {
19023 /* In the normal case we only need to save those registers
19024 which are call saved and which are used by this function. */
19025 for (reg = 0; reg <= 11; reg++)
19026 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19027 save_reg_mask |= (1 << reg);
19028
19029 /* Handle the frame pointer as a special case. */
19030 if (frame_pointer_needed)
19031 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19032
19033 /* If we aren't loading the PIC register,
19034 don't stack it even though it may be live. */
19035 if (flag_pic
19036 && !TARGET_SINGLE_PIC_BASE
19037 && arm_pic_register != INVALID_REGNUM
19038 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19039 || crtl->uses_pic_offset_table))
19040 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19041
19042 /* The prologue will copy SP into R0, so save it. */
19043 if (IS_STACKALIGN (func_type))
19044 save_reg_mask |= 1;
19045 }
19046
19047 /* Save registers so the exception handler can modify them. */
19048 if (crtl->calls_eh_return)
19049 {
19050 unsigned int i;
19051
19052 for (i = 0; ; i++)
19053 {
19054 reg = EH_RETURN_DATA_REGNO (i);
19055 if (reg == INVALID_REGNUM)
19056 break;
19057 save_reg_mask |= 1 << reg;
19058 }
19059 }
19060
19061 return save_reg_mask;
19062 }
19063
19064 /* Return true if r3 is live at the start of the function. */
19065
19066 static bool
19067 arm_r3_live_at_start_p (void)
19068 {
19069 /* Just look at cfg info, which is still close enough to correct at this
19070 point. This gives false positives for broken functions that might use
19071 uninitialized data that happens to be allocated in r3, but who cares? */
19072 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19073 }
19074
19075 /* Compute the number of bytes used to store the static chain register on the
19076 stack, above the stack frame. We need to know this accurately to get the
19077 alignment of the rest of the stack frame correct. */
19078
19079 static int
19080 arm_compute_static_chain_stack_bytes (void)
19081 {
19082 /* See the defining assertion in arm_expand_prologue. */
19083 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19084 && IS_NESTED (arm_current_func_type ())
19085 && arm_r3_live_at_start_p ()
19086 && crtl->args.pretend_args_size == 0)
19087 return 4;
19088
19089 return 0;
19090 }
19091
19092 /* Compute a bit mask of which registers need to be
19093 saved on the stack for the current function.
19094 This is used by arm_get_frame_offsets, which may add extra registers. */
19095
19096 static unsigned long
19097 arm_compute_save_reg_mask (void)
19098 {
19099 unsigned int save_reg_mask = 0;
19100 unsigned long func_type = arm_current_func_type ();
19101 unsigned int reg;
19102
19103 if (IS_NAKED (func_type))
19104 /* This should never really happen. */
19105 return 0;
19106
19107 /* If we are creating a stack frame, then we must save the frame pointer,
19108 IP (which will hold the old stack pointer), LR and the PC. */
19109 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19110 save_reg_mask |=
19111 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19112 | (1 << IP_REGNUM)
19113 | (1 << LR_REGNUM)
19114 | (1 << PC_REGNUM);
19115
19116 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19117
19118 /* Decide if we need to save the link register.
19119 Interrupt routines have their own banked link register,
19120 so they never need to save it.
19121 Otherwise if we do not use the link register we do not need to save
19122 it. If we are pushing other registers onto the stack however, we
19123 can save an instruction in the epilogue by pushing the link register
19124 now and then popping it back into the PC. This incurs extra memory
19125 accesses though, so we only do it when optimizing for size, and only
19126 if we know that we will not need a fancy return sequence. */
19127 if (df_regs_ever_live_p (LR_REGNUM)
19128 || (save_reg_mask
19129 && optimize_size
19130 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19131 && !crtl->calls_eh_return))
19132 save_reg_mask |= 1 << LR_REGNUM;
19133
19134 if (cfun->machine->lr_save_eliminated)
19135 save_reg_mask &= ~ (1 << LR_REGNUM);
19136
19137 if (TARGET_REALLY_IWMMXT
19138 && ((bit_count (save_reg_mask)
19139 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19140 arm_compute_static_chain_stack_bytes())
19141 ) % 2) != 0)
19142 {
19143 /* The total number of registers that are going to be pushed
19144 onto the stack is odd. We need to ensure that the stack
19145 is 64-bit aligned before we start to save iWMMXt registers,
19146 and also before we start to create locals. (A local variable
19147 might be a double or long long which we will load/store using
19148 an iWMMXt instruction). Therefore we need to push another
19149 ARM register, so that the stack will be 64-bit aligned. We
19150 try to avoid using the arg registers (r0 - r3) as they might be
19151 used to pass values in a tail call. */
19152 for (reg = 4; reg <= 12; reg++)
19153 if ((save_reg_mask & (1 << reg)) == 0)
19154 break;
19155
19156 if (reg <= 12)
19157 save_reg_mask |= (1 << reg);
19158 else
19159 {
19160 cfun->machine->sibcall_blocked = 1;
19161 save_reg_mask |= (1 << 3);
19162 }
19163 }
19164
19165 /* We may need to push an additional register for use initializing the
19166 PIC base register. */
19167 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19168 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19169 {
19170 reg = thumb_find_work_register (1 << 4);
19171 if (!call_used_regs[reg])
19172 save_reg_mask |= (1 << reg);
19173 }
19174
19175 return save_reg_mask;
19176 }
19177
19178
19179 /* Compute a bit mask of which registers need to be
19180 saved on the stack for the current function. */
19181 static unsigned long
19182 thumb1_compute_save_reg_mask (void)
19183 {
19184 unsigned long mask;
19185 unsigned reg;
19186
19187 mask = 0;
19188 for (reg = 0; reg < 12; reg ++)
19189 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19190 mask |= 1 << reg;
19191
19192 if (flag_pic
19193 && !TARGET_SINGLE_PIC_BASE
19194 && arm_pic_register != INVALID_REGNUM
19195 && crtl->uses_pic_offset_table)
19196 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19197
19198 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19199 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19200 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19201
19202 /* LR will also be pushed if any lo regs are pushed. */
19203 if (mask & 0xff || thumb_force_lr_save ())
19204 mask |= (1 << LR_REGNUM);
19205
19206 /* Make sure we have a low work register if we need one.
19207 We will need one if we are going to push a high register,
19208 but we are not currently intending to push a low register. */
19209 if ((mask & 0xff) == 0
19210 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19211 {
19212 /* Use thumb_find_work_register to choose which register
19213 we will use. If the register is live then we will
19214 have to push it. Use LAST_LO_REGNUM as our fallback
19215 choice for the register to select. */
19216 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19217 /* Make sure the register returned by thumb_find_work_register is
19218 not part of the return value. */
19219 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19220 reg = LAST_LO_REGNUM;
19221
19222 if (! call_used_regs[reg])
19223 mask |= 1 << reg;
19224 }
19225
19226 /* The 504 below is 8 bytes less than 512 because there are two possible
19227 alignment words. We can't tell here if they will be present or not so we
19228 have to play it safe and assume that they are. */
19229 if ((CALLER_INTERWORKING_SLOT_SIZE +
19230 ROUND_UP_WORD (get_frame_size ()) +
19231 crtl->outgoing_args_size) >= 504)
19232 {
19233 /* This is the same as the code in thumb1_expand_prologue() which
19234 determines which register to use for stack decrement. */
19235 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19236 if (mask & (1 << reg))
19237 break;
19238
19239 if (reg > LAST_LO_REGNUM)
19240 {
19241 /* Make sure we have a register available for stack decrement. */
19242 mask |= 1 << LAST_LO_REGNUM;
19243 }
19244 }
19245
19246 return mask;
19247 }
19248
19249
19250 /* Return the number of bytes required to save VFP registers. */
19251 static int
19252 arm_get_vfp_saved_size (void)
19253 {
19254 unsigned int regno;
19255 int count;
19256 int saved;
19257
19258 saved = 0;
19259 /* Space for saved VFP registers. */
19260 if (TARGET_HARD_FLOAT && TARGET_VFP)
19261 {
19262 count = 0;
19263 for (regno = FIRST_VFP_REGNUM;
19264 regno < LAST_VFP_REGNUM;
19265 regno += 2)
19266 {
19267 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19268 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19269 {
19270 if (count > 0)
19271 {
19272 /* Workaround ARM10 VFPr1 bug. */
19273 if (count == 2 && !arm_arch6)
19274 count++;
19275 saved += count * 8;
19276 }
19277 count = 0;
19278 }
19279 else
19280 count++;
19281 }
19282 if (count > 0)
19283 {
19284 if (count == 2 && !arm_arch6)
19285 count++;
19286 saved += count * 8;
19287 }
19288 }
19289 return saved;
19290 }
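
/* As a small worked example: if d8 and d9 are the only VFP registers that
   need saving, the loop above finishes with count == 2; on a pre-arm_arch6
   core the ARM10 VFPr1 workaround bumps the count to 3, so 24 bytes are
   reserved, while otherwise the expected 16 bytes are reserved.  */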
19291
19292
19293 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19294 everything bar the final return instruction. If simple_return is true,
19295 then do not output the epilogue, because it has already been emitted in RTL. */
19296 const char *
19297 output_return_instruction (rtx operand, bool really_return, bool reverse,
19298 bool simple_return)
19299 {
19300 char conditional[10];
19301 char instr[100];
19302 unsigned reg;
19303 unsigned long live_regs_mask;
19304 unsigned long func_type;
19305 arm_stack_offsets *offsets;
19306
19307 func_type = arm_current_func_type ();
19308
19309 if (IS_NAKED (func_type))
19310 return "";
19311
19312 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19313 {
19314 /* If this function was declared non-returning, and we have
19315 found a tail call, then we have to trust that the called
19316 function won't return. */
19317 if (really_return)
19318 {
19319 rtx ops[2];
19320
19321 /* Otherwise, trap an attempted return by aborting. */
19322 ops[0] = operand;
19323 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19324 : "abort");
19325 assemble_external_libcall (ops[1]);
19326 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19327 }
19328
19329 return "";
19330 }
19331
19332 gcc_assert (!cfun->calls_alloca || really_return);
19333
19334 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19335
19336 cfun->machine->return_used_this_function = 1;
19337
19338 offsets = arm_get_frame_offsets ();
19339 live_regs_mask = offsets->saved_regs_mask;
19340
19341 if (!simple_return && live_regs_mask)
19342 {
19343 const char * return_reg;
19344
19345 /* If we do not have any special requirements for function exit
19346 (e.g. interworking) then we can load the return address
19347 directly into the PC. Otherwise we must load it into LR. */
19348 if (really_return
19349 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19350 return_reg = reg_names[PC_REGNUM];
19351 else
19352 return_reg = reg_names[LR_REGNUM];
19353
19354 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19355 {
19356 /* There are three possible reasons for the IP register
19357 being saved. 1) a stack frame was created, in which case
19358 IP contains the old stack pointer, or 2) an ISR routine
19359 corrupted it, or 3) it was saved to align the stack on
19360 iWMMXt. In case 1, restore IP into SP, otherwise just
19361 restore IP. */
19362 if (frame_pointer_needed)
19363 {
19364 live_regs_mask &= ~ (1 << IP_REGNUM);
19365 live_regs_mask |= (1 << SP_REGNUM);
19366 }
19367 else
19368 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19369 }
19370
19371 /* On some ARM architectures it is faster to use LDR rather than
19372 LDM to load a single register. On other architectures, the
19373 cost is the same. In 26 bit mode, or for exception handlers,
19374 we have to use LDM to load the PC so that the CPSR is also
19375 restored. */
19376 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19377 if (live_regs_mask == (1U << reg))
19378 break;
19379
19380 if (reg <= LAST_ARM_REGNUM
19381 && (reg != LR_REGNUM
19382 || ! really_return
19383 || ! IS_INTERRUPT (func_type)))
19384 {
19385 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19386 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19387 }
19388 else
19389 {
19390 char *p;
19391 int first = 1;
19392
19393 /* Generate the load multiple instruction to restore the
19394 registers. Note we can get here, even if
19395 frame_pointer_needed is true, but only if sp already
19396 points to the base of the saved core registers. */
19397 if (live_regs_mask & (1 << SP_REGNUM))
19398 {
19399 unsigned HOST_WIDE_INT stack_adjust;
19400
19401 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19402 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19403
19404 if (stack_adjust && arm_arch5 && TARGET_ARM)
19405 if (TARGET_UNIFIED_ASM)
19406 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19407 else
19408 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19409 else
19410 {
19411 /* If we can't use ldmib (SA110 bug),
19412 then try to pop r3 instead. */
19413 if (stack_adjust)
19414 live_regs_mask |= 1 << 3;
19415
19416 if (TARGET_UNIFIED_ASM)
19417 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19418 else
19419 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19420 }
19421 }
19422 else
19423 if (TARGET_UNIFIED_ASM)
19424 sprintf (instr, "pop%s\t{", conditional);
19425 else
19426 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19427
19428 p = instr + strlen (instr);
19429
19430 for (reg = 0; reg <= SP_REGNUM; reg++)
19431 if (live_regs_mask & (1 << reg))
19432 {
19433 int l = strlen (reg_names[reg]);
19434
19435 if (first)
19436 first = 0;
19437 else
19438 {
19439 memcpy (p, ", ", 2);
19440 p += 2;
19441 }
19442
19443 memcpy (p, "%|", 2);
19444 memcpy (p + 2, reg_names[reg], l);
19445 p += l + 2;
19446 }
19447
19448 if (live_regs_mask & (1 << LR_REGNUM))
19449 {
19450 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19451 /* If returning from an interrupt, restore the CPSR. */
19452 if (IS_INTERRUPT (func_type))
19453 strcat (p, "^");
19454 }
19455 else
19456 strcpy (p, "}");
19457 }
19458
19459 output_asm_insn (instr, & operand);
19460
19461 /* See if we need to generate an extra instruction to
19462 perform the actual function return. */
19463 if (really_return
19464 && func_type != ARM_FT_INTERWORKED
19465 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19466 {
19467 /* The return has already been handled
19468 by loading the LR into the PC. */
19469 return "";
19470 }
19471 }
19472
19473 if (really_return)
19474 {
19475 switch ((int) ARM_FUNC_TYPE (func_type))
19476 {
19477 case ARM_FT_ISR:
19478 case ARM_FT_FIQ:
19479 /* ??? This is wrong for unified assembly syntax. */
19480 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19481 break;
19482
19483 case ARM_FT_INTERWORKED:
19484 sprintf (instr, "bx%s\t%%|lr", conditional);
19485 break;
19486
19487 case ARM_FT_EXCEPTION:
19488 /* ??? This is wrong for unified assembly syntax. */
19489 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19490 break;
19491
19492 default:
19493 /* Use bx if it's available. */
19494 if (arm_arch5 || arm_arch4t)
19495 sprintf (instr, "bx%s\t%%|lr", conditional);
19496 else
19497 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19498 break;
19499 }
19500
19501 output_asm_insn (instr, & operand);
19502 }
19503
19504 return "";
19505 }
19506
19507 /* Write the function name into the code section, directly preceding
19508 the function prologue.
19509
19510 Code will be output similar to this:
19511 t0
19512 .ascii "arm_poke_function_name", 0
19513 .align
19514 t1
19515 .word 0xff000000 + (t1 - t0)
19516 arm_poke_function_name
19517 mov ip, sp
19518 stmfd sp!, {fp, ip, lr, pc}
19519 sub fp, ip, #4
19520
19521 When performing a stack backtrace, code can inspect the value
19522 of 'pc' stored at 'fp' + 0. If the trace function then looks
19523 at location pc - 12 and the top 8 bits are set, then we know
19524 that there is a function name embedded immediately preceding this
19525 location, whose length is ((pc[-3]) & ~0xff000000).
19526
19527 We assume that pc is declared as a pointer to an unsigned long.
19528
19529 It is of no benefit to output the function name if we are assembling
19530 a leaf function. These function types will not contain a stack
19531 backtrace structure, therefore it is not possible to determine the
19532 function name. */
19533 void
19534 arm_poke_function_name (FILE *stream, const char *name)
19535 {
19536 unsigned long alignlength;
19537 unsigned long length;
19538 rtx x;
19539
19540 length = strlen (name) + 1;
19541 alignlength = ROUND_UP_WORD (length);
19542
19543 ASM_OUTPUT_ASCII (stream, name, length);
19544 ASM_OUTPUT_ALIGN (stream, 2);
19545 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19546 assemble_aligned_integer (UNITS_PER_WORD, x);
19547 }
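
/* Purely as an illustrative sketch (not part of GCC itself), a backtrace
   routine following the scheme above might recover the name roughly as
   follows, where pc is the saved PC value viewed as an unsigned long *
   and all names are hypothetical:

       unsigned long marker = pc[-3];
       if ((marker & 0xff000000) == 0xff000000)
         {
           unsigned long len = marker & ~0xff000000;
           const char *name = (const char *) pc - 12 - len;
           ...
         }

   The length includes the terminating NUL plus any padding added by the
   .align directive.  */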
19548
19549 /* Place some comments into the assembler stream
19550 describing the current function. */
19551 static void
19552 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19553 {
19554 unsigned long func_type;
19555
19556 /* ??? Do we want to print some of the below anyway? */
19557 if (TARGET_THUMB1)
19558 return;
19559
19560 /* Sanity check. */
19561 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19562
19563 func_type = arm_current_func_type ();
19564
19565 switch ((int) ARM_FUNC_TYPE (func_type))
19566 {
19567 default:
19568 case ARM_FT_NORMAL:
19569 break;
19570 case ARM_FT_INTERWORKED:
19571 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19572 break;
19573 case ARM_FT_ISR:
19574 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19575 break;
19576 case ARM_FT_FIQ:
19577 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19578 break;
19579 case ARM_FT_EXCEPTION:
19580 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19581 break;
19582 }
19583
19584 if (IS_NAKED (func_type))
19585 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19586
19587 if (IS_VOLATILE (func_type))
19588 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19589
19590 if (IS_NESTED (func_type))
19591 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19592 if (IS_STACKALIGN (func_type))
19593 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19594
19595 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19596 crtl->args.size,
19597 crtl->args.pretend_args_size, frame_size);
19598
19599 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19600 frame_pointer_needed,
19601 cfun->machine->uses_anonymous_args);
19602
19603 if (cfun->machine->lr_save_eliminated)
19604 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19605
19606 if (crtl->calls_eh_return)
19607 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19608
19609 }
19610
19611 static void
19612 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19613 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19614 {
19615 arm_stack_offsets *offsets;
19616
19617 if (TARGET_THUMB1)
19618 {
19619 int regno;
19620
19621 /* Emit any call-via-reg trampolines that are needed for v4t support
19622 of call_reg and call_value_reg type insns. */
19623 for (regno = 0; regno < LR_REGNUM; regno++)
19624 {
19625 rtx label = cfun->machine->call_via[regno];
19626
19627 if (label != NULL)
19628 {
19629 switch_to_section (function_section (current_function_decl));
19630 targetm.asm_out.internal_label (asm_out_file, "L",
19631 CODE_LABEL_NUMBER (label));
19632 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19633 }
19634 }
19635
19636 /* ??? Probably not safe to set this here, since it assumes that a
19637 function will be emitted as assembly immediately after we generate
19638 RTL for it. This does not happen for inline functions. */
19639 cfun->machine->return_used_this_function = 0;
19640 }
19641 else /* TARGET_32BIT */
19642 {
19643 /* We need to take into account any stack-frame rounding. */
19644 offsets = arm_get_frame_offsets ();
19645
19646 gcc_assert (!use_return_insn (FALSE, NULL)
19647 || (cfun->machine->return_used_this_function != 0)
19648 || offsets->saved_regs == offsets->outgoing_args
19649 || frame_pointer_needed);
19650 }
19651 }
19652
19653 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19654 STR and STRD. If an even number of registers are being pushed, an STRD
19655 pattern is created for each register pair. If an
19656 odd number of registers are pushed, emit an initial STR followed by
19657 as many STRD instructions as are needed. This works best when the
19658 stack is initially 64-bit aligned (the normal case), since it
19659 ensures that each STRD is also 64-bit aligned. */
19660 static void
19661 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19662 {
19663 int num_regs = 0;
19664 int i;
19665 int regno;
19666 rtx par = NULL_RTX;
19667 rtx dwarf = NULL_RTX;
19668 rtx tmp;
19669 bool first = true;
19670
19671 num_regs = bit_count (saved_regs_mask);
19672
19673 /* Must be at least one register to save, and can't save SP or PC. */
19674 gcc_assert (num_regs > 0 && num_regs <= 14);
19675 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19676 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19677
19678 /* Create sequence for DWARF info. All the frame-related data for
19679 debugging is held in this wrapper. */
19680 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19681
19682 /* Describe the stack adjustment. */
19683 tmp = gen_rtx_SET (VOIDmode,
19684 stack_pointer_rtx,
19685 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19686 RTX_FRAME_RELATED_P (tmp) = 1;
19687 XVECEXP (dwarf, 0, 0) = tmp;
19688
19689 /* Find the first register. */
19690 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19691 ;
19692
19693 i = 0;
19694
19695 /* If there's an odd number of registers to push, start off by
19696 pushing a single register. This ensures that subsequent strd
19697 operations are dword aligned (assuming that SP was originally
19698 64-bit aligned). */
19699 if ((num_regs & 1) != 0)
19700 {
19701 rtx reg, mem, insn;
19702
19703 reg = gen_rtx_REG (SImode, regno);
19704 if (num_regs == 1)
19705 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19706 stack_pointer_rtx));
19707 else
19708 mem = gen_frame_mem (Pmode,
19709 gen_rtx_PRE_MODIFY
19710 (Pmode, stack_pointer_rtx,
19711 plus_constant (Pmode, stack_pointer_rtx,
19712 -4 * num_regs)));
19713
19714 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19715 RTX_FRAME_RELATED_P (tmp) = 1;
19716 insn = emit_insn (tmp);
19717 RTX_FRAME_RELATED_P (insn) = 1;
19718 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19719 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19720 reg);
19721 RTX_FRAME_RELATED_P (tmp) = 1;
19722 i++;
19723 regno++;
19724 XVECEXP (dwarf, 0, i) = tmp;
19725 first = false;
19726 }
19727
19728 while (i < num_regs)
19729 if (saved_regs_mask & (1 << regno))
19730 {
19731 rtx reg1, reg2, mem1, mem2;
19732 rtx tmp0, tmp1, tmp2;
19733 int regno2;
19734
19735 /* Find the register to pair with this one. */
19736 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19737 regno2++)
19738 ;
19739
19740 reg1 = gen_rtx_REG (SImode, regno);
19741 reg2 = gen_rtx_REG (SImode, regno2);
19742
19743 if (first)
19744 {
19745 rtx insn;
19746
19747 first = false;
19748 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19749 stack_pointer_rtx,
19750 -4 * num_regs));
19751 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19752 stack_pointer_rtx,
19753 -4 * (num_regs - 1)));
19754 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19755 plus_constant (Pmode, stack_pointer_rtx,
19756 -4 * (num_regs)));
19757 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19758 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19759 RTX_FRAME_RELATED_P (tmp0) = 1;
19760 RTX_FRAME_RELATED_P (tmp1) = 1;
19761 RTX_FRAME_RELATED_P (tmp2) = 1;
19762 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19763 XVECEXP (par, 0, 0) = tmp0;
19764 XVECEXP (par, 0, 1) = tmp1;
19765 XVECEXP (par, 0, 2) = tmp2;
19766 insn = emit_insn (par);
19767 RTX_FRAME_RELATED_P (insn) = 1;
19768 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19769 }
19770 else
19771 {
19772 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19773 stack_pointer_rtx,
19774 4 * i));
19775 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19776 stack_pointer_rtx,
19777 4 * (i + 1)));
19778 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19779 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19780 RTX_FRAME_RELATED_P (tmp1) = 1;
19781 RTX_FRAME_RELATED_P (tmp2) = 1;
19782 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19783 XVECEXP (par, 0, 0) = tmp1;
19784 XVECEXP (par, 0, 1) = tmp2;
19785 emit_insn (par);
19786 }
19787
19788 /* Create unwind information. This is an approximation. */
19789 tmp1 = gen_rtx_SET (VOIDmode,
19790 gen_frame_mem (Pmode,
19791 plus_constant (Pmode,
19792 stack_pointer_rtx,
19793 4 * i)),
19794 reg1);
19795 tmp2 = gen_rtx_SET (VOIDmode,
19796 gen_frame_mem (Pmode,
19797 plus_constant (Pmode,
19798 stack_pointer_rtx,
19799 4 * (i + 1))),
19800 reg2);
19801
19802 RTX_FRAME_RELATED_P (tmp1) = 1;
19803 RTX_FRAME_RELATED_P (tmp2) = 1;
19804 XVECEXP (dwarf, 0, i + 1) = tmp1;
19805 XVECEXP (dwarf, 0, i + 2) = tmp2;
19806 i += 2;
19807 regno = regno2 + 1;
19808 }
19809 else
19810 regno++;
19811
19812 return;
19813 }
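
/* For example (illustrative only), pushing the odd-sized set {r4, r5, r6}
   first emits a single store of r4 with writeback that allocates the whole
   12-byte block, followed by one STRD for the r5/r6 pair:

       str  r4, [sp, #-12]!
       strd r5, r6, [sp, #4]

   which leaves the same stack layout as push {r4, r5, r6}.  */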
19814
19815 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19816 whenever possible, otherwise it emits single-word stores. The first store
19817 also allocates stack space for all saved registers, using writeback with
19818 post-addressing mode. All other stores use offset addressing. If no STRD
19819 can be emitted, this function emits a sequence of single-word stores,
19820 and not an STM as before, because single-word stores provide more freedom
19821 scheduling and can be turned into an STM by peephole optimizations. */
19822 static void
19823 arm_emit_strd_push (unsigned long saved_regs_mask)
19824 {
19825 int num_regs = 0;
19826 int i, j, dwarf_index = 0;
19827 int offset = 0;
19828 rtx dwarf = NULL_RTX;
19829 rtx insn = NULL_RTX;
19830 rtx tmp, mem;
19831
19832 /* TODO: More efficient code can be emitted by changing the
19833 layout, e.g., first push all pairs that can use STRD to keep the
19834 stack aligned, and then push all other registers. */
19835 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19836 if (saved_regs_mask & (1 << i))
19837 num_regs++;
19838
19839 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19840 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19841 gcc_assert (num_regs > 0);
19842
19843 /* Create sequence for DWARF info. */
19844 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19845
19846 /* For dwarf info, we generate explicit stack update. */
19847 tmp = gen_rtx_SET (VOIDmode,
19848 stack_pointer_rtx,
19849 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19850 RTX_FRAME_RELATED_P (tmp) = 1;
19851 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19852
19853 /* Save registers. */
19854 offset = - 4 * num_regs;
19855 j = 0;
19856 while (j <= LAST_ARM_REGNUM)
19857 if (saved_regs_mask & (1 << j))
19858 {
19859 if ((j % 2 == 0)
19860 && (saved_regs_mask & (1 << (j + 1))))
19861 {
19862 /* The current register and the next register form a register pair
19863 for which STRD can be generated. */
19864 if (offset < 0)
19865 {
19866 /* Allocate stack space for all saved registers. */
19867 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19868 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19869 mem = gen_frame_mem (DImode, tmp);
19870 offset = 0;
19871 }
19872 else if (offset > 0)
19873 mem = gen_frame_mem (DImode,
19874 plus_constant (Pmode,
19875 stack_pointer_rtx,
19876 offset));
19877 else
19878 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19879
19880 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19881 RTX_FRAME_RELATED_P (tmp) = 1;
19882 tmp = emit_insn (tmp);
19883
19884 /* Record the first store insn. */
19885 if (dwarf_index == 1)
19886 insn = tmp;
19887
19888 /* Generate dwarf info. */
19889 mem = gen_frame_mem (SImode,
19890 plus_constant (Pmode,
19891 stack_pointer_rtx,
19892 offset));
19893 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19894 RTX_FRAME_RELATED_P (tmp) = 1;
19895 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19896
19897 mem = gen_frame_mem (SImode,
19898 plus_constant (Pmode,
19899 stack_pointer_rtx,
19900 offset + 4));
19901 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19902 RTX_FRAME_RELATED_P (tmp) = 1;
19903 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19904
19905 offset += 8;
19906 j += 2;
19907 }
19908 else
19909 {
19910 /* Emit a single word store. */
19911 if (offset < 0)
19912 {
19913 /* Allocate stack space for all saved registers. */
19914 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19915 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19916 mem = gen_frame_mem (SImode, tmp);
19917 offset = 0;
19918 }
19919 else if (offset > 0)
19920 mem = gen_frame_mem (SImode,
19921 plus_constant (Pmode,
19922 stack_pointer_rtx,
19923 offset));
19924 else
19925 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19926
19927 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19928 RTX_FRAME_RELATED_P (tmp) = 1;
19929 tmp = emit_insn (tmp);
19930
19931 /* Record the first store insn. */
19932 if (dwarf_index == 1)
19933 insn = tmp;
19934
19935 /* Generate dwarf info. */
19936 mem = gen_frame_mem (SImode,
19937 plus_constant(Pmode,
19938 stack_pointer_rtx,
19939 offset));
19940 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19941 RTX_FRAME_RELATED_P (tmp) = 1;
19942 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19943
19944 offset += 4;
19945 j += 1;
19946 }
19947 }
19948 else
19949 j++;
19950
19951 /* Attach dwarf info to the first insn we generate. */
19952 gcc_assert (insn != NULL_RTX);
19953 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19954 RTX_FRAME_RELATED_P (insn) = 1;
19955 }
19956
19957 /* Generate and emit an insn that we will recognize as a push_multi.
19958 Unfortunately, since this insn does not reflect very well the actual
19959 semantics of the operation, we need to annotate the insn for the benefit
19960 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19961 MASK for registers that should be annotated for DWARF2 frame unwind
19962 information. */
19963 static rtx
19964 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19965 {
19966 int num_regs = 0;
19967 int num_dwarf_regs = 0;
19968 int i, j;
19969 rtx par;
19970 rtx dwarf;
19971 int dwarf_par_index;
19972 rtx tmp, reg;
19973
19974 /* We don't record the PC in the dwarf frame information. */
19975 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19976
19977 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19978 {
19979 if (mask & (1 << i))
19980 num_regs++;
19981 if (dwarf_regs_mask & (1 << i))
19982 num_dwarf_regs++;
19983 }
19984
19985 gcc_assert (num_regs && num_regs <= 16);
19986 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19987
19988 /* For the body of the insn we are going to generate an UNSPEC in
19989 parallel with several USEs. This allows the insn to be recognized
19990 by the push_multi pattern in the arm.md file.
19991
19992 The body of the insn looks something like this:
19993
19994 (parallel [
19995 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19996 (const_int:SI <num>)))
19997 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19998 (use (reg:SI XX))
19999 (use (reg:SI YY))
20000 ...
20001 ])
20002
20003 For the frame note however, we try to be more explicit and actually
20004 show each register being stored into the stack frame, plus a (single)
20005 decrement of the stack pointer. We do it this way in order to be
20006 friendly to the stack unwinding code, which only wants to see a single
20007 stack decrement per instruction. The RTL we generate for the note looks
20008 something like this:
20009
20010 (sequence [
20011 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20012 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20013 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20014 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20015 ...
20016 ])
20017
20018 FIXME: In an ideal world the PRE_MODIFY would not exist and
20019 instead we'd have a parallel expression detailing all
20020 the stores to the various memory addresses so that debug
20021 information is more up-to-date. Remember however while writing
20022 this to take care of the constraints with the push instruction.
20023
20024 Note also that this has to be taken care of for the VFP registers.
20025
20026 For more see PR43399. */
20027
20028 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20029 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20030 dwarf_par_index = 1;
20031
20032 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20033 {
20034 if (mask & (1 << i))
20035 {
20036 reg = gen_rtx_REG (SImode, i);
20037
20038 XVECEXP (par, 0, 0)
20039 = gen_rtx_SET (VOIDmode,
20040 gen_frame_mem
20041 (BLKmode,
20042 gen_rtx_PRE_MODIFY (Pmode,
20043 stack_pointer_rtx,
20044 plus_constant
20045 (Pmode, stack_pointer_rtx,
20046 -4 * num_regs))
20047 ),
20048 gen_rtx_UNSPEC (BLKmode,
20049 gen_rtvec (1, reg),
20050 UNSPEC_PUSH_MULT));
20051
20052 if (dwarf_regs_mask & (1 << i))
20053 {
20054 tmp = gen_rtx_SET (VOIDmode,
20055 gen_frame_mem (SImode, stack_pointer_rtx),
20056 reg);
20057 RTX_FRAME_RELATED_P (tmp) = 1;
20058 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20059 }
20060
20061 break;
20062 }
20063 }
20064
20065 for (j = 1, i++; j < num_regs; i++)
20066 {
20067 if (mask & (1 << i))
20068 {
20069 reg = gen_rtx_REG (SImode, i);
20070
20071 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20072
20073 if (dwarf_regs_mask & (1 << i))
20074 {
20075 tmp
20076 = gen_rtx_SET (VOIDmode,
20077 gen_frame_mem
20078 (SImode,
20079 plus_constant (Pmode, stack_pointer_rtx,
20080 4 * j)),
20081 reg);
20082 RTX_FRAME_RELATED_P (tmp) = 1;
20083 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20084 }
20085
20086 j++;
20087 }
20088 }
20089
20090 par = emit_insn (par);
20091
20092 tmp = gen_rtx_SET (VOIDmode,
20093 stack_pointer_rtx,
20094 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20095 RTX_FRAME_RELATED_P (tmp) = 1;
20096 XVECEXP (dwarf, 0, 0) = tmp;
20097
20098 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20099
20100 return par;
20101 }
20102
20103 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20104 SIZE is the offset to be adjusted.
20105 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20106 static void
20107 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20108 {
20109 rtx dwarf;
20110
20111 RTX_FRAME_RELATED_P (insn) = 1;
20112 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20113 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20114 }
20115
20116 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20117 SAVED_REGS_MASK shows which registers need to be restored.
20118
20119 Unfortunately, since this insn does not reflect very well the actual
20120 semantics of the operation, we need to annotate the insn for the benefit
20121 of DWARF2 frame unwind information. */
20122 static void
20123 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20124 {
20125 int num_regs = 0;
20126 int i, j;
20127 rtx par;
20128 rtx dwarf = NULL_RTX;
20129 rtx tmp, reg;
20130 bool return_in_pc;
20131 int offset_adj;
20132 int emit_update;
20133
20134 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20135 offset_adj = return_in_pc ? 1 : 0;
20136 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20137 if (saved_regs_mask & (1 << i))
20138 num_regs++;
20139
20140 gcc_assert (num_regs && num_regs <= 16);
20141
20142 /* If SP is in the reglist, then we don't emit an SP update insn. */
20143 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20144
20145 /* The parallel needs to hold num_regs SETs
20146 and one SET for the stack update. */
20147 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20148
20149 if (return_in_pc)
20150 {
20151 tmp = ret_rtx;
20152 XVECEXP (par, 0, 0) = tmp;
20153 }
20154
20155 if (emit_update)
20156 {
20157 /* Increment the stack pointer, based on there being
20158 num_regs 4-byte registers to restore. */
20159 tmp = gen_rtx_SET (VOIDmode,
20160 stack_pointer_rtx,
20161 plus_constant (Pmode,
20162 stack_pointer_rtx,
20163 4 * num_regs));
20164 RTX_FRAME_RELATED_P (tmp) = 1;
20165 XVECEXP (par, 0, offset_adj) = tmp;
20166 }
20167
20168 /* Now restore every reg, which may include PC. */
20169 for (j = 0, i = 0; j < num_regs; i++)
20170 if (saved_regs_mask & (1 << i))
20171 {
20172 reg = gen_rtx_REG (SImode, i);
20173 if ((num_regs == 1) && emit_update && !return_in_pc)
20174 {
20175 /* Emit single load with writeback. */
20176 tmp = gen_frame_mem (SImode,
20177 gen_rtx_POST_INC (Pmode,
20178 stack_pointer_rtx));
20179 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20180 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20181 return;
20182 }
20183
20184 tmp = gen_rtx_SET (VOIDmode,
20185 reg,
20186 gen_frame_mem
20187 (SImode,
20188 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20189 RTX_FRAME_RELATED_P (tmp) = 1;
20190 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20191
20192 /* We need to maintain a sequence for DWARF info too. As dwarf info
20193 should not have PC, skip PC. */
20194 if (i != PC_REGNUM)
20195 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20196
20197 j++;
20198 }
20199
20200 if (return_in_pc)
20201 par = emit_jump_insn (par);
20202 else
20203 par = emit_insn (par);
20204
20205 REG_NOTES (par) = dwarf;
20206 if (!return_in_pc)
20207 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20208 stack_pointer_rtx, stack_pointer_rtx);
20209 }
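
/* To illustrate the layout of the PARALLEL built above: popping
   {r4, r5, pc} gives return_in_pc = true and emit_update = 1, so the
   vector holds a return, the SP adjustment by 12, and then the three
   loads r4 <- [sp], r5 <- [sp, #4] and pc <- [sp, #8], in that order.  */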
20210
20211 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20212 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20213
20214 Unfortunately, since this insn does not reflect very well the actual
20215 semantics of the operation, we need to annotate the insn for the benefit
20216 of DWARF2 frame unwind information. */
20217 static void
20218 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20219 {
20220 int i, j;
20221 rtx par;
20222 rtx dwarf = NULL_RTX;
20223 rtx tmp, reg;
20224
20225 gcc_assert (num_regs && num_regs <= 32);
20226
20227 /* Workaround ARM10 VFPr1 bug. */
20228 if (num_regs == 2 && !arm_arch6)
20229 {
20230 if (first_reg == 15)
20231 first_reg--;
20232
20233 num_regs++;
20234 }
20235
20236 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20237 there could be up to 32 D-registers to restore.
20238 If there are more than 16 D-registers, make two recursive calls,
20239 each of which emits one pop_multi instruction. */
20240 if (num_regs > 16)
20241 {
20242 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20243 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20244 return;
20245 }
20246
20247 /* The parallel needs to hold num_regs SETs
20248 and one SET for the stack update. */
20249 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20250
20251 /* Increment the stack pointer, based on there being
20252 num_regs 8-byte registers to restore. */
20253 tmp = gen_rtx_SET (VOIDmode,
20254 base_reg,
20255 plus_constant (Pmode, base_reg, 8 * num_regs));
20256 RTX_FRAME_RELATED_P (tmp) = 1;
20257 XVECEXP (par, 0, 0) = tmp;
20258
20259 /* Now show every reg that will be restored, using a SET for each. */
20260 for (j = 0, i=first_reg; j < num_regs; i += 2)
20261 {
20262 reg = gen_rtx_REG (DFmode, i);
20263
20264 tmp = gen_rtx_SET (VOIDmode,
20265 reg,
20266 gen_frame_mem
20267 (DFmode,
20268 plus_constant (Pmode, base_reg, 8 * j)));
20269 RTX_FRAME_RELATED_P (tmp) = 1;
20270 XVECEXP (par, 0, j + 1) = tmp;
20271
20272 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20273
20274 j++;
20275 }
20276
20277 par = emit_insn (par);
20278 REG_NOTES (par) = dwarf;
20279
20280 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
20281 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20282 {
20283 RTX_FRAME_RELATED_P (par) = 1;
20284 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20285 }
20286 else
20287 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20288 base_reg, base_reg);
20289 }
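/* Editor's illustration (a sketch, not part of the original source): a call
   restoring four D registers starting at d8 with BASE_REG == sp builds one
   PARALLEL whose first SET advances SP by 8 * 4 = 32 bytes and whose
   remaining SETs load d8..d11 from SP, SP+8, SP+16 and SP+24; the backend
   prints this as something like a single VLDM/VPOP of {d8-d11}.  The exact
   mnemonic and register numbering depend on the target configuration.  */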
20290
20291 /* Generate and emit a pattern that will be recognized as an LDRD pattern.  If an
20292 even number of registers is being popped, multiple LDRD patterns are created for
20293 all register pairs.  If an odd number of registers is popped, the last register
20294 is loaded using an LDR pattern. */
20295 static void
20296 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20297 {
20298 int num_regs = 0;
20299 int i, j;
20300 rtx par = NULL_RTX;
20301 rtx dwarf = NULL_RTX;
20302 rtx tmp, reg, tmp1;
20303 bool return_in_pc;
20304
20305 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20306 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20307 if (saved_regs_mask & (1 << i))
20308 num_regs++;
20309
20310 gcc_assert (num_regs && num_regs <= 16);
20311
20312 /* We cannot generate LDRD for PC.  Hence, reduce the count if PC is
20313 to be popped.  If num_regs was even it now becomes odd, and PC is
20314 popped by the final multi-register pop; if num_regs was odd it is now
20315 even, and an LDR with return can be generated for PC. */
20316 if (return_in_pc)
20317 num_regs--;
20318
20319 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20320
20321 /* Var j iterates over all the registers to gather all the registers in
20322 saved_regs_mask.  Var i gives the index of a saved register in the stack
20323 frame.  A PARALLEL RTX of a register pair is created here, so that the
20324 pattern for LDRD can be matched.  As PC is always the last register to be
20325 popped, and we have already decremented num_regs if PC is present, we
20326 don't have to worry about PC in this loop. */
20327 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20328 if (saved_regs_mask & (1 << j))
20329 {
20330 /* Create RTX for memory load. */
20331 reg = gen_rtx_REG (SImode, j);
20332 tmp = gen_rtx_SET (SImode,
20333 reg,
20334 gen_frame_mem (SImode,
20335 plus_constant (Pmode,
20336 stack_pointer_rtx, 4 * i)));
20337 RTX_FRAME_RELATED_P (tmp) = 1;
20338
20339 if (i % 2 == 0)
20340 {
20341 /* When saved-register index (i) is even, the RTX to be emitted is
20342 yet to be created. Hence create it first. The LDRD pattern we
20343 are generating is :
20344 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20345 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20346 where target registers need not be consecutive. */
20347 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20348 dwarf = NULL_RTX;
20349 }
20350
20351 /* The ith register is added to the PARALLEL RTX.  If i is even, reg_i is
20352 added as the 0th element; if i is odd, reg_i is added as the 1st element
20353 of the LDRD pattern shown above. */
20354 XVECEXP (par, 0, (i % 2)) = tmp;
20355 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20356
20357 if ((i % 2) == 1)
20358 {
20359 /* When the saved-register index (i) is odd, RTXs for both of the registers
20360 to be loaded have been placed in the LDRD pattern given above, and the
20361 pattern can be emitted now. */
20362 par = emit_insn (par);
20363 REG_NOTES (par) = dwarf;
20364 RTX_FRAME_RELATED_P (par) = 1;
20365 }
20366
20367 i++;
20368 }
20369
20370 /* If the number of registers popped is odd AND return_in_pc is false, OR
20371 the number of registers is even AND return_in_pc is true, the last register
20372 is popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
20373 then emit an LDR with post-increment. */
20374
20375 /* Increment the stack pointer, based on there being
20376 num_regs 4-byte registers to restore. */
20377 tmp = gen_rtx_SET (VOIDmode,
20378 stack_pointer_rtx,
20379 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20380 RTX_FRAME_RELATED_P (tmp) = 1;
20381 tmp = emit_insn (tmp);
20382 if (!return_in_pc)
20383 {
20384 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20385 stack_pointer_rtx, stack_pointer_rtx);
20386 }
20387
20388 dwarf = NULL_RTX;
20389
20390 if (((num_regs % 2) == 1 && !return_in_pc)
20391 || ((num_regs % 2) == 0 && return_in_pc))
20392 {
20393 /* Scan for the single register to be popped. Skip until the saved
20394 register is found. */
20395 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20396
20397 /* Gen LDR with post increment here. */
20398 tmp1 = gen_rtx_MEM (SImode,
20399 gen_rtx_POST_INC (SImode,
20400 stack_pointer_rtx));
20401 set_mem_alias_set (tmp1, get_frame_alias_set ());
20402
20403 reg = gen_rtx_REG (SImode, j);
20404 tmp = gen_rtx_SET (SImode, reg, tmp1);
20405 RTX_FRAME_RELATED_P (tmp) = 1;
20406 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20407
20408 if (return_in_pc)
20409 {
20410 /* If return_in_pc, j must be PC_REGNUM. */
20411 gcc_assert (j == PC_REGNUM);
20412 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20413 XVECEXP (par, 0, 0) = ret_rtx;
20414 XVECEXP (par, 0, 1) = tmp;
20415 par = emit_jump_insn (par);
20416 }
20417 else
20418 {
20419 par = emit_insn (tmp);
20420 REG_NOTES (par) = dwarf;
20421 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20422 stack_pointer_rtx, stack_pointer_rtx);
20423 }
20424
20425 }
20426 else if ((num_regs % 2) == 1 && return_in_pc)
20427 {
20428 /* There are 2 registers left to be popped.  So, generate the pattern
20429 pop_multiple_with_stack_update_and_return to pop into PC. */
20430 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20431 }
20432
20433 return;
20434 }
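/* Editor's illustration (a sketch, not part of the original source): for
   SAVED_REGS_MASK covering {r4, r5, r6, pc}, the code above emits roughly

     ldrd  r4, r5, [sp]
     add   sp, sp, #8
     pop   {r6, pc}

   i.e. one LDRD for the even-sized prefix, a separate stack update, and a
   final pop_multi that also returns.  The exact sequence GCC prints may
   differ.  */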
20435
20436 /* LDRD in ARM mode needs consecutive registers as operands. This function
20437 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20438 offset addressing and then generates one separate stack update. This provides
20439 more scheduling freedom, compared to writeback on every load. However,
20440 if the function returns using load into PC directly
20441 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20442 before the last load. TODO: Add a peephole optimization to recognize
20443 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20444 peephole optimization to merge the load at stack-offset zero
20445 with the stack update instruction using load with writeback
20446 in post-index addressing mode. */
20447 static void
20448 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20449 {
20450 int j = 0;
20451 int offset = 0;
20452 rtx par = NULL_RTX;
20453 rtx dwarf = NULL_RTX;
20454 rtx tmp, mem;
20455
20456 /* Restore saved registers. */
20457 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20458 j = 0;
20459 while (j <= LAST_ARM_REGNUM)
20460 if (saved_regs_mask & (1 << j))
20461 {
20462 if ((j % 2) == 0
20463 && (saved_regs_mask & (1 << (j + 1)))
20464 && (j + 1) != PC_REGNUM)
20465 {
20466 /* Current register and next register form register pair for which
20467 LDRD can be generated. PC is always the last register popped, and
20468 we handle it separately. */
20469 if (offset > 0)
20470 mem = gen_frame_mem (DImode,
20471 plus_constant (Pmode,
20472 stack_pointer_rtx,
20473 offset));
20474 else
20475 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20476
20477 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20478 tmp = emit_insn (tmp);
20479 RTX_FRAME_RELATED_P (tmp) = 1;
20480
20481 /* Generate dwarf info. */
20482
20483 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20484 gen_rtx_REG (SImode, j),
20485 NULL_RTX);
20486 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20487 gen_rtx_REG (SImode, j + 1),
20488 dwarf);
20489
20490 REG_NOTES (tmp) = dwarf;
20491
20492 offset += 8;
20493 j += 2;
20494 }
20495 else if (j != PC_REGNUM)
20496 {
20497 /* Emit a single word load. */
20498 if (offset > 0)
20499 mem = gen_frame_mem (SImode,
20500 plus_constant (Pmode,
20501 stack_pointer_rtx,
20502 offset));
20503 else
20504 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20505
20506 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20507 tmp = emit_insn (tmp);
20508 RTX_FRAME_RELATED_P (tmp) = 1;
20509
20510 /* Generate dwarf info. */
20511 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20512 gen_rtx_REG (SImode, j),
20513 NULL_RTX);
20514
20515 offset += 4;
20516 j += 1;
20517 }
20518 else /* j == PC_REGNUM */
20519 j++;
20520 }
20521 else
20522 j++;
20523
20524 /* Update the stack. */
20525 if (offset > 0)
20526 {
20527 tmp = gen_rtx_SET (Pmode,
20528 stack_pointer_rtx,
20529 plus_constant (Pmode,
20530 stack_pointer_rtx,
20531 offset));
20532 tmp = emit_insn (tmp);
20533 arm_add_cfa_adjust_cfa_note (tmp, offset,
20534 stack_pointer_rtx, stack_pointer_rtx);
20535 offset = 0;
20536 }
20537
20538 if (saved_regs_mask & (1 << PC_REGNUM))
20539 {
20540 /* Only PC is to be popped. */
20541 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20542 XVECEXP (par, 0, 0) = ret_rtx;
20543 tmp = gen_rtx_SET (SImode,
20544 gen_rtx_REG (SImode, PC_REGNUM),
20545 gen_frame_mem (SImode,
20546 gen_rtx_POST_INC (SImode,
20547 stack_pointer_rtx)));
20548 RTX_FRAME_RELATED_P (tmp) = 1;
20549 XVECEXP (par, 0, 1) = tmp;
20550 par = emit_jump_insn (par);
20551
20552 /* Generate dwarf info. */
20553 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20554 gen_rtx_REG (SImode, PC_REGNUM),
20555 NULL_RTX);
20556 REG_NOTES (par) = dwarf;
20557 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20558 stack_pointer_rtx, stack_pointer_rtx);
20559 }
20560 }
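/* Editor's illustration (a sketch, not part of the original source): for
   SAVED_REGS_MASK covering {r4, r5, r6, r7, lr}, the code above emits
   roughly

     ldrd  r4, r5, [sp]
     ldrd  r6, r7, [sp, #8]
     ldr   lr, [sp, #16]
     add   sp, sp, #20

   using offset addressing throughout and a single trailing stack update.
   The exact sequence GCC prints may differ.  */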
20561
20562 /* Calculate the size of the return value that is passed in registers. */
20563 static unsigned
20564 arm_size_return_regs (void)
20565 {
20566 enum machine_mode mode;
20567
20568 if (crtl->return_rtx != 0)
20569 mode = GET_MODE (crtl->return_rtx);
20570 else
20571 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20572
20573 return GET_MODE_SIZE (mode);
20574 }
20575
20576 /* Return true if the current function needs to save/restore LR. */
20577 static bool
20578 thumb_force_lr_save (void)
20579 {
20580 return !cfun->machine->lr_save_eliminated
20581 && (!leaf_function_p ()
20582 || thumb_far_jump_used_p ()
20583 || df_regs_ever_live_p (LR_REGNUM));
20584 }
20585
20586 /* Return true if CALL is an indirect tail call.  In that case we cannot
20587 assume that r3 is available, because the call target itself may be
20588 held in r3. */
20589 static bool
20590 is_indirect_tailcall_p (rtx call)
20591 {
20592 rtx pat = PATTERN (call);
20593
20594 /* Indirect tail call. */
20595 pat = XVECEXP (pat, 0, 0);
20596 if (GET_CODE (pat) == SET)
20597 pat = SET_SRC (pat);
20598
20599 pat = XEXP (XEXP (pat, 0), 0);
20600 return REG_P (pat);
20601 }
20602
20603 /* Return true if r3 is used by any of the tail call insns in the
20604 current function. */
20605 static bool
20606 any_sibcall_could_use_r3 (void)
20607 {
20608 edge_iterator ei;
20609 edge e;
20610
20611 if (!crtl->tail_call_emit)
20612 return false;
20613 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20614 if (e->flags & EDGE_SIBCALL)
20615 {
20616 rtx call = BB_END (e->src);
20617 if (!CALL_P (call))
20618 call = prev_nonnote_nondebug_insn (call);
20619 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20620 if (find_regno_fusage (call, USE, 3)
20621 || is_indirect_tailcall_p (call))
20622 return true;
20623 }
20624 return false;
20625 }
20626
20627
20628 /* Compute the distance from register FROM to register TO.
20629 These can be the arg pointer (26), the soft frame pointer (25),
20630 the stack pointer (13) or the hard frame pointer (11).
20631 In thumb mode r7 is used as the soft frame pointer, if needed.
20632 Typical stack layout looks like this:
20633
20634        old stack pointer -> |    |
20635                              ----
20636                             |    | \
20637                             |    |   saved arguments for
20638                             |    |   vararg functions
20639                             |    | /
20640                               --
20641    hard FP & arg pointer -> |    | \
20642                             |    |   stack
20643                             |    |   frame
20644                             |    | /
20645                               --
20646                             |    | \
20647                             |    |   call saved
20648                             |    |   registers
20649       soft frame pointer -> |    | /
20650                               --
20651                             |    | \
20652                             |    |   local
20653                             |    |   variables
20654      locals base pointer -> |    | /
20655                               --
20656                             |    | \
20657                             |    |   outgoing
20658                             |    |   arguments
20659    current stack pointer -> |    | /
20660                               --
20661
20662 For a given function some or all of these stack components
20663 may not be needed, giving rise to the possibility of
20664 eliminating some of the registers.
20665
20666 The values returned by this function must reflect the behavior
20667 of arm_expand_prologue() and arm_compute_save_reg_mask().
20668
20669 The sign of the number returned reflects the direction of stack
20670 growth, so the values are positive for all eliminations except
20671 from the soft frame pointer to the hard frame pointer.
20672
20673 SFP may point just inside the local variables block to ensure correct
20674 alignment. */
20675
20676
20677 /* Calculate stack offsets. These are used to calculate register elimination
20678 offsets and in prologue/epilogue code. Also calculates which registers
20679 should be saved. */
20680
20681 static arm_stack_offsets *
20682 arm_get_frame_offsets (void)
20683 {
20684 struct arm_stack_offsets *offsets;
20685 unsigned long func_type;
20686 int leaf;
20687 int saved;
20688 int core_saved;
20689 HOST_WIDE_INT frame_size;
20690 int i;
20691
20692 offsets = &cfun->machine->stack_offsets;
20693
20694 /* We need to know if we are a leaf function. Unfortunately, it
20695 is possible to be called after start_sequence has been called,
20696 which causes get_insns to return the insns for the sequence,
20697 not the function, which will cause leaf_function_p to return
20698 the incorrect result.
20699
20700 To work around this, we cache the computed offsets; we only need leaf information once reload has completed, and the
20701 frame size cannot be changed after that time, so we can safely
20702 use the cached value. */
20703
20704 if (reload_completed)
20705 return offsets;
20706
20707 /* Initially this is the size of the local variables.  It will be translated
20708 into an offset once we have determined the size of preceding data. */
20709 frame_size = ROUND_UP_WORD (get_frame_size ());
20710
20711 leaf = leaf_function_p ();
20712
20713 /* Space for variadic functions. */
20714 offsets->saved_args = crtl->args.pretend_args_size;
20715
20716 /* In Thumb mode this is incorrect, but never used. */
20717 offsets->frame
20718 = (offsets->saved_args
20719 + arm_compute_static_chain_stack_bytes ()
20720 + (frame_pointer_needed ? 4 : 0));
20721
20722 if (TARGET_32BIT)
20723 {
20724 unsigned int regno;
20725
20726 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20727 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20728 saved = core_saved;
20729
20730 /* We know that SP will be doubleword aligned on entry, and we must
20731 preserve that condition at any subroutine call. We also require the
20732 soft frame pointer to be doubleword aligned. */
20733
20734 if (TARGET_REALLY_IWMMXT)
20735 {
20736 /* Check for the call-saved iWMMXt registers. */
20737 for (regno = FIRST_IWMMXT_REGNUM;
20738 regno <= LAST_IWMMXT_REGNUM;
20739 regno++)
20740 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20741 saved += 8;
20742 }
20743
20744 func_type = arm_current_func_type ();
20745 /* Space for saved VFP registers. */
20746 if (! IS_VOLATILE (func_type)
20747 && TARGET_HARD_FLOAT && TARGET_VFP)
20748 saved += arm_get_vfp_saved_size ();
20749 }
20750 else /* TARGET_THUMB1 */
20751 {
20752 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20753 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20754 saved = core_saved;
20755 if (TARGET_BACKTRACE)
20756 saved += 16;
20757 }
20758
20759 /* Saved registers include the stack frame. */
20760 offsets->saved_regs
20761 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20762 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20763
20764 /* A leaf function does not need any stack alignment if it has nothing
20765 on the stack. */
20766 if (leaf && frame_size == 0
20767 /* However if it calls alloca(), we have a dynamically allocated
20768 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20769 && ! cfun->calls_alloca)
20770 {
20771 offsets->outgoing_args = offsets->soft_frame;
20772 offsets->locals_base = offsets->soft_frame;
20773 return offsets;
20774 }
20775
20776 /* Ensure SFP has the correct alignment. */
20777 if (ARM_DOUBLEWORD_ALIGN
20778 && (offsets->soft_frame & 7))
20779 {
20780 offsets->soft_frame += 4;
20781 /* Try to align stack by pushing an extra reg. Don't bother doing this
20782 when there is a stack frame as the alignment will be rolled into
20783 the normal stack adjustment. */
20784 if (frame_size + crtl->outgoing_args_size == 0)
20785 {
20786 int reg = -1;
20787
20788 /* Register r3 is caller-saved. Normally it does not need to be
20789 saved on entry by the prologue. However if we choose to save
20790 it for padding then we may confuse the compiler into thinking
20791 a prologue sequence is required when in fact it is not. This
20792 will occur when shrink-wrapping if r3 is used as a scratch
20793 register and there are no other callee-saved writes.
20794
20795 This situation can be avoided when other callee-saved registers
20796 are available and r3 is not mandatory if we choose a callee-saved
20797 register for padding. */
20798 bool prefer_callee_reg_p = false;
20799
20800 /* If it is safe to use r3, then do so. This sometimes
20801 generates better code on Thumb-2 by avoiding the need to
20802 use 32-bit push/pop instructions. */
20803 if (! any_sibcall_could_use_r3 ()
20804 && arm_size_return_regs () <= 12
20805 && (offsets->saved_regs_mask & (1 << 3)) == 0
20806 && (TARGET_THUMB2
20807 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20808 {
20809 reg = 3;
20810 if (!TARGET_THUMB2)
20811 prefer_callee_reg_p = true;
20812 }
20813 if (reg == -1
20814 || prefer_callee_reg_p)
20815 {
20816 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20817 {
20818 /* Avoid fixed registers; they may be changed at
20819 arbitrary times so it's unsafe to restore them
20820 during the epilogue. */
20821 if (!fixed_regs[i]
20822 && (offsets->saved_regs_mask & (1 << i)) == 0)
20823 {
20824 reg = i;
20825 break;
20826 }
20827 }
20828 }
20829
20830 if (reg != -1)
20831 {
20832 offsets->saved_regs += 4;
20833 offsets->saved_regs_mask |= (1 << reg);
20834 }
20835 }
20836 }
20837
20838 offsets->locals_base = offsets->soft_frame + frame_size;
20839 offsets->outgoing_args = (offsets->locals_base
20840 + crtl->outgoing_args_size);
20841
20842 if (ARM_DOUBLEWORD_ALIGN)
20843 {
20844 /* Ensure SP remains doubleword aligned. */
20845 if (offsets->outgoing_args & 7)
20846 offsets->outgoing_args += 4;
20847 gcc_assert (!(offsets->outgoing_args & 7));
20848 }
20849
20850 return offsets;
20851 }
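/* Editor's worked example (a sketch, not part of the original source): for
   an ARM-mode, non-leaf function that saves {r4, lr}, has 16 bytes of local
   variables, no pretend or outgoing arguments, no frame pointer, no static
   chain and a zero caller-interworking slot, the code above yields
   saved_args = 0, frame = 0, saved_regs = 8, soft_frame = 8,
   locals_base = 24 and outgoing_args = 24 (already doubleword aligned).  */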
20852
20853
20854 /* Calculate the relative offsets for the different stack pointers. Positive
20855 offsets are in the direction of stack growth. */
20856
20857 HOST_WIDE_INT
20858 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20859 {
20860 arm_stack_offsets *offsets;
20861
20862 offsets = arm_get_frame_offsets ();
20863
20864 /* OK, now we have enough information to compute the distances.
20865 There must be an entry in these switch tables for each pair
20866 of registers in ELIMINABLE_REGS, even if some of the entries
20867 seem to be redundant or useless. */
20868 switch (from)
20869 {
20870 case ARG_POINTER_REGNUM:
20871 switch (to)
20872 {
20873 case THUMB_HARD_FRAME_POINTER_REGNUM:
20874 return 0;
20875
20876 case FRAME_POINTER_REGNUM:
20877 /* This is the reverse of the soft frame pointer
20878 to hard frame pointer elimination below. */
20879 return offsets->soft_frame - offsets->saved_args;
20880
20881 case ARM_HARD_FRAME_POINTER_REGNUM:
20882 /* This is only non-zero in the case where the static chain register
20883 is stored above the frame. */
20884 return offsets->frame - offsets->saved_args - 4;
20885
20886 case STACK_POINTER_REGNUM:
20887 /* If nothing has been pushed on the stack at all
20888 then this will return -4. This *is* correct! */
20889 return offsets->outgoing_args - (offsets->saved_args + 4);
20890
20891 default:
20892 gcc_unreachable ();
20893 }
20894 gcc_unreachable ();
20895
20896 case FRAME_POINTER_REGNUM:
20897 switch (to)
20898 {
20899 case THUMB_HARD_FRAME_POINTER_REGNUM:
20900 return 0;
20901
20902 case ARM_HARD_FRAME_POINTER_REGNUM:
20903 /* The hard frame pointer points to the top entry in the
20904 stack frame. The soft frame pointer to the bottom entry
20905 in the stack frame. If there is no stack frame at all,
20906 then they are identical. */
20907
20908 return offsets->frame - offsets->soft_frame;
20909
20910 case STACK_POINTER_REGNUM:
20911 return offsets->outgoing_args - offsets->soft_frame;
20912
20913 default:
20914 gcc_unreachable ();
20915 }
20916 gcc_unreachable ();
20917
20918 default:
20919 /* You cannot eliminate from the stack pointer.
20920 In theory you could eliminate from the hard frame
20921 pointer to the stack pointer, but this will never
20922 happen, since if a stack frame is not needed the
20923 hard frame pointer will never be used. */
20924 gcc_unreachable ();
20925 }
20926 }
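/* Editor's worked example (a sketch, not part of the original source):
   with the hypothetical offsets saved_args = 0, soft_frame = 8 and
   outgoing_args = 24 used above, this function returns
     ARG_POINTER   -> STACK_POINTER : 24 - (0 + 4) = 20,
     FRAME_POINTER -> STACK_POINTER : 24 - 8       = 16,
     ARG_POINTER   -> FRAME_POINTER : 8 - 0        = 8.  */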
20927
20928 /* Given FROM and TO register numbers, say whether this elimination is
20929 allowed. Frame pointer elimination is automatically handled.
20930
20931 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20932 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20933 pointer, we must eliminate FRAME_POINTER_REGNUM into
20934 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20935 ARG_POINTER_REGNUM. */
20936
20937 bool
20938 arm_can_eliminate (const int from, const int to)
20939 {
20940 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20941 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20942 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20943 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20944 true);
20945 }
20946
20947 /* Emit RTL to save coprocessor registers on function entry. Returns the
20948 number of bytes pushed. */
20949
20950 static int
20951 arm_save_coproc_regs(void)
20952 {
20953 int saved_size = 0;
20954 unsigned reg;
20955 unsigned start_reg;
20956 rtx insn;
20957
20958 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20959 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20960 {
20961 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20962 insn = gen_rtx_MEM (V2SImode, insn);
20963 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20964 RTX_FRAME_RELATED_P (insn) = 1;
20965 saved_size += 8;
20966 }
20967
20968 if (TARGET_HARD_FLOAT && TARGET_VFP)
20969 {
20970 start_reg = FIRST_VFP_REGNUM;
20971
20972 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20973 {
20974 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20975 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20976 {
20977 if (start_reg != reg)
20978 saved_size += vfp_emit_fstmd (start_reg,
20979 (reg - start_reg) / 2);
20980 start_reg = reg + 2;
20981 }
20982 }
20983 if (start_reg != reg)
20984 saved_size += vfp_emit_fstmd (start_reg,
20985 (reg - start_reg) / 2);
20986 }
20987 return saved_size;
20988 }
20989
20990
20991 /* Set the Thumb frame pointer from the stack pointer. */
20992
20993 static void
20994 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20995 {
20996 HOST_WIDE_INT amount;
20997 rtx insn, dwarf;
20998
20999 amount = offsets->outgoing_args - offsets->locals_base;
21000 if (amount < 1024)
21001 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21002 stack_pointer_rtx, GEN_INT (amount)));
21003 else
21004 {
21005 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21006 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21007 expects the first two operands to be the same. */
21008 if (TARGET_THUMB2)
21009 {
21010 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21011 stack_pointer_rtx,
21012 hard_frame_pointer_rtx));
21013 }
21014 else
21015 {
21016 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21017 hard_frame_pointer_rtx,
21018 stack_pointer_rtx));
21019 }
21020 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21021 plus_constant (Pmode, stack_pointer_rtx, amount));
21022 RTX_FRAME_RELATED_P (dwarf) = 1;
21023 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21024 }
21025
21026 RTX_FRAME_RELATED_P (insn) = 1;
21027 }
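/* Editor's illustration (a sketch, not part of the original source): with a
   hypothetical offset of 2048 bytes on Thumb-2, the large-amount path above
   emits roughly

     mov  fp, #2048
     add  fp, sp, fp

   together with a REG_FRAME_RELATED_EXPR note describing fp = sp + 2048 for
   the unwinder.  The register actually used as the hard frame pointer
   depends on the target.  */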
21028
21029 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21030 function. */
21031 void
21032 arm_expand_prologue (void)
21033 {
21034 rtx amount;
21035 rtx insn;
21036 rtx ip_rtx;
21037 unsigned long live_regs_mask;
21038 unsigned long func_type;
21039 int fp_offset = 0;
21040 int saved_pretend_args = 0;
21041 int saved_regs = 0;
21042 unsigned HOST_WIDE_INT args_to_push;
21043 arm_stack_offsets *offsets;
21044
21045 func_type = arm_current_func_type ();
21046
21047 /* Naked functions don't have prologues. */
21048 if (IS_NAKED (func_type))
21049 return;
21050
21051 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21052 args_to_push = crtl->args.pretend_args_size;
21053
21054 /* Compute which register we will have to save onto the stack. */
21055 offsets = arm_get_frame_offsets ();
21056 live_regs_mask = offsets->saved_regs_mask;
21057
21058 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21059
21060 if (IS_STACKALIGN (func_type))
21061 {
21062 rtx r0, r1;
21063
21064 /* Handle a word-aligned stack pointer. We generate the following:
21065
21066 mov r0, sp
21067 bic r1, r0, #7
21068 mov sp, r1
21069 <save and restore r0 in normal prologue/epilogue>
21070 mov sp, r0
21071 bx lr
21072
21073 The unwinder doesn't need to know about the stack realignment.
21074 Just tell it we saved SP in r0. */
21075 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21076
21077 r0 = gen_rtx_REG (SImode, 0);
21078 r1 = gen_rtx_REG (SImode, 1);
21079
21080 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21081 RTX_FRAME_RELATED_P (insn) = 1;
21082 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21083
21084 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21085
21086 /* ??? The CFA changes here, which may cause GDB to conclude that it
21087 has entered a different function. That said, the unwind info is
21088 correct, individually, before and after this instruction because
21089 we've described the save of SP, which will override the default
21090 handling of SP as restoring from the CFA. */
21091 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21092 }
21093
21094 /* For APCS frames, if IP register is clobbered
21095 when creating frame, save that register in a special
21096 way. */
21097 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21098 {
21099 if (IS_INTERRUPT (func_type))
21100 {
21101 /* Interrupt functions must not corrupt any registers.
21102 Creating a frame pointer however, corrupts the IP
21103 register, so we must push it first. */
21104 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21105
21106 /* Do not set RTX_FRAME_RELATED_P on this insn.
21107 The dwarf stack unwinding code only wants to see one
21108 stack decrement per function, and this is not it. If
21109 this instruction is labeled as being part of the frame
21110 creation sequence then dwarf2out_frame_debug_expr will
21111 die when it encounters the assignment of IP to FP
21112 later on, since the use of SP here establishes SP as
21113 the CFA register and not IP.
21114
21115 Anyway this instruction is not really part of the stack
21116 frame creation although it is part of the prologue. */
21117 }
21118 else if (IS_NESTED (func_type))
21119 {
21120 /* The static chain register is the same as the IP register
21121 used as a scratch register during stack frame creation.
21122 To get around this we need to find somewhere to store IP
21123 whilst the frame is being created. We try the following
21124 places in order:
21125
21126 1. The last argument register r3 if it is available.
21127 2. A slot on the stack above the frame if there are no
21128 arguments to push onto the stack.
21129 3. Register r3 again, after pushing the argument registers
21130 onto the stack, if this is a varargs function.
21131 4. The last slot on the stack created for the arguments to
21132 push, if this isn't a varargs function.
21133
21134 Note - we only need to tell the dwarf2 backend about the SP
21135 adjustment in the second variant; the static chain register
21136 doesn't need to be unwound, as it doesn't contain a value
21137 inherited from the caller. */
21138
21139 if (!arm_r3_live_at_start_p ())
21140 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21141 else if (args_to_push == 0)
21142 {
21143 rtx addr, dwarf;
21144
21145 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21146 saved_regs += 4;
21147
21148 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21149 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21150 fp_offset = 4;
21151
21152 /* Just tell the dwarf backend that we adjusted SP. */
21153 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21154 plus_constant (Pmode, stack_pointer_rtx,
21155 -fp_offset));
21156 RTX_FRAME_RELATED_P (insn) = 1;
21157 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21158 }
21159 else
21160 {
21161 /* Store the args on the stack. */
21162 if (cfun->machine->uses_anonymous_args)
21163 {
21164 insn
21165 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21166 (0xf0 >> (args_to_push / 4)) & 0xf);
21167 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21168 saved_pretend_args = 1;
21169 }
21170 else
21171 {
21172 rtx addr, dwarf;
21173
21174 if (args_to_push == 4)
21175 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21176 else
21177 addr
21178 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21179 plus_constant (Pmode,
21180 stack_pointer_rtx,
21181 -args_to_push));
21182
21183 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21184
21185 /* Just tell the dwarf backend that we adjusted SP. */
21186 dwarf
21187 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21188 plus_constant (Pmode, stack_pointer_rtx,
21189 -args_to_push));
21190 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21191 }
21192
21193 RTX_FRAME_RELATED_P (insn) = 1;
21194 fp_offset = args_to_push;
21195 args_to_push = 0;
21196 }
21197 }
21198
21199 insn = emit_set_insn (ip_rtx,
21200 plus_constant (Pmode, stack_pointer_rtx,
21201 fp_offset));
21202 RTX_FRAME_RELATED_P (insn) = 1;
21203 }
21204
21205 if (args_to_push)
21206 {
21207 /* Push the argument registers, or reserve space for them. */
21208 if (cfun->machine->uses_anonymous_args)
21209 insn = emit_multi_reg_push
21210 ((0xf0 >> (args_to_push / 4)) & 0xf,
21211 (0xf0 >> (args_to_push / 4)) & 0xf);
21212 else
21213 insn = emit_insn
21214 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21215 GEN_INT (- args_to_push)));
21216 RTX_FRAME_RELATED_P (insn) = 1;
21217 }
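/* Editor's note on the mask arithmetic above (not part of the original
   source): (0xf0 >> (args_to_push / 4)) & 0xf selects the highest-numbered
   argument registers.  For example, args_to_push == 4 gives 0x8 ({r3}),
   8 gives 0xc ({r2, r3}) and 16 gives 0xf ({r0-r3}).  */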
21218
21219 /* If this is an interrupt service routine, and the link register
21220 is going to be pushed, and we're not generating extra
21221 push of IP (needed when a frame is needed and the frame layout is APCS),
21222 subtracting four from LR now will mean that the function return
21223 can be done with a single instruction. */
21224 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21225 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21226 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21227 && TARGET_ARM)
21228 {
21229 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21230
21231 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21232 }
21233
21234 if (live_regs_mask)
21235 {
21236 unsigned long dwarf_regs_mask = live_regs_mask;
21237
21238 saved_regs += bit_count (live_regs_mask) * 4;
21239 if (optimize_size && !frame_pointer_needed
21240 && saved_regs == offsets->saved_regs - offsets->saved_args)
21241 {
21242 /* If no coprocessor registers are being pushed and we don't have
21243 to worry about a frame pointer then push extra registers to
21244 create the stack frame.  This is done in a way that does not
21245 alter the frame layout, so is independent of the epilogue. */
21246 int n;
21247 int frame;
21248 n = 0;
21249 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21250 n++;
21251 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21252 if (frame && n * 4 >= frame)
21253 {
21254 n = frame / 4;
21255 live_regs_mask |= (1 << n) - 1;
21256 saved_regs += frame;
21257 }
21258 }
21259
21260 if (TARGET_LDRD
21261 && current_tune->prefer_ldrd_strd
21262 && !optimize_function_for_size_p (cfun))
21263 {
21264 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21265 if (TARGET_THUMB2)
21266 thumb2_emit_strd_push (live_regs_mask);
21267 else if (TARGET_ARM
21268 && !TARGET_APCS_FRAME
21269 && !IS_INTERRUPT (func_type))
21270 arm_emit_strd_push (live_regs_mask);
21271 else
21272 {
21273 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21274 RTX_FRAME_RELATED_P (insn) = 1;
21275 }
21276 }
21277 else
21278 {
21279 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21280 RTX_FRAME_RELATED_P (insn) = 1;
21281 }
21282 }
21283
21284 if (! IS_VOLATILE (func_type))
21285 saved_regs += arm_save_coproc_regs ();
21286
21287 if (frame_pointer_needed && TARGET_ARM)
21288 {
21289 /* Create the new frame pointer. */
21290 if (TARGET_APCS_FRAME)
21291 {
21292 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21293 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21294 RTX_FRAME_RELATED_P (insn) = 1;
21295
21296 if (IS_NESTED (func_type))
21297 {
21298 /* Recover the static chain register. */
21299 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21300 insn = gen_rtx_REG (SImode, 3);
21301 else
21302 {
21303 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21304 insn = gen_frame_mem (SImode, insn);
21305 }
21306 emit_set_insn (ip_rtx, insn);
21307 /* Add a USE to stop propagate_one_insn() from barfing. */
21308 emit_insn (gen_force_register_use (ip_rtx));
21309 }
21310 }
21311 else
21312 {
21313 insn = GEN_INT (saved_regs - 4);
21314 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21315 stack_pointer_rtx, insn));
21316 RTX_FRAME_RELATED_P (insn) = 1;
21317 }
21318 }
21319
21320 if (flag_stack_usage_info)
21321 current_function_static_stack_size
21322 = offsets->outgoing_args - offsets->saved_args;
21323
21324 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21325 {
21326 /* This add can produce multiple insns for a large constant, so we
21327 need to get tricky. */
21328 rtx last = get_last_insn ();
21329
21330 amount = GEN_INT (offsets->saved_args + saved_regs
21331 - offsets->outgoing_args);
21332
21333 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21334 amount));
21335 do
21336 {
21337 last = last ? NEXT_INSN (last) : get_insns ();
21338 RTX_FRAME_RELATED_P (last) = 1;
21339 }
21340 while (last != insn);
21341
21342 /* If the frame pointer is needed, emit a special barrier that
21343 will prevent the scheduler from moving stores to the frame
21344 before the stack adjustment. */
21345 if (frame_pointer_needed)
21346 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21347 hard_frame_pointer_rtx));
21348 }
21349
21350
21351 if (frame_pointer_needed && TARGET_THUMB2)
21352 thumb_set_frame_pointer (offsets);
21353
21354 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21355 {
21356 unsigned long mask;
21357
21358 mask = live_regs_mask;
21359 mask &= THUMB2_WORK_REGS;
21360 if (!IS_NESTED (func_type))
21361 mask |= (1 << IP_REGNUM);
21362 arm_load_pic_register (mask);
21363 }
21364
21365 /* If we are profiling, make sure no instructions are scheduled before
21366 the call to mcount. Similarly if the user has requested no
21367 scheduling in the prolog. Similarly if we want non-call exceptions
21368 using the EABI unwinder, to prevent faulting instructions from being
21369 swapped with a stack adjustment. */
21370 if (crtl->profile || !TARGET_SCHED_PROLOG
21371 || (arm_except_unwind_info (&global_options) == UI_TARGET
21372 && cfun->can_throw_non_call_exceptions))
21373 emit_insn (gen_blockage ());
21374
21375 /* If the link register is being kept alive, with the return address in it,
21376 then make sure that it does not get reused by the ce2 pass. */
21377 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21378 cfun->machine->lr_save_eliminated = 1;
21379 }
21380 \f
21381 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21382 static void
21383 arm_print_condition (FILE *stream)
21384 {
21385 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21386 {
21387 /* Branch conversion is not implemented for Thumb-2. */
21388 if (TARGET_THUMB)
21389 {
21390 output_operand_lossage ("predicated Thumb instruction");
21391 return;
21392 }
21393 if (current_insn_predicate != NULL)
21394 {
21395 output_operand_lossage
21396 ("predicated instruction in conditional sequence");
21397 return;
21398 }
21399
21400 fputs (arm_condition_codes[arm_current_cc], stream);
21401 }
21402 else if (current_insn_predicate)
21403 {
21404 enum arm_cond_code code;
21405
21406 if (TARGET_THUMB1)
21407 {
21408 output_operand_lossage ("predicated Thumb instruction");
21409 return;
21410 }
21411
21412 code = get_arm_condition_code (current_insn_predicate);
21413 fputs (arm_condition_codes[code], stream);
21414 }
21415 }
21416
21417
21418 /* Globally reserved letters: acln
21419 Punctuation letters currently used: @_|?().!#
21420 Lower case letters currently used: bcdefhimpqtvwxyz
21421 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21422 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21423
21424 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21425
21426 If CODE is 'd', then the X is a condition operand and the instruction
21427 should only be executed if the condition is true.
21428 if CODE is 'D', then the X is a condition operand and the instruction
21429 should only be executed if the condition is false: however, if the mode
21430 of the comparison is CCFPEmode, then always execute the instruction -- we
21431 do this because in these circumstances !GE does not necessarily imply LT;
21432 in these cases the instruction pattern will take care to make sure that
21433 an instruction containing %d will follow, thereby undoing the effects of
21434 doing this instruction unconditionally.
21435 If CODE is 'N' then X is a floating point operand that must be negated
21436 before output.
21437 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21438 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21439 static void
21440 arm_print_operand (FILE *stream, rtx x, int code)
21441 {
21442 switch (code)
21443 {
21444 case '@':
21445 fputs (ASM_COMMENT_START, stream);
21446 return;
21447
21448 case '_':
21449 fputs (user_label_prefix, stream);
21450 return;
21451
21452 case '|':
21453 fputs (REGISTER_PREFIX, stream);
21454 return;
21455
21456 case '?':
21457 arm_print_condition (stream);
21458 return;
21459
21460 case '(':
21461 /* Nothing in unified syntax, otherwise the current condition code. */
21462 if (!TARGET_UNIFIED_ASM)
21463 arm_print_condition (stream);
21464 break;
21465
21466 case ')':
21467 /* The current condition code in unified syntax, otherwise nothing. */
21468 if (TARGET_UNIFIED_ASM)
21469 arm_print_condition (stream);
21470 break;
21471
21472 case '.':
21473 /* The current condition code for a condition code setting instruction.
21474 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21475 if (TARGET_UNIFIED_ASM)
21476 {
21477 fputc('s', stream);
21478 arm_print_condition (stream);
21479 }
21480 else
21481 {
21482 arm_print_condition (stream);
21483 fputc('s', stream);
21484 }
21485 return;
21486
21487 case '!':
21488 /* If the instruction is conditionally executed then print
21489 the current condition code, otherwise print 's'. */
21490 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21491 if (current_insn_predicate)
21492 arm_print_condition (stream);
21493 else
21494 fputc('s', stream);
21495 break;
21496
21497 /* %# is a "break" sequence. It doesn't output anything, but is used to
21498 separate e.g. operand numbers from following text, if that text consists
21499 of further digits which we don't want to be part of the operand
21500 number. */
21501 case '#':
21502 return;
21503
21504 case 'N':
21505 {
21506 REAL_VALUE_TYPE r;
21507 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21508 r = real_value_negate (&r);
21509 fprintf (stream, "%s", fp_const_from_val (&r));
21510 }
21511 return;
21512
21513 /* An integer or symbol address without a preceding # sign. */
21514 case 'c':
21515 switch (GET_CODE (x))
21516 {
21517 case CONST_INT:
21518 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21519 break;
21520
21521 case SYMBOL_REF:
21522 output_addr_const (stream, x);
21523 break;
21524
21525 case CONST:
21526 if (GET_CODE (XEXP (x, 0)) == PLUS
21527 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21528 {
21529 output_addr_const (stream, x);
21530 break;
21531 }
21532 /* Fall through. */
21533
21534 default:
21535 output_operand_lossage ("Unsupported operand for code '%c'", code);
21536 }
21537 return;
21538
21539 /* An integer that we want to print in HEX. */
21540 case 'x':
21541 switch (GET_CODE (x))
21542 {
21543 case CONST_INT:
21544 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21545 break;
21546
21547 default:
21548 output_operand_lossage ("Unsupported operand for code '%c'", code);
21549 }
21550 return;
21551
21552 case 'B':
21553 if (CONST_INT_P (x))
21554 {
21555 HOST_WIDE_INT val;
21556 val = ARM_SIGN_EXTEND (~INTVAL (x));
21557 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21558 }
21559 else
21560 {
21561 putc ('~', stream);
21562 output_addr_const (stream, x);
21563 }
21564 return;
21565
21566 case 'b':
21567 /* Print the log2 of a CONST_INT. */
21568 {
21569 HOST_WIDE_INT val;
21570
21571 if (!CONST_INT_P (x)
21572 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21573 output_operand_lossage ("Unsupported operand for code '%c'", code);
21574 else
21575 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21576 }
21577 return;
21578
21579 case 'L':
21580 /* The low 16 bits of an immediate constant. */
21581 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21582 return;
21583
21584 case 'i':
21585 fprintf (stream, "%s", arithmetic_instr (x, 1));
21586 return;
21587
21588 case 'I':
21589 fprintf (stream, "%s", arithmetic_instr (x, 0));
21590 return;
21591
21592 case 'S':
21593 {
21594 HOST_WIDE_INT val;
21595 const char *shift;
21596
21597 shift = shift_op (x, &val);
21598
21599 if (shift)
21600 {
21601 fprintf (stream, ", %s ", shift);
21602 if (val == -1)
21603 arm_print_operand (stream, XEXP (x, 1), 0);
21604 else
21605 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21606 }
21607 }
21608 return;
21609
21610 /* An explanation of the 'Q', 'R' and 'H' register operands:
21611
21612 In a pair of registers containing a DI or DF value the 'Q'
21613 operand returns the register number of the register containing
21614 the least significant part of the value. The 'R' operand returns
21615 the register number of the register containing the most
21616 significant part of the value.
21617
21618 The 'H' operand returns the higher of the two register numbers.
21619 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21620 same as the 'Q' operand, since the most significant part of the
21621 value is held in the lower number register. The reverse is true
21622 on systems where WORDS_BIG_ENDIAN is false.
21623
21624 The purpose of these operands is to distinguish between cases
21625 where the endian-ness of the values is important (for example
21626 when they are added together), and cases where the endian-ness
21627 is irrelevant, but the order of register operations is important.
21628 For example when loading a value from memory into a register
21629 pair, the endian-ness does not matter. Provided that the value
21630 from the lower memory address is put into the lower numbered
21631 register, and the value from the higher address is put into the
21632 higher numbered register, the load will work regardless of whether
21633 the value being loaded is big-wordian or little-wordian. The
21634 order of the two register loads can matter however, if the address
21635 of the memory location is actually held in one of the registers
21636 being overwritten by the load.
21637
21638 The 'Q' and 'R' constraints are also available for 64-bit
21639 constants. */
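/* Editor's example (not part of the original source): for a DImode value
   held in the pair r0/r1 on a target where WORDS_BIG_ENDIAN is false,
   %Q prints r0, %R prints r1 and %H prints r1; when WORDS_BIG_ENDIAN is
   true, %Q prints r1 and %R prints r0, while %H still prints r1.  */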
21640 case 'Q':
21641 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21642 {
21643 rtx part = gen_lowpart (SImode, x);
21644 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21645 return;
21646 }
21647
21648 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21649 {
21650 output_operand_lossage ("invalid operand for code '%c'", code);
21651 return;
21652 }
21653
21654 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21655 return;
21656
21657 case 'R':
21658 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21659 {
21660 enum machine_mode mode = GET_MODE (x);
21661 rtx part;
21662
21663 if (mode == VOIDmode)
21664 mode = DImode;
21665 part = gen_highpart_mode (SImode, mode, x);
21666 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21667 return;
21668 }
21669
21670 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21671 {
21672 output_operand_lossage ("invalid operand for code '%c'", code);
21673 return;
21674 }
21675
21676 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21677 return;
21678
21679 case 'H':
21680 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21681 {
21682 output_operand_lossage ("invalid operand for code '%c'", code);
21683 return;
21684 }
21685
21686 asm_fprintf (stream, "%r", REGNO (x) + 1);
21687 return;
21688
21689 case 'J':
21690 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21691 {
21692 output_operand_lossage ("invalid operand for code '%c'", code);
21693 return;
21694 }
21695
21696 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21697 return;
21698
21699 case 'K':
21700 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21701 {
21702 output_operand_lossage ("invalid operand for code '%c'", code);
21703 return;
21704 }
21705
21706 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21707 return;
21708
21709 case 'm':
21710 asm_fprintf (stream, "%r",
21711 REG_P (XEXP (x, 0))
21712 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21713 return;
21714
21715 case 'M':
21716 asm_fprintf (stream, "{%r-%r}",
21717 REGNO (x),
21718 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21719 return;
21720
21721 /* Like 'M', but writing doubleword vector registers, for use by Neon
21722 insns. */
21723 case 'h':
21724 {
21725 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21726 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21727 if (numregs == 1)
21728 asm_fprintf (stream, "{d%d}", regno);
21729 else
21730 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21731 }
21732 return;
21733
21734 case 'd':
21735 /* CONST_TRUE_RTX means always -- that's the default. */
21736 if (x == const_true_rtx)
21737 return;
21738
21739 if (!COMPARISON_P (x))
21740 {
21741 output_operand_lossage ("invalid operand for code '%c'", code);
21742 return;
21743 }
21744
21745 fputs (arm_condition_codes[get_arm_condition_code (x)],
21746 stream);
21747 return;
21748
21749 case 'D':
21750 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21751 want to do that. */
21752 if (x == const_true_rtx)
21753 {
21754 output_operand_lossage ("instruction never executed");
21755 return;
21756 }
21757 if (!COMPARISON_P (x))
21758 {
21759 output_operand_lossage ("invalid operand for code '%c'", code);
21760 return;
21761 }
21762
21763 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21764 (get_arm_condition_code (x))],
21765 stream);
21766 return;
21767
21768 case 's':
21769 case 'V':
21770 case 'W':
21771 case 'X':
21772 case 'Y':
21773 case 'Z':
21774 /* Former Maverick support, removed after GCC-4.7. */
21775 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21776 return;
21777
21778 case 'U':
21779 if (!REG_P (x)
21780 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21781 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21782 /* Bad value for wCG register number. */
21783 {
21784 output_operand_lossage ("invalid operand for code '%c'", code);
21785 return;
21786 }
21787
21788 else
21789 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21790 return;
21791
21792 /* Print an iWMMXt control register name. */
21793 case 'w':
21794 if (!CONST_INT_P (x)
21795 || INTVAL (x) < 0
21796 || INTVAL (x) >= 16)
21797 /* Bad value for wC register number. */
21798 {
21799 output_operand_lossage ("invalid operand for code '%c'", code);
21800 return;
21801 }
21802
21803 else
21804 {
21805 static const char * wc_reg_names [16] =
21806 {
21807 "wCID", "wCon", "wCSSF", "wCASF",
21808 "wC4", "wC5", "wC6", "wC7",
21809 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21810 "wC12", "wC13", "wC14", "wC15"
21811 };
21812
21813 fputs (wc_reg_names [INTVAL (x)], stream);
21814 }
21815 return;
21816
21817 /* Print the high single-precision register of a VFP double-precision
21818 register. */
21819 case 'p':
21820 {
21821 enum machine_mode mode = GET_MODE (x);
21822 int regno;
21823
21824 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21825 {
21826 output_operand_lossage ("invalid operand for code '%c'", code);
21827 return;
21828 }
21829
21830 regno = REGNO (x);
21831 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21832 {
21833 output_operand_lossage ("invalid operand for code '%c'", code);
21834 return;
21835 }
21836
21837 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21838 }
21839 return;
21840
21841 /* Print a VFP/Neon double precision or quad precision register name. */
21842 case 'P':
21843 case 'q':
21844 {
21845 enum machine_mode mode = GET_MODE (x);
21846 int is_quad = (code == 'q');
21847 int regno;
21848
21849 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21850 {
21851 output_operand_lossage ("invalid operand for code '%c'", code);
21852 return;
21853 }
21854
21855 if (!REG_P (x)
21856 || !IS_VFP_REGNUM (REGNO (x)))
21857 {
21858 output_operand_lossage ("invalid operand for code '%c'", code);
21859 return;
21860 }
21861
21862 regno = REGNO (x);
21863 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21864 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21865 {
21866 output_operand_lossage ("invalid operand for code '%c'", code);
21867 return;
21868 }
21869
21870 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21871 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21872 }
21873 return;
21874
21875 /* These two codes print the low/high doubleword register of a Neon quad
21876 register, respectively. For pair-structure types, can also print
21877 low/high quadword registers. */
21878 case 'e':
21879 case 'f':
21880 {
21881 enum machine_mode mode = GET_MODE (x);
21882 int regno;
21883
21884 if ((GET_MODE_SIZE (mode) != 16
21885 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21886 {
21887 output_operand_lossage ("invalid operand for code '%c'", code);
21888 return;
21889 }
21890
21891 regno = REGNO (x);
21892 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21893 {
21894 output_operand_lossage ("invalid operand for code '%c'", code);
21895 return;
21896 }
21897
21898 if (GET_MODE_SIZE (mode) == 16)
21899 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21900 + (code == 'f' ? 1 : 0));
21901 else
21902 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21903 + (code == 'f' ? 1 : 0));
21904 }
21905 return;
21906
21907 /* Print a VFPv3 floating-point constant, represented as an integer
21908 index. */
21909 case 'G':
21910 {
21911 int index = vfp3_const_double_index (x);
21912 gcc_assert (index != -1);
21913 fprintf (stream, "%d", index);
21914 }
21915 return;
21916
21917 /* Print bits representing opcode features for Neon.
21918
21919 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21920 and polynomials as unsigned.
21921
21922 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21923
21924 Bit 2 is 1 for rounding functions, 0 otherwise. */
21925
21926 /* Identify the type as 's', 'u', 'p' or 'f'. */
21927 case 'T':
21928 {
21929 HOST_WIDE_INT bits = INTVAL (x);
21930 fputc ("uspf"[bits & 3], stream);
21931 }
21932 return;
21933
21934 /* Likewise, but signed and unsigned integers are both 'i'. */
21935 case 'F':
21936 {
21937 HOST_WIDE_INT bits = INTVAL (x);
21938 fputc ("iipf"[bits & 3], stream);
21939 }
21940 return;
21941
21942 /* As for 'T', but emit 'u' instead of 'p'. */
21943 case 't':
21944 {
21945 HOST_WIDE_INT bits = INTVAL (x);
21946 fputc ("usuf"[bits & 3], stream);
21947 }
21948 return;
21949
21950 /* Bit 2: rounding (vs none). */
21951 case 'O':
21952 {
21953 HOST_WIDE_INT bits = INTVAL (x);
21954 fputs ((bits & 4) != 0 ? "r" : "", stream);
21955 }
21956 return;
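/* Editor's example (not part of the original source): an operand value of 7
   (signed, float, rounding) prints 'f' for %T, %F and %t and "r" for %O;
   a value of 2 (unsigned polynomial, no rounding) prints 'p' for %T and %F,
   'u' for %t and nothing for %O.  */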
21957
21958 /* Memory operand for vld1/vst1 instruction. */
21959 case 'A':
21960 {
21961 rtx addr;
21962 bool postinc = FALSE;
21963 rtx postinc_reg = NULL;
21964 unsigned align, memsize, align_bits;
21965
21966 gcc_assert (MEM_P (x));
21967 addr = XEXP (x, 0);
21968 if (GET_CODE (addr) == POST_INC)
21969 {
21970 postinc = 1;
21971 addr = XEXP (addr, 0);
21972 }
21973 if (GET_CODE (addr) == POST_MODIFY)
21974 {
21975 postinc_reg = XEXP( XEXP (addr, 1), 1);
21976 addr = XEXP (addr, 0);
21977 }
21978 asm_fprintf (stream, "[%r", REGNO (addr));
21979
21980 /* We know the alignment of this access, so we can emit a hint in the
21981 instruction (for some alignments) as an aid to the memory subsystem
21982 of the target. */
21983 align = MEM_ALIGN (x) >> 3;
21984 memsize = MEM_SIZE (x);
21985
21986 /* Only certain alignment specifiers are supported by the hardware. */
21987 if (memsize == 32 && (align % 32) == 0)
21988 align_bits = 256;
21989 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21990 align_bits = 128;
21991 else if (memsize >= 8 && (align % 8) == 0)
21992 align_bits = 64;
21993 else
21994 align_bits = 0;
21995
21996 if (align_bits != 0)
21997 asm_fprintf (stream, ":%d", align_bits);
21998
21999 asm_fprintf (stream, "]");
22000
22001 if (postinc)
22002 fputs("!", stream);
22003 if (postinc_reg)
22004 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22005 }
22006 return;
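/* Editor's example (not part of the original source): a 16-byte access
   through r2 with 128-bit alignment prints as "[r2:128]", or "[r2:128]!"
   with a post-increment address; an 8-byte access with only word alignment
   gets no hint and prints simply as "[r2]".  */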
22007
22008 case 'C':
22009 {
22010 rtx addr;
22011
22012 gcc_assert (MEM_P (x));
22013 addr = XEXP (x, 0);
22014 gcc_assert (REG_P (addr));
22015 asm_fprintf (stream, "[%r]", REGNO (addr));
22016 }
22017 return;
22018
22019 /* Translate an S register number into a D register number and element index. */
22020 case 'y':
22021 {
22022 enum machine_mode mode = GET_MODE (x);
22023 int regno;
22024
22025 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22026 {
22027 output_operand_lossage ("invalid operand for code '%c'", code);
22028 return;
22029 }
22030
22031 regno = REGNO (x);
22032 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22033 {
22034 output_operand_lossage ("invalid operand for code '%c'", code);
22035 return;
22036 }
22037
22038 regno = regno - FIRST_VFP_REGNUM;
22039 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22040 }
22041 return;
22042
22043 case 'v':
22044 gcc_assert (CONST_DOUBLE_P (x));
22045 int result;
22046 result = vfp3_const_double_for_fract_bits (x);
22047 if (result == 0)
22048 result = vfp3_const_double_for_bits (x);
22049 fprintf (stream, "#%d", result);
22050 return;
22051
22052 /* Register specifier for vld1.16/vst1.16. Translate the S register
22053 number into a D register number and element index. */
22054 case 'z':
22055 {
22056 enum machine_mode mode = GET_MODE (x);
22057 int regno;
22058
22059 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22060 {
22061 output_operand_lossage ("invalid operand for code '%c'", code);
22062 return;
22063 }
22064
22065 regno = REGNO (x);
22066 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22067 {
22068 output_operand_lossage ("invalid operand for code '%c'", code);
22069 return;
22070 }
22071
22072 regno = regno - FIRST_VFP_REGNUM;
22073 fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
22074 }
22075 return;
22076
22077 default:
22078 if (x == 0)
22079 {
22080 output_operand_lossage ("missing operand");
22081 return;
22082 }
22083
22084 switch (GET_CODE (x))
22085 {
22086 case REG:
22087 asm_fprintf (stream, "%r", REGNO (x));
22088 break;
22089
22090 case MEM:
22091 output_memory_reference_mode = GET_MODE (x);
22092 output_address (XEXP (x, 0));
22093 break;
22094
22095 case CONST_DOUBLE:
22096 if (TARGET_NEON)
22097 {
22098 char fpstr[20];
22099 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22100 sizeof (fpstr), 0, 1);
22101 fprintf (stream, "#%s", fpstr);
22102 }
22103 else
22104 fprintf (stream, "#%s", fp_immediate_constant (x));
22105 break;
22106
22107 default:
22108 gcc_assert (GET_CODE (x) != NEG);
22109 fputc ('#', stream);
22110 if (GET_CODE (x) == HIGH)
22111 {
22112 fputs (":lower16:", stream);
22113 x = XEXP (x, 0);
22114 }
22115
22116 output_addr_const (stream, x);
22117 break;
22118 }
22119 }
22120 }
22121 \f
22122 /* Target hook for printing a memory address. */
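/* A few example renderings for 32-bit targets (register numbers and
   offsets are illustrative): a bare register prints as "[r0]";
   (plus (reg r0) (const_int 4)) prints as "[r0, #4]";
   (post_inc (reg r0)) for an SImode access prints as "[r0], #4"; and
   (pre_dec (reg r0)) for an SImode access prints as "[r0, #-4]!".  */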
22123 static void
22124 arm_print_operand_address (FILE *stream, rtx x)
22125 {
22126 if (TARGET_32BIT)
22127 {
22128 int is_minus = GET_CODE (x) == MINUS;
22129
22130 if (REG_P (x))
22131 asm_fprintf (stream, "[%r]", REGNO (x));
22132 else if (GET_CODE (x) == PLUS || is_minus)
22133 {
22134 rtx base = XEXP (x, 0);
22135 rtx index = XEXP (x, 1);
22136 HOST_WIDE_INT offset = 0;
22137 if (!REG_P (base)
22138 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22139 {
22140 /* Ensure that BASE is a register
22141 (one of them must be).
22142 Also ensure that SP is not used as an index register. */
22143 rtx temp = base;
22144 base = index;
22145 index = temp;
22146 }
22147 switch (GET_CODE (index))
22148 {
22149 case CONST_INT:
22150 offset = INTVAL (index);
22151 if (is_minus)
22152 offset = -offset;
22153 asm_fprintf (stream, "[%r, #%wd]",
22154 REGNO (base), offset);
22155 break;
22156
22157 case REG:
22158 asm_fprintf (stream, "[%r, %s%r]",
22159 REGNO (base), is_minus ? "-" : "",
22160 REGNO (index));
22161 break;
22162
22163 case MULT:
22164 case ASHIFTRT:
22165 case LSHIFTRT:
22166 case ASHIFT:
22167 case ROTATERT:
22168 {
22169 asm_fprintf (stream, "[%r, %s%r",
22170 REGNO (base), is_minus ? "-" : "",
22171 REGNO (XEXP (index, 0)));
22172 arm_print_operand (stream, index, 'S');
22173 fputs ("]", stream);
22174 break;
22175 }
22176
22177 default:
22178 gcc_unreachable ();
22179 }
22180 }
22181 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22182 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22183 {
22184 extern enum machine_mode output_memory_reference_mode;
22185
22186 gcc_assert (REG_P (XEXP (x, 0)));
22187
22188 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22189 asm_fprintf (stream, "[%r, #%s%d]!",
22190 REGNO (XEXP (x, 0)),
22191 GET_CODE (x) == PRE_DEC ? "-" : "",
22192 GET_MODE_SIZE (output_memory_reference_mode));
22193 else
22194 asm_fprintf (stream, "[%r], #%s%d",
22195 REGNO (XEXP (x, 0)),
22196 GET_CODE (x) == POST_DEC ? "-" : "",
22197 GET_MODE_SIZE (output_memory_reference_mode));
22198 }
22199 else if (GET_CODE (x) == PRE_MODIFY)
22200 {
22201 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22202 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22203 asm_fprintf (stream, "#%wd]!",
22204 INTVAL (XEXP (XEXP (x, 1), 1)));
22205 else
22206 asm_fprintf (stream, "%r]!",
22207 REGNO (XEXP (XEXP (x, 1), 1)));
22208 }
22209 else if (GET_CODE (x) == POST_MODIFY)
22210 {
22211 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22212 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22213 asm_fprintf (stream, "#%wd",
22214 INTVAL (XEXP (XEXP (x, 1), 1)));
22215 else
22216 asm_fprintf (stream, "%r",
22217 REGNO (XEXP (XEXP (x, 1), 1)));
22218 }
22219 else output_addr_const (stream, x);
22220 }
22221 else
22222 {
22223 if (REG_P (x))
22224 asm_fprintf (stream, "[%r]", REGNO (x));
22225 else if (GET_CODE (x) == POST_INC)
22226 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22227 else if (GET_CODE (x) == PLUS)
22228 {
22229 gcc_assert (REG_P (XEXP (x, 0)));
22230 if (CONST_INT_P (XEXP (x, 1)))
22231 asm_fprintf (stream, "[%r, #%wd]",
22232 REGNO (XEXP (x, 0)),
22233 INTVAL (XEXP (x, 1)));
22234 else
22235 asm_fprintf (stream, "[%r, %r]",
22236 REGNO (XEXP (x, 0)),
22237 REGNO (XEXP (x, 1)));
22238 }
22239 else
22240 output_addr_const (stream, x);
22241 }
22242 }
22243 \f
22244 /* Target hook for indicating whether a punctuation character for
22245 TARGET_PRINT_OPERAND is valid. */
22246 static bool
22247 arm_print_operand_punct_valid_p (unsigned char code)
22248 {
22249 return (code == '@' || code == '|' || code == '.'
22250 || code == '(' || code == ')' || code == '#'
22251 || (TARGET_32BIT && (code == '?'))
22252 || (TARGET_THUMB2 && (code == '!'))
22253 || (TARGET_THUMB && (code == '_')));
22254 }
22255 \f
22256 /* Target hook for assembling integer objects. The ARM version needs to
22257 handle word-sized values specially. */
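/* For instance (symbol name illustrative), with the default text-relative
   PIC data, assembling the address of a local symbol "foo" into the
   constant pool of a PIC text section emits "\t.word\tfoo(GOTOFF)", while
   a non-local symbol gets "(GOT)" so the linker resolves it through the
   global offset table.  */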
22258 static bool
22259 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22260 {
22261 enum machine_mode mode;
22262
22263 if (size == UNITS_PER_WORD && aligned_p)
22264 {
22265 fputs ("\t.word\t", asm_out_file);
22266 output_addr_const (asm_out_file, x);
22267
22268 /* Mark symbols as position independent. We only do this in the
22269 .text segment, not in the .data segment. */
22270 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22271 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22272 {
22273 /* See legitimize_pic_address for an explanation of the
22274 TARGET_VXWORKS_RTP check. */
22275 if (!arm_pic_data_is_text_relative
22276 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22277 fputs ("(GOT)", asm_out_file);
22278 else
22279 fputs ("(GOTOFF)", asm_out_file);
22280 }
22281 fputc ('\n', asm_out_file);
22282 return true;
22283 }
22284
22285 mode = GET_MODE (x);
22286
22287 if (arm_vector_mode_supported_p (mode))
22288 {
22289 int i, units;
22290
22291 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22292
22293 units = CONST_VECTOR_NUNITS (x);
22294 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22295
22296 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22297 for (i = 0; i < units; i++)
22298 {
22299 rtx elt = CONST_VECTOR_ELT (x, i);
22300 assemble_integer
22301 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22302 }
22303 else
22304 for (i = 0; i < units; i++)
22305 {
22306 rtx elt = CONST_VECTOR_ELT (x, i);
22307 REAL_VALUE_TYPE rval;
22308
22309 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22310
22311 assemble_real
22312 (rval, GET_MODE_INNER (mode),
22313 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22314 }
22315
22316 return true;
22317 }
22318
22319 return default_assemble_integer (x, size, aligned_p);
22320 }
22321
22322 static void
22323 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22324 {
22325 section *s;
22326
22327 if (!TARGET_AAPCS_BASED)
22328 {
22329 (is_ctor ?
22330 default_named_section_asm_out_constructor
22331 : default_named_section_asm_out_destructor) (symbol, priority);
22332 return;
22333 }
22334
22335 /* Put these in the .init_array section, using a special relocation. */
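/* For example (illustrative), a constructor registered with priority 101
   is placed in a section named ".init_array.00101" (see the sprintf
   format below) and emitted as "\t.word\t<symbol>(target1)".  */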
22336 if (priority != DEFAULT_INIT_PRIORITY)
22337 {
22338 char buf[18];
22339 sprintf (buf, "%s.%.5u",
22340 is_ctor ? ".init_array" : ".fini_array",
22341 priority);
22342 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22343 }
22344 else if (is_ctor)
22345 s = ctors_section;
22346 else
22347 s = dtors_section;
22348
22349 switch_to_section (s);
22350 assemble_align (POINTER_SIZE);
22351 fputs ("\t.word\t", asm_out_file);
22352 output_addr_const (asm_out_file, symbol);
22353 fputs ("(target1)\n", asm_out_file);
22354 }
22355
22356 /* Add a function to the list of static constructors. */
22357
22358 static void
22359 arm_elf_asm_constructor (rtx symbol, int priority)
22360 {
22361 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22362 }
22363
22364 /* Add a function to the list of static destructors. */
22365
22366 static void
22367 arm_elf_asm_destructor (rtx symbol, int priority)
22368 {
22369 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22370 }
22371 \f
22372 /* A finite state machine takes care of noticing whether or not instructions
22373 can be conditionally executed, and thus decrease execution time and code
22374 size by deleting branch instructions. The fsm is controlled by
22375 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22376
22377 /* The states of the fsm controlling condition codes are:
22378 0: normal, do nothing special
22379 1: make ASM_OUTPUT_OPCODE not output this instruction
22380 2: make ASM_OUTPUT_OPCODE not output this instruction
22381 3: make instructions conditional
22382 4: make instructions conditional
22383
22384 State transitions (state->state by whom under condition):
22385 0 -> 1 final_prescan_insn if the `target' is a label
22386 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22387 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22388 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22389 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22390 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22391 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22392 (the target insn is arm_target_insn).
22393
22394 If the jump clobbers the conditions then we use states 2 and 4.
22395
22396 A similar thing can be done with conditional return insns.
22397
22398 XXX In case the `target' is an unconditional branch, this conditionalising
22399 of the instructions always reduces code size, but not always execution
22400 time. But then, I want to reduce the code size to somewhere near what
22401 /bin/cc produces. */
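/* As a sketch of the transformation (not taken from any particular test
   case), a short forward branch such as

       cmp     r0, #0
       beq     .L1
       add     r1, r1, #1
       mov     r2, #0
   .L1:

   is turned into

       cmp     r0, #0
       addne   r1, r1, #1
       movne   r2, #0
   .L1:

   i.e. the branch is deleted and the skipped instructions are executed
   conditionally under the inverse condition.  */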
22402
22403 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22404 instructions. When a COND_EXEC instruction is seen the subsequent
22405 instructions are scanned so that multiple conditional instructions can be
22406 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22407 specify the length and true/false mask for the IT block. These will be
22408 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
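/* For example (illustrative), three consecutive COND_EXEC insns where the
   first two execute on EQ and the third on NE give arm_condexec_masklen
   == 3 and arm_condexec_mask == 0b011 (i.e. 3), and
   thumb2_asm_output_opcode emits "itte\teq" in front of the first
   instruction of the block.  */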
22409
22410 /* Returns the index of the ARM condition code string in
22411 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22412 COMPARISON should be an rtx like `(eq (...) (...))'. */
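/* For example (illustrative), a comparison such as
   (ge (reg:CC_SWP CC_REGNUM) (const_int 0)) yields ARM_LE, because
   CC_SWPmode records that the original operands were swapped when the
   condition-code register was set.  */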
22413
22414 enum arm_cond_code
22415 maybe_get_arm_condition_code (rtx comparison)
22416 {
22417 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
22418 enum arm_cond_code code;
22419 enum rtx_code comp_code = GET_CODE (comparison);
22420
22421 if (GET_MODE_CLASS (mode) != MODE_CC)
22422 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22423 XEXP (comparison, 1));
22424
22425 switch (mode)
22426 {
22427 case CC_DNEmode: code = ARM_NE; goto dominance;
22428 case CC_DEQmode: code = ARM_EQ; goto dominance;
22429 case CC_DGEmode: code = ARM_GE; goto dominance;
22430 case CC_DGTmode: code = ARM_GT; goto dominance;
22431 case CC_DLEmode: code = ARM_LE; goto dominance;
22432 case CC_DLTmode: code = ARM_LT; goto dominance;
22433 case CC_DGEUmode: code = ARM_CS; goto dominance;
22434 case CC_DGTUmode: code = ARM_HI; goto dominance;
22435 case CC_DLEUmode: code = ARM_LS; goto dominance;
22436 case CC_DLTUmode: code = ARM_CC;
22437
22438 dominance:
22439 if (comp_code == EQ)
22440 return ARM_INVERSE_CONDITION_CODE (code);
22441 if (comp_code == NE)
22442 return code;
22443 return ARM_NV;
22444
22445 case CC_NOOVmode:
22446 switch (comp_code)
22447 {
22448 case NE: return ARM_NE;
22449 case EQ: return ARM_EQ;
22450 case GE: return ARM_PL;
22451 case LT: return ARM_MI;
22452 default: return ARM_NV;
22453 }
22454
22455 case CC_Zmode:
22456 switch (comp_code)
22457 {
22458 case NE: return ARM_NE;
22459 case EQ: return ARM_EQ;
22460 default: return ARM_NV;
22461 }
22462
22463 case CC_Nmode:
22464 switch (comp_code)
22465 {
22466 case NE: return ARM_MI;
22467 case EQ: return ARM_PL;
22468 default: return ARM_NV;
22469 }
22470
22471 case CCFPEmode:
22472 case CCFPmode:
22473 /* We can handle all cases except UNEQ and LTGT. */
22474 switch (comp_code)
22475 {
22476 case GE: return ARM_GE;
22477 case GT: return ARM_GT;
22478 case LE: return ARM_LS;
22479 case LT: return ARM_MI;
22480 case NE: return ARM_NE;
22481 case EQ: return ARM_EQ;
22482 case ORDERED: return ARM_VC;
22483 case UNORDERED: return ARM_VS;
22484 case UNLT: return ARM_LT;
22485 case UNLE: return ARM_LE;
22486 case UNGT: return ARM_HI;
22487 case UNGE: return ARM_PL;
22488 /* UNEQ and LTGT do not have a representation. */
22489 case UNEQ: /* Fall through. */
22490 case LTGT: /* Fall through. */
22491 default: return ARM_NV;
22492 }
22493
22494 case CC_SWPmode:
22495 switch (comp_code)
22496 {
22497 case NE: return ARM_NE;
22498 case EQ: return ARM_EQ;
22499 case GE: return ARM_LE;
22500 case GT: return ARM_LT;
22501 case LE: return ARM_GE;
22502 case LT: return ARM_GT;
22503 case GEU: return ARM_LS;
22504 case GTU: return ARM_CC;
22505 case LEU: return ARM_CS;
22506 case LTU: return ARM_HI;
22507 default: return ARM_NV;
22508 }
22509
22510 case CC_Cmode:
22511 switch (comp_code)
22512 {
22513 case LTU: return ARM_CS;
22514 case GEU: return ARM_CC;
22515 default: return ARM_NV;
22516 }
22517
22518 case CC_CZmode:
22519 switch (comp_code)
22520 {
22521 case NE: return ARM_NE;
22522 case EQ: return ARM_EQ;
22523 case GEU: return ARM_CS;
22524 case GTU: return ARM_HI;
22525 case LEU: return ARM_LS;
22526 case LTU: return ARM_CC;
22527 default: return ARM_NV;
22528 }
22529
22530 case CC_NCVmode:
22531 switch (comp_code)
22532 {
22533 case GE: return ARM_GE;
22534 case LT: return ARM_LT;
22535 case GEU: return ARM_CS;
22536 case LTU: return ARM_CC;
22537 default: return ARM_NV;
22538 }
22539
22540 case CCmode:
22541 switch (comp_code)
22542 {
22543 case NE: return ARM_NE;
22544 case EQ: return ARM_EQ;
22545 case GE: return ARM_GE;
22546 case GT: return ARM_GT;
22547 case LE: return ARM_LE;
22548 case LT: return ARM_LT;
22549 case GEU: return ARM_CS;
22550 case GTU: return ARM_HI;
22551 case LEU: return ARM_LS;
22552 case LTU: return ARM_CC;
22553 default: return ARM_NV;
22554 }
22555
22556 default: gcc_unreachable ();
22557 }
22558 }
22559
22560 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22561 static enum arm_cond_code
22562 get_arm_condition_code (rtx comparison)
22563 {
22564 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22565 gcc_assert (code != ARM_NV);
22566 return code;
22567 }
22568
22569 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22570 instructions. */
22571 void
22572 thumb2_final_prescan_insn (rtx insn)
22573 {
22574 rtx first_insn = insn;
22575 rtx body = PATTERN (insn);
22576 rtx predicate;
22577 enum arm_cond_code code;
22578 int n;
22579 int mask;
22580 int max;
22581
22582 /* max_insns_skipped in the tune was already taken into account in the
22583 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22584 just emit the IT blocks as best we can. It does not make sense to split
22585 the IT blocks. */
22586 max = MAX_INSN_PER_IT_BLOCK;
22587
22588 /* Remove the previous insn from the count of insns to be output. */
22589 if (arm_condexec_count)
22590 arm_condexec_count--;
22591
22592 /* Nothing to do if we are already inside a conditional block. */
22593 if (arm_condexec_count)
22594 return;
22595
22596 if (GET_CODE (body) != COND_EXEC)
22597 return;
22598
22599 /* Conditional jumps are implemented directly. */
22600 if (JUMP_P (insn))
22601 return;
22602
22603 predicate = COND_EXEC_TEST (body);
22604 arm_current_cc = get_arm_condition_code (predicate);
22605
22606 n = get_attr_ce_count (insn);
22607 arm_condexec_count = 1;
22608 arm_condexec_mask = (1 << n) - 1;
22609 arm_condexec_masklen = n;
22610 /* See if subsequent instructions can be combined into the same block. */
22611 for (;;)
22612 {
22613 insn = next_nonnote_insn (insn);
22614
22615 /* Jumping into the middle of an IT block is illegal, so a label or
22616 barrier terminates the block. */
22617 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22618 break;
22619
22620 body = PATTERN (insn);
22621 /* USE and CLOBBER aren't really insns, so just skip them. */
22622 if (GET_CODE (body) == USE
22623 || GET_CODE (body) == CLOBBER)
22624 continue;
22625
22626 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22627 if (GET_CODE (body) != COND_EXEC)
22628 break;
22629 /* Maximum number of conditionally executed instructions in a block. */
22630 n = get_attr_ce_count (insn);
22631 if (arm_condexec_masklen + n > max)
22632 break;
22633
22634 predicate = COND_EXEC_TEST (body);
22635 code = get_arm_condition_code (predicate);
22636 mask = (1 << n) - 1;
22637 if (arm_current_cc == code)
22638 arm_condexec_mask |= (mask << arm_condexec_masklen);
22639 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22640 break;
22641
22642 arm_condexec_count++;
22643 arm_condexec_masklen += n;
22644
22645 /* A jump must be the last instruction in a conditional block. */
22646 if (JUMP_P (insn))
22647 break;
22648 }
22649 /* Restore recog_data (getting the attributes of other insns can
22650 destroy this array, but final.c assumes that it remains intact
22651 across this call). */
22652 extract_constrain_insn_cached (first_insn);
22653 }
22654
22655 void
22656 arm_final_prescan_insn (rtx insn)
22657 {
22658 /* BODY will hold the body of INSN. */
22659 rtx body = PATTERN (insn);
22660
22661 /* This will be 1 if trying to repeat the trick, and things need to be
22662 reversed if it appears to fail. */
22663 int reverse = 0;
22664
22665 /* If we start with a return insn, we only succeed if we find another one. */
22666 int seeking_return = 0;
22667 enum rtx_code return_code = UNKNOWN;
22668
22669 /* START_INSN will hold the insn from where we start looking. This is the
22670 first insn after the following code_label if REVERSE is true. */
22671 rtx start_insn = insn;
22672
22673 /* If in state 4, check if the target branch is reached, in order to
22674 change back to state 0. */
22675 if (arm_ccfsm_state == 4)
22676 {
22677 if (insn == arm_target_insn)
22678 {
22679 arm_target_insn = NULL;
22680 arm_ccfsm_state = 0;
22681 }
22682 return;
22683 }
22684
22685 /* If in state 3, it is possible to repeat the trick, if this insn is an
22686 unconditional branch to a label, and immediately following this branch
22687 is the previous target label which is only used once, and the label this
22688 branch jumps to is not too far off. */
22689 if (arm_ccfsm_state == 3)
22690 {
22691 if (simplejump_p (insn))
22692 {
22693 start_insn = next_nonnote_insn (start_insn);
22694 if (BARRIER_P (start_insn))
22695 {
22696 /* XXX Isn't this always a barrier? */
22697 start_insn = next_nonnote_insn (start_insn);
22698 }
22699 if (LABEL_P (start_insn)
22700 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22701 && LABEL_NUSES (start_insn) == 1)
22702 reverse = TRUE;
22703 else
22704 return;
22705 }
22706 else if (ANY_RETURN_P (body))
22707 {
22708 start_insn = next_nonnote_insn (start_insn);
22709 if (BARRIER_P (start_insn))
22710 start_insn = next_nonnote_insn (start_insn);
22711 if (LABEL_P (start_insn)
22712 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22713 && LABEL_NUSES (start_insn) == 1)
22714 {
22715 reverse = TRUE;
22716 seeking_return = 1;
22717 return_code = GET_CODE (body);
22718 }
22719 else
22720 return;
22721 }
22722 else
22723 return;
22724 }
22725
22726 gcc_assert (!arm_ccfsm_state || reverse);
22727 if (!JUMP_P (insn))
22728 return;
22729
22730 /* This jump might be paralleled with a clobber of the condition codes;
22731 the jump should always come first. */
22732 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22733 body = XVECEXP (body, 0, 0);
22734
22735 if (reverse
22736 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22737 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22738 {
22739 int insns_skipped;
22740 int fail = FALSE, succeed = FALSE;
22741 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22742 int then_not_else = TRUE;
22743 rtx this_insn = start_insn, label = 0;
22744
22745 /* Register the insn jumped to. */
22746 if (reverse)
22747 {
22748 if (!seeking_return)
22749 label = XEXP (SET_SRC (body), 0);
22750 }
22751 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22752 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22753 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22754 {
22755 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22756 then_not_else = FALSE;
22757 }
22758 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22759 {
22760 seeking_return = 1;
22761 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22762 }
22763 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22764 {
22765 seeking_return = 1;
22766 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22767 then_not_else = FALSE;
22768 }
22769 else
22770 gcc_unreachable ();
22771
22772 /* See how many insns this branch skips, and what kind of insns. If all
22773 insns are okay, and the label or unconditional branch to the same
22774 label is not too far away, succeed. */
22775 for (insns_skipped = 0;
22776 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22777 {
22778 rtx scanbody;
22779
22780 this_insn = next_nonnote_insn (this_insn);
22781 if (!this_insn)
22782 break;
22783
22784 switch (GET_CODE (this_insn))
22785 {
22786 case CODE_LABEL:
22787 /* Succeed if it is the target label, otherwise fail since
22788 control falls in from somewhere else. */
22789 if (this_insn == label)
22790 {
22791 arm_ccfsm_state = 1;
22792 succeed = TRUE;
22793 }
22794 else
22795 fail = TRUE;
22796 break;
22797
22798 case BARRIER:
22799 /* Succeed if the following insn is the target label.
22800 Otherwise fail.
22801 If return insns are used then the last insn in a function
22802 will be a barrier. */
22803 this_insn = next_nonnote_insn (this_insn);
22804 if (this_insn && this_insn == label)
22805 {
22806 arm_ccfsm_state = 1;
22807 succeed = TRUE;
22808 }
22809 else
22810 fail = TRUE;
22811 break;
22812
22813 case CALL_INSN:
22814 /* The AAPCS says that conditional calls should not be
22815 used since they make interworking inefficient (the
22816 linker can't transform BL<cond> into BLX). That's
22817 only a problem if the machine has BLX. */
22818 if (arm_arch5)
22819 {
22820 fail = TRUE;
22821 break;
22822 }
22823
22824 /* Succeed if the following insn is the target label, or
22825 if the following two insns are a barrier and the
22826 target label. */
22827 this_insn = next_nonnote_insn (this_insn);
22828 if (this_insn && BARRIER_P (this_insn))
22829 this_insn = next_nonnote_insn (this_insn);
22830
22831 if (this_insn && this_insn == label
22832 && insns_skipped < max_insns_skipped)
22833 {
22834 arm_ccfsm_state = 1;
22835 succeed = TRUE;
22836 }
22837 else
22838 fail = TRUE;
22839 break;
22840
22841 case JUMP_INSN:
22842 /* If this is an unconditional branch to the same label, succeed.
22843 If it is to another label, do nothing. If it is conditional,
22844 fail. */
22845 /* XXX Probably, the tests for SET and the PC are
22846 unnecessary. */
22847
22848 scanbody = PATTERN (this_insn);
22849 if (GET_CODE (scanbody) == SET
22850 && GET_CODE (SET_DEST (scanbody)) == PC)
22851 {
22852 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22853 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22854 {
22855 arm_ccfsm_state = 2;
22856 succeed = TRUE;
22857 }
22858 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22859 fail = TRUE;
22860 }
22861 /* Fail if a conditional return is undesirable (e.g. on a
22862 StrongARM), but still allow this if optimizing for size. */
22863 else if (GET_CODE (scanbody) == return_code
22864 && !use_return_insn (TRUE, NULL)
22865 && !optimize_size)
22866 fail = TRUE;
22867 else if (GET_CODE (scanbody) == return_code)
22868 {
22869 arm_ccfsm_state = 2;
22870 succeed = TRUE;
22871 }
22872 else if (GET_CODE (scanbody) == PARALLEL)
22873 {
22874 switch (get_attr_conds (this_insn))
22875 {
22876 case CONDS_NOCOND:
22877 break;
22878 default:
22879 fail = TRUE;
22880 break;
22881 }
22882 }
22883 else
22884 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22885
22886 break;
22887
22888 case INSN:
22889 /* Instructions using or affecting the condition codes make it
22890 fail. */
22891 scanbody = PATTERN (this_insn);
22892 if (!(GET_CODE (scanbody) == SET
22893 || GET_CODE (scanbody) == PARALLEL)
22894 || get_attr_conds (this_insn) != CONDS_NOCOND)
22895 fail = TRUE;
22896 break;
22897
22898 default:
22899 break;
22900 }
22901 }
22902 if (succeed)
22903 {
22904 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22905 arm_target_label = CODE_LABEL_NUMBER (label);
22906 else
22907 {
22908 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22909
22910 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22911 {
22912 this_insn = next_nonnote_insn (this_insn);
22913 gcc_assert (!this_insn
22914 || (!BARRIER_P (this_insn)
22915 && !LABEL_P (this_insn)));
22916 }
22917 if (!this_insn)
22918 {
22919 /* Oh, dear! We ran off the end; give up. */
22920 extract_constrain_insn_cached (insn);
22921 arm_ccfsm_state = 0;
22922 arm_target_insn = NULL;
22923 return;
22924 }
22925 arm_target_insn = this_insn;
22926 }
22927
22928 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22929 what it was. */
22930 if (!reverse)
22931 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22932
22933 if (reverse || then_not_else)
22934 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22935 }
22936
22937 /* Restore recog_data (getting the attributes of other insns can
22938 destroy this array, but final.c assumes that it remains intact
22939 across this call). */
22940 extract_constrain_insn_cached (insn);
22941 }
22942 }
22943
22944 /* Output IT instructions. */
22945 void
22946 thumb2_asm_output_opcode (FILE * stream)
22947 {
22948 char buff[5];
22949 int n;
22950
22951 if (arm_condexec_mask)
22952 {
22953 for (n = 0; n < arm_condexec_masklen; n++)
22954 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22955 buff[n] = 0;
22956 asm_fprintf (stream, "i%s\t%s\n\t", buff,
22957 arm_condition_codes[arm_current_cc]);
22958 arm_condexec_mask = 0;
22959 }
22960 }
22961
22962 /* Returns true if REGNO is a valid register
22963 for holding a quantity of type MODE. */
22964 int
22965 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22966 {
22967 if (GET_MODE_CLASS (mode) == MODE_CC)
22968 return (regno == CC_REGNUM
22969 || (TARGET_HARD_FLOAT && TARGET_VFP
22970 && regno == VFPCC_REGNUM));
22971
22972 if (TARGET_THUMB1)
22973 /* For the Thumb we only allow values bigger than SImode in
22974 registers 0 - 6, so that there is always a second low
22975 register available to hold the upper part of the value.
22976 We probably ought to ensure that the register is the
22977 start of an even numbered register pair. */
22978 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22979
22980 if (TARGET_HARD_FLOAT && TARGET_VFP
22981 && IS_VFP_REGNUM (regno))
22982 {
22983 if (mode == SFmode || mode == SImode)
22984 return VFP_REGNO_OK_FOR_SINGLE (regno);
22985
22986 if (mode == DFmode)
22987 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22988
22989 /* VFP registers can hold HFmode values, but there is no point in
22990 putting them there unless we have hardware conversion insns. */
22991 if (mode == HFmode)
22992 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22993
22994 if (TARGET_NEON)
22995 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22996 || (VALID_NEON_QREG_MODE (mode)
22997 && NEON_REGNO_OK_FOR_QUAD (regno))
22998 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22999 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23000 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23001 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23002 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23003
23004 return FALSE;
23005 }
23006
23007 if (TARGET_REALLY_IWMMXT)
23008 {
23009 if (IS_IWMMXT_GR_REGNUM (regno))
23010 return mode == SImode;
23011
23012 if (IS_IWMMXT_REGNUM (regno))
23013 return VALID_IWMMXT_REG_MODE (mode);
23014 }
23015
23016 /* We allow almost any value to be stored in the general registers.
23017 Restrict doubleword quantities to even register pairs in ARM state
23018 so that we can use ldrd. Do not allow very large Neon structure
23019 opaque modes in general registers; they would use too many. */
23020 if (regno <= LAST_ARM_REGNUM)
23021 {
23022 if (ARM_NUM_REGS (mode) > 4)
23023 return FALSE;
23024
23025 if (TARGET_THUMB2)
23026 return TRUE;
23027
23028 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23029 }
23030
23031 if (regno == FRAME_POINTER_REGNUM
23032 || regno == ARG_POINTER_REGNUM)
23033 /* We only allow integers in the fake hard registers. */
23034 return GET_MODE_CLASS (mode) == MODE_INT;
23035
23036 return FALSE;
23037 }
23038
23039 /* Implement MODES_TIEABLE_P. */
23040
23041 bool
23042 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
23043 {
23044 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23045 return true;
23046
23047 /* We specifically want to allow elements of "structure" modes to
23048 be tieable to the structure. This more general condition allows
23049 other rarer situations too. */
23050 if (TARGET_NEON
23051 && (VALID_NEON_DREG_MODE (mode1)
23052 || VALID_NEON_QREG_MODE (mode1)
23053 || VALID_NEON_STRUCT_MODE (mode1))
23054 && (VALID_NEON_DREG_MODE (mode2)
23055 || VALID_NEON_QREG_MODE (mode2)
23056 || VALID_NEON_STRUCT_MODE (mode2)))
23057 return true;
23058
23059 return false;
23060 }
23061
23062 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23063 not used in arm mode. */
23064
23065 enum reg_class
23066 arm_regno_class (int regno)
23067 {
23068 if (TARGET_THUMB1)
23069 {
23070 if (regno == STACK_POINTER_REGNUM)
23071 return STACK_REG;
23072 if (regno == CC_REGNUM)
23073 return CC_REG;
23074 if (regno < 8)
23075 return LO_REGS;
23076 return HI_REGS;
23077 }
23078
23079 if (TARGET_THUMB2 && regno < 8)
23080 return LO_REGS;
23081
23082 if ( regno <= LAST_ARM_REGNUM
23083 || regno == FRAME_POINTER_REGNUM
23084 || regno == ARG_POINTER_REGNUM)
23085 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23086
23087 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23088 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23089
23090 if (IS_VFP_REGNUM (regno))
23091 {
23092 if (regno <= D7_VFP_REGNUM)
23093 return VFP_D0_D7_REGS;
23094 else if (regno <= LAST_LO_VFP_REGNUM)
23095 return VFP_LO_REGS;
23096 else
23097 return VFP_HI_REGS;
23098 }
23099
23100 if (IS_IWMMXT_REGNUM (regno))
23101 return IWMMXT_REGS;
23102
23103 if (IS_IWMMXT_GR_REGNUM (regno))
23104 return IWMMXT_GR_REGS;
23105
23106 return NO_REGS;
23107 }
23108
23109 /* Handle a special case when computing the offset
23110 of an argument from the frame pointer. */
23111 int
23112 arm_debugger_arg_offset (int value, rtx addr)
23113 {
23114 rtx insn;
23115
23116 /* We are only interested if dbxout_parms() failed to compute the offset. */
23117 if (value != 0)
23118 return 0;
23119
23120 /* We can only cope with the case where the address is held in a register. */
23121 if (!REG_P (addr))
23122 return 0;
23123
23124 /* If we are using the frame pointer to point at the argument, then
23125 an offset of 0 is correct. */
23126 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23127 return 0;
23128
23129 /* If we are using the stack pointer to point at the
23130 argument, then an offset of 0 is correct. */
23131 /* ??? Check this is consistent with thumb2 frame layout. */
23132 if ((TARGET_THUMB || !frame_pointer_needed)
23133 && REGNO (addr) == SP_REGNUM)
23134 return 0;
23135
23136 /* Oh dear. The argument is pointed to by a register rather
23137 than being held in a register, or being stored at a known
23138 offset from the frame pointer. Since GDB only understands
23139 those two kinds of argument we must translate the address
23140 held in the register into an offset from the frame pointer.
23141 We do this by searching through the insns for the function
23142 looking to see where this register gets its value. If the
23143 register is initialized from the frame pointer plus an offset
23144 then we are in luck and we can continue, otherwise we give up.
23145
23146 This code is exercised by producing debugging information
23147 for a function with arguments like this:
23148
23149 double func (double a, double b, int c, double d) {return d;}
23150
23151 Without this code the stab for parameter 'd' will be set to
23152 an offset of 0 from the frame pointer, rather than 8. */
23153
23154 /* The if() statement says:
23155
23156 If the insn is a normal instruction
23157 and if the insn is setting the value in a register
23158 and if the register being set is the register holding the address of the argument
23159 and if the address is computed by an addition
23160 that involves adding to a register
23161 which is the frame pointer
23162 a constant integer
23163
23164 then... */
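/* That is, an insn whose pattern looks like
   (set (reg Rn) (plus (reg fp) (const_int OFFSET)))
   where Rn and OFFSET are placeholders; OFFSET is the value we are after.  */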
23165
23166 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23167 {
23168 if ( NONJUMP_INSN_P (insn)
23169 && GET_CODE (PATTERN (insn)) == SET
23170 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23171 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23172 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23173 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23174 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23175 )
23176 {
23177 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23178
23179 break;
23180 }
23181 }
23182
23183 if (value == 0)
23184 {
23185 debug_rtx (addr);
23186 warning (0, "unable to compute real location of stacked parameter");
23187 value = 8; /* XXX magic hack */
23188 }
23189
23190 return value;
23191 }
23192 \f
23193 typedef enum {
23194 T_V8QI,
23195 T_V4HI,
23196 T_V4HF,
23197 T_V2SI,
23198 T_V2SF,
23199 T_DI,
23200 T_V16QI,
23201 T_V8HI,
23202 T_V4SI,
23203 T_V4SF,
23204 T_V2DI,
23205 T_TI,
23206 T_EI,
23207 T_OI,
23208 T_MAX /* Size of enum. Keep last. */
23209 } neon_builtin_type_mode;
23210
23211 #define TYPE_MODE_BIT(X) (1 << (X))
23212
23213 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23214 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23215 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23216 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23217 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23218 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23219
23220 #define v8qi_UP T_V8QI
23221 #define v4hi_UP T_V4HI
23222 #define v4hf_UP T_V4HF
23223 #define v2si_UP T_V2SI
23224 #define v2sf_UP T_V2SF
23225 #define di_UP T_DI
23226 #define v16qi_UP T_V16QI
23227 #define v8hi_UP T_V8HI
23228 #define v4si_UP T_V4SI
23229 #define v4sf_UP T_V4SF
23230 #define v2di_UP T_V2DI
23231 #define ti_UP T_TI
23232 #define ei_UP T_EI
23233 #define oi_UP T_OI
23234
23235 #define UP(X) X##_UP
23236
23237 typedef enum {
23238 NEON_BINOP,
23239 NEON_TERNOP,
23240 NEON_UNOP,
23241 NEON_BSWAP,
23242 NEON_GETLANE,
23243 NEON_SETLANE,
23244 NEON_CREATE,
23245 NEON_RINT,
23246 NEON_DUP,
23247 NEON_DUPLANE,
23248 NEON_COMBINE,
23249 NEON_SPLIT,
23250 NEON_LANEMUL,
23251 NEON_LANEMULL,
23252 NEON_LANEMULH,
23253 NEON_LANEMAC,
23254 NEON_SCALARMUL,
23255 NEON_SCALARMULL,
23256 NEON_SCALARMULH,
23257 NEON_SCALARMAC,
23258 NEON_CONVERT,
23259 NEON_FLOAT_WIDEN,
23260 NEON_FLOAT_NARROW,
23261 NEON_FIXCONV,
23262 NEON_SELECT,
23263 NEON_REINTERP,
23264 NEON_VTBL,
23265 NEON_VTBX,
23266 NEON_LOAD1,
23267 NEON_LOAD1LANE,
23268 NEON_STORE1,
23269 NEON_STORE1LANE,
23270 NEON_LOADSTRUCT,
23271 NEON_LOADSTRUCTLANE,
23272 NEON_STORESTRUCT,
23273 NEON_STORESTRUCTLANE,
23274 NEON_LOGICBINOP,
23275 NEON_SHIFTINSERT,
23276 NEON_SHIFTIMM,
23277 NEON_SHIFTACC
23278 } neon_itype;
23279
23280 typedef struct {
23281 const char *name;
23282 const neon_itype itype;
23283 const neon_builtin_type_mode mode;
23284 const enum insn_code code;
23285 unsigned int fcode;
23286 } neon_builtin_datum;
23287
23288 #define CF(N,X) CODE_FOR_neon_##N##X
23289
23290 #define VAR1(T, N, A) \
23291 {#N, NEON_##T, UP (A), CF (N, A), 0}
23292 #define VAR2(T, N, A, B) \
23293 VAR1 (T, N, A), \
23294 {#N, NEON_##T, UP (B), CF (N, B), 0}
23295 #define VAR3(T, N, A, B, C) \
23296 VAR2 (T, N, A, B), \
23297 {#N, NEON_##T, UP (C), CF (N, C), 0}
23298 #define VAR4(T, N, A, B, C, D) \
23299 VAR3 (T, N, A, B, C), \
23300 {#N, NEON_##T, UP (D), CF (N, D), 0}
23301 #define VAR5(T, N, A, B, C, D, E) \
23302 VAR4 (T, N, A, B, C, D), \
23303 {#N, NEON_##T, UP (E), CF (N, E), 0}
23304 #define VAR6(T, N, A, B, C, D, E, F) \
23305 VAR5 (T, N, A, B, C, D, E), \
23306 {#N, NEON_##T, UP (F), CF (N, F), 0}
23307 #define VAR7(T, N, A, B, C, D, E, F, G) \
23308 VAR6 (T, N, A, B, C, D, E, F), \
23309 {#N, NEON_##T, UP (G), CF (N, G), 0}
23310 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23311 VAR7 (T, N, A, B, C, D, E, F, G), \
23312 {#N, NEON_##T, UP (H), CF (N, H), 0}
23313 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23314 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23315 {#N, NEON_##T, UP (I), CF (N, I), 0}
23316 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23317 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23318 {#N, NEON_##T, UP (J), CF (N, J), 0}
23319
23320 /* The NEON builtin data can be found in arm_neon_builtins.def.
23321 The mode entries in the following table correspond to the "key" type of the
23322 instruction variant, i.e. equivalent to that which would be specified after
23323 the assembler mnemonic, which usually refers to the last vector operand.
23324 (Signed/unsigned/polynomial types are not differentiated, though; they
23325 are all mapped onto the same mode for a given element size.) The modes
23326 listed per instruction should be the same as those defined for that
23327 instruction's pattern in neon.md. */
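/* As an illustration of the VARn macros above, a hypothetical entry
   VAR2 (BINOP, vadd, v8qi, v16qi) in arm_neon_builtins.def would expand
   here to the two table entries
   {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0} and
   {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0}.  */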
23328
23329 static neon_builtin_datum neon_builtin_data[] =
23330 {
23331 #include "arm_neon_builtins.def"
23332 };
23333
23334 #undef CF
23335 #undef VAR1
23336 #undef VAR2
23337 #undef VAR3
23338 #undef VAR4
23339 #undef VAR5
23340 #undef VAR6
23341 #undef VAR7
23342 #undef VAR8
23343 #undef VAR9
23344 #undef VAR10
23345
23346 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23347 #define VAR1(T, N, A) \
23348 CF (N, A)
23349 #define VAR2(T, N, A, B) \
23350 VAR1 (T, N, A), \
23351 CF (N, B)
23352 #define VAR3(T, N, A, B, C) \
23353 VAR2 (T, N, A, B), \
23354 CF (N, C)
23355 #define VAR4(T, N, A, B, C, D) \
23356 VAR3 (T, N, A, B, C), \
23357 CF (N, D)
23358 #define VAR5(T, N, A, B, C, D, E) \
23359 VAR4 (T, N, A, B, C, D), \
23360 CF (N, E)
23361 #define VAR6(T, N, A, B, C, D, E, F) \
23362 VAR5 (T, N, A, B, C, D, E), \
23363 CF (N, F)
23364 #define VAR7(T, N, A, B, C, D, E, F, G) \
23365 VAR6 (T, N, A, B, C, D, E, F), \
23366 CF (N, G)
23367 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23368 VAR7 (T, N, A, B, C, D, E, F, G), \
23369 CF (N, H)
23370 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23371 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23372 CF (N, I)
23373 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23374 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23375 CF (N, J)
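/* With the redefinitions above, the same hypothetical entry
   VAR2 (BINOP, vadd, v8qi, v16qi) would expand inside enum arm_builtins
   to the enumerators ARM_BUILTIN_NEON_vaddv8qi and
   ARM_BUILTIN_NEON_vaddv16qi, keeping the enum in lock-step with
   neon_builtin_data.  */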
23376 enum arm_builtins
23377 {
23378 ARM_BUILTIN_GETWCGR0,
23379 ARM_BUILTIN_GETWCGR1,
23380 ARM_BUILTIN_GETWCGR2,
23381 ARM_BUILTIN_GETWCGR3,
23382
23383 ARM_BUILTIN_SETWCGR0,
23384 ARM_BUILTIN_SETWCGR1,
23385 ARM_BUILTIN_SETWCGR2,
23386 ARM_BUILTIN_SETWCGR3,
23387
23388 ARM_BUILTIN_WZERO,
23389
23390 ARM_BUILTIN_WAVG2BR,
23391 ARM_BUILTIN_WAVG2HR,
23392 ARM_BUILTIN_WAVG2B,
23393 ARM_BUILTIN_WAVG2H,
23394
23395 ARM_BUILTIN_WACCB,
23396 ARM_BUILTIN_WACCH,
23397 ARM_BUILTIN_WACCW,
23398
23399 ARM_BUILTIN_WMACS,
23400 ARM_BUILTIN_WMACSZ,
23401 ARM_BUILTIN_WMACU,
23402 ARM_BUILTIN_WMACUZ,
23403
23404 ARM_BUILTIN_WSADB,
23405 ARM_BUILTIN_WSADBZ,
23406 ARM_BUILTIN_WSADH,
23407 ARM_BUILTIN_WSADHZ,
23408
23409 ARM_BUILTIN_WALIGNI,
23410 ARM_BUILTIN_WALIGNR0,
23411 ARM_BUILTIN_WALIGNR1,
23412 ARM_BUILTIN_WALIGNR2,
23413 ARM_BUILTIN_WALIGNR3,
23414
23415 ARM_BUILTIN_TMIA,
23416 ARM_BUILTIN_TMIAPH,
23417 ARM_BUILTIN_TMIABB,
23418 ARM_BUILTIN_TMIABT,
23419 ARM_BUILTIN_TMIATB,
23420 ARM_BUILTIN_TMIATT,
23421
23422 ARM_BUILTIN_TMOVMSKB,
23423 ARM_BUILTIN_TMOVMSKH,
23424 ARM_BUILTIN_TMOVMSKW,
23425
23426 ARM_BUILTIN_TBCSTB,
23427 ARM_BUILTIN_TBCSTH,
23428 ARM_BUILTIN_TBCSTW,
23429
23430 ARM_BUILTIN_WMADDS,
23431 ARM_BUILTIN_WMADDU,
23432
23433 ARM_BUILTIN_WPACKHSS,
23434 ARM_BUILTIN_WPACKWSS,
23435 ARM_BUILTIN_WPACKDSS,
23436 ARM_BUILTIN_WPACKHUS,
23437 ARM_BUILTIN_WPACKWUS,
23438 ARM_BUILTIN_WPACKDUS,
23439
23440 ARM_BUILTIN_WADDB,
23441 ARM_BUILTIN_WADDH,
23442 ARM_BUILTIN_WADDW,
23443 ARM_BUILTIN_WADDSSB,
23444 ARM_BUILTIN_WADDSSH,
23445 ARM_BUILTIN_WADDSSW,
23446 ARM_BUILTIN_WADDUSB,
23447 ARM_BUILTIN_WADDUSH,
23448 ARM_BUILTIN_WADDUSW,
23449 ARM_BUILTIN_WSUBB,
23450 ARM_BUILTIN_WSUBH,
23451 ARM_BUILTIN_WSUBW,
23452 ARM_BUILTIN_WSUBSSB,
23453 ARM_BUILTIN_WSUBSSH,
23454 ARM_BUILTIN_WSUBSSW,
23455 ARM_BUILTIN_WSUBUSB,
23456 ARM_BUILTIN_WSUBUSH,
23457 ARM_BUILTIN_WSUBUSW,
23458
23459 ARM_BUILTIN_WAND,
23460 ARM_BUILTIN_WANDN,
23461 ARM_BUILTIN_WOR,
23462 ARM_BUILTIN_WXOR,
23463
23464 ARM_BUILTIN_WCMPEQB,
23465 ARM_BUILTIN_WCMPEQH,
23466 ARM_BUILTIN_WCMPEQW,
23467 ARM_BUILTIN_WCMPGTUB,
23468 ARM_BUILTIN_WCMPGTUH,
23469 ARM_BUILTIN_WCMPGTUW,
23470 ARM_BUILTIN_WCMPGTSB,
23471 ARM_BUILTIN_WCMPGTSH,
23472 ARM_BUILTIN_WCMPGTSW,
23473
23474 ARM_BUILTIN_TEXTRMSB,
23475 ARM_BUILTIN_TEXTRMSH,
23476 ARM_BUILTIN_TEXTRMSW,
23477 ARM_BUILTIN_TEXTRMUB,
23478 ARM_BUILTIN_TEXTRMUH,
23479 ARM_BUILTIN_TEXTRMUW,
23480 ARM_BUILTIN_TINSRB,
23481 ARM_BUILTIN_TINSRH,
23482 ARM_BUILTIN_TINSRW,
23483
23484 ARM_BUILTIN_WMAXSW,
23485 ARM_BUILTIN_WMAXSH,
23486 ARM_BUILTIN_WMAXSB,
23487 ARM_BUILTIN_WMAXUW,
23488 ARM_BUILTIN_WMAXUH,
23489 ARM_BUILTIN_WMAXUB,
23490 ARM_BUILTIN_WMINSW,
23491 ARM_BUILTIN_WMINSH,
23492 ARM_BUILTIN_WMINSB,
23493 ARM_BUILTIN_WMINUW,
23494 ARM_BUILTIN_WMINUH,
23495 ARM_BUILTIN_WMINUB,
23496
23497 ARM_BUILTIN_WMULUM,
23498 ARM_BUILTIN_WMULSM,
23499 ARM_BUILTIN_WMULUL,
23500
23501 ARM_BUILTIN_PSADBH,
23502 ARM_BUILTIN_WSHUFH,
23503
23504 ARM_BUILTIN_WSLLH,
23505 ARM_BUILTIN_WSLLW,
23506 ARM_BUILTIN_WSLLD,
23507 ARM_BUILTIN_WSRAH,
23508 ARM_BUILTIN_WSRAW,
23509 ARM_BUILTIN_WSRAD,
23510 ARM_BUILTIN_WSRLH,
23511 ARM_BUILTIN_WSRLW,
23512 ARM_BUILTIN_WSRLD,
23513 ARM_BUILTIN_WRORH,
23514 ARM_BUILTIN_WRORW,
23515 ARM_BUILTIN_WRORD,
23516 ARM_BUILTIN_WSLLHI,
23517 ARM_BUILTIN_WSLLWI,
23518 ARM_BUILTIN_WSLLDI,
23519 ARM_BUILTIN_WSRAHI,
23520 ARM_BUILTIN_WSRAWI,
23521 ARM_BUILTIN_WSRADI,
23522 ARM_BUILTIN_WSRLHI,
23523 ARM_BUILTIN_WSRLWI,
23524 ARM_BUILTIN_WSRLDI,
23525 ARM_BUILTIN_WRORHI,
23526 ARM_BUILTIN_WRORWI,
23527 ARM_BUILTIN_WRORDI,
23528
23529 ARM_BUILTIN_WUNPCKIHB,
23530 ARM_BUILTIN_WUNPCKIHH,
23531 ARM_BUILTIN_WUNPCKIHW,
23532 ARM_BUILTIN_WUNPCKILB,
23533 ARM_BUILTIN_WUNPCKILH,
23534 ARM_BUILTIN_WUNPCKILW,
23535
23536 ARM_BUILTIN_WUNPCKEHSB,
23537 ARM_BUILTIN_WUNPCKEHSH,
23538 ARM_BUILTIN_WUNPCKEHSW,
23539 ARM_BUILTIN_WUNPCKEHUB,
23540 ARM_BUILTIN_WUNPCKEHUH,
23541 ARM_BUILTIN_WUNPCKEHUW,
23542 ARM_BUILTIN_WUNPCKELSB,
23543 ARM_BUILTIN_WUNPCKELSH,
23544 ARM_BUILTIN_WUNPCKELSW,
23545 ARM_BUILTIN_WUNPCKELUB,
23546 ARM_BUILTIN_WUNPCKELUH,
23547 ARM_BUILTIN_WUNPCKELUW,
23548
23549 ARM_BUILTIN_WABSB,
23550 ARM_BUILTIN_WABSH,
23551 ARM_BUILTIN_WABSW,
23552
23553 ARM_BUILTIN_WADDSUBHX,
23554 ARM_BUILTIN_WSUBADDHX,
23555
23556 ARM_BUILTIN_WABSDIFFB,
23557 ARM_BUILTIN_WABSDIFFH,
23558 ARM_BUILTIN_WABSDIFFW,
23559
23560 ARM_BUILTIN_WADDCH,
23561 ARM_BUILTIN_WADDCW,
23562
23563 ARM_BUILTIN_WAVG4,
23564 ARM_BUILTIN_WAVG4R,
23565
23566 ARM_BUILTIN_WMADDSX,
23567 ARM_BUILTIN_WMADDUX,
23568
23569 ARM_BUILTIN_WMADDSN,
23570 ARM_BUILTIN_WMADDUN,
23571
23572 ARM_BUILTIN_WMULWSM,
23573 ARM_BUILTIN_WMULWUM,
23574
23575 ARM_BUILTIN_WMULWSMR,
23576 ARM_BUILTIN_WMULWUMR,
23577
23578 ARM_BUILTIN_WMULWL,
23579
23580 ARM_BUILTIN_WMULSMR,
23581 ARM_BUILTIN_WMULUMR,
23582
23583 ARM_BUILTIN_WQMULM,
23584 ARM_BUILTIN_WQMULMR,
23585
23586 ARM_BUILTIN_WQMULWM,
23587 ARM_BUILTIN_WQMULWMR,
23588
23589 ARM_BUILTIN_WADDBHUSM,
23590 ARM_BUILTIN_WADDBHUSL,
23591
23592 ARM_BUILTIN_WQMIABB,
23593 ARM_BUILTIN_WQMIABT,
23594 ARM_BUILTIN_WQMIATB,
23595 ARM_BUILTIN_WQMIATT,
23596
23597 ARM_BUILTIN_WQMIABBN,
23598 ARM_BUILTIN_WQMIABTN,
23599 ARM_BUILTIN_WQMIATBN,
23600 ARM_BUILTIN_WQMIATTN,
23601
23602 ARM_BUILTIN_WMIABB,
23603 ARM_BUILTIN_WMIABT,
23604 ARM_BUILTIN_WMIATB,
23605 ARM_BUILTIN_WMIATT,
23606
23607 ARM_BUILTIN_WMIABBN,
23608 ARM_BUILTIN_WMIABTN,
23609 ARM_BUILTIN_WMIATBN,
23610 ARM_BUILTIN_WMIATTN,
23611
23612 ARM_BUILTIN_WMIAWBB,
23613 ARM_BUILTIN_WMIAWBT,
23614 ARM_BUILTIN_WMIAWTB,
23615 ARM_BUILTIN_WMIAWTT,
23616
23617 ARM_BUILTIN_WMIAWBBN,
23618 ARM_BUILTIN_WMIAWBTN,
23619 ARM_BUILTIN_WMIAWTBN,
23620 ARM_BUILTIN_WMIAWTTN,
23621
23622 ARM_BUILTIN_WMERGE,
23623
23624 ARM_BUILTIN_CRC32B,
23625 ARM_BUILTIN_CRC32H,
23626 ARM_BUILTIN_CRC32W,
23627 ARM_BUILTIN_CRC32CB,
23628 ARM_BUILTIN_CRC32CH,
23629 ARM_BUILTIN_CRC32CW,
23630
23631 ARM_BUILTIN_GET_FPSCR,
23632 ARM_BUILTIN_SET_FPSCR,
23633
23634 #undef CRYPTO1
23635 #undef CRYPTO2
23636 #undef CRYPTO3
23637
23638 #define CRYPTO1(L, U, M1, M2) \
23639 ARM_BUILTIN_CRYPTO_##U,
23640 #define CRYPTO2(L, U, M1, M2, M3) \
23641 ARM_BUILTIN_CRYPTO_##U,
23642 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23643 ARM_BUILTIN_CRYPTO_##U,
23644
23645 #include "crypto.def"
23646
23647 #undef CRYPTO1
23648 #undef CRYPTO2
23649 #undef CRYPTO3
23650
23651 #include "arm_neon_builtins.def"
23652
23653 ,ARM_BUILTIN_MAX
23654 };
23655
23656 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23657
23658 #undef CF
23659 #undef VAR1
23660 #undef VAR2
23661 #undef VAR3
23662 #undef VAR4
23663 #undef VAR5
23664 #undef VAR6
23665 #undef VAR7
23666 #undef VAR8
23667 #undef VAR9
23668 #undef VAR10
23669
23670 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23671
23672 #define NUM_DREG_TYPES 5
23673 #define NUM_QREG_TYPES 6
23674
23675 static void
23676 arm_init_neon_builtins (void)
23677 {
23678 unsigned int i, fcode;
23679 tree decl;
23680
23681 tree neon_intQI_type_node;
23682 tree neon_intHI_type_node;
23683 tree neon_floatHF_type_node;
23684 tree neon_polyQI_type_node;
23685 tree neon_polyHI_type_node;
23686 tree neon_intSI_type_node;
23687 tree neon_intDI_type_node;
23688 tree neon_intUTI_type_node;
23689 tree neon_float_type_node;
23690
23691 tree intQI_pointer_node;
23692 tree intHI_pointer_node;
23693 tree intSI_pointer_node;
23694 tree intDI_pointer_node;
23695 tree float_pointer_node;
23696
23697 tree const_intQI_node;
23698 tree const_intHI_node;
23699 tree const_intSI_node;
23700 tree const_intDI_node;
23701 tree const_float_node;
23702
23703 tree const_intQI_pointer_node;
23704 tree const_intHI_pointer_node;
23705 tree const_intSI_pointer_node;
23706 tree const_intDI_pointer_node;
23707 tree const_float_pointer_node;
23708
23709 tree V8QI_type_node;
23710 tree V4HI_type_node;
23711 tree V4UHI_type_node;
23712 tree V4HF_type_node;
23713 tree V2SI_type_node;
23714 tree V2USI_type_node;
23715 tree V2SF_type_node;
23716 tree V16QI_type_node;
23717 tree V8HI_type_node;
23718 tree V8UHI_type_node;
23719 tree V4SI_type_node;
23720 tree V4USI_type_node;
23721 tree V4SF_type_node;
23722 tree V2DI_type_node;
23723 tree V2UDI_type_node;
23724
23725 tree intUQI_type_node;
23726 tree intUHI_type_node;
23727 tree intUSI_type_node;
23728 tree intUDI_type_node;
23729
23730 tree intEI_type_node;
23731 tree intOI_type_node;
23732 tree intCI_type_node;
23733 tree intXI_type_node;
23734
23735 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23736 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23737 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23738
23739 /* Create distinguished type nodes for NEON vector element types,
23740 and pointers to values of such types, so we can detect them later. */
23741 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23742 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23743 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23744 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23745 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23746 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23747 neon_float_type_node = make_node (REAL_TYPE);
23748 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23749 layout_type (neon_float_type_node);
23750 neon_floatHF_type_node = make_node (REAL_TYPE);
23751 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23752 layout_type (neon_floatHF_type_node);
23753
23754 /* Define typedefs which exactly correspond to the modes we are basing vector
23755 types on. If you change these names you'll need to change
23756 the table used by arm_mangle_type too. */
23757 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23758 "__builtin_neon_qi");
23759 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23760 "__builtin_neon_hi");
23761 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23762 "__builtin_neon_hf");
23763 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23764 "__builtin_neon_si");
23765 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23766 "__builtin_neon_sf");
23767 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23768 "__builtin_neon_di");
23769 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23770 "__builtin_neon_poly8");
23771 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23772 "__builtin_neon_poly16");
23773
23774 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23775 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23776 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23777 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23778 float_pointer_node = build_pointer_type (neon_float_type_node);
23779
23780 /* Next create constant-qualified versions of the above types. */
23781 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23782 TYPE_QUAL_CONST);
23783 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23784 TYPE_QUAL_CONST);
23785 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23786 TYPE_QUAL_CONST);
23787 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23788 TYPE_QUAL_CONST);
23789 const_float_node = build_qualified_type (neon_float_type_node,
23790 TYPE_QUAL_CONST);
23791
23792 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23793 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23794 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23795 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23796 const_float_pointer_node = build_pointer_type (const_float_node);
23797
23798 /* Unsigned integer types for various mode sizes. */
23799 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23800 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23801 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23802 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23803 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23804 /* Now create vector types based on our NEON element types. */
23805 /* 64-bit vectors. */
23806 V8QI_type_node =
23807 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23808 V4HI_type_node =
23809 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23810 V4UHI_type_node =
23811 build_vector_type_for_mode (intUHI_type_node, V4HImode);
23812 V4HF_type_node =
23813 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23814 V2SI_type_node =
23815 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23816 V2USI_type_node =
23817 build_vector_type_for_mode (intUSI_type_node, V2SImode);
23818 V2SF_type_node =
23819 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23820 /* 128-bit vectors. */
23821 V16QI_type_node =
23822 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23823 V8HI_type_node =
23824 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23825 V8UHI_type_node =
23826 build_vector_type_for_mode (intUHI_type_node, V8HImode);
23827 V4SI_type_node =
23828 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23829 V4USI_type_node =
23830 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23831 V4SF_type_node =
23832 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23833 V2DI_type_node =
23834 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23835 V2UDI_type_node =
23836 build_vector_type_for_mode (intUDI_type_node, V2DImode);
23837
23838
23839 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23840 "__builtin_neon_uqi");
23841 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23842 "__builtin_neon_uhi");
23843 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23844 "__builtin_neon_usi");
23845 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23846 "__builtin_neon_udi");
23847 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23848 "__builtin_neon_poly64");
23849 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23850 "__builtin_neon_poly128");
23851
23852 /* Opaque integer types for structures of vectors. */
23853 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23854 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23855 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23856 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23857
23858 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23859 "__builtin_neon_ti");
23860 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23861 "__builtin_neon_ei");
23862 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23863 "__builtin_neon_oi");
23864 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23865 "__builtin_neon_ci");
23866 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23867 "__builtin_neon_xi");
23868
23869 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23870 {
23871
23872 tree V16UQI_type_node =
23873 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23874
23875 tree v16uqi_ftype_v16uqi
23876 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23877
23878 tree v16uqi_ftype_v16uqi_v16uqi
23879 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23880 V16UQI_type_node, NULL_TREE);
23881
23882 tree v4usi_ftype_v4usi
23883 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23884
23885 tree v4usi_ftype_v4usi_v4usi
23886 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23887 V4USI_type_node, NULL_TREE);
23888
23889 tree v4usi_ftype_v4usi_v4usi_v4usi
23890 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23891 V4USI_type_node, V4USI_type_node, NULL_TREE);
23892
23893 tree uti_ftype_udi_udi
23894 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23895 intUDI_type_node, NULL_TREE);
23896
23897 #undef CRYPTO1
23898 #undef CRYPTO2
23899 #undef CRYPTO3
23900 #undef C
23901 #undef N
23902 #undef CF
23903 #undef FT1
23904 #undef FT2
23905 #undef FT3
23906
23907 #define C(U) \
23908 ARM_BUILTIN_CRYPTO_##U
23909 #define N(L) \
23910 "__builtin_arm_crypto_"#L
23911 #define FT1(R, A) \
23912 R##_ftype_##A
23913 #define FT2(R, A1, A2) \
23914 R##_ftype_##A1##_##A2
23915 #define FT3(R, A1, A2, A3) \
23916 R##_ftype_##A1##_##A2##_##A3
23917 #define CRYPTO1(L, U, R, A) \
23918 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23919 C (U), BUILT_IN_MD, \
23920 NULL, NULL_TREE);
23921 #define CRYPTO2(L, U, R, A1, A2) \
23922 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23923 C (U), BUILT_IN_MD, \
23924 NULL, NULL_TREE);
23925
23926 #define CRYPTO3(L, U, R, A1, A2, A3) \
23927 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23928 C (U), BUILT_IN_MD, \
23929 NULL, NULL_TREE);
23930 #include "crypto.def"
23931
23932 #undef CRYPTO1
23933 #undef CRYPTO2
23934 #undef CRYPTO3
23935 #undef C
23936 #undef N
23937 #undef FT1
23938 #undef FT2
23939 #undef FT3
23940 }
23941 dreg_types[0] = V8QI_type_node;
23942 dreg_types[1] = V4HI_type_node;
23943 dreg_types[2] = V2SI_type_node;
23944 dreg_types[3] = V2SF_type_node;
23945 dreg_types[4] = neon_intDI_type_node;
23946
23947 qreg_types[0] = V16QI_type_node;
23948 qreg_types[1] = V8HI_type_node;
23949 qreg_types[2] = V4SI_type_node;
23950 qreg_types[3] = V4SF_type_node;
23951 qreg_types[4] = V2DI_type_node;
23952 qreg_types[5] = neon_intUTI_type_node;
23953
23954 for (i = 0; i < NUM_QREG_TYPES; i++)
23955 {
23956 int j;
23957 for (j = 0; j < NUM_QREG_TYPES; j++)
23958 {
23959 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
23960 reinterp_ftype_dreg[i][j]
23961 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23962
23963 reinterp_ftype_qreg[i][j]
23964 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23965 }
23966 }
23967
23968 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23969 i < ARRAY_SIZE (neon_builtin_data);
23970 i++, fcode++)
23971 {
23972 neon_builtin_datum *d = &neon_builtin_data[i];
23973
23974 const char* const modenames[] = {
23975 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23976 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23977 "ti", "ei", "oi"
23978 };
23979 char namebuf[60];
23980 tree ftype = NULL;
23981 int is_load = 0, is_store = 0;
23982
23983 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23984
23985 d->fcode = fcode;
23986
23987 switch (d->itype)
23988 {
23989 case NEON_LOAD1:
23990 case NEON_LOAD1LANE:
23991 case NEON_LOADSTRUCT:
23992 case NEON_LOADSTRUCTLANE:
23993 is_load = 1;
23994 /* Fall through. */
23995 case NEON_STORE1:
23996 case NEON_STORE1LANE:
23997 case NEON_STORESTRUCT:
23998 case NEON_STORESTRUCTLANE:
23999 if (!is_load)
24000 is_store = 1;
24001 /* Fall through. */
24002 case NEON_UNOP:
24003 case NEON_RINT:
24004 case NEON_BINOP:
24005 case NEON_LOGICBINOP:
24006 case NEON_SHIFTINSERT:
24007 case NEON_TERNOP:
24008 case NEON_GETLANE:
24009 case NEON_SETLANE:
24010 case NEON_CREATE:
24011 case NEON_DUP:
24012 case NEON_DUPLANE:
24013 case NEON_SHIFTIMM:
24014 case NEON_SHIFTACC:
24015 case NEON_COMBINE:
24016 case NEON_SPLIT:
24017 case NEON_CONVERT:
24018 case NEON_FIXCONV:
24019 case NEON_LANEMUL:
24020 case NEON_LANEMULL:
24021 case NEON_LANEMULH:
24022 case NEON_LANEMAC:
24023 case NEON_SCALARMUL:
24024 case NEON_SCALARMULL:
24025 case NEON_SCALARMULH:
24026 case NEON_SCALARMAC:
24027 case NEON_SELECT:
24028 case NEON_VTBL:
24029 case NEON_VTBX:
24030 {
24031 int k;
24032 tree return_type = void_type_node, args = void_list_node;
24033
24034 /* Build a function type directly from the insn_data for
24035 this builtin. The build_function_type() function takes
24036 care of removing duplicates for us. */
24037 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
24038 {
24039 tree eltype;
24040
24041 if (is_load && k == 1)
24042 {
24043 /* Neon load patterns always have the memory
24044 operand in the operand 1 position. */
24045 gcc_assert (insn_data[d->code].operand[k].predicate
24046 == neon_struct_operand);
24047
24048 switch (d->mode)
24049 {
24050 case T_V8QI:
24051 case T_V16QI:
24052 eltype = const_intQI_pointer_node;
24053 break;
24054
24055 case T_V4HI:
24056 case T_V8HI:
24057 eltype = const_intHI_pointer_node;
24058 break;
24059
24060 case T_V2SI:
24061 case T_V4SI:
24062 eltype = const_intSI_pointer_node;
24063 break;
24064
24065 case T_V2SF:
24066 case T_V4SF:
24067 eltype = const_float_pointer_node;
24068 break;
24069
24070 case T_DI:
24071 case T_V2DI:
24072 eltype = const_intDI_pointer_node;
24073 break;
24074
24075 default: gcc_unreachable ();
24076 }
24077 }
24078 else if (is_store && k == 0)
24079 {
24080 /* Similarly, Neon store patterns use operand 0 as
24081 the memory location to store to. */
24082 gcc_assert (insn_data[d->code].operand[k].predicate
24083 == neon_struct_operand);
24084
24085 switch (d->mode)
24086 {
24087 case T_V8QI:
24088 case T_V16QI:
24089 eltype = intQI_pointer_node;
24090 break;
24091
24092 case T_V4HI:
24093 case T_V8HI:
24094 eltype = intHI_pointer_node;
24095 break;
24096
24097 case T_V2SI:
24098 case T_V4SI:
24099 eltype = intSI_pointer_node;
24100 break;
24101
24102 case T_V2SF:
24103 case T_V4SF:
24104 eltype = float_pointer_node;
24105 break;
24106
24107 case T_DI:
24108 case T_V2DI:
24109 eltype = intDI_pointer_node;
24110 break;
24111
24112 default: gcc_unreachable ();
24113 }
24114 }
24115 else
24116 {
24117 switch (insn_data[d->code].operand[k].mode)
24118 {
24119 case VOIDmode: eltype = void_type_node; break;
24120 /* Scalars. */
24121 case QImode: eltype = neon_intQI_type_node; break;
24122 case HImode: eltype = neon_intHI_type_node; break;
24123 case SImode: eltype = neon_intSI_type_node; break;
24124 case SFmode: eltype = neon_float_type_node; break;
24125 case DImode: eltype = neon_intDI_type_node; break;
24126 case TImode: eltype = intTI_type_node; break;
24127 case EImode: eltype = intEI_type_node; break;
24128 case OImode: eltype = intOI_type_node; break;
24129 case CImode: eltype = intCI_type_node; break;
24130 case XImode: eltype = intXI_type_node; break;
24131 /* 64-bit vectors. */
24132 case V8QImode: eltype = V8QI_type_node; break;
24133 case V4HImode: eltype = V4HI_type_node; break;
24134 case V2SImode: eltype = V2SI_type_node; break;
24135 case V2SFmode: eltype = V2SF_type_node; break;
24136 /* 128-bit vectors. */
24137 case V16QImode: eltype = V16QI_type_node; break;
24138 case V8HImode: eltype = V8HI_type_node; break;
24139 case V4SImode: eltype = V4SI_type_node; break;
24140 case V4SFmode: eltype = V4SF_type_node; break;
24141 case V2DImode: eltype = V2DI_type_node; break;
24142 default: gcc_unreachable ();
24143 }
24144 }
24145
24146 if (k == 0 && !is_store)
24147 return_type = eltype;
24148 else
24149 args = tree_cons (NULL_TREE, eltype, args);
24150 }
24151
24152 ftype = build_function_type (return_type, args);
24153 }
24154 break;
24155
24156 case NEON_REINTERP:
24157 {
24158 /* We iterate over NUM_DREG_TYPES doubleword types,
24159 then NUM_QREG_TYPES quadword types.
24160 V4HF is not a type used in reinterpret, so we translate
24161 d->mode to the correct index in reinterp_ftype_dreg. */
24162 bool qreg_p
24163 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24164 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24165 % NUM_QREG_TYPES;
24166 switch (insn_data[d->code].operand[0].mode)
24167 {
24168 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24169 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24170 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24171 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24172 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24173 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24174 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24175 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24176 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24177 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24178 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24179 default: gcc_unreachable ();
24180 }
24181 }
24182 break;
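	    /* Editorial worked example for the index computation above
	       (assuming the neon_builtin_type_mode enumeration follows the
	       modenames order listed at the top of this loop): for a
	       doubleword mode such as T_V2SI, which comes after T_V4HF, rhs
	       is (T_V2SI - 1) % NUM_QREG_TYPES and selects the V2SI column of
	       reinterp_ftype_dreg; for a quadword mode such as T_V16QI no
	       adjustment is made and rhs wraps around to the first quadword
	       column (V16QI) of reinterp_ftype_qreg.  */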
24183 case NEON_FLOAT_WIDEN:
24184 {
24185 tree eltype = NULL_TREE;
24186 tree return_type = NULL_TREE;
24187
24188 switch (insn_data[d->code].operand[1].mode)
24189 {
24190 case V4HFmode:
24191 eltype = V4HF_type_node;
24192 return_type = V4SF_type_node;
24193 break;
24194 default: gcc_unreachable ();
24195 }
24196 ftype = build_function_type_list (return_type, eltype, NULL);
24197 break;
24198 }
24199 case NEON_FLOAT_NARROW:
24200 {
24201 tree eltype = NULL_TREE;
24202 tree return_type = NULL_TREE;
24203
24204 switch (insn_data[d->code].operand[1].mode)
24205 {
24206 case V4SFmode:
24207 eltype = V4SF_type_node;
24208 return_type = V4HF_type_node;
24209 break;
24210 default: gcc_unreachable ();
24211 }
24212 ftype = build_function_type_list (return_type, eltype, NULL);
24213 break;
24214 }
24215 case NEON_BSWAP:
24216 {
24217 tree eltype = NULL_TREE;
24218 switch (insn_data[d->code].operand[1].mode)
24219 {
24220 case V4HImode:
24221 eltype = V4UHI_type_node;
24222 break;
24223 case V8HImode:
24224 eltype = V8UHI_type_node;
24225 break;
24226 case V2SImode:
24227 eltype = V2USI_type_node;
24228 break;
24229 case V4SImode:
24230 eltype = V4USI_type_node;
24231 break;
24232 case V2DImode:
24233 eltype = V2UDI_type_node;
24234 break;
24235 default: gcc_unreachable ();
24236 }
24237 ftype = build_function_type_list (eltype, eltype, NULL);
24238 break;
24239 }
24240 default:
24241 gcc_unreachable ();
24242 }
24243
24244 gcc_assert (ftype != NULL);
24245
24246 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24247
24248 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24249 NULL_TREE);
24250 arm_builtin_decls[fcode] = decl;
24251 }
24252 }
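/* Editorial note: the loop above derives each Neon builtin's name by
   appending the mode suffix from modenames[] to the table entry's name, so a
   hypothetical entry named "vadd" with mode T_V8QI would be registered as
   "__builtin_neon_vaddv8qi".  arm_neon.h then maps the user-visible
   intrinsics onto these internal builtins.  */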
24253
24254 #undef NUM_DREG_TYPES
24255 #undef NUM_QREG_TYPES
24256
24257 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24258 do \
24259 { \
24260 if ((MASK) & insn_flags) \
24261 { \
24262 tree bdecl; \
24263 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24264 BUILT_IN_MD, NULL, NULL_TREE); \
24265 arm_builtin_decls[CODE] = bdecl; \
24266 } \
24267 } \
24268 while (0)
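/* Editorial usage sketch for def_mbuiltin (example values taken from the
   iWMMXt table below): def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero",
   di_ftype_void, ARM_BUILTIN_WZERO) creates the builtin only when the
   selected CPU's insn_flags include FL_IWMMXT; otherwise the call is a
   no-op and no declaration is recorded.  */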
24269
24270 struct builtin_description
24271 {
24272 const unsigned int mask;
24273 const enum insn_code icode;
24274 const char * const name;
24275 const enum arm_builtins code;
24276 const enum rtx_code comparison;
24277 const unsigned int flag;
24278 };
24279
24280 static const struct builtin_description bdesc_2arg[] =
24281 {
24282 #define IWMMXT_BUILTIN(code, string, builtin) \
24283 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24284 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24285
24286 #define IWMMXT2_BUILTIN(code, string, builtin) \
24287 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24288 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24289
24290 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24291 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24292 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24293 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24294 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24295 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24296 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24297 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24298 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24299 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24300 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24301 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24302 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24303 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24304 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24305 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24306 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24307 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24308 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24309 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24310 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24311 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24312 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24313 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24314 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24315 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24316 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24317 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24318 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24319 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24320 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24321 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24322 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24323 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24324 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24325 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24326 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24327 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24328 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24329 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24330 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24331 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24332 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24333 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24334 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24335 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24336 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24337 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24338 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24339 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24340 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24341 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24342 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24343 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24344 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24345 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24346 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24347 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24348 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24349 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24350 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24351 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24352 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24353 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24354 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24355 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24356 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24357 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24358 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24359 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24360 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24361 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24362 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24363 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24364 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24365 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24366 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24367 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24368
24369 #define IWMMXT_BUILTIN2(code, builtin) \
24370 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24371
24372 #define IWMMXT2_BUILTIN2(code, builtin) \
24373 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24374
24375 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24376 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24377 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24378 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24379 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24380 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24381 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24382 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24383 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24384 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24385
24386
24387 #define FP_BUILTIN(L, U) \
24388 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24389 UNKNOWN, 0},
24390
24391 FP_BUILTIN (get_fpscr, GET_FPSCR)
24392 FP_BUILTIN (set_fpscr, SET_FPSCR)
24393 #undef FP_BUILTIN
24394
24395 #define CRC32_BUILTIN(L, U) \
24396 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24397 UNKNOWN, 0},
24398 CRC32_BUILTIN (crc32b, CRC32B)
24399 CRC32_BUILTIN (crc32h, CRC32H)
24400 CRC32_BUILTIN (crc32w, CRC32W)
24401 CRC32_BUILTIN (crc32cb, CRC32CB)
24402 CRC32_BUILTIN (crc32ch, CRC32CH)
24403 CRC32_BUILTIN (crc32cw, CRC32CW)
24404 #undef CRC32_BUILTIN
24405
24406
24407 #define CRYPTO_BUILTIN(L, U) \
24408 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24409 UNKNOWN, 0},
24410 #undef CRYPTO1
24411 #undef CRYPTO2
24412 #undef CRYPTO3
24413 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24414 #define CRYPTO1(L, U, R, A)
24415 #define CRYPTO3(L, U, R, A1, A2, A3)
24416 #include "crypto.def"
24417 #undef CRYPTO1
24418 #undef CRYPTO2
24419 #undef CRYPTO3
24420
24421 };
24422
24423 static const struct builtin_description bdesc_1arg[] =
24424 {
24425 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24426 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24427 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24428 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24429 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24430 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24431 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24432 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24433 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24434 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24435 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24436 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24437 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24438 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24439 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24440 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24441 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24442 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24443 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24444 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24445 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24446 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24447 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24448 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24449
24450 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24451 #define CRYPTO2(L, U, R, A1, A2)
24452 #define CRYPTO3(L, U, R, A1, A2, A3)
24453 #include "crypto.def"
24454 #undef CRYPTO1
24455 #undef CRYPTO2
24456 #undef CRYPTO3
24457 };
24458
24459 static const struct builtin_description bdesc_3arg[] =
24460 {
24461 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24462 #define CRYPTO1(L, U, R, A)
24463 #define CRYPTO2(L, U, R, A1, A2)
24464 #include "crypto.def"
24465 #undef CRYPTO1
24466 #undef CRYPTO2
24467 #undef CRYPTO3
24468 };
24469 #undef CRYPTO_BUILTIN
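/* Editorial note: crypto.def is included three times above with different
   CRYPTOn definitions.  For each table only the macro of the matching arity
   expands to CRYPTO_BUILTIN while the other two expand to nothing, so the
   unary, binary and ternary crypto builtins are sorted into bdesc_1arg,
   bdesc_2arg and bdesc_3arg respectively.  */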
24470
24471 /* Set up all the iWMMXt builtins. This is not called if
24472 TARGET_IWMMXT is zero. */
24473
24474 static void
24475 arm_init_iwmmxt_builtins (void)
24476 {
24477 const struct builtin_description * d;
24478 size_t i;
24479
24480 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24481 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24482 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24483
24484 tree v8qi_ftype_v8qi_v8qi_int
24485 = build_function_type_list (V8QI_type_node,
24486 V8QI_type_node, V8QI_type_node,
24487 integer_type_node, NULL_TREE);
24488 tree v4hi_ftype_v4hi_int
24489 = build_function_type_list (V4HI_type_node,
24490 V4HI_type_node, integer_type_node, NULL_TREE);
24491 tree v2si_ftype_v2si_int
24492 = build_function_type_list (V2SI_type_node,
24493 V2SI_type_node, integer_type_node, NULL_TREE);
24494 tree v2si_ftype_di_di
24495 = build_function_type_list (V2SI_type_node,
24496 long_long_integer_type_node,
24497 long_long_integer_type_node,
24498 NULL_TREE);
24499 tree di_ftype_di_int
24500 = build_function_type_list (long_long_integer_type_node,
24501 long_long_integer_type_node,
24502 integer_type_node, NULL_TREE);
24503 tree di_ftype_di_int_int
24504 = build_function_type_list (long_long_integer_type_node,
24505 long_long_integer_type_node,
24506 integer_type_node,
24507 integer_type_node, NULL_TREE);
24508 tree int_ftype_v8qi
24509 = build_function_type_list (integer_type_node,
24510 V8QI_type_node, NULL_TREE);
24511 tree int_ftype_v4hi
24512 = build_function_type_list (integer_type_node,
24513 V4HI_type_node, NULL_TREE);
24514 tree int_ftype_v2si
24515 = build_function_type_list (integer_type_node,
24516 V2SI_type_node, NULL_TREE);
24517 tree int_ftype_v8qi_int
24518 = build_function_type_list (integer_type_node,
24519 V8QI_type_node, integer_type_node, NULL_TREE);
24520 tree int_ftype_v4hi_int
24521 = build_function_type_list (integer_type_node,
24522 V4HI_type_node, integer_type_node, NULL_TREE);
24523 tree int_ftype_v2si_int
24524 = build_function_type_list (integer_type_node,
24525 V2SI_type_node, integer_type_node, NULL_TREE);
24526 tree v8qi_ftype_v8qi_int_int
24527 = build_function_type_list (V8QI_type_node,
24528 V8QI_type_node, integer_type_node,
24529 integer_type_node, NULL_TREE);
24530 tree v4hi_ftype_v4hi_int_int
24531 = build_function_type_list (V4HI_type_node,
24532 V4HI_type_node, integer_type_node,
24533 integer_type_node, NULL_TREE);
24534 tree v2si_ftype_v2si_int_int
24535 = build_function_type_list (V2SI_type_node,
24536 V2SI_type_node, integer_type_node,
24537 integer_type_node, NULL_TREE);
24538 /* Miscellaneous. */
24539 tree v8qi_ftype_v4hi_v4hi
24540 = build_function_type_list (V8QI_type_node,
24541 V4HI_type_node, V4HI_type_node, NULL_TREE);
24542 tree v4hi_ftype_v2si_v2si
24543 = build_function_type_list (V4HI_type_node,
24544 V2SI_type_node, V2SI_type_node, NULL_TREE);
24545 tree v8qi_ftype_v4hi_v8qi
24546 = build_function_type_list (V8QI_type_node,
24547 V4HI_type_node, V8QI_type_node, NULL_TREE);
24548 tree v2si_ftype_v4hi_v4hi
24549 = build_function_type_list (V2SI_type_node,
24550 V4HI_type_node, V4HI_type_node, NULL_TREE);
24551 tree v2si_ftype_v8qi_v8qi
24552 = build_function_type_list (V2SI_type_node,
24553 V8QI_type_node, V8QI_type_node, NULL_TREE);
24554 tree v4hi_ftype_v4hi_di
24555 = build_function_type_list (V4HI_type_node,
24556 V4HI_type_node, long_long_integer_type_node,
24557 NULL_TREE);
24558 tree v2si_ftype_v2si_di
24559 = build_function_type_list (V2SI_type_node,
24560 V2SI_type_node, long_long_integer_type_node,
24561 NULL_TREE);
24562 tree di_ftype_void
24563 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24564 tree int_ftype_void
24565 = build_function_type_list (integer_type_node, NULL_TREE);
24566 tree di_ftype_v8qi
24567 = build_function_type_list (long_long_integer_type_node,
24568 V8QI_type_node, NULL_TREE);
24569 tree di_ftype_v4hi
24570 = build_function_type_list (long_long_integer_type_node,
24571 V4HI_type_node, NULL_TREE);
24572 tree di_ftype_v2si
24573 = build_function_type_list (long_long_integer_type_node,
24574 V2SI_type_node, NULL_TREE);
24575 tree v2si_ftype_v4hi
24576 = build_function_type_list (V2SI_type_node,
24577 V4HI_type_node, NULL_TREE);
24578 tree v4hi_ftype_v8qi
24579 = build_function_type_list (V4HI_type_node,
24580 V8QI_type_node, NULL_TREE);
24581 tree v8qi_ftype_v8qi
24582 = build_function_type_list (V8QI_type_node,
24583 V8QI_type_node, NULL_TREE);
24584 tree v4hi_ftype_v4hi
24585 = build_function_type_list (V4HI_type_node,
24586 V4HI_type_node, NULL_TREE);
24587 tree v2si_ftype_v2si
24588 = build_function_type_list (V2SI_type_node,
24589 V2SI_type_node, NULL_TREE);
24590
24591 tree di_ftype_di_v4hi_v4hi
24592 = build_function_type_list (long_long_unsigned_type_node,
24593 long_long_unsigned_type_node,
24594 V4HI_type_node, V4HI_type_node,
24595 NULL_TREE);
24596
24597 tree di_ftype_v4hi_v4hi
24598 = build_function_type_list (long_long_unsigned_type_node,
24599 V4HI_type_node,V4HI_type_node,
24600 NULL_TREE);
24601
24602 tree v2si_ftype_v2si_v4hi_v4hi
24603 = build_function_type_list (V2SI_type_node,
24604 V2SI_type_node, V4HI_type_node,
24605 V4HI_type_node, NULL_TREE);
24606
24607 tree v2si_ftype_v2si_v8qi_v8qi
24608 = build_function_type_list (V2SI_type_node,
24609 V2SI_type_node, V8QI_type_node,
24610 V8QI_type_node, NULL_TREE);
24611
24612 tree di_ftype_di_v2si_v2si
24613 = build_function_type_list (long_long_unsigned_type_node,
24614 long_long_unsigned_type_node,
24615 V2SI_type_node, V2SI_type_node,
24616 NULL_TREE);
24617
24618 tree di_ftype_di_di_int
24619 = build_function_type_list (long_long_unsigned_type_node,
24620 long_long_unsigned_type_node,
24621 long_long_unsigned_type_node,
24622 integer_type_node, NULL_TREE);
24623
24624 tree void_ftype_int
24625 = build_function_type_list (void_type_node,
24626 integer_type_node, NULL_TREE);
24627
24628 tree v8qi_ftype_char
24629 = build_function_type_list (V8QI_type_node,
24630 signed_char_type_node, NULL_TREE);
24631
24632 tree v4hi_ftype_short
24633 = build_function_type_list (V4HI_type_node,
24634 short_integer_type_node, NULL_TREE);
24635
24636 tree v2si_ftype_int
24637 = build_function_type_list (V2SI_type_node,
24638 integer_type_node, NULL_TREE);
24639
24640 /* Normal vector binops. */
24641 tree v8qi_ftype_v8qi_v8qi
24642 = build_function_type_list (V8QI_type_node,
24643 V8QI_type_node, V8QI_type_node, NULL_TREE);
24644 tree v4hi_ftype_v4hi_v4hi
24645 = build_function_type_list (V4HI_type_node,
24646 V4HI_type_node,V4HI_type_node, NULL_TREE);
24647 tree v2si_ftype_v2si_v2si
24648 = build_function_type_list (V2SI_type_node,
24649 V2SI_type_node, V2SI_type_node, NULL_TREE);
24650 tree di_ftype_di_di
24651 = build_function_type_list (long_long_unsigned_type_node,
24652 long_long_unsigned_type_node,
24653 long_long_unsigned_type_node,
24654 NULL_TREE);
24655
24656 /* Add all builtins that are more or less simple operations on two
24657 operands. */
24658 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24659 {
24660 /* Use one of the operands; the target can have a different mode for
24661 mask-generating compares. */
24662 enum machine_mode mode;
24663 tree type;
24664
24665 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24666 continue;
24667
24668 mode = insn_data[d->icode].operand[1].mode;
24669
24670 switch (mode)
24671 {
24672 case V8QImode:
24673 type = v8qi_ftype_v8qi_v8qi;
24674 break;
24675 case V4HImode:
24676 type = v4hi_ftype_v4hi_v4hi;
24677 break;
24678 case V2SImode:
24679 type = v2si_ftype_v2si_v2si;
24680 break;
24681 case DImode:
24682 type = di_ftype_di_di;
24683 break;
24684
24685 default:
24686 gcc_unreachable ();
24687 }
24688
24689 def_mbuiltin (d->mask, d->name, type, d->code);
24690 }
24691
24692 /* Add the remaining MMX insns with somewhat more complicated types. */
24693 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24694 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24695 ARM_BUILTIN_ ## CODE)
24696
24697 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24698 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24699 ARM_BUILTIN_ ## CODE)
24700
24701 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24702 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24703 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24704 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24705 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24706 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24707 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24708 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24709 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24710
24711 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24712 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24713 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24714 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24715 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24716 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24717
24718 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24719 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24720 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24721 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24722 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24723 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24724
24725 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24726 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24727 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24728 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24729 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24730 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24731
24732 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24733 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24734 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24735 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24736 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24737 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24738
24739 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24740
24741 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24742 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24743 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24744 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24745 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24746 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24747 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24748 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24749 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24750 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24751
24752 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24753 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24754 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24755 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24756 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24757 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24758 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24759 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24760 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24761
24762 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24763 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24764 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24765
24766 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24767 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24768 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24769
24770 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24771 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24772
24773 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24774 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24775 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24776 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24777 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24778 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24779
24780 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24781 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24782 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24783 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24784 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24785 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24786 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24787 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24788 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24789 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24790 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24791 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24792
24793 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24794 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24795 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24796 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24797
24798 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24799 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24800 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24801 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24802 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24803 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24804 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24805
24806 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24807 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24808 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24809
24810 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24811 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24812 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24813 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24814
24815 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24816 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24817 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24818 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24819
24820 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24821 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24822 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24823 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24824
24825 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24826 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24827 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24828 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24829
24830 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24831 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24832 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24833 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24834
24835 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24836 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24837 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24838 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24839
24840 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24841
24842 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24843 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24844 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24845
24846 #undef iwmmx_mbuiltin
24847 #undef iwmmx2_mbuiltin
24848 }
24849
24850 static void
24851 arm_init_fp16_builtins (void)
24852 {
24853 tree fp16_type = make_node (REAL_TYPE);
24854 TYPE_PRECISION (fp16_type) = 16;
24855 layout_type (fp16_type);
24856 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
24857 }
24858
24859 static void
24860 arm_init_crc32_builtins ()
24861 {
24862 tree si_ftype_si_qi
24863 = build_function_type_list (unsigned_intSI_type_node,
24864 unsigned_intSI_type_node,
24865 unsigned_intQI_type_node, NULL_TREE);
24866 tree si_ftype_si_hi
24867 = build_function_type_list (unsigned_intSI_type_node,
24868 unsigned_intSI_type_node,
24869 unsigned_intHI_type_node, NULL_TREE);
24870 tree si_ftype_si_si
24871 = build_function_type_list (unsigned_intSI_type_node,
24872 unsigned_intSI_type_node,
24873 unsigned_intSI_type_node, NULL_TREE);
24874
24875 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24876 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24877 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24878 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24879 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24880 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24881 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24882 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24883 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24884 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24885 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24886 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24887 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24888 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24889 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24890 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24891 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24892 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
24893 }
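/* Editorial usage sketch (illustrative only): with the types registered
   above, a call such as

     crc = __builtin_arm_crc32w (crc, data);

   takes and returns unsigned 32-bit values; the crc32b/crc32h variants
   differ only in the width of the second operand, and the crc32c* forms use
   the CRC-32C polynomial.  */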
24894
24895 static void
24896 arm_init_builtins (void)
24897 {
24898 if (TARGET_REALLY_IWMMXT)
24899 arm_init_iwmmxt_builtins ();
24900
24901 if (TARGET_NEON)
24902 arm_init_neon_builtins ();
24903
24904 if (arm_fp16_format)
24905 arm_init_fp16_builtins ();
24906
24907 if (TARGET_CRC32)
24908 arm_init_crc32_builtins ();
24909
24910 if (TARGET_VFP && TARGET_HARD_FLOAT)
24911 {
24912 tree ftype_set_fpscr
24913 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
24914 tree ftype_get_fpscr
24915 = build_function_type_list (unsigned_type_node, NULL);
24916
24917 arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
24918 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
24919 ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
24920 arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
24921 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
24922 ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
24923 }
24924 }
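/* Editorial usage sketch (illustrative only): with the declarations
   registered above,

     unsigned int fpscr = __builtin_arm_ldfscr ();
     __builtin_arm_stfscr (fpscr);

   reads and then rewrites the floating-point status and control register as
   a plain unsigned value.  */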
24925
24926 /* Return the ARM builtin for CODE. */
24927
24928 static tree
24929 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24930 {
24931 if (code >= ARM_BUILTIN_MAX)
24932 return error_mark_node;
24933
24934 return arm_builtin_decls[code];
24935 }
24936
24937 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24938
24939 static const char *
24940 arm_invalid_parameter_type (const_tree t)
24941 {
24942 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24943 return N_("function parameters cannot have __fp16 type");
24944 return NULL;
24945 }
24946
24947 /* Implement TARGET_INVALID_RETURN_TYPE. */
24948
24949 static const char *
24950 arm_invalid_return_type (const_tree t)
24951 {
24952 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24953 return N_("functions cannot return __fp16 type");
24954 return NULL;
24955 }
24956
24957 /* Implement TARGET_PROMOTED_TYPE. */
24958
24959 static tree
24960 arm_promoted_type (const_tree t)
24961 {
24962 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24963 return float_type_node;
24964 return NULL_TREE;
24965 }
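/* Editorial note on the promotion above: arithmetic on __fp16 operands is
   carried out in float, so for __fp16 a, b the expression a + b is evaluated
   as (float) a + (float) b and is only narrowed back to __fp16 if the result
   is stored in one.  */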
24966
24967 /* Implement TARGET_CONVERT_TO_TYPE.
24968 Specifically, this hook implements the peculiarity of the ARM
24969 half-precision floating-point C semantics that requires conversions between
24970 __fp16 and double to go through an intermediate conversion to float. */
24971
24972 static tree
24973 arm_convert_to_type (tree type, tree expr)
24974 {
24975 tree fromtype = TREE_TYPE (expr);
24976 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24977 return NULL_TREE;
24978 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24979 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24980 return convert (type, convert (float_type_node, expr));
24981 return NULL_TREE;
24982 }
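/* Editorial example of the effect: a conversion such as (__fp16) d for a
   double d is expanded by this hook as (__fp16) (float) d, and (double) h
   for an __fp16 h becomes (double) (float) h, which is the two-step
   conversion described in the comment above.  */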
24983
24984 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24985 This simply adds HFmode as a supported mode; even though we don't
24986 implement arithmetic on this type directly, it's supported by
24987 optabs conversions, much the way the double-word arithmetic is
24988 special-cased in the default hook. */
24989
24990 static bool
24991 arm_scalar_mode_supported_p (enum machine_mode mode)
24992 {
24993 if (mode == HFmode)
24994 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24995 else if (ALL_FIXED_POINT_MODE_P (mode))
24996 return true;
24997 else
24998 return default_scalar_mode_supported_p (mode);
24999 }
25000
25001 /* Errors in the source file can cause expand_expr to return const0_rtx
25002 where we expect a vector. To avoid crashing, use one of the vector
25003 clear instructions. */
25004
25005 static rtx
25006 safe_vector_operand (rtx x, enum machine_mode mode)
25007 {
25008 if (x != const0_rtx)
25009 return x;
25010 x = gen_reg_rtx (mode);
25011
25012 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
25013 : gen_rtx_SUBREG (DImode, x, 0)));
25014 return x;
25015 }
25016
25017 /* Function to expand ternary builtins. */
25018 static rtx
25019 arm_expand_ternop_builtin (enum insn_code icode,
25020 tree exp, rtx target)
25021 {
25022 rtx pat;
25023 tree arg0 = CALL_EXPR_ARG (exp, 0);
25024 tree arg1 = CALL_EXPR_ARG (exp, 1);
25025 tree arg2 = CALL_EXPR_ARG (exp, 2);
25026
25027 rtx op0 = expand_normal (arg0);
25028 rtx op1 = expand_normal (arg1);
25029 rtx op2 = expand_normal (arg2);
25030 rtx op3 = NULL_RTX;
25031
25032 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
25033 lane operand depending on endianness. */
25034 bool builtin_sha1cpm_p = false;
25035
25036 if (insn_data[icode].n_operands == 5)
25037 {
25038 gcc_assert (icode == CODE_FOR_crypto_sha1c
25039 || icode == CODE_FOR_crypto_sha1p
25040 || icode == CODE_FOR_crypto_sha1m);
25041 builtin_sha1cpm_p = true;
25042 }
25043 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25044 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25045 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25046 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
25047
25048
25049 if (VECTOR_MODE_P (mode0))
25050 op0 = safe_vector_operand (op0, mode0);
25051 if (VECTOR_MODE_P (mode1))
25052 op1 = safe_vector_operand (op1, mode1);
25053 if (VECTOR_MODE_P (mode2))
25054 op2 = safe_vector_operand (op2, mode2);
25055
25056 if (! target
25057 || GET_MODE (target) != tmode
25058 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25059 target = gen_reg_rtx (tmode);
25060
25061 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25062 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
25063 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
25064
25065 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25066 op0 = copy_to_mode_reg (mode0, op0);
25067 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25068 op1 = copy_to_mode_reg (mode1, op1);
25069 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25070 op2 = copy_to_mode_reg (mode2, op2);
25071 if (builtin_sha1cpm_p)
25072 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25073
25074 if (builtin_sha1cpm_p)
25075 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
25076 else
25077 pat = GEN_FCN (icode) (target, op0, op1, op2);
25078 if (! pat)
25079 return 0;
25080 emit_insn (pat);
25081 return target;
25082 }
25083
25084 /* Subroutine of arm_expand_builtin to take care of binop insns. */
25085
25086 static rtx
25087 arm_expand_binop_builtin (enum insn_code icode,
25088 tree exp, rtx target)
25089 {
25090 rtx pat;
25091 tree arg0 = CALL_EXPR_ARG (exp, 0);
25092 tree arg1 = CALL_EXPR_ARG (exp, 1);
25093 rtx op0 = expand_normal (arg0);
25094 rtx op1 = expand_normal (arg1);
25095 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25096 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25097 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
25098
25099 if (VECTOR_MODE_P (mode0))
25100 op0 = safe_vector_operand (op0, mode0);
25101 if (VECTOR_MODE_P (mode1))
25102 op1 = safe_vector_operand (op1, mode1);
25103
25104 if (! target
25105 || GET_MODE (target) != tmode
25106 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25107 target = gen_reg_rtx (tmode);
25108
25109 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
25110 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
25111
25112 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25113 op0 = copy_to_mode_reg (mode0, op0);
25114 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25115 op1 = copy_to_mode_reg (mode1, op1);
25116
25117 pat = GEN_FCN (icode) (target, op0, op1);
25118 if (! pat)
25119 return 0;
25120 emit_insn (pat);
25121 return target;
25122 }
25123
25124 /* Subroutine of arm_expand_builtin to take care of unop insns. */
25125
25126 static rtx
25127 arm_expand_unop_builtin (enum insn_code icode,
25128 tree exp, rtx target, int do_load)
25129 {
25130 rtx pat;
25131 tree arg0 = CALL_EXPR_ARG (exp, 0);
25132 rtx op0 = expand_normal (arg0);
25133 rtx op1 = NULL_RTX;
25134 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25135 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
25136 bool builtin_sha1h_p = false;
25137
25138 if (insn_data[icode].n_operands == 3)
25139 {
25140 gcc_assert (icode == CODE_FOR_crypto_sha1h);
25141 builtin_sha1h_p = true;
25142 }
25143
25144 if (! target
25145 || GET_MODE (target) != tmode
25146 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25147 target = gen_reg_rtx (tmode);
25148 if (do_load)
25149 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25150 else
25151 {
25152 if (VECTOR_MODE_P (mode0))
25153 op0 = safe_vector_operand (op0, mode0);
25154
25155 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25156 op0 = copy_to_mode_reg (mode0, op0);
25157 }
25158 if (builtin_sha1h_p)
25159 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25160
25161 if (builtin_sha1h_p)
25162 pat = GEN_FCN (icode) (target, op0, op1);
25163 else
25164 pat = GEN_FCN (icode) (target, op0);
25165 if (! pat)
25166 return 0;
25167 emit_insn (pat);
25168 return target;
25169 }
25170
25171 typedef enum {
25172 NEON_ARG_COPY_TO_REG,
25173 NEON_ARG_CONSTANT,
25174 NEON_ARG_MEMORY,
25175 NEON_ARG_STOP
25176 } builtin_arg;
25177
25178 #define NEON_MAX_BUILTIN_ARGS 5
25179
25180 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25181 and return an expression for the accessed memory.
25182
25183 The intrinsic function operates on a block of registers that has
25184 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25185 function references the memory at EXP of type TYPE and in mode
25186 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25187 available. */
25188
25189 static tree
25190 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
25191 enum machine_mode reg_mode,
25192 neon_builtin_type_mode type_mode)
25193 {
25194 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25195 tree elem_type, upper_bound, array_type;
25196
25197 /* Work out the size of the register block in bytes. */
25198 reg_size = GET_MODE_SIZE (reg_mode);
25199
25200 /* Work out the size of each vector in bytes. */
25201 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25202 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25203
25204 /* Work out how many vectors there are. */
25205 gcc_assert (reg_size % vector_size == 0);
25206 nvectors = reg_size / vector_size;
25207
25208 /* Work out the type of each element. */
25209 gcc_assert (POINTER_TYPE_P (type));
25210 elem_type = TREE_TYPE (type);
25211
25212 /* Work out how many elements are being loaded or stored.
25213 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25214 and memory elements; anything else implies a lane load or store. */
25215 if (mem_mode == reg_mode)
25216 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25217 else
25218 nelems = nvectors;
25219
25220 /* Create a type that describes the full access. */
25221 upper_bound = build_int_cst (size_type_node, nelems - 1);
25222 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25223
25224 /* Dereference EXP using that type. */
25225 return fold_build2 (MEM_REF, array_type, exp,
25226 build_int_cst (build_pointer_type (array_type), 0));
25227 }
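/* Editorial worked example (values assumed for illustration): for a plain
   quadword load with REG_MODE == MEM_MODE == V4SImode and EXP of type
   const int *, reg_size and vector_size are both 16, so nvectors is 1 and
   nelems is 16 / 4 = 4; the access is described as an int[4] array.  For a
   lane access MEM_MODE differs from REG_MODE and nelems is just nvectors,
   one element per vector in the block.  */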
25228
25229 /* Expand a Neon builtin. */
25230 static rtx
25231 arm_expand_neon_args (rtx target, int icode, int have_retval,
25232 neon_builtin_type_mode type_mode,
25233 tree exp, int fcode, ...)
25234 {
25235 va_list ap;
25236 rtx pat;
25237 tree arg[NEON_MAX_BUILTIN_ARGS];
25238 rtx op[NEON_MAX_BUILTIN_ARGS];
25239 tree arg_type;
25240 tree formals;
25241 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25242 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25243 enum machine_mode other_mode;
25244 int argc = 0;
25245 int opno;
25246
25247 if (have_retval
25248 && (!target
25249 || GET_MODE (target) != tmode
25250 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25251 target = gen_reg_rtx (tmode);
25252
25253 va_start (ap, fcode);
25254
25255 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25256
25257 for (;;)
25258 {
25259 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25260
25261 if (thisarg == NEON_ARG_STOP)
25262 break;
25263 else
25264 {
25265 opno = argc + have_retval;
25266 mode[argc] = insn_data[icode].operand[opno].mode;
25267 arg[argc] = CALL_EXPR_ARG (exp, argc);
25268 arg_type = TREE_VALUE (formals);
25269 if (thisarg == NEON_ARG_MEMORY)
25270 {
25271 other_mode = insn_data[icode].operand[1 - opno].mode;
25272 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25273 mode[argc], other_mode,
25274 type_mode);
25275 }
25276
25277 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that a MEM_P
25278 value is returned. */
25279 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25280 (thisarg == NEON_ARG_MEMORY
25281 ? EXPAND_MEMORY : EXPAND_NORMAL));
25282
25283 switch (thisarg)
25284 {
25285 case NEON_ARG_COPY_TO_REG:
25286 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25287 if (!(*insn_data[icode].operand[opno].predicate)
25288 (op[argc], mode[argc]))
25289 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25290 break;
25291
25292 case NEON_ARG_CONSTANT:
25293 /* FIXME: This error message is somewhat unhelpful. */
25294 if (!(*insn_data[icode].operand[opno].predicate)
25295 (op[argc], mode[argc]))
25296 error ("argument must be a constant");
25297 break;
25298
25299 case NEON_ARG_MEMORY:
25300 /* Check if expand failed. */
25301 if (op[argc] == const0_rtx)
25302 return 0;
25303 gcc_assert (MEM_P (op[argc]));
25304 PUT_MODE (op[argc], mode[argc]);
25305 /* ??? arm_neon.h uses the same built-in functions for signed
25306 and unsigned accesses, casting where necessary. This isn't
25307 alias safe. */
25308 set_mem_alias_set (op[argc], 0);
25309 if (!(*insn_data[icode].operand[opno].predicate)
25310 (op[argc], mode[argc]))
25311 op[argc] = (replace_equiv_address
25312 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25313 break;
25314
25315 case NEON_ARG_STOP:
25316 gcc_unreachable ();
25317 }
25318
25319 argc++;
25320 formals = TREE_CHAIN (formals);
25321 }
25322 }
25323
25324 va_end (ap);
25325
25326 if (have_retval)
25327 switch (argc)
25328 {
25329 case 1:
25330 pat = GEN_FCN (icode) (target, op[0]);
25331 break;
25332
25333 case 2:
25334 pat = GEN_FCN (icode) (target, op[0], op[1]);
25335 break;
25336
25337 case 3:
25338 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25339 break;
25340
25341 case 4:
25342 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25343 break;
25344
25345 case 5:
25346 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25347 break;
25348
25349 default:
25350 gcc_unreachable ();
25351 }
25352 else
25353 switch (argc)
25354 {
25355 case 1:
25356 pat = GEN_FCN (icode) (op[0]);
25357 break;
25358
25359 case 2:
25360 pat = GEN_FCN (icode) (op[0], op[1]);
25361 break;
25362
25363 case 3:
25364 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25365 break;
25366
25367 case 4:
25368 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25369 break;
25370
25371 case 5:
25372 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25373 break;
25374
25375 default:
25376 gcc_unreachable ();
25377 }
25378
25379 if (!pat)
25380 return 0;
25381
25382 emit_insn (pat);
25383
25384 return target;
25385 }
25386
25387 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25388 constants defined per-instruction or per instruction-variant. Instead, the
25389 required info is looked up in the table neon_builtin_data. */
25390 static rtx
25391 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25392 {
25393 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25394 neon_itype itype = d->itype;
25395 enum insn_code icode = d->code;
25396 neon_builtin_type_mode type_mode = d->mode;
25397
25398 switch (itype)
25399 {
25400 case NEON_UNOP:
25401 case NEON_CONVERT:
25402 case NEON_DUPLANE:
25403 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25404 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25405
25406 case NEON_BINOP:
25407 case NEON_SETLANE:
25408 case NEON_SCALARMUL:
25409 case NEON_SCALARMULL:
25410 case NEON_SCALARMULH:
25411 case NEON_SHIFTINSERT:
25412 case NEON_LOGICBINOP:
25413 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25414 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25415 NEON_ARG_STOP);
25416
25417 case NEON_TERNOP:
25418 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25419 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25420 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25421
25422 case NEON_GETLANE:
25423 case NEON_FIXCONV:
25424 case NEON_SHIFTIMM:
25425 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25426 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25427 NEON_ARG_STOP);
25428
25429 case NEON_CREATE:
25430 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25431 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25432
25433 case NEON_DUP:
25434 case NEON_RINT:
25435 case NEON_SPLIT:
25436 case NEON_FLOAT_WIDEN:
25437 case NEON_FLOAT_NARROW:
25438 case NEON_BSWAP:
25439 case NEON_REINTERP:
25440 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25441 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25442
25443 case NEON_COMBINE:
25444 case NEON_VTBL:
25445 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25446 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25447
25448 case NEON_LANEMUL:
25449 case NEON_LANEMULL:
25450 case NEON_LANEMULH:
25451 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25452 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25453 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25454
25455 case NEON_LANEMAC:
25456 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25457 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25458 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25459
25460 case NEON_SHIFTACC:
25461 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25462 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25463 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25464
25465 case NEON_SCALARMAC:
25466 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25467 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25468 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25469
25470 case NEON_SELECT:
25471 case NEON_VTBX:
25472 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25473 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25474 NEON_ARG_STOP);
25475
25476 case NEON_LOAD1:
25477 case NEON_LOADSTRUCT:
25478 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25479 NEON_ARG_MEMORY, NEON_ARG_STOP);
25480
25481 case NEON_LOAD1LANE:
25482 case NEON_LOADSTRUCTLANE:
25483 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25484 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25485 NEON_ARG_STOP);
25486
25487 case NEON_STORE1:
25488 case NEON_STORESTRUCT:
25489 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25490 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25491
25492 case NEON_STORE1LANE:
25493 case NEON_STORESTRUCTLANE:
25494 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25495 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25496 NEON_ARG_STOP);
25497 }
25498
25499 gcc_unreachable ();
25500 }
25501
25502 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25503 void
25504 neon_reinterpret (rtx dest, rtx src)
25505 {
25506 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25507 }
25508
25509 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25510 not to early-clobber SRC registers in the process.
25511
25512 We assume that the operands described by SRC and DEST represent a
25513 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25514 number of components into which the copy has been decomposed. */
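/* For example (an illustrative sketch, assuming D-register operands):
copying {d1,d2} into {d0,d1} is emitted in forward order (d0 <- d1,
then d1 <- d2), while copying {d0,d1} into {d1,d2} overlaps with the
destination at a higher register number and is emitted in reverse
order (d2 <- d1, then d1 <- d0); either way no source register is
overwritten before it has been read. */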
25515 void
25516 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25517 {
25518 unsigned int i;
25519
25520 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25521 || REGNO (operands[0]) < REGNO (operands[1]))
25522 {
25523 for (i = 0; i < count; i++)
25524 {
25525 operands[2 * i] = dest[i];
25526 operands[2 * i + 1] = src[i];
25527 }
25528 }
25529 else
25530 {
25531 for (i = 0; i < count; i++)
25532 {
25533 operands[2 * i] = dest[count - i - 1];
25534 operands[2 * i + 1] = src[count - i - 1];
25535 }
25536 }
25537 }
25538
25539 /* Split operands into moves from op[1] + op[2] into op[0]. */
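/* As an illustrative sketch: a vcombine of two 64-bit halves into one
128-bit register, say q0 = vcombine (d2, d3), becomes at most two
moves, d0 <- d2 and d1 <- d3, with the ordering (and the VSWP special
case below) chosen so that neither half is clobbered before it has
been read. */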
25540
25541 void
25542 neon_split_vcombine (rtx operands[3])
25543 {
25544 unsigned int dest = REGNO (operands[0]);
25545 unsigned int src1 = REGNO (operands[1]);
25546 unsigned int src2 = REGNO (operands[2]);
25547 enum machine_mode halfmode = GET_MODE (operands[1]);
25548 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25549 rtx destlo, desthi;
25550
25551 if (src1 == dest && src2 == dest + halfregs)
25552 {
25553 /* No-op move. Can't split to nothing; emit something. */
25554 emit_note (NOTE_INSN_DELETED);
25555 return;
25556 }
25557
25558 /* Preserve register attributes for variable tracking. */
25559 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25560 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25561 GET_MODE_SIZE (halfmode));
25562
25563 /* Special case of reversed high/low parts. Use VSWP. */
25564 if (src2 == dest && src1 == dest + halfregs)
25565 {
25566 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25567 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25568 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25569 return;
25570 }
25571
25572 if (!reg_overlap_mentioned_p (operands[2], destlo))
25573 {
25574 /* Try to avoid unnecessary moves if part of the result
25575 is in the right place already. */
25576 if (src1 != dest)
25577 emit_move_insn (destlo, operands[1]);
25578 if (src2 != dest + halfregs)
25579 emit_move_insn (desthi, operands[2]);
25580 }
25581 else
25582 {
25583 if (src2 != dest + halfregs)
25584 emit_move_insn (desthi, operands[2]);
25585 if (src1 != dest)
25586 emit_move_insn (destlo, operands[1]);
25587 }
25588 }
25589
25590 /* Expand an expression EXP that calls a built-in function,
25591 with result going to TARGET if that's convenient
25592 (and in mode MODE if that's convenient).
25593 SUBTARGET may be used as the target for computing one of EXP's operands.
25594 IGNORE is nonzero if the value is to be ignored. */
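/* Dispatch overview (a descriptive note summarising the code below):
fcodes at or above ARM_BUILTIN_NEON_BASE are handed straight to
arm_expand_neon_builtin; the FPSCR builtins and the iWMMXt builtins
that need extra operand checking are expanded inline in the switch;
anything left falls through to the generic bdesc_2arg, bdesc_1arg and
bdesc_3arg tables. */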
25595
25596 static rtx
25597 arm_expand_builtin (tree exp,
25598 rtx target,
25599 rtx subtarget ATTRIBUTE_UNUSED,
25600 enum machine_mode mode ATTRIBUTE_UNUSED,
25601 int ignore ATTRIBUTE_UNUSED)
25602 {
25603 const struct builtin_description * d;
25604 enum insn_code icode;
25605 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25606 tree arg0;
25607 tree arg1;
25608 tree arg2;
25609 rtx op0;
25610 rtx op1;
25611 rtx op2;
25612 rtx pat;
25613 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25614 size_t i;
25615 enum machine_mode tmode;
25616 enum machine_mode mode0;
25617 enum machine_mode mode1;
25618 enum machine_mode mode2;
25619 int opint;
25620 int selector;
25621 int mask;
25622 int imm;
25623
25624 if (fcode >= ARM_BUILTIN_NEON_BASE)
25625 return arm_expand_neon_builtin (fcode, exp, target);
25626
25627 switch (fcode)
25628 {
25629 case ARM_BUILTIN_GET_FPSCR:
25630 case ARM_BUILTIN_SET_FPSCR:
25631 if (fcode == ARM_BUILTIN_GET_FPSCR)
25632 {
25633 icode = CODE_FOR_get_fpscr;
25634 target = gen_reg_rtx (SImode);
25635 pat = GEN_FCN (icode) (target);
25636 }
25637 else
25638 {
25639 target = NULL_RTX;
25640 icode = CODE_FOR_set_fpscr;
25641 arg0 = CALL_EXPR_ARG (exp, 0);
25642 op0 = expand_normal (arg0);
25643 pat = GEN_FCN (icode) (op0);
25644 }
25645 emit_insn (pat);
25646 return target;
25647
25648 case ARM_BUILTIN_TEXTRMSB:
25649 case ARM_BUILTIN_TEXTRMUB:
25650 case ARM_BUILTIN_TEXTRMSH:
25651 case ARM_BUILTIN_TEXTRMUH:
25652 case ARM_BUILTIN_TEXTRMSW:
25653 case ARM_BUILTIN_TEXTRMUW:
25654 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25655 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25656 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25657 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25658 : CODE_FOR_iwmmxt_textrmw);
25659
25660 arg0 = CALL_EXPR_ARG (exp, 0);
25661 arg1 = CALL_EXPR_ARG (exp, 1);
25662 op0 = expand_normal (arg0);
25663 op1 = expand_normal (arg1);
25664 tmode = insn_data[icode].operand[0].mode;
25665 mode0 = insn_data[icode].operand[1].mode;
25666 mode1 = insn_data[icode].operand[2].mode;
25667
25668 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25669 op0 = copy_to_mode_reg (mode0, op0);
25670 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25671 {
25672 /* @@@ better error message */
25673 error ("selector must be an immediate");
25674 return gen_reg_rtx (tmode);
25675 }
25676
25677 opint = INTVAL (op1);
25678 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25679 {
25680 if (opint > 7 || opint < 0)
25681 error ("the range of selector should be in 0 to 7");
25682 }
25683 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25684 {
25685 if (opint > 3 || opint < 0)
25686 error ("the range of selector should be in 0 to 3");
25687 }
25688 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25689 {
25690 if (opint > 1 || opint < 0)
25691 error ("the range of selector should be in 0 to 1");
25692 }
25693
25694 if (target == 0
25695 || GET_MODE (target) != tmode
25696 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25697 target = gen_reg_rtx (tmode);
25698 pat = GEN_FCN (icode) (target, op0, op1);
25699 if (! pat)
25700 return 0;
25701 emit_insn (pat);
25702 return target;
25703
25704 case ARM_BUILTIN_WALIGNI:
25705 /* If op2 is immediate, call waligni, else call walignr. */
25706 arg0 = CALL_EXPR_ARG (exp, 0);
25707 arg1 = CALL_EXPR_ARG (exp, 1);
25708 arg2 = CALL_EXPR_ARG (exp, 2);
25709 op0 = expand_normal (arg0);
25710 op1 = expand_normal (arg1);
25711 op2 = expand_normal (arg2);
25712 if (CONST_INT_P (op2))
25713 {
25714 icode = CODE_FOR_iwmmxt_waligni;
25715 tmode = insn_data[icode].operand[0].mode;
25716 mode0 = insn_data[icode].operand[1].mode;
25717 mode1 = insn_data[icode].operand[2].mode;
25718 mode2 = insn_data[icode].operand[3].mode;
25719 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25720 op0 = copy_to_mode_reg (mode0, op0);
25721 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25722 op1 = copy_to_mode_reg (mode1, op1);
25723 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25724 selector = INTVAL (op2);
25725 if (selector > 7 || selector < 0)
25726 error ("the range of selector should be in 0 to 7");
25727 }
25728 else
25729 {
25730 icode = CODE_FOR_iwmmxt_walignr;
25731 tmode = insn_data[icode].operand[0].mode;
25732 mode0 = insn_data[icode].operand[1].mode;
25733 mode1 = insn_data[icode].operand[2].mode;
25734 mode2 = insn_data[icode].operand[3].mode;
25735 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25736 op0 = copy_to_mode_reg (mode0, op0);
25737 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25738 op1 = copy_to_mode_reg (mode1, op1);
25739 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25740 op2 = copy_to_mode_reg (mode2, op2);
25741 }
25742 if (target == 0
25743 || GET_MODE (target) != tmode
25744 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25745 target = gen_reg_rtx (tmode);
25746 pat = GEN_FCN (icode) (target, op0, op1, op2);
25747 if (!pat)
25748 return 0;
25749 emit_insn (pat);
25750 return target;
25751
25752 case ARM_BUILTIN_TINSRB:
25753 case ARM_BUILTIN_TINSRH:
25754 case ARM_BUILTIN_TINSRW:
25755 case ARM_BUILTIN_WMERGE:
25756 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25757 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25758 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25759 : CODE_FOR_iwmmxt_tinsrw);
25760 arg0 = CALL_EXPR_ARG (exp, 0);
25761 arg1 = CALL_EXPR_ARG (exp, 1);
25762 arg2 = CALL_EXPR_ARG (exp, 2);
25763 op0 = expand_normal (arg0);
25764 op1 = expand_normal (arg1);
25765 op2 = expand_normal (arg2);
25766 tmode = insn_data[icode].operand[0].mode;
25767 mode0 = insn_data[icode].operand[1].mode;
25768 mode1 = insn_data[icode].operand[2].mode;
25769 mode2 = insn_data[icode].operand[3].mode;
25770
25771 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25772 op0 = copy_to_mode_reg (mode0, op0);
25773 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25774 op1 = copy_to_mode_reg (mode1, op1);
25775 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25776 {
25777 error ("selector must be an immediate");
25778 return const0_rtx;
25779 }
25780 if (icode == CODE_FOR_iwmmxt_wmerge)
25781 {
25782 selector = INTVAL (op2);
25783 if (selector > 7 || selector < 0)
25784 error ("the range of selector should be in 0 to 7");
25785 }
25786 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25787 || (icode == CODE_FOR_iwmmxt_tinsrh)
25788 || (icode == CODE_FOR_iwmmxt_tinsrw))
25789 {
25790 mask = 0x01;
25791 selector = INTVAL (op2);
25792 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25793 error ("the range of selector should be in 0 to 7");
25794 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25795 error ("the range of selector should be in 0 to 3");
25796 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25797 error ("the range of selector should be in 0 to 1");
25798 mask <<= selector;
25799 op2 = GEN_INT (mask);
25800 }
25801 if (target == 0
25802 || GET_MODE (target) != tmode
25803 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25804 target = gen_reg_rtx (tmode);
25805 pat = GEN_FCN (icode) (target, op0, op1, op2);
25806 if (! pat)
25807 return 0;
25808 emit_insn (pat);
25809 return target;
25810
25811 case ARM_BUILTIN_SETWCGR0:
25812 case ARM_BUILTIN_SETWCGR1:
25813 case ARM_BUILTIN_SETWCGR2:
25814 case ARM_BUILTIN_SETWCGR3:
25815 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25816 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25817 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25818 : CODE_FOR_iwmmxt_setwcgr3);
25819 arg0 = CALL_EXPR_ARG (exp, 0);
25820 op0 = expand_normal (arg0);
25821 mode0 = insn_data[icode].operand[0].mode;
25822 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25823 op0 = copy_to_mode_reg (mode0, op0);
25824 pat = GEN_FCN (icode) (op0);
25825 if (!pat)
25826 return 0;
25827 emit_insn (pat);
25828 return 0;
25829
25830 case ARM_BUILTIN_GETWCGR0:
25831 case ARM_BUILTIN_GETWCGR1:
25832 case ARM_BUILTIN_GETWCGR2:
25833 case ARM_BUILTIN_GETWCGR3:
25834 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25835 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25836 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25837 : CODE_FOR_iwmmxt_getwcgr3);
25838 tmode = insn_data[icode].operand[0].mode;
25839 if (target == 0
25840 || GET_MODE (target) != tmode
25841 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25842 target = gen_reg_rtx (tmode);
25843 pat = GEN_FCN (icode) (target);
25844 if (!pat)
25845 return 0;
25846 emit_insn (pat);
25847 return target;
25848
25849 case ARM_BUILTIN_WSHUFH:
25850 icode = CODE_FOR_iwmmxt_wshufh;
25851 arg0 = CALL_EXPR_ARG (exp, 0);
25852 arg1 = CALL_EXPR_ARG (exp, 1);
25853 op0 = expand_normal (arg0);
25854 op1 = expand_normal (arg1);
25855 tmode = insn_data[icode].operand[0].mode;
25856 mode1 = insn_data[icode].operand[1].mode;
25857 mode2 = insn_data[icode].operand[2].mode;
25858
25859 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25860 op0 = copy_to_mode_reg (mode1, op0);
25861 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25862 {
25863 error ("mask must be an immediate");
25864 return const0_rtx;
25865 }
25866 selector = INTVAL (op1);
25867 if (selector < 0 || selector > 255)
25868 error ("the range of mask should be in 0 to 255");
25869 if (target == 0
25870 || GET_MODE (target) != tmode
25871 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25872 target = gen_reg_rtx (tmode);
25873 pat = GEN_FCN (icode) (target, op0, op1);
25874 if (! pat)
25875 return 0;
25876 emit_insn (pat);
25877 return target;
25878
25879 case ARM_BUILTIN_WMADDS:
25880 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25881 case ARM_BUILTIN_WMADDSX:
25882 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25883 case ARM_BUILTIN_WMADDSN:
25884 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25885 case ARM_BUILTIN_WMADDU:
25886 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25887 case ARM_BUILTIN_WMADDUX:
25888 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25889 case ARM_BUILTIN_WMADDUN:
25890 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25891 case ARM_BUILTIN_WSADBZ:
25892 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25893 case ARM_BUILTIN_WSADHZ:
25894 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25895
25896 /* Several three-argument builtins. */
25897 case ARM_BUILTIN_WMACS:
25898 case ARM_BUILTIN_WMACU:
25899 case ARM_BUILTIN_TMIA:
25900 case ARM_BUILTIN_TMIAPH:
25901 case ARM_BUILTIN_TMIATT:
25902 case ARM_BUILTIN_TMIATB:
25903 case ARM_BUILTIN_TMIABT:
25904 case ARM_BUILTIN_TMIABB:
25905 case ARM_BUILTIN_WQMIABB:
25906 case ARM_BUILTIN_WQMIABT:
25907 case ARM_BUILTIN_WQMIATB:
25908 case ARM_BUILTIN_WQMIATT:
25909 case ARM_BUILTIN_WQMIABBN:
25910 case ARM_BUILTIN_WQMIABTN:
25911 case ARM_BUILTIN_WQMIATBN:
25912 case ARM_BUILTIN_WQMIATTN:
25913 case ARM_BUILTIN_WMIABB:
25914 case ARM_BUILTIN_WMIABT:
25915 case ARM_BUILTIN_WMIATB:
25916 case ARM_BUILTIN_WMIATT:
25917 case ARM_BUILTIN_WMIABBN:
25918 case ARM_BUILTIN_WMIABTN:
25919 case ARM_BUILTIN_WMIATBN:
25920 case ARM_BUILTIN_WMIATTN:
25921 case ARM_BUILTIN_WMIAWBB:
25922 case ARM_BUILTIN_WMIAWBT:
25923 case ARM_BUILTIN_WMIAWTB:
25924 case ARM_BUILTIN_WMIAWTT:
25925 case ARM_BUILTIN_WMIAWBBN:
25926 case ARM_BUILTIN_WMIAWBTN:
25927 case ARM_BUILTIN_WMIAWTBN:
25928 case ARM_BUILTIN_WMIAWTTN:
25929 case ARM_BUILTIN_WSADB:
25930 case ARM_BUILTIN_WSADH:
25931 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25932 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25933 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25934 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25935 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25936 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25937 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25938 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25939 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25940 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25941 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25942 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25943 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25944 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25945 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25946 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25947 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25948 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25949 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25950 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25951 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25952 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25953 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25954 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25955 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25956 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25957 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25958 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25959 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25960 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25961 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25962 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25963 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25964 : CODE_FOR_iwmmxt_wsadh);
25965 arg0 = CALL_EXPR_ARG (exp, 0);
25966 arg1 = CALL_EXPR_ARG (exp, 1);
25967 arg2 = CALL_EXPR_ARG (exp, 2);
25968 op0 = expand_normal (arg0);
25969 op1 = expand_normal (arg1);
25970 op2 = expand_normal (arg2);
25971 tmode = insn_data[icode].operand[0].mode;
25972 mode0 = insn_data[icode].operand[1].mode;
25973 mode1 = insn_data[icode].operand[2].mode;
25974 mode2 = insn_data[icode].operand[3].mode;
25975
25976 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25977 op0 = copy_to_mode_reg (mode0, op0);
25978 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25979 op1 = copy_to_mode_reg (mode1, op1);
25980 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25981 op2 = copy_to_mode_reg (mode2, op2);
25982 if (target == 0
25983 || GET_MODE (target) != tmode
25984 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25985 target = gen_reg_rtx (tmode);
25986 pat = GEN_FCN (icode) (target, op0, op1, op2);
25987 if (! pat)
25988 return 0;
25989 emit_insn (pat);
25990 return target;
25991
25992 case ARM_BUILTIN_WZERO:
25993 target = gen_reg_rtx (DImode);
25994 emit_insn (gen_iwmmxt_clrdi (target));
25995 return target;
25996
25997 case ARM_BUILTIN_WSRLHI:
25998 case ARM_BUILTIN_WSRLWI:
25999 case ARM_BUILTIN_WSRLDI:
26000 case ARM_BUILTIN_WSLLHI:
26001 case ARM_BUILTIN_WSLLWI:
26002 case ARM_BUILTIN_WSLLDI:
26003 case ARM_BUILTIN_WSRAHI:
26004 case ARM_BUILTIN_WSRAWI:
26005 case ARM_BUILTIN_WSRADI:
26006 case ARM_BUILTIN_WRORHI:
26007 case ARM_BUILTIN_WRORWI:
26008 case ARM_BUILTIN_WRORDI:
26009 case ARM_BUILTIN_WSRLH:
26010 case ARM_BUILTIN_WSRLW:
26011 case ARM_BUILTIN_WSRLD:
26012 case ARM_BUILTIN_WSLLH:
26013 case ARM_BUILTIN_WSLLW:
26014 case ARM_BUILTIN_WSLLD:
26015 case ARM_BUILTIN_WSRAH:
26016 case ARM_BUILTIN_WSRAW:
26017 case ARM_BUILTIN_WSRAD:
26018 case ARM_BUILTIN_WRORH:
26019 case ARM_BUILTIN_WRORW:
26020 case ARM_BUILTIN_WRORD:
26021 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
26022 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
26023 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
26024 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
26025 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
26026 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
26027 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
26028 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
26029 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
26030 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
26031 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
26032 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
26033 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
26034 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
26035 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
26036 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
26037 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
26038 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
26039 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
26040 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
26041 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
26042 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
26043 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
26044 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
26045 : CODE_FOR_nothing);
26046 arg1 = CALL_EXPR_ARG (exp, 1);
26047 op1 = expand_normal (arg1);
26048 if (GET_MODE (op1) == VOIDmode)
26049 {
26050 imm = INTVAL (op1);
26051 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
26052 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
26053 && (imm < 0 || imm > 32))
26054 {
26055 if (fcode == ARM_BUILTIN_WRORHI)
26056 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
26057 else if (fcode == ARM_BUILTIN_WRORWI)
26058 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
26059 else if (fcode == ARM_BUILTIN_WRORH)
26060 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
26061 else
26062 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
26063 }
26064 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
26065 && (imm < 0 || imm > 64))
26066 {
26067 if (fcode == ARM_BUILTIN_WRORDI)
26068 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
26069 else
26070 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
26071 }
26072 else if (imm < 0)
26073 {
26074 if (fcode == ARM_BUILTIN_WSRLHI)
26075 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
26076 else if (fcode == ARM_BUILTIN_WSRLWI)
26077 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
26078 else if (fcode == ARM_BUILTIN_WSRLDI)
26079 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
26080 else if (fcode == ARM_BUILTIN_WSLLHI)
26081 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
26082 else if (fcode == ARM_BUILTIN_WSLLWI)
26083 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
26084 else if (fcode == ARM_BUILTIN_WSLLDI)
26085 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
26086 else if (fcode == ARM_BUILTIN_WSRAHI)
26087 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
26088 else if (fcode == ARM_BUILTIN_WSRAWI)
26089 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
26090 else if (fcode == ARM_BUILTIN_WSRADI)
26091 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
26092 else if (fcode == ARM_BUILTIN_WSRLH)
26093 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
26094 else if (fcode == ARM_BUILTIN_WSRLW)
26095 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
26096 else if (fcode == ARM_BUILTIN_WSRLD)
26097 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
26098 else if (fcode == ARM_BUILTIN_WSLLH)
26099 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
26100 else if (fcode == ARM_BUILTIN_WSLLW)
26101 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
26102 else if (fcode == ARM_BUILTIN_WSLLD)
26103 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
26104 else if (fcode == ARM_BUILTIN_WSRAH)
26105 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
26106 else if (fcode == ARM_BUILTIN_WSRAW)
26107 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
26108 else
26109 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
26110 }
26111 }
26112 return arm_expand_binop_builtin (icode, exp, target);
26113
26114 default:
26115 break;
26116 }
26117
26118 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
26119 if (d->code == (const enum arm_builtins) fcode)
26120 return arm_expand_binop_builtin (d->icode, exp, target);
26121
26122 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
26123 if (d->code == (const enum arm_builtins) fcode)
26124 return arm_expand_unop_builtin (d->icode, exp, target, 0);
26125
26126 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
26127 if (d->code == (const enum arm_builtins) fcode)
26128 return arm_expand_ternop_builtin (d->icode, exp, target);
26129
26130 /* @@@ Should really do something sensible here. */
26131 return NULL_RTX;
26132 }
26133 \f
26134 /* Return the number (counting from 0) of
26135 the least significant set bit in MASK. */
26136
26137 inline static int
26138 number_of_first_bit_set (unsigned mask)
26139 {
26140 return ctz_hwi (mask);
26141 }
26142
26143 /* Like emit_multi_reg_push, but allowing for a different set of
26144 registers to be described as saved. MASK is the set of registers
26145 to be saved; REAL_REGS is the set of registers to be described as
26146 saved. If REAL_REGS is 0, only describe the stack adjustment. */
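/* For instance (a descriptive note based on the callers below),
thumb1_expand_prologue uses thumb1_emit_multi_reg_push (mask, 0) when
pushing anonymous argument registers purely to reserve stack space, so
only the stack adjustment is described for unwinding, and
thumb1_emit_multi_reg_push (l_mask, l_mask) when the pushed registers
really are saves that the unwinder must know about. */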
26147
26148 static rtx
26149 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26150 {
26151 unsigned long regno;
26152 rtx par[10], tmp, reg, insn;
26153 int i, j;
26154
26155 /* Build the parallel of the registers actually being stored. */
26156 for (i = 0; mask; ++i, mask &= mask - 1)
26157 {
26158 regno = ctz_hwi (mask);
26159 reg = gen_rtx_REG (SImode, regno);
26160
26161 if (i == 0)
26162 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26163 else
26164 tmp = gen_rtx_USE (VOIDmode, reg);
26165
26166 par[i] = tmp;
26167 }
26168
26169 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26170 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26171 tmp = gen_frame_mem (BLKmode, tmp);
26172 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26173 par[0] = tmp;
26174
26175 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26176 insn = emit_insn (tmp);
26177
26178 /* Always build the stack adjustment note for unwind info. */
26179 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26180 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26181 par[0] = tmp;
26182
26183 /* Build the parallel of the registers recorded as saved for unwind. */
26184 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26185 {
26186 regno = ctz_hwi (real_regs);
26187 reg = gen_rtx_REG (SImode, regno);
26188
26189 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26190 tmp = gen_frame_mem (SImode, tmp);
26191 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26192 RTX_FRAME_RELATED_P (tmp) = 1;
26193 par[j + 1] = tmp;
26194 }
26195
26196 if (j == 0)
26197 tmp = par[0];
26198 else
26199 {
26200 RTX_FRAME_RELATED_P (par[0]) = 1;
26201 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26202 }
26203
26204 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26205
26206 return insn;
26207 }
26208
26209 /* Emit code to pop registers from the stack. F is the
26210 assembly file. MASK is the registers to pop. */
26211 static void
26212 thumb_pop (FILE *f, unsigned long mask)
26213 {
26214 int regno;
26215 int lo_mask = mask & 0xFF;
26216 int pushed_words = 0;
26217
26218 gcc_assert (mask);
26219
26220 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26221 {
26222 /* Special case. Do not generate a POP PC statement here; do it in
26223 thumb_exit(). */
26224 thumb_exit (f, -1);
26225 return;
26226 }
26227
26228 fprintf (f, "\tpop\t{");
26229
26230 /* Look at the low registers first. */
26231 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26232 {
26233 if (lo_mask & 1)
26234 {
26235 asm_fprintf (f, "%r", regno);
26236
26237 if ((lo_mask & ~1) != 0)
26238 fprintf (f, ", ");
26239
26240 pushed_words++;
26241 }
26242 }
26243
26244 if (mask & (1 << PC_REGNUM))
26245 {
26246 /* Catch popping the PC. */
26247 if (TARGET_INTERWORK || TARGET_BACKTRACE
26248 || crtl->calls_eh_return)
26249 {
26250 /* The PC is never popped directly; instead
26251 it is popped into r3 and then BX is used. */
26252 fprintf (f, "}\n");
26253
26254 thumb_exit (f, -1);
26255
26256 return;
26257 }
26258 else
26259 {
26260 if (mask & 0xFF)
26261 fprintf (f, ", ");
26262
26263 asm_fprintf (f, "%r", PC_REGNUM);
26264 }
26265 }
26266
26267 fprintf (f, "}\n");
26268 }
26269
26270 /* Generate code to return from a thumb function.
26271 If 'reg_containing_return_addr' is -1, then the return address is
26272 actually on the stack, at the stack pointer. */
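/* Broad outline (a descriptive sketch of the code below): work out which
registers still need popping (LR, plus FP and SP for TARGET_BACKTRACE),
find the return-value argument registers that may safely be corrupted,
pop into those, then use register moves to put the frame pointer, the
stack pointer and the return address where they belong before the
final BX. */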
26273 static void
26274 thumb_exit (FILE *f, int reg_containing_return_addr)
26275 {
26276 unsigned regs_available_for_popping;
26277 unsigned regs_to_pop;
26278 int pops_needed;
26279 unsigned available;
26280 unsigned required;
26281 enum machine_mode mode;
26282 int size;
26283 int restore_a4 = FALSE;
26284
26285 /* Compute the registers we need to pop. */
26286 regs_to_pop = 0;
26287 pops_needed = 0;
26288
26289 if (reg_containing_return_addr == -1)
26290 {
26291 regs_to_pop |= 1 << LR_REGNUM;
26292 ++pops_needed;
26293 }
26294
26295 if (TARGET_BACKTRACE)
26296 {
26297 /* Restore the (ARM) frame pointer and stack pointer. */
26298 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26299 pops_needed += 2;
26300 }
26301
26302 /* If there is nothing to pop then just emit the BX instruction and
26303 return. */
26304 if (pops_needed == 0)
26305 {
26306 if (crtl->calls_eh_return)
26307 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26308
26309 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26310 return;
26311 }
26312 /* Otherwise, if we are not supporting interworking, have not created
26313 a backtrace structure, and the function was not entered in ARM mode,
26314 just pop the return address straight into the PC. */
26315 else if (!TARGET_INTERWORK
26316 && !TARGET_BACKTRACE
26317 && !is_called_in_ARM_mode (current_function_decl)
26318 && !crtl->calls_eh_return)
26319 {
26320 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26321 return;
26322 }
26323
26324 /* Find out how many of the (return) argument registers we can corrupt. */
26325 regs_available_for_popping = 0;
26326
26327 /* If returning via __builtin_eh_return, the bottom three registers
26328 all contain information needed for the return. */
26329 if (crtl->calls_eh_return)
26330 size = 12;
26331 else
26332 {
26333 /* Deduce the registers used from the function's return
26334 value. This is more reliable than examining
26335 df_regs_ever_live_p () because that will be set if the register is
26336 ever used in the function, not just if the register is used
26337 to hold a return value. */
26338
26339 if (crtl->return_rtx != 0)
26340 mode = GET_MODE (crtl->return_rtx);
26341 else
26342 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26343
26344 size = GET_MODE_SIZE (mode);
26345
26346 if (size == 0)
26347 {
26348 /* In a void function we can use any argument register.
26349 In a function that returns a structure on the stack
26350 we can use the second and third argument registers. */
26351 if (mode == VOIDmode)
26352 regs_available_for_popping =
26353 (1 << ARG_REGISTER (1))
26354 | (1 << ARG_REGISTER (2))
26355 | (1 << ARG_REGISTER (3));
26356 else
26357 regs_available_for_popping =
26358 (1 << ARG_REGISTER (2))
26359 | (1 << ARG_REGISTER (3));
26360 }
26361 else if (size <= 4)
26362 regs_available_for_popping =
26363 (1 << ARG_REGISTER (2))
26364 | (1 << ARG_REGISTER (3));
26365 else if (size <= 8)
26366 regs_available_for_popping =
26367 (1 << ARG_REGISTER (3));
26368 }
26369
26370 /* Match registers to be popped with registers into which we pop them. */
26371 for (available = regs_available_for_popping,
26372 required = regs_to_pop;
26373 required != 0 && available != 0;
26374 available &= ~(available & - available),
26375 required &= ~(required & - required))
26376 -- pops_needed;
26377
26378 /* If we have any popping registers left over, remove them. */
26379 if (available > 0)
26380 regs_available_for_popping &= ~available;
26381
26382 /* Otherwise if we need another popping register we can use
26383 the fourth argument register. */
26384 else if (pops_needed)
26385 {
26386 /* If we have not found any free argument registers and
26387 reg a4 contains the return address, we must move it. */
26388 if (regs_available_for_popping == 0
26389 && reg_containing_return_addr == LAST_ARG_REGNUM)
26390 {
26391 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26392 reg_containing_return_addr = LR_REGNUM;
26393 }
26394 else if (size > 12)
26395 {
26396 /* Register a4 is being used to hold part of the return value,
26397 but we have dire need of a free, low register. */
26398 restore_a4 = TRUE;
26399
26400 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26401 }
26402
26403 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26404 {
26405 /* The fourth argument register is available. */
26406 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26407
26408 --pops_needed;
26409 }
26410 }
26411
26412 /* Pop as many registers as we can. */
26413 thumb_pop (f, regs_available_for_popping);
26414
26415 /* Process the registers we popped. */
26416 if (reg_containing_return_addr == -1)
26417 {
26418 /* The return address was popped into the lowest numbered register. */
26419 regs_to_pop &= ~(1 << LR_REGNUM);
26420
26421 reg_containing_return_addr =
26422 number_of_first_bit_set (regs_available_for_popping);
26423
26424 /* Remove this register from the mask of available registers, so that
26425 the return address will not be corrupted by further pops. */
26426 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26427 }
26428
26429 /* If we popped other registers then handle them here. */
26430 if (regs_available_for_popping)
26431 {
26432 int frame_pointer;
26433
26434 /* Work out which register currently contains the frame pointer. */
26435 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26436
26437 /* Move it into the correct place. */
26438 asm_fprintf (f, "\tmov\t%r, %r\n",
26439 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26440
26441 /* (Temporarily) remove it from the mask of popped registers. */
26442 regs_available_for_popping &= ~(1 << frame_pointer);
26443 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26444
26445 if (regs_available_for_popping)
26446 {
26447 int stack_pointer;
26448
26449 /* We popped the stack pointer as well;
26450 find the register that contains it. */
26451 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26452
26453 /* Move it into the stack register. */
26454 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26455
26456 /* At this point we have popped all necessary registers, so
26457 do not worry about restoring regs_available_for_popping
26458 to its correct value:
26459
26460 assert (pops_needed == 0)
26461 assert (regs_available_for_popping == (1 << frame_pointer))
26462 assert (regs_to_pop == (1 << STACK_POINTER)) */
26463 }
26464 else
26465 {
26466 /* Since we have just moved the popped value into the frame
26467 pointer, the popping register is available for reuse, and
26468 we know that we still have the stack pointer left to pop. */
26469 regs_available_for_popping |= (1 << frame_pointer);
26470 }
26471 }
26472
26473 /* If we still have registers left on the stack, but we no longer have
26474 any registers into which we can pop them, then we must move the return
26475 address into the link register and make available the register that
26476 contained it. */
26477 if (regs_available_for_popping == 0 && pops_needed > 0)
26478 {
26479 regs_available_for_popping |= 1 << reg_containing_return_addr;
26480
26481 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26482 reg_containing_return_addr);
26483
26484 reg_containing_return_addr = LR_REGNUM;
26485 }
26486
26487 /* If we have registers left on the stack then pop some more.
26488 We know that at most we will want to pop FP and SP. */
26489 if (pops_needed > 0)
26490 {
26491 int popped_into;
26492 int move_to;
26493
26494 thumb_pop (f, regs_available_for_popping);
26495
26496 /* We have popped either FP or SP.
26497 Move whichever one it is into the correct register. */
26498 popped_into = number_of_first_bit_set (regs_available_for_popping);
26499 move_to = number_of_first_bit_set (regs_to_pop);
26500
26501 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26502
26503 regs_to_pop &= ~(1 << move_to);
26504
26505 --pops_needed;
26506 }
26507
26508 /* If we still have not popped everything then we must have only
26509 had one register available to us and we are now popping the SP. */
26510 if (pops_needed > 0)
26511 {
26512 int popped_into;
26513
26514 thumb_pop (f, regs_available_for_popping);
26515
26516 popped_into = number_of_first_bit_set (regs_available_for_popping);
26517
26518 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26519 /*
26520 assert (regs_to_pop == (1 << STACK_POINTER))
26521 assert (pops_needed == 1)
26522 */
26523 }
26524
26525 /* If necessary restore the a4 register. */
26526 if (restore_a4)
26527 {
26528 if (reg_containing_return_addr != LR_REGNUM)
26529 {
26530 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26531 reg_containing_return_addr = LR_REGNUM;
26532 }
26533
26534 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26535 }
26536
26537 if (crtl->calls_eh_return)
26538 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26539
26540 /* Return to caller. */
26541 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26542 }
26543 \f
26544 /* Scan INSN just before assembler is output for it.
26545 For Thumb-1, we track the status of the condition codes; this
26546 information is used in the cbranchsi4_insn pattern. */
26547 void
26548 thumb1_final_prescan_insn (rtx insn)
26549 {
26550 if (flag_print_asm_name)
26551 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26552 INSN_ADDRESSES (INSN_UID (insn)));
26553 /* Don't overwrite the previous setter when we get to a cbranch. */
26554 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26555 {
26556 enum attr_conds conds;
26557
26558 if (cfun->machine->thumb1_cc_insn)
26559 {
26560 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26561 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26562 CC_STATUS_INIT;
26563 }
26564 conds = get_attr_conds (insn);
26565 if (conds == CONDS_SET)
26566 {
26567 rtx set = single_set (insn);
26568 cfun->machine->thumb1_cc_insn = insn;
26569 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26570 cfun->machine->thumb1_cc_op1 = const0_rtx;
26571 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26572 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26573 {
26574 rtx src1 = XEXP (SET_SRC (set), 1);
26575 if (src1 == const0_rtx)
26576 cfun->machine->thumb1_cc_mode = CCmode;
26577 }
26578 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26579 {
26580 /* Record the src register operand instead of dest because the
26581 cprop_hardreg pass propagates src. */
26582 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26583 }
26584 }
26585 else if (conds != CONDS_NOCOND)
26586 cfun->machine->thumb1_cc_insn = NULL_RTX;
26587 }
26588
26589 /* Check if unexpected far jump is used. */
26590 if (cfun->machine->lr_save_eliminated
26591 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26592 internal_error("Unexpected thumb1 far jump");
26593 }
26594
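/* Return nonzero if the low 32 bits of VAL are a non-zero value whose set
bits fit within eight contiguous bit positions, i.e. an 8-bit constant
shifted left by 0 to 24 places (for example 0x00ff0000); a value such
as 0x101 needs more than eight contiguous bits and is rejected. */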
26595 int
26596 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26597 {
26598 unsigned HOST_WIDE_INT mask = 0xff;
26599 int i;
26600
26601 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26602 if (val == 0) /* XXX */
26603 return 0;
26604
26605 for (i = 0; i < 25; i++)
26606 if ((val & (mask << i)) == val)
26607 return 1;
26608
26609 return 0;
26610 }
26611
26612 /* Returns nonzero if the current function contains,
26613 or might contain, a far jump. */
26614 static int
26615 thumb_far_jump_used_p (void)
26616 {
26617 rtx insn;
26618 bool far_jump = false;
26619 unsigned int func_size = 0;
26620
26621 /* This test is only important for leaf functions. */
26622 /* assert (!leaf_function_p ()); */
26623
26624 /* If we have already decided that far jumps may be used,
26625 do not bother checking again, and always return true even if
26626 it turns out that they are not being used. Once we have made
26627 the decision that far jumps are present (and that hence the link
26628 register will be pushed onto the stack) we cannot go back on it. */
26629 if (cfun->machine->far_jump_used)
26630 return 1;
26631
26632 /* If this function is not being called from the prologue/epilogue
26633 generation code then it must be being called from the
26634 INITIAL_ELIMINATION_OFFSET macro. */
26635 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26636 {
26637 /* In this case we know that we are being asked about the elimination
26638 of the arg pointer register. If that register is not being used,
26639 then there are no arguments on the stack, and we do not have to
26640 worry that a far jump might force the prologue to push the link
26641 register, changing the stack offsets. In this case we can just
26642 return false, since the presence of far jumps in the function will
26643 not affect stack offsets.
26644
26645 If the arg pointer is live (or if it was live, but has now been
26646 eliminated and so set to dead) then we do have to test to see if
26647 the function might contain a far jump. This test can lead to some
26648 false negatives, since before reload is completed, then length of
26649 branch instructions is not known, so gcc defaults to returning their
26650 longest length, which in turn sets the far jump attribute to true.
26651
26652 A false negative will not result in bad code being generated, but it
26653 will result in a needless push and pop of the link register. We
26654 hope that this does not occur too often.
26655
26656 If we need doubleword stack alignment this could affect the other
26657 elimination offsets so we can't risk getting it wrong. */
26658 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26659 cfun->machine->arg_pointer_live = 1;
26660 else if (!cfun->machine->arg_pointer_live)
26661 return 0;
26662 }
26663
26664 /* We should not change far_jump_used during or after reload, as there is
26665 no chance to change stack frame layout. */
26666 if (reload_in_progress || reload_completed)
26667 return 0;
26668
26669 /* Check to see if the function contains a branch
26670 insn with the far jump attribute set. */
26671 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26672 {
26673 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26674 {
26675 far_jump = true;
26676 }
26677 func_size += get_attr_length (insn);
26678 }
26679
26680 /* The far_jump attribute will always be true for thumb1 before the
26681 shorten_branch pass, so checking the far_jump attribute before
26682 shorten_branch is not very useful.
26683
26684 The following heuristic tries to estimate more accurately whether a
26685 far jump will eventually be used. The heuristic is very conservative,
26686 as there is no chance to roll back the decision not to use a far jump.
26687
26688 The Thumb1 long branch offset range is -2048 to 2046. In the worst
26689 case each 2-byte insn is associated with a 4-byte constant pool entry,
26690 so using 2048/3 as the function-size threshold is conservative enough. */
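/* Worked example of the threshold: in the worst case every 2-byte insn
drags in a 4-byte literal-pool entry, so a function whose insns total
func_size bytes can span roughly func_size + 2 * func_size bytes. With
func_size = 700, 3 * 700 = 2100 >= 2048, so the code below records that
far jumps are used. */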
26691 if (far_jump)
26692 {
26693 if ((func_size * 3) >= 2048)
26694 {
26695 /* Record the fact that we have decided that
26696 the function does use far jumps. */
26697 cfun->machine->far_jump_used = 1;
26698 return 1;
26699 }
26700 }
26701
26702 return 0;
26703 }
26704
26705 /* Return nonzero if FUNC must be entered in ARM mode. */
26706 int
26707 is_called_in_ARM_mode (tree func)
26708 {
26709 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26710
26711 /* Ignore the problem about functions whose address is taken. */
26712 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26713 return TRUE;
26714
26715 #ifdef ARM_PE
26716 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26717 #else
26718 return FALSE;
26719 #endif
26720 }
26721
26722 /* Given the stack offsets and register mask in OFFSETS, decide how
26723 many additional registers to push instead of subtracting a constant
26724 from SP. For epilogues the principle is the same except we use pop.
26725 FOR_PROLOGUE indicates which we're generating. */
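/* As an illustrative sketch: if 16 more bytes of frame are needed and four
low registers are not otherwise being saved, pushing them in the same
push insn lowers SP by those 16 bytes and replaces a separate
'sub sp, #16'; in the epilogue, popping into call-clobbered registers
whose values are dead replaces the matching 'add sp, #16'. */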
26726 static int
26727 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26728 {
26729 HOST_WIDE_INT amount;
26730 unsigned long live_regs_mask = offsets->saved_regs_mask;
26731 /* Extract a mask of the ones we can give to the Thumb's push/pop
26732 instruction. */
26733 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26734 /* Then count how many other high registers will need to be pushed. */
26735 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26736 int n_free, reg_base, size;
26737
26738 if (!for_prologue && frame_pointer_needed)
26739 amount = offsets->locals_base - offsets->saved_regs;
26740 else
26741 amount = offsets->outgoing_args - offsets->saved_regs;
26742
26743 /* If the stack frame size is 512 exactly, we can save one load
26744 instruction, which should make this a win even when optimizing
26745 for speed. */
26746 if (!optimize_size && amount != 512)
26747 return 0;
26748
26749 /* Can't do this if there are high registers to push. */
26750 if (high_regs_pushed != 0)
26751 return 0;
26752
26753 /* Shouldn't do it in the prologue if no registers would normally
26754 be pushed at all. In the epilogue, also allow it if we'll have
26755 a pop insn for the PC. */
26756 if (l_mask == 0
26757 && (for_prologue
26758 || TARGET_BACKTRACE
26759 || (live_regs_mask & 1 << LR_REGNUM) == 0
26760 || TARGET_INTERWORK
26761 || crtl->args.pretend_args_size != 0))
26762 return 0;
26763
26764 /* Don't do this if thumb_expand_prologue wants to emit instructions
26765 between the push and the stack frame allocation. */
26766 if (for_prologue
26767 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26768 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26769 return 0;
26770
26771 reg_base = 0;
26772 n_free = 0;
26773 if (!for_prologue)
26774 {
26775 size = arm_size_return_regs ();
26776 reg_base = ARM_NUM_INTS (size);
26777 live_regs_mask >>= reg_base;
26778 }
26779
26780 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26781 && (for_prologue || call_used_regs[reg_base + n_free]))
26782 {
26783 live_regs_mask >>= 1;
26784 n_free++;
26785 }
26786
26787 if (n_free == 0)
26788 return 0;
26789 gcc_assert (amount / 4 * 4 == amount);
26790
26791 if (amount >= 512 && (amount - n_free * 4) < 512)
26792 return (amount - 508) / 4;
26793 if (amount <= n_free * 4)
26794 return amount / 4;
26795 return 0;
26796 }
26797
26798 /* The bits which aren't usefully expanded as rtl. */
26799 const char *
26800 thumb1_unexpanded_epilogue (void)
26801 {
26802 arm_stack_offsets *offsets;
26803 int regno;
26804 unsigned long live_regs_mask = 0;
26805 int high_regs_pushed = 0;
26806 int extra_pop;
26807 int had_to_push_lr;
26808 int size;
26809
26810 if (cfun->machine->return_used_this_function != 0)
26811 return "";
26812
26813 if (IS_NAKED (arm_current_func_type ()))
26814 return "";
26815
26816 offsets = arm_get_frame_offsets ();
26817 live_regs_mask = offsets->saved_regs_mask;
26818 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26819
26820 /* Deduce the registers used from the function's return value.
26821 This is more reliable than examining df_regs_ever_live_p () because that
26822 will be set if the register is ever used in the function, not just if
26823 the register is used to hold a return value. */
26824 size = arm_size_return_regs ();
26825
26826 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26827 if (extra_pop > 0)
26828 {
26829 unsigned long extra_mask = (1 << extra_pop) - 1;
26830 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26831 }
26832
26833 /* The prologue may have pushed some high registers to use as
26834 work registers, e.g. the testsuite file:
26835 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26836 compiles to produce:
26837 push {r4, r5, r6, r7, lr}
26838 mov r7, r9
26839 mov r6, r8
26840 push {r6, r7}
26841 as part of the prologue. We have to undo that pushing here. */
26842
26843 if (high_regs_pushed)
26844 {
26845 unsigned long mask = live_regs_mask & 0xff;
26846 int next_hi_reg;
26847
26848 /* The available low registers depend on the size of the value we are
26849 returning. */
26850 if (size <= 12)
26851 mask |= 1 << 3;
26852 if (size <= 8)
26853 mask |= 1 << 2;
26854
26855 if (mask == 0)
26856 /* Oh dear! We have no low registers into which we can pop
26857 high registers! */
26858 internal_error
26859 ("no low registers available for popping high registers");
26860
26861 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26862 if (live_regs_mask & (1 << next_hi_reg))
26863 break;
26864
26865 while (high_regs_pushed)
26866 {
26867 /* Find lo register(s) into which the high register(s) can
26868 be popped. */
26869 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26870 {
26871 if (mask & (1 << regno))
26872 high_regs_pushed--;
26873 if (high_regs_pushed == 0)
26874 break;
26875 }
26876
26877 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26878
26879 /* Pop the values into the low register(s). */
26880 thumb_pop (asm_out_file, mask);
26881
26882 /* Move the value(s) into the high registers. */
26883 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26884 {
26885 if (mask & (1 << regno))
26886 {
26887 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26888 regno);
26889
26890 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26891 if (live_regs_mask & (1 << next_hi_reg))
26892 break;
26893 }
26894 }
26895 }
26896 live_regs_mask &= ~0x0f00;
26897 }
26898
26899 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26900 live_regs_mask &= 0xff;
26901
26902 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26903 {
26904 /* Pop the return address into the PC. */
26905 if (had_to_push_lr)
26906 live_regs_mask |= 1 << PC_REGNUM;
26907
26908 /* Either no argument registers were pushed or a backtrace
26909 structure was created which includes an adjusted stack
26910 pointer, so just pop everything. */
26911 if (live_regs_mask)
26912 thumb_pop (asm_out_file, live_regs_mask);
26913
26914 /* We have either just popped the return address into the
26915 PC or it was kept in LR for the entire function.
26916 Note that thumb_pop has already called thumb_exit if the
26917 PC was in the list. */
26918 if (!had_to_push_lr)
26919 thumb_exit (asm_out_file, LR_REGNUM);
26920 }
26921 else
26922 {
26923 /* Pop everything but the return address. */
26924 if (live_regs_mask)
26925 thumb_pop (asm_out_file, live_regs_mask);
26926
26927 if (had_to_push_lr)
26928 {
26929 if (size > 12)
26930 {
26931 /* We have no free low regs, so save one. */
26932 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26933 LAST_ARG_REGNUM);
26934 }
26935
26936 /* Get the return address into a temporary register. */
26937 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26938
26939 if (size > 12)
26940 {
26941 /* Move the return address to lr. */
26942 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26943 LAST_ARG_REGNUM);
26944 /* Restore the low register. */
26945 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26946 IP_REGNUM);
26947 regno = LR_REGNUM;
26948 }
26949 else
26950 regno = LAST_ARG_REGNUM;
26951 }
26952 else
26953 regno = LR_REGNUM;
26954
26955 /* Remove the argument registers that were pushed onto the stack. */
26956 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26957 SP_REGNUM, SP_REGNUM,
26958 crtl->args.pretend_args_size);
26959
26960 thumb_exit (asm_out_file, regno);
26961 }
26962
26963 return "";
26964 }
26965
26966 /* Functions to save and restore machine-specific function data. */
26967 static struct machine_function *
26968 arm_init_machine_status (void)
26969 {
26970 struct machine_function *machine;
26971 machine = ggc_cleared_alloc<machine_function> ();
26972
26973 #if ARM_FT_UNKNOWN != 0
26974 machine->func_type = ARM_FT_UNKNOWN;
26975 #endif
26976 return machine;
26977 }
26978
26979 /* Return an RTX indicating where the return address to the
26980 calling function can be found. */
26981 rtx
26982 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26983 {
26984 if (count != 0)
26985 return NULL_RTX;
26986
26987 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26988 }
26989
26990 /* Do anything needed before RTL is emitted for each function. */
26991 void
26992 arm_init_expanders (void)
26993 {
26994 /* Arrange to initialize and mark the machine per-function status. */
26995 init_machine_status = arm_init_machine_status;
26996
26997 /* This is to stop the combine pass optimizing away the alignment
26998 adjustment of va_arg. */
26999 /* ??? It is claimed that this should not be necessary. */
27000 if (cfun)
27001 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27002 }
27003
27004
27005 /* Like arm_compute_initial_elimination_offset. Simpler because there
27006 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
27007 to point at the base of the local variables after static stack
27008 space for a function has been allocated. */
27009
27010 HOST_WIDE_INT
27011 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27012 {
27013 arm_stack_offsets *offsets;
27014
27015 offsets = arm_get_frame_offsets ();
27016
27017 switch (from)
27018 {
27019 case ARG_POINTER_REGNUM:
27020 switch (to)
27021 {
27022 case STACK_POINTER_REGNUM:
27023 return offsets->outgoing_args - offsets->saved_args;
27024
27025 case FRAME_POINTER_REGNUM:
27026 return offsets->soft_frame - offsets->saved_args;
27027
27028 case ARM_HARD_FRAME_POINTER_REGNUM:
27029 return offsets->saved_regs - offsets->saved_args;
27030
27031 case THUMB_HARD_FRAME_POINTER_REGNUM:
27032 return offsets->locals_base - offsets->saved_args;
27033
27034 default:
27035 gcc_unreachable ();
27036 }
27037 break;
27038
27039 case FRAME_POINTER_REGNUM:
27040 switch (to)
27041 {
27042 case STACK_POINTER_REGNUM:
27043 return offsets->outgoing_args - offsets->soft_frame;
27044
27045 case ARM_HARD_FRAME_POINTER_REGNUM:
27046 return offsets->saved_regs - offsets->soft_frame;
27047
27048 case THUMB_HARD_FRAME_POINTER_REGNUM:
27049 return offsets->locals_base - offsets->soft_frame;
27050
27051 default:
27052 gcc_unreachable ();
27053 }
27054 break;
27055
27056 default:
27057 gcc_unreachable ();
27058 }
27059 }
27060
27061 /* Generate the function's prologue. */
27062
27063 void
27064 thumb1_expand_prologue (void)
27065 {
27066 rtx insn;
27067
27068 HOST_WIDE_INT amount;
27069 arm_stack_offsets *offsets;
27070 unsigned long func_type;
27071 int regno;
27072 unsigned long live_regs_mask;
27073 unsigned long l_mask;
27074 unsigned high_regs_pushed = 0;
27075
27076 func_type = arm_current_func_type ();
27077
27078 /* Naked functions don't have prologues. */
27079 if (IS_NAKED (func_type))
27080 return;
27081
27082 if (IS_INTERRUPT (func_type))
27083 {
27084 error ("interrupt Service Routines cannot be coded in Thumb mode");
27085 return;
27086 }
27087
27088 if (is_called_in_ARM_mode (current_function_decl))
27089 emit_insn (gen_prologue_thumb1_interwork ());
27090
27091 offsets = arm_get_frame_offsets ();
27092 live_regs_mask = offsets->saved_regs_mask;
27093
27094 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27095 l_mask = live_regs_mask & 0x40ff;
27096 /* Then count how many other high registers will need to be pushed. */
27097 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27098
27099 if (crtl->args.pretend_args_size)
27100 {
27101 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27102
27103 if (cfun->machine->uses_anonymous_args)
27104 {
27105 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27106 unsigned long mask;
27107
27108 mask = 1ul << (LAST_ARG_REGNUM + 1);
27109 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27110
27111 insn = thumb1_emit_multi_reg_push (mask, 0);
27112 }
27113 else
27114 {
27115 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27116 stack_pointer_rtx, x));
27117 }
27118 RTX_FRAME_RELATED_P (insn) = 1;
27119 }
27120
27121 if (TARGET_BACKTRACE)
27122 {
27123 HOST_WIDE_INT offset = 0;
27124 unsigned work_register;
27125 rtx work_reg, x, arm_hfp_rtx;
27126
27127 /* We have been asked to create a stack backtrace structure.
27128 The code looks like this:
27129
27130 0 .align 2
27131 0 func:
27132 0 sub SP, #16 Reserve space for 4 registers.
27133 2 push {R7} Push low registers.
27134 4 add R7, SP, #20 Get the stack pointer before the push.
27135 6 str R7, [SP, #8] Store the stack pointer
27136 (before reserving the space).
27137 8 mov R7, PC Get hold of the start of this code + 12.
27138 10 str R7, [SP, #16] Store it.
27139 12 mov R7, FP Get hold of the current frame pointer.
27140 14 str R7, [SP, #4] Store it.
27141 16 mov R7, LR Get hold of the current return address.
27142 18 str R7, [SP, #12] Store it.
27143 20 add R7, SP, #16 Point at the start of the
27144 backtrace structure.
27145 22 mov FP, R7 Put this value into the frame pointer. */
27146
27147 work_register = thumb_find_work_register (live_regs_mask);
27148 work_reg = gen_rtx_REG (SImode, work_register);
27149 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27150
27151 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27152 stack_pointer_rtx, GEN_INT (-16)));
27153 RTX_FRAME_RELATED_P (insn) = 1;
27154
27155 if (l_mask)
27156 {
27157 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27158 RTX_FRAME_RELATED_P (insn) = 1;
27159
27160 offset = bit_count (l_mask) * UNITS_PER_WORD;
27161 }
27162
27163 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27164 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27165
27166 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27167 x = gen_frame_mem (SImode, x);
27168 emit_move_insn (x, work_reg);
27169
27170 /* Make sure that the instruction fetching the PC is in the right place
27171 to calculate "start of backtrace creation code + 12". */
27172 /* ??? The stores using the common WORK_REG ought to be enough to
27173 prevent the scheduler from doing anything weird. Failing that
27174 we could always move all of the following into an UNSPEC_VOLATILE. */
27175 if (l_mask)
27176 {
27177 x = gen_rtx_REG (SImode, PC_REGNUM);
27178 emit_move_insn (work_reg, x);
27179
27180 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27181 x = gen_frame_mem (SImode, x);
27182 emit_move_insn (x, work_reg);
27183
27184 emit_move_insn (work_reg, arm_hfp_rtx);
27185
27186 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27187 x = gen_frame_mem (SImode, x);
27188 emit_move_insn (x, work_reg);
27189 }
27190 else
27191 {
27192 emit_move_insn (work_reg, arm_hfp_rtx);
27193
27194 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27195 x = gen_frame_mem (SImode, x);
27196 emit_move_insn (x, work_reg);
27197
27198 x = gen_rtx_REG (SImode, PC_REGNUM);
27199 emit_move_insn (work_reg, x);
27200
27201 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27202 x = gen_frame_mem (SImode, x);
27203 emit_move_insn (x, work_reg);
27204 }
27205
27206 x = gen_rtx_REG (SImode, LR_REGNUM);
27207 emit_move_insn (work_reg, x);
27208
27209 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27210 x = gen_frame_mem (SImode, x);
27211 emit_move_insn (x, work_reg);
27212
27213 x = GEN_INT (offset + 12);
27214 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27215
27216 emit_move_insn (arm_hfp_rtx, work_reg);
27217 }
27218 /* Optimization: If we are not pushing any low registers but we are going
27219 to push some high registers then delay our first push. This will just
27220 be a push of LR and we can combine it with the push of the first high
27221 register. */
27222 else if ((l_mask & 0xff) != 0
27223 || (high_regs_pushed == 0 && l_mask))
27224 {
27225 unsigned long mask = l_mask;
27226 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27227 insn = thumb1_emit_multi_reg_push (mask, mask);
27228 RTX_FRAME_RELATED_P (insn) = 1;
27229 }
27230
27231 if (high_regs_pushed)
27232 {
27233 unsigned pushable_regs;
27234 unsigned next_hi_reg;
27235 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27236 : crtl->args.info.nregs;
27237 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27238
27239 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27240 if (live_regs_mask & (1 << next_hi_reg))
27241 break;
27242
27243 /* Here we need to mask out the registers used for passing arguments,
27244 even if they could be pushed. This avoids using them to stash the high
27245 registers, since such a stash may clobber argument values that are still live. */
27246 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27247
27248 if (pushable_regs == 0)
27249 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27250
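/* Copy the live high registers into the available low registers, push
those low registers (recording which high registers they really hold),
and repeat until all live high registers have been saved. */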
27251 while (high_regs_pushed > 0)
27252 {
27253 unsigned long real_regs_mask = 0;
27254
27255 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27256 {
27257 if (pushable_regs & (1 << regno))
27258 {
27259 emit_move_insn (gen_rtx_REG (SImode, regno),
27260 gen_rtx_REG (SImode, next_hi_reg));
27261
27262 high_regs_pushed --;
27263 real_regs_mask |= (1 << next_hi_reg);
27264
27265 if (high_regs_pushed)
27266 {
27267 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27268 next_hi_reg --)
27269 if (live_regs_mask & (1 << next_hi_reg))
27270 break;
27271 }
27272 else
27273 {
27274 pushable_regs &= ~((1 << regno) - 1);
27275 break;
27276 }
27277 }
27278 }
27279
27280 /* If we had to find a work register and we have not yet
27281 saved the LR then add it to the list of regs to push. */
27282 if (l_mask == (1 << LR_REGNUM))
27283 {
27284 pushable_regs |= l_mask;
27285 real_regs_mask |= l_mask;
27286 l_mask = 0;
27287 }
27288
27289 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27290 RTX_FRAME_RELATED_P (insn) = 1;
27291 }
27292 }
27293
27294 /* Load the pic register before setting the frame pointer,
27295 so we can use r7 as a temporary work register. */
27296 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27297 arm_load_pic_register (live_regs_mask);
27298
27299 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27300 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27301 stack_pointer_rtx);
27302
27303 if (flag_stack_usage_info)
27304 current_function_static_stack_size
27305 = offsets->outgoing_args - offsets->saved_args;
27306
27307 amount = offsets->outgoing_args - offsets->saved_regs;
27308 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27309 if (amount)
27310 {
27311 if (amount < 512)
27312 {
27313 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27314 GEN_INT (- amount)));
27315 RTX_FRAME_RELATED_P (insn) = 1;
27316 }
27317 else
27318 {
27319 rtx reg, dwarf;
27320
27321 /* The stack decrement is too big for an immediate value in a single
27322 insn. In theory we could issue multiple subtracts, but after
27323 three of them it becomes more space efficient to place the full
27324 value in the constant pool and load into a register. (Also the
27325 ARM debugger really likes to see only one stack decrement per
27326 function). So instead we look for a scratch register into which
27327 we can load the decrement, and then we subtract this from the
27328 stack pointer. Unfortunately, in Thumb mode the only available
27329 scratch registers are the argument registers, and we cannot use
27330 these as they may hold arguments to the function. Instead we
27331 attempt to locate a call preserved register which is used by this
27332 function. If we can find one, then we know that it will have
27333 been pushed at the start of the prologue and so we can corrupt
27334 it now. */
27335 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27336 if (live_regs_mask & (1 << regno))
27337 break;
27338
27339 gcc_assert(regno <= LAST_LO_REGNUM);
27340
27341 reg = gen_rtx_REG (SImode, regno);
27342
27343 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27344
27345 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27346 stack_pointer_rtx, reg));
27347
27348 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27349 plus_constant (Pmode, stack_pointer_rtx,
27350 -amount));
27351 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27352 RTX_FRAME_RELATED_P (insn) = 1;
27353 }
27354 }
27355
27356 if (frame_pointer_needed)
27357 thumb_set_frame_pointer (offsets);
27358
27359 /* If we are profiling, make sure no instructions are scheduled before
27360 the call to mcount. Similarly if the user has requested no
27361 scheduling in the prologue. Likewise if we want non-call exceptions
27362 using the EABI unwinder, to prevent faulting instructions from being
27363 swapped with a stack adjustment. */
27364 if (crtl->profile || !TARGET_SCHED_PROLOG
27365 || (arm_except_unwind_info (&global_options) == UI_TARGET
27366 && cfun->can_throw_non_call_exceptions))
27367 emit_insn (gen_blockage ());
27368
27369 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27370 if (live_regs_mask & 0xff)
27371 cfun->machine->lr_save_eliminated = 0;
27372 }
27373
27374 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27375 single POP instruction can be generated. LR should be replaced by PC. All
27376 the required checks are already done by USE_RETURN_INSN (). Hence,
27377 all we really need to decide here is whether a single register or
27378 multiple registers are to be popped. */
27379 void
27380 thumb2_expand_return (bool simple_return)
27381 {
27382 int i, num_regs;
27383 unsigned long saved_regs_mask;
27384 arm_stack_offsets *offsets;
27385
27386 offsets = arm_get_frame_offsets ();
27387 saved_regs_mask = offsets->saved_regs_mask;
27388
27389 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27390 if (saved_regs_mask & (1 << i))
27391 num_regs++;
27392
27393 if (!simple_return && saved_regs_mask)
27394 {
27395 if (num_regs == 1)
27396 {
27397 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27398 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27399 rtx addr = gen_rtx_MEM (SImode,
27400 gen_rtx_POST_INC (SImode,
27401 stack_pointer_rtx));
27402 set_mem_alias_set (addr, get_frame_alias_set ());
27403 XVECEXP (par, 0, 0) = ret_rtx;
27404 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27405 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27406 emit_jump_insn (par);
27407 }
27408 else
27409 {
27410 saved_regs_mask &= ~ (1 << LR_REGNUM);
27411 saved_regs_mask |= (1 << PC_REGNUM);
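/* E.g. what would otherwise be "pop {r4, lr}; bx lr" becomes a single
"pop {r4, pc}". */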
27412 arm_emit_multi_reg_pop (saved_regs_mask);
27413 }
27414 }
27415 else
27416 {
27417 emit_jump_insn (simple_return_rtx);
27418 }
27419 }
27420
27421 void
27422 thumb1_expand_epilogue (void)
27423 {
27424 HOST_WIDE_INT amount;
27425 arm_stack_offsets *offsets;
27426 int regno;
27427
27428 /* Naked functions don't have epilogues. */
27429 if (IS_NAKED (arm_current_func_type ()))
27430 return;
27431
27432 offsets = arm_get_frame_offsets ();
27433 amount = offsets->outgoing_args - offsets->saved_regs;
27434
27435 if (frame_pointer_needed)
27436 {
27437 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27438 amount = offsets->locals_base - offsets->saved_regs;
27439 }
27440 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27441
27442 gcc_assert (amount >= 0);
27443 if (amount)
27444 {
27445 emit_insn (gen_blockage ());
27446
27447 if (amount < 512)
27448 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27449 GEN_INT (amount)));
27450 else
27451 {
27452 /* r3 is always free in the epilogue. */
27453 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27454
27455 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27456 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27457 }
27458 }
27459
27460 /* Emit a USE (stack_pointer_rtx), so that
27461 the stack adjustment will not be deleted. */
27462 emit_insn (gen_force_register_use (stack_pointer_rtx));
27463
27464 if (crtl->profile || !TARGET_SCHED_PROLOG)
27465 emit_insn (gen_blockage ());
27466
27467 /* Emit a clobber for each register that will be restored in the epilogue,
27468 so that flow2 will get register lifetimes correct. */
27469 for (regno = 0; regno < 13; regno++)
27470 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27471 emit_clobber (gen_rtx_REG (SImode, regno));
27472
27473 if (! df_regs_ever_live_p (LR_REGNUM))
27474 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27475 }
27476
27477 /* Epilogue code for APCS frame. */
27478 static void
27479 arm_expand_epilogue_apcs_frame (bool really_return)
27480 {
27481 unsigned long func_type;
27482 unsigned long saved_regs_mask;
27483 int num_regs = 0;
27484 int i;
27485 int floats_from_frame = 0;
27486 arm_stack_offsets *offsets;
27487
27488 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27489 func_type = arm_current_func_type ();
27490
27491 /* Get frame offsets for ARM. */
27492 offsets = arm_get_frame_offsets ();
27493 saved_regs_mask = offsets->saved_regs_mask;
27494
27495 /* Find the offset of the floating-point save area in the frame. */
27496 floats_from_frame
27497 = (offsets->saved_args
27498 + arm_compute_static_chain_stack_bytes ()
27499 - offsets->frame);
27500
27501 /* Compute how many core registers are saved and how far away the floats are. */
27502 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27503 if (saved_regs_mask & (1 << i))
27504 {
27505 num_regs++;
27506 floats_from_frame += 4;
27507 }
27508
27509 if (TARGET_HARD_FLOAT && TARGET_VFP)
27510 {
27511 int start_reg;
27512 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27513
27514 /* The offset is from IP_REGNUM. */
27515 int saved_size = arm_get_vfp_saved_size ();
27516 if (saved_size > 0)
27517 {
27518 rtx insn;
27519 floats_from_frame += saved_size;
27520 insn = emit_insn (gen_addsi3 (ip_rtx,
27521 hard_frame_pointer_rtx,
27522 GEN_INT (-floats_from_frame)));
27523 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27524 ip_rtx, hard_frame_pointer_rtx);
27525 }
27526
27527 /* Generate VFP register multi-pop. */
27528 start_reg = FIRST_VFP_REGNUM;
27529
27530 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27531 /* Look for a case where a reg does not need restoring. */
27532 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27533 && (!df_regs_ever_live_p (i + 1)
27534 || call_used_regs[i + 1]))
27535 {
27536 if (start_reg != i)
27537 arm_emit_vfp_multi_reg_pop (start_reg,
27538 (i - start_reg) / 2,
27539 gen_rtx_REG (SImode,
27540 IP_REGNUM));
27541 start_reg = i + 2;
27542 }
27543
27544 /* Restore the remaining regs that we have discovered (or possibly
27545 even all of them, if the conditional in the for loop never
27546 fired). */
27547 if (start_reg != i)
27548 arm_emit_vfp_multi_reg_pop (start_reg,
27549 (i - start_reg) / 2,
27550 gen_rtx_REG (SImode, IP_REGNUM));
27551 }
27552
27553 if (TARGET_IWMMXT)
27554 {
27555 /* The frame pointer is guaranteed to be non-double-word aligned, as
27556 it is set to double-word-aligned old_stack_pointer - 4. */
27557 rtx insn;
27558 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27559
27560 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27561 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27562 {
27563 rtx addr = gen_frame_mem (V2SImode,
27564 plus_constant (Pmode, hard_frame_pointer_rtx,
27565 - lrm_count * 4));
27566 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27567 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27568 gen_rtx_REG (V2SImode, i),
27569 NULL_RTX);
27570 lrm_count += 2;
27571 }
27572 }
27573
27574 /* saved_regs_mask should contain IP, which holds the old stack pointer
27575 from the time the activation record was created. Since SP and IP are adjacent
27576 registers, we can restore the value directly into SP. */
27577 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27578 saved_regs_mask &= ~(1 << IP_REGNUM);
27579 saved_regs_mask |= (1 << SP_REGNUM);
27580
27581 /* There are two registers left in saved_regs_mask - LR and PC. We
27582 only need to restore LR (the return address), but to
27583 save time we can load it directly into PC, unless we need a
27584 special function exit sequence, or we are not really returning. */
27585 if (really_return
27586 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27587 && !crtl->calls_eh_return)
27588 /* Delete LR from the register mask, so that LR on
27589 the stack is loaded into the PC in the register mask. */
27590 saved_regs_mask &= ~(1 << LR_REGNUM);
27591 else
27592 saved_regs_mask &= ~(1 << PC_REGNUM);
27593
27594 num_regs = bit_count (saved_regs_mask);
27595 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27596 {
27597 rtx insn;
27598 emit_insn (gen_blockage ());
27599 /* Unwind the stack to just below the saved registers. */
27600 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27601 hard_frame_pointer_rtx,
27602 GEN_INT (- 4 * num_regs)));
27603
27604 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27605 stack_pointer_rtx, hard_frame_pointer_rtx);
27606 }
27607
27608 arm_emit_multi_reg_pop (saved_regs_mask);
27609
27610 if (IS_INTERRUPT (func_type))
27611 {
27612 /* Interrupt handlers will have pushed the
27613 IP onto the stack, so restore it now. */
27614 rtx insn;
27615 rtx addr = gen_rtx_MEM (SImode,
27616 gen_rtx_POST_INC (SImode,
27617 stack_pointer_rtx));
27618 set_mem_alias_set (addr, get_frame_alias_set ());
27619 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27620 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27621 gen_rtx_REG (SImode, IP_REGNUM),
27622 NULL_RTX);
27623 }
27624
27625 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27626 return;
27627
27628 if (crtl->calls_eh_return)
27629 emit_insn (gen_addsi3 (stack_pointer_rtx,
27630 stack_pointer_rtx,
27631 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27632
27633 if (IS_STACKALIGN (func_type))
27634 /* Restore the original stack pointer. Before prologue, the stack was
27635 realigned and the original stack pointer saved in r0. For details,
27636 see comment in arm_expand_prologue. */
27637 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27638
27639 emit_jump_insn (simple_return_rtx);
27640 }
27641
27642 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27643 function is not a sibcall. */
27644 void
27645 arm_expand_epilogue (bool really_return)
27646 {
27647 unsigned long func_type;
27648 unsigned long saved_regs_mask;
27649 int num_regs = 0;
27650 int i;
27651 int amount;
27652 arm_stack_offsets *offsets;
27653
27654 func_type = arm_current_func_type ();
27655
27656 /* Naked functions don't have epilogues. Hence, generate a return pattern and
27657 let output_return_instruction take care of any instruction emission. */
27658 if (IS_NAKED (func_type)
27659 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27660 {
27661 if (really_return)
27662 emit_jump_insn (simple_return_rtx);
27663 return;
27664 }
27665
27666 /* If we are throwing an exception, then we really must be doing a
27667 return, so we can't tail-call. */
27668 gcc_assert (!crtl->calls_eh_return || really_return);
27669
27670 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27671 {
27672 arm_expand_epilogue_apcs_frame (really_return);
27673 return;
27674 }
27675
27676 /* Get frame offsets for ARM. */
27677 offsets = arm_get_frame_offsets ();
27678 saved_regs_mask = offsets->saved_regs_mask;
27679 num_regs = bit_count (saved_regs_mask);
27680
27681 if (frame_pointer_needed)
27682 {
27683 rtx insn;
27684 /* Restore stack pointer if necessary. */
27685 if (TARGET_ARM)
27686 {
27687 /* In ARM mode, the frame pointer points to the first saved register.
27688 Restore the stack pointer to the last saved register. */
27689 amount = offsets->frame - offsets->saved_regs;
27690
27691 /* Force out any pending memory operations that reference stacked data
27692 before stack de-allocation occurs. */
27693 emit_insn (gen_blockage ());
27694 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27695 hard_frame_pointer_rtx,
27696 GEN_INT (amount)));
27697 arm_add_cfa_adjust_cfa_note (insn, amount,
27698 stack_pointer_rtx,
27699 hard_frame_pointer_rtx);
27700
27701 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27702 deleted. */
27703 emit_insn (gen_force_register_use (stack_pointer_rtx));
27704 }
27705 else
27706 {
27707 /* In Thumb-2 mode, the frame pointer points to the last saved
27708 register. */
27709 amount = offsets->locals_base - offsets->saved_regs;
27710 if (amount)
27711 {
27712 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27713 hard_frame_pointer_rtx,
27714 GEN_INT (amount)));
27715 arm_add_cfa_adjust_cfa_note (insn, amount,
27716 hard_frame_pointer_rtx,
27717 hard_frame_pointer_rtx);
27718 }
27719
27720 /* Force out any pending memory operations that reference stacked data
27721 before stack de-allocation occurs. */
27722 emit_insn (gen_blockage ());
27723 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27724 hard_frame_pointer_rtx));
27725 arm_add_cfa_adjust_cfa_note (insn, 0,
27726 stack_pointer_rtx,
27727 hard_frame_pointer_rtx);
27728 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27729 deleted. */
27730 emit_insn (gen_force_register_use (stack_pointer_rtx));
27731 }
27732 }
27733 else
27734 {
27735 /* Pop off outgoing args and local frame to adjust stack pointer to
27736 last saved register. */
27737 amount = offsets->outgoing_args - offsets->saved_regs;
27738 if (amount)
27739 {
27740 rtx tmp;
27741 /* Force out any pending memory operations that reference stacked data
27742 before stack de-allocation occurs. */
27743 emit_insn (gen_blockage ());
27744 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27745 stack_pointer_rtx,
27746 GEN_INT (amount)));
27747 arm_add_cfa_adjust_cfa_note (tmp, amount,
27748 stack_pointer_rtx, stack_pointer_rtx);
27749 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27750 not deleted. */
27751 emit_insn (gen_force_register_use (stack_pointer_rtx));
27752 }
27753 }
27754
27755 if (TARGET_HARD_FLOAT && TARGET_VFP)
27756 {
27757 /* Generate VFP register multi-pop. */
27758 int end_reg = LAST_VFP_REGNUM + 1;
27759
27760 /* Scan the registers in reverse order. We need to match
27761 any groupings made in the prologue and generate matching
27762 vldm operations. The need to match groups is because,
27763 unlike pop, vldm can only do consecutive regs. */
27764 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27765 /* Look for a case where a reg does not need restoring. */
27766 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27767 && (!df_regs_ever_live_p (i + 1)
27768 || call_used_regs[i + 1]))
27769 {
27770 /* Restore the regs discovered so far (from reg+2 to
27771 end_reg). */
27772 if (end_reg > i + 2)
27773 arm_emit_vfp_multi_reg_pop (i + 2,
27774 (end_reg - (i + 2)) / 2,
27775 stack_pointer_rtx);
27776 end_reg = i;
27777 }
27778
27779 /* Restore the remaining regs that we have discovered (or possibly
27780 even all of them, if the conditional in the for loop never
27781 fired). */
27782 if (end_reg > i + 2)
27783 arm_emit_vfp_multi_reg_pop (i + 2,
27784 (end_reg - (i + 2)) / 2,
27785 stack_pointer_rtx);
27786 }
27787
27788 if (TARGET_IWMMXT)
27789 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27790 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27791 {
27792 rtx insn;
27793 rtx addr = gen_rtx_MEM (V2SImode,
27794 gen_rtx_POST_INC (SImode,
27795 stack_pointer_rtx));
27796 set_mem_alias_set (addr, get_frame_alias_set ());
27797 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27798 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27799 gen_rtx_REG (V2SImode, i),
27800 NULL_RTX);
27801 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27802 stack_pointer_rtx, stack_pointer_rtx);
27803 }
27804
27805 if (saved_regs_mask)
27806 {
27807 rtx insn;
27808 bool return_in_pc = false;
27809
27810 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27811 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27812 && !IS_STACKALIGN (func_type)
27813 && really_return
27814 && crtl->args.pretend_args_size == 0
27815 && saved_regs_mask & (1 << LR_REGNUM)
27816 && !crtl->calls_eh_return)
27817 {
27818 saved_regs_mask &= ~(1 << LR_REGNUM);
27819 saved_regs_mask |= (1 << PC_REGNUM);
27820 return_in_pc = true;
27821 }
27822
27823 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27824 {
27825 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27826 if (saved_regs_mask & (1 << i))
27827 {
27828 rtx addr = gen_rtx_MEM (SImode,
27829 gen_rtx_POST_INC (SImode,
27830 stack_pointer_rtx));
27831 set_mem_alias_set (addr, get_frame_alias_set ());
27832
27833 if (i == PC_REGNUM)
27834 {
27835 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27836 XVECEXP (insn, 0, 0) = ret_rtx;
27837 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27838 gen_rtx_REG (SImode, i),
27839 addr);
27840 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27841 insn = emit_jump_insn (insn);
27842 }
27843 else
27844 {
27845 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27846 addr));
27847 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27848 gen_rtx_REG (SImode, i),
27849 NULL_RTX);
27850 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27851 stack_pointer_rtx,
27852 stack_pointer_rtx);
27853 }
27854 }
27855 }
27856 else
27857 {
27858 if (TARGET_LDRD
27859 && current_tune->prefer_ldrd_strd
27860 && !optimize_function_for_size_p (cfun))
27861 {
27862 if (TARGET_THUMB2)
27863 thumb2_emit_ldrd_pop (saved_regs_mask);
27864 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27865 arm_emit_ldrd_pop (saved_regs_mask);
27866 else
27867 arm_emit_multi_reg_pop (saved_regs_mask);
27868 }
27869 else
27870 arm_emit_multi_reg_pop (saved_regs_mask);
27871 }
27872
27873 if (return_in_pc == true)
27874 return;
27875 }
27876
27877 if (crtl->args.pretend_args_size)
27878 {
27879 int i, j;
27880 rtx dwarf = NULL_RTX;
27881 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27882 stack_pointer_rtx,
27883 GEN_INT (crtl->args.pretend_args_size)));
27884
27885 RTX_FRAME_RELATED_P (tmp) = 1;
27886
27887 if (cfun->machine->uses_anonymous_args)
27888 {
27889 /* Restore pretend args. Refer to arm_expand_prologue for how the
27890 pretend args are saved on the stack. */
27891 int num_regs = crtl->args.pretend_args_size / 4;
27892 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27893 for (j = 0, i = 0; j < num_regs; i++)
27894 if (saved_regs_mask & (1 << i))
27895 {
27896 rtx reg = gen_rtx_REG (SImode, i);
27897 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27898 j++;
27899 }
27900 REG_NOTES (tmp) = dwarf;
27901 }
27902 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27903 stack_pointer_rtx, stack_pointer_rtx);
27904 }
27905
27906 if (!really_return)
27907 return;
27908
27909 if (crtl->calls_eh_return)
27910 emit_insn (gen_addsi3 (stack_pointer_rtx,
27911 stack_pointer_rtx,
27912 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27913
27914 if (IS_STACKALIGN (func_type))
27915 /* Restore the original stack pointer. Before prologue, the stack was
27916 realigned and the original stack pointer saved in r0. For details,
27917 see comment in arm_expand_prologue. */
27918 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27919
27920 emit_jump_insn (simple_return_rtx);
27921 }
27922
27923 /* Implementation of insn prologue_thumb1_interwork. This is the first
27924 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27925
27926 const char *
27927 thumb1_output_interwork (void)
27928 {
27929 const char * name;
27930 FILE *f = asm_out_file;
27931
27932 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27933 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27934 == SYMBOL_REF);
27935 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27936
27937 /* Generate code sequence to switch us into Thumb mode. */
27938 /* The .code 32 directive has already been emitted by
27939 ASM_DECLARE_FUNCTION_NAME. */
27940 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27941 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
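/* This emits roughly "orr ip, pc, #1; bx ip"; the set low bit makes the
BX switch the core into Thumb state. */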
27942
27943 /* Generate a label, so that the debugger will notice the
27944 change in instruction sets. This label is also used by
27945 the assembler to bypass the ARM code when this function
27946 is called from a Thumb encoded function elsewhere in the
27947 same file. Hence the definition of STUB_NAME here must
27948 agree with the definition in gas/config/tc-arm.c. */
27949
27950 #define STUB_NAME ".real_start_of"
27951
27952 fprintf (f, "\t.code\t16\n");
27953 #ifdef ARM_PE
27954 if (arm_dllexport_name_p (name))
27955 name = arm_strip_name_encoding (name);
27956 #endif
27957 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27958 fprintf (f, "\t.thumb_func\n");
27959 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27960
27961 return "";
27962 }
27963
27964 /* Handle the case of a double word load into a low register from
27965 a computed memory address. The computed address may involve a
27966 register which is overwritten by the load. */
27967 const char *
27968 thumb_load_double_from_address (rtx *operands)
27969 {
27970 rtx addr;
27971 rtx base;
27972 rtx offset;
27973 rtx arg1;
27974 rtx arg2;
27975
27976 gcc_assert (REG_P (operands[0]));
27977 gcc_assert (MEM_P (operands[1]));
27978
27979 /* Get the memory address. */
27980 addr = XEXP (operands[1], 0);
27981
27982 /* Work out how the memory address is computed. */
27983 switch (GET_CODE (addr))
27984 {
27985 case REG:
27986 operands[2] = adjust_address (operands[1], SImode, 4);
27987
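/* If the low destination register is also the base register, load the
high word first so that the address is not clobbered by the first load. */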
27988 if (REGNO (operands[0]) == REGNO (addr))
27989 {
27990 output_asm_insn ("ldr\t%H0, %2", operands);
27991 output_asm_insn ("ldr\t%0, %1", operands);
27992 }
27993 else
27994 {
27995 output_asm_insn ("ldr\t%0, %1", operands);
27996 output_asm_insn ("ldr\t%H0, %2", operands);
27997 }
27998 break;
27999
28000 case CONST:
28001 /* Compute <address> + 4 for the high order load. */
28002 operands[2] = adjust_address (operands[1], SImode, 4);
28003
28004 output_asm_insn ("ldr\t%0, %1", operands);
28005 output_asm_insn ("ldr\t%H0, %2", operands);
28006 break;
28007
28008 case PLUS:
28009 arg1 = XEXP (addr, 0);
28010 arg2 = XEXP (addr, 1);
28011
28012 if (CONSTANT_P (arg1))
28013 base = arg2, offset = arg1;
28014 else
28015 base = arg1, offset = arg2;
28016
28017 gcc_assert (REG_P (base));
28018
28019 /* Catch the case of <address> = <reg> + <reg> */
28020 if (REG_P (offset))
28021 {
28022 int reg_offset = REGNO (offset);
28023 int reg_base = REGNO (base);
28024 int reg_dest = REGNO (operands[0]);
28025
28026 /* Add the base and offset registers together into the
28027 higher destination register. */
28028 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28029 reg_dest + 1, reg_base, reg_offset);
28030
28031 /* Load the lower destination register from the address in
28032 the higher destination register. */
28033 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28034 reg_dest, reg_dest + 1);
28035
28036 /* Load the higher destination register from its own address
28037 plus 4. */
28038 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28039 reg_dest + 1, reg_dest + 1);
28040 }
28041 else
28042 {
28043 /* Compute <address> + 4 for the high order load. */
28044 operands[2] = adjust_address (operands[1], SImode, 4);
28045
28046 /* If the computed address is held in the low order register
28047 then load the high order register first, otherwise always
28048 load the low order register first. */
28049 if (REGNO (operands[0]) == REGNO (base))
28050 {
28051 output_asm_insn ("ldr\t%H0, %2", operands);
28052 output_asm_insn ("ldr\t%0, %1", operands);
28053 }
28054 else
28055 {
28056 output_asm_insn ("ldr\t%0, %1", operands);
28057 output_asm_insn ("ldr\t%H0, %2", operands);
28058 }
28059 }
28060 break;
28061
28062 case LABEL_REF:
28063 /* With no registers to worry about we can just load the value
28064 directly. */
28065 operands[2] = adjust_address (operands[1], SImode, 4);
28066
28067 output_asm_insn ("ldr\t%H0, %2", operands);
28068 output_asm_insn ("ldr\t%0, %1", operands);
28069 break;
28070
28071 default:
28072 gcc_unreachable ();
28073 }
28074
28075 return "";
28076 }
28077
28078 const char *
28079 thumb_output_move_mem_multiple (int n, rtx *operands)
28080 {
28081 rtx tmp;
28082
28083 switch (n)
28084 {
28085 case 2:
28086 if (REGNO (operands[4]) > REGNO (operands[5]))
28087 {
28088 tmp = operands[4];
28089 operands[4] = operands[5];
28090 operands[5] = tmp;
28091 }
28092 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28093 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28094 break;
28095
28096 case 3:
28097 if (REGNO (operands[4]) > REGNO (operands[5]))
28098 {
28099 tmp = operands[4];
28100 operands[4] = operands[5];
28101 operands[5] = tmp;
28102 }
28103 if (REGNO (operands[5]) > REGNO (operands[6]))
28104 {
28105 tmp = operands[5];
28106 operands[5] = operands[6];
28107 operands[6] = tmp;
28108 }
28109 if (REGNO (operands[4]) > REGNO (operands[5]))
28110 {
28111 tmp = operands[4];
28112 operands[4] = operands[5];
28113 operands[5] = tmp;
28114 }
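/* The three exchanges above sort operands[4..6] into ascending
register-number order, as required by ldmia/stmia register lists. */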
28115
28116 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28117 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28118 break;
28119
28120 default:
28121 gcc_unreachable ();
28122 }
28123
28124 return "";
28125 }
28126
28127 /* Output a call-via instruction for thumb state. */
28128 const char *
28129 thumb_call_via_reg (rtx reg)
28130 {
28131 int regno = REGNO (reg);
28132 rtx *labelp;
28133
28134 gcc_assert (regno < LR_REGNUM);
28135
28136 /* If we are in the normal text section we can use a single instance
28137 per compilation unit. If we are doing function sections, then we need
28138 an entry per section, since we can't rely on reachability. */
28139 if (in_section == text_section)
28140 {
28141 thumb_call_reg_needed = 1;
28142
28143 if (thumb_call_via_label[regno] == NULL)
28144 thumb_call_via_label[regno] = gen_label_rtx ();
28145 labelp = thumb_call_via_label + regno;
28146 }
28147 else
28148 {
28149 if (cfun->machine->call_via[regno] == NULL)
28150 cfun->machine->call_via[regno] = gen_label_rtx ();
28151 labelp = cfun->machine->call_via + regno;
28152 }
28153
28154 output_asm_insn ("bl\t%a0", labelp);
28155 return "";
28156 }
28157
28158 /* Routines for generating rtl. */
28159 void
28160 thumb_expand_movmemqi (rtx *operands)
28161 {
28162 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28163 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28164 HOST_WIDE_INT len = INTVAL (operands[2]);
28165 HOST_WIDE_INT offset = 0;
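/* Copy the block in 12- and 8-byte ldmia/stmia chunks first (those
patterns advance the pointers, which is why OFFSET only starts
accumulating afterwards), then finish with word, half-word and byte
copies at increasing offsets. */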
28166
28167 while (len >= 12)
28168 {
28169 emit_insn (gen_movmem12b (out, in, out, in));
28170 len -= 12;
28171 }
28172
28173 if (len >= 8)
28174 {
28175 emit_insn (gen_movmem8b (out, in, out, in));
28176 len -= 8;
28177 }
28178
28179 if (len >= 4)
28180 {
28181 rtx reg = gen_reg_rtx (SImode);
28182 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28183 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28184 len -= 4;
28185 offset += 4;
28186 }
28187
28188 if (len >= 2)
28189 {
28190 rtx reg = gen_reg_rtx (HImode);
28191 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28192 plus_constant (Pmode, in,
28193 offset))));
28194 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28195 offset)),
28196 reg));
28197 len -= 2;
28198 offset += 2;
28199 }
28200
28201 if (len)
28202 {
28203 rtx reg = gen_reg_rtx (QImode);
28204 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28205 plus_constant (Pmode, in,
28206 offset))));
28207 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28208 offset)),
28209 reg));
28210 }
28211 }
28212
28213 void
28214 thumb_reload_out_hi (rtx *operands)
28215 {
28216 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28217 }
28218
28219 /* Handle reading a half-word from memory during reload. */
28220 void
28221 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28222 {
28223 gcc_unreachable ();
28224 }
28225
28226 /* Return the length of a function name prefix
28227 that starts with the character C. */
28228 static int
28229 arm_get_strip_length (int c)
28230 {
28231 switch (c)
28232 {
28233 ARM_NAME_ENCODING_LENGTHS
28234 default: return 0;
28235 }
28236 }
28237
28238 /* Return a pointer to a function's name with any
28239 and all prefix encodings stripped from it. */
28240 const char *
28241 arm_strip_name_encoding (const char *name)
28242 {
28243 int skip;
28244
28245 while ((skip = arm_get_strip_length (* name)))
28246 name += skip;
28247
28248 return name;
28249 }
28250
28251 /* If there is a '*' anywhere in the name's prefix, then
28252 emit the stripped name verbatim, otherwise prepend an
28253 underscore if leading underscores are being used. */
28254 void
28255 arm_asm_output_labelref (FILE *stream, const char *name)
28256 {
28257 int skip;
28258 int verbatim = 0;
28259
28260 while ((skip = arm_get_strip_length (* name)))
28261 {
28262 verbatim |= (*name == '*');
28263 name += skip;
28264 }
28265
28266 if (verbatim)
28267 fputs (name, stream);
28268 else
28269 asm_fprintf (stream, "%U%s", name);
28270 }
28271
28272 /* This function is used to emit an EABI tag and its associated value.
28273 We emit the numerical value of the tag in case the assembler does not
28274 support textual tags (e.g. gas prior to 2.20). If requested we include
28275 the tag name in a comment so that anyone reading the assembler output
28276 will know which tag is being set.
28277
28278 This function is not static because arm-c.c needs it too. */
28279
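/* For illustration: with -fverbose-asm, a call such as
arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 1) emits roughly
".eabi_attribute 26, 1 @ Tag_ABI_enum_size". */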
28280 void
28281 arm_emit_eabi_attribute (const char *name, int num, int val)
28282 {
28283 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28284 if (flag_verbose_asm || flag_debug_asm)
28285 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28286 asm_fprintf (asm_out_file, "\n");
28287 }
28288
28289 static void
28290 arm_file_start (void)
28291 {
28292 int val;
28293
28294 if (TARGET_UNIFIED_ASM)
28295 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28296
28297 if (TARGET_BPABI)
28298 {
28299 const char *fpu_name;
28300 if (arm_selected_arch)
28301 {
28302 /* armv7ve doesn't support any extensions. */
28303 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28304 {
28305 /* Keep backward compatibility for assemblers
28306 which don't support armv7ve. */
28307 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28308 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28309 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28310 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28311 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28312 }
28313 else
28314 {
28315 const char* pos = strchr (arm_selected_arch->name, '+');
28316 if (pos)
28317 {
28318 char buf[15];
28319 gcc_assert (strlen (arm_selected_arch->name)
28320 <= sizeof (buf) / sizeof (*pos));
28321 strncpy (buf, arm_selected_arch->name,
28322 (pos - arm_selected_arch->name) * sizeof (*pos));
28323 buf[pos - arm_selected_arch->name] = '\0';
28324 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28325 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28326 }
28327 else
28328 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28329 }
28330 }
28331 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28332 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28333 else
28334 {
28335 const char* truncated_name
28336 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28337 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28338 }
28339
28340 if (TARGET_SOFT_FLOAT)
28341 {
28342 fpu_name = "softvfp";
28343 }
28344 else
28345 {
28346 fpu_name = arm_fpu_desc->name;
28347 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28348 {
28349 if (TARGET_HARD_FLOAT)
28350 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28351 if (TARGET_HARD_FLOAT_ABI)
28352 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28353 }
28354 }
28355 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28356
28357 /* Some of these attributes only apply when the corresponding features
28358 are used. However we don't have any easy way of figuring this out.
28359 Conservatively record the setting that would have been used. */
28360
28361 if (flag_rounding_math)
28362 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28363
28364 if (!flag_unsafe_math_optimizations)
28365 {
28366 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28367 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28368 }
28369 if (flag_signaling_nans)
28370 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28371
28372 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28373 flag_finite_math_only ? 1 : 3);
28374
28375 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28376 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28377 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28378 flag_short_enums ? 1 : 2);
28379
28380 /* Tag_ABI_optimization_goals. */
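/* The values below follow the EABI encoding for this tag: 1 = optimized
for speed, 2 = aggressively for speed, 4 = aggressively for size,
6 = best debugging experience. */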
28381 if (optimize_size)
28382 val = 4;
28383 else if (optimize >= 2)
28384 val = 2;
28385 else if (optimize)
28386 val = 1;
28387 else
28388 val = 6;
28389 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28390
28391 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28392 unaligned_access);
28393
28394 if (arm_fp16_format)
28395 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28396 (int) arm_fp16_format);
28397
28398 if (arm_lang_output_object_attributes_hook)
28399 arm_lang_output_object_attributes_hook();
28400 }
28401
28402 default_file_start ();
28403 }
28404
28405 static void
28406 arm_file_end (void)
28407 {
28408 int regno;
28409
28410 if (NEED_INDICATE_EXEC_STACK)
28411 /* Add .note.GNU-stack. */
28412 file_end_indicate_exec_stack ();
28413
28414 if (! thumb_call_reg_needed)
28415 return;
28416
28417 switch_to_section (text_section);
28418 asm_fprintf (asm_out_file, "\t.code 16\n");
28419 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28420
28421 for (regno = 0; regno < LR_REGNUM; regno++)
28422 {
28423 rtx label = thumb_call_via_label[regno];
28424
28425 if (label != 0)
28426 {
28427 targetm.asm_out.internal_label (asm_out_file, "L",
28428 CODE_LABEL_NUMBER (label));
28429 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28430 }
28431 }
28432 }
28433
28434 #ifndef ARM_PE
28435 /* Symbols in the text segment can be accessed without indirecting via the
28436 constant pool; it may take an extra binary operation, but this is still
28437 faster than indirecting via memory. Don't do this when not optimizing,
28438 since we won't be calculating all of the offsets necessary to do this
28439 simplification. */
28440
28441 static void
28442 arm_encode_section_info (tree decl, rtx rtl, int first)
28443 {
28444 if (optimize > 0 && TREE_CONSTANT (decl))
28445 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28446
28447 default_encode_section_info (decl, rtl, first);
28448 }
28449 #endif /* !ARM_PE */
28450
28451 static void
28452 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28453 {
28454 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28455 && !strcmp (prefix, "L"))
28456 {
28457 arm_ccfsm_state = 0;
28458 arm_target_insn = NULL;
28459 }
28460 default_internal_label (stream, prefix, labelno);
28461 }
28462
28463 /* Output code to add DELTA to the first argument, and then jump
28464 to FUNCTION. Used for C++ multiple inheritance. */
28465 static void
28466 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28467 HOST_WIDE_INT delta,
28468 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28469 tree function)
28470 {
28471 static int thunk_label = 0;
28472 char label[256];
28473 char labelpc[256];
28474 int mi_delta = delta;
28475 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28476 int shift = 0;
28477 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28478 ? 1 : 0);
28479 if (mi_delta < 0)
28480 mi_delta = - mi_delta;
28481
28482 final_start_function (emit_barrier (), file, 1);
28483
28484 if (TARGET_THUMB1)
28485 {
28486 int labelno = thunk_label++;
28487 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28488 /* Thunks are entered in ARM mode when available. */
28489 if (TARGET_THUMB1_ONLY)
28490 {
28491 /* push r3 so we can use it as a temporary. */
28492 /* TODO: Omit this save if r3 is not used. */
28493 fputs ("\tpush {r3}\n", file);
28494 fputs ("\tldr\tr3, ", file);
28495 }
28496 else
28497 {
28498 fputs ("\tldr\tr12, ", file);
28499 }
28500 assemble_name (file, label);
28501 fputc ('\n', file);
28502 if (flag_pic)
28503 {
28504 /* If we are generating PIC, the ldr instruction below loads
28505 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28506 the address of the add + 8, so we have:
28507
28508 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28509 = target + 1.
28510
28511 Note that we have "+ 1" because some versions of GNU ld
28512 don't set the low bit of the result for R_ARM_REL32
28513 relocations against thumb function symbols.
28514 On ARMv6M this is +4, not +8. */
28515 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28516 assemble_name (file, labelpc);
28517 fputs (":\n", file);
28518 if (TARGET_THUMB1_ONLY)
28519 {
28520 /* This is 2 insns after the start of the thunk, so we know it
28521 is 4-byte aligned. */
28522 fputs ("\tadd\tr3, pc, r3\n", file);
28523 fputs ("\tmov r12, r3\n", file);
28524 }
28525 else
28526 fputs ("\tadd\tr12, pc, r12\n", file);
28527 }
28528 else if (TARGET_THUMB1_ONLY)
28529 fputs ("\tmov r12, r3\n", file);
28530 }
28531 if (TARGET_THUMB1_ONLY)
28532 {
28533 if (mi_delta > 255)
28534 {
28535 fputs ("\tldr\tr3, ", file);
28536 assemble_name (file, label);
28537 fputs ("+4\n", file);
28538 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28539 mi_op, this_regno, this_regno);
28540 }
28541 else if (mi_delta != 0)
28542 {
28543 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28544 mi_op, this_regno, this_regno,
28545 mi_delta);
28546 }
28547 }
28548 else
28549 {
28550 /* TODO: Use movw/movt for large constants when available. */
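/* Emit the delta as a series of add/sub instructions, each moving an
8-bit chunk aligned on an even bit position, matching the ARM
immediate encoding (an 8-bit value rotated by an even amount). */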
28551 while (mi_delta != 0)
28552 {
28553 if ((mi_delta & (3 << shift)) == 0)
28554 shift += 2;
28555 else
28556 {
28557 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28558 mi_op, this_regno, this_regno,
28559 mi_delta & (0xff << shift));
28560 mi_delta &= ~(0xff << shift);
28561 shift += 8;
28562 }
28563 }
28564 }
28565 if (TARGET_THUMB1)
28566 {
28567 if (TARGET_THUMB1_ONLY)
28568 fputs ("\tpop\t{r3}\n", file);
28569
28570 fprintf (file, "\tbx\tr12\n");
28571 ASM_OUTPUT_ALIGN (file, 2);
28572 assemble_name (file, label);
28573 fputs (":\n", file);
28574 if (flag_pic)
28575 {
28576 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28577 rtx tem = XEXP (DECL_RTL (function), 0);
28578 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28579 pipeline offset is four rather than eight. Adjust the offset
28580 accordingly. */
28581 tem = plus_constant (GET_MODE (tem), tem,
28582 TARGET_THUMB1_ONLY ? -3 : -7);
28583 tem = gen_rtx_MINUS (GET_MODE (tem),
28584 tem,
28585 gen_rtx_SYMBOL_REF (Pmode,
28586 ggc_strdup (labelpc)));
28587 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28588 }
28589 else
28590 /* Output ".word .LTHUNKn". */
28591 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28592
28593 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28594 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28595 }
28596 else
28597 {
28598 fputs ("\tb\t", file);
28599 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28600 if (NEED_PLT_RELOC)
28601 fputs ("(PLT)", file);
28602 fputc ('\n', file);
28603 }
28604
28605 final_end_function ();
28606 }
28607
28608 int
28609 arm_emit_vector_const (FILE *file, rtx x)
28610 {
28611 int i;
28612 const char * pattern;
28613
28614 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28615
28616 switch (GET_MODE (x))
28617 {
28618 case V2SImode: pattern = "%08x"; break;
28619 case V4HImode: pattern = "%04x"; break;
28620 case V8QImode: pattern = "%02x"; break;
28621 default: gcc_unreachable ();
28622 }
28623
28624 fprintf (file, "0x");
28625 for (i = CONST_VECTOR_NUNITS (x); i--;)
28626 {
28627 rtx element;
28628
28629 element = CONST_VECTOR_ELT (x, i);
28630 fprintf (file, pattern, INTVAL (element));
28631 }
28632
28633 return 1;
28634 }
28635
28636 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28637 HFmode constant pool entries are actually loaded with ldr. */
28638 void
28639 arm_emit_fp16_const (rtx c)
28640 {
28641 REAL_VALUE_TYPE r;
28642 long bits;
28643
28644 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28645 bits = real_to_target (NULL, &r, HFmode);
28646 if (WORDS_BIG_ENDIAN)
28647 assemble_zeros (2);
28648 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28649 if (!WORDS_BIG_ENDIAN)
28650 assemble_zeros (2);
28651 }
28652
28653 const char *
28654 arm_output_load_gr (rtx *operands)
28655 {
28656 rtx reg;
28657 rtx offset;
28658 rtx wcgr;
28659 rtx sum;
28660
28661 if (!MEM_P (operands [1])
28662 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28663 || !REG_P (reg = XEXP (sum, 0))
28664 || !CONST_INT_P (offset = XEXP (sum, 1))
28665 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28666 return "wldrw%?\t%0, %1";
28667
28668 /* Fix up an out-of-range load of a GR register. */
28669 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28670 wcgr = operands[0];
28671 operands[0] = reg;
28672 output_asm_insn ("ldr%?\t%0, %1", operands);
28673
28674 operands[0] = wcgr;
28675 operands[1] = reg;
28676 output_asm_insn ("tmcr%?\t%0, %1", operands);
28677 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28678
28679 return "";
28680 }
28681
28682 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28683
28684 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28685 named arg and all anonymous args onto the stack.
28686 XXX I know the prologue shouldn't be pushing registers, but it is faster
28687 that way. */
28688
28689 static void
28690 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28691 enum machine_mode mode,
28692 tree type,
28693 int *pretend_size,
28694 int second_time ATTRIBUTE_UNUSED)
28695 {
28696 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28697 int nregs;
28698
28699 cfun->machine->uses_anonymous_args = 1;
28700 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28701 {
28702 nregs = pcum->aapcs_ncrn;
28703 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28704 nregs++;
28705 }
28706 else
28707 nregs = pcum->nregs;
28708
28709 if (nregs < NUM_ARG_REGS)
28710 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
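/* For example, if only r0 is consumed by named arguments, nregs == 1 and
the prologue will push r1-r3, giving a pretend size of 12 bytes. */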
28711 }
28712
28713 /* We can't rely on the caller doing the proper promotion when
28714 using APCS or ATPCS. */
28715
28716 static bool
28717 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28718 {
28719 return !TARGET_AAPCS_BASED;
28720 }
28721
28722 static enum machine_mode
28723 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28724 enum machine_mode mode,
28725 int *punsignedp ATTRIBUTE_UNUSED,
28726 const_tree fntype ATTRIBUTE_UNUSED,
28727 int for_return ATTRIBUTE_UNUSED)
28728 {
28729 if (GET_MODE_CLASS (mode) == MODE_INT
28730 && GET_MODE_SIZE (mode) < 4)
28731 return SImode;
28732
28733 return mode;
28734 }
28735
28736 /* AAPCS based ABIs use short enums by default. */
28737
28738 static bool
28739 arm_default_short_enums (void)
28740 {
28741 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28742 }
28743
28744
28745 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28746
28747 static bool
28748 arm_align_anon_bitfield (void)
28749 {
28750 return TARGET_AAPCS_BASED;
28751 }
28752
28753
28754 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28755
28756 static tree
28757 arm_cxx_guard_type (void)
28758 {
28759 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28760 }
28761
28762
28763 /* The EABI says test the least significant bit of a guard variable. */
28764
28765 static bool
28766 arm_cxx_guard_mask_bit (void)
28767 {
28768 return TARGET_AAPCS_BASED;
28769 }
28770
28771
28772 /* The EABI specifies that all array cookies are 8 bytes long. */
28773
28774 static tree
28775 arm_get_cookie_size (tree type)
28776 {
28777 tree size;
28778
28779 if (!TARGET_AAPCS_BASED)
28780 return default_cxx_get_cookie_size (type);
28781
28782 size = build_int_cst (sizetype, 8);
28783 return size;
28784 }
28785
28786
28787 /* The EABI says that array cookies should also contain the element size. */
28788
28789 static bool
28790 arm_cookie_has_size (void)
28791 {
28792 return TARGET_AAPCS_BASED;
28793 }
28794
28795
28796 /* The EABI says constructors and destructors should return a pointer to
28797 the object constructed/destroyed. */
28798
28799 static bool
28800 arm_cxx_cdtor_returns_this (void)
28801 {
28802 return TARGET_AAPCS_BASED;
28803 }
28804
28805 /* The EABI says that an inline function may never be the key
28806 method. */
28807
28808 static bool
28809 arm_cxx_key_method_may_be_inline (void)
28810 {
28811 return !TARGET_AAPCS_BASED;
28812 }
28813
28814 static void
28815 arm_cxx_determine_class_data_visibility (tree decl)
28816 {
28817 if (!TARGET_AAPCS_BASED
28818 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28819 return;
28820
28821 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28822 is exported. However, on systems without dynamic vague linkage,
28823 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28824 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28825 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28826 else
28827 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28828 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28829 }
28830
28831 static bool
28832 arm_cxx_class_data_always_comdat (void)
28833 {
28834 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28835 vague linkage if the class has no key function. */
28836 return !TARGET_AAPCS_BASED;
28837 }
28838
28839
28840 /* The EABI says __aeabi_atexit should be used to register static
28841 destructors. */
28842
28843 static bool
28844 arm_cxx_use_aeabi_atexit (void)
28845 {
28846 return TARGET_AAPCS_BASED;
28847 }
28848
28849
28850 void
28851 arm_set_return_address (rtx source, rtx scratch)
28852 {
28853 arm_stack_offsets *offsets;
28854 HOST_WIDE_INT delta;
28855 rtx addr;
28856 unsigned long saved_regs;
28857
28858 offsets = arm_get_frame_offsets ();
28859 saved_regs = offsets->saved_regs_mask;
28860
28861 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28862 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28863 else
28864 {
28865 if (frame_pointer_needed)
28866 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28867 else
28868 {
28869 /* LR will be the first saved register. */
28870 delta = offsets->outgoing_args - (offsets->frame + 4);
28871
28872
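/* An ARM word load/store offset is limited to 4095, so for larger deltas
add the upper bits into the scratch register first and keep only the
low 12 bits as the immediate offset. */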
28873 if (delta >= 4096)
28874 {
28875 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28876 GEN_INT (delta & ~4095)));
28877 addr = scratch;
28878 delta &= 4095;
28879 }
28880 else
28881 addr = stack_pointer_rtx;
28882
28883 addr = plus_constant (Pmode, addr, delta);
28884 }
28885 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28886 }
28887 }
28888
28889
28890 void
28891 thumb_set_return_address (rtx source, rtx scratch)
28892 {
28893 arm_stack_offsets *offsets;
28894 HOST_WIDE_INT delta;
28895 HOST_WIDE_INT limit;
28896 int reg;
28897 rtx addr;
28898 unsigned long mask;
28899
28900 emit_use (source);
28901
28902 offsets = arm_get_frame_offsets ();
28903 mask = offsets->saved_regs_mask;
28904 if (mask & (1 << LR_REGNUM))
28905 {
28906 limit = 1024;
28907 /* Find the saved regs. */
28908 if (frame_pointer_needed)
28909 {
28910 delta = offsets->soft_frame - offsets->saved_args;
28911 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28912 if (TARGET_THUMB1)
28913 limit = 128;
28914 }
28915 else
28916 {
28917 delta = offsets->outgoing_args - offsets->saved_args;
28918 reg = SP_REGNUM;
28919 }
28920 /* Allow for the stack frame. */
28921 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28922 delta -= 16;
28923 /* The link register is always the first saved register. */
28924 delta -= 4;
28925
28926 /* Construct the address. */
28927 addr = gen_rtx_REG (SImode, reg);
28928 if (delta > limit)
28929 {
28930 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28931 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28932 addr = scratch;
28933 }
28934 else
28935 addr = plus_constant (Pmode, addr, delta);
28936
28937 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28938 }
28939 else
28940 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28941 }
28942
28943 /* Implements target hook vector_mode_supported_p. */
28944 bool
28945 arm_vector_mode_supported_p (enum machine_mode mode)
28946 {
28947 /* Neon also supports V2SImode, etc. listed in the clause below. */
28948 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28949 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28950 return true;
28951
28952 if ((TARGET_NEON || TARGET_IWMMXT)
28953 && ((mode == V2SImode)
28954 || (mode == V4HImode)
28955 || (mode == V8QImode)))
28956 return true;
28957
28958 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28959 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28960 || mode == V2HAmode))
28961 return true;
28962
28963 return false;
28964 }
28965
28966 /* Implements target hook array_mode_supported_p. */
28967
28968 static bool
28969 arm_array_mode_supported_p (enum machine_mode mode,
28970 unsigned HOST_WIDE_INT nelems)
28971 {
28972 if (TARGET_NEON
28973 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28974 && (nelems >= 2 && nelems <= 4))
28975 return true;
28976
28977 return false;
28978 }
28979
28980 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28981 registers when autovectorizing for Neon, at least until multiple vector
28982 widths are supported properly by the middle-end. */
28983
28984 static enum machine_mode
28985 arm_preferred_simd_mode (enum machine_mode mode)
28986 {
28987 if (TARGET_NEON)
28988 switch (mode)
28989 {
28990 case SFmode:
28991 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28992 case SImode:
28993 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28994 case HImode:
28995 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28996 case QImode:
28997 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28998 case DImode:
28999 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29000 return V2DImode;
29001 break;
29002
29003 default:;
29004 }
29005
29006 if (TARGET_REALLY_IWMMXT)
29007 switch (mode)
29008 {
29009 case SImode:
29010 return V2SImode;
29011 case HImode:
29012 return V4HImode;
29013 case QImode:
29014 return V8QImode;
29015
29016 default:;
29017 }
29018
29019 return word_mode;
29020 }
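/* As an illustration of the choices above: with NEON enabled and the default
   settings, autovectorizing SFmode data prefers V4SFmode (a quadword
   register), while -mvectorize-with-neon-double switches the preference to
   V2SFmode; on iWMMXt targets SImode data prefers V2SImode.  These pairings
   are read directly from the switch statements above.  */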
29021
29022 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29023
29024 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29025 using r0-r4 for function arguments, r7 for the stack frame and don't have
29026 enough left over to do doubleword arithmetic. For Thumb-2 all the
29027 potentially problematic instructions accept high registers so this is not
29028 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29029 that require many low registers. */
29030 static bool
29031 arm_class_likely_spilled_p (reg_class_t rclass)
29032 {
29033 if ((TARGET_THUMB1 && rclass == LO_REGS)
29034 || rclass == CC_REG)
29035 return true;
29036
29037 return false;
29038 }
29039
29040 /* Implements target hook small_register_classes_for_mode_p. */
29041 bool
29042 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
29043 {
29044 return TARGET_THUMB1;
29045 }
29046
29047 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29048 ARM insns and therefore guarantee that the shift count is modulo 256.
29049 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29050 guarantee no particular behavior for out-of-range counts. */
29051
29052 static unsigned HOST_WIDE_INT
29053 arm_shift_truncation_mask (enum machine_mode mode)
29054 {
29055 return mode == SImode ? 255 : 0;
29056 }
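/* Illustration: because the mask above is 255 for SImode, the middle end may
   assume that an explicit masking of the shift count, e.g. (x << (n & 255)),
   can drop the AND, and that a variable count of 257 behaves like a count of
   1 on SImode values.  No such assumption is made for DImode.  */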
29057
29058
29059 /* Map internal gcc register numbers to DWARF2 register numbers. */
29060
29061 unsigned int
29062 arm_dbx_register_number (unsigned int regno)
29063 {
29064 if (regno < 16)
29065 return regno;
29066
29067 if (IS_VFP_REGNUM (regno))
29068 {
29069 /* See comment in arm_dwarf_register_span. */
29070 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29071 return 64 + regno - FIRST_VFP_REGNUM;
29072 else
29073 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29074 }
29075
29076 if (IS_IWMMXT_GR_REGNUM (regno))
29077 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29078
29079 if (IS_IWMMXT_REGNUM (regno))
29080 return 112 + regno - FIRST_IWMMXT_REGNUM;
29081
29082 gcc_unreachable ();
29083 }
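/* Example of the mapping above, assuming the usual layout in which the first
   32 VFP register numbers correspond to S0-S31: core register r13 keeps
   DWARF number 13, S0 maps to 64, and a double-only register such as D16
   (32 single slots beyond FIRST_VFP_REGNUM) maps to 256 + 16.  */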
29084
29085 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29086 GCC models them as 64 32-bit registers, so we need to describe this to
29087 the DWARF generation code. Other registers can use the default. */
29088 static rtx
29089 arm_dwarf_register_span (rtx rtl)
29090 {
29091 enum machine_mode mode;
29092 unsigned regno;
29093 rtx parts[16];
29094 int nregs;
29095 int i;
29096
29097 regno = REGNO (rtl);
29098 if (!IS_VFP_REGNUM (regno))
29099 return NULL_RTX;
29100
29101 /* XXX FIXME: The EABI defines two VFP register ranges:
29102 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29103 256-287: D0-D31
29104 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29105 corresponding D register. Until GDB supports this, we shall use the
29106 legacy encodings. We also use these encodings for D0-D15 for
29107 compatibility with older debuggers. */
29108 mode = GET_MODE (rtl);
29109 if (GET_MODE_SIZE (mode) < 8)
29110 return NULL_RTX;
29111
29112 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29113 {
29114 nregs = GET_MODE_SIZE (mode) / 4;
29115 for (i = 0; i < nregs; i += 2)
29116 if (TARGET_BIG_END)
29117 {
29118 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29119 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29120 }
29121 else
29122 {
29123 parts[i] = gen_rtx_REG (SImode, regno + i);
29124 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29125 }
29126 }
29127 else
29128 {
29129 nregs = GET_MODE_SIZE (mode) / 8;
29130 for (i = 0; i < nregs; i++)
29131 parts[i] = gen_rtx_REG (DImode, regno + i);
29132 }
29133
29134 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29135 }
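/* For instance, a DFmode value held in the single-precision-capable bank is
   described to DWARF as a PARALLEL of two SImode registers (swapped when
   TARGET_BIG_END), while values in the upper, double-only bank are described
   as whole DImode registers.  */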
29136
29137 #if ARM_UNWIND_INFO
29138 /* Emit unwind directives for a store-multiple instruction or stack pointer
29139 push during alignment.
29140 These should only ever be generated by the function prologue code, so
29141 expect them to have a particular form.
29142 The store-multiple instruction sometimes pushes pc as the last register,
29143 although it should not be tracked in the unwind information, or for -Os
29144 sometimes pushes some dummy registers before the first register that needs
29145 to be tracked in unwind information; such dummy registers are there just
29146 to avoid separate stack adjustment, and will not be restored in the
29147 epilogue. */
29148
29149 static void
29150 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29151 {
29152 int i;
29153 HOST_WIDE_INT offset;
29154 HOST_WIDE_INT nregs;
29155 int reg_size;
29156 unsigned reg;
29157 unsigned lastreg;
29158 unsigned padfirst = 0, padlast = 0;
29159 rtx e;
29160
29161 e = XVECEXP (p, 0, 0);
29162 gcc_assert (GET_CODE (e) == SET);
29163
29164 /* First insn will adjust the stack pointer. */
29165 gcc_assert (GET_CODE (e) == SET
29166 && REG_P (SET_DEST (e))
29167 && REGNO (SET_DEST (e)) == SP_REGNUM
29168 && GET_CODE (SET_SRC (e)) == PLUS);
29169
29170 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29171 nregs = XVECLEN (p, 0) - 1;
29172 gcc_assert (nregs);
29173
29174 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29175 if (reg < 16)
29176 {
29177 /* For -Os dummy registers can be pushed at the beginning to
29178 avoid separate stack pointer adjustment. */
29179 e = XVECEXP (p, 0, 1);
29180 e = XEXP (SET_DEST (e), 0);
29181 if (GET_CODE (e) == PLUS)
29182 padfirst = INTVAL (XEXP (e, 1));
29183 gcc_assert (padfirst == 0 || optimize_size);
29184 /* The function prologue may also push pc, but not annotate it as it is
29185 never restored. We turn this into a stack pointer adjustment. */
29186 e = XVECEXP (p, 0, nregs);
29187 e = XEXP (SET_DEST (e), 0);
29188 if (GET_CODE (e) == PLUS)
29189 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29190 else
29191 padlast = offset - 4;
29192 gcc_assert (padlast == 0 || padlast == 4);
29193 if (padlast == 4)
29194 fprintf (asm_out_file, "\t.pad #4\n");
29195 reg_size = 4;
29196 fprintf (asm_out_file, "\t.save {");
29197 }
29198 else if (IS_VFP_REGNUM (reg))
29199 {
29200 reg_size = 8;
29201 fprintf (asm_out_file, "\t.vsave {");
29202 }
29203 else
29204 /* Unknown register type. */
29205 gcc_unreachable ();
29206
29207 /* If the stack increment doesn't match the size of the saved registers,
29208 something has gone horribly wrong. */
29209 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29210
29211 offset = padfirst;
29212 lastreg = 0;
29213 /* The remaining insns will describe the stores. */
29214 for (i = 1; i <= nregs; i++)
29215 {
29216 /* Expect (set (mem <addr>) (reg)).
29217 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29218 e = XVECEXP (p, 0, i);
29219 gcc_assert (GET_CODE (e) == SET
29220 && MEM_P (SET_DEST (e))
29221 && REG_P (SET_SRC (e)));
29222
29223 reg = REGNO (SET_SRC (e));
29224 gcc_assert (reg >= lastreg);
29225
29226 if (i != 1)
29227 fprintf (asm_out_file, ", ");
29228 /* We can't use %r for vfp because we need to use the
29229 double precision register names. */
29230 if (IS_VFP_REGNUM (reg))
29231 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29232 else
29233 asm_fprintf (asm_out_file, "%r", reg);
29234
29235 #ifdef ENABLE_CHECKING
29236 /* Check that the addresses are consecutive. */
29237 e = XEXP (SET_DEST (e), 0);
29238 if (GET_CODE (e) == PLUS)
29239 gcc_assert (REG_P (XEXP (e, 0))
29240 && REGNO (XEXP (e, 0)) == SP_REGNUM
29241 && CONST_INT_P (XEXP (e, 1))
29242 && offset == INTVAL (XEXP (e, 1)));
29243 else
29244 gcc_assert (i == 1
29245 && REG_P (e)
29246 && REGNO (e) == SP_REGNUM);
29247 offset += reg_size;
29248 #endif
29249 }
29250 fprintf (asm_out_file, "}\n");
29251 if (padfirst)
29252 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29253 }
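/* For illustration, a prologue store-multiple that pushes {r4, r5, lr} comes
   through here as a 12-byte stack decrement plus three stores and produces
   "\t.save {r4, r5, lr}", while a push of a VFP D-register pair produces
   "\t.vsave {d8, d9}" (the exact register names depend on the insn being
   annotated).  */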
29254
29255 /* Emit unwind directives for a SET. */
29256
29257 static void
29258 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29259 {
29260 rtx e0;
29261 rtx e1;
29262 unsigned reg;
29263
29264 e0 = XEXP (p, 0);
29265 e1 = XEXP (p, 1);
29266 switch (GET_CODE (e0))
29267 {
29268 case MEM:
29269 /* Pushing a single register. */
29270 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29271 || !REG_P (XEXP (XEXP (e0, 0), 0))
29272 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29273 abort ();
29274
29275 asm_fprintf (asm_out_file, "\t.save ");
29276 if (IS_VFP_REGNUM (REGNO (e1)))
29277 asm_fprintf(asm_out_file, "{d%d}\n",
29278 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29279 else
29280 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29281 break;
29282
29283 case REG:
29284 if (REGNO (e0) == SP_REGNUM)
29285 {
29286 /* A stack increment. */
29287 if (GET_CODE (e1) != PLUS
29288 || !REG_P (XEXP (e1, 0))
29289 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29290 || !CONST_INT_P (XEXP (e1, 1)))
29291 abort ();
29292
29293 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29294 -INTVAL (XEXP (e1, 1)));
29295 }
29296 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29297 {
29298 HOST_WIDE_INT offset;
29299
29300 if (GET_CODE (e1) == PLUS)
29301 {
29302 if (!REG_P (XEXP (e1, 0))
29303 || !CONST_INT_P (XEXP (e1, 1)))
29304 abort ();
29305 reg = REGNO (XEXP (e1, 0));
29306 offset = INTVAL (XEXP (e1, 1));
29307 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29308 HARD_FRAME_POINTER_REGNUM, reg,
29309 offset);
29310 }
29311 else if (REG_P (e1))
29312 {
29313 reg = REGNO (e1);
29314 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29315 HARD_FRAME_POINTER_REGNUM, reg);
29316 }
29317 else
29318 abort ();
29319 }
29320 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29321 {
29322 /* Move from sp to reg. */
29323 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29324 }
29325 else if (GET_CODE (e1) == PLUS
29326 && REG_P (XEXP (e1, 0))
29327 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29328 && CONST_INT_P (XEXP (e1, 1)))
29329 {
29330 /* Set reg to offset from sp. */
29331 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29332 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29333 }
29334 else
29335 abort ();
29336 break;
29337
29338 default:
29339 abort ();
29340 }
29341 }
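/* Examples of the directives emitted above (register names are illustrative
   only): a frame-related "sub sp, sp, #16" yields "\t.pad #16", establishing
   the frame pointer from sp yields "\t.setfp fp, sp" or
   "\t.setfp fp, sp, #<offset>", and copying sp into another register yields
   "\t.movsp <reg>".  */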
29342
29343
29344 /* Emit unwind directives for the given insn. */
29345
29346 static void
29347 arm_unwind_emit (FILE * asm_out_file, rtx insn)
29348 {
29349 rtx note, pat;
29350 bool handled_one = false;
29351
29352 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29353 return;
29354
29355 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29356 && (TREE_NOTHROW (current_function_decl)
29357 || crtl->all_throwers_are_sibcalls))
29358 return;
29359
29360 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29361 return;
29362
29363 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29364 {
29365 switch (REG_NOTE_KIND (note))
29366 {
29367 case REG_FRAME_RELATED_EXPR:
29368 pat = XEXP (note, 0);
29369 goto found;
29370
29371 case REG_CFA_REGISTER:
29372 pat = XEXP (note, 0);
29373 if (pat == NULL)
29374 {
29375 pat = PATTERN (insn);
29376 if (GET_CODE (pat) == PARALLEL)
29377 pat = XVECEXP (pat, 0, 0);
29378 }
29379
29380 /* Only emitted for IS_STACKALIGN re-alignment. */
29381 {
29382 rtx dest, src;
29383 unsigned reg;
29384
29385 src = SET_SRC (pat);
29386 dest = SET_DEST (pat);
29387
29388 gcc_assert (src == stack_pointer_rtx);
29389 reg = REGNO (dest);
29390 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29391 reg + 0x90, reg);
29392 }
29393 handled_one = true;
29394 break;
29395
29396 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29397 to get correct DWARF information for shrink-wrapping. We should not
29398 emit unwind information for it because these notes are used either for
29399 pretend arguments or to adjust sp and restore registers from the
29400 stack. */
29401 case REG_CFA_DEF_CFA:
29402 case REG_CFA_ADJUST_CFA:
29403 case REG_CFA_RESTORE:
29404 return;
29405
29406 case REG_CFA_EXPRESSION:
29407 case REG_CFA_OFFSET:
29408 /* ??? Only handling here what we actually emit. */
29409 gcc_unreachable ();
29410
29411 default:
29412 break;
29413 }
29414 }
29415 if (handled_one)
29416 return;
29417 pat = PATTERN (insn);
29418 found:
29419
29420 switch (GET_CODE (pat))
29421 {
29422 case SET:
29423 arm_unwind_emit_set (asm_out_file, pat);
29424 break;
29425
29426 case SEQUENCE:
29427 /* Store multiple. */
29428 arm_unwind_emit_sequence (asm_out_file, pat);
29429 break;
29430
29431 default:
29432 abort();
29433 }
29434 }
29435
29436
29437 /* Output a reference from a function exception table to the type_info
29438 object X. The EABI specifies that the symbol should be relocated by
29439 an R_ARM_TARGET2 relocation. */
29440
29441 static bool
29442 arm_output_ttype (rtx x)
29443 {
29444 fputs ("\t.word\t", asm_out_file);
29445 output_addr_const (asm_out_file, x);
29446 /* Use special relocations for symbol references. */
29447 if (!CONST_INT_P (x))
29448 fputs ("(TARGET2)", asm_out_file);
29449 fputc ('\n', asm_out_file);
29450
29451 return TRUE;
29452 }
29453
29454 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29455
29456 static void
29457 arm_asm_emit_except_personality (rtx personality)
29458 {
29459 fputs ("\t.personality\t", asm_out_file);
29460 output_addr_const (asm_out_file, personality);
29461 fputc ('\n', asm_out_file);
29462 }
29463
29464 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29465
29466 static void
29467 arm_asm_init_sections (void)
29468 {
29469 exception_section = get_unnamed_section (0, output_section_asm_op,
29470 "\t.handlerdata");
29471 }
29472 #endif /* ARM_UNWIND_INFO */
29473
29474 /* Output unwind directives for the start/end of a function. */
29475
29476 void
29477 arm_output_fn_unwind (FILE * f, bool prologue)
29478 {
29479 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29480 return;
29481
29482 if (prologue)
29483 fputs ("\t.fnstart\n", f);
29484 else
29485 {
29486 /* If this function will never be unwound, then mark it as such.
29487 The same condition is used in arm_unwind_emit to suppress
29488 the frame annotations. */
29489 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29490 && (TREE_NOTHROW (current_function_decl)
29491 || crtl->all_throwers_are_sibcalls))
29492 fputs("\t.cantunwind\n", f);
29493
29494 fputs ("\t.fnend\n", f);
29495 }
29496 }
29497
29498 static bool
29499 arm_emit_tls_decoration (FILE *fp, rtx x)
29500 {
29501 enum tls_reloc reloc;
29502 rtx val;
29503
29504 val = XVECEXP (x, 0, 0);
29505 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29506
29507 output_addr_const (fp, val);
29508
29509 switch (reloc)
29510 {
29511 case TLS_GD32:
29512 fputs ("(tlsgd)", fp);
29513 break;
29514 case TLS_LDM32:
29515 fputs ("(tlsldm)", fp);
29516 break;
29517 case TLS_LDO32:
29518 fputs ("(tlsldo)", fp);
29519 break;
29520 case TLS_IE32:
29521 fputs ("(gottpoff)", fp);
29522 break;
29523 case TLS_LE32:
29524 fputs ("(tpoff)", fp);
29525 break;
29526 case TLS_DESCSEQ:
29527 fputs ("(tlsdesc)", fp);
29528 break;
29529 default:
29530 gcc_unreachable ();
29531 }
29532
29533 switch (reloc)
29534 {
29535 case TLS_GD32:
29536 case TLS_LDM32:
29537 case TLS_IE32:
29538 case TLS_DESCSEQ:
29539 fputs (" + (. - ", fp);
29540 output_addr_const (fp, XVECEXP (x, 0, 2));
29541 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29542 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29543 output_addr_const (fp, XVECEXP (x, 0, 3));
29544 fputc (')', fp);
29545 break;
29546 default:
29547 break;
29548 }
29549
29550 return TRUE;
29551 }
29552
29553 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29554
29555 static void
29556 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29557 {
29558 gcc_assert (size == 4);
29559 fputs ("\t.word\t", file);
29560 output_addr_const (file, x);
29561 fputs ("(tlsldo)", file);
29562 }
29563
29564 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29565
29566 static bool
29567 arm_output_addr_const_extra (FILE *fp, rtx x)
29568 {
29569 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29570 return arm_emit_tls_decoration (fp, x);
29571 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29572 {
29573 char label[256];
29574 int labelno = INTVAL (XVECEXP (x, 0, 0));
29575
29576 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29577 assemble_name_raw (fp, label);
29578
29579 return TRUE;
29580 }
29581 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29582 {
29583 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29584 if (GOT_PCREL)
29585 fputs ("+.", fp);
29586 fputs ("-(", fp);
29587 output_addr_const (fp, XVECEXP (x, 0, 0));
29588 fputc (')', fp);
29589 return TRUE;
29590 }
29591 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29592 {
29593 output_addr_const (fp, XVECEXP (x, 0, 0));
29594 if (GOT_PCREL)
29595 fputs ("+.", fp);
29596 fputs ("-(", fp);
29597 output_addr_const (fp, XVECEXP (x, 0, 1));
29598 fputc (')', fp);
29599 return TRUE;
29600 }
29601 else if (GET_CODE (x) == CONST_VECTOR)
29602 return arm_emit_vector_const (fp, x);
29603
29604 return FALSE;
29605 }
29606
29607 /* Output assembly for a shift instruction.
29608 SET_FLAGS determines how the instruction modifies the condition codes.
29609 0 - Do not set condition codes.
29610 1 - Set condition codes.
29611 2 - Use smallest instruction. */
29612 const char *
29613 arm_output_shift(rtx * operands, int set_flags)
29614 {
29615 char pattern[100];
29616 static const char flag_chars[3] = {'?', '.', '!'};
29617 const char *shift;
29618 HOST_WIDE_INT val;
29619 char c;
29620
29621 c = flag_chars[set_flags];
29622 if (TARGET_UNIFIED_ASM)
29623 {
29624 shift = shift_op(operands[3], &val);
29625 if (shift)
29626 {
29627 if (val != -1)
29628 operands[2] = GEN_INT(val);
29629 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29630 }
29631 else
29632 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29633 }
29634 else
29635 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29636 output_asm_insn (pattern, operands);
29637 return "";
29638 }
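/* Sketch of the output: in unified syntax, an SImode shift whose shift_op is
   "lsl" and whose SET_FLAGS argument is 0 is printed from the template
   "lsl%?\t%0, %1, %2", i.e. something like "lsl r0, r1, #2" after operand
   substitution (the concrete operands here are only illustrative).  */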
29639
29640 /* Output assembly for a WMMX immediate shift instruction. */
29641 const char *
29642 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29643 {
29644 int shift = INTVAL (operands[2]);
29645 char templ[50];
29646 enum machine_mode opmode = GET_MODE (operands[0]);
29647
29648 gcc_assert (shift >= 0);
29649
29650 /* Handle shift values larger than the maximum the instruction can encode:
29651 > 63 (for the D qualifier), > 31 (for W) or > 15 (for H). */
29652 if (((opmode == V4HImode) && (shift > 15))
29653 || ((opmode == V2SImode) && (shift > 31))
29654 || ((opmode == DImode) && (shift > 63)))
29655 {
29656 if (wror_or_wsra)
29657 {
29658 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29659 output_asm_insn (templ, operands);
29660 if (opmode == DImode)
29661 {
29662 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29663 output_asm_insn (templ, operands);
29664 }
29665 }
29666 else
29667 {
29668 /* The destination register will contain all zeros. */
29669 sprintf (templ, "wzero\t%%0");
29670 output_asm_insn (templ, operands);
29671 }
29672 return "";
29673 }
29674
29675 if ((opmode == DImode) && (shift > 32))
29676 {
29677 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29678 output_asm_insn (templ, operands);
29679 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29680 output_asm_insn (templ, operands);
29681 }
29682 else
29683 {
29684 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29685 output_asm_insn (templ, operands);
29686 }
29687 return "";
29688 }
29689
29690 /* Output assembly for a WMMX tinsr instruction. */
29691 const char *
29692 arm_output_iwmmxt_tinsr (rtx *operands)
29693 {
29694 int mask = INTVAL (operands[3]);
29695 int i;
29696 char templ[50];
29697 int units = mode_nunits[GET_MODE (operands[0])];
29698 gcc_assert ((mask & (mask - 1)) == 0);
29699 for (i = 0; i < units; ++i)
29700 {
29701 if ((mask & 0x01) == 1)
29702 {
29703 break;
29704 }
29705 mask >>= 1;
29706 }
29707 gcc_assert (i < units);
29708 {
29709 switch (GET_MODE (operands[0]))
29710 {
29711 case V8QImode:
29712 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29713 break;
29714 case V4HImode:
29715 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29716 break;
29717 case V2SImode:
29718 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29719 break;
29720 default:
29721 gcc_unreachable ();
29722 break;
29723 }
29724 output_asm_insn (templ, operands);
29725 }
29726 return "";
29727 }
29728
29729 /* Output a Thumb-1 casesi dispatch sequence. */
29730 const char *
29731 thumb1_output_casesi (rtx *operands)
29732 {
29733 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
29734
29735 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29736
29737 switch (GET_MODE(diff_vec))
29738 {
29739 case QImode:
29740 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29741 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29742 case HImode:
29743 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29744 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29745 case SImode:
29746 return "bl\t%___gnu_thumb1_case_si";
29747 default:
29748 gcc_unreachable ();
29749 }
29750 }
29751
29752 /* Output a Thumb-2 casesi instruction. */
29753 const char *
29754 thumb2_output_casesi (rtx *operands)
29755 {
29756 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
29757
29758 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29759
29760 output_asm_insn ("cmp\t%0, %1", operands);
29761 output_asm_insn ("bhi\t%l3", operands);
29762 switch (GET_MODE(diff_vec))
29763 {
29764 case QImode:
29765 return "tbb\t[%|pc, %0]";
29766 case HImode:
29767 return "tbh\t[%|pc, %0, lsl #1]";
29768 case SImode:
29769 if (flag_pic)
29770 {
29771 output_asm_insn ("adr\t%4, %l2", operands);
29772 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29773 output_asm_insn ("add\t%4, %4, %5", operands);
29774 return "bx\t%4";
29775 }
29776 else
29777 {
29778 output_asm_insn ("adr\t%4, %l2", operands);
29779 return "ldr\t%|pc, [%4, %0, lsl #2]";
29780 }
29781 default:
29782 gcc_unreachable ();
29783 }
29784 }
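/* Illustrative Thumb-2 dispatch sequence for a HImode table (operand numbers
   replaced by example registers):
	cmp	r0, #<max index>
	bhi	<default label>
	tbh	[pc, r0, lsl #1]
   The QImode variant uses tbb, and the SImode variant falls back to the
   adr/ldr sequence shown in the code above.  */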
29785
29786 /* Most ARM cores are single issue, but some newer ones can issue two or three
29787 instructions per cycle. The scheduler descriptions rely on this being correct. */
29788 static int
29789 arm_issue_rate (void)
29790 {
29791 switch (arm_tune)
29792 {
29793 case cortexa15:
29794 case cortexa57:
29795 return 3;
29796
29797 case cortexr4:
29798 case cortexr4f:
29799 case cortexr5:
29800 case genericv7a:
29801 case cortexa5:
29802 case cortexa7:
29803 case cortexa8:
29804 case cortexa9:
29805 case cortexa12:
29806 case cortexa53:
29807 case fa726te:
29808 case marvell_pj4:
29809 return 2;
29810
29811 default:
29812 return 1;
29813 }
29814 }
29815
29816 /* A table and a function to perform ARM-specific name mangling for
29817 NEON vector types in order to conform to the AAPCS (see "Procedure
29818 Call Standard for the ARM Architecture", Appendix A). To qualify
29819 for emission with the mangled names defined in that document, a
29820 vector type must not only be of the correct mode but also be
29821 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29822 typedef struct
29823 {
29824 enum machine_mode mode;
29825 const char *element_type_name;
29826 const char *aapcs_name;
29827 } arm_mangle_map_entry;
29828
29829 static arm_mangle_map_entry arm_mangle_map[] = {
29830 /* 64-bit containerized types. */
29831 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29832 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29833 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29834 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29835 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29836 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29837 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29838 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29839 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29840 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29841
29842 /* 128-bit containerized types. */
29843 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29844 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29845 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29846 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29847 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29848 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29849 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29850 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29851 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29852 { VOIDmode, NULL, NULL }
29853 };
29854
29855 const char *
29856 arm_mangle_type (const_tree type)
29857 {
29858 arm_mangle_map_entry *pos = arm_mangle_map;
29859
29860 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29861 has to be mangled as if it is in the "std" namespace. */
29862 if (TARGET_AAPCS_BASED
29863 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29864 return "St9__va_list";
29865
29866 /* Half-precision float. */
29867 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29868 return "Dh";
29869
29870 if (TREE_CODE (type) != VECTOR_TYPE)
29871 return NULL;
29872
29873 /* Check the mode of the vector type, and the name of the vector
29874 element type, against the table. */
29875 while (pos->mode != VOIDmode)
29876 {
29877 tree elt_type = TREE_TYPE (type);
29878
29879 if (pos->mode == TYPE_MODE (type)
29880 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29881 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29882 pos->element_type_name))
29883 return pos->aapcs_name;
29884
29885 pos++;
29886 }
29887
29888 /* Use the default mangling for unrecognized (possibly user-defined)
29889 vector types. */
29890 return NULL;
29891 }
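/* Mangling example derived from the table above: a vector in V8QImode whose
   element type is __builtin_neon_qi (the type behind int8x8_t in arm_neon.h)
   is emitted as "15__simd64_int8_t", so a declaration such as
   "void f (int8x8_t);" would mangle roughly as "_Z1f15__simd64_int8_t".  */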
29892
29893 /* Order of allocation of core registers for Thumb: this allocation is
29894 written over the corresponding initial entries of the array
29895 initialized with REG_ALLOC_ORDER. We allocate all low registers
29896 first. Saving and restoring a low register is usually cheaper than
29897 using a call-clobbered high register. */
29898
29899 static const int thumb_core_reg_alloc_order[] =
29900 {
29901 3, 2, 1, 0, 4, 5, 6, 7,
29902 14, 12, 8, 9, 10, 11
29903 };
29904
29905 /* Adjust register allocation order when compiling for Thumb. */
29906
29907 void
29908 arm_order_regs_for_local_alloc (void)
29909 {
29910 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29911 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29912 if (TARGET_THUMB)
29913 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29914 sizeof (thumb_core_reg_alloc_order));
29915 }
29916
29917 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29918
29919 bool
29920 arm_frame_pointer_required (void)
29921 {
29922 return (cfun->has_nonlocal_label
29923 || SUBTARGET_FRAME_POINTER_REQUIRED
29924 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29925 }
29926
29927 /* Thumb-1 is the only target that cannot do conditional execution, so
29928 return true unless the target is Thumb-1. */
29929 static bool
29930 arm_have_conditional_execution (void)
29931 {
29932 return !TARGET_THUMB1;
29933 }
29934
29935 tree
29936 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29937 {
29938 enum machine_mode in_mode, out_mode;
29939 int in_n, out_n;
29940
29941 if (TREE_CODE (type_out) != VECTOR_TYPE
29942 || TREE_CODE (type_in) != VECTOR_TYPE)
29943 return NULL_TREE;
29944
29945 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29946 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29947 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29948 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29949
29950 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29951 decl of the vectorized builtin for the appropriate vector mode.
29952 NULL_TREE is returned if no such builtin is available. */
29953 #undef ARM_CHECK_BUILTIN_MODE
29954 #define ARM_CHECK_BUILTIN_MODE(C) \
29955 (TARGET_NEON && TARGET_FPU_ARMV8 \
29956 && flag_unsafe_math_optimizations \
29957 && ARM_CHECK_BUILTIN_MODE_1 (C))
29958
29959 #undef ARM_CHECK_BUILTIN_MODE_1
29960 #define ARM_CHECK_BUILTIN_MODE_1(C) \
29961 (out_mode == SFmode && out_n == C \
29962 && in_mode == SFmode && in_n == C)
29963
29964 #undef ARM_FIND_VRINT_VARIANT
29965 #define ARM_FIND_VRINT_VARIANT(N) \
29966 (ARM_CHECK_BUILTIN_MODE (2) \
29967 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29968 : (ARM_CHECK_BUILTIN_MODE (4) \
29969 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29970 : NULL_TREE))
29971
29972 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29973 {
29974 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29975 switch (fn)
29976 {
29977 case BUILT_IN_FLOORF:
29978 return ARM_FIND_VRINT_VARIANT (vrintm);
29979 case BUILT_IN_CEILF:
29980 return ARM_FIND_VRINT_VARIANT (vrintp);
29981 case BUILT_IN_TRUNCF:
29982 return ARM_FIND_VRINT_VARIANT (vrintz);
29983 case BUILT_IN_ROUNDF:
29984 return ARM_FIND_VRINT_VARIANT (vrinta);
29985 #undef ARM_CHECK_BUILTIN_MODE
29986 #define ARM_CHECK_BUILTIN_MODE(C, N) \
29987 (out_mode == N##Imode && out_n == C \
29988 && in_mode == N##Imode && in_n == C)
29989 case BUILT_IN_BSWAP16:
29990 if (ARM_CHECK_BUILTIN_MODE (4, H))
29991 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
29992 else if (ARM_CHECK_BUILTIN_MODE (8, H))
29993 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
29994 else
29995 return NULL_TREE;
29996 case BUILT_IN_BSWAP32:
29997 if (ARM_CHECK_BUILTIN_MODE (2, S))
29998 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
29999 else if (ARM_CHECK_BUILTIN_MODE (4, S))
30000 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
30001 else
30002 return NULL_TREE;
30003 case BUILT_IN_BSWAP64:
30004 if (ARM_CHECK_BUILTIN_MODE (2, D))
30005 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
30006 else
30007 return NULL_TREE;
30008
30009 default:
30010 return NULL_TREE;
30011 }
30012 }
30013 return NULL_TREE;
30014 }
30015 #undef ARM_CHECK_BUILTIN_MODE
30016 #undef ARM_FIND_VRINT_VARIANT
30017
30018 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30019 static HOST_WIDE_INT
30020 arm_vector_alignment (const_tree type)
30021 {
30022 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30023
30024 if (TARGET_AAPCS_BASED)
30025 align = MIN (align, 64);
30026
30027 return align;
30028 }
30029
30030 static unsigned int
30031 arm_autovectorize_vector_sizes (void)
30032 {
30033 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
30034 }
30035
30036 static bool
30037 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30038 {
30039 /* Vectors which aren't in packed structures will not be less aligned than
30040 the natural alignment of their element type, so this is safe. */
30041 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30042 return !is_packed;
30043
30044 return default_builtin_vector_alignment_reachable (type, is_packed);
30045 }
30046
30047 static bool
30048 arm_builtin_support_vector_misalignment (enum machine_mode mode,
30049 const_tree type, int misalignment,
30050 bool is_packed)
30051 {
30052 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30053 {
30054 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30055
30056 if (is_packed)
30057 return align == 1;
30058
30059 /* If the misalignment is unknown, we should be able to handle the access
30060 so long as it is not to a member of a packed data structure. */
30061 if (misalignment == -1)
30062 return true;
30063
30064 /* Return true if the misalignment is a multiple of the natural alignment
30065 of the vector's element type. This is probably always going to be
30066 true in practice, since we've already established that this isn't a
30067 packed access. */
30068 return ((misalignment % align) == 0);
30069 }
30070
30071 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30072 is_packed);
30073 }
30074
30075 static void
30076 arm_conditional_register_usage (void)
30077 {
30078 int regno;
30079
30080 if (TARGET_THUMB1 && optimize_size)
30081 {
30082 /* When optimizing for size on Thumb-1, it's better not
30083 to use the HI regs, because of the overhead of
30084 stacking them. */
30085 for (regno = FIRST_HI_REGNUM;
30086 regno <= LAST_HI_REGNUM; ++regno)
30087 fixed_regs[regno] = call_used_regs[regno] = 1;
30088 }
30089
30090 /* The link register can be clobbered by any branch insn,
30091 but we have no way to track that at present, so mark
30092 it as unavailable. */
30093 if (TARGET_THUMB1)
30094 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30095
30096 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
30097 {
30098 /* VFPv3 registers are disabled when earlier VFP
30099 versions are selected due to the definition of
30100 LAST_VFP_REGNUM. */
30101 for (regno = FIRST_VFP_REGNUM;
30102 regno <= LAST_VFP_REGNUM; ++ regno)
30103 {
30104 fixed_regs[regno] = 0;
30105 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30106 || regno >= FIRST_VFP_REGNUM + 32;
30107 }
30108 }
30109
30110 if (TARGET_REALLY_IWMMXT)
30111 {
30112 regno = FIRST_IWMMXT_GR_REGNUM;
30113 /* The 2002/10/09 revision of the XScale ABI has wCG0
30114 and wCG1 as call-preserved registers. The 2002/11/21
30115 revision changed this so that all wCG registers are
30116 scratch registers. */
30117 for (regno = FIRST_IWMMXT_GR_REGNUM;
30118 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30119 fixed_regs[regno] = 0;
30120 /* The XScale ABI has wR0 - wR9 as scratch registers,
30121 the rest as call-preserved registers. */
30122 for (regno = FIRST_IWMMXT_REGNUM;
30123 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30124 {
30125 fixed_regs[regno] = 0;
30126 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30127 }
30128 }
30129
30130 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30131 {
30132 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30133 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30134 }
30135 else if (TARGET_APCS_STACK)
30136 {
30137 fixed_regs[10] = 1;
30138 call_used_regs[10] = 1;
30139 }
30140 /* -mcaller-super-interworking reserves r11 for calls to
30141 _interwork_r11_call_via_rN(). Making the register global
30142 is an easy way of ensuring that it remains valid for all
30143 calls. */
30144 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30145 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30146 {
30147 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30148 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30149 if (TARGET_CALLER_INTERWORKING)
30150 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30151 }
30152 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30153 }
30154
30155 static reg_class_t
30156 arm_preferred_rename_class (reg_class_t rclass)
30157 {
30158 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30159 using GENERAL_REGS. During the register rename pass we therefore prefer
30160 LO_REGS, which can reduce code size. */
30161 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30162 return LO_REGS;
30163 else
30164 return NO_REGS;
30165 }
30166
30167 /* Compute the attribute "length" of insn "*push_multi".
30168 So this function MUST be kept in sync with that insn pattern. */
30169 int
30170 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30171 {
30172 int i, regno, hi_reg;
30173 int num_saves = XVECLEN (parallel_op, 0);
30174
30175 /* ARM mode. */
30176 if (TARGET_ARM)
30177 return 4;
30178 /* Thumb1 mode. */
30179 if (TARGET_THUMB1)
30180 return 2;
30181
30182 /* Thumb2 mode. */
30183 regno = REGNO (first_op);
30184 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30185 for (i = 1; i < num_saves && !hi_reg; i++)
30186 {
30187 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30188 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30189 }
30190
30191 if (!hi_reg)
30192 return 2;
30193 return 4;
30194 }
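/* Length examples for the Thumb-2 case above: "push {r4, r5, lr}" names only
   low registers and LR, so the 16-bit encoding (length 2) is used, whereas
   "push {r4, r8}" involves a high register other than LR and therefore needs
   the 32-bit encoding (length 4).  */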
30195
30196 /* Compute the number of instructions emitted by output_move_double. */
30197 int
30198 arm_count_output_move_double_insns (rtx *operands)
30199 {
30200 int count;
30201 rtx ops[2];
30202 /* output_move_double may modify the operands array, so call it
30203 here on a copy of the array. */
30204 ops[0] = operands[0];
30205 ops[1] = operands[1];
30206 output_move_double (ops, false, &count);
30207 return count;
30208 }
30209
30210 int
30211 vfp3_const_double_for_fract_bits (rtx operand)
30212 {
30213 REAL_VALUE_TYPE r0;
30214
30215 if (!CONST_DOUBLE_P (operand))
30216 return 0;
30217
30218 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30219 if (exact_real_inverse (DFmode, &r0))
30220 {
30221 if (exact_real_truncate (DFmode, &r0))
30222 {
30223 HOST_WIDE_INT value = real_to_integer (&r0);
30224 value = value & 0xffffffff;
30225 if ((value != 0) && ( (value & (value - 1)) == 0))
30226 return int_log2 (value);
30227 }
30228 }
30229 return 0;
30230 }
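/* Example: the constant 0.125 has the exact inverse 8.0 = 2^3, so the
   function above returns 3, i.e. the value can be treated as a fixed-point
   quantity with 3 fraction bits; a constant such as 0.3 has no exact
   power-of-two inverse and yields 0.  */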
30231
30232 int
30233 vfp3_const_double_for_bits (rtx operand)
30234 {
30235 REAL_VALUE_TYPE r0;
30236
30237 if (!CONST_DOUBLE_P (operand))
30238 return 0;
30239
30240 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30241 if (exact_real_truncate (DFmode, &r0))
30242 {
30243 HOST_WIDE_INT value = real_to_integer (&r0);
30244 value = value & 0xffffffff;
30245 if ((value != 0) && ( (value & (value - 1)) == 0))
30246 return int_log2 (value);
30247 }
30248
30249 return 0;
30250 }
30251 \f
30252 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30253
30254 static void
30255 arm_pre_atomic_barrier (enum memmodel model)
30256 {
30257 if (need_atomic_barrier_p (model, true))
30258 emit_insn (gen_memory_barrier ());
30259 }
30260
30261 static void
30262 arm_post_atomic_barrier (enum memmodel model)
30263 {
30264 if (need_atomic_barrier_p (model, false))
30265 emit_insn (gen_memory_barrier ());
30266 }
30267
30268 /* Emit the load-exclusive and store-exclusive instructions.
30269 Use acquire and release versions if necessary. */
30270
30271 static void
30272 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
30273 {
30274 rtx (*gen) (rtx, rtx);
30275
30276 if (acq)
30277 {
30278 switch (mode)
30279 {
30280 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30281 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30282 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30283 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30284 default:
30285 gcc_unreachable ();
30286 }
30287 }
30288 else
30289 {
30290 switch (mode)
30291 {
30292 case QImode: gen = gen_arm_load_exclusiveqi; break;
30293 case HImode: gen = gen_arm_load_exclusivehi; break;
30294 case SImode: gen = gen_arm_load_exclusivesi; break;
30295 case DImode: gen = gen_arm_load_exclusivedi; break;
30296 default:
30297 gcc_unreachable ();
30298 }
30299 }
30300
30301 emit_insn (gen (rval, mem));
30302 }
30303
30304 static void
30305 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
30306 rtx mem, bool rel)
30307 {
30308 rtx (*gen) (rtx, rtx, rtx);
30309
30310 if (rel)
30311 {
30312 switch (mode)
30313 {
30314 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30315 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30316 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30317 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30318 default:
30319 gcc_unreachable ();
30320 }
30321 }
30322 else
30323 {
30324 switch (mode)
30325 {
30326 case QImode: gen = gen_arm_store_exclusiveqi; break;
30327 case HImode: gen = gen_arm_store_exclusivehi; break;
30328 case SImode: gen = gen_arm_store_exclusivesi; break;
30329 case DImode: gen = gen_arm_store_exclusivedi; break;
30330 default:
30331 gcc_unreachable ();
30332 }
30333 }
30334
30335 emit_insn (gen (bval, rval, mem));
30336 }
30337
30338 /* Mark the previous jump instruction as unlikely. */
30339
30340 static void
30341 emit_unlikely_jump (rtx insn)
30342 {
30343 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30344
30345 insn = emit_jump_insn (insn);
30346 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30347 }
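/* The note added above gives the branch a probability of roughly 1% of
   REG_BR_PROB_BASE (REG_BR_PROB_BASE / 100 - 1), steering the optimizers
   towards treating the retry path of the exclusive sequences as cold.  */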
30348
30349 /* Expand a compare and swap pattern. */
30350
30351 void
30352 arm_expand_compare_and_swap (rtx operands[])
30353 {
30354 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30355 enum machine_mode mode;
30356 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30357
30358 bval = operands[0];
30359 rval = operands[1];
30360 mem = operands[2];
30361 oldval = operands[3];
30362 newval = operands[4];
30363 is_weak = operands[5];
30364 mod_s = operands[6];
30365 mod_f = operands[7];
30366 mode = GET_MODE (mem);
30367
30368 /* Normally the succ memory model must be stronger than fail, but in the
30369 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30370 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30371
30372 if (TARGET_HAVE_LDACQ
30373 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30374 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30375 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30376
30377 switch (mode)
30378 {
30379 case QImode:
30380 case HImode:
30381 /* For narrow modes, we're going to perform the comparison in SImode,
30382 so do the zero-extension now. */
30383 rval = gen_reg_rtx (SImode);
30384 oldval = convert_modes (SImode, mode, oldval, true);
30385 /* FALLTHRU */
30386
30387 case SImode:
30388 /* Force the value into a register if needed. We waited until after
30389 the zero-extension above to do this properly. */
30390 if (!arm_add_operand (oldval, SImode))
30391 oldval = force_reg (SImode, oldval);
30392 break;
30393
30394 case DImode:
30395 if (!cmpdi_operand (oldval, mode))
30396 oldval = force_reg (mode, oldval);
30397 break;
30398
30399 default:
30400 gcc_unreachable ();
30401 }
30402
30403 switch (mode)
30404 {
30405 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30406 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30407 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30408 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30409 default:
30410 gcc_unreachable ();
30411 }
30412
30413 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30414
30415 if (mode == QImode || mode == HImode)
30416 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30417
30418 /* In all cases, we arrange for success to be signaled by Z set.
30419 This arrangement allows for the boolean result to be used directly
30420 in a subsequent branch, post optimization. */
30421 x = gen_rtx_REG (CCmode, CC_REGNUM);
30422 x = gen_rtx_EQ (SImode, x, const0_rtx);
30423 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30424 }
30425
30426 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30427 another memory store between the load-exclusive and store-exclusive can
30428 reset the monitor from Exclusive to Open state. This means we must wait
30429 until after reload to split the pattern, lest we get a register spill in
30430 the middle of the atomic sequence. */
30431
30432 void
30433 arm_split_compare_and_swap (rtx operands[])
30434 {
30435 rtx rval, mem, oldval, newval, scratch;
30436 enum machine_mode mode;
30437 enum memmodel mod_s, mod_f;
30438 bool is_weak;
30439 rtx label1, label2, x, cond;
30440
30441 rval = operands[0];
30442 mem = operands[1];
30443 oldval = operands[2];
30444 newval = operands[3];
30445 is_weak = (operands[4] != const0_rtx);
30446 mod_s = (enum memmodel) INTVAL (operands[5]);
30447 mod_f = (enum memmodel) INTVAL (operands[6]);
30448 scratch = operands[7];
30449 mode = GET_MODE (mem);
30450
30451 bool use_acquire = TARGET_HAVE_LDACQ
30452 && !(mod_s == MEMMODEL_RELAXED
30453 || mod_s == MEMMODEL_CONSUME
30454 || mod_s == MEMMODEL_RELEASE);
30455
30456 bool use_release = TARGET_HAVE_LDACQ
30457 && !(mod_s == MEMMODEL_RELAXED
30458 || mod_s == MEMMODEL_CONSUME
30459 || mod_s == MEMMODEL_ACQUIRE);
30460
30461 /* Checks whether a barrier is needed and emits one accordingly. */
30462 if (!(use_acquire || use_release))
30463 arm_pre_atomic_barrier (mod_s);
30464
30465 label1 = NULL_RTX;
30466 if (!is_weak)
30467 {
30468 label1 = gen_label_rtx ();
30469 emit_label (label1);
30470 }
30471 label2 = gen_label_rtx ();
30472
30473 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30474
30475 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30476 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30477 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30478 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30479 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30480
30481 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30482
30483 /* Weak or strong, we want EQ to be true for success, so that we
30484 match the flags that we got from the compare above. */
30485 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30486 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30487 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30488
30489 if (!is_weak)
30490 {
30491 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30492 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30493 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30494 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30495 }
30496
30497 if (mod_f != MEMMODEL_RELAXED)
30498 emit_label (label2);
30499
30500 /* Checks whether a barrier is needed and emits one accordingly. */
30501 if (!(use_acquire || use_release))
30502 arm_post_atomic_barrier (mod_s);
30503
30504 if (mod_f == MEMMODEL_RELAXED)
30505 emit_label (label2);
30506 }
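/* Roughly, for a strong SImode compare-and-swap with SEQ_CST ordering on a
   target without LDAEX/STLEX, the split above expands to something like:
	dmb
   1:	ldrex	rD, [rAddr]
	cmp	rD, rOld
	bne	2f
	strex	rT, rNew, [rAddr]
	cmp	rT, #0
	bne	1b
   2:	dmb
   (register names and label layout are illustrative only).  */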
30507
30508 void
30509 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30510 rtx value, rtx model_rtx, rtx cond)
30511 {
30512 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30513 enum machine_mode mode = GET_MODE (mem);
30514 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
30515 rtx label, x;
30516
30517 bool use_acquire = TARGET_HAVE_LDACQ
30518 && !(model == MEMMODEL_RELAXED
30519 || model == MEMMODEL_CONSUME
30520 || model == MEMMODEL_RELEASE);
30521
30522 bool use_release = TARGET_HAVE_LDACQ
30523 && !(model == MEMMODEL_RELAXED
30524 || model == MEMMODEL_CONSUME
30525 || model == MEMMODEL_ACQUIRE);
30526
30527 /* Checks whether a barrier is needed and emits one accordingly. */
30528 if (!(use_acquire || use_release))
30529 arm_pre_atomic_barrier (model);
30530
30531 label = gen_label_rtx ();
30532 emit_label (label);
30533
30534 if (new_out)
30535 new_out = gen_lowpart (wmode, new_out);
30536 if (old_out)
30537 old_out = gen_lowpart (wmode, old_out);
30538 else
30539 old_out = new_out;
30540 value = simplify_gen_subreg (wmode, value, mode, 0);
30541
30542 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30543
30544 switch (code)
30545 {
30546 case SET:
30547 new_out = value;
30548 break;
30549
30550 case NOT:
30551 x = gen_rtx_AND (wmode, old_out, value);
30552 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30553 x = gen_rtx_NOT (wmode, new_out);
30554 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30555 break;
30556
30557 case MINUS:
30558 if (CONST_INT_P (value))
30559 {
30560 value = GEN_INT (-INTVAL (value));
30561 code = PLUS;
30562 }
30563 /* FALLTHRU */
30564
30565 case PLUS:
30566 if (mode == DImode)
30567 {
30568 /* DImode plus/minus need to clobber flags. */
30569 /* The adddi3 and subdi3 patterns are incorrectly written so that
30570 they require matching operands, even when we could easily support
30571 three operands. Thankfully, this can be fixed up post-splitting,
30572 as the individual add+adc patterns do accept three operands and
30573 post-reload cprop can make these moves go away. */
30574 emit_move_insn (new_out, old_out);
30575 if (code == PLUS)
30576 x = gen_adddi3 (new_out, new_out, value);
30577 else
30578 x = gen_subdi3 (new_out, new_out, value);
30579 emit_insn (x);
30580 break;
30581 }
30582 /* FALLTHRU */
30583
30584 default:
30585 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30586 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30587 break;
30588 }
30589
30590 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30591 use_release);
30592
30593 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30594 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30595
30596 /* Checks whether a barrier is needed and emits one accordingly. */
30597 if (!(use_acquire || use_release))
30598 arm_post_atomic_barrier (model);
30599 }
30600 \f
30601 #define MAX_VECT_LEN 16
30602
30603 struct expand_vec_perm_d
30604 {
30605 rtx target, op0, op1;
30606 unsigned char perm[MAX_VECT_LEN];
30607 enum machine_mode vmode;
30608 unsigned char nelt;
30609 bool one_vector_p;
30610 bool testing_p;
30611 };
30612
30613 /* Generate a variable permutation. */
30614
30615 static void
30616 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30617 {
30618 enum machine_mode vmode = GET_MODE (target);
30619 bool one_vector_p = rtx_equal_p (op0, op1);
30620
30621 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30622 gcc_checking_assert (GET_MODE (op0) == vmode);
30623 gcc_checking_assert (GET_MODE (op1) == vmode);
30624 gcc_checking_assert (GET_MODE (sel) == vmode);
30625 gcc_checking_assert (TARGET_NEON);
30626
30627 if (one_vector_p)
30628 {
30629 if (vmode == V8QImode)
30630 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30631 else
30632 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30633 }
30634 else
30635 {
30636 rtx pair;
30637
30638 if (vmode == V8QImode)
30639 {
30640 pair = gen_reg_rtx (V16QImode);
30641 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30642 pair = gen_lowpart (TImode, pair);
30643 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30644 }
30645 else
30646 {
30647 pair = gen_reg_rtx (OImode);
30648 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30649 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30650 }
30651 }
30652 }
30653
30654 void
30655 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30656 {
30657 enum machine_mode vmode = GET_MODE (target);
30658 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30659 bool one_vector_p = rtx_equal_p (op0, op1);
30660 rtx rmask[MAX_VECT_LEN], mask;
30661
30662 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30663 numbering of elements for big-endian, we must reverse the order. */
30664 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30665
30666 /* The VTBL instruction does not use a modulo index, so we must take care
30667 of that ourselves. */
30668 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30669 for (i = 0; i < nelt; ++i)
30670 rmask[i] = mask;
30671 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30672 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30673
30674 arm_expand_vec_perm_1 (target, op0, op1, sel);
30675 }
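/* Example of the modulo handling above: with two V8QImode operands the
   concatenation has 16 elements, so each selector byte is ANDed with 15
   before the VTBL; with a single operand (op0 == op1) the mask is 7.  This
   makes out-of-range selector values wrap, matching the middle-end's modulo
   semantics for vec_perm, rather than producing the zeroes a raw VTBL would
   give for indexes beyond the table.  */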
30676
30677 /* Generate or test for an insn that supports a constant permutation. */
30678
30679 /* Recognize patterns for the VUZP insns. */
30680
30681 static bool
30682 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30683 {
30684 unsigned int i, odd, mask, nelt = d->nelt;
30685 rtx out0, out1, in0, in1, x;
30686 rtx (*gen)(rtx, rtx, rtx, rtx);
30687
30688 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30689 return false;
30690
30691 /* Note that these are little-endian tests. Adjust for big-endian later. */
30692 if (d->perm[0] == 0)
30693 odd = 0;
30694 else if (d->perm[0] == 1)
30695 odd = 1;
30696 else
30697 return false;
30698 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30699
30700 for (i = 0; i < nelt; i++)
30701 {
30702 unsigned elt = (i * 2 + odd) & mask;
30703 if (d->perm[i] != elt)
30704 return false;
30705 }
30706
30707 /* Success! */
30708 if (d->testing_p)
30709 return true;
30710
30711 switch (d->vmode)
30712 {
30713 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30714 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30715 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30716 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30717 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30718 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30719 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30720 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30721 default:
30722 gcc_unreachable ();
30723 }
30724
30725 in0 = d->op0;
30726 in1 = d->op1;
30727 if (BYTES_BIG_ENDIAN)
30728 {
30729 x = in0, in0 = in1, in1 = x;
30730 odd = !odd;
30731 }
30732
30733 out0 = d->target;
30734 out1 = gen_reg_rtx (d->vmode);
30735 if (odd)
30736 x = out0, out0 = out1, out1 = x;
30737
30738 emit_insn (gen (out0, in0, in1, out1));
30739 return true;
30740 }
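/* Permutation example for the VUZP matcher above: on two V4SImode inputs
   (nelt == 4, mask == 7), the even-lane selection {0, 2, 4, 6} matches with
   odd == 0 and expands to a vuzp, with the unused output going to a scratch
   register.  */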
30741
30742 /* Recognize patterns for the VZIP insns. */
30743
30744 static bool
30745 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30746 {
30747 unsigned int i, high, mask, nelt = d->nelt;
30748 rtx out0, out1, in0, in1, x;
30749 rtx (*gen)(rtx, rtx, rtx, rtx);
30750
30751 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30752 return false;
30753
30754 /* Note that these are little-endian tests. Adjust for big-endian later. */
30755 high = nelt / 2;
30756 if (d->perm[0] == high)
30757 ;
30758 else if (d->perm[0] == 0)
30759 high = 0;
30760 else
30761 return false;
30762 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30763
30764 for (i = 0; i < nelt / 2; i++)
30765 {
30766 unsigned elt = (i + high) & mask;
30767 if (d->perm[i * 2] != elt)
30768 return false;
30769 elt = (elt + nelt) & mask;
30770 if (d->perm[i * 2 + 1] != elt)
30771 return false;
30772 }
30773
30774 /* Success! */
30775 if (d->testing_p)
30776 return true;
30777
30778 switch (d->vmode)
30779 {
30780 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30781 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30782 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30783 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30784 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30785 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30786 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30787 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30788 default:
30789 gcc_unreachable ();
30790 }
30791
30792 in0 = d->op0;
30793 in1 = d->op1;
30794 if (BYTES_BIG_ENDIAN)
30795 {
30796 x = in0, in0 = in1, in1 = x;
30797 high = !high;
30798 }
30799
30800 out0 = d->target;
30801 out1 = gen_reg_rtx (d->vmode);
30802 if (high)
30803 x = out0, out0 = out1, out1 = x;
30804
30805 emit_insn (gen (out0, in0, in1, out1));
30806 return true;
30807 }
30808
30809 /* Recognize patterns for the VREV insns. */
30810
30811 static bool
30812 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30813 {
30814 unsigned int i, j, diff, nelt = d->nelt;
30815 rtx (*gen)(rtx, rtx, rtx);
30816
30817 if (!d->one_vector_p)
30818 return false;
30819
30820 diff = d->perm[0];
30821 switch (diff)
30822 {
30823 case 7:
30824 switch (d->vmode)
30825 {
30826 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30827 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30828 default:
30829 return false;
30830 }
30831 break;
30832 case 3:
30833 switch (d->vmode)
30834 {
30835 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30836 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30837 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30838 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30839 default:
30840 return false;
30841 }
30842 break;
30843 case 1:
30844 switch (d->vmode)
30845 {
30846 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30847 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30848 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30849 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30850 case V4SImode: gen = gen_neon_vrev64v4si; break;
30851 case V2SImode: gen = gen_neon_vrev64v2si; break;
30852 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30853 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30854 default:
30855 return false;
30856 }
30857 break;
30858 default:
30859 return false;
30860 }
30861
30862 for (i = 0; i < nelt ; i += diff + 1)
30863 for (j = 0; j <= diff; j += 1)
30864 {
30865 /* This is guaranteed to hold: DIFF is 7, 3 or 1 here, so each
30866 group of DIFF + 1 elements fits within the vector. A mask whose
30867 first element gives any other value of DIFF has already been
30868 rejected by the switch above, so reaching this point with one
30869 would indicate a bug. */
30870 gcc_assert (i + j < nelt);
30871 if (d->perm[i + j] != i + diff - j)
30872 return false;
30873 }
30874
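/* For instance, for V4SImode the selector { 1, 0, 3, 2 } gives
   DIFF == 1 and is matched to VREV64.32, which swaps the two 32-bit
   elements within each 64-bit half; likewise { 7, 6, 5, 4, 3, 2, 1, 0 }
   on V8QImode gives DIFF == 7 and maps to VREV64.8.  */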
30875 /* Success! */
30876 if (d->testing_p)
30877 return true;
30878
30879 /* ??? The third operand is an artifact of the builtin infrastructure
30880 and is ignored by the actual instruction. */
30881 emit_insn (gen (d->target, d->op0, const0_rtx));
30882 return true;
30883 }
30884
30885 /* Recognize patterns for the VTRN insns. */
30886
30887 static bool
30888 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30889 {
30890 unsigned int i, odd, mask, nelt = d->nelt;
30891 rtx out0, out1, in0, in1, x;
30892 rtx (*gen)(rtx, rtx, rtx, rtx);
30893
30894 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30895 return false;
30896
30897 /* Note that these are little-endian tests. Adjust for big-endian later. */
30898 if (d->perm[0] == 0)
30899 odd = 0;
30900 else if (d->perm[0] == 1)
30901 odd = 1;
30902 else
30903 return false;
30904 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30905
30906 for (i = 0; i < nelt; i += 2)
30907 {
30908 if (d->perm[i] != i + odd)
30909 return false;
30910 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30911 return false;
30912 }
30913
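/* For example, with V8QImode and ODD == 0 the loop above accepts
   { 0, 8, 2, 10, 4, 12, 6, 14 }: VTRN.8 treats the two inputs as a
   set of 2x2 matrices and transposes them, placing the even-indexed
   elements of both inputs, interleaved, in one output and the
   odd-indexed elements in the other.  */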
30914 /* Success! */
30915 if (d->testing_p)
30916 return true;
30917
30918 switch (d->vmode)
30919 {
30920 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30921 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30922 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30923 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30924 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30925 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30926 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30927 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30928 default:
30929 gcc_unreachable ();
30930 }
30931
30932 in0 = d->op0;
30933 in1 = d->op1;
30934 if (BYTES_BIG_ENDIAN)
30935 {
30936 x = in0, in0 = in1, in1 = x;
30937 odd = !odd;
30938 }
30939
30940 out0 = d->target;
30941 out1 = gen_reg_rtx (d->vmode);
30942 if (odd)
30943 x = out0, out0 = out1, out1 = x;
30944
30945 emit_insn (gen (out0, in0, in1, out1));
30946 return true;
30947 }
30948
30949 /* Recognize patterns for the VEXT insns. */
30950
30951 static bool
30952 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30953 {
30954 unsigned int i, nelt = d->nelt;
30955 rtx (*gen) (rtx, rtx, rtx, rtx);
30956 rtx offset;
30957
30958 unsigned int location;
30959
30960 unsigned int next = d->perm[0] + 1;
30961
30962 /* TODO: Handle GCC's numbering of elements for big-endian. */
30963 if (BYTES_BIG_ENDIAN)
30964 return false;
30965
30966 /* Check if the extracted indexes are increasing by one. */
30967 for (i = 1; i < nelt; next++, i++)
30968 {
30969 /* If we hit the most significant element of the 2nd vector in
30970 the previous iteration, no need to test further. */
30971 if (next == 2 * nelt)
30972 return false;
30973
30974 /* If we are operating on only one vector: it could be a
30975 rotation. If there are only two elements of size < 64, let
30976 arm_evpc_neon_vrev catch it. */
30977 if (d->one_vector_p && (next == nelt))
30978 {
30979 if ((nelt == 2) && (d->vmode != V2DImode))
30980 return false;
30981 else
30982 next = 0;
30983 }
30984
30985 if (d->perm[i] != next)
30986 return false;
30987 }
30988
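/* For example, on V8QImode the selector { 3, 4, 5, 6, 7, 8, 9, 10 }
   passes the test above and becomes VEXT.8 with an immediate of 3,
   taking bytes 3..10 of the concatenation of the two inputs. The
   one-vector case { 1, 2, 3, 4, 5, 6, 7, 0 } is a rotation and is
   handled by using the same register for both inputs.  */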
30989 location = d->perm[0];
30990
30991 switch (d->vmode)
30992 {
30993 case V16QImode: gen = gen_neon_vextv16qi; break;
30994 case V8QImode: gen = gen_neon_vextv8qi; break;
30995 case V4HImode: gen = gen_neon_vextv4hi; break;
30996 case V8HImode: gen = gen_neon_vextv8hi; break;
30997 case V2SImode: gen = gen_neon_vextv2si; break;
30998 case V4SImode: gen = gen_neon_vextv4si; break;
30999 case V2SFmode: gen = gen_neon_vextv2sf; break;
31000 case V4SFmode: gen = gen_neon_vextv4sf; break;
31001 case V2DImode: gen = gen_neon_vextv2di; break;
31002 default:
31003 return false;
31004 }
31005
31006 /* Success! */
31007 if (d->testing_p)
31008 return true;
31009
31010 offset = GEN_INT (location);
31011 emit_insn (gen (d->target, d->op0, d->op1, offset));
31012 return true;
31013 }
31014
31015 /* The NEON VTBL instruction is a fully variable permutation that's even
31016 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31017 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31018 can do slightly better by expanding this as a constant where we don't
31019 have to apply a mask. */
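/* For instance, the V8QImode selector { 0, 1, 2, 3, 0, 1, 2, 3 }
   matches none of the fixed VUZP/VZIP/VREV/VTRN/VEXT patterns; it is
   simply materialised as a constant vector below and handed to
   arm_expand_vec_perm_1, i.e. expanded via the variable-permute
   (VTBL) path.  */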
31020
31021 static bool
31022 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31023 {
31024 rtx rperm[MAX_VECT_LEN], sel;
31025 enum machine_mode vmode = d->vmode;
31026 unsigned int i, nelt = d->nelt;
31027
31028 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31029 numbering of elements for big-endian, we must reverse the order. */
31030 if (BYTES_BIG_ENDIAN)
31031 return false;
31032
31033 if (d->testing_p)
31034 return true;
31035
31036 /* Generic code will try constant permutation twice: once with the
31037 original mode and again with the elements lowered to QImode, so
31038 wait for that retry rather than expanding the selector here. */
31039 if (vmode != V8QImode && vmode != V16QImode)
31040 return false;
31041
31042 for (i = 0; i < nelt; ++i)
31043 rperm[i] = GEN_INT (d->perm[i]);
31044 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31045 sel = force_reg (vmode, sel);
31046
31047 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31048 return true;
31049 }
31050
31051 static bool
31052 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31053 {
31054 /* Check if the input mask matches vext before reordering the
31055 operands. */
31056 if (TARGET_NEON)
31057 if (arm_evpc_neon_vext (d))
31058 return true;
31059
31060 /* The pattern matching functions above are written to look for a small
31061 number to begin the sequence (0, 1, N/2). If we begin with an index
31062 from the second operand, we can swap the operands. */
31063 if (d->perm[0] >= d->nelt)
31064 {
31065 unsigned i, nelt = d->nelt;
31066 rtx x;
31067
31068 for (i = 0; i < nelt; ++i)
31069 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
31070
31071 x = d->op0;
31072 d->op0 = d->op1;
31073 d->op1 = x;
31074 }
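/* For example, with V4SImode the selector { 4, 0, 5, 1 } starts with
   an element from the second operand; after the adjustment above it
   becomes { 0, 4, 1, 5 } with the operands swapped, which the VZIP
   test below will then recognize.  */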
31075
31076 if (TARGET_NEON)
31077 {
31078 if (arm_evpc_neon_vuzp (d))
31079 return true;
31080 if (arm_evpc_neon_vzip (d))
31081 return true;
31082 if (arm_evpc_neon_vrev (d))
31083 return true;
31084 if (arm_evpc_neon_vtrn (d))
31085 return true;
31086 return arm_evpc_neon_vtbl (d);
31087 }
31088 return false;
31089 }
31090
31091 /* Expand a vec_perm_const pattern. */
31092
31093 bool
31094 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
31095 {
31096 struct expand_vec_perm_d d;
31097 int i, nelt, which;
31098
31099 d.target = target;
31100 d.op0 = op0;
31101 d.op1 = op1;
31102
31103 d.vmode = GET_MODE (target);
31104 gcc_assert (VECTOR_MODE_P (d.vmode));
31105 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31106 d.testing_p = false;
31107
31108 for (i = which = 0; i < nelt; ++i)
31109 {
31110 rtx e = XVECEXP (sel, 0, i);
31111 int ei = INTVAL (e) & (2 * nelt - 1);
31112 which |= (ei < nelt ? 1 : 2);
31113 d.perm[i] = ei;
31114 }
31115
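/* WHICH now records where the selector's elements come from: bit 0 is
   set if any element refers to OP0 and bit 1 if any refers to OP1.
   For example, with nelt == 4 the selector { 0, 4, 1, 5 } gives
   WHICH == 3, { 5, 4, 7, 6 } gives WHICH == 2 and { 1, 0, 3, 2 }
   gives WHICH == 1.  */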
31116 switch (which)
31117 {
31118 default:
31119 gcc_unreachable();
31120
31121 case 3:
31122 d.one_vector_p = false;
31123 if (!rtx_equal_p (op0, op1))
31124 break;
31125
31126 /* The elements of PERM do not suggest that only the first operand
31127 is used, but both operands are identical. Allow easier matching
31128 of the permutation by folding the permutation into the single
31129 input vector. */
31130 /* FALLTHRU */
31131 case 2:
31132 for (i = 0; i < nelt; ++i)
31133 d.perm[i] &= nelt - 1;
31134 d.op0 = op1;
31135 d.one_vector_p = true;
31136 break;
31137
31138 case 1:
31139 d.op1 = op0;
31140 d.one_vector_p = true;
31141 break;
31142 }
31143
31144 return arm_expand_vec_perm_const_1 (&d);
31145 }
31146
31147 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
31148
31149 static bool
31150 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
31151 const unsigned char *sel)
31152 {
31153 struct expand_vec_perm_d d;
31154 unsigned int i, nelt, which;
31155 bool ret;
31156
31157 d.vmode = vmode;
31158 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31159 d.testing_p = true;
31160 memcpy (d.perm, sel, nelt);
31161
31162 /* Categorize the set of elements in the selector. */
31163 for (i = which = 0; i < nelt; ++i)
31164 {
31165 unsigned char e = d.perm[i];
31166 gcc_assert (e < 2 * nelt);
31167 which |= (e < nelt ? 1 : 2);
31168 }
31169
31170 /* If all elements are from the second vector, fold them into the first. */
31171 if (which == 2)
31172 for (i = 0; i < nelt; ++i)
31173 d.perm[i] -= nelt;
31174
31175 /* Check whether the mask can be applied to the vector type. */
31176 d.one_vector_p = (which != 3);
31177
31178 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31179 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31180 if (!d.one_vector_p)
31181 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31182
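/* Emit any trial RTL into a sequence that is thrown away; with
   d.testing_p set the recognizers should not emit anything, and only
   the return value matters here.  */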
31183 start_sequence ();
31184 ret = arm_expand_vec_perm_const_1 (&d);
31185 end_sequence ();
31186
31187 return ret;
31188 }
31189
31190 bool
31191 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
31192 {
31193 /* If we are soft float and either have ldrd or the mode fits in a
31194 single word, then all auto increment forms are ok. */
31195 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31196 return true;
31197
31198 switch (code)
31199 {
31200 /* Post increment and pre decrement are supported for all
31201 instruction forms; for vector modes only post increment is. */
31202 case ARM_POST_INC:
31203 case ARM_PRE_DEC:
31204 if (VECTOR_MODE_P (mode))
31205 {
31206 if (code != ARM_PRE_DEC)
31207 return true;
31208 else
31209 return false;
31210 }
31211
31212 return true;
31213
31214 case ARM_POST_DEC:
31215 case ARM_PRE_INC:
31216 /* Without LDRD and mode size greater than
31217 word size, there is no point in auto-incrementing
31218 because ldm and stm will not have these forms. */
31219 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31220 return false;
31221
31222 /* Vector and floating point modes do not support
31223 these auto increment forms. */
31224 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31225 return false;
31226
31227 return true;
31228
31229 default:
31230 return false;
31231
31232 }
31233
31234 return false;
31235 }
31236
31237 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31238 on ARM, since we know that shifts by negative amounts are no-ops.
31239 Additionally, the default expansion code is not available or suitable
31240 for post-reload insn splits (this can occur when the register allocator
31241 chooses not to do a shift in NEON).
31242
31243 This function is used in both initial expand and post-reload splits, and
31244 handles all kinds of 64-bit shifts.
31245
31246 Input requirements:
31247 - It is safe for the input and output to be the same register, but
31248 early-clobber rules apply for the shift amount and scratch registers.
31249 - Shift by register requires both scratch registers. In all other cases
31250 the scratch registers may be NULL.
31251 - Ashiftrt by a register also clobbers the CC register. */
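/* As an illustration of the constant cases handled below, a DImode
   LSHIFTRT by 40 becomes roughly:
      out_low = in_high >> 8;
      out_high = 0;
   while a shift by 10 becomes:
      out_low = in_low >> 10;
      out_low |= in_high << 22;
      out_high = in_high >> 10;  */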
31252 void
31253 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31254 rtx amount, rtx scratch1, rtx scratch2)
31255 {
31256 rtx out_high = gen_highpart (SImode, out);
31257 rtx out_low = gen_lowpart (SImode, out);
31258 rtx in_high = gen_highpart (SImode, in);
31259 rtx in_low = gen_lowpart (SImode, in);
31260
31261 /* Terminology:
31262 in = the register pair containing the input value.
31263 out = the destination register pair.
31264 up = the high- or low-part of each pair.
31265 down = the opposite part to "up".
31266 In a shift, we can consider bits to shift from "up"-stream to
31267 "down"-stream, so in a left-shift "up" is the low-part and "down"
31268 is the high-part of each register pair. */
31269
31270 rtx out_up = code == ASHIFT ? out_low : out_high;
31271 rtx out_down = code == ASHIFT ? out_high : out_low;
31272 rtx in_up = code == ASHIFT ? in_low : in_high;
31273 rtx in_down = code == ASHIFT ? in_high : in_low;
31274
31275 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31276 gcc_assert (out
31277 && (REG_P (out) || GET_CODE (out) == SUBREG)
31278 && GET_MODE (out) == DImode);
31279 gcc_assert (in
31280 && (REG_P (in) || GET_CODE (in) == SUBREG)
31281 && GET_MODE (in) == DImode);
31282 gcc_assert (amount
31283 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31284 && GET_MODE (amount) == SImode)
31285 || CONST_INT_P (amount)));
31286 gcc_assert (scratch1 == NULL
31287 || (GET_CODE (scratch1) == SCRATCH)
31288 || (GET_MODE (scratch1) == SImode
31289 && REG_P (scratch1)));
31290 gcc_assert (scratch2 == NULL
31291 || (GET_CODE (scratch2) == SCRATCH)
31292 || (GET_MODE (scratch2) == SImode
31293 && REG_P (scratch2)));
31294 gcc_assert (!REG_P (out) || !REG_P (amount)
31295 || !HARD_REGISTER_P (out)
31296 || (REGNO (out) != REGNO (amount)
31297 && REGNO (out) + 1 != REGNO (amount)));
31298
31299 /* Macros to make following code more readable. */
31300 #define SUB_32(DEST,SRC) \
31301 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31302 #define RSB_32(DEST,SRC) \
31303 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31304 #define SUB_S_32(DEST,SRC) \
31305 gen_addsi3_compare0 ((DEST), (SRC), \
31306 GEN_INT (-32))
31307 #define SET(DEST,SRC) \
31308 gen_rtx_SET (SImode, (DEST), (SRC))
31309 #define SHIFT(CODE,SRC,AMOUNT) \
31310 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31311 #define LSHIFT(CODE,SRC,AMOUNT) \
31312 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31313 SImode, (SRC), (AMOUNT))
31314 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31315 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31316 SImode, (SRC), (AMOUNT))
31317 #define ORR(A,B) \
31318 gen_rtx_IOR (SImode, (A), (B))
31319 #define BRANCH(COND,LABEL) \
31320 gen_arm_cond_branch ((LABEL), \
31321 gen_rtx_ ## COND (CCmode, cc_reg, \
31322 const0_rtx), \
31323 cc_reg)
31324
31325 /* Shifts by register and shifts by constant are handled separately. */
31326 if (CONST_INT_P (amount))
31327 {
31328 /* We have a shift-by-constant. */
31329
31330 /* First, handle out-of-range shift amounts.
31331 In both cases we try to match the result that an ARM instruction in a
31332 shift-by-register would give. This helps reduce execution
31333 differences between optimization levels, but it won't stop other
31334 parts of the compiler doing different things. This is "undefined
31335 behaviour", in any case. */
31336 if (INTVAL (amount) <= 0)
31337 emit_insn (gen_movdi (out, in));
31338 else if (INTVAL (amount) >= 64)
31339 {
31340 if (code == ASHIFTRT)
31341 {
31342 rtx const31_rtx = GEN_INT (31);
31343 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31344 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31345 }
31346 else
31347 emit_insn (gen_movdi (out, const0_rtx));
31348 }
31349
31350 /* Now handle valid shifts. */
31351 else if (INTVAL (amount) < 32)
31352 {
31353 /* Shifts by a constant less than 32. */
31354 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31355
31356 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31357 emit_insn (SET (out_down,
31358 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31359 out_down)));
31360 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31361 }
31362 else
31363 {
31364 /* Shifts by a constant greater than 31. */
31365 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31366
31367 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31368 if (code == ASHIFTRT)
31369 emit_insn (gen_ashrsi3 (out_up, in_up,
31370 GEN_INT (31)));
31371 else
31372 emit_insn (SET (out_up, const0_rtx));
31373 }
31374 }
31375 else
31376 {
31377 /* We have a shift-by-register. */
31378 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31379
31380 /* This alternative requires the scratch registers. */
31381 gcc_assert (scratch1 && REG_P (scratch1));
31382 gcc_assert (scratch2 && REG_P (scratch2));
31383
31384 /* We will need the values "amount-32" and "32-amount" later.
31385 Swapping them around now allows the later code to be more general. */
31386 switch (code)
31387 {
31388 case ASHIFT:
31389 emit_insn (SUB_32 (scratch1, amount));
31390 emit_insn (RSB_32 (scratch2, amount));
31391 break;
31392 case ASHIFTRT:
31393 emit_insn (RSB_32 (scratch1, amount));
31394 /* Also set CC = amount > 32. */
31395 emit_insn (SUB_S_32 (scratch2, amount));
31396 break;
31397 case LSHIFTRT:
31398 emit_insn (RSB_32 (scratch1, amount));
31399 emit_insn (SUB_32 (scratch2, amount));
31400 break;
31401 default:
31402 gcc_unreachable ();
31403 }
31404
31405 /* Emit code like this:
31406
31407 arithmetic-left:
31408 out_down = in_down << amount;
31409 out_down = (in_up << (amount - 32)) | out_down;
31410 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31411 out_up = in_up << amount;
31412
31413 arithmetic-right:
31414 out_down = in_down >> amount;
31415 out_down = (in_up << (32 - amount)) | out_down;
31416 if (amount >= 32)
31417 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31418 out_up = (signed)in_up >> amount;
31419
31420 logical-right:
31421 out_down = in_down >> amount;
31422 out_down = (in_up << (32 - amount)) | out_down;
31423 if (amount >= 32)
31424 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31425 out_up = (unsigned)in_up >> amount;
31426
31427 The ARM and Thumb2 variants are the same but implemented slightly
31428 differently. If this were only called during expand we could just
31429 use the Thumb2 case and let combine do the right thing, but this
31430 can also be called from post-reload splitters. */
31431
31432 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31433
31434 if (!TARGET_THUMB2)
31435 {
31436 /* Emit code for ARM mode. */
31437 emit_insn (SET (out_down,
31438 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31439 if (code == ASHIFTRT)
31440 {
31441 rtx done_label = gen_label_rtx ();
31442 emit_jump_insn (BRANCH (LT, done_label));
31443 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31444 out_down)));
31445 emit_label (done_label);
31446 }
31447 else
31448 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31449 out_down)));
31450 }
31451 else
31452 {
31453 /* Emit code for Thumb2 mode.
31454 Thumb2 can't do shift and or in one insn. */
31455 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31456 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31457
31458 if (code == ASHIFTRT)
31459 {
31460 rtx done_label = gen_label_rtx ();
31461 emit_jump_insn (BRANCH (LT, done_label));
31462 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31463 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31464 emit_label (done_label);
31465 }
31466 else
31467 {
31468 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31469 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31470 }
31471 }
31472
31473 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31474 }
31475
31476 #undef SUB_32
31477 #undef RSB_32
31478 #undef SUB_S_32
31479 #undef SET
31480 #undef SHIFT
31481 #undef LSHIFT
31482 #undef REV_LSHIFT
31483 #undef ORR
31484 #undef BRANCH
31485 }
31486
31487
31488 /* Return true if *COMPARISON is a comparison the target can handle,
31489 canonicalizing it and forcing the operands into valid forms. */
31490 bool
31491 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31492 {
31493 enum rtx_code code = GET_CODE (*comparison);
31494 int code_int;
31495 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31496 ? GET_MODE (*op2) : GET_MODE (*op1);
31497
31498 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31499
31500 if (code == UNEQ || code == LTGT)
31501 return false;
31502
31503 code_int = (int)code;
31504 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31505 PUT_CODE (*comparison, (enum rtx_code)code_int);
31506
31507 switch (mode)
31508 {
31509 case SImode:
31510 if (!arm_add_operand (*op1, mode))
31511 *op1 = force_reg (mode, *op1);
31512 if (!arm_add_operand (*op2, mode))
31513 *op2 = force_reg (mode, *op2);
31514 return true;
31515
31516 case DImode:
31517 if (!cmpdi_operand (*op1, mode))
31518 *op1 = force_reg (mode, *op1);
31519 if (!cmpdi_operand (*op2, mode))
31520 *op2 = force_reg (mode, *op2);
31521 return true;
31522
31523 case SFmode:
31524 case DFmode:
31525 if (!arm_float_compare_operand (*op1, mode))
31526 *op1 = force_reg (mode, *op1);
31527 if (!arm_float_compare_operand (*op2, mode))
31528 *op2 = force_reg (mode, *op2);
31529 return true;
31530 default:
31531 break;
31532 }
31533
31534 return false;
31535
31536 }
31537
31538 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
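/* AddressSanitizer forms the shadow address of a byte as
   (address >> 3) + offset; the value below places the ARM shadow
   region at 0x20000000.  */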
31539
31540 static unsigned HOST_WIDE_INT
31541 arm_asan_shadow_offset (void)
31542 {
31543 return (unsigned HOST_WIDE_INT) 1 << 29;
31544 }
31545
31546
31547 /* This is a temporary fix for PR60655. Ideally most of these
31548 cases should be handled in the generic part, but currently we
31549 reject minus (..) (sym_ref). We try to ameliorate the specific
31550 case of minus (sym_ref1) (sym_ref2) where both symbols are in
31551 the same section. */
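/* For example, the difference of two symbols that both live in .data
   has a link-time-constant value and may be kept in the debug info,
   whereas a difference whose symbols end up in different sections (or
   lack a decl altogether) does not, and we return true for it.  */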
31552
31553 static bool
31554 arm_const_not_ok_for_debug_p (rtx p)
31555 {
31556 tree decl_op0 = NULL;
31557 tree decl_op1 = NULL;
31558
31559 if (GET_CODE (p) == MINUS)
31560 {
31561 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
31562 {
31563 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
31564 if (decl_op1
31565 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
31566 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
31567 {
31568 if ((TREE_CODE (decl_op1) == VAR_DECL
31569 || TREE_CODE (decl_op1) == CONST_DECL)
31570 && (TREE_CODE (decl_op0) == VAR_DECL
31571 || TREE_CODE (decl_op0) == CONST_DECL))
31572 return (get_variable_section (decl_op1, false)
31573 != get_variable_section (decl_op0, false));
31574
31575 if (TREE_CODE (decl_op1) == LABEL_DECL
31576 && TREE_CODE (decl_op0) == LABEL_DECL)
31577 return (DECL_CONTEXT (decl_op1)
31578 != DECL_CONTEXT (decl_op0));
31579 }
31580
31581 return true;
31582 }
31583 }
31584
31585 return false;
31586 }
31587
31588 static void
31589 arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
31590 {
31591 const unsigned ARM_FE_INVALID = 1;
31592 const unsigned ARM_FE_DIVBYZERO = 2;
31593 const unsigned ARM_FE_OVERFLOW = 4;
31594 const unsigned ARM_FE_UNDERFLOW = 8;
31595 const unsigned ARM_FE_INEXACT = 16;
31596 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
31597 | ARM_FE_DIVBYZERO
31598 | ARM_FE_OVERFLOW
31599 | ARM_FE_UNDERFLOW
31600 | ARM_FE_INEXACT);
31601 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
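/* In the VFP FPSCR the cumulative exception flags (IOC, DZC, OFC,
   UFC, IXC) occupy bits 0-4 and the corresponding trap-enable bits
   occupy bits 8-12, hence the shift of 8 used in the mask below.  */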
31602 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
31603 tree new_fenv_var, reload_fenv, restore_fnenv;
31604 tree update_call, atomic_feraiseexcept, hold_fnclex;
31605
31606 if (!TARGET_VFP || !TARGET_HARD_FLOAT)
31607 return;
31608
31609 /* Generate the equivalent of:
31610 unsigned int fenv_var;
31611 fenv_var = __builtin_arm_get_fpscr ();
31612
31613 unsigned int masked_fenv;
31614 masked_fenv = fenv_var & mask;
31615
31616 __builtin_arm_set_fpscr (masked_fenv); */
31617
31618 fenv_var = create_tmp_var (unsigned_type_node, NULL);
31619 get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
31620 set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
31621 mask = build_int_cst (unsigned_type_node,
31622 ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
31623 | ARM_FE_ALL_EXCEPT));
31624 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
31625 fenv_var, build_call_expr (get_fpscr, 0));
31626 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
31627 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
31628 *hold = build2 (COMPOUND_EXPR, void_type_node,
31629 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
31630 hold_fnclex);
31631
31632 /* Store the value of masked_fenv to clear the exceptions:
31633 __builtin_arm_set_fpscr (masked_fenv); */
31634
31635 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
31636
31637 /* Generate the equivalent of:
31638 unsigned int new_fenv_var;
31639 new_fenv_var = __builtin_arm_get_fpscr ();
31640
31641 __builtin_arm_set_fpscr (fenv_var);
31642
31643 __atomic_feraiseexcept (new_fenv_var); */
31644
31645 new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
31646 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
31647 build_call_expr (get_fpscr, 0));
31648 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
31649 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
31650 update_call = build_call_expr (atomic_feraiseexcept, 1,
31651 fold_convert (integer_type_node, new_fenv_var));
31652 *update = build2 (COMPOUND_EXPR, void_type_node,
31653 build2 (COMPOUND_EXPR, void_type_node,
31654 reload_fenv, restore_fnenv), update_call);
31655 }
31656
31657 /* Return TRUE if X is a reference to a value in a constant pool. */
31658 extern bool
31659 arm_is_constant_pool_ref (rtx x)
31660 {
31661 return (MEM_P (x)
31662 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
31663 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
31664 }
31665
31666 #include "gt-arm.h"