1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "sched-int.h"
54 #include "target-def.h"
55 #include "debug.h"
56 #include "langhooks.h"
57 #include "df.h"
58 #include "intl.h"
59 #include "libfuncs.h"
60 #include "params.h"
61 #include "opts.h"
62 #include "dumpfile.h"
63 #include "gimple-expr.h"
64 #include "builtins.h"
65
66 /* Forward definitions of types. */
67 typedef struct minipool_node Mnode;
68 typedef struct minipool_fixup Mfix;
69
70 void (*arm_lang_output_object_attributes_hook)(void);
71
72 struct four_ints
73 {
74 int i[4];
75 };
76
77 /* Forward function declarations. */
78 static bool arm_const_not_ok_for_debug_p (rtx);
79 static bool arm_lra_p (void);
80 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
81 static int arm_compute_static_chain_stack_bytes (void);
82 static arm_stack_offsets *arm_get_frame_offsets (void);
83 static void arm_add_gc_roots (void);
84 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
85 HOST_WIDE_INT, rtx, rtx, int, int);
86 static unsigned bit_count (unsigned long);
87 static int arm_address_register_rtx_p (rtx, int);
88 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
89 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
90 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
91 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
92 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
93 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
94 inline static int thumb1_index_register_rtx_p (rtx, int);
95 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
96 static int thumb_far_jump_used_p (void);
97 static bool thumb_force_lr_save (void);
98 static unsigned arm_size_return_regs (void);
99 static bool arm_assemble_integer (rtx, unsigned int, int);
100 static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
101 static void arm_print_operand (FILE *, rtx, int);
102 static void arm_print_operand_address (FILE *, rtx);
103 static bool arm_print_operand_punct_valid_p (unsigned char code);
104 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
105 static arm_cc get_arm_condition_code (rtx);
106 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
107 static const char *output_multi_immediate (rtx *, const char *, const char *,
108 int, HOST_WIDE_INT);
109 static const char *shift_op (rtx, HOST_WIDE_INT *);
110 static struct machine_function *arm_init_machine_status (void);
111 static void thumb_exit (FILE *, int);
112 static HOST_WIDE_INT get_jump_table_size (rtx);
113 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
114 static Mnode *add_minipool_forward_ref (Mfix *);
115 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
116 static Mnode *add_minipool_backward_ref (Mfix *);
117 static void assign_minipool_offsets (Mfix *);
118 static void arm_print_value (FILE *, rtx);
119 static void dump_minipool (rtx);
120 static int arm_barrier_cost (rtx);
121 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
122 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
123 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
124 rtx);
125 static void arm_reorg (void);
126 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
127 static unsigned long arm_compute_save_reg0_reg12_mask (void);
128 static unsigned long arm_compute_save_reg_mask (void);
129 static unsigned long arm_isr_value (tree);
130 static unsigned long arm_compute_func_type (void);
131 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
132 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
133 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
134 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
135 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
136 #endif
137 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
138 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
139 static int arm_comp_type_attributes (const_tree, const_tree);
140 static void arm_set_default_type_attributes (tree);
141 static int arm_adjust_cost (rtx, rtx, rtx, int);
142 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
143 static int optimal_immediate_sequence (enum rtx_code code,
144 unsigned HOST_WIDE_INT val,
145 struct four_ints *return_sequence);
146 static int optimal_immediate_sequence_1 (enum rtx_code code,
147 unsigned HOST_WIDE_INT val,
148 struct four_ints *return_sequence,
149 int i);
150 static int arm_get_strip_length (int);
151 static bool arm_function_ok_for_sibcall (tree, tree);
152 static enum machine_mode arm_promote_function_mode (const_tree,
153 enum machine_mode, int *,
154 const_tree, int);
155 static bool arm_return_in_memory (const_tree, const_tree);
156 static rtx arm_function_value (const_tree, const_tree, bool);
157 static rtx arm_libcall_value_1 (enum machine_mode);
158 static rtx arm_libcall_value (enum machine_mode, const_rtx);
159 static bool arm_function_value_regno_p (const unsigned int);
160 static void arm_internal_label (FILE *, const char *, unsigned long);
161 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
162 tree);
163 static bool arm_have_conditional_execution (void);
164 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
165 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
166 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
167 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
168 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
169 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
170 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
171 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
172 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
173 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
174 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
175 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
176 static void arm_init_builtins (void);
177 static void arm_init_iwmmxt_builtins (void);
178 static rtx safe_vector_operand (rtx, enum machine_mode);
179 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
180 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
181 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
182 static tree arm_builtin_decl (unsigned, bool);
183 static void emit_constant_insn (rtx cond, rtx pattern);
184 static rtx emit_set_insn (rtx, rtx);
185 static rtx emit_multi_reg_push (unsigned long, unsigned long);
186 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
187 tree, bool);
188 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
189 const_tree, bool);
190 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
191 const_tree, bool);
192 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
193 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
194 const_tree);
195 static rtx aapcs_libcall_value (enum machine_mode);
196 static int aapcs_select_return_coproc (const_tree, const_tree);
197
198 #ifdef OBJECT_FORMAT_ELF
199 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
200 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
201 #endif
202 #ifndef ARM_PE
203 static void arm_encode_section_info (tree, rtx, int);
204 #endif
205
206 static void arm_file_end (void);
207 static void arm_file_start (void);
208
209 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
210 tree, int *, int);
211 static bool arm_pass_by_reference (cumulative_args_t,
212 enum machine_mode, const_tree, bool);
213 static bool arm_promote_prototypes (const_tree);
214 static bool arm_default_short_enums (void);
215 static bool arm_align_anon_bitfield (void);
216 static bool arm_return_in_msb (const_tree);
217 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
218 static bool arm_return_in_memory (const_tree, const_tree);
219 #if ARM_UNWIND_INFO
220 static void arm_unwind_emit (FILE *, rtx);
221 static bool arm_output_ttype (rtx);
222 static void arm_asm_emit_except_personality (rtx);
223 static void arm_asm_init_sections (void);
224 #endif
225 static rtx arm_dwarf_register_span (rtx);
226
227 static tree arm_cxx_guard_type (void);
228 static bool arm_cxx_guard_mask_bit (void);
229 static tree arm_get_cookie_size (tree);
230 static bool arm_cookie_has_size (void);
231 static bool arm_cxx_cdtor_returns_this (void);
232 static bool arm_cxx_key_method_may_be_inline (void);
233 static void arm_cxx_determine_class_data_visibility (tree);
234 static bool arm_cxx_class_data_always_comdat (void);
235 static bool arm_cxx_use_aeabi_atexit (void);
236 static void arm_init_libfuncs (void);
237 static tree arm_build_builtin_va_list (void);
238 static void arm_expand_builtin_va_start (tree, rtx);
239 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
240 static void arm_option_override (void);
241 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
242 static bool arm_cannot_copy_insn_p (rtx);
243 static int arm_issue_rate (void);
244 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
245 static bool arm_output_addr_const_extra (FILE *, rtx);
246 static bool arm_allocate_stack_slots_for_args (void);
247 static bool arm_warn_func_return (tree);
248 static const char *arm_invalid_parameter_type (const_tree t);
249 static const char *arm_invalid_return_type (const_tree t);
250 static tree arm_promoted_type (const_tree t);
251 static tree arm_convert_to_type (tree type, tree expr);
252 static bool arm_scalar_mode_supported_p (enum machine_mode);
253 static bool arm_frame_pointer_required (void);
254 static bool arm_can_eliminate (const int, const int);
255 static void arm_asm_trampoline_template (FILE *);
256 static void arm_trampoline_init (rtx, tree, rtx);
257 static rtx arm_trampoline_adjust_address (rtx);
258 static rtx arm_pic_static_addr (rtx orig, rtx reg);
259 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
260 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
261 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
262 static bool arm_array_mode_supported_p (enum machine_mode,
263 unsigned HOST_WIDE_INT);
264 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
265 static bool arm_class_likely_spilled_p (reg_class_t);
266 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
267 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
268 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
269 const_tree type,
270 int misalignment,
271 bool is_packed);
272 static void arm_conditional_register_usage (void);
273 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
274 static unsigned int arm_autovectorize_vector_sizes (void);
275 static int arm_default_branch_cost (bool, bool);
276 static int arm_cortex_a5_branch_cost (bool, bool);
277 static int arm_cortex_m_branch_cost (bool, bool);
278
279 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
280 const unsigned char *sel);
281
282 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
283 tree vectype,
284 int misalign ATTRIBUTE_UNUSED);
285 static unsigned arm_add_stmt_cost (void *data, int count,
286 enum vect_cost_for_stmt kind,
287 struct _stmt_vec_info *stmt_info,
288 int misalign,
289 enum vect_cost_model_location where);
290
291 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
292 bool op0_preserve_value);
293 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
294 \f
295 /* Table of machine attributes (a user-side usage sketch follows the table).  */
296 static const struct attribute_spec arm_attribute_table[] =
297 {
298 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
299 affects_type_identity } */
300 /* Function calls made to this symbol must be done indirectly, because
301 it may lie outside of the 26-bit addressing range of a normal function
302 call. */
303 { "long_call", 0, 0, false, true, true, NULL, false },
304 /* Whereas these functions are always known to reside within the 26-bit
305 addressing range. */
306 { "short_call", 0, 0, false, true, true, NULL, false },
307 /* Specify the procedure call conventions for a function. */
308 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
309 false },
310 /* Interrupt Service Routines have special prologue and epilogue requirements. */
311 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
312 false },
313 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
314 false },
315 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
316 false },
317 #ifdef ARM_PE
318 /* ARM/PE has three new attributes:
319 interfacearm - ?
320 dllexport - for exporting a function/variable that will live in a dll
321 dllimport - for importing a function/variable from a dll
322
323 Microsoft allows multiple declspecs in one __declspec, separating
324 them with spaces. We do NOT support this. Instead, use __declspec
325 multiple times.
326 */
327 { "dllimport", 0, 0, true, false, false, NULL, false },
328 { "dllexport", 0, 0, true, false, false, NULL, false },
329 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
330 false },
331 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
332 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
333 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
334 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
335 false },
336 #endif
337 { NULL, 0, 0, false, false, false, NULL, false }
338 };
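/* A minimal, user-side sketch of how the attributes registered above are
   spelled in application code.  The function names are invented for
   illustration only and are not part of GCC:

     void far_away_helper (void) __attribute__ ((long_call));
     void nearby_helper (void) __attribute__ ((short_call));
     void uart_handler (void) __attribute__ ((isr ("IRQ")));
     double scale_by_pi (double) __attribute__ ((pcs ("aapcs-vfp")));
     void __attribute__ ((naked)) reset_entry (void);

   Each entry's handler (when non-NULL) validates the attribute's arguments;
   the min_len/max_len fields bound how many arguments are accepted.  */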
339 \f
340 /* Initialize the GCC target structure. */
341 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
342 #undef TARGET_MERGE_DECL_ATTRIBUTES
343 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
344 #endif
345
346 #undef TARGET_LEGITIMIZE_ADDRESS
347 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
348
349 #undef TARGET_LRA_P
350 #define TARGET_LRA_P arm_lra_p
351
352 #undef TARGET_ATTRIBUTE_TABLE
353 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
354
355 #undef TARGET_ASM_FILE_START
356 #define TARGET_ASM_FILE_START arm_file_start
357 #undef TARGET_ASM_FILE_END
358 #define TARGET_ASM_FILE_END arm_file_end
359
360 #undef TARGET_ASM_ALIGNED_SI_OP
361 #define TARGET_ASM_ALIGNED_SI_OP NULL
362 #undef TARGET_ASM_INTEGER
363 #define TARGET_ASM_INTEGER arm_assemble_integer
364
365 #undef TARGET_PRINT_OPERAND
366 #define TARGET_PRINT_OPERAND arm_print_operand
367 #undef TARGET_PRINT_OPERAND_ADDRESS
368 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
369 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
370 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
371
372 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
373 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
374
375 #undef TARGET_ASM_FUNCTION_PROLOGUE
376 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
377
378 #undef TARGET_ASM_FUNCTION_EPILOGUE
379 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
380
381 #undef TARGET_OPTION_OVERRIDE
382 #define TARGET_OPTION_OVERRIDE arm_option_override
383
384 #undef TARGET_COMP_TYPE_ATTRIBUTES
385 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
386
387 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
388 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
389
390 #undef TARGET_SCHED_ADJUST_COST
391 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
392
393 #undef TARGET_SCHED_REORDER
394 #define TARGET_SCHED_REORDER arm_sched_reorder
395
396 #undef TARGET_REGISTER_MOVE_COST
397 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
398
399 #undef TARGET_MEMORY_MOVE_COST
400 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
401
402 #undef TARGET_ENCODE_SECTION_INFO
403 #ifdef ARM_PE
404 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
405 #else
406 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
407 #endif
408
409 #undef TARGET_STRIP_NAME_ENCODING
410 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
411
412 #undef TARGET_ASM_INTERNAL_LABEL
413 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
414
415 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
416 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
417
418 #undef TARGET_FUNCTION_VALUE
419 #define TARGET_FUNCTION_VALUE arm_function_value
420
421 #undef TARGET_LIBCALL_VALUE
422 #define TARGET_LIBCALL_VALUE arm_libcall_value
423
424 #undef TARGET_FUNCTION_VALUE_REGNO_P
425 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
426
427 #undef TARGET_ASM_OUTPUT_MI_THUNK
428 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
429 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
430 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
431
432 #undef TARGET_RTX_COSTS
433 #define TARGET_RTX_COSTS arm_rtx_costs
434 #undef TARGET_ADDRESS_COST
435 #define TARGET_ADDRESS_COST arm_address_cost
436
437 #undef TARGET_SHIFT_TRUNCATION_MASK
438 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
439 #undef TARGET_VECTOR_MODE_SUPPORTED_P
440 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
441 #undef TARGET_ARRAY_MODE_SUPPORTED_P
442 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
443 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
444 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
445 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
446 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
447 arm_autovectorize_vector_sizes
448
449 #undef TARGET_MACHINE_DEPENDENT_REORG
450 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
451
452 #undef TARGET_INIT_BUILTINS
453 #define TARGET_INIT_BUILTINS arm_init_builtins
454 #undef TARGET_EXPAND_BUILTIN
455 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
456 #undef TARGET_BUILTIN_DECL
457 #define TARGET_BUILTIN_DECL arm_builtin_decl
458
459 #undef TARGET_INIT_LIBFUNCS
460 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
461
462 #undef TARGET_PROMOTE_FUNCTION_MODE
463 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
464 #undef TARGET_PROMOTE_PROTOTYPES
465 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
466 #undef TARGET_PASS_BY_REFERENCE
467 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
468 #undef TARGET_ARG_PARTIAL_BYTES
469 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
470 #undef TARGET_FUNCTION_ARG
471 #define TARGET_FUNCTION_ARG arm_function_arg
472 #undef TARGET_FUNCTION_ARG_ADVANCE
473 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
474 #undef TARGET_FUNCTION_ARG_BOUNDARY
475 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
476
477 #undef TARGET_SETUP_INCOMING_VARARGS
478 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
479
480 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
481 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
482
483 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
484 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
485 #undef TARGET_TRAMPOLINE_INIT
486 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
487 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
488 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
489
490 #undef TARGET_WARN_FUNC_RETURN
491 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
492
493 #undef TARGET_DEFAULT_SHORT_ENUMS
494 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
495
496 #undef TARGET_ALIGN_ANON_BITFIELD
497 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
498
499 #undef TARGET_NARROW_VOLATILE_BITFIELD
500 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
501
502 #undef TARGET_CXX_GUARD_TYPE
503 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
504
505 #undef TARGET_CXX_GUARD_MASK_BIT
506 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
507
508 #undef TARGET_CXX_GET_COOKIE_SIZE
509 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
510
511 #undef TARGET_CXX_COOKIE_HAS_SIZE
512 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
513
514 #undef TARGET_CXX_CDTOR_RETURNS_THIS
515 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
516
517 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
518 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
519
520 #undef TARGET_CXX_USE_AEABI_ATEXIT
521 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
522
523 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
524 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
525 arm_cxx_determine_class_data_visibility
526
527 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
528 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
529
530 #undef TARGET_RETURN_IN_MSB
531 #define TARGET_RETURN_IN_MSB arm_return_in_msb
532
533 #undef TARGET_RETURN_IN_MEMORY
534 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
535
536 #undef TARGET_MUST_PASS_IN_STACK
537 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
538
539 #if ARM_UNWIND_INFO
540 #undef TARGET_ASM_UNWIND_EMIT
541 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
542
543 /* EABI unwinding tables use a different format for the typeinfo tables. */
544 #undef TARGET_ASM_TTYPE
545 #define TARGET_ASM_TTYPE arm_output_ttype
546
547 #undef TARGET_ARM_EABI_UNWINDER
548 #define TARGET_ARM_EABI_UNWINDER true
549
550 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
551 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
552
553 #undef TARGET_ASM_INIT_SECTIONS
554 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
555 #endif /* ARM_UNWIND_INFO */
556
557 #undef TARGET_DWARF_REGISTER_SPAN
558 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
559
560 #undef TARGET_CANNOT_COPY_INSN_P
561 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
562
563 #ifdef HAVE_AS_TLS
564 #undef TARGET_HAVE_TLS
565 #define TARGET_HAVE_TLS true
566 #endif
567
568 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
569 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
570
571 #undef TARGET_LEGITIMATE_CONSTANT_P
572 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
573
574 #undef TARGET_CANNOT_FORCE_CONST_MEM
575 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
576
577 #undef TARGET_MAX_ANCHOR_OFFSET
578 #define TARGET_MAX_ANCHOR_OFFSET 4095
579
580 /* The minimum is set such that the total size of the block
581 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
582 divisible by eight, ensuring natural spacing of anchors. */
583 #undef TARGET_MIN_ANCHOR_OFFSET
584 #define TARGET_MIN_ANCHOR_OFFSET -4088
585
586 #undef TARGET_SCHED_ISSUE_RATE
587 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
588
589 #undef TARGET_MANGLE_TYPE
590 #define TARGET_MANGLE_TYPE arm_mangle_type
591
592 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
593 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
594
595 #undef TARGET_BUILD_BUILTIN_VA_LIST
596 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
597 #undef TARGET_EXPAND_BUILTIN_VA_START
598 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
599 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
600 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
601
602 #ifdef HAVE_AS_TLS
603 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
604 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
605 #endif
606
607 #undef TARGET_LEGITIMATE_ADDRESS_P
608 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
609
610 #undef TARGET_PREFERRED_RELOAD_CLASS
611 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
612
613 #undef TARGET_INVALID_PARAMETER_TYPE
614 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
615
616 #undef TARGET_INVALID_RETURN_TYPE
617 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
618
619 #undef TARGET_PROMOTED_TYPE
620 #define TARGET_PROMOTED_TYPE arm_promoted_type
621
622 #undef TARGET_CONVERT_TO_TYPE
623 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
624
625 #undef TARGET_SCALAR_MODE_SUPPORTED_P
626 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
627
628 #undef TARGET_FRAME_POINTER_REQUIRED
629 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
630
631 #undef TARGET_CAN_ELIMINATE
632 #define TARGET_CAN_ELIMINATE arm_can_eliminate
633
634 #undef TARGET_CONDITIONAL_REGISTER_USAGE
635 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
636
637 #undef TARGET_CLASS_LIKELY_SPILLED_P
638 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
639
640 #undef TARGET_VECTORIZE_BUILTINS
641 #define TARGET_VECTORIZE_BUILTINS
642
643 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
644 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
645 arm_builtin_vectorized_function
646
647 #undef TARGET_VECTOR_ALIGNMENT
648 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
649
650 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
651 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
652 arm_vector_alignment_reachable
653
654 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
655 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
656 arm_builtin_support_vector_misalignment
657
658 #undef TARGET_PREFERRED_RENAME_CLASS
659 #define TARGET_PREFERRED_RENAME_CLASS \
660 arm_preferred_rename_class
661
662 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
663 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
664 arm_vectorize_vec_perm_const_ok
665
666 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
667 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
668 arm_builtin_vectorization_cost
669 #undef TARGET_VECTORIZE_ADD_STMT_COST
670 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
671
672 #undef TARGET_CANONICALIZE_COMPARISON
673 #define TARGET_CANONICALIZE_COMPARISON \
674 arm_canonicalize_comparison
675
676 #undef TARGET_ASAN_SHADOW_OFFSET
677 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
678
679 #undef MAX_INSN_PER_IT_BLOCK
680 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
681
682 #undef TARGET_CAN_USE_DOLOOP_P
683 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
684
685 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
686 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
687
688 struct gcc_target targetm = TARGET_INITIALIZER;
689 \f
690 /* Obstack for minipool constant handling. */
691 static struct obstack minipool_obstack;
692 static char * minipool_startobj;
693
694 /* The maximum number of insns skipped which
695 will be conditionalised if possible. */
696 static int max_insns_skipped = 5;
697
698 extern FILE * asm_out_file;
699
700 /* True if we are currently building a constant table. */
701 int making_const_table;
702
703 /* The processor for which instructions should be scheduled. */
704 enum processor_type arm_tune = arm_none;
705
706 /* The current tuning set. */
707 const struct tune_params *current_tune;
708
709 /* Which floating point hardware to schedule for. */
710 int arm_fpu_attr;
711
712 /* Which floating point hardware to use. */
713 const struct arm_fpu_desc *arm_fpu_desc;
714
715 /* Used for Thumb call_via trampolines. */
716 rtx thumb_call_via_label[14];
717 static int thumb_call_reg_needed;
718
719 /* Bit values used to identify processor capabilities. */
720 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
721 #define FL_ARCH3M (1 << 1) /* Extended multiply */
722 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
723 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
724 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
725 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
726 #define FL_THUMB (1 << 6) /* Thumb aware */
727 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
728 #define FL_STRONG (1 << 8) /* StrongARM */
729 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
730 #define FL_XSCALE (1 << 10) /* XScale */
731 /* spare (1 << 11) */
732 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
733 media instructions. */
734 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
735 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
736 Note: ARM6 & 7 derivatives only. */
737 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
738 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
739 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
740 profile. */
741 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
742 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
743 #define FL_NEON (1 << 20) /* Neon instructions. */
744 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
745 architecture. */
746 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
747 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
748 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
749 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
750
751 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
752 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
753
754 /* Flags that only affect tuning, not available instructions. */
755 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
756 | FL_CO_PROC)
757
758 #define FL_FOR_ARCH2 FL_NOTM
759 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
760 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
761 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
762 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
763 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
764 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
765 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
766 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
767 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
768 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
769 #define FL_FOR_ARCH6J FL_FOR_ARCH6
770 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
771 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
772 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
773 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
774 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
775 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
776 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
777 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
778 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
779 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
780 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
781 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
782
783 /* The bits in this mask specify which
784 instructions we are allowed to generate. */
785 static unsigned long insn_flags = 0;
786
787 /* The bits in this mask specify which instruction scheduling options should
788 be used. */
789 static unsigned long tune_flags = 0;
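/* Illustrative only: the option-override code declared above
   (arm_option_override) derives the arm_arch* and tuning globals below from
   these masks with simple bit tests, roughly of the form

     arm_arch4    = (insn_flags & FL_ARCH4)   != 0;
     arm_arch5    = (insn_flags & FL_ARCH5)   != 0;
     arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;

   so each FL_* bit defined earlier corresponds to one capability test.  */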
790
791 /* The highest ARM architecture version supported by the
792 target. */
793 enum base_architecture arm_base_arch = BASE_ARCH_0;
794
795 /* The following are used in the arm.md file as equivalents to bits
796 in the above two flag variables. */
797
798 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
799 int arm_arch3m = 0;
800
801 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
802 int arm_arch4 = 0;
803
804 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
805 int arm_arch4t = 0;
806
807 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
808 int arm_arch5 = 0;
809
810 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
811 int arm_arch5e = 0;
812
813 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
814 int arm_arch6 = 0;
815
816 /* Nonzero if this chip supports the ARM 6K extensions. */
817 int arm_arch6k = 0;
818
819 /* Nonzero if instructions present in ARMv6-M can be used. */
820 int arm_arch6m = 0;
821
822 /* Nonzero if this chip supports the ARM 7 extensions. */
823 int arm_arch7 = 0;
824
825 /* Nonzero if instructions not present in the 'M' profile can be used. */
826 int arm_arch_notm = 0;
827
828 /* Nonzero if instructions present in ARMv7E-M can be used. */
829 int arm_arch7em = 0;
830
831 /* Nonzero if instructions present in ARMv8 can be used. */
832 int arm_arch8 = 0;
833
834 /* Nonzero if this chip can benefit from load scheduling. */
835 int arm_ld_sched = 0;
836
837 /* Nonzero if this chip is a StrongARM. */
838 int arm_tune_strongarm = 0;
839
840 /* Nonzero if this chip supports Intel Wireless MMX technology. */
841 int arm_arch_iwmmxt = 0;
842
843 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
844 int arm_arch_iwmmxt2 = 0;
845
846 /* Nonzero if this chip is an XScale. */
847 int arm_arch_xscale = 0;
848
849 /* Nonzero if tuning for XScale. */
850 int arm_tune_xscale = 0;
851
852 /* Nonzero if we want to tune for stores that access the write-buffer.
853 This typically means an ARM6 or ARM7 with MMU or MPU. */
854 int arm_tune_wbuf = 0;
855
856 /* Nonzero if tuning for Cortex-A9. */
857 int arm_tune_cortex_a9 = 0;
858
859 /* Nonzero if generating Thumb instructions. */
860 int thumb_code = 0;
861
862 /* Nonzero if generating Thumb-1 instructions. */
863 int thumb1_code = 0;
864
865 /* Nonzero if we should define __THUMB_INTERWORK__ in the
866 preprocessor.
867 XXX This is a bit of a hack, it's intended to help work around
868 problems in GLD which doesn't understand that armv5t code is
869 interworking clean. */
870 int arm_cpp_interwork = 0;
871
872 /* Nonzero if chip supports Thumb 2. */
873 int arm_arch_thumb2;
874
875 /* Nonzero if chip supports integer division instruction. */
876 int arm_arch_arm_hwdiv;
877 int arm_arch_thumb_hwdiv;
878
879 /* Nonzero if we should use Neon to handle 64-bit operations rather
880 than core registers. */
881 int prefer_neon_for_64bits = 0;
882
883 /* Nonzero if we shouldn't use literal pools. */
884 bool arm_disable_literal_pool = false;
885
886 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
887 we must report the mode of the memory reference from
888 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
889 enum machine_mode output_memory_reference_mode;
890
891 /* The register number to be used for the PIC offset register. */
892 unsigned arm_pic_register = INVALID_REGNUM;
893
894 enum arm_pcs arm_pcs_default;
895
896 /* For an explanation of these variables, see final_prescan_insn below. */
897 int arm_ccfsm_state;
898 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
899 enum arm_cond_code arm_current_cc;
900
901 rtx arm_target_insn;
902 int arm_target_label;
903 /* The number of conditionally executed insns, including the current insn. */
904 int arm_condexec_count = 0;
905 /* A bitmask specifying the patterns for the IT block.
906 Zero means do not output an IT block before this insn. */
907 int arm_condexec_mask = 0;
908 /* The number of bits used in arm_condexec_mask. */
909 int arm_condexec_masklen = 0;
910
911 /* Nonzero if chip supports the ARMv8 CRC instructions. */
912 int arm_arch_crc = 0;
913
914 /* The condition codes of the ARM, and the inverse function. */
915 static const char * const arm_condition_codes[] =
916 {
917 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
918 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
919 };
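/* The "inverse function" mentioned above relies on the ordering of this
   table: a condition and its logical inverse differ only in the low bit of
   their index ("eq"/"ne", "cs"/"cc", "ge"/"lt", ...), so inversion is just
   an XOR with 1.  A sketch of the macro (the real definition lives in the
   backend headers):

     #define ARM_INVERSE_CONDITION_CODE(X)  ((arm_cc) (((int) X) ^ 1))  */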
920
921 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
922 int arm_regs_in_sequence[] =
923 {
924 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
925 };
926
927 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
928 #define streq(string1, string2) (strcmp (string1, string2) == 0)
929
930 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
931 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
932 | (1 << PIC_OFFSET_TABLE_REGNUM)))
933 \f
934 /* Initialization code. */
935
936 struct processors
937 {
938 const char *const name;
939 enum processor_type core;
940 const char *arch;
941 enum base_architecture base_arch;
942 const unsigned long flags;
943 const struct tune_params *const tune;
944 };
945
946
947 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
948 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
949 prefetch_slots, \
950 l1_size, \
951 l1_line_size
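/* Illustrative use: a tuning structure that benefits from prefetching would
   pass something like ARM_PREFETCH_BENEFICIAL (4, 32768, 64), which expands
   to the three initializer values "4, 32768, 64" (prefetch slots, L1 size,
   L1 line size); ARM_PREFETCH_NOT_BENEFICIAL supplies "0, -1, -1".  The
   numbers here are examples, not figures for any particular core.  */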
952
953 /* arm generic vectorizer costs. */
954 static const
955 struct cpu_vec_costs arm_default_vec_cost = {
956 1, /* scalar_stmt_cost. */
957 1, /* scalar load_cost. */
958 1, /* scalar_store_cost. */
959 1, /* vec_stmt_cost. */
960 1, /* vec_to_scalar_cost. */
961 1, /* scalar_to_vec_cost. */
962 1, /* vec_align_load_cost. */
963 1, /* vec_unalign_load_cost. */
964 1, /* vec_unalign_store_cost. */
965 1, /* vec_store_cost. */
966 3, /* cond_taken_branch_cost. */
967 1, /* cond_not_taken_branch_cost. */
968 };
969
970 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
971 #include "aarch-cost-tables.h"
972
973
974
975 const struct cpu_cost_table cortexa9_extra_costs =
976 {
977 /* ALU */
978 {
979 0, /* arith. */
980 0, /* logical. */
981 0, /* shift. */
982 COSTS_N_INSNS (1), /* shift_reg. */
983 COSTS_N_INSNS (1), /* arith_shift. */
984 COSTS_N_INSNS (2), /* arith_shift_reg. */
985 0, /* log_shift. */
986 COSTS_N_INSNS (1), /* log_shift_reg. */
987 COSTS_N_INSNS (1), /* extend. */
988 COSTS_N_INSNS (2), /* extend_arith. */
989 COSTS_N_INSNS (1), /* bfi. */
990 COSTS_N_INSNS (1), /* bfx. */
991 0, /* clz. */
992 0, /* rev. */
993 0, /* non_exec. */
994 true /* non_exec_costs_exec. */
995 },
996 {
997 /* MULT SImode */
998 {
999 COSTS_N_INSNS (3), /* simple. */
1000 COSTS_N_INSNS (3), /* flag_setting. */
1001 COSTS_N_INSNS (2), /* extend. */
1002 COSTS_N_INSNS (3), /* add. */
1003 COSTS_N_INSNS (2), /* extend_add. */
1004 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1005 },
1006 /* MULT DImode */
1007 {
1008 0, /* simple (N/A). */
1009 0, /* flag_setting (N/A). */
1010 COSTS_N_INSNS (4), /* extend. */
1011 0, /* add (N/A). */
1012 COSTS_N_INSNS (4), /* extend_add. */
1013 0 /* idiv (N/A). */
1014 }
1015 },
1016 /* LD/ST */
1017 {
1018 COSTS_N_INSNS (2), /* load. */
1019 COSTS_N_INSNS (2), /* load_sign_extend. */
1020 COSTS_N_INSNS (2), /* ldrd. */
1021 COSTS_N_INSNS (2), /* ldm_1st. */
1022 1, /* ldm_regs_per_insn_1st. */
1023 2, /* ldm_regs_per_insn_subsequent. */
1024 COSTS_N_INSNS (5), /* loadf. */
1025 COSTS_N_INSNS (5), /* loadd. */
1026 COSTS_N_INSNS (1), /* load_unaligned. */
1027 COSTS_N_INSNS (2), /* store. */
1028 COSTS_N_INSNS (2), /* strd. */
1029 COSTS_N_INSNS (2), /* stm_1st. */
1030 1, /* stm_regs_per_insn_1st. */
1031 2, /* stm_regs_per_insn_subsequent. */
1032 COSTS_N_INSNS (1), /* storef. */
1033 COSTS_N_INSNS (1), /* stored. */
1034 COSTS_N_INSNS (1) /* store_unaligned. */
1035 },
1036 {
1037 /* FP SFmode */
1038 {
1039 COSTS_N_INSNS (14), /* div. */
1040 COSTS_N_INSNS (4), /* mult. */
1041 COSTS_N_INSNS (7), /* mult_addsub. */
1042 COSTS_N_INSNS (30), /* fma. */
1043 COSTS_N_INSNS (3), /* addsub. */
1044 COSTS_N_INSNS (1), /* fpconst. */
1045 COSTS_N_INSNS (1), /* neg. */
1046 COSTS_N_INSNS (3), /* compare. */
1047 COSTS_N_INSNS (3), /* widen. */
1048 COSTS_N_INSNS (3), /* narrow. */
1049 COSTS_N_INSNS (3), /* toint. */
1050 COSTS_N_INSNS (3), /* fromint. */
1051 COSTS_N_INSNS (3) /* roundint. */
1052 },
1053 /* FP DFmode */
1054 {
1055 COSTS_N_INSNS (24), /* div. */
1056 COSTS_N_INSNS (5), /* mult. */
1057 COSTS_N_INSNS (8), /* mult_addsub. */
1058 COSTS_N_INSNS (30), /* fma. */
1059 COSTS_N_INSNS (3), /* addsub. */
1060 COSTS_N_INSNS (1), /* fpconst. */
1061 COSTS_N_INSNS (1), /* neg. */
1062 COSTS_N_INSNS (3), /* compare. */
1063 COSTS_N_INSNS (3), /* widen. */
1064 COSTS_N_INSNS (3), /* narrow. */
1065 COSTS_N_INSNS (3), /* toint. */
1066 COSTS_N_INSNS (3), /* fromint. */
1067 COSTS_N_INSNS (3) /* roundint. */
1068 }
1069 },
1070 /* Vector */
1071 {
1072 COSTS_N_INSNS (1) /* alu. */
1073 }
1074 };
1075
1076 const struct cpu_cost_table cortexa8_extra_costs =
1077 {
1078 /* ALU */
1079 {
1080 0, /* arith. */
1081 0, /* logical. */
1082 COSTS_N_INSNS (1), /* shift. */
1083 0, /* shift_reg. */
1084 COSTS_N_INSNS (1), /* arith_shift. */
1085 0, /* arith_shift_reg. */
1086 COSTS_N_INSNS (1), /* log_shift. */
1087 0, /* log_shift_reg. */
1088 0, /* extend. */
1089 0, /* extend_arith. */
1090 0, /* bfi. */
1091 0, /* bfx. */
1092 0, /* clz. */
1093 0, /* rev. */
1094 0, /* non_exec. */
1095 true /* non_exec_costs_exec. */
1096 },
1097 {
1098 /* MULT SImode */
1099 {
1100 COSTS_N_INSNS (1), /* simple. */
1101 COSTS_N_INSNS (1), /* flag_setting. */
1102 COSTS_N_INSNS (1), /* extend. */
1103 COSTS_N_INSNS (1), /* add. */
1104 COSTS_N_INSNS (1), /* extend_add. */
1105 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1106 },
1107 /* MULT DImode */
1108 {
1109 0, /* simple (N/A). */
1110 0, /* flag_setting (N/A). */
1111 COSTS_N_INSNS (2), /* extend. */
1112 0, /* add (N/A). */
1113 COSTS_N_INSNS (2), /* extend_add. */
1114 0 /* idiv (N/A). */
1115 }
1116 },
1117 /* LD/ST */
1118 {
1119 COSTS_N_INSNS (1), /* load. */
1120 COSTS_N_INSNS (1), /* load_sign_extend. */
1121 COSTS_N_INSNS (1), /* ldrd. */
1122 COSTS_N_INSNS (1), /* ldm_1st. */
1123 1, /* ldm_regs_per_insn_1st. */
1124 2, /* ldm_regs_per_insn_subsequent. */
1125 COSTS_N_INSNS (1), /* loadf. */
1126 COSTS_N_INSNS (1), /* loadd. */
1127 COSTS_N_INSNS (1), /* load_unaligned. */
1128 COSTS_N_INSNS (1), /* store. */
1129 COSTS_N_INSNS (1), /* strd. */
1130 COSTS_N_INSNS (1), /* stm_1st. */
1131 1, /* stm_regs_per_insn_1st. */
1132 2, /* stm_regs_per_insn_subsequent. */
1133 COSTS_N_INSNS (1), /* storef. */
1134 COSTS_N_INSNS (1), /* stored. */
1135 COSTS_N_INSNS (1) /* store_unaligned. */
1136 },
1137 {
1138 /* FP SFmode */
1139 {
1140 COSTS_N_INSNS (36), /* div. */
1141 COSTS_N_INSNS (11), /* mult. */
1142 COSTS_N_INSNS (20), /* mult_addsub. */
1143 COSTS_N_INSNS (30), /* fma. */
1144 COSTS_N_INSNS (9), /* addsub. */
1145 COSTS_N_INSNS (3), /* fpconst. */
1146 COSTS_N_INSNS (3), /* neg. */
1147 COSTS_N_INSNS (6), /* compare. */
1148 COSTS_N_INSNS (4), /* widen. */
1149 COSTS_N_INSNS (4), /* narrow. */
1150 COSTS_N_INSNS (8), /* toint. */
1151 COSTS_N_INSNS (8), /* fromint. */
1152 COSTS_N_INSNS (8) /* roundint. */
1153 },
1154 /* FP DFmode */
1155 {
1156 COSTS_N_INSNS (64), /* div. */
1157 COSTS_N_INSNS (16), /* mult. */
1158 COSTS_N_INSNS (25), /* mult_addsub. */
1159 COSTS_N_INSNS (30), /* fma. */
1160 COSTS_N_INSNS (9), /* addsub. */
1161 COSTS_N_INSNS (3), /* fpconst. */
1162 COSTS_N_INSNS (3), /* neg. */
1163 COSTS_N_INSNS (6), /* compare. */
1164 COSTS_N_INSNS (6), /* widen. */
1165 COSTS_N_INSNS (6), /* narrow. */
1166 COSTS_N_INSNS (8), /* toint. */
1167 COSTS_N_INSNS (8), /* fromint. */
1168 COSTS_N_INSNS (8) /* roundint. */
1169 }
1170 },
1171 /* Vector */
1172 {
1173 COSTS_N_INSNS (1) /* alu. */
1174 }
1175 };
1176
1177
1178
1179 const struct cpu_cost_table cortexa7_extra_costs =
1180 {
1181 /* ALU */
1182 {
1183 0, /* arith. */
1184 0, /* logical. */
1185 COSTS_N_INSNS (1), /* shift. */
1186 COSTS_N_INSNS (1), /* shift_reg. */
1187 COSTS_N_INSNS (1), /* arith_shift. */
1188 COSTS_N_INSNS (1), /* arith_shift_reg. */
1189 COSTS_N_INSNS (1), /* log_shift. */
1190 COSTS_N_INSNS (1), /* log_shift_reg. */
1191 COSTS_N_INSNS (1), /* extend. */
1192 COSTS_N_INSNS (1), /* extend_arith. */
1193 COSTS_N_INSNS (1), /* bfi. */
1194 COSTS_N_INSNS (1), /* bfx. */
1195 COSTS_N_INSNS (1), /* clz. */
1196 COSTS_N_INSNS (1), /* rev. */
1197 0, /* non_exec. */
1198 true /* non_exec_costs_exec. */
1199 },
1200
1201 {
1202 /* MULT SImode */
1203 {
1204 0, /* simple. */
1205 COSTS_N_INSNS (1), /* flag_setting. */
1206 COSTS_N_INSNS (1), /* extend. */
1207 COSTS_N_INSNS (1), /* add. */
1208 COSTS_N_INSNS (1), /* extend_add. */
1209 COSTS_N_INSNS (7) /* idiv. */
1210 },
1211 /* MULT DImode */
1212 {
1213 0, /* simple (N/A). */
1214 0, /* flag_setting (N/A). */
1215 COSTS_N_INSNS (1), /* extend. */
1216 0, /* add. */
1217 COSTS_N_INSNS (2), /* extend_add. */
1218 0 /* idiv (N/A). */
1219 }
1220 },
1221 /* LD/ST */
1222 {
1223 COSTS_N_INSNS (1), /* load. */
1224 COSTS_N_INSNS (1), /* load_sign_extend. */
1225 COSTS_N_INSNS (3), /* ldrd. */
1226 COSTS_N_INSNS (1), /* ldm_1st. */
1227 1, /* ldm_regs_per_insn_1st. */
1228 2, /* ldm_regs_per_insn_subsequent. */
1229 COSTS_N_INSNS (2), /* loadf. */
1230 COSTS_N_INSNS (2), /* loadd. */
1231 COSTS_N_INSNS (1), /* load_unaligned. */
1232 COSTS_N_INSNS (1), /* store. */
1233 COSTS_N_INSNS (3), /* strd. */
1234 COSTS_N_INSNS (1), /* stm_1st. */
1235 1, /* stm_regs_per_insn_1st. */
1236 2, /* stm_regs_per_insn_subsequent. */
1237 COSTS_N_INSNS (2), /* storef. */
1238 COSTS_N_INSNS (2), /* stored. */
1239 COSTS_N_INSNS (1) /* store_unaligned. */
1240 },
1241 {
1242 /* FP SFmode */
1243 {
1244 COSTS_N_INSNS (15), /* div. */
1245 COSTS_N_INSNS (3), /* mult. */
1246 COSTS_N_INSNS (7), /* mult_addsub. */
1247 COSTS_N_INSNS (7), /* fma. */
1248 COSTS_N_INSNS (3), /* addsub. */
1249 COSTS_N_INSNS (3), /* fpconst. */
1250 COSTS_N_INSNS (3), /* neg. */
1251 COSTS_N_INSNS (3), /* compare. */
1252 COSTS_N_INSNS (3), /* widen. */
1253 COSTS_N_INSNS (3), /* narrow. */
1254 COSTS_N_INSNS (3), /* toint. */
1255 COSTS_N_INSNS (3), /* fromint. */
1256 COSTS_N_INSNS (3) /* roundint. */
1257 },
1258 /* FP DFmode */
1259 {
1260 COSTS_N_INSNS (30), /* div. */
1261 COSTS_N_INSNS (6), /* mult. */
1262 COSTS_N_INSNS (10), /* mult_addsub. */
1263 COSTS_N_INSNS (7), /* fma. */
1264 COSTS_N_INSNS (3), /* addsub. */
1265 COSTS_N_INSNS (3), /* fpconst. */
1266 COSTS_N_INSNS (3), /* neg. */
1267 COSTS_N_INSNS (3), /* compare. */
1268 COSTS_N_INSNS (3), /* widen. */
1269 COSTS_N_INSNS (3), /* narrow. */
1270 COSTS_N_INSNS (3), /* toint. */
1271 COSTS_N_INSNS (3), /* fromint. */
1272 COSTS_N_INSNS (3) /* roundint. */
1273 }
1274 },
1275 /* Vector */
1276 {
1277 COSTS_N_INSNS (1) /* alu. */
1278 }
1279 };
1280
1281 const struct cpu_cost_table cortexa12_extra_costs =
1282 {
1283 /* ALU */
1284 {
1285 0, /* arith. */
1286 0, /* logical. */
1287 0, /* shift. */
1288 COSTS_N_INSNS (1), /* shift_reg. */
1289 COSTS_N_INSNS (1), /* arith_shift. */
1290 COSTS_N_INSNS (1), /* arith_shift_reg. */
1291 COSTS_N_INSNS (1), /* log_shift. */
1292 COSTS_N_INSNS (1), /* log_shift_reg. */
1293 0, /* extend. */
1294 COSTS_N_INSNS (1), /* extend_arith. */
1295 0, /* bfi. */
1296 COSTS_N_INSNS (1), /* bfx. */
1297 COSTS_N_INSNS (1), /* clz. */
1298 COSTS_N_INSNS (1), /* rev. */
1299 0, /* non_exec. */
1300 true /* non_exec_costs_exec. */
1301 },
1302 /* MULT SImode */
1303 {
1304 {
1305 COSTS_N_INSNS (2), /* simple. */
1306 COSTS_N_INSNS (3), /* flag_setting. */
1307 COSTS_N_INSNS (2), /* extend. */
1308 COSTS_N_INSNS (3), /* add. */
1309 COSTS_N_INSNS (2), /* extend_add. */
1310 COSTS_N_INSNS (18) /* idiv. */
1311 },
1312 /* MULT DImode */
1313 {
1314 0, /* simple (N/A). */
1315 0, /* flag_setting (N/A). */
1316 COSTS_N_INSNS (3), /* extend. */
1317 0, /* add (N/A). */
1318 COSTS_N_INSNS (3), /* extend_add. */
1319 0 /* idiv (N/A). */
1320 }
1321 },
1322 /* LD/ST */
1323 {
1324 COSTS_N_INSNS (3), /* load. */
1325 COSTS_N_INSNS (3), /* load_sign_extend. */
1326 COSTS_N_INSNS (3), /* ldrd. */
1327 COSTS_N_INSNS (3), /* ldm_1st. */
1328 1, /* ldm_regs_per_insn_1st. */
1329 2, /* ldm_regs_per_insn_subsequent. */
1330 COSTS_N_INSNS (3), /* loadf. */
1331 COSTS_N_INSNS (3), /* loadd. */
1332 0, /* load_unaligned. */
1333 0, /* store. */
1334 0, /* strd. */
1335 0, /* stm_1st. */
1336 1, /* stm_regs_per_insn_1st. */
1337 2, /* stm_regs_per_insn_subsequent. */
1338 COSTS_N_INSNS (2), /* storef. */
1339 COSTS_N_INSNS (2), /* stored. */
1340 0 /* store_unaligned. */
1341 },
1342 {
1343 /* FP SFmode */
1344 {
1345 COSTS_N_INSNS (17), /* div. */
1346 COSTS_N_INSNS (4), /* mult. */
1347 COSTS_N_INSNS (8), /* mult_addsub. */
1348 COSTS_N_INSNS (8), /* fma. */
1349 COSTS_N_INSNS (4), /* addsub. */
1350 COSTS_N_INSNS (2), /* fpconst. */
1351 COSTS_N_INSNS (2), /* neg. */
1352 COSTS_N_INSNS (2), /* compare. */
1353 COSTS_N_INSNS (4), /* widen. */
1354 COSTS_N_INSNS (4), /* narrow. */
1355 COSTS_N_INSNS (4), /* toint. */
1356 COSTS_N_INSNS (4), /* fromint. */
1357 COSTS_N_INSNS (4) /* roundint. */
1358 },
1359 /* FP DFmode */
1360 {
1361 COSTS_N_INSNS (31), /* div. */
1362 COSTS_N_INSNS (4), /* mult. */
1363 COSTS_N_INSNS (8), /* mult_addsub. */
1364 COSTS_N_INSNS (8), /* fma. */
1365 COSTS_N_INSNS (4), /* addsub. */
1366 COSTS_N_INSNS (2), /* fpconst. */
1367 COSTS_N_INSNS (2), /* neg. */
1368 COSTS_N_INSNS (2), /* compare. */
1369 COSTS_N_INSNS (4), /* widen. */
1370 COSTS_N_INSNS (4), /* narrow. */
1371 COSTS_N_INSNS (4), /* toint. */
1372 COSTS_N_INSNS (4), /* fromint. */
1373 COSTS_N_INSNS (4) /* roundint. */
1374 }
1375 },
1376 /* Vector */
1377 {
1378 COSTS_N_INSNS (1) /* alu. */
1379 }
1380 };
1381
1382 const struct cpu_cost_table cortexa15_extra_costs =
1383 {
1384 /* ALU */
1385 {
1386 0, /* arith. */
1387 0, /* logical. */
1388 0, /* shift. */
1389 0, /* shift_reg. */
1390 COSTS_N_INSNS (1), /* arith_shift. */
1391 COSTS_N_INSNS (1), /* arith_shift_reg. */
1392 COSTS_N_INSNS (1), /* log_shift. */
1393 COSTS_N_INSNS (1), /* log_shift_reg. */
1394 0, /* extend. */
1395 COSTS_N_INSNS (1), /* extend_arith. */
1396 COSTS_N_INSNS (1), /* bfi. */
1397 0, /* bfx. */
1398 0, /* clz. */
1399 0, /* rev. */
1400 0, /* non_exec. */
1401 true /* non_exec_costs_exec. */
1402 },
1403 /* MULT SImode */
1404 {
1405 {
1406 COSTS_N_INSNS (2), /* simple. */
1407 COSTS_N_INSNS (3), /* flag_setting. */
1408 COSTS_N_INSNS (2), /* extend. */
1409 COSTS_N_INSNS (2), /* add. */
1410 COSTS_N_INSNS (2), /* extend_add. */
1411 COSTS_N_INSNS (18) /* idiv. */
1412 },
1413 /* MULT DImode */
1414 {
1415 0, /* simple (N/A). */
1416 0, /* flag_setting (N/A). */
1417 COSTS_N_INSNS (3), /* extend. */
1418 0, /* add (N/A). */
1419 COSTS_N_INSNS (3), /* extend_add. */
1420 0 /* idiv (N/A). */
1421 }
1422 },
1423 /* LD/ST */
1424 {
1425 COSTS_N_INSNS (3), /* load. */
1426 COSTS_N_INSNS (3), /* load_sign_extend. */
1427 COSTS_N_INSNS (3), /* ldrd. */
1428 COSTS_N_INSNS (4), /* ldm_1st. */
1429 1, /* ldm_regs_per_insn_1st. */
1430 2, /* ldm_regs_per_insn_subsequent. */
1431 COSTS_N_INSNS (4), /* loadf. */
1432 COSTS_N_INSNS (4), /* loadd. */
1433 0, /* load_unaligned. */
1434 0, /* store. */
1435 0, /* strd. */
1436 COSTS_N_INSNS (1), /* stm_1st. */
1437 1, /* stm_regs_per_insn_1st. */
1438 2, /* stm_regs_per_insn_subsequent. */
1439 0, /* storef. */
1440 0, /* stored. */
1441 0 /* store_unaligned. */
1442 },
1443 {
1444 /* FP SFmode */
1445 {
1446 COSTS_N_INSNS (17), /* div. */
1447 COSTS_N_INSNS (4), /* mult. */
1448 COSTS_N_INSNS (8), /* mult_addsub. */
1449 COSTS_N_INSNS (8), /* fma. */
1450 COSTS_N_INSNS (4), /* addsub. */
1451 COSTS_N_INSNS (2), /* fpconst. */
1452 COSTS_N_INSNS (2), /* neg. */
1453 COSTS_N_INSNS (5), /* compare. */
1454 COSTS_N_INSNS (4), /* widen. */
1455 COSTS_N_INSNS (4), /* narrow. */
1456 COSTS_N_INSNS (4), /* toint. */
1457 COSTS_N_INSNS (4), /* fromint. */
1458 COSTS_N_INSNS (4) /* roundint. */
1459 },
1460 /* FP DFmode */
1461 {
1462 COSTS_N_INSNS (31), /* div. */
1463 COSTS_N_INSNS (4), /* mult. */
1464 COSTS_N_INSNS (8), /* mult_addsub. */
1465 COSTS_N_INSNS (8), /* fma. */
1466 COSTS_N_INSNS (4), /* addsub. */
1467 COSTS_N_INSNS (2), /* fpconst. */
1468 COSTS_N_INSNS (2), /* neg. */
1469 COSTS_N_INSNS (2), /* compare. */
1470 COSTS_N_INSNS (4), /* widen. */
1471 COSTS_N_INSNS (4), /* narrow. */
1472 COSTS_N_INSNS (4), /* toint. */
1473 COSTS_N_INSNS (4), /* fromint. */
1474 COSTS_N_INSNS (4) /* roundint. */
1475 }
1476 },
1477 /* Vector */
1478 {
1479 COSTS_N_INSNS (1) /* alu. */
1480 }
1481 };
1482
1483 const struct cpu_cost_table v7m_extra_costs =
1484 {
1485 /* ALU */
1486 {
1487 0, /* arith. */
1488 0, /* logical. */
1489 0, /* shift. */
1490 0, /* shift_reg. */
1491 0, /* arith_shift. */
1492 COSTS_N_INSNS (1), /* arith_shift_reg. */
1493 0, /* log_shift. */
1494 COSTS_N_INSNS (1), /* log_shift_reg. */
1495 0, /* extend. */
1496 COSTS_N_INSNS (1), /* extend_arith. */
1497 0, /* bfi. */
1498 0, /* bfx. */
1499 0, /* clz. */
1500 0, /* rev. */
1501 COSTS_N_INSNS (1), /* non_exec. */
1502 false /* non_exec_costs_exec. */
1503 },
1504 {
1505 /* MULT SImode */
1506 {
1507 COSTS_N_INSNS (1), /* simple. */
1508 COSTS_N_INSNS (1), /* flag_setting. */
1509 COSTS_N_INSNS (2), /* extend. */
1510 COSTS_N_INSNS (1), /* add. */
1511 COSTS_N_INSNS (3), /* extend_add. */
1512 COSTS_N_INSNS (8) /* idiv. */
1513 },
1514 /* MULT DImode */
1515 {
1516 0, /* simple (N/A). */
1517 0, /* flag_setting (N/A). */
1518 COSTS_N_INSNS (2), /* extend. */
1519 0, /* add (N/A). */
1520 COSTS_N_INSNS (3), /* extend_add. */
1521 0 /* idiv (N/A). */
1522 }
1523 },
1524 /* LD/ST */
1525 {
1526 COSTS_N_INSNS (2), /* load. */
1527 0, /* load_sign_extend. */
1528 COSTS_N_INSNS (3), /* ldrd. */
1529 COSTS_N_INSNS (2), /* ldm_1st. */
1530 1, /* ldm_regs_per_insn_1st. */
1531 1, /* ldm_regs_per_insn_subsequent. */
1532 COSTS_N_INSNS (2), /* loadf. */
1533 COSTS_N_INSNS (3), /* loadd. */
1534 COSTS_N_INSNS (1), /* load_unaligned. */
1535 COSTS_N_INSNS (2), /* store. */
1536 COSTS_N_INSNS (3), /* strd. */
1537 COSTS_N_INSNS (2), /* stm_1st. */
1538 1, /* stm_regs_per_insn_1st. */
1539 1, /* stm_regs_per_insn_subsequent. */
1540 COSTS_N_INSNS (2), /* storef. */
1541 COSTS_N_INSNS (3), /* stored. */
1542 COSTS_N_INSNS (1) /* store_unaligned. */
1543 },
1544 {
1545 /* FP SFmode */
1546 {
1547 COSTS_N_INSNS (7), /* div. */
1548 COSTS_N_INSNS (2), /* mult. */
1549 COSTS_N_INSNS (5), /* mult_addsub. */
1550 COSTS_N_INSNS (3), /* fma. */
1551 COSTS_N_INSNS (1), /* addsub. */
1552 0, /* fpconst. */
1553 0, /* neg. */
1554 0, /* compare. */
1555 0, /* widen. */
1556 0, /* narrow. */
1557 0, /* toint. */
1558 0, /* fromint. */
1559 0 /* roundint. */
1560 },
1561 /* FP DFmode */
1562 {
1563 COSTS_N_INSNS (15), /* div. */
1564 COSTS_N_INSNS (5), /* mult. */
1565 COSTS_N_INSNS (7), /* mult_addsub. */
1566 COSTS_N_INSNS (7), /* fma. */
1567 COSTS_N_INSNS (3), /* addsub. */
1568 0, /* fpconst. */
1569 0, /* neg. */
1570 0, /* compare. */
1571 0, /* widen. */
1572 0, /* narrow. */
1573 0, /* toint. */
1574 0, /* fromint. */
1575 0 /* roundint. */
1576 }
1577 },
1578 /* Vector */
1579 {
1580 COSTS_N_INSNS (1) /* alu. */
1581 }
1582 };
1583
1584 const struct tune_params arm_slowmul_tune =
1585 {
1586 arm_slowmul_rtx_costs,
1587 NULL,
1588 NULL, /* Sched adj cost. */
1589 3, /* Constant limit. */
1590 5, /* Max cond insns. */
1591 ARM_PREFETCH_NOT_BENEFICIAL,
1592 true, /* Prefer constant pool. */
1593 arm_default_branch_cost,
1594 false, /* Prefer LDRD/STRD. */
1595 {true, true}, /* Prefer non short circuit. */
1596 &arm_default_vec_cost, /* Vectorizer costs. */
1597 false, /* Prefer Neon for 64-bits bitops. */
1598 false, false /* Prefer 32-bit encodings. */
1599 };
1600
1601 const struct tune_params arm_fastmul_tune =
1602 {
1603 arm_fastmul_rtx_costs,
1604 NULL,
1605 NULL, /* Sched adj cost. */
1606 1, /* Constant limit. */
1607 5, /* Max cond insns. */
1608 ARM_PREFETCH_NOT_BENEFICIAL,
1609 true, /* Prefer constant pool. */
1610 arm_default_branch_cost,
1611 false, /* Prefer LDRD/STRD. */
1612 {true, true}, /* Prefer non short circuit. */
1613 &arm_default_vec_cost, /* Vectorizer costs. */
1614 false, /* Prefer Neon for 64-bits bitops. */
1615 false, false /* Prefer 32-bit encodings. */
1616 };
1617
1618 /* StrongARM has early execution of branches, so a sequence that is worth
1619 skipping is shorter. Set max_insns_skipped to a lower value. */
1620
1621 const struct tune_params arm_strongarm_tune =
1622 {
1623 arm_fastmul_rtx_costs,
1624 NULL,
1625 NULL, /* Sched adj cost. */
1626 1, /* Constant limit. */
1627 3, /* Max cond insns. */
1628 ARM_PREFETCH_NOT_BENEFICIAL,
1629 true, /* Prefer constant pool. */
1630 arm_default_branch_cost,
1631 false, /* Prefer LDRD/STRD. */
1632 {true, true}, /* Prefer non short circuit. */
1633 &arm_default_vec_cost, /* Vectorizer costs. */
1634 false, /* Prefer Neon for 64-bits bitops. */
1635 false, false /* Prefer 32-bit encodings. */
1636 };
1637
1638 const struct tune_params arm_xscale_tune =
1639 {
1640 arm_xscale_rtx_costs,
1641 NULL,
1642 xscale_sched_adjust_cost,
1643 2, /* Constant limit. */
1644 3, /* Max cond insns. */
1645 ARM_PREFETCH_NOT_BENEFICIAL,
1646 true, /* Prefer constant pool. */
1647 arm_default_branch_cost,
1648 false, /* Prefer LDRD/STRD. */
1649 {true, true}, /* Prefer non short circuit. */
1650 &arm_default_vec_cost, /* Vectorizer costs. */
1651 false, /* Prefer Neon for 64-bits bitops. */
1652 false, false /* Prefer 32-bit encodings. */
1653 };
1654
1655 const struct tune_params arm_9e_tune =
1656 {
1657 arm_9e_rtx_costs,
1658 NULL,
1659 NULL, /* Sched adj cost. */
1660 1, /* Constant limit. */
1661 5, /* Max cond insns. */
1662 ARM_PREFETCH_NOT_BENEFICIAL,
1663 true, /* Prefer constant pool. */
1664 arm_default_branch_cost,
1665 false, /* Prefer LDRD/STRD. */
1666 {true, true}, /* Prefer non short circuit. */
1667 &arm_default_vec_cost, /* Vectorizer costs. */
1668 false, /* Prefer Neon for 64-bits bitops. */
1669 false, false /* Prefer 32-bit encodings. */
1670 };
1671
1672 const struct tune_params arm_v6t2_tune =
1673 {
1674 arm_9e_rtx_costs,
1675 NULL,
1676 NULL, /* Sched adj cost. */
1677 1, /* Constant limit. */
1678 5, /* Max cond insns. */
1679 ARM_PREFETCH_NOT_BENEFICIAL,
1680 false, /* Prefer constant pool. */
1681 arm_default_branch_cost,
1682 false, /* Prefer LDRD/STRD. */
1683 {true, true}, /* Prefer non short circuit. */
1684 &arm_default_vec_cost, /* Vectorizer costs. */
1685 false, /* Prefer Neon for 64-bits bitops. */
1686 false, false /* Prefer 32-bit encodings. */
1687 };
1688
1689 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1690 const struct tune_params arm_cortex_tune =
1691 {
1692 arm_9e_rtx_costs,
1693 &generic_extra_costs,
1694 NULL, /* Sched adj cost. */
1695 1, /* Constant limit. */
1696 5, /* Max cond insns. */
1697 ARM_PREFETCH_NOT_BENEFICIAL,
1698 false, /* Prefer constant pool. */
1699 arm_default_branch_cost,
1700 false, /* Prefer LDRD/STRD. */
1701 {true, true}, /* Prefer non short circuit. */
1702 &arm_default_vec_cost, /* Vectorizer costs. */
1703 false, /* Prefer Neon for 64-bits bitops. */
1704 false, false /* Prefer 32-bit encodings. */
1705 };
1706
1707 const struct tune_params arm_cortex_a8_tune =
1708 {
1709 arm_9e_rtx_costs,
1710 &cortexa8_extra_costs,
1711 NULL, /* Sched adj cost. */
1712 1, /* Constant limit. */
1713 5, /* Max cond insns. */
1714 ARM_PREFETCH_NOT_BENEFICIAL,
1715 false, /* Prefer constant pool. */
1716 arm_default_branch_cost,
1717 false, /* Prefer LDRD/STRD. */
1718 {true, true}, /* Prefer non short circuit. */
1719 &arm_default_vec_cost, /* Vectorizer costs. */
1720 false, /* Prefer Neon for 64-bits bitops. */
1721 false, false /* Prefer 32-bit encodings. */
1722 };
1723
1724 const struct tune_params arm_cortex_a7_tune =
1725 {
1726 arm_9e_rtx_costs,
1727 &cortexa7_extra_costs,
1728 NULL,
1729 1, /* Constant limit. */
1730 5, /* Max cond insns. */
1731 ARM_PREFETCH_NOT_BENEFICIAL,
1732 false, /* Prefer constant pool. */
1733 arm_default_branch_cost,
1734 false, /* Prefer LDRD/STRD. */
1735 {true, true}, /* Prefer non short circuit. */
1736 &arm_default_vec_cost, /* Vectorizer costs. */
1737 false, /* Prefer Neon for 64-bits bitops. */
1738 false, false /* Prefer 32-bit encodings. */
1739 };
1740
1741 const struct tune_params arm_cortex_a15_tune =
1742 {
1743 arm_9e_rtx_costs,
1744 &cortexa15_extra_costs,
1745 NULL, /* Sched adj cost. */
1746 1, /* Constant limit. */
1747 2, /* Max cond insns. */
1748 ARM_PREFETCH_NOT_BENEFICIAL,
1749 false, /* Prefer constant pool. */
1750 arm_default_branch_cost,
1751 true, /* Prefer LDRD/STRD. */
1752 {true, true}, /* Prefer non short circuit. */
1753 &arm_default_vec_cost, /* Vectorizer costs. */
1754 false, /* Prefer Neon for 64-bits bitops. */
1755 true, true /* Prefer 32-bit encodings. */
1756 };
1757
1758 const struct tune_params arm_cortex_a53_tune =
1759 {
1760 arm_9e_rtx_costs,
1761 &cortexa53_extra_costs,
1762 NULL, /* Scheduler cost adjustment. */
1763 1, /* Constant limit. */
1764 5, /* Max cond insns. */
1765 ARM_PREFETCH_NOT_BENEFICIAL,
1766 false, /* Prefer constant pool. */
1767 arm_default_branch_cost,
1768 false, /* Prefer LDRD/STRD. */
1769 {true, true}, /* Prefer non short circuit. */
1770 &arm_default_vec_cost, /* Vectorizer costs. */
1771 false, /* Prefer Neon for 64-bits bitops. */
1772 false, false /* Prefer 32-bit encodings. */
1773 };
1774
1775 const struct tune_params arm_cortex_a57_tune =
1776 {
1777 arm_9e_rtx_costs,
1778 &cortexa57_extra_costs,
1779 NULL, /* Scheduler cost adjustment. */
1780 1, /* Constant limit. */
1781 2, /* Max cond insns. */
1782 ARM_PREFETCH_NOT_BENEFICIAL,
1783 false, /* Prefer constant pool. */
1784 arm_default_branch_cost,
1785 true, /* Prefer LDRD/STRD. */
1786 {true, true}, /* Prefer non short circuit. */
1787 &arm_default_vec_cost, /* Vectorizer costs. */
1788 false, /* Prefer Neon for 64-bits bitops. */
1789 true, true /* Prefer 32-bit encodings. */
1790 };
1791
1792 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1793 less appealing. Set max_insns_skipped to a low value. */
1794
1795 const struct tune_params arm_cortex_a5_tune =
1796 {
1797 arm_9e_rtx_costs,
1798 NULL,
1799 NULL, /* Sched adj cost. */
1800 1, /* Constant limit. */
1801 1, /* Max cond insns. */
1802 ARM_PREFETCH_NOT_BENEFICIAL,
1803 false, /* Prefer constant pool. */
1804 arm_cortex_a5_branch_cost,
1805 false, /* Prefer LDRD/STRD. */
1806 {false, false}, /* Prefer non short circuit. */
1807 &arm_default_vec_cost, /* Vectorizer costs. */
1808 false, /* Prefer Neon for 64-bits bitops. */
1809 false, false /* Prefer 32-bit encodings. */
1810 };
1811
1812 const struct tune_params arm_cortex_a9_tune =
1813 {
1814 arm_9e_rtx_costs,
1815 &cortexa9_extra_costs,
1816 cortex_a9_sched_adjust_cost,
1817 1, /* Constant limit. */
1818 5, /* Max cond insns. */
1819 ARM_PREFETCH_BENEFICIAL(4,32,32),
1820 false, /* Prefer constant pool. */
1821 arm_default_branch_cost,
1822 false, /* Prefer LDRD/STRD. */
1823 {true, true}, /* Prefer non short circuit. */
1824 &arm_default_vec_cost, /* Vectorizer costs. */
1825 false, /* Prefer Neon for 64-bits bitops. */
1826 false, false /* Prefer 32-bit encodings. */
1827 };
1828
1829 const struct tune_params arm_cortex_a12_tune =
1830 {
1831 arm_9e_rtx_costs,
1832 &cortexa12_extra_costs,
1833 NULL,
1834 1, /* Constant limit. */
1835 5, /* Max cond insns. */
1836 ARM_PREFETCH_BENEFICIAL(4,32,32),
1837 false, /* Prefer constant pool. */
1838 arm_default_branch_cost,
1839 true, /* Prefer LDRD/STRD. */
1840 {true, true}, /* Prefer non short circuit. */
1841 &arm_default_vec_cost, /* Vectorizer costs. */
1842 false, /* Prefer Neon for 64-bits bitops. */
1843 false, false /* Prefer 32-bit encodings. */
1844 };
1845
1846 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT each take a single
1847 cycle, so materialising a 32-bit constant costs two cycles. An LDR from the
1848 constant pool likewise takes two cycles, but mildly increases pipelining opportunity (consecutive
1849 loads/stores can be pipelined together, saving one cycle), and may also
1850 improve icache utilisation. Hence we prefer the constant pool for such
1851 processors. */
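/* For illustration only (not a sequence this file emits verbatim), the two
   ways of materialising the constant 0x12345678 on such a core look roughly
   like:

	movw	r0, #0x5678	@ 1 cycle
	movt	r0, #0x1234	@ 1 cycle

   versus

	ldr	r0, .LC0	@ 2 cycles, but can pipeline with
				@ neighbouring loads/stores
   .LC0:	.word	0x12345678
 */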
1852
1853 const struct tune_params arm_v7m_tune =
1854 {
1855 arm_9e_rtx_costs,
1856 &v7m_extra_costs,
1857 NULL, /* Sched adj cost. */
1858 1, /* Constant limit. */
1859 2, /* Max cond insns. */
1860 ARM_PREFETCH_NOT_BENEFICIAL,
1861 true, /* Prefer constant pool. */
1862 arm_cortex_m_branch_cost,
1863 false, /* Prefer LDRD/STRD. */
1864 {false, false}, /* Prefer non short circuit. */
1865 &arm_default_vec_cost, /* Vectorizer costs. */
1866 false, /* Prefer Neon for 64-bits bitops. */
1867 false, false /* Prefer 32-bit encodings. */
1868 };
1869
1870 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1871 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1872 const struct tune_params arm_v6m_tune =
1873 {
1874 arm_9e_rtx_costs,
1875 NULL,
1876 NULL, /* Sched adj cost. */
1877 1, /* Constant limit. */
1878 5, /* Max cond insns. */
1879 ARM_PREFETCH_NOT_BENEFICIAL,
1880 false, /* Prefer constant pool. */
1881 arm_default_branch_cost,
1882 false, /* Prefer LDRD/STRD. */
1883 {false, false}, /* Prefer non short circuit. */
1884 &arm_default_vec_cost, /* Vectorizer costs. */
1885 false, /* Prefer Neon for 64-bits bitops. */
1886 false, false /* Prefer 32-bit encodings. */
1887 };
1888
1889 const struct tune_params arm_fa726te_tune =
1890 {
1891 arm_9e_rtx_costs,
1892 NULL,
1893 fa726te_sched_adjust_cost,
1894 1, /* Constant limit. */
1895 5, /* Max cond insns. */
1896 ARM_PREFETCH_NOT_BENEFICIAL,
1897 true, /* Prefer constant pool. */
1898 arm_default_branch_cost,
1899 false, /* Prefer LDRD/STRD. */
1900 {true, true}, /* Prefer non short circuit. */
1901 &arm_default_vec_cost, /* Vectorizer costs. */
1902 false, /* Prefer Neon for 64-bits bitops. */
1903 false, false /* Prefer 32-bit encodings. */
1904 };
1905
1906
1907 /* Not all of these give usefully different compilation alternatives,
1908 but there is no simple way of generalizing them. */
1909 static const struct processors all_cores[] =
1910 {
1911 /* ARM Cores */
1912 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1913 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1914 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1915 #include "arm-cores.def"
1916 #undef ARM_CORE
1917 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1918 };
1919
1920 static const struct processors all_architectures[] =
1921 {
1922 /* ARM Architectures */
1923 /* We don't specify tuning costs here as they will be figured out
1924 from the core. */
1925
1926 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1927 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1928 #include "arm-arches.def"
1929 #undef ARM_ARCH
1930 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1931 };
1932
1933
1934 /* These are populated as command-line arguments are processed, or NULL
1935 if not specified. */
1936 static const struct processors *arm_selected_arch;
1937 static const struct processors *arm_selected_cpu;
1938 static const struct processors *arm_selected_tune;
1939
1940 /* The name of the preprocessor macro to define for this architecture. */
1941
1942 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1943
1944 /* Available values for -mfpu=. */
1945
1946 static const struct arm_fpu_desc all_fpus[] =
1947 {
1948 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1949 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1950 #include "arm-fpus.def"
1951 #undef ARM_FPU
1952 };
1953
1954
1955 /* Supported TLS relocations. */
1956
1957 enum tls_reloc {
1958 TLS_GD32,
1959 TLS_LDM32,
1960 TLS_LDO32,
1961 TLS_IE32,
1962 TLS_LE32,
1963 TLS_DESCSEQ /* GNU scheme */
1964 };
1965
1966 /* The maximum number of insns to be used when loading a constant. */
1967 inline static int
1968 arm_constant_limit (bool size_p)
1969 {
1970 return size_p ? 1 : current_tune->constant_limit;
1971 }
1972
1973 /* Emit an insn that's a simple single-set. Both the operands must be known
1974 to be valid. */
1975 inline static rtx
1976 emit_set_insn (rtx x, rtx y)
1977 {
1978 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1979 }
1980
1981 /* Return the number of bits set in VALUE. */
1982 static unsigned
1983 bit_count (unsigned long value)
1984 {
1985 unsigned long count = 0;
1986
1987 while (value)
1988 {
1989 count++;
1990 value &= value - 1; /* Clear the least-significant set bit. */
1991 }
1992
1993 return count;
1994 }
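/* For example, bit_count (22) sees VALUE step through 0b10110 -> 0b10100
   -> 0b10000 -> 0, one iteration per set bit, and returns 3.  */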
1995
1996 typedef struct
1997 {
1998 enum machine_mode mode;
1999 const char *name;
2000 } arm_fixed_mode_set;
2001
2002 /* A small helper for setting fixed-point libfuncs. */
2003
2004 static void
2005 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
2006 const char *funcname, const char *modename,
2007 int num_suffix)
2008 {
2009 char buffer[50];
2010
2011 if (num_suffix == 0)
2012 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2013 else
2014 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2015
2016 set_optab_libfunc (optable, mode, buffer);
2017 }
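/* For example, the calls below in arm_init_libfuncs with FUNCNAME "add",
   MODENAME "qq" and NUM_SUFFIX 3 register the name "__gnu_addqq3" for
   add_optab on QQmode.  */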
2018
2019 static void
2020 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
2021 enum machine_mode from, const char *funcname,
2022 const char *toname, const char *fromname)
2023 {
2024 char buffer[50];
2025 const char *maybe_suffix_2 = "";
2026
2027 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2028 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2029 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2030 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2031 maybe_suffix_2 = "2";
2032
2033 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2034 maybe_suffix_2);
2035
2036 set_conv_libfunc (optable, to, from, buffer);
2037 }
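/* For example, a conversion from SQmode ("sq") to DQmode ("dq") -- two signed
   fract modes -- keeps the "2" suffix and yields "__gnu_fractsqdq2", whereas
   a conversion from SImode ("si") to SAmode ("sa") drops it and yields
   "__gnu_fractsisa".  */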
2038
2039 /* Set up library functions unique to ARM. */
2040
2041 static void
2042 arm_init_libfuncs (void)
2043 {
2044 /* For Linux, we have access to kernel support for atomic operations. */
2045 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2046 init_sync_libfuncs (2 * UNITS_PER_WORD);
2047
2048 /* There are no special library functions unless we are using the
2049 ARM BPABI. */
2050 if (!TARGET_BPABI)
2051 return;
2052
2053 /* The functions below are described in Section 4 of the "Run-Time
2054 ABI for the ARM architecture", Version 1.0. */
2055
2056 /* Double-precision floating-point arithmetic. Table 2. */
2057 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2058 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2059 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2060 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2061 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2062
2063 /* Double-precision comparisons. Table 3. */
2064 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2065 set_optab_libfunc (ne_optab, DFmode, NULL);
2066 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2067 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2068 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2069 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2070 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2071
2072 /* Single-precision floating-point arithmetic. Table 4. */
2073 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2074 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2075 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2076 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2077 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2078
2079 /* Single-precision comparisons. Table 5. */
2080 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2081 set_optab_libfunc (ne_optab, SFmode, NULL);
2082 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2083 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2084 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2085 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2086 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2087
2088 /* Floating-point to integer conversions. Table 6. */
2089 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2090 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2091 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2092 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2093 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2094 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2095 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2096 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2097
2098 /* Conversions between floating types. Table 7. */
2099 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2100 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2101
2102 /* Integer to floating-point conversions. Table 8. */
2103 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2104 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2105 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2106 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2107 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2108 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2109 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2110 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2111
2112 /* Long long. Table 9. */
2113 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2114 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2115 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2116 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2117 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2118 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2119 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2120 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2121
2122 /* Integer (32/32->32) division. \S 4.3.1. */
2123 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2124 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2125
2126 /* The divmod functions are designed so that they can be used for
2127 plain division, even though they return both the quotient and the
2128 remainder. The quotient is returned in the usual location (i.e.,
2129 r0 for SImode, {r0, r1} for DImode), just as would be expected
2130 for an ordinary division routine. Because the AAPCS calling
2131 conventions specify that all of { r0, r1, r2, r3 } are
2132 call-clobbered registers, there is no need to tell the compiler
2133 explicitly that those registers are clobbered by these
2134 routines. */
2135 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2136 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2137
2138 /* For SImode division the ABI provides div-without-mod routines,
2139 which are faster. */
2140 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2141 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2142
2143 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2144 divmod libcalls instead. */
2145 set_optab_libfunc (smod_optab, DImode, NULL);
2146 set_optab_libfunc (umod_optab, DImode, NULL);
2147 set_optab_libfunc (smod_optab, SImode, NULL);
2148 set_optab_libfunc (umod_optab, SImode, NULL);
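/* So, for example, a plain SImode "a / b" expands to a call to __aeabi_idiv,
   while "a % b" goes through the divmod path, calling __aeabi_idivmod and
   keeping only the remainder half of the result.  */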
2149
2150 /* Half-precision float operations. The compiler handles all operations
2151 with NULL libfuncs by converting to SFmode. */
2152 switch (arm_fp16_format)
2153 {
2154 case ARM_FP16_FORMAT_IEEE:
2155 case ARM_FP16_FORMAT_ALTERNATIVE:
2156
2157 /* Conversions. */
2158 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2159 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2160 ? "__gnu_f2h_ieee"
2161 : "__gnu_f2h_alternative"));
2162 set_conv_libfunc (sext_optab, SFmode, HFmode,
2163 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2164 ? "__gnu_h2f_ieee"
2165 : "__gnu_h2f_alternative"));
2166
2167 /* Arithmetic. */
2168 set_optab_libfunc (add_optab, HFmode, NULL);
2169 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2170 set_optab_libfunc (smul_optab, HFmode, NULL);
2171 set_optab_libfunc (neg_optab, HFmode, NULL);
2172 set_optab_libfunc (sub_optab, HFmode, NULL);
2173
2174 /* Comparisons. */
2175 set_optab_libfunc (eq_optab, HFmode, NULL);
2176 set_optab_libfunc (ne_optab, HFmode, NULL);
2177 set_optab_libfunc (lt_optab, HFmode, NULL);
2178 set_optab_libfunc (le_optab, HFmode, NULL);
2179 set_optab_libfunc (ge_optab, HFmode, NULL);
2180 set_optab_libfunc (gt_optab, HFmode, NULL);
2181 set_optab_libfunc (unord_optab, HFmode, NULL);
2182 break;
2183
2184 default:
2185 break;
2186 }
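/* The NULL entries above mean that, for example, an HFmode addition is
   carried out by widening each operand with __gnu_h2f_ieee (or the
   alternative-format variant), adding in SFmode, and narrowing any __fp16
   result back with __gnu_f2h_ieee.  */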
2187
2188 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2189 {
2190 const arm_fixed_mode_set fixed_arith_modes[] =
2191 {
2192 { QQmode, "qq" },
2193 { UQQmode, "uqq" },
2194 { HQmode, "hq" },
2195 { UHQmode, "uhq" },
2196 { SQmode, "sq" },
2197 { USQmode, "usq" },
2198 { DQmode, "dq" },
2199 { UDQmode, "udq" },
2200 { TQmode, "tq" },
2201 { UTQmode, "utq" },
2202 { HAmode, "ha" },
2203 { UHAmode, "uha" },
2204 { SAmode, "sa" },
2205 { USAmode, "usa" },
2206 { DAmode, "da" },
2207 { UDAmode, "uda" },
2208 { TAmode, "ta" },
2209 { UTAmode, "uta" }
2210 };
2211 const arm_fixed_mode_set fixed_conv_modes[] =
2212 {
2213 { QQmode, "qq" },
2214 { UQQmode, "uqq" },
2215 { HQmode, "hq" },
2216 { UHQmode, "uhq" },
2217 { SQmode, "sq" },
2218 { USQmode, "usq" },
2219 { DQmode, "dq" },
2220 { UDQmode, "udq" },
2221 { TQmode, "tq" },
2222 { UTQmode, "utq" },
2223 { HAmode, "ha" },
2224 { UHAmode, "uha" },
2225 { SAmode, "sa" },
2226 { USAmode, "usa" },
2227 { DAmode, "da" },
2228 { UDAmode, "uda" },
2229 { TAmode, "ta" },
2230 { UTAmode, "uta" },
2231 { QImode, "qi" },
2232 { HImode, "hi" },
2233 { SImode, "si" },
2234 { DImode, "di" },
2235 { TImode, "ti" },
2236 { SFmode, "sf" },
2237 { DFmode, "df" }
2238 };
2239 unsigned int i, j;
2240
2241 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2242 {
2243 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2244 "add", fixed_arith_modes[i].name, 3);
2245 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2246 "ssadd", fixed_arith_modes[i].name, 3);
2247 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2248 "usadd", fixed_arith_modes[i].name, 3);
2249 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2250 "sub", fixed_arith_modes[i].name, 3);
2251 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2252 "sssub", fixed_arith_modes[i].name, 3);
2253 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2254 "ussub", fixed_arith_modes[i].name, 3);
2255 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2256 "mul", fixed_arith_modes[i].name, 3);
2257 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2258 "ssmul", fixed_arith_modes[i].name, 3);
2259 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2260 "usmul", fixed_arith_modes[i].name, 3);
2261 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2262 "div", fixed_arith_modes[i].name, 3);
2263 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2264 "udiv", fixed_arith_modes[i].name, 3);
2265 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2266 "ssdiv", fixed_arith_modes[i].name, 3);
2267 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2268 "usdiv", fixed_arith_modes[i].name, 3);
2269 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2270 "neg", fixed_arith_modes[i].name, 2);
2271 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2272 "ssneg", fixed_arith_modes[i].name, 2);
2273 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2274 "usneg", fixed_arith_modes[i].name, 2);
2275 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2276 "ashl", fixed_arith_modes[i].name, 3);
2277 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2278 "ashr", fixed_arith_modes[i].name, 3);
2279 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2280 "lshr", fixed_arith_modes[i].name, 3);
2281 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2282 "ssashl", fixed_arith_modes[i].name, 3);
2283 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2284 "usashl", fixed_arith_modes[i].name, 3);
2285 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2286 "cmp", fixed_arith_modes[i].name, 2);
2287 }
2288
2289 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2290 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2291 {
2292 if (i == j
2293 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2294 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2295 continue;
2296
2297 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2298 fixed_conv_modes[j].mode, "fract",
2299 fixed_conv_modes[i].name,
2300 fixed_conv_modes[j].name);
2301 arm_set_fixed_conv_libfunc (satfract_optab,
2302 fixed_conv_modes[i].mode,
2303 fixed_conv_modes[j].mode, "satfract",
2304 fixed_conv_modes[i].name,
2305 fixed_conv_modes[j].name);
2306 arm_set_fixed_conv_libfunc (fractuns_optab,
2307 fixed_conv_modes[i].mode,
2308 fixed_conv_modes[j].mode, "fractuns",
2309 fixed_conv_modes[i].name,
2310 fixed_conv_modes[j].name);
2311 arm_set_fixed_conv_libfunc (satfractuns_optab,
2312 fixed_conv_modes[i].mode,
2313 fixed_conv_modes[j].mode, "satfractuns",
2314 fixed_conv_modes[i].name,
2315 fixed_conv_modes[j].name);
2316 }
2317 }
2318
2319 if (TARGET_AAPCS_BASED)
2320 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2321 }
2322
2323 /* On AAPCS systems, this is the "struct __va_list". */
2324 static GTY(()) tree va_list_type;
2325
2326 /* Return the type to use as __builtin_va_list. */
2327 static tree
2328 arm_build_builtin_va_list (void)
2329 {
2330 tree va_list_name;
2331 tree ap_field;
2332
2333 if (!TARGET_AAPCS_BASED)
2334 return std_build_builtin_va_list ();
2335
2336 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2337 defined as:
2338
2339 struct __va_list
2340 {
2341 void *__ap;
2342 };
2343
2344 The C Library ABI further reinforces this definition in \S
2345 4.1.
2346
2347 We must follow this definition exactly. The structure tag
2348 name is visible in C++ mangled names, and thus forms a part
2349 of the ABI. The field name may be used by people who
2350 #include <stdarg.h>. */
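/* For instance, under the ARM C++ ABI a va_list parameter mangles as
   "St9__va_list" (as if it were std::__va_list), so renaming the tag
   would silently break C++ link compatibility.  */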
2351 /* Create the type. */
2352 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2353 /* Give it the required name. */
2354 va_list_name = build_decl (BUILTINS_LOCATION,
2355 TYPE_DECL,
2356 get_identifier ("__va_list"),
2357 va_list_type);
2358 DECL_ARTIFICIAL (va_list_name) = 1;
2359 TYPE_NAME (va_list_type) = va_list_name;
2360 TYPE_STUB_DECL (va_list_type) = va_list_name;
2361 /* Create the __ap field. */
2362 ap_field = build_decl (BUILTINS_LOCATION,
2363 FIELD_DECL,
2364 get_identifier ("__ap"),
2365 ptr_type_node);
2366 DECL_ARTIFICIAL (ap_field) = 1;
2367 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2368 TYPE_FIELDS (va_list_type) = ap_field;
2369 /* Compute its layout. */
2370 layout_type (va_list_type);
2371
2372 return va_list_type;
2373 }
2374
2375 /* Return an expression of type "void *" pointing to the next
2376 available argument in a variable-argument list. VALIST is the
2377 user-level va_list object, of type __builtin_va_list. */
2378 static tree
2379 arm_extract_valist_ptr (tree valist)
2380 {
2381 if (TREE_TYPE (valist) == error_mark_node)
2382 return error_mark_node;
2383
2384 /* On an AAPCS target, the pointer is stored within "struct
2385 va_list". */
2386 if (TARGET_AAPCS_BASED)
2387 {
2388 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2389 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2390 valist, ap_field, NULL_TREE);
2391 }
2392
2393 return valist;
2394 }
2395
2396 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2397 static void
2398 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2399 {
2400 valist = arm_extract_valist_ptr (valist);
2401 std_expand_builtin_va_start (valist, nextarg);
2402 }
2403
2404 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2405 static tree
2406 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2407 gimple_seq *post_p)
2408 {
2409 valist = arm_extract_valist_ptr (valist);
2410 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2411 }
2412
2413 /* Fix up any incompatible options that the user has specified. */
2414 static void
2415 arm_option_override (void)
2416 {
2417 if (global_options_set.x_arm_arch_option)
2418 arm_selected_arch = &all_architectures[arm_arch_option];
2419
2420 if (global_options_set.x_arm_cpu_option)
2421 {
2422 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2423 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2424 }
2425
2426 if (global_options_set.x_arm_tune_option)
2427 arm_selected_tune = &all_cores[(int) arm_tune_option];
2428
2429 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2430 SUBTARGET_OVERRIDE_OPTIONS;
2431 #endif
2432
2433 if (arm_selected_arch)
2434 {
2435 if (arm_selected_cpu)
2436 {
2437 /* Check for conflict between mcpu and march. */
2438 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2439 {
2440 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2441 arm_selected_cpu->name, arm_selected_arch->name);
2442 /* -march wins for code generation.
2443 -mcpu wins for default tuning. */
2444 if (!arm_selected_tune)
2445 arm_selected_tune = arm_selected_cpu;
2446
2447 arm_selected_cpu = arm_selected_arch;
2448 }
2449 else
2450 /* -mcpu wins. */
2451 arm_selected_arch = NULL;
2452 }
2453 else
2454 /* Pick a CPU based on the architecture. */
2455 arm_selected_cpu = arm_selected_arch;
2456 }
2457
2458 /* If the user did not specify a processor, choose one for them. */
2459 if (!arm_selected_cpu)
2460 {
2461 const struct processors * sel;
2462 unsigned int sought;
2463
2464 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2465 if (!arm_selected_cpu->name)
2466 {
2467 #ifdef SUBTARGET_CPU_DEFAULT
2468 /* Use the subtarget default CPU if none was specified by
2469 configure. */
2470 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2471 #endif
2472 /* Default to ARM6. */
2473 if (!arm_selected_cpu->name)
2474 arm_selected_cpu = &all_cores[arm6];
2475 }
2476
2477 sel = arm_selected_cpu;
2478 insn_flags = sel->flags;
2479
2480 /* Now check to see if the user has specified some command line
2481 switches that require certain abilities from the CPU. */
2482 sought = 0;
2483
2484 if (TARGET_INTERWORK || TARGET_THUMB)
2485 {
2486 sought |= (FL_THUMB | FL_MODE32);
2487
2488 /* There are no ARM processors that support both APCS-26 and
2489 interworking. Therefore we force FL_MODE26 to be removed
2490 from insn_flags here (if it was set), so that the search
2491 below will always be able to find a compatible processor. */
2492 insn_flags &= ~FL_MODE26;
2493 }
2494
2495 if (sought != 0 && ((sought & insn_flags) != sought))
2496 {
2497 /* Try to locate a CPU type that supports all of the abilities
2498 of the default CPU, plus the extra abilities requested by
2499 the user. */
2500 for (sel = all_cores; sel->name != NULL; sel++)
2501 if ((sel->flags & sought) == (sought | insn_flags))
2502 break;
2503
2504 if (sel->name == NULL)
2505 {
2506 unsigned current_bit_count = 0;
2507 const struct processors * best_fit = NULL;
2508
2509 /* Ideally we would like to issue an error message here
2510 saying that it was not possible to find a CPU compatible
2511 with the default CPU, but which also supports the command
2512 line options specified by the programmer, and so they
2513 ought to use the -mcpu=<name> command line option to
2514 override the default CPU type.
2515
2516 If we cannot find a cpu that has both the
2517 characteristics of the default cpu and the given
2518 command line options, we scan the array again looking
2519 for a best match. */
2520 for (sel = all_cores; sel->name != NULL; sel++)
2521 if ((sel->flags & sought) == sought)
2522 {
2523 unsigned count;
2524
2525 count = bit_count (sel->flags & insn_flags);
2526
2527 if (count >= current_bit_count)
2528 {
2529 best_fit = sel;
2530 current_bit_count = count;
2531 }
2532 }
2533
2534 gcc_assert (best_fit);
2535 sel = best_fit;
2536 }
2537
2538 arm_selected_cpu = sel;
2539 }
2540 }
2541
2542 gcc_assert (arm_selected_cpu);
2543 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2544 if (!arm_selected_tune)
2545 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2546
2547 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2548 insn_flags = arm_selected_cpu->flags;
2549 arm_base_arch = arm_selected_cpu->base_arch;
2550
2551 arm_tune = arm_selected_tune->core;
2552 tune_flags = arm_selected_tune->flags;
2553 current_tune = arm_selected_tune->tune;
2554
2555 /* Make sure that the processor choice does not conflict with any of the
2556 other command line choices. */
2557 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2558 error ("target CPU does not support ARM mode");
2559
2560 /* BPABI targets use linker tricks to allow interworking on cores
2561 without thumb support. */
2562 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2563 {
2564 warning (0, "target CPU does not support interworking" );
2565 target_flags &= ~MASK_INTERWORK;
2566 }
2567
2568 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2569 {
2570 warning (0, "target CPU does not support THUMB instructions");
2571 target_flags &= ~MASK_THUMB;
2572 }
2573
2574 if (TARGET_APCS_FRAME && TARGET_THUMB)
2575 {
2576 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2577 target_flags &= ~MASK_APCS_FRAME;
2578 }
2579
2580 /* Callee super interworking implies thumb interworking. Adding
2581 this to the flags here simplifies the logic elsewhere. */
2582 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2583 target_flags |= MASK_INTERWORK;
2584
2585 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if called
2586 from here, where no function is currently being compiled. */
2587 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2588 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2589
2590 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2591 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2592
2593 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2594 {
2595 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2596 target_flags |= MASK_APCS_FRAME;
2597 }
2598
2599 if (TARGET_POKE_FUNCTION_NAME)
2600 target_flags |= MASK_APCS_FRAME;
2601
2602 if (TARGET_APCS_REENT && flag_pic)
2603 error ("-fpic and -mapcs-reent are incompatible");
2604
2605 if (TARGET_APCS_REENT)
2606 warning (0, "APCS reentrant code not supported. Ignored");
2607
2608 /* If this target is normally configured to use APCS frames, warn if they
2609 are turned off and debugging is turned on. */
2610 if (TARGET_ARM
2611 && write_symbols != NO_DEBUG
2612 && !TARGET_APCS_FRAME
2613 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2614 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2615
2616 if (TARGET_APCS_FLOAT)
2617 warning (0, "passing floating point arguments in fp regs not yet supported");
2618
2619 if (TARGET_LITTLE_WORDS)
2620 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
2621 "will be removed in a future release");
2622
2623 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2624 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2625 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2626 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2627 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2628 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2629 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2630 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2631 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2632 arm_arch6m = arm_arch6 && !arm_arch_notm;
2633 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2634 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2635 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2636 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2637 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2638
2639 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2640 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2641 thumb_code = TARGET_ARM == 0;
2642 thumb1_code = TARGET_THUMB1 != 0;
2643 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2644 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2645 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2646 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2647 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2648 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2649 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2650 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2651 if (arm_restrict_it == 2)
2652 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2653
2654 if (!TARGET_THUMB2)
2655 arm_restrict_it = 0;
2656
2657 /* If we are not using the default (ARM mode) section anchor offset
2658 ranges, then set the correct ranges now. */
2659 if (TARGET_THUMB1)
2660 {
2661 /* Thumb-1 LDR instructions cannot have negative offsets.
2662 Permissible positive offset ranges are 5-bit (for byte loads),
2663 6-bit (for halfword loads), or 7-bit (for word loads).
2664 Empirical results suggest a 7-bit anchor range gives the best
2665 overall code size. */
2666 targetm.min_anchor_offset = 0;
2667 targetm.max_anchor_offset = 127;
2668 }
2669 else if (TARGET_THUMB2)
2670 {
2671 /* The minimum is set such that the total size of the block
2672 for a particular anchor is 248 + 1 + 4095 bytes, which is
2673 divisible by eight, ensuring natural spacing of anchors. */
2674 targetm.min_anchor_offset = -248;
2675 targetm.max_anchor_offset = 4095;
2676 }
2677
2678 /* V5 code we generate is completely interworking capable, so we turn off
2679 TARGET_INTERWORK here to avoid many tests later on. */
2680
2681 /* XXX However, we must pass the right pre-processor defines to CPP
2682 or GLD can get confused. This is a hack. */
2683 if (TARGET_INTERWORK)
2684 arm_cpp_interwork = 1;
2685
2686 if (arm_arch5)
2687 target_flags &= ~MASK_INTERWORK;
2688
2689 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2690 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2691
2692 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2693 error ("iwmmxt abi requires an iwmmxt capable cpu");
2694
2695 if (!global_options_set.x_arm_fpu_index)
2696 {
2697 const char *target_fpu_name;
2698 bool ok;
2699
2700 #ifdef FPUTYPE_DEFAULT
2701 target_fpu_name = FPUTYPE_DEFAULT;
2702 #else
2703 target_fpu_name = "vfp";
2704 #endif
2705
2706 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2707 CL_TARGET);
2708 gcc_assert (ok);
2709 }
2710
2711 arm_fpu_desc = &all_fpus[arm_fpu_index];
2712
2713 switch (arm_fpu_desc->model)
2714 {
2715 case ARM_FP_MODEL_VFP:
2716 arm_fpu_attr = FPU_VFP;
2717 break;
2718
2719 default:
2720 gcc_unreachable();
2721 }
2722
2723 if (TARGET_AAPCS_BASED)
2724 {
2725 if (TARGET_CALLER_INTERWORKING)
2726 error ("AAPCS does not support -mcaller-super-interworking");
2727 else
2728 if (TARGET_CALLEE_INTERWORKING)
2729 error ("AAPCS does not support -mcallee-super-interworking");
2730 }
2731
2732 /* iWMMXt and NEON are incompatible. */
2733 if (TARGET_IWMMXT && TARGET_NEON)
2734 error ("iWMMXt and NEON are incompatible");
2735
2736 /* iWMMXt unsupported under Thumb mode. */
2737 if (TARGET_THUMB && TARGET_IWMMXT)
2738 error ("iWMMXt unsupported under Thumb mode");
2739
2740 /* __fp16 support currently assumes the core has ldrh. */
2741 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2742 sorry ("__fp16 and no ldrh");
2743
2744 /* If soft-float is specified then don't use FPU. */
2745 if (TARGET_SOFT_FLOAT)
2746 arm_fpu_attr = FPU_NONE;
2747
2748 if (TARGET_AAPCS_BASED)
2749 {
2750 if (arm_abi == ARM_ABI_IWMMXT)
2751 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2752 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2753 && TARGET_HARD_FLOAT
2754 && TARGET_VFP)
2755 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2756 else
2757 arm_pcs_default = ARM_PCS_AAPCS;
2758 }
2759 else
2760 {
2761 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2762 sorry ("-mfloat-abi=hard and VFP");
2763
2764 if (arm_abi == ARM_ABI_APCS)
2765 arm_pcs_default = ARM_PCS_APCS;
2766 else
2767 arm_pcs_default = ARM_PCS_ATPCS;
2768 }
2769
2770 /* For arm2/3 there is no need to do any scheduling if we are doing
2771 software floating-point. */
2772 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2773 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2774
2775 /* Use the cp15 method if it is available. */
2776 if (target_thread_pointer == TP_AUTO)
2777 {
2778 if (arm_arch6k && !TARGET_THUMB1)
2779 target_thread_pointer = TP_CP15;
2780 else
2781 target_thread_pointer = TP_SOFT;
2782 }
2783
2784 if (TARGET_HARD_TP && TARGET_THUMB1)
2785 error ("can not use -mtp=cp15 with 16-bit Thumb");
2786
2787 /* Override the default structure alignment for AAPCS ABI. */
2788 if (!global_options_set.x_arm_structure_size_boundary)
2789 {
2790 if (TARGET_AAPCS_BASED)
2791 arm_structure_size_boundary = 8;
2792 }
2793 else
2794 {
2795 if (arm_structure_size_boundary != 8
2796 && arm_structure_size_boundary != 32
2797 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2798 {
2799 if (ARM_DOUBLEWORD_ALIGN)
2800 warning (0,
2801 "structure size boundary can only be set to 8, 32 or 64");
2802 else
2803 warning (0, "structure size boundary can only be set to 8 or 32");
2804 arm_structure_size_boundary
2805 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2806 }
2807 }
2808
2809 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2810 {
2811 error ("RTP PIC is incompatible with Thumb");
2812 flag_pic = 0;
2813 }
2814
2815 /* If stack checking is disabled, we can use r10 as the PIC register,
2816 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2817 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2818 {
2819 if (TARGET_VXWORKS_RTP)
2820 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2821 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2822 }
2823
2824 if (flag_pic && TARGET_VXWORKS_RTP)
2825 arm_pic_register = 9;
2826
2827 if (arm_pic_register_string != NULL)
2828 {
2829 int pic_register = decode_reg_name (arm_pic_register_string);
2830
2831 if (!flag_pic)
2832 warning (0, "-mpic-register= is useless without -fpic");
2833
2834 /* Prevent the user from choosing an obviously stupid PIC register. */
2835 else if (pic_register < 0 || call_used_regs[pic_register]
2836 || pic_register == HARD_FRAME_POINTER_REGNUM
2837 || pic_register == STACK_POINTER_REGNUM
2838 || pic_register >= PC_REGNUM
2839 || (TARGET_VXWORKS_RTP
2840 && (unsigned int) pic_register != arm_pic_register))
2841 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2842 else
2843 arm_pic_register = pic_register;
2844 }
2845
2846 if (TARGET_VXWORKS_RTP
2847 && !global_options_set.x_arm_pic_data_is_text_relative)
2848 arm_pic_data_is_text_relative = 0;
2849
2850 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2851 if (fix_cm3_ldrd == 2)
2852 {
2853 if (arm_selected_cpu->core == cortexm3)
2854 fix_cm3_ldrd = 1;
2855 else
2856 fix_cm3_ldrd = 0;
2857 }
2858
2859 /* Enable -munaligned-access by default for
2860 - all ARMv6 architecture-based processors
2861 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2862 - ARMv8 architecture-based processors.
2863
2864 Disable -munaligned-access by default for
2865 - all pre-ARMv6 architecture-based processors
2866 - ARMv6-M architecture-based processors. */
2867
2868 if (unaligned_access == 2)
2869 {
2870 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2871 unaligned_access = 1;
2872 else
2873 unaligned_access = 0;
2874 }
2875 else if (unaligned_access == 1
2876 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2877 {
2878 warning (0, "target CPU does not support unaligned accesses");
2879 unaligned_access = 0;
2880 }
2881
2882 if (TARGET_THUMB1 && flag_schedule_insns)
2883 {
2884 /* Don't warn since it's on by default in -O2. */
2885 flag_schedule_insns = 0;
2886 }
2887
2888 if (optimize_size)
2889 {
2890 /* If optimizing for size, bump the number of instructions that we
2891 are prepared to conditionally execute (even on a StrongARM). */
2892 max_insns_skipped = 6;
2893 }
2894 else
2895 max_insns_skipped = current_tune->max_insns_skipped;
2896
2897 /* Hot/Cold partitioning is not currently supported, since we can't
2898 handle literal pool placement in that case. */
2899 if (flag_reorder_blocks_and_partition)
2900 {
2901 inform (input_location,
2902 "-freorder-blocks-and-partition not supported on this architecture");
2903 flag_reorder_blocks_and_partition = 0;
2904 flag_reorder_blocks = 1;
2905 }
2906
2907 if (flag_pic)
2908 /* Hoisting PIC address calculations more aggressively provides a small,
2909 but measurable, size reduction for PIC code. Therefore, we decrease
2910 the bar for unrestricted expression hoisting to the cost of PIC address
2911 calculation, which is 2 instructions. */
2912 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2913 global_options.x_param_values,
2914 global_options_set.x_param_values);
2915
2916 /* ARM EABI defaults to strict volatile bitfields. */
2917 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2918 && abi_version_at_least(2))
2919 flag_strict_volatile_bitfields = 1;
2920
2921 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
2922 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2923 if (flag_prefetch_loop_arrays < 0
2924 && HAVE_prefetch
2925 && optimize >= 3
2926 && current_tune->num_prefetch_slots > 0)
2927 flag_prefetch_loop_arrays = 1;
2928
2929 /* Set up parameters to be used in the prefetching algorithm. Do not override the
2930 defaults unless we are tuning for a core we have researched values for. */
2931 if (current_tune->num_prefetch_slots > 0)
2932 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2933 current_tune->num_prefetch_slots,
2934 global_options.x_param_values,
2935 global_options_set.x_param_values);
2936 if (current_tune->l1_cache_line_size >= 0)
2937 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2938 current_tune->l1_cache_line_size,
2939 global_options.x_param_values,
2940 global_options_set.x_param_values);
2941 if (current_tune->l1_cache_size >= 0)
2942 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2943 current_tune->l1_cache_size,
2944 global_options.x_param_values,
2945 global_options_set.x_param_values);
2946
2947 /* Use Neon rather than core registers to perform 64-bit
2948 operations. */
2949 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2950 if (use_neon_for_64bits == 1)
2951 prefer_neon_for_64bits = true;
2952
2953 /* Use the alternative scheduling-pressure algorithm by default. */
2954 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
2955 global_options.x_param_values,
2956 global_options_set.x_param_values);
2957
2958 /* Disable shrink-wrap when optimizing function for size, since it tends to
2959 generate additional returns. */
2960 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2961 flag_shrink_wrap = false;
2962 /* TBD: Dwarf info for apcs frame is not handled yet. */
2963 if (TARGET_APCS_FRAME)
2964 flag_shrink_wrap = false;
2965
2966 /* We only support -mslow-flash-data on armv7-m targets. */
2967 if (target_slow_flash_data
2968 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2969 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
2970 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2971
2972 /* Currently, for slow flash data, we just disable literal pools. */
2973 if (target_slow_flash_data)
2974 arm_disable_literal_pool = true;
2975
2976 /* Register global variables with the garbage collector. */
2977 arm_add_gc_roots ();
2978 }
2979
2980 static void
2981 arm_add_gc_roots (void)
2982 {
2983 gcc_obstack_init(&minipool_obstack);
2984 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2985 }
2986 \f
2987 /* A table of known ARM exception types.
2988 For use with the interrupt function attribute. */
2989
2990 typedef struct
2991 {
2992 const char *const arg;
2993 const unsigned long return_value;
2994 }
2995 isr_attribute_arg;
2996
2997 static const isr_attribute_arg isr_attribute_args [] =
2998 {
2999 { "IRQ", ARM_FT_ISR },
3000 { "irq", ARM_FT_ISR },
3001 { "FIQ", ARM_FT_FIQ },
3002 { "fiq", ARM_FT_FIQ },
3003 { "ABORT", ARM_FT_ISR },
3004 { "abort", ARM_FT_ISR },
3005 { "ABORT", ARM_FT_ISR },
3006 { "abort", ARM_FT_ISR },
3007 { "UNDEF", ARM_FT_EXCEPTION },
3008 { "undef", ARM_FT_EXCEPTION },
3009 { "SWI", ARM_FT_EXCEPTION },
3010 { "swi", ARM_FT_EXCEPTION },
3011 { NULL, ARM_FT_NORMAL }
3012 };
3013
3014 /* Returns the (interrupt) function type of the current
3015 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3016
3017 static unsigned long
3018 arm_isr_value (tree argument)
3019 {
3020 const isr_attribute_arg * ptr;
3021 const char * arg;
3022
3023 if (!arm_arch_notm)
3024 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3025
3026 /* No argument - default to IRQ. */
3027 if (argument == NULL_TREE)
3028 return ARM_FT_ISR;
3029
3030 /* Get the value of the argument. */
3031 if (TREE_VALUE (argument) == NULL_TREE
3032 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3033 return ARM_FT_UNKNOWN;
3034
3035 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3036
3037 /* Check it against the list of known arguments. */
3038 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3039 if (streq (arg, ptr->arg))
3040 return ptr->return_value;
3041
3042 /* An unrecognized interrupt type. */
3043 return ARM_FT_UNKNOWN;
3044 }
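/* For example, a handler declared as

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   reaches arm_isr_value with an ARGUMENT whose value is the string constant
   "IRQ" and is classified as ARM_FT_ISR.  */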
3045
3046 /* Computes the type of the current function. */
3047
3048 static unsigned long
3049 arm_compute_func_type (void)
3050 {
3051 unsigned long type = ARM_FT_UNKNOWN;
3052 tree a;
3053 tree attr;
3054
3055 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3056
3057 /* Decide if the current function is volatile. Such functions
3058 never return, and many memory cycles can be saved by not storing
3059 register values that will never be needed again. This optimization
3060 was added to speed up context switching in a kernel application. */
3061 if (optimize > 0
3062 && (TREE_NOTHROW (current_function_decl)
3063 || !(flag_unwind_tables
3064 || (flag_exceptions
3065 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3066 && TREE_THIS_VOLATILE (current_function_decl))
3067 type |= ARM_FT_VOLATILE;
3068
3069 if (cfun->static_chain_decl != NULL)
3070 type |= ARM_FT_NESTED;
3071
3072 attr = DECL_ATTRIBUTES (current_function_decl);
3073
3074 a = lookup_attribute ("naked", attr);
3075 if (a != NULL_TREE)
3076 type |= ARM_FT_NAKED;
3077
3078 a = lookup_attribute ("isr", attr);
3079 if (a == NULL_TREE)
3080 a = lookup_attribute ("interrupt", attr);
3081
3082 if (a == NULL_TREE)
3083 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3084 else
3085 type |= arm_isr_value (TREE_VALUE (a));
3086
3087 return type;
3088 }
3089
3090 /* Returns the type of the current function. */
3091
3092 unsigned long
3093 arm_current_func_type (void)
3094 {
3095 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3096 cfun->machine->func_type = arm_compute_func_type ();
3097
3098 return cfun->machine->func_type;
3099 }
3100
3101 bool
3102 arm_allocate_stack_slots_for_args (void)
3103 {
3104 /* Naked functions should not allocate stack slots for arguments. */
3105 return !IS_NAKED (arm_current_func_type ());
3106 }
3107
3108 static bool
3109 arm_warn_func_return (tree decl)
3110 {
3111 /* Naked functions are implemented entirely in assembly, including the
3112 return sequence, so suppress warnings about this. */
3113 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3114 }
3115
3116 \f
3117 /* Output assembler code for a block containing the constant parts
3118 of a trampoline, leaving space for the variable parts.
3119
3120 On the ARM, (if r8 is the static chain regnum, and remembering that
3121 referencing pc adds an offset of 8) the trampoline looks like:
3122 ldr r8, [pc, #0]
3123 ldr pc, [pc]
3124 .word static chain value
3125 .word function's address
3126 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3127
3128 static void
3129 arm_asm_trampoline_template (FILE *f)
3130 {
3131 if (TARGET_ARM)
3132 {
3133 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3134 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3135 }
3136 else if (TARGET_THUMB2)
3137 {
3138 /* The Thumb-2 trampoline is similar to the arm implementation.
3139 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3140 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3141 STATIC_CHAIN_REGNUM, PC_REGNUM);
3142 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3143 }
3144 else
3145 {
3146 ASM_OUTPUT_ALIGN (f, 2);
3147 fprintf (f, "\t.code\t16\n");
3148 fprintf (f, ".Ltrampoline_start:\n");
3149 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3150 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3151 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3152 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3153 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3154 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3155 }
3156 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3157 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3158 }
3159
3160 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3161
3162 static void
3163 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3164 {
3165 rtx fnaddr, mem, a_tramp;
3166
3167 emit_block_move (m_tramp, assemble_trampoline_template (),
3168 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3169
3170 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3171 emit_move_insn (mem, chain_value);
3172
3173 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3174 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3175 emit_move_insn (mem, fnaddr);
3176
3177 a_tramp = XEXP (m_tramp, 0);
3178 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3179 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3180 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3181 }
3182
3183 /* Thumb trampolines should be entered in thumb mode, so set
3184 the bottom bit of the address. */
3185
3186 static rtx
3187 arm_trampoline_adjust_address (rtx addr)
3188 {
3189 if (TARGET_THUMB)
3190 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3191 NULL, 0, OPTAB_LIB_WIDEN);
3192 return addr;
3193 }
3194 \f
3195 /* Return 1 if it is possible to return using a single instruction.
3196 If SIBLING is non-null, this is a test for a return before a sibling
3197 call. SIBLING is the call insn, so we can examine its register usage. */
3198
3199 int
3200 use_return_insn (int iscond, rtx sibling)
3201 {
3202 int regno;
3203 unsigned int func_type;
3204 unsigned long saved_int_regs;
3205 unsigned HOST_WIDE_INT stack_adjust;
3206 arm_stack_offsets *offsets;
3207
3208 /* Never use a return instruction before reload has run. */
3209 if (!reload_completed)
3210 return 0;
3211
3212 func_type = arm_current_func_type ();
3213
3214 /* Naked, volatile and stack alignment functions need special
3215 consideration. */
3216 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3217 return 0;
3218
3219 /* So do interrupt functions that use the frame pointer and Thumb
3220 interrupt functions. */
3221 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3222 return 0;
3223
3224 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3225 && !optimize_function_for_size_p (cfun))
3226 return 0;
3227
3228 offsets = arm_get_frame_offsets ();
3229 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3230
3231 /* As do variadic functions. */
3232 if (crtl->args.pretend_args_size
3233 || cfun->machine->uses_anonymous_args
3234 /* Or if the function calls __builtin_eh_return () */
3235 || crtl->calls_eh_return
3236 /* Or if the function calls alloca */
3237 || cfun->calls_alloca
3238 /* Or if there is a stack adjustment. However, if the stack pointer
3239 is saved on the stack, we can use a pre-incrementing stack load. */
3240 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3241 && stack_adjust == 4)))
3242 return 0;
3243
3244 saved_int_regs = offsets->saved_regs_mask;
3245
3246 /* Unfortunately, the insn
3247
3248 ldmib sp, {..., sp, ...}
3249
3250 triggers a bug on most SA-110 based devices, such that the stack
3251 pointer won't be correctly restored if the instruction takes a
3252 page fault. We work around this problem by popping r3 along with
3253 the other registers, since that is never slower than executing
3254 another instruction.
3255
3256 We test for !arm_arch5 here, because code for any architecture
3257 less than this could potentially be run on one of the buggy
3258 chips. */
3259 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3260 {
3261 /* Validate that r3 is a call-clobbered register (always true in
3262 the default abi) ... */
3263 if (!call_used_regs[3])
3264 return 0;
3265
3266 /* ... that it isn't being used for a return value ... */
3267 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3268 return 0;
3269
3270 /* ... or for a tail-call argument ... */
3271 if (sibling)
3272 {
3273 gcc_assert (CALL_P (sibling));
3274
3275 if (find_regno_fusage (sibling, USE, 3))
3276 return 0;
3277 }
3278
3279 /* ... and that there are no call-saved registers in r0-r2
3280 (always true in the default ABI). */
3281 if (saved_int_regs & 0x7)
3282 return 0;
3283 }
3284
3285 /* Can't be done if interworking with Thumb, and any registers have been
3286 stacked. */
3287 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3288 return 0;
3289
3290 /* On StrongARM, conditional returns are expensive if they aren't
3291 taken and multiple registers have been stacked. */
3292 if (iscond && arm_tune_strongarm)
3293 {
3294 /* Conditional return when just the LR is stored is a simple
3295 conditional-load instruction, that's not expensive. */
3296 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3297 return 0;
3298
3299 if (flag_pic
3300 && arm_pic_register != INVALID_REGNUM
3301 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3302 return 0;
3303 }
3304
3305 /* If there are saved registers but the LR isn't saved, then we need
3306 two instructions for the return. */
3307 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3308 return 0;
3309
3310 /* Can't be done if any of the VFP regs are pushed,
3311 since this also requires an insn. */
3312 if (TARGET_HARD_FLOAT && TARGET_VFP)
3313 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3314 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3315 return 0;
3316
3317 if (TARGET_REALLY_IWMMXT)
3318 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3319 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3320 return 0;
3321
3322 return 1;
3323 }
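/* As an illustration of the test above: a simple ARM function that only
   saves {r4, lr} and needs no other stack adjustment can return with
   the single instruction

        ldmfd   sp!, {r4, pc}

   whereas a function that calls alloca, leaves an outstanding stack
   adjustment, or must interwork with Thumb while registers are stacked
   cannot, and use_return_insn returns 0 for it.  */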
3324
3325 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3326 shrink-wrapping if possible. This is the case if we need to emit a
3327 prologue, which we can test by looking at the offsets. */
3328 bool
3329 use_simple_return_p (void)
3330 {
3331 arm_stack_offsets *offsets;
3332
3333 offsets = arm_get_frame_offsets ();
3334 return offsets->outgoing_args != 0;
3335 }
3336
3337 /* Return TRUE if int I is a valid immediate ARM constant. */
3338
3339 int
3340 const_ok_for_arm (HOST_WIDE_INT i)
3341 {
3342 int lowbit;
3343
3344 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3345 be all zero, or all one. */
3346 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3347 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3348 != ((~(unsigned HOST_WIDE_INT) 0)
3349 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3350 return FALSE;
3351
3352 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3353
3354 /* Fast return for 0 and small values. We must do this for zero, since
3355 the code below can't handle that one case. */
3356 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3357 return TRUE;
3358
3359 /* Get the number of trailing zeros. */
3360 lowbit = ffs((int) i) - 1;
3361
3362 /* Only even shifts are allowed in ARM mode so round down to the
3363 nearest even number. */
3364 if (TARGET_ARM)
3365 lowbit &= ~1;
3366
3367 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3368 return TRUE;
3369
3370 if (TARGET_ARM)
3371 {
3372 /* Allow rotated constants in ARM mode. */
3373 if (lowbit <= 4
3374 && ((i & ~0xc000003f) == 0
3375 || (i & ~0xf000000f) == 0
3376 || (i & ~0xfc000003) == 0))
3377 return TRUE;
3378 }
3379 else
3380 {
3381 HOST_WIDE_INT v;
3382
3383 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3384 v = i & 0xff;
3385 v |= v << 16;
3386 if (i == v || i == (v | (v << 8)))
3387 return TRUE;
3388
3389 /* Allow repeated pattern 0xXY00XY00. */
3390 v = i & 0xff00;
3391 v |= v << 16;
3392 if (i == v)
3393 return TRUE;
3394 }
3395
3396 return FALSE;
3397 }
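/* Some worked examples of the test above:

     0x000000ff   valid in both ARM and Thumb-2 (plain 8-bit value)
     0x0000ff00   valid in both (0xff rotated into bits 8-15)
     0xc000003f   valid in ARM only (an 8-bit value rotated so that it
                  wraps around bit 31, which Thumb-2 cannot express)
     0x00ff00ff   valid in Thumb-2 only (the 0x00XY00XY pattern)
     0xabababab   valid in Thumb-2 only (the 0xXYXYXYXY pattern)
     0x0000ffff   not a valid immediate in either mode (16 significant
                  bits), so it must be synthesized from two insns.  */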
3398
3399 /* Return true if I is a valid constant for the operation CODE. */
3400 int
3401 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3402 {
3403 if (const_ok_for_arm (i))
3404 return 1;
3405
3406 switch (code)
3407 {
3408 case SET:
3409 /* See if we can use movw. */
3410 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3411 return 1;
3412 else
3413 /* Otherwise, try mvn. */
3414 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3415
3416 case PLUS:
3417 /* See if we can use addw or subw. */
3418 if (TARGET_THUMB2
3419 && ((i & 0xfffff000) == 0
3420 || ((-i) & 0xfffff000) == 0))
3421 return 1;
3422 /* else fall through. */
3423
3424 case COMPARE:
3425 case EQ:
3426 case NE:
3427 case GT:
3428 case LE:
3429 case LT:
3430 case GE:
3431 case GEU:
3432 case LTU:
3433 case GTU:
3434 case LEU:
3435 case UNORDERED:
3436 case ORDERED:
3437 case UNEQ:
3438 case UNGE:
3439 case UNLT:
3440 case UNGT:
3441 case UNLE:
3442 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3443
3444 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3445 case XOR:
3446 return 0;
3447
3448 case IOR:
3449 if (TARGET_THUMB2)
3450 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3451 return 0;
3452
3453 case AND:
3454 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3455
3456 default:
3457 gcc_unreachable ();
3458 }
3459 }
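/* For example, const_ok_for_op (-255, PLUS) is true even though -255
   itself is not a valid immediate: the negated value 255 is, and the
   addition is then emitted as "sub rX, rY, #255".  Similarly, in
   Thumb-2, const_ok_for_op (0xabc, PLUS) is true because addw/subw
   accept any 12-bit value.  */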
3460
3461 /* Return true if I is a valid di mode constant for the operation CODE. */
3462 int
3463 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3464 {
3465 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3466 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3467 rtx hi = GEN_INT (hi_val);
3468 rtx lo = GEN_INT (lo_val);
3469
3470 if (TARGET_THUMB1)
3471 return 0;
3472
3473 switch (code)
3474 {
3475 case AND:
3476 case IOR:
3477 case XOR:
3478 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3479 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3480 case PLUS:
3481 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3482
3483 default:
3484 return 0;
3485 }
3486 }
3487
3488 /* Emit a sequence of insns to handle a large constant.
3489 CODE is the code of the operation required, it can be any of SET, PLUS,
3490 IOR, AND, XOR, MINUS;
3491 MODE is the mode in which the operation is being performed;
3492 VAL is the integer to operate on;
3493 SOURCE is the other operand (a register, or a null-pointer for SET);
3494 SUBTARGETS means it is safe to create scratch registers if that will
3495 either produce a simpler sequence, or we will want to cse the values.
3496 Return value is the number of insns emitted. */
3497
3498 /* ??? Tweak this for thumb2. */
3499 int
3500 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3501 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3502 {
3503 rtx cond;
3504
3505 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3506 cond = COND_EXEC_TEST (PATTERN (insn));
3507 else
3508 cond = NULL_RTX;
3509
3510 if (subtargets || code == SET
3511 || (REG_P (target) && REG_P (source)
3512 && REGNO (target) != REGNO (source)))
3513 {
3514 /* After arm_reorg has been called, we can't fix up expensive
3515 constants by pushing them into memory so we must synthesize
3516 them in-line, regardless of the cost. This is only likely to
3517 be more costly on chips that have load delay slots and we are
3518 compiling without running the scheduler (so no splitting
3519 occurred before the final instruction emission).
3520
3521 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3522 */
3523 if (!cfun->machine->after_arm_reorg
3524 && !cond
3525 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3526 1, 0)
3527 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3528 + (code != SET))))
3529 {
3530 if (code == SET)
3531 {
3532 /* Currently SET is the only monadic value for CODE, all
3533 the rest are dyadic. */
3534 if (TARGET_USE_MOVT)
3535 arm_emit_movpair (target, GEN_INT (val));
3536 else
3537 emit_set_insn (target, GEN_INT (val));
3538
3539 return 1;
3540 }
3541 else
3542 {
3543 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3544
3545 if (TARGET_USE_MOVT)
3546 arm_emit_movpair (temp, GEN_INT (val));
3547 else
3548 emit_set_insn (temp, GEN_INT (val));
3549
3550 /* For MINUS, the value is subtracted from, since we never
3551 have subtraction of a constant. */
3552 if (code == MINUS)
3553 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3554 else
3555 emit_set_insn (target,
3556 gen_rtx_fmt_ee (code, mode, source, temp));
3557 return 2;
3558 }
3559 }
3560 }
3561
3562 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3563 1);
3564 }
3565
3566 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3567 ARM/THUMB2 immediates and add up to VAL.
3568 The function return value gives the number of insns required. */
3569 static int
3570 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3571 struct four_ints *return_sequence)
3572 {
3573 int best_consecutive_zeros = 0;
3574 int i;
3575 int best_start = 0;
3576 int insns1, insns2;
3577 struct four_ints tmp_sequence;
3578
3579 /* If we aren't targeting ARM, the best place to start is always at
3580 the bottom, otherwise look more closely. */
3581 if (TARGET_ARM)
3582 {
3583 for (i = 0; i < 32; i += 2)
3584 {
3585 int consecutive_zeros = 0;
3586
3587 if (!(val & (3 << i)))
3588 {
3589 while ((i < 32) && !(val & (3 << i)))
3590 {
3591 consecutive_zeros += 2;
3592 i += 2;
3593 }
3594 if (consecutive_zeros > best_consecutive_zeros)
3595 {
3596 best_consecutive_zeros = consecutive_zeros;
3597 best_start = i - consecutive_zeros;
3598 }
3599 i -= 2;
3600 }
3601 }
3602 }
3603
3604 /* So long as it won't require any more insns to do so, it's
3605 desirable to emit a small constant (in bits 0...9) in the last
3606 insn. This way there is more chance that it can be combined with
3607 a later addressing insn to form a pre-indexed load or store
3608 operation. Consider:
3609
3610 *((volatile int *)0xe0000100) = 1;
3611 *((volatile int *)0xe0000110) = 2;
3612
3613 We want this to wind up as:
3614
3615 mov rA, #0xe0000000
3616 mov rB, #1
3617 str rB, [rA, #0x100]
3618 mov rB, #2
3619 str rB, [rA, #0x110]
3620
3621 rather than having to synthesize both large constants from scratch.
3622
3623 Therefore, we calculate how many insns would be required to emit
3624 the constant starting from `best_start', and also starting from
3625 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3626 yield a shorter sequence, we may as well use zero. */
3627 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3628 if (best_start != 0
3629 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3630 {
3631 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3632 if (insns2 <= insns1)
3633 {
3634 *return_sequence = tmp_sequence;
3635 insns1 = insns2;
3636 }
3637 }
3638
3639 return insns1;
3640 }
3641
3642 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3643 static int
3644 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3645 struct four_ints *return_sequence, int i)
3646 {
3647 int remainder = val & 0xffffffff;
3648 int insns = 0;
3649
3650 /* Try and find a way of doing the job in either two or three
3651 instructions.
3652
3653 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3654 location. We start at position I. This may be the MSB, or
3655 optimal_immediate_sequence may have positioned it at the largest block
3656 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3657 wrapping around to the top of the word when we drop off the bottom.
3658 In the worst case this code should produce no more than four insns.
3659
3660 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3661 constants, shifted to any arbitrary location. We should always start
3662 at the MSB. */
3663 do
3664 {
3665 int end;
3666 unsigned int b1, b2, b3, b4;
3667 unsigned HOST_WIDE_INT result;
3668 int loc;
3669
3670 gcc_assert (insns < 4);
3671
3672 if (i <= 0)
3673 i += 32;
3674
3675 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3676 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3677 {
3678 loc = i;
3679 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3680 /* We can use addw/subw for the last 12 bits. */
3681 result = remainder;
3682 else
3683 {
3684 /* Use an 8-bit shifted/rotated immediate. */
3685 end = i - 8;
3686 if (end < 0)
3687 end += 32;
3688 result = remainder & ((0x0ff << end)
3689 | ((i < end) ? (0xff >> (32 - end))
3690 : 0));
3691 i -= 8;
3692 }
3693 }
3694 else
3695 {
3696 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3697 arbitrary shifts. */
3698 i -= TARGET_ARM ? 2 : 1;
3699 continue;
3700 }
3701
3702 /* Next, see if we can do a better job with a thumb2 replicated
3703 constant.
3704
3705 We do it this way around to catch the cases like 0x01F001E0 where
3706 two 8-bit immediates would work, but a replicated constant would
3707 make it worse.
3708
3709 TODO: 16-bit constants that don't clear all the bits, but still win.
3710 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3711 if (TARGET_THUMB2)
3712 {
3713 b1 = (remainder & 0xff000000) >> 24;
3714 b2 = (remainder & 0x00ff0000) >> 16;
3715 b3 = (remainder & 0x0000ff00) >> 8;
3716 b4 = remainder & 0xff;
3717
3718 if (loc > 24)
3719 {
3720 /* The 8-bit immediate already found clears b1 (and maybe b2),
3721 but must leave b3 and b4 alone. */
3722
3723 /* First try to find a 32-bit replicated constant that clears
3724 almost everything. We can assume that we can't do it in one,
3725 or else we wouldn't be here. */
3726 unsigned int tmp = b1 & b2 & b3 & b4;
3727 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3728 + (tmp << 24);
3729 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3730 + (tmp == b3) + (tmp == b4);
3731 if (tmp
3732 && (matching_bytes >= 3
3733 || (matching_bytes == 2
3734 && const_ok_for_op (remainder & ~tmp2, code))))
3735 {
3736 /* At least 3 of the bytes match, and the fourth has at
3737 least as many bits set, or two of the bytes match
3738 and it will only require one more insn to finish. */
3739 result = tmp2;
3740 i = tmp != b1 ? 32
3741 : tmp != b2 ? 24
3742 : tmp != b3 ? 16
3743 : 8;
3744 }
3745
3746 /* Second, try to find a 16-bit replicated constant that can
3747 leave three of the bytes clear. If b2 or b4 is already
3748 zero, then we can. If the 8-bit from above would not
3749 clear b2 anyway, then we still win. */
3750 else if (b1 == b3 && (!b2 || !b4
3751 || (remainder & 0x00ff0000 & ~result)))
3752 {
3753 result = remainder & 0xff00ff00;
3754 i = 24;
3755 }
3756 }
3757 else if (loc > 16)
3758 {
3759 /* The 8-bit immediate already found clears b2 (and maybe b3)
3760 and we don't get here unless b1 is already clear, but it will
3761 leave b4 unchanged. */
3762
3763 /* If we can clear b2 and b4 at once, then we win, since the
3764 8-bits couldn't possibly reach that far. */
3765 if (b2 == b4)
3766 {
3767 result = remainder & 0x00ff00ff;
3768 i = 16;
3769 }
3770 }
3771 }
3772
3773 return_sequence->i[insns++] = result;
3774 remainder &= ~result;
3775
3776 if (code == SET || code == MINUS)
3777 code = PLUS;
3778 }
3779 while (remainder);
3780
3781 return insns;
3782 }
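/* For example, 0x0000ffff is not a single immediate in ARM mode, but
   the code above splits it into the two rotated 8-bit chunks 0xff00
   and 0xff, so that

       x |= 0xffff;

   can be emitted as

       orr     rX, rX, #0xff00
       orr     rX, rX, #0xff                                            */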
3783
3784 /* Emit an instruction with the indicated PATTERN. If COND is
3785 non-NULL, conditionalize the execution of the instruction on COND
3786 being true. */
3787
3788 static void
3789 emit_constant_insn (rtx cond, rtx pattern)
3790 {
3791 if (cond)
3792 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3793 emit_insn (pattern);
3794 }
3795
3796 /* As above, but extra parameter GENERATE which, if clear, suppresses
3797 RTL generation. */
3798
3799 static int
3800 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3801 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3802 int generate)
3803 {
3804 int can_invert = 0;
3805 int can_negate = 0;
3806 int final_invert = 0;
3807 int i;
3808 int set_sign_bit_copies = 0;
3809 int clear_sign_bit_copies = 0;
3810 int clear_zero_bit_copies = 0;
3811 int set_zero_bit_copies = 0;
3812 int insns = 0, neg_insns, inv_insns;
3813 unsigned HOST_WIDE_INT temp1, temp2;
3814 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3815 struct four_ints *immediates;
3816 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3817
3818 /* Find out which operations are safe for a given CODE. Also do a quick
3819 check for degenerate cases; these can occur when DImode operations
3820 are split. */
3821 switch (code)
3822 {
3823 case SET:
3824 can_invert = 1;
3825 break;
3826
3827 case PLUS:
3828 can_negate = 1;
3829 break;
3830
3831 case IOR:
3832 if (remainder == 0xffffffff)
3833 {
3834 if (generate)
3835 emit_constant_insn (cond,
3836 gen_rtx_SET (VOIDmode, target,
3837 GEN_INT (ARM_SIGN_EXTEND (val))));
3838 return 1;
3839 }
3840
3841 if (remainder == 0)
3842 {
3843 if (reload_completed && rtx_equal_p (target, source))
3844 return 0;
3845
3846 if (generate)
3847 emit_constant_insn (cond,
3848 gen_rtx_SET (VOIDmode, target, source));
3849 return 1;
3850 }
3851 break;
3852
3853 case AND:
3854 if (remainder == 0)
3855 {
3856 if (generate)
3857 emit_constant_insn (cond,
3858 gen_rtx_SET (VOIDmode, target, const0_rtx));
3859 return 1;
3860 }
3861 if (remainder == 0xffffffff)
3862 {
3863 if (reload_completed && rtx_equal_p (target, source))
3864 return 0;
3865 if (generate)
3866 emit_constant_insn (cond,
3867 gen_rtx_SET (VOIDmode, target, source));
3868 return 1;
3869 }
3870 can_invert = 1;
3871 break;
3872
3873 case XOR:
3874 if (remainder == 0)
3875 {
3876 if (reload_completed && rtx_equal_p (target, source))
3877 return 0;
3878 if (generate)
3879 emit_constant_insn (cond,
3880 gen_rtx_SET (VOIDmode, target, source));
3881 return 1;
3882 }
3883
3884 if (remainder == 0xffffffff)
3885 {
3886 if (generate)
3887 emit_constant_insn (cond,
3888 gen_rtx_SET (VOIDmode, target,
3889 gen_rtx_NOT (mode, source)));
3890 return 1;
3891 }
3892 final_invert = 1;
3893 break;
3894
3895 case MINUS:
3896 /* We treat MINUS as (val - source), since (source - val) is always
3897 passed as (source + (-val)). */
3898 if (remainder == 0)
3899 {
3900 if (generate)
3901 emit_constant_insn (cond,
3902 gen_rtx_SET (VOIDmode, target,
3903 gen_rtx_NEG (mode, source)));
3904 return 1;
3905 }
3906 if (const_ok_for_arm (val))
3907 {
3908 if (generate)
3909 emit_constant_insn (cond,
3910 gen_rtx_SET (VOIDmode, target,
3911 gen_rtx_MINUS (mode, GEN_INT (val),
3912 source)));
3913 return 1;
3914 }
3915
3916 break;
3917
3918 default:
3919 gcc_unreachable ();
3920 }
3921
3922 /* If we can do it in one insn get out quickly. */
3923 if (const_ok_for_op (val, code))
3924 {
3925 if (generate)
3926 emit_constant_insn (cond,
3927 gen_rtx_SET (VOIDmode, target,
3928 (source
3929 ? gen_rtx_fmt_ee (code, mode, source,
3930 GEN_INT (val))
3931 : GEN_INT (val))));
3932 return 1;
3933 }
3934
3935 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3936 insn. */
3937 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3938 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3939 {
3940 if (generate)
3941 {
3942 if (mode == SImode && i == 16)
3943 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3944 smaller insn. */
3945 emit_constant_insn (cond,
3946 gen_zero_extendhisi2
3947 (target, gen_lowpart (HImode, source)));
3948 else
3949 /* Extz only supports SImode, but we can coerce the operands
3950 into that mode. */
3951 emit_constant_insn (cond,
3952 gen_extzv_t2 (gen_lowpart (SImode, target),
3953 gen_lowpart (SImode, source),
3954 GEN_INT (i), const0_rtx));
3955 }
3956
3957 return 1;
3958 }
3959
3960 /* Calculate a few attributes that may be useful for specific
3961 optimizations. */
3962 /* Count number of leading zeros. */
3963 for (i = 31; i >= 0; i--)
3964 {
3965 if ((remainder & (1 << i)) == 0)
3966 clear_sign_bit_copies++;
3967 else
3968 break;
3969 }
3970
3971 /* Count number of leading 1's. */
3972 for (i = 31; i >= 0; i--)
3973 {
3974 if ((remainder & (1 << i)) != 0)
3975 set_sign_bit_copies++;
3976 else
3977 break;
3978 }
3979
3980 /* Count number of trailing zero's. */
3981 for (i = 0; i <= 31; i++)
3982 {
3983 if ((remainder & (1 << i)) == 0)
3984 clear_zero_bit_copies++;
3985 else
3986 break;
3987 }
3988
3989 /* Count number of trailing 1's. */
3990 for (i = 0; i <= 31; i++)
3991 {
3992 if ((remainder & (1 << i)) != 0)
3993 set_zero_bit_copies++;
3994 else
3995 break;
3996 }
3997
3998 switch (code)
3999 {
4000 case SET:
4001 /* See if we can do this by sign_extending a constant that is known
4002 to be negative. This is a good way of doing it, since the shift
4003 may well merge into a subsequent insn. */
4004 if (set_sign_bit_copies > 1)
4005 {
4006 if (const_ok_for_arm
4007 (temp1 = ARM_SIGN_EXTEND (remainder
4008 << (set_sign_bit_copies - 1))))
4009 {
4010 if (generate)
4011 {
4012 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4013 emit_constant_insn (cond,
4014 gen_rtx_SET (VOIDmode, new_src,
4015 GEN_INT (temp1)));
4016 emit_constant_insn (cond,
4017 gen_ashrsi3 (target, new_src,
4018 GEN_INT (set_sign_bit_copies - 1)));
4019 }
4020 return 2;
4021 }
4022 /* For an inverted constant, we will need to set the low bits,
4023 these will be shifted out of harm's way. */
4024 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4025 if (const_ok_for_arm (~temp1))
4026 {
4027 if (generate)
4028 {
4029 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4030 emit_constant_insn (cond,
4031 gen_rtx_SET (VOIDmode, new_src,
4032 GEN_INT (temp1)));
4033 emit_constant_insn (cond,
4034 gen_ashrsi3 (target, new_src,
4035 GEN_INT (set_sign_bit_copies - 1)));
4036 }
4037 return 2;
4038 }
4039 }
4040
4041 /* See if we can calculate the value as the difference between two
4042 valid immediates. */
4043 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4044 {
4045 int topshift = clear_sign_bit_copies & ~1;
4046
4047 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4048 & (0xff000000 >> topshift));
4049
4050 /* If temp1 is zero, then that means the 9 most significant
4051 bits of remainder were 1 and we've caused it to overflow.
4052 When topshift is 0 we don't need to do anything since we
4053 can borrow from 'bit 32'. */
4054 if (temp1 == 0 && topshift != 0)
4055 temp1 = 0x80000000 >> (topshift - 1);
4056
4057 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4058
4059 if (const_ok_for_arm (temp2))
4060 {
4061 if (generate)
4062 {
4063 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4064 emit_constant_insn (cond,
4065 gen_rtx_SET (VOIDmode, new_src,
4066 GEN_INT (temp1)));
4067 emit_constant_insn (cond,
4068 gen_addsi3 (target, new_src,
4069 GEN_INT (-temp2)));
4070 }
4071
4072 return 2;
4073 }
4074 }
4075
4076 /* See if we can generate this by setting the bottom (or the top)
4077 16 bits, and then shifting these into the other half of the
4078 word. We only look for the simplest cases, to do more would cost
4079 too much. Be careful, however, not to generate this when the
4080 alternative would take fewer insns. */
4081 if (val & 0xffff0000)
4082 {
4083 temp1 = remainder & 0xffff0000;
4084 temp2 = remainder & 0x0000ffff;
4085
4086 /* Overlaps outside this range are best done using other methods. */
4087 for (i = 9; i < 24; i++)
4088 {
4089 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4090 && !const_ok_for_arm (temp2))
4091 {
4092 rtx new_src = (subtargets
4093 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4094 : target);
4095 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4096 source, subtargets, generate);
4097 source = new_src;
4098 if (generate)
4099 emit_constant_insn
4100 (cond,
4101 gen_rtx_SET
4102 (VOIDmode, target,
4103 gen_rtx_IOR (mode,
4104 gen_rtx_ASHIFT (mode, source,
4105 GEN_INT (i)),
4106 source)));
4107 return insns + 1;
4108 }
4109 }
4110
4111 /* Don't duplicate cases already considered. */
4112 for (i = 17; i < 24; i++)
4113 {
4114 if (((temp1 | (temp1 >> i)) == remainder)
4115 && !const_ok_for_arm (temp1))
4116 {
4117 rtx new_src = (subtargets
4118 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4119 : target);
4120 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4121 source, subtargets, generate);
4122 source = new_src;
4123 if (generate)
4124 emit_constant_insn
4125 (cond,
4126 gen_rtx_SET (VOIDmode, target,
4127 gen_rtx_IOR
4128 (mode,
4129 gen_rtx_LSHIFTRT (mode, source,
4130 GEN_INT (i)),
4131 source)));
4132 return insns + 1;
4133 }
4134 }
4135 }
4136 break;
4137
4138 case IOR:
4139 case XOR:
4140 /* If we have IOR or XOR, and the constant can be loaded in a
4141 single instruction, and we can find a temporary to put it in,
4142 then this can be done in two instructions instead of 3-4. */
4143 if (subtargets
4144 /* TARGET can't be NULL if SUBTARGETS is 0. */
4145 || (reload_completed && !reg_mentioned_p (target, source)))
4146 {
4147 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4148 {
4149 if (generate)
4150 {
4151 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4152
4153 emit_constant_insn (cond,
4154 gen_rtx_SET (VOIDmode, sub,
4155 GEN_INT (val)));
4156 emit_constant_insn (cond,
4157 gen_rtx_SET (VOIDmode, target,
4158 gen_rtx_fmt_ee (code, mode,
4159 source, sub)));
4160 }
4161 return 2;
4162 }
4163 }
4164
4165 if (code == XOR)
4166 break;
4167
4168 /* Convert.
4169 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
4170 and the remainder 0s for e.g. 0xfff00000)
4171 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4172
4173 This can be done in 2 instructions by using shifts with mov or mvn.
4174 e.g. for
4175 x = x | 0xfff00000;
4176 we generate:
4177 mvn r0, r0, asl #12
4178 mvn r0, r0, lsr #12 */
4179 if (set_sign_bit_copies > 8
4180 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4181 {
4182 if (generate)
4183 {
4184 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4185 rtx shift = GEN_INT (set_sign_bit_copies);
4186
4187 emit_constant_insn
4188 (cond,
4189 gen_rtx_SET (VOIDmode, sub,
4190 gen_rtx_NOT (mode,
4191 gen_rtx_ASHIFT (mode,
4192 source,
4193 shift))));
4194 emit_constant_insn
4195 (cond,
4196 gen_rtx_SET (VOIDmode, target,
4197 gen_rtx_NOT (mode,
4198 gen_rtx_LSHIFTRT (mode, sub,
4199 shift))));
4200 }
4201 return 2;
4202 }
4203
4204 /* Convert
4205 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4206 to
4207 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4208
4209 For example, r0 = r0 | 0xfff
4210 mvn r0, r0, lsr #12
4211 mvn r0, r0, asl #12
4212
4213 */
4214 if (set_zero_bit_copies > 8
4215 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4216 {
4217 if (generate)
4218 {
4219 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4220 rtx shift = GEN_INT (set_zero_bit_copies);
4221
4222 emit_constant_insn
4223 (cond,
4224 gen_rtx_SET (VOIDmode, sub,
4225 gen_rtx_NOT (mode,
4226 gen_rtx_LSHIFTRT (mode,
4227 source,
4228 shift))));
4229 emit_constant_insn
4230 (cond,
4231 gen_rtx_SET (VOIDmode, target,
4232 gen_rtx_NOT (mode,
4233 gen_rtx_ASHIFT (mode, sub,
4234 shift))));
4235 }
4236 return 2;
4237 }
4238
4239 /* This will never be reached for Thumb2 because orn is a valid
4240 instruction. This is for Thumb1 and the ARM 32 bit cases.
4241
4242 x = y | constant (such that ~constant is a valid constant)
4243 Transform this to
4244 x = ~(~y & ~constant).
4245 */
4246 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4247 {
4248 if (generate)
4249 {
4250 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4251 emit_constant_insn (cond,
4252 gen_rtx_SET (VOIDmode, sub,
4253 gen_rtx_NOT (mode, source)));
4254 source = sub;
4255 if (subtargets)
4256 sub = gen_reg_rtx (mode);
4257 emit_constant_insn (cond,
4258 gen_rtx_SET (VOIDmode, sub,
4259 gen_rtx_AND (mode, source,
4260 GEN_INT (temp1))));
4261 emit_constant_insn (cond,
4262 gen_rtx_SET (VOIDmode, target,
4263 gen_rtx_NOT (mode, sub)));
4264 }
4265 return 3;
4266 }
4267 break;
4268
4269 case AND:
4270 /* See if two shifts will do 2 or more insn's worth of work. */
4271 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4272 {
4273 HOST_WIDE_INT shift_mask = ((0xffffffff
4274 << (32 - clear_sign_bit_copies))
4275 & 0xffffffff);
4276
4277 if ((remainder | shift_mask) != 0xffffffff)
4278 {
4279 if (generate)
4280 {
4281 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4282 insns = arm_gen_constant (AND, mode, cond,
4283 remainder | shift_mask,
4284 new_src, source, subtargets, 1);
4285 source = new_src;
4286 }
4287 else
4288 {
4289 rtx targ = subtargets ? NULL_RTX : target;
4290 insns = arm_gen_constant (AND, mode, cond,
4291 remainder | shift_mask,
4292 targ, source, subtargets, 0);
4293 }
4294 }
4295
4296 if (generate)
4297 {
4298 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4299 rtx shift = GEN_INT (clear_sign_bit_copies);
4300
4301 emit_insn (gen_ashlsi3 (new_src, source, shift));
4302 emit_insn (gen_lshrsi3 (target, new_src, shift));
4303 }
4304
4305 return insns + 2;
4306 }
4307
4308 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4309 {
4310 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4311
4312 if ((remainder | shift_mask) != 0xffffffff)
4313 {
4314 if (generate)
4315 {
4316 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4317
4318 insns = arm_gen_constant (AND, mode, cond,
4319 remainder | shift_mask,
4320 new_src, source, subtargets, 1);
4321 source = new_src;
4322 }
4323 else
4324 {
4325 rtx targ = subtargets ? NULL_RTX : target;
4326
4327 insns = arm_gen_constant (AND, mode, cond,
4328 remainder | shift_mask,
4329 targ, source, subtargets, 0);
4330 }
4331 }
4332
4333 if (generate)
4334 {
4335 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4336 rtx shift = GEN_INT (clear_zero_bit_copies);
4337
4338 emit_insn (gen_lshrsi3 (new_src, source, shift));
4339 emit_insn (gen_ashlsi3 (target, new_src, shift));
4340 }
4341
4342 return insns + 2;
4343 }
4344
4345 break;
4346
4347 default:
4348 break;
4349 }
4350
4351 /* Calculate what the instruction sequences would be if we generated it
4352 normally, negated, or inverted. */
4353 if (code == AND)
4354 /* AND cannot be split into multiple insns, so invert and use BIC. */
4355 insns = 99;
4356 else
4357 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4358
4359 if (can_negate)
4360 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4361 &neg_immediates);
4362 else
4363 neg_insns = 99;
4364
4365 if (can_invert || final_invert)
4366 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4367 &inv_immediates);
4368 else
4369 inv_insns = 99;
4370
4371 immediates = &pos_immediates;
4372
4373 /* Is the negated immediate sequence more efficient? */
4374 if (neg_insns < insns && neg_insns <= inv_insns)
4375 {
4376 insns = neg_insns;
4377 immediates = &neg_immediates;
4378 }
4379 else
4380 can_negate = 0;
4381
4382 /* Is the inverted immediate sequence more efficient?
4383 We must allow for an extra NOT instruction for XOR operations, although
4384 there is some chance that the final 'mvn' will get optimized later. */
4385 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4386 {
4387 insns = inv_insns;
4388 immediates = &inv_immediates;
4389 }
4390 else
4391 {
4392 can_invert = 0;
4393 final_invert = 0;
4394 }
4395
4396 /* Now output the chosen sequence as instructions. */
4397 if (generate)
4398 {
4399 for (i = 0; i < insns; i++)
4400 {
4401 rtx new_src, temp1_rtx;
4402
4403 temp1 = immediates->i[i];
4404
4405 if (code == SET || code == MINUS)
4406 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4407 else if ((final_invert || i < (insns - 1)) && subtargets)
4408 new_src = gen_reg_rtx (mode);
4409 else
4410 new_src = target;
4411
4412 if (can_invert)
4413 temp1 = ~temp1;
4414 else if (can_negate)
4415 temp1 = -temp1;
4416
4417 temp1 = trunc_int_for_mode (temp1, mode);
4418 temp1_rtx = GEN_INT (temp1);
4419
4420 if (code == SET)
4421 ;
4422 else if (code == MINUS)
4423 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4424 else
4425 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4426
4427 emit_constant_insn (cond,
4428 gen_rtx_SET (VOIDmode, new_src,
4429 temp1_rtx));
4430 source = new_src;
4431
4432 if (code == SET)
4433 {
4434 can_negate = can_invert;
4435 can_invert = 0;
4436 code = PLUS;
4437 }
4438 else if (code == MINUS)
4439 code = PLUS;
4440 }
4441 }
4442
4443 if (final_invert)
4444 {
4445 if (generate)
4446 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4447 gen_rtx_NOT (mode, source)));
4448 insns++;
4449 }
4450
4451 return insns;
4452 }
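/* Two concrete cases of the selection above, both with CODE == AND:

     x &= 0xffffff00;   the complement 0xff is a valid immediate, so the
                        early const_ok_for_op test yields the single
                        instruction "bic rX, rY, #255";

     x &= 0xf00ff00f;   neither the constant nor its complement is a
                        single immediate, so the inverted value
                        0x0ff00ff0 is split and two BIC instructions are
                        emitted (one clearing bits 4-11, the other
                        clearing bits 20-27).  */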
4453
4454 /* Canonicalize a comparison so that we are more likely to recognize it.
4455 This can be done for a few constant compares, where we can make the
4456 immediate value easier to load. */
4457
4458 static void
4459 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4460 bool op0_preserve_value)
4461 {
4462 enum machine_mode mode;
4463 unsigned HOST_WIDE_INT i, maxval;
4464
4465 mode = GET_MODE (*op0);
4466 if (mode == VOIDmode)
4467 mode = GET_MODE (*op1);
4468
4469 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4470
4471 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4472 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4473 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4474 for GTU/LEU in Thumb mode. */
4475 if (mode == DImode)
4476 {
4477 rtx tem;
4478
4479 if (*code == GT || *code == LE
4480 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4481 {
4482 /* Missing comparison. First try to use an available
4483 comparison. */
4484 if (CONST_INT_P (*op1))
4485 {
4486 i = INTVAL (*op1);
4487 switch (*code)
4488 {
4489 case GT:
4490 case LE:
4491 if (i != maxval
4492 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4493 {
4494 *op1 = GEN_INT (i + 1);
4495 *code = *code == GT ? GE : LT;
4496 return;
4497 }
4498 break;
4499 case GTU:
4500 case LEU:
4501 if (i != ~((unsigned HOST_WIDE_INT) 0)
4502 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4503 {
4504 *op1 = GEN_INT (i + 1);
4505 *code = *code == GTU ? GEU : LTU;
4506 return;
4507 }
4508 break;
4509 default:
4510 gcc_unreachable ();
4511 }
4512 }
4513
4514 /* If that did not work, reverse the condition. */
4515 if (!op0_preserve_value)
4516 {
4517 tem = *op0;
4518 *op0 = *op1;
4519 *op1 = tem;
4520 *code = (int)swap_condition ((enum rtx_code)*code);
4521 }
4522 }
4523 return;
4524 }
4525
4526 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4527 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4528 to facilitate possible combining with a cmp into 'ands'. */
4529 if (mode == SImode
4530 && GET_CODE (*op0) == ZERO_EXTEND
4531 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4532 && GET_MODE (XEXP (*op0, 0)) == QImode
4533 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4534 && subreg_lowpart_p (XEXP (*op0, 0))
4535 && *op1 == const0_rtx)
4536 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4537 GEN_INT (255));
4538
4539 /* Comparisons smaller than DImode. Only adjust comparisons against
4540 an out-of-range constant. */
4541 if (!CONST_INT_P (*op1)
4542 || const_ok_for_arm (INTVAL (*op1))
4543 || const_ok_for_arm (- INTVAL (*op1)))
4544 return;
4545
4546 i = INTVAL (*op1);
4547
4548 switch (*code)
4549 {
4550 case EQ:
4551 case NE:
4552 return;
4553
4554 case GT:
4555 case LE:
4556 if (i != maxval
4557 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4558 {
4559 *op1 = GEN_INT (i + 1);
4560 *code = *code == GT ? GE : LT;
4561 return;
4562 }
4563 break;
4564
4565 case GE:
4566 case LT:
4567 if (i != ~maxval
4568 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4569 {
4570 *op1 = GEN_INT (i - 1);
4571 *code = *code == GE ? GT : LE;
4572 return;
4573 }
4574 break;
4575
4576 case GTU:
4577 case LEU:
4578 if (i != ~((unsigned HOST_WIDE_INT) 0)
4579 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4580 {
4581 *op1 = GEN_INT (i + 1);
4582 *code = *code == GTU ? GEU : LTU;
4583 return;
4584 }
4585 break;
4586
4587 case GEU:
4588 case LTU:
4589 if (i != 0
4590 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4591 {
4592 *op1 = GEN_INT (i - 1);
4593 *code = *code == GEU ? GTU : LEU;
4594 return;
4595 }
4596 break;
4597
4598 default:
4599 gcc_unreachable ();
4600 }
4601 }
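/* For example, 0xfff is not a valid ARM immediate but 0x1000 is, so
   (GT x #0xfff) is canonicalized to (GE x #0x1000) and
   (LEU x #0xfff) to (LTU x #0x1000), both of which test the same
   condition with a loadable constant.  */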
4602
4603
4604 /* Define how to find the value returned by a function. */
4605
4606 static rtx
4607 arm_function_value(const_tree type, const_tree func,
4608 bool outgoing ATTRIBUTE_UNUSED)
4609 {
4610 enum machine_mode mode;
4611 int unsignedp ATTRIBUTE_UNUSED;
4612 rtx r ATTRIBUTE_UNUSED;
4613
4614 mode = TYPE_MODE (type);
4615
4616 if (TARGET_AAPCS_BASED)
4617 return aapcs_allocate_return_reg (mode, type, func);
4618
4619 /* Promote integer types. */
4620 if (INTEGRAL_TYPE_P (type))
4621 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4622
4623 /* Promote small structs returned in a register to full-word size
4624 for big-endian AAPCS. */
4625 if (arm_return_in_msb (type))
4626 {
4627 HOST_WIDE_INT size = int_size_in_bytes (type);
4628 if (size % UNITS_PER_WORD != 0)
4629 {
4630 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4631 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4632 }
4633 }
4634
4635 return arm_libcall_value_1 (mode);
4636 }
4637
4638 /* libcall hashtable helpers. */
4639
4640 struct libcall_hasher : typed_noop_remove <rtx_def>
4641 {
4642 typedef rtx_def value_type;
4643 typedef rtx_def compare_type;
4644 static inline hashval_t hash (const value_type *);
4645 static inline bool equal (const value_type *, const compare_type *);
4646 static inline void remove (value_type *);
4647 };
4648
4649 inline bool
4650 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4651 {
4652 return rtx_equal_p (p1, p2);
4653 }
4654
4655 inline hashval_t
4656 libcall_hasher::hash (const value_type *p1)
4657 {
4658 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4659 }
4660
4661 typedef hash_table <libcall_hasher> libcall_table_type;
4662
4663 static void
4664 add_libcall (libcall_table_type htab, rtx libcall)
4665 {
4666 *htab.find_slot (libcall, INSERT) = libcall;
4667 }
4668
4669 static bool
4670 arm_libcall_uses_aapcs_base (const_rtx libcall)
4671 {
4672 static bool init_done = false;
4673 static libcall_table_type libcall_htab;
4674
4675 if (!init_done)
4676 {
4677 init_done = true;
4678
4679 libcall_htab.create (31);
4680 add_libcall (libcall_htab,
4681 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4682 add_libcall (libcall_htab,
4683 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4684 add_libcall (libcall_htab,
4685 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4686 add_libcall (libcall_htab,
4687 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4688
4689 add_libcall (libcall_htab,
4690 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4691 add_libcall (libcall_htab,
4692 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4693 add_libcall (libcall_htab,
4694 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4695 add_libcall (libcall_htab,
4696 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4697
4698 add_libcall (libcall_htab,
4699 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4700 add_libcall (libcall_htab,
4701 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4702 add_libcall (libcall_htab,
4703 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4704 add_libcall (libcall_htab,
4705 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4706 add_libcall (libcall_htab,
4707 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4708 add_libcall (libcall_htab,
4709 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4710 add_libcall (libcall_htab,
4711 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4712 add_libcall (libcall_htab,
4713 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4714
4715 /* Values from double-precision helper functions are returned in core
4716 registers if the selected core only supports single-precision
4717 arithmetic, even if we are using the hard-float ABI. The same is
4718 true for single-precision helpers, but we will never be using the
4719 hard-float ABI on a CPU which doesn't support single-precision
4720 operations in hardware. */
4721 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4722 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4723 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4724 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4725 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4726 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4727 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4728 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4729 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4730 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4731 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4732 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4733 SFmode));
4734 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4735 DFmode));
4736 }
4737
4738 return libcall && libcall_htab.find (libcall) != NULL;
4739 }
4740
4741 static rtx
4742 arm_libcall_value_1 (enum machine_mode mode)
4743 {
4744 if (TARGET_AAPCS_BASED)
4745 return aapcs_libcall_value (mode);
4746 else if (TARGET_IWMMXT_ABI
4747 && arm_vector_mode_supported_p (mode))
4748 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4749 else
4750 return gen_rtx_REG (mode, ARG_REGISTER (1));
4751 }
4752
4753 /* Define how to find the value returned by a library function
4754 assuming the value has mode MODE. */
4755
4756 static rtx
4757 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4758 {
4759 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4760 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4761 {
4762 /* The following libcalls return their result in integer registers,
4763 even though they return a floating point value. */
4764 if (arm_libcall_uses_aapcs_base (libcall))
4765 return gen_rtx_REG (mode, ARG_REGISTER(1));
4766
4767 }
4768
4769 return arm_libcall_value_1 (mode);
4770 }
4771
4772 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4773
4774 static bool
4775 arm_function_value_regno_p (const unsigned int regno)
4776 {
4777 if (regno == ARG_REGISTER (1)
4778 || (TARGET_32BIT
4779 && TARGET_AAPCS_BASED
4780 && TARGET_VFP
4781 && TARGET_HARD_FLOAT
4782 && regno == FIRST_VFP_REGNUM)
4783 || (TARGET_IWMMXT_ABI
4784 && regno == FIRST_IWMMXT_REGNUM))
4785 return true;
4786
4787 return false;
4788 }
4789
4790 /* Determine the amount of memory needed to store the possible return
4791 registers of an untyped call. */
4792 int
4793 arm_apply_result_size (void)
4794 {
4795 int size = 16;
4796
4797 if (TARGET_32BIT)
4798 {
4799 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4800 size += 32;
4801 if (TARGET_IWMMXT_ABI)
4802 size += 8;
4803 }
4804
4805 return size;
4806 }
4807
4808 /* Decide whether TYPE should be returned in memory (true)
4809 or in a register (false). FNTYPE is the type of the function making
4810 the call. */
4811 static bool
4812 arm_return_in_memory (const_tree type, const_tree fntype)
4813 {
4814 HOST_WIDE_INT size;
4815
4816 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4817
4818 if (TARGET_AAPCS_BASED)
4819 {
4820 /* Simple, non-aggregate types (i.e. not including vectors and
4821 complex) are always returned in a register (or registers).
4822 We don't care about which register here, so we can short-cut
4823 some of the detail. */
4824 if (!AGGREGATE_TYPE_P (type)
4825 && TREE_CODE (type) != VECTOR_TYPE
4826 && TREE_CODE (type) != COMPLEX_TYPE)
4827 return false;
4828
4829 /* Any return value that is no larger than one word can be
4830 returned in r0. */
4831 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4832 return false;
4833
4834 /* Check any available co-processors to see if they accept the
4835 type as a register candidate (VFP, for example, can return
4836 some aggregates in consecutive registers). These aren't
4837 available if the call is variadic. */
4838 if (aapcs_select_return_coproc (type, fntype) >= 0)
4839 return false;
4840
4841 /* Vector values should be returned using ARM registers, not
4842 memory (unless they're over 16 bytes, which will break since
4843 we only have four call-clobbered registers to play with). */
4844 if (TREE_CODE (type) == VECTOR_TYPE)
4845 return (size < 0 || size > (4 * UNITS_PER_WORD));
4846
4847 /* The rest go in memory. */
4848 return true;
4849 }
4850
4851 if (TREE_CODE (type) == VECTOR_TYPE)
4852 return (size < 0 || size > (4 * UNITS_PER_WORD));
4853
4854 if (!AGGREGATE_TYPE_P (type)
4855 && (TREE_CODE (type) != VECTOR_TYPE))
4856 /* All simple types are returned in registers. */
4857 return false;
4858
4859 if (arm_abi != ARM_ABI_APCS)
4860 {
4861 /* ATPCS and later return aggregate types in memory only if they are
4862 larger than a word (or are variable size). */
4863 return (size < 0 || size > UNITS_PER_WORD);
4864 }
4865
4866 /* For the arm-wince targets we choose to be compatible with Microsoft's
4867 ARM and Thumb compilers, which always return aggregates in memory. */
4868 #ifndef ARM_WINCE
4869 /* All structures/unions bigger than one word are returned in memory.
4870 Also catch the case where int_size_in_bytes returns -1. In this case
4871 the aggregate is either huge or of variable size, and in either case
4872 we will want to return it via memory and not in a register. */
4873 if (size < 0 || size > UNITS_PER_WORD)
4874 return true;
4875
4876 if (TREE_CODE (type) == RECORD_TYPE)
4877 {
4878 tree field;
4879
4880 /* For a struct the APCS says that we only return in a register
4881 if the type is 'integer like' and every addressable element
4882 has an offset of zero. For practical purposes this means
4883 that the structure can have at most one non bit-field element
4884 and that this element must be the first one in the structure. */
4885
4886 /* Find the first field, ignoring non FIELD_DECL things which will
4887 have been created by C++. */
4888 for (field = TYPE_FIELDS (type);
4889 field && TREE_CODE (field) != FIELD_DECL;
4890 field = DECL_CHAIN (field))
4891 continue;
4892
4893 if (field == NULL)
4894 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4895
4896 /* Check that the first field is valid for returning in a register. */
4897
4898 /* ... Floats are not allowed */
4899 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4900 return true;
4901
4902 /* ... Aggregates that are not themselves valid for returning in
4903 a register are not allowed. */
4904 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4905 return true;
4906
4907 /* Now check the remaining fields, if any. Only bitfields are allowed,
4908 since they are not addressable. */
4909 for (field = DECL_CHAIN (field);
4910 field;
4911 field = DECL_CHAIN (field))
4912 {
4913 if (TREE_CODE (field) != FIELD_DECL)
4914 continue;
4915
4916 if (!DECL_BIT_FIELD_TYPE (field))
4917 return true;
4918 }
4919
4920 return false;
4921 }
4922
4923 if (TREE_CODE (type) == UNION_TYPE)
4924 {
4925 tree field;
4926
4927 /* Unions can be returned in registers if every element is
4928 integral, or can be returned in an integer register. */
4929 for (field = TYPE_FIELDS (type);
4930 field;
4931 field = DECL_CHAIN (field))
4932 {
4933 if (TREE_CODE (field) != FIELD_DECL)
4934 continue;
4935
4936 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4937 return true;
4938
4939 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4940 return true;
4941 }
4942
4943 return false;
4944 }
4945 #endif /* not ARM_WINCE */
4946
4947 /* Return all other types in memory. */
4948 return true;
4949 }
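/* Under AAPCS the rules above mean, for example, that

     struct s1 { int a; };            fits in one word: returned in r0;
     struct s2 { int a; int b; };     larger than one word and not a
                                      co-processor candidate: returned
                                      in memory;
     int, long long, float, double    scalar types: always returned in
                                      registers.  */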
4950
4951 const struct pcs_attribute_arg
4952 {
4953 const char *arg;
4954 enum arm_pcs value;
4955 } pcs_attribute_args[] =
4956 {
4957 {"aapcs", ARM_PCS_AAPCS},
4958 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4959 #if 0
4960 /* We could recognize these, but changes would be needed elsewhere
4961 * to implement them. */
4962 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4963 {"atpcs", ARM_PCS_ATPCS},
4964 {"apcs", ARM_PCS_APCS},
4965 #endif
4966 {NULL, ARM_PCS_UNKNOWN}
4967 };
4968
4969 static enum arm_pcs
4970 arm_pcs_from_attribute (tree attr)
4971 {
4972 const struct pcs_attribute_arg *ptr;
4973 const char *arg;
4974
4975 /* Get the value of the argument. */
4976 if (TREE_VALUE (attr) == NULL_TREE
4977 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4978 return ARM_PCS_UNKNOWN;
4979
4980 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4981
4982 /* Check it against the list of known arguments. */
4983 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4984 if (streq (arg, ptr->arg))
4985 return ptr->value;
4986
4987 /* An unrecognized PCS variant. */
4988 return ARM_PCS_UNKNOWN;
4989 }
4990
4991 /* Get the PCS variant to use for this call. TYPE is the function's type
4992 specification, DECL is the specific declaration. DECL may be null if
4993 the call could be indirect or if this is a library call. */
4994 static enum arm_pcs
4995 arm_get_pcs_model (const_tree type, const_tree decl)
4996 {
4997 bool user_convention = false;
4998 enum arm_pcs user_pcs = arm_pcs_default;
4999 tree attr;
5000
5001 gcc_assert (type);
5002
5003 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5004 if (attr)
5005 {
5006 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5007 user_convention = true;
5008 }
5009
5010 if (TARGET_AAPCS_BASED)
5011 {
5012 /* Detect varargs functions. These always use the base rules
5013 (no argument is ever a candidate for a co-processor
5014 register). */
5015 bool base_rules = stdarg_p (type);
5016
5017 if (user_convention)
5018 {
5019 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5020 sorry ("non-AAPCS derived PCS variant");
5021 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5022 error ("variadic functions must use the base AAPCS variant");
5023 }
5024
5025 if (base_rules)
5026 return ARM_PCS_AAPCS;
5027 else if (user_convention)
5028 return user_pcs;
5029 else if (decl && flag_unit_at_a_time)
5030 {
5031 /* Local functions never leak outside this compilation unit,
5032 so we are free to use whatever conventions are
5033 appropriate. */
5034 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5035 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5036 if (i && i->local)
5037 return ARM_PCS_AAPCS_LOCAL;
5038 }
5039 }
5040 else if (user_convention && user_pcs != arm_pcs_default)
5041 sorry ("PCS variant");
5042
5043 /* For everything else we use the target's default. */
5044 return arm_pcs_default;
5045 }
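/* For example, a declaration such as

       double dot (const double *x, const double *y, int n)
           __attribute__ ((pcs ("aapcs-vfp")));

   selects ARM_PCS_AAPCS_VFP for calls to that function even when the
   default PCS passes floating-point values in core registers, whereas a
   variadic function always falls back to the base ARM_PCS_AAPCS rules
   (and combining it with a non-base "pcs" attribute is an error).  */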
5046
5047
5048 static void
5049 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5050 const_tree fntype ATTRIBUTE_UNUSED,
5051 rtx libcall ATTRIBUTE_UNUSED,
5052 const_tree fndecl ATTRIBUTE_UNUSED)
5053 {
5054 /* Record the unallocated VFP registers. */
5055 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5056 pcum->aapcs_vfp_reg_alloc = 0;
5057 }
5058
5059 /* Walk down the type tree of TYPE counting consecutive base elements.
5060 If *MODEP is VOIDmode, then set it to the first valid floating point
5061 type. If a non-floating point type is found, or if a floating point
5062 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5063 otherwise return the count in the sub-tree. */
5064 static int
5065 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5066 {
5067 enum machine_mode mode;
5068 HOST_WIDE_INT size;
5069
5070 switch (TREE_CODE (type))
5071 {
5072 case REAL_TYPE:
5073 mode = TYPE_MODE (type);
5074 if (mode != DFmode && mode != SFmode)
5075 return -1;
5076
5077 if (*modep == VOIDmode)
5078 *modep = mode;
5079
5080 if (*modep == mode)
5081 return 1;
5082
5083 break;
5084
5085 case COMPLEX_TYPE:
5086 mode = TYPE_MODE (TREE_TYPE (type));
5087 if (mode != DFmode && mode != SFmode)
5088 return -1;
5089
5090 if (*modep == VOIDmode)
5091 *modep = mode;
5092
5093 if (*modep == mode)
5094 return 2;
5095
5096 break;
5097
5098 case VECTOR_TYPE:
5099 /* Use V2SImode and V4SImode as representatives of all 64-bit
5100 and 128-bit vector types, whether or not those modes are
5101 supported with the present options. */
5102 size = int_size_in_bytes (type);
5103 switch (size)
5104 {
5105 case 8:
5106 mode = V2SImode;
5107 break;
5108 case 16:
5109 mode = V4SImode;
5110 break;
5111 default:
5112 return -1;
5113 }
5114
5115 if (*modep == VOIDmode)
5116 *modep = mode;
5117
5118 /* Vector modes are considered to be opaque: two vectors are
5119 equivalent for the purposes of being homogeneous aggregates
5120 if they are the same size. */
5121 if (*modep == mode)
5122 return 1;
5123
5124 break;
5125
5126 case ARRAY_TYPE:
5127 {
5128 int count;
5129 tree index = TYPE_DOMAIN (type);
5130
5131 /* Can't handle incomplete types nor sizes that are not
5132 fixed. */
5133 if (!COMPLETE_TYPE_P (type)
5134 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5135 return -1;
5136
5137 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5138 if (count == -1
5139 || !index
5140 || !TYPE_MAX_VALUE (index)
5141 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5142 || !TYPE_MIN_VALUE (index)
5143 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5144 || count < 0)
5145 return -1;
5146
5147 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5148 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5149
5150 /* There must be no padding. */
5151 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5152 return -1;
5153
5154 return count;
5155 }
5156
5157 case RECORD_TYPE:
5158 {
5159 int count = 0;
5160 int sub_count;
5161 tree field;
5162
5163 /* Can't handle incomplete types nor sizes that are not
5164 fixed. */
5165 if (!COMPLETE_TYPE_P (type)
5166 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5167 return -1;
5168
5169 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5170 {
5171 if (TREE_CODE (field) != FIELD_DECL)
5172 continue;
5173
5174 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5175 if (sub_count < 0)
5176 return -1;
5177 count += sub_count;
5178 }
5179
5180 /* There must be no padding. */
5181 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5182 return -1;
5183
5184 return count;
5185 }
5186
5187 case UNION_TYPE:
5188 case QUAL_UNION_TYPE:
5189 {
5190 /* These aren't very interesting except in a degenerate case. */
5191 int count = 0;
5192 int sub_count;
5193 tree field;
5194
5195 /* Can't handle incomplete types nor sizes that are not
5196 fixed. */
5197 if (!COMPLETE_TYPE_P (type)
5198 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5199 return -1;
5200
5201 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5202 {
5203 if (TREE_CODE (field) != FIELD_DECL)
5204 continue;
5205
5206 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5207 if (sub_count < 0)
5208 return -1;
5209 count = count > sub_count ? count : sub_count;
5210 }
5211
5212 /* There must be no padding. */
5213 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5214 return -1;
5215
5216 return count;
5217 }
5218
5219 default:
5220 break;
5221 }
5222
5223 return -1;
5224 }
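/* Examples of the classification above:

     struct { float x, y, z; }        three SFmode elements (a
                                      homogeneous aggregate);
     struct { double d[2]; }          two DFmode elements;
     _Complex double                  two DFmode elements;
     struct { float f; double d; }    mixed element modes: returns -1;
     struct { float f; int i; }       contains a non-FP member:
                                      returns -1.  */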
5225
5226 /* Return true if PCS_VARIANT should use VFP registers. */
5227 static bool
5228 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5229 {
5230 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5231 {
5232 static bool seen_thumb1_vfp = false;
5233
5234 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5235 {
5236 sorry ("Thumb-1 hard-float VFP ABI");
5237 /* sorry() is not immediately fatal, so only display this once. */
5238 seen_thumb1_vfp = true;
5239 }
5240
5241 return true;
5242 }
5243
5244 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5245 return false;
5246
5247 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5248 (TARGET_VFP_DOUBLE || !is_double));
5249 }
5250
5251 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5252 suitable for passing or returning in VFP registers for the PCS
5253 variant selected. If it is, then *BASE_MODE is updated to contain
5254 a machine mode describing each element of the argument's type and
5255 *COUNT to hold the number of such elements. */
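/* For example, a parameter of type
   struct vec3 { float x, y, z; };
   is a homogeneous aggregate: *BASE_MODE becomes SFmode and *COUNT
   becomes 3.  A double _Complex gives DFmode with a count of 2.  */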
5256 static bool
5257 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5258 enum machine_mode mode, const_tree type,
5259 enum machine_mode *base_mode, int *count)
5260 {
5261 enum machine_mode new_mode = VOIDmode;
5262
5263 /* If we have the type information, prefer that to working things
5264 out from the mode. */
5265 if (type)
5266 {
5267 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5268
5269 if (ag_count > 0 && ag_count <= 4)
5270 *count = ag_count;
5271 else
5272 return false;
5273 }
5274 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5275 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5276 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5277 {
5278 *count = 1;
5279 new_mode = mode;
5280 }
5281 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5282 {
5283 *count = 2;
5284 new_mode = (mode == DCmode ? DFmode : SFmode);
5285 }
5286 else
5287 return false;
5288
5289
5290 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5291 return false;
5292
5293 *base_mode = new_mode;
5294 return true;
5295 }
5296
5297 static bool
5298 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5299 enum machine_mode mode, const_tree type)
5300 {
5301 int count ATTRIBUTE_UNUSED;
5302 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5303
5304 if (!use_vfp_abi (pcs_variant, false))
5305 return false;
5306 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5307 &ag_mode, &count);
5308 }
5309
5310 static bool
5311 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5312 const_tree type)
5313 {
5314 if (!use_vfp_abi (pcum->pcs_variant, false))
5315 return false;
5316
5317 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5318 &pcum->aapcs_vfp_rmode,
5319 &pcum->aapcs_vfp_rcount);
5320 }
5321
5322 static bool
5323 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5324 const_tree type ATTRIBUTE_UNUSED)
5325 {
5326 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5327 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5328 int regno;
5329
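/* SHIFT is the number of consecutive S registers occupied by each
   element (1 for SFmode, 2 for DFmode and 64-bit vectors, 4 for
   128-bit vectors); MASK covers the block of registers needed for the
   whole argument.  Scan the free-register bitmap for the first
   suitably aligned block that is entirely free.  */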
5330 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5331 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5332 {
5333 pcum->aapcs_vfp_reg_alloc = mask << regno;
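/* If the argument's own mode cannot live directly in VFP registers
   (a BLKmode aggregate, TImode without NEON, or any mode the chosen
   VFP register cannot hold), describe its location as a PARALLEL of
   element-sized registers instead.  */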
5334 if (mode == BLKmode
5335 || (mode == TImode && ! TARGET_NEON)
5336 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5337 {
5338 int i;
5339 int rcount = pcum->aapcs_vfp_rcount;
5340 int rshift = shift;
5341 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5342 rtx par;
5343 if (!TARGET_NEON)
5344 {
5345 /* Avoid using unsupported vector modes. */
5346 if (rmode == V2SImode)
5347 rmode = DImode;
5348 else if (rmode == V4SImode)
5349 {
5350 rmode = DImode;
5351 rcount *= 2;
5352 rshift /= 2;
5353 }
5354 }
5355 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5356 for (i = 0; i < rcount; i++)
5357 {
5358 rtx tmp = gen_rtx_REG (rmode,
5359 FIRST_VFP_REGNUM + regno + i * rshift);
5360 tmp = gen_rtx_EXPR_LIST
5361 (VOIDmode, tmp,
5362 GEN_INT (i * GET_MODE_SIZE (rmode)));
5363 XVECEXP (par, 0, i) = tmp;
5364 }
5365
5366 pcum->aapcs_reg = par;
5367 }
5368 else
5369 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5370 return true;
5371 }
5372 return false;
5373 }
5374
5375 static rtx
5376 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5377 enum machine_mode mode,
5378 const_tree type ATTRIBUTE_UNUSED)
5379 {
5380 if (!use_vfp_abi (pcs_variant, false))
5381 return NULL;
5382
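/* As for argument passing, aggregates and TImode without NEON are
   described as a PARALLEL of element-sized VFP registers starting at
   the first VFP register.  */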
5383 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5384 {
5385 int count;
5386 enum machine_mode ag_mode;
5387 int i;
5388 rtx par;
5389 int shift;
5390
5391 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5392 &ag_mode, &count);
5393
5394 if (!TARGET_NEON)
5395 {
5396 if (ag_mode == V2SImode)
5397 ag_mode = DImode;
5398 else if (ag_mode == V4SImode)
5399 {
5400 ag_mode = DImode;
5401 count *= 2;
5402 }
5403 }
5404 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5405 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5406 for (i = 0; i < count; i++)
5407 {
5408 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5409 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5410 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5411 XVECEXP (par, 0, i) = tmp;
5412 }
5413
5414 return par;
5415 }
5416
5417 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5418 }
5419
5420 static void
5421 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5422 enum machine_mode mode ATTRIBUTE_UNUSED,
5423 const_tree type ATTRIBUTE_UNUSED)
5424 {
5425 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5426 pcum->aapcs_vfp_reg_alloc = 0;
5427 return;
5428 }
5429
5430 #define AAPCS_CP(X) \
5431 { \
5432 aapcs_ ## X ## _cum_init, \
5433 aapcs_ ## X ## _is_call_candidate, \
5434 aapcs_ ## X ## _allocate, \
5435 aapcs_ ## X ## _is_return_candidate, \
5436 aapcs_ ## X ## _allocate_return_reg, \
5437 aapcs_ ## X ## _advance \
5438 }
5439
5440 /* Table of co-processors that can be used to pass arguments in
5441 registers.  Ideally no argument should be a candidate for more than
5442 one co-processor table entry, but the table is processed in order
5443 and stops after the first match. If that entry then fails to put
5444 the argument into a co-processor register, the argument will go on
5445 the stack. */
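/* At present the only entry is for the VFP register file; see
   AAPCS_CP (vfp) below.  */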
5446 static struct
5447 {
5448 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5449 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5450
5451 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5452 BLKmode) is a candidate for this co-processor's registers; this
5453 function should ignore any position-dependent state in
5454 CUMULATIVE_ARGS and only use call-type dependent information. */
5455 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5456
5457 /* Return true if the argument does get a co-processor register; if
5458 so, aapcs_reg should be set to an RTX for the allocated register, in
5459 the form required for a return from FUNCTION_ARG. */
5460 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5461
5462 /* Return true if a result of mode MODE (or type TYPE if MODE is
5463 BLKmode) can be returned in this co-processor's registers. */
5464 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5465
5466 /* Allocate and return an RTX element to hold the return type of a
5467 call; this routine must not fail and will only be called if
5468 is_return_candidate returned true with the same parameters. */
5469 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5470
5471 /* Finish processing this argument and prepare to start processing
5472 the next one. */
5473 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5474 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5475 {
5476 AAPCS_CP(vfp)
5477 };
5478
5479 #undef AAPCS_CP
5480
5481 static int
5482 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5483 const_tree type)
5484 {
5485 int i;
5486
5487 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5488 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5489 return i;
5490
5491 return -1;
5492 }
5493
5494 static int
5495 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5496 {
5497 /* We aren't passed a decl, so we can't check that a call is local.
5498 However, it isn't clear that that would be a win anyway, since it
5499 might limit some tail-calling opportunities. */
5500 enum arm_pcs pcs_variant;
5501
5502 if (fntype)
5503 {
5504 const_tree fndecl = NULL_TREE;
5505
5506 if (TREE_CODE (fntype) == FUNCTION_DECL)
5507 {
5508 fndecl = fntype;
5509 fntype = TREE_TYPE (fntype);
5510 }
5511
5512 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5513 }
5514 else
5515 pcs_variant = arm_pcs_default;
5516
5517 if (pcs_variant != ARM_PCS_AAPCS)
5518 {
5519 int i;
5520
5521 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5522 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5523 TYPE_MODE (type),
5524 type))
5525 return i;
5526 }
5527 return -1;
5528 }
5529
5530 static rtx
5531 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5532 const_tree fntype)
5533 {
5534 /* We aren't passed a decl, so we can't check that a call is local.
5535 However, it isn't clear that that would be a win anyway, since it
5536 might limit some tail-calling opportunities. */
5537 enum arm_pcs pcs_variant;
5538 int unsignedp ATTRIBUTE_UNUSED;
5539
5540 if (fntype)
5541 {
5542 const_tree fndecl = NULL_TREE;
5543
5544 if (TREE_CODE (fntype) == FUNCTION_DECL)
5545 {
5546 fndecl = fntype;
5547 fntype = TREE_TYPE (fntype);
5548 }
5549
5550 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5551 }
5552 else
5553 pcs_variant = arm_pcs_default;
5554
5555 /* Promote integer types. */
5556 if (type && INTEGRAL_TYPE_P (type))
5557 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5558
5559 if (pcs_variant != ARM_PCS_AAPCS)
5560 {
5561 int i;
5562
5563 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5564 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5565 type))
5566 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5567 mode, type);
5568 }
5569
5570 /* Promotes small structs returned in a register to full-word size
5571 for big-endian AAPCS. */
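/* For example, a 6-byte structure is widened to 8 bytes and returned
   as a DImode value in r0/r1, so that its contents occupy the most
   significant bytes of the register pair.  */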
5572 if (type && arm_return_in_msb (type))
5573 {
5574 HOST_WIDE_INT size = int_size_in_bytes (type);
5575 if (size % UNITS_PER_WORD != 0)
5576 {
5577 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5578 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5579 }
5580 }
5581
5582 return gen_rtx_REG (mode, R0_REGNUM);
5583 }
5584
5585 static rtx
5586 aapcs_libcall_value (enum machine_mode mode)
5587 {
5588 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5589 && GET_MODE_SIZE (mode) <= 4)
5590 mode = SImode;
5591
5592 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5593 }
5594
5595 /* Lay out a function argument using the AAPCS rules. The rule
5596 numbers referred to here are those in the AAPCS. */
5597 static void
5598 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5599 const_tree type, bool named)
5600 {
5601 int nregs, nregs2;
5602 int ncrn;
5603
5604 /* We only need to do this once per argument. */
5605 if (pcum->aapcs_arg_processed)
5606 return;
5607
5608 pcum->aapcs_arg_processed = true;
5609
5610 /* Special case: if named is false then we are handling an incoming
5611 anonymous argument which is on the stack. */
5612 if (!named)
5613 return;
5614
5615 /* Is this a potential co-processor register candidate? */
5616 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5617 {
5618 int slot = aapcs_select_call_coproc (pcum, mode, type);
5619 pcum->aapcs_cprc_slot = slot;
5620
5621 /* We don't have to apply any of the rules from part B of the
5622 preparation phase, these are handled elsewhere in the
5623 compiler. */
5624
5625 if (slot >= 0)
5626 {
5627 /* A Co-processor register candidate goes either in its own
5628 class of registers or on the stack. */
5629 if (!pcum->aapcs_cprc_failed[slot])
5630 {
5631 /* C1.cp - Try to allocate the argument to co-processor
5632 registers. */
5633 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5634 return;
5635
5636 /* C2.cp - Put the argument on the stack and note that we
5637 can't assign any more candidates in this slot. We also
5638 need to note that we have allocated stack space, so that
5639 we won't later try to split a non-cprc candidate between
5640 core registers and the stack. */
5641 pcum->aapcs_cprc_failed[slot] = true;
5642 pcum->can_split = false;
5643 }
5644
5645 /* We didn't get a register, so this argument goes on the
5646 stack. */
5647 gcc_assert (pcum->can_split == false);
5648 return;
5649 }
5650 }
5651
5652 /* C3 - For double-word aligned arguments, round the NCRN up to the
5653 next even number. */
5654 ncrn = pcum->aapcs_ncrn;
5655 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5656 ncrn++;
5657
5658 nregs = ARM_NUM_REGS2(mode, type);
5659
5660 /* Sigh, this test should really assert that nregs > 0, but a GCC
5661 extension allows empty structs and then gives them empty size; it
5662 then allows such a structure to be passed by value. For some of
5663 the code below we have to pretend that such an argument has
5664 non-zero size so that we 'locate' it correctly either in
5665 registers or on the stack. */
5666 gcc_assert (nregs >= 0);
5667
5668 nregs2 = nregs ? nregs : 1;
5669
5670 /* C4 - Argument fits entirely in core registers. */
5671 if (ncrn + nregs2 <= NUM_ARG_REGS)
5672 {
5673 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5674 pcum->aapcs_next_ncrn = ncrn + nregs;
5675 return;
5676 }
5677
5678 /* C5 - Some core registers left and there are no arguments already
5679 on the stack: split this argument between the remaining core
5680 registers and the stack. */
5681 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5682 {
5683 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5684 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5685 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5686 return;
5687 }
5688
5689 /* C6 - NCRN is set to 4. */
5690 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5691
5692 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5693 return;
5694 }
5695
5696 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5697 for a call to a function whose data type is FNTYPE.
5698 For a library call, FNTYPE is NULL. */
5699 void
5700 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5701 rtx libname,
5702 tree fndecl ATTRIBUTE_UNUSED)
5703 {
5704 /* Determine which procedure-call standard variant to use. */
5705 if (fntype)
5706 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5707 else
5708 pcum->pcs_variant = arm_pcs_default;
5709
5710 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5711 {
5712 if (arm_libcall_uses_aapcs_base (libname))
5713 pcum->pcs_variant = ARM_PCS_AAPCS;
5714
5715 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5716 pcum->aapcs_reg = NULL_RTX;
5717 pcum->aapcs_partial = 0;
5718 pcum->aapcs_arg_processed = false;
5719 pcum->aapcs_cprc_slot = -1;
5720 pcum->can_split = true;
5721
5722 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5723 {
5724 int i;
5725
5726 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5727 {
5728 pcum->aapcs_cprc_failed[i] = false;
5729 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5730 }
5731 }
5732 return;
5733 }
5734
5735 /* Legacy ABIs */
5736
5737 /* On the ARM, the offset starts at 0. */
5738 pcum->nregs = 0;
5739 pcum->iwmmxt_nregs = 0;
5740 pcum->can_split = true;
5741
5742 /* Varargs vectors are treated the same as long long.
5743 named_count avoids having to change the way arm handles 'named' */
5744 pcum->named_count = 0;
5745 pcum->nargs = 0;
5746
5747 if (TARGET_REALLY_IWMMXT && fntype)
5748 {
5749 tree fn_arg;
5750
5751 for (fn_arg = TYPE_ARG_TYPES (fntype);
5752 fn_arg;
5753 fn_arg = TREE_CHAIN (fn_arg))
5754 pcum->named_count += 1;
5755
5756 if (! pcum->named_count)
5757 pcum->named_count = INT_MAX;
5758 }
5759 }
5760
5761 /* Return true if we use LRA instead of reload pass. */
5762 static bool
5763 arm_lra_p (void)
5764 {
5765 return arm_lra_flag;
5766 }
5767
5768 /* Return true if mode/type need doubleword alignment. */
5769 static bool
5770 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5771 {
5772 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5773 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5774 }
5775
5776
5777 /* Determine where to put an argument to a function.
5778 Value is zero to push the argument on the stack,
5779 or a hard register in which to store the argument.
5780
5781 MODE is the argument's machine mode.
5782 TYPE is the data type of the argument (as a tree).
5783 This is null for libcalls where that information may
5784 not be available.
5785 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5786 the preceding args and about the function being called.
5787 NAMED is nonzero if this argument is a named parameter
5788 (otherwise it is an extra parameter matching an ellipsis).
5789
5790 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5791 other arguments are passed on the stack. If (NAMED == 0) (which happens
5792 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5793 defined), say it is passed on the stack (function_prologue will
5794 indeed make it be passed on the stack if necessary). */
5795
5796 static rtx
5797 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5798 const_tree type, bool named)
5799 {
5800 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5801 int nregs;
5802
5803 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5804 a call insn (op3 of a call_value insn). */
5805 if (mode == VOIDmode)
5806 return const0_rtx;
5807
5808 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5809 {
5810 aapcs_layout_arg (pcum, mode, type, named);
5811 return pcum->aapcs_reg;
5812 }
5813
5814 /* Varargs vectors are treated the same as long long.
5815 named_count avoids having to change the way arm handles 'named' */
5816 if (TARGET_IWMMXT_ABI
5817 && arm_vector_mode_supported_p (mode)
5818 && pcum->named_count > pcum->nargs + 1)
5819 {
5820 if (pcum->iwmmxt_nregs <= 9)
5821 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5822 else
5823 {
5824 pcum->can_split = false;
5825 return NULL_RTX;
5826 }
5827 }
5828
5829 /* Put doubleword aligned quantities in even register pairs. */
5830 if (pcum->nregs & 1
5831 && ARM_DOUBLEWORD_ALIGN
5832 && arm_needs_doubleword_align (mode, type))
5833 pcum->nregs++;
5834
5835 /* Only allow splitting an arg between regs and memory if all preceding
5836 args were allocated to regs. For args passed by reference we only count
5837 the reference pointer. */
5838 if (pcum->can_split)
5839 nregs = 1;
5840 else
5841 nregs = ARM_NUM_REGS2 (mode, type);
5842
5843 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5844 return NULL_RTX;
5845
5846 return gen_rtx_REG (mode, pcum->nregs);
5847 }
5848
5849 static unsigned int
5850 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5851 {
5852 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5853 ? DOUBLEWORD_ALIGNMENT
5854 : PARM_BOUNDARY);
5855 }
5856
5857 static int
5858 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5859 tree type, bool named)
5860 {
5861 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5862 int nregs = pcum->nregs;
5863
5864 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5865 {
5866 aapcs_layout_arg (pcum, mode, type, named);
5867 return pcum->aapcs_partial;
5868 }
5869
5870 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5871 return 0;
5872
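/* If the argument starts in core registers but does not fit entirely,
   only the words that fit are passed in registers; e.g. a DImode
   argument starting at r3 has 4 bytes in r3 and 4 on the stack.  */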
5873 if (NUM_ARG_REGS > nregs
5874 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5875 && pcum->can_split)
5876 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5877
5878 return 0;
5879 }
5880
5881 /* Update the data in PCUM to advance over an argument
5882 of mode MODE and data type TYPE.
5883 (TYPE is null for libcalls where that information may not be available.) */
5884
5885 static void
5886 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5887 const_tree type, bool named)
5888 {
5889 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5890
5891 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5892 {
5893 aapcs_layout_arg (pcum, mode, type, named);
5894
5895 if (pcum->aapcs_cprc_slot >= 0)
5896 {
5897 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5898 type);
5899 pcum->aapcs_cprc_slot = -1;
5900 }
5901
5902 /* Generic stuff. */
5903 pcum->aapcs_arg_processed = false;
5904 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5905 pcum->aapcs_reg = NULL_RTX;
5906 pcum->aapcs_partial = 0;
5907 }
5908 else
5909 {
5910 pcum->nargs += 1;
5911 if (arm_vector_mode_supported_p (mode)
5912 && pcum->named_count > pcum->nargs
5913 && TARGET_IWMMXT_ABI)
5914 pcum->iwmmxt_nregs += 1;
5915 else
5916 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5917 }
5918 }
5919
5920 /* Variable sized types are passed by reference. This is a GCC
5921 extension to the ARM ABI. */
5922
5923 static bool
5924 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5925 enum machine_mode mode ATTRIBUTE_UNUSED,
5926 const_tree type, bool named ATTRIBUTE_UNUSED)
5927 {
5928 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5929 }
5930 \f
5931 /* Encode the current state of the #pragma [no_]long_calls. */
5932 typedef enum
5933 {
5934 OFF, /* No #pragma [no_]long_calls is in effect. */
5935 LONG, /* #pragma long_calls is in effect. */
5936 SHORT /* #pragma no_long_calls is in effect. */
5937 } arm_pragma_enum;
5938
5939 static arm_pragma_enum arm_pragma_long_calls = OFF;
5940
5941 void
5942 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5943 {
5944 arm_pragma_long_calls = LONG;
5945 }
5946
5947 void
5948 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5949 {
5950 arm_pragma_long_calls = SHORT;
5951 }
5952
5953 void
5954 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5955 {
5956 arm_pragma_long_calls = OFF;
5957 }
5958 \f
5959 /* Handle an attribute requiring a FUNCTION_DECL;
5960 arguments as in struct attribute_spec.handler. */
5961 static tree
5962 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5963 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5964 {
5965 if (TREE_CODE (*node) != FUNCTION_DECL)
5966 {
5967 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5968 name);
5969 *no_add_attrs = true;
5970 }
5971
5972 return NULL_TREE;
5973 }
5974
5975 /* Handle an "interrupt" or "isr" attribute;
5976 arguments as in struct attribute_spec.handler. */
5977 static tree
5978 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5979 bool *no_add_attrs)
5980 {
5981 if (DECL_P (*node))
5982 {
5983 if (TREE_CODE (*node) != FUNCTION_DECL)
5984 {
5985 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5986 name);
5987 *no_add_attrs = true;
5988 }
5989 /* FIXME: the argument if any is checked for type attributes;
5990 should it be checked for decl ones? */
5991 }
5992 else
5993 {
5994 if (TREE_CODE (*node) == FUNCTION_TYPE
5995 || TREE_CODE (*node) == METHOD_TYPE)
5996 {
5997 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5998 {
5999 warning (OPT_Wattributes, "%qE attribute ignored",
6000 name);
6001 *no_add_attrs = true;
6002 }
6003 }
6004 else if (TREE_CODE (*node) == POINTER_TYPE
6005 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6006 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6007 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6008 {
6009 *node = build_variant_type_copy (*node);
6010 TREE_TYPE (*node) = build_type_attribute_variant
6011 (TREE_TYPE (*node),
6012 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6013 *no_add_attrs = true;
6014 }
6015 else
6016 {
6017 /* Possibly pass this attribute on from the type to a decl. */
6018 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6019 | (int) ATTR_FLAG_FUNCTION_NEXT
6020 | (int) ATTR_FLAG_ARRAY_NEXT))
6021 {
6022 *no_add_attrs = true;
6023 return tree_cons (name, args, NULL_TREE);
6024 }
6025 else
6026 {
6027 warning (OPT_Wattributes, "%qE attribute ignored",
6028 name);
6029 }
6030 }
6031 }
6032
6033 return NULL_TREE;
6034 }
6035
6036 /* Handle a "pcs" attribute; arguments as in struct
6037 attribute_spec.handler. */
6038 static tree
6039 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6040 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6041 {
6042 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6043 {
6044 warning (OPT_Wattributes, "%qE attribute ignored", name);
6045 *no_add_attrs = true;
6046 }
6047 return NULL_TREE;
6048 }
6049
6050 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6051 /* Handle the "notshared" attribute. This attribute is another way of
6052 requesting hidden visibility. ARM's compiler supports
6053 "__declspec(notshared)"; we support the same thing via an
6054 attribute. */
6055
6056 static tree
6057 arm_handle_notshared_attribute (tree *node,
6058 tree name ATTRIBUTE_UNUSED,
6059 tree args ATTRIBUTE_UNUSED,
6060 int flags ATTRIBUTE_UNUSED,
6061 bool *no_add_attrs)
6062 {
6063 tree decl = TYPE_NAME (*node);
6064
6065 if (decl)
6066 {
6067 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6068 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6069 *no_add_attrs = false;
6070 }
6071 return NULL_TREE;
6072 }
6073 #endif
6074
6075 /* Return 0 if the attributes for two types are incompatible, 1 if they
6076 are compatible, and 2 if they are nearly compatible (which causes a
6077 warning to be generated). */
6078 static int
6079 arm_comp_type_attributes (const_tree type1, const_tree type2)
6080 {
6081 int l1, l2, s1, s2;
6082
6083 /* Check for mismatch of non-default calling convention. */
6084 if (TREE_CODE (type1) != FUNCTION_TYPE)
6085 return 1;
6086
6087 /* Check for mismatched call attributes. */
6088 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6089 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6090 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6091 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6092
6093 /* Only bother to check if an attribute is defined. */
6094 if (l1 | l2 | s1 | s2)
6095 {
6096 /* If one type has an attribute, the other must have the same attribute. */
6097 if ((l1 != l2) || (s1 != s2))
6098 return 0;
6099
6100 /* Disallow mixed attributes. */
6101 if ((l1 & s2) || (l2 & s1))
6102 return 0;
6103 }
6104
6105 /* Check for mismatched ISR attribute. */
6106 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6107 if (! l1)
6108 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6109 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6110 if (! l2)
6111 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6112 if (l1 != l2)
6113 return 0;
6114
6115 return 1;
6116 }
6117
6118 /* Assigns default attributes to newly defined type. This is used to
6119 set short_call/long_call attributes for function types of
6120 functions defined inside corresponding #pragma scopes. */
6121 static void
6122 arm_set_default_type_attributes (tree type)
6123 {
6124 /* Add __attribute__ ((long_call)) to all functions when
6125 inside #pragma long_calls, or __attribute__ ((short_call))
6126 when inside #pragma no_long_calls. */
6127 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6128 {
6129 tree type_attr_list, attr_name;
6130 type_attr_list = TYPE_ATTRIBUTES (type);
6131
6132 if (arm_pragma_long_calls == LONG)
6133 attr_name = get_identifier ("long_call");
6134 else if (arm_pragma_long_calls == SHORT)
6135 attr_name = get_identifier ("short_call");
6136 else
6137 return;
6138
6139 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6140 TYPE_ATTRIBUTES (type) = type_attr_list;
6141 }
6142 }
6143 \f
6144 /* Return true if DECL is known to be linked into section SECTION. */
6145
6146 static bool
6147 arm_function_in_section_p (tree decl, section *section)
6148 {
6149 /* We can only be certain about functions defined in the same
6150 compilation unit. */
6151 if (!TREE_STATIC (decl))
6152 return false;
6153
6154 /* Make sure that SYMBOL always binds to the definition in this
6155 compilation unit. */
6156 if (!targetm.binds_local_p (decl))
6157 return false;
6158
6159 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6160 if (!DECL_SECTION_NAME (decl))
6161 {
6162 /* Make sure that we will not create a unique section for DECL. */
6163 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6164 return false;
6165 }
6166
6167 return function_section (decl) == section;
6168 }
6169
6170 /* Return nonzero if a 32-bit "long_call" should be generated for
6171 a call from the current function to DECL. We generate a long_call
6172 if the function:
6173
6174 a. has an __attribute__((long_call))
6175 or b. is within the scope of a #pragma long_calls
6176 or c. the -mlong-calls command line switch has been specified
6177
6178 However we do not generate a long call if the function:
6179
6180 d. has an __attribute__ ((short_call))
6181 or e. is inside the scope of a #pragma no_long_calls
6182 or f. is defined in the same section as the current function. */
6183
6184 bool
6185 arm_is_long_call_p (tree decl)
6186 {
6187 tree attrs;
6188
6189 if (!decl)
6190 return TARGET_LONG_CALLS;
6191
6192 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6193 if (lookup_attribute ("short_call", attrs))
6194 return false;
6195
6196 /* For "f", be conservative, and only cater for cases in which the
6197 whole of the current function is placed in the same section. */
6198 if (!flag_reorder_blocks_and_partition
6199 && TREE_CODE (decl) == FUNCTION_DECL
6200 && arm_function_in_section_p (decl, current_function_section ()))
6201 return false;
6202
6203 if (lookup_attribute ("long_call", attrs))
6204 return true;
6205
6206 return TARGET_LONG_CALLS;
6207 }
6208
6209 /* Return nonzero if it is ok to make a tail-call to DECL. */
6210 static bool
6211 arm_function_ok_for_sibcall (tree decl, tree exp)
6212 {
6213 unsigned long func_type;
6214
6215 if (cfun->machine->sibcall_blocked)
6216 return false;
6217
6218 /* Never tailcall something if we are generating code for Thumb-1. */
6219 if (TARGET_THUMB1)
6220 return false;
6221
6222 /* The PIC register is live on entry to VxWorks PLT entries, so we
6223 must make the call before restoring the PIC register. */
6224 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6225 return false;
6226
6227 /* If we are interworking and the function is not declared static
6228 then we can't tail-call it unless we know that it exists in this
6229 compilation unit (since it might be a Thumb routine). */
6230 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6231 && !TREE_ASM_WRITTEN (decl))
6232 return false;
6233
6234 func_type = arm_current_func_type ();
6235 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6236 if (IS_INTERRUPT (func_type))
6237 return false;
6238
6239 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6240 {
6241 /* Check that the return value locations are the same. For
6242 example that we aren't returning a value from the sibling in
6243 a VFP register but then need to transfer it to a core
6244 register. */
6245 rtx a, b;
6246
6247 a = arm_function_value (TREE_TYPE (exp), decl, false);
6248 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6249 cfun->decl, false);
6250 if (!rtx_equal_p (a, b))
6251 return false;
6252 }
6253
6254 /* Never tailcall if function may be called with a misaligned SP. */
6255 if (IS_STACKALIGN (func_type))
6256 return false;
6257
6258 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6259 references should become a NOP. Don't convert such calls into
6260 sibling calls. */
6261 if (TARGET_AAPCS_BASED
6262 && arm_abi == ARM_ABI_AAPCS
6263 && decl
6264 && DECL_WEAK (decl))
6265 return false;
6266
6267 /* Everything else is ok. */
6268 return true;
6269 }
6270
6271 \f
6272 /* Addressing mode support functions. */
6273
6274 /* Return nonzero if X is a legitimate immediate operand when compiling
6275 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6276 int
6277 legitimate_pic_operand_p (rtx x)
6278 {
6279 if (GET_CODE (x) == SYMBOL_REF
6280 || (GET_CODE (x) == CONST
6281 && GET_CODE (XEXP (x, 0)) == PLUS
6282 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6283 return 0;
6284
6285 return 1;
6286 }
6287
6288 /* Record that the current function needs a PIC register. Initialize
6289 cfun->machine->pic_reg if we have not already done so. */
6290
6291 static void
6292 require_pic_register (void)
6293 {
6294 /* A lot of the logic here is made obscure by the fact that this
6295 routine gets called as part of the rtx cost estimation process.
6296 We don't want those calls to affect any assumptions about the real
6297 function; and further, we can't call entry_of_function() until we
6298 start the real expansion process. */
6299 if (!crtl->uses_pic_offset_table)
6300 {
6301 gcc_assert (can_create_pseudo_p ());
6302 if (arm_pic_register != INVALID_REGNUM
6303 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6304 {
6305 if (!cfun->machine->pic_reg)
6306 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6307
6308 /* Play games to avoid marking the function as needing pic
6309 if we are being called as part of the cost-estimation
6310 process. */
6311 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6312 crtl->uses_pic_offset_table = 1;
6313 }
6314 else
6315 {
6316 rtx seq, insn;
6317
6318 if (!cfun->machine->pic_reg)
6319 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6320
6321 /* Play games to avoid marking the function as needing pic
6322 if we are being called as part of the cost-estimation
6323 process. */
6324 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6325 {
6326 crtl->uses_pic_offset_table = 1;
6327 start_sequence ();
6328
6329 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6330 && arm_pic_register > LAST_LO_REGNUM)
6331 emit_move_insn (cfun->machine->pic_reg,
6332 gen_rtx_REG (Pmode, arm_pic_register));
6333 else
6334 arm_load_pic_register (0UL);
6335
6336 seq = get_insns ();
6337 end_sequence ();
6338
6339 for (insn = seq; insn; insn = NEXT_INSN (insn))
6340 if (INSN_P (insn))
6341 INSN_LOCATION (insn) = prologue_location;
6342
6343 /* We can be called during expansion of PHI nodes, where
6344 we can't yet emit instructions directly in the final
6345 insn stream. Queue the insns on the entry edge, they will
6346 be committed after everything else is expanded. */
6347 insert_insn_on_edge (seq,
6348 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6349 }
6350 }
6351 }
6352 }
6353
6354 rtx
6355 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6356 {
6357 if (GET_CODE (orig) == SYMBOL_REF
6358 || GET_CODE (orig) == LABEL_REF)
6359 {
6360 rtx insn;
6361
6362 if (reg == 0)
6363 {
6364 gcc_assert (can_create_pseudo_p ());
6365 reg = gen_reg_rtx (Pmode);
6366 }
6367
6368 /* VxWorks does not impose a fixed gap between segments; the run-time
6369 gap can be different from the object-file gap. We therefore can't
6370 use GOTOFF unless we are absolutely sure that the symbol is in the
6371 same segment as the GOT. Unfortunately, the flexibility of linker
6372 scripts means that we can't be sure of that in general, so assume
6373 that GOTOFF is never valid on VxWorks. */
6374 if ((GET_CODE (orig) == LABEL_REF
6375 || (GET_CODE (orig) == SYMBOL_REF &&
6376 SYMBOL_REF_LOCAL_P (orig)))
6377 && NEED_GOT_RELOC
6378 && arm_pic_data_is_text_relative)
6379 insn = arm_pic_static_addr (orig, reg);
6380 else
6381 {
6382 rtx pat;
6383 rtx mem;
6384
6385 /* If this function doesn't have a pic register, create one now. */
6386 require_pic_register ();
6387
6388 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6389
6390 /* Make the MEM as close to a constant as possible. */
6391 mem = SET_SRC (pat);
6392 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6393 MEM_READONLY_P (mem) = 1;
6394 MEM_NOTRAP_P (mem) = 1;
6395
6396 insn = emit_insn (pat);
6397 }
6398
6399 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6400 by loop. */
6401 set_unique_reg_note (insn, REG_EQUAL, orig);
6402
6403 return reg;
6404 }
6405 else if (GET_CODE (orig) == CONST)
6406 {
6407 rtx base, offset;
6408
6409 if (GET_CODE (XEXP (orig, 0)) == PLUS
6410 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6411 return orig;
6412
6413 /* Handle the case where we have: const (UNSPEC_TLS). */
6414 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6415 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6416 return orig;
6417
6418 /* Handle the case where we have:
6419 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6420 CONST_INT. */
6421 if (GET_CODE (XEXP (orig, 0)) == PLUS
6422 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6423 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6424 {
6425 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6426 return orig;
6427 }
6428
6429 if (reg == 0)
6430 {
6431 gcc_assert (can_create_pseudo_p ());
6432 reg = gen_reg_rtx (Pmode);
6433 }
6434
6435 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6436
6437 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6438 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6439 base == reg ? 0 : reg);
6440
6441 if (CONST_INT_P (offset))
6442 {
6443 /* The base register doesn't really matter; we only want to
6444 test the index for the appropriate mode. */
6445 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6446 {
6447 gcc_assert (can_create_pseudo_p ());
6448 offset = force_reg (Pmode, offset);
6449 }
6450
6451 if (CONST_INT_P (offset))
6452 return plus_constant (Pmode, base, INTVAL (offset));
6453 }
6454
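/* For multi-word integer (or soft-float) values, reg+reg addressing
   is generally not available, so compute the full address into REG
   rather than returning a PLUS expression.  */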
6455 if (GET_MODE_SIZE (mode) > 4
6456 && (GET_MODE_CLASS (mode) == MODE_INT
6457 || TARGET_SOFT_FLOAT))
6458 {
6459 emit_insn (gen_addsi3 (reg, base, offset));
6460 return reg;
6461 }
6462
6463 return gen_rtx_PLUS (Pmode, base, offset);
6464 }
6465
6466 return orig;
6467 }
6468
6469
6470 /* Find a spare register to use during the prolog of a function. */
6471
6472 static int
6473 thumb_find_work_register (unsigned long pushed_regs_mask)
6474 {
6475 int reg;
6476
6477 /* Check the argument registers first as these are call-used. The
6478 register allocation order means that sometimes r3 might be used
6479 but earlier argument registers might not, so check them all. */
6480 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6481 if (!df_regs_ever_live_p (reg))
6482 return reg;
6483
6484 /* Before going on to check the call-saved registers we can try a couple
6485 more ways of deducing that r3 is available. The first is when we are
6486 pushing anonymous arguments onto the stack and we have less than 4
6487 registers worth of fixed arguments(*). In this case r3 will be part of
6488 the variable argument list and so we can be sure that it will be
6489 pushed right at the start of the function. Hence it will be available
6490 for the rest of the prologue.
6491 (*): ie crtl->args.pretend_args_size is greater than 0. */
6492 if (cfun->machine->uses_anonymous_args
6493 && crtl->args.pretend_args_size > 0)
6494 return LAST_ARG_REGNUM;
6495
6496 /* The other case is when we have fixed arguments but less than 4 registers
6497 worth. In this case r3 might be used in the body of the function, but
6498 it is not being used to convey an argument into the function. In theory
6499 we could just check crtl->args.size to see how many bytes are
6500 being passed in argument registers, but it seems that it is unreliable.
6501 Sometimes it will have the value 0 when in fact arguments are being
6502 passed. (See testcase execute/20021111-1.c for an example). So we also
6503 check the args_info.nregs field. The problem with this field is
6504 that it makes no allowances for arguments that are passed to the
6505 function but which are not used. Hence we could miss an opportunity
6506 when a function has an unused argument in r3. But it is better to be
6507 safe than to be sorry. */
6508 if (! cfun->machine->uses_anonymous_args
6509 && crtl->args.size >= 0
6510 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6511 && (TARGET_AAPCS_BASED
6512 ? crtl->args.info.aapcs_ncrn < 4
6513 : crtl->args.info.nregs < 4))
6514 return LAST_ARG_REGNUM;
6515
6516 /* Otherwise look for a call-saved register that is going to be pushed. */
6517 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6518 if (pushed_regs_mask & (1 << reg))
6519 return reg;
6520
6521 if (TARGET_THUMB2)
6522 {
6523 /* Thumb-2 can use high regs. */
6524 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6525 if (pushed_regs_mask & (1 << reg))
6526 return reg;
6527 }
6528 /* Something went wrong - thumb_compute_save_reg_mask()
6529 should have arranged for a suitable register to be pushed. */
6530 gcc_unreachable ();
6531 }
6532
6533 static GTY(()) int pic_labelno;
6534
6535 /* Generate code to load the PIC register. In Thumb mode SAVED_REGS is
6536 the mask of pushed registers, used to find a scratch low register. */
6537
6538 void
6539 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6540 {
6541 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6542
6543 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6544 return;
6545
6546 gcc_assert (flag_pic);
6547
6548 pic_reg = cfun->machine->pic_reg;
6549 if (TARGET_VXWORKS_RTP)
6550 {
6551 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6552 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6553 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6554
6555 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6556
6557 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6558 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6559 }
6560 else
6561 {
6562 /* We use an UNSPEC rather than a LABEL_REF because this label
6563 never appears in the code stream. */
6564
6565 labelno = GEN_INT (pic_labelno++);
6566 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6567 l1 = gen_rtx_CONST (VOIDmode, l1);
6568
6569 /* On the ARM the PC register contains 'dot + 8' at the time of the
6570 addition; on the Thumb it is 'dot + 4'. */
6571 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6572 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6573 UNSPEC_GOTSYM_OFF);
6574 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6575
6576 if (TARGET_32BIT)
6577 {
6578 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6579 }
6580 else /* TARGET_THUMB1 */
6581 {
6582 if (arm_pic_register != INVALID_REGNUM
6583 && REGNO (pic_reg) > LAST_LO_REGNUM)
6584 {
6585 /* We will have pushed the pic register, so we should always be
6586 able to find a work register. */
6587 pic_tmp = gen_rtx_REG (SImode,
6588 thumb_find_work_register (saved_regs));
6589 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6590 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6591 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6592 }
6593 else if (arm_pic_register != INVALID_REGNUM
6594 && arm_pic_register > LAST_LO_REGNUM
6595 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6596 {
6597 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6598 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6599 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6600 }
6601 else
6602 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6603 }
6604 }
6605
6606 /* Need to emit this whether or not we obey regdecls,
6607 since setjmp/longjmp can cause life info to screw up. */
6608 emit_use (pic_reg);
6609 }
6610
6611 /* Generate code to load the address of a static var when flag_pic is set. */
6612 static rtx
6613 arm_pic_static_addr (rtx orig, rtx reg)
6614 {
6615 rtx l1, labelno, offset_rtx, insn;
6616
6617 gcc_assert (flag_pic);
6618
6619 /* We use an UNSPEC rather than a LABEL_REF because this label
6620 never appears in the code stream. */
6621 labelno = GEN_INT (pic_labelno++);
6622 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6623 l1 = gen_rtx_CONST (VOIDmode, l1);
6624
6625 /* On the ARM the PC register contains 'dot + 8' at the time of the
6626 addition; on the Thumb it is 'dot + 4'. */
6627 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6628 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6629 UNSPEC_SYMBOL_OFFSET);
6630 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6631
6632 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6633 return insn;
6634 }
6635
6636 /* Return nonzero if X is valid as an ARM state addressing register. */
6637 static int
6638 arm_address_register_rtx_p (rtx x, int strict_p)
6639 {
6640 int regno;
6641
6642 if (!REG_P (x))
6643 return 0;
6644
6645 regno = REGNO (x);
6646
6647 if (strict_p)
6648 return ARM_REGNO_OK_FOR_BASE_P (regno);
6649
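/* When not strict, accept any hard core register, any pseudo, and the
   soft frame/arg pointers; a pseudo may be allocated to a valid base
   register later.  */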
6650 return (regno <= LAST_ARM_REGNUM
6651 || regno >= FIRST_PSEUDO_REGISTER
6652 || regno == FRAME_POINTER_REGNUM
6653 || regno == ARG_POINTER_REGNUM);
6654 }
6655
6656 /* Return TRUE if this rtx is the difference of a symbol and a label,
6657 and will reduce to a PC-relative relocation in the object file.
6658 Expressions like this can be left alone when generating PIC, rather
6659 than forced through the GOT. */
6660 static int
6661 pcrel_constant_p (rtx x)
6662 {
6663 if (GET_CODE (x) == MINUS)
6664 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6665
6666 return FALSE;
6667 }
6668
6669 /* Return true if X will surely end up in an index register after the next
6670 splitting pass. */
6671 static bool
6672 will_be_in_index_register (const_rtx x)
6673 {
6674 /* arm.md: calculate_pic_address will split this into a register. */
6675 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6676 }
6677
6678 /* Return nonzero if X is a valid ARM state address operand. */
6679 int
6680 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6681 int strict_p)
6682 {
6683 bool use_ldrd;
6684 enum rtx_code code = GET_CODE (x);
6685
6686 if (arm_address_register_rtx_p (x, strict_p))
6687 return 1;
6688
6689 use_ldrd = (TARGET_LDRD
6690 && (mode == DImode
6691 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6692
6693 if (code == POST_INC || code == PRE_DEC
6694 || ((code == PRE_INC || code == POST_DEC)
6695 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6696 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6697
6698 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6699 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6700 && GET_CODE (XEXP (x, 1)) == PLUS
6701 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6702 {
6703 rtx addend = XEXP (XEXP (x, 1), 1);
6704
6705 /* Don't allow ldrd post increment by register because it's hard
6706 to fixup invalid register choices. */
6707 if (use_ldrd
6708 && GET_CODE (x) == POST_MODIFY
6709 && REG_P (addend))
6710 return 0;
6711
6712 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6713 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6714 }
6715
6716 /* After reload, constants split into minipools will have addresses
6717 from a LABEL_REF. */
6718 else if (reload_completed
6719 && (code == LABEL_REF
6720 || (code == CONST
6721 && GET_CODE (XEXP (x, 0)) == PLUS
6722 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6723 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6724 return 1;
6725
6726 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6727 return 0;
6728
6729 else if (code == PLUS)
6730 {
6731 rtx xop0 = XEXP (x, 0);
6732 rtx xop1 = XEXP (x, 1);
6733
6734 return ((arm_address_register_rtx_p (xop0, strict_p)
6735 && ((CONST_INT_P (xop1)
6736 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6737 || (!strict_p && will_be_in_index_register (xop1))))
6738 || (arm_address_register_rtx_p (xop1, strict_p)
6739 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6740 }
6741
6742 #if 0
6743 /* Reload currently can't handle MINUS, so disable this for now */
6744 else if (GET_CODE (x) == MINUS)
6745 {
6746 rtx xop0 = XEXP (x, 0);
6747 rtx xop1 = XEXP (x, 1);
6748
6749 return (arm_address_register_rtx_p (xop0, strict_p)
6750 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6751 }
6752 #endif
6753
6754 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6755 && code == SYMBOL_REF
6756 && CONSTANT_POOL_ADDRESS_P (x)
6757 && ! (flag_pic
6758 && symbol_mentioned_p (get_pool_constant (x))
6759 && ! pcrel_constant_p (get_pool_constant (x))))
6760 return 1;
6761
6762 return 0;
6763 }
6764
6765 /* Return nonzero if X is a valid Thumb-2 address operand. */
6766 static int
6767 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6768 {
6769 bool use_ldrd;
6770 enum rtx_code code = GET_CODE (x);
6771
6772 if (arm_address_register_rtx_p (x, strict_p))
6773 return 1;
6774
6775 use_ldrd = (TARGET_LDRD
6776 && (mode == DImode
6777 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6778
6779 if (code == POST_INC || code == PRE_DEC
6780 || ((code == PRE_INC || code == POST_DEC)
6781 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6782 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6783
6784 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6785 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6786 && GET_CODE (XEXP (x, 1)) == PLUS
6787 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6788 {
6789 /* Thumb-2 only has autoincrement by constant. */
6790 rtx addend = XEXP (XEXP (x, 1), 1);
6791 HOST_WIDE_INT offset;
6792
6793 if (!CONST_INT_P (addend))
6794 return 0;
6795
6796 offset = INTVAL(addend);
6797 if (GET_MODE_SIZE (mode) <= 4)
6798 return (offset > -256 && offset < 256);
6799
6800 return (use_ldrd && offset > -1024 && offset < 1024
6801 && (offset & 3) == 0);
6802 }
6803
6804 /* After reload, constants split into minipools will have addresses
6805 from a LABEL_REF. */
6806 else if (reload_completed
6807 && (code == LABEL_REF
6808 || (code == CONST
6809 && GET_CODE (XEXP (x, 0)) == PLUS
6810 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6811 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6812 return 1;
6813
6814 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6815 return 0;
6816
6817 else if (code == PLUS)
6818 {
6819 rtx xop0 = XEXP (x, 0);
6820 rtx xop1 = XEXP (x, 1);
6821
6822 return ((arm_address_register_rtx_p (xop0, strict_p)
6823 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6824 || (!strict_p && will_be_in_index_register (xop1))))
6825 || (arm_address_register_rtx_p (xop1, strict_p)
6826 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6827 }
6828
6829 /* Normally we can assign constant values to target registers without
6830 the help of a constant pool. But there are cases where we have to use the
6831 constant pool, such as:
6832 1) assigning a label to a register;
6833 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6834
6835 A constant pool access of the form:
6836 (set (reg r0) (mem (symbol_ref (".LC0"))))
6837 will cause the use of a literal pool (later, in function arm_reorg).
6838 So here we mark such a form as invalid, and the compiler
6839 will adjust it into:
6840 (set (reg r0) (symbol_ref (".LC0")))
6841 (set (reg r0) (mem (reg r0))).
6842 No extra register is required, and (mem (reg r0)) won't cause the use
6843 of literal pools. */
6844 else if (arm_disable_literal_pool && code == SYMBOL_REF
6845 && CONSTANT_POOL_ADDRESS_P (x))
6846 return 0;
6847
6848 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6849 && code == SYMBOL_REF
6850 && CONSTANT_POOL_ADDRESS_P (x)
6851 && ! (flag_pic
6852 && symbol_mentioned_p (get_pool_constant (x))
6853 && ! pcrel_constant_p (get_pool_constant (x))))
6854 return 1;
6855
6856 return 0;
6857 }
6858
6859 /* Return nonzero if INDEX is valid for an address index operand in
6860 ARM state. */
6861 static int
6862 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6863 int strict_p)
6864 {
6865 HOST_WIDE_INT range;
6866 enum rtx_code code = GET_CODE (index);
6867
6868 /* Standard coprocessor addressing modes. */
6869 if (TARGET_HARD_FLOAT
6870 && TARGET_VFP
6871 && (mode == SFmode || mode == DFmode))
6872 return (code == CONST_INT && INTVAL (index) < 1024
6873 && INTVAL (index) > -1024
6874 && (INTVAL (index) & 3) == 0);
6875
6876 /* For quad modes, we restrict the constant offset to be slightly less
6877 than what the instruction format permits. We do this because for
6878 quad mode moves, we will actually decompose them into two separate
6879 double-mode reads or writes. INDEX must therefore be a valid
6880 (double-mode) offset and so should INDEX+8. */
6881 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6882 return (code == CONST_INT
6883 && INTVAL (index) < 1016
6884 && INTVAL (index) > -1024
6885 && (INTVAL (index) & 3) == 0);
6886
6887 /* We have no such constraint on double mode offsets, so we permit the
6888 full range of the instruction format. */
6889 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6890 return (code == CONST_INT
6891 && INTVAL (index) < 1024
6892 && INTVAL (index) > -1024
6893 && (INTVAL (index) & 3) == 0);
6894
6895 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6896 return (code == CONST_INT
6897 && INTVAL (index) < 1024
6898 && INTVAL (index) > -1024
6899 && (INTVAL (index) & 3) == 0);
6900
6901 if (arm_address_register_rtx_p (index, strict_p)
6902 && (GET_MODE_SIZE (mode) <= 4))
6903 return 1;
6904
6905 if (mode == DImode || mode == DFmode)
6906 {
6907 if (code == CONST_INT)
6908 {
6909 HOST_WIDE_INT val = INTVAL (index);
6910
6911 if (TARGET_LDRD)
6912 return val > -256 && val < 256;
6913 else
6914 return val > -4096 && val < 4092;
6915 }
6916
6917 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6918 }
6919
6920 if (GET_MODE_SIZE (mode) <= 4
6921 && ! (arm_arch4
6922 && (mode == HImode
6923 || mode == HFmode
6924 || (mode == QImode && outer == SIGN_EXTEND))))
6925 {
6926 if (code == MULT)
6927 {
6928 rtx xiop0 = XEXP (index, 0);
6929 rtx xiop1 = XEXP (index, 1);
6930
6931 return ((arm_address_register_rtx_p (xiop0, strict_p)
6932 && power_of_two_operand (xiop1, SImode))
6933 || (arm_address_register_rtx_p (xiop1, strict_p)
6934 && power_of_two_operand (xiop0, SImode)));
6935 }
6936 else if (code == LSHIFTRT || code == ASHIFTRT
6937 || code == ASHIFT || code == ROTATERT)
6938 {
6939 rtx op = XEXP (index, 1);
6940
6941 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6942 && CONST_INT_P (op)
6943 && INTVAL (op) > 0
6944 && INTVAL (op) <= 31);
6945 }
6946 }
6947
6948 /* For ARM v4 we may be doing a sign-extend operation during the
6949 load. */
6950 if (arm_arch4)
6951 {
6952 if (mode == HImode
6953 || mode == HFmode
6954 || (outer == SIGN_EXTEND && mode == QImode))
6955 range = 256;
6956 else
6957 range = 4096;
6958 }
6959 else
6960 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6961
6962 return (code == CONST_INT
6963 && INTVAL (index) < range
6964 && INTVAL (index) > -range);
6965 }
6966
6967 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
6968 index operand, i.e. 1, 2, 4 or 8. */
6969 static bool
6970 thumb2_index_mul_operand (rtx op)
6971 {
6972 HOST_WIDE_INT val;
6973
6974 if (!CONST_INT_P (op))
6975 return false;
6976
6977 val = INTVAL(op);
6978 return (val == 1 || val == 2 || val == 4 || val == 8);
6979 }
6980
6981 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6982 static int
6983 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6984 {
6985 enum rtx_code code = GET_CODE (index);
6986
6987 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6988 /* Standard coprocessor addressing modes. */
6989 if (TARGET_HARD_FLOAT
6990 && TARGET_VFP
6991 && (mode == SFmode || mode == DFmode))
6992 return (code == CONST_INT && INTVAL (index) < 1024
6993 /* Thumb-2 allows only > -256 index range for its core register
6994 load/stores. Since we allow SF/DF in core registers, we have
6995 to use the intersection between -256~4096 (core) and -1024~1024
6996 (coprocessor). */
6997 && INTVAL (index) > -256
6998 && (INTVAL (index) & 3) == 0);
6999
7000 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7001 {
7002 /* For DImode assume values will usually live in core regs
7003 and only allow LDRD addressing modes. */
7004 if (!TARGET_LDRD || mode != DImode)
7005 return (code == CONST_INT
7006 && INTVAL (index) < 1024
7007 && INTVAL (index) > -1024
7008 && (INTVAL (index) & 3) == 0);
7009 }
7010
7011 /* For quad modes, we restrict the constant offset to be slightly less
7012 than what the instruction format permits. We do this because for
7013 quad mode moves, we will actually decompose them into two separate
7014 double-mode reads or writes. INDEX must therefore be a valid
7015 (double-mode) offset and so should INDEX+8. */
7016 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7017 return (code == CONST_INT
7018 && INTVAL (index) < 1016
7019 && INTVAL (index) > -1024
7020 && (INTVAL (index) & 3) == 0);
7021
7022 /* We have no such constraint on double mode offsets, so we permit the
7023 full range of the instruction format. */
7024 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7025 return (code == CONST_INT
7026 && INTVAL (index) < 1024
7027 && INTVAL (index) > -1024
7028 && (INTVAL (index) & 3) == 0);
7029
7030 if (arm_address_register_rtx_p (index, strict_p)
7031 && (GET_MODE_SIZE (mode) <= 4))
7032 return 1;
7033
7034 if (mode == DImode || mode == DFmode)
7035 {
7036 if (code == CONST_INT)
7037 {
7038 HOST_WIDE_INT val = INTVAL (index);
7039 /* ??? Can we assume ldrd for thumb2? */
7040 /* Thumb-2 ldrd only has reg+const addressing modes. */
7041 /* ldrd supports offsets of +-1020.
7042 However the ldr fallback does not. */
7043 return val > -256 && val < 256 && (val & 3) == 0;
7044 }
7045 else
7046 return 0;
7047 }
7048
7049 if (code == MULT)
7050 {
7051 rtx xiop0 = XEXP (index, 0);
7052 rtx xiop1 = XEXP (index, 1);
7053
7054 return ((arm_address_register_rtx_p (xiop0, strict_p)
7055 && thumb2_index_mul_operand (xiop1))
7056 || (arm_address_register_rtx_p (xiop1, strict_p)
7057 && thumb2_index_mul_operand (xiop0)));
7058 }
7059 else if (code == ASHIFT)
7060 {
7061 rtx op = XEXP (index, 1);
7062
7063 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7064 && CONST_INT_P (op)
7065 && INTVAL (op) > 0
7066 && INTVAL (op) <= 3);
7067 }
7068
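/* Otherwise this is a plain core-register load/store, whose immediate
   field in Thumb-2 is asymmetric: roughly a 12-bit positive range and
   an 8-bit negative range.  For example, offsets of 4095 and -255 are
   accepted by the test below, while -256 and 4096 are rejected.  */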
7069 return (code == CONST_INT
7070 && INTVAL (index) < 4096
7071 && INTVAL (index) > -256);
7072 }
7073
7074 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7075 static int
7076 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
7077 {
7078 int regno;
7079
7080 if (!REG_P (x))
7081 return 0;
7082
7083 regno = REGNO (x);
7084
7085 if (strict_p)
7086 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7087
7088 return (regno <= LAST_LO_REGNUM
7089 || regno > LAST_VIRTUAL_REGISTER
7090 || regno == FRAME_POINTER_REGNUM
7091 || (GET_MODE_SIZE (mode) >= 4
7092 && (regno == STACK_POINTER_REGNUM
7093 || regno >= FIRST_PSEUDO_REGISTER
7094 || x == hard_frame_pointer_rtx
7095 || x == arg_pointer_rtx)));
7096 }
7097
7098 /* Return nonzero if x is a legitimate index register. This is the case
7099 for any base register that can access a QImode object. */
7100 inline static int
7101 thumb1_index_register_rtx_p (rtx x, int strict_p)
7102 {
7103 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7104 }
7105
7106 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7107
7108 The AP may be eliminated to either the SP or the FP, so we use the
7109 least common denominator, e.g. SImode, and offsets from 0 to 64.
7110
7111 ??? Verify whether the above is the right approach.
7112
7113 ??? Also, the FP may be eliminated to the SP, so perhaps that
7114 needs special handling also.
7115
7116 ??? Look at how the mips16 port solves this problem. It probably uses
7117 better ways to solve some of these problems.
7118
7119 Although it is not incorrect, we don't accept QImode and HImode
7120 addresses based on the frame pointer or arg pointer until the
7121 reload pass starts. This is so that eliminating such addresses
7122 into stack based ones won't produce impossible code. */
7123 int
7124 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
7125 {
7126 /* ??? Not clear if this is right. Experiment. */
7127 if (GET_MODE_SIZE (mode) < 4
7128 && !(reload_in_progress || reload_completed)
7129 && (reg_mentioned_p (frame_pointer_rtx, x)
7130 || reg_mentioned_p (arg_pointer_rtx, x)
7131 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7132 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7133 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7134 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7135 return 0;
7136
7137 /* Accept any base register. SP only in SImode or larger. */
7138 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7139 return 1;
7140
7141 /* This is PC relative data before arm_reorg runs. */
7142 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7143 && GET_CODE (x) == SYMBOL_REF
7144 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7145 return 1;
7146
7147 /* This is PC relative data after arm_reorg runs. */
7148 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7149 && reload_completed
7150 && (GET_CODE (x) == LABEL_REF
7151 || (GET_CODE (x) == CONST
7152 && GET_CODE (XEXP (x, 0)) == PLUS
7153 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7154 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7155 return 1;
7156
7157 /* Post-inc indexing only supported for SImode and larger. */
7158 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7159 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7160 return 1;
7161
7162 else if (GET_CODE (x) == PLUS)
7163 {
7164 /* REG+REG address can be any two index registers. */
7165 /* We disallow FRAME+REG addressing since we know that FRAME
7166 will be replaced with STACK, and SP relative addressing only
7167 permits SP+OFFSET. */
7168 if (GET_MODE_SIZE (mode) <= 4
7169 && XEXP (x, 0) != frame_pointer_rtx
7170 && XEXP (x, 1) != frame_pointer_rtx
7171 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7172 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7173 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7174 return 1;
7175
7176 /* REG+const has 5-7 bit offset for non-SP registers. */
7177 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7178 || XEXP (x, 0) == arg_pointer_rtx)
7179 && CONST_INT_P (XEXP (x, 1))
7180 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7181 return 1;
7182
7183 /* REG+const has 10-bit offset for SP, but only SImode and
7184 larger are supported. */
7185 /* ??? Should probably check for DI/DFmode overflow here
7186 just like GO_IF_LEGITIMATE_OFFSET does. */
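/* For example, with an SImode access (plus (reg sp) (const_int 1020))
   is accepted by the test below, while (plus (reg sp) (const_int 1024))
   is not, since 1024 + 4 exceeds the 1024-byte limit.  */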
7187 else if (REG_P (XEXP (x, 0))
7188 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7189 && GET_MODE_SIZE (mode) >= 4
7190 && CONST_INT_P (XEXP (x, 1))
7191 && INTVAL (XEXP (x, 1)) >= 0
7192 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7193 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7194 return 1;
7195
7196 else if (REG_P (XEXP (x, 0))
7197 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7198 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7199 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7200 && REGNO (XEXP (x, 0))
7201 <= LAST_VIRTUAL_POINTER_REGISTER))
7202 && GET_MODE_SIZE (mode) >= 4
7203 && CONST_INT_P (XEXP (x, 1))
7204 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7205 return 1;
7206 }
7207
7208 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7209 && GET_MODE_SIZE (mode) == 4
7210 && GET_CODE (x) == SYMBOL_REF
7211 && CONSTANT_POOL_ADDRESS_P (x)
7212 && ! (flag_pic
7213 && symbol_mentioned_p (get_pool_constant (x))
7214 && ! pcrel_constant_p (get_pool_constant (x))))
7215 return 1;
7216
7217 return 0;
7218 }
7219
7220 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7221 instruction of mode MODE. */
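/* As a rough illustration of the ranges accepted below: byte accesses
   allow offsets 0-31, halfword accesses allow even offsets 0-62, and
   word accesses allow word-aligned offsets 0-124, matching the 5-bit
   immediate fields of the 16-bit Thumb load/store encodings (scaled by
   the access size).  */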
7222 int
7223 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7224 {
7225 switch (GET_MODE_SIZE (mode))
7226 {
7227 case 1:
7228 return val >= 0 && val < 32;
7229
7230 case 2:
7231 return val >= 0 && val < 64 && (val & 1) == 0;
7232
7233 default:
7234 return (val >= 0
7235 && (val + GET_MODE_SIZE (mode)) <= 128
7236 && (val & 3) == 0);
7237 }
7238 }
7239
7240 bool
7241 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7242 {
7243 if (TARGET_ARM)
7244 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7245 else if (TARGET_THUMB2)
7246 return thumb2_legitimate_address_p (mode, x, strict_p);
7247 else /* if (TARGET_THUMB1) */
7248 return thumb1_legitimate_address_p (mode, x, strict_p);
7249 }
7250
7251 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7252
7253 Given an rtx X being reloaded into a reg required to be
7254 in class CLASS, return the class of reg to actually use.
7255 In general this is just CLASS, but for the Thumb core registers and
7256 immediate constants we prefer a LO_REGS class or a subset. */
7257
7258 static reg_class_t
7259 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7260 {
7261 if (TARGET_32BIT)
7262 return rclass;
7263 else
7264 {
7265 if (rclass == GENERAL_REGS)
7266 return LO_REGS;
7267 else
7268 return rclass;
7269 }
7270 }
7271
7272 /* Build the SYMBOL_REF for __tls_get_addr. */
7273
7274 static GTY(()) rtx tls_get_addr_libfunc;
7275
7276 static rtx
7277 get_tls_get_addr (void)
7278 {
7279 if (!tls_get_addr_libfunc)
7280 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7281 return tls_get_addr_libfunc;
7282 }
7283
7284 rtx
7285 arm_load_tp (rtx target)
7286 {
7287 if (!target)
7288 target = gen_reg_rtx (SImode);
7289
7290 if (TARGET_HARD_TP)
7291 {
7292 /* Can return in any reg. */
7293 emit_insn (gen_load_tp_hard (target));
7294 }
7295 else
7296 {
7297 /* Always returned in r0. Immediately copy the result into a pseudo,
7298 otherwise other uses of r0 (e.g. setting up function arguments) may
7299 clobber the value. */
7300
7301 rtx tmp;
7302
7303 emit_insn (gen_load_tp_soft ());
7304
7305 tmp = gen_rtx_REG (SImode, 0);
7306 emit_move_insn (target, tmp);
7307 }
7308 return target;
7309 }
7310
7311 static rtx
7312 load_tls_operand (rtx x, rtx reg)
7313 {
7314 rtx tmp;
7315
7316 if (reg == NULL_RTX)
7317 reg = gen_reg_rtx (SImode);
7318
7319 tmp = gen_rtx_CONST (SImode, x);
7320
7321 emit_move_insn (reg, tmp);
7322
7323 return reg;
7324 }
7325
7326 static rtx
7327 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7328 {
7329 rtx insns, label, labelno, sum;
7330
7331 gcc_assert (reloc != TLS_DESCSEQ);
7332 start_sequence ();
7333
7334 labelno = GEN_INT (pic_labelno++);
7335 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7336 label = gen_rtx_CONST (VOIDmode, label);
7337
7338 sum = gen_rtx_UNSPEC (Pmode,
7339 gen_rtvec (4, x, GEN_INT (reloc), label,
7340 GEN_INT (TARGET_ARM ? 8 : 4)),
7341 UNSPEC_TLS);
7342 reg = load_tls_operand (sum, reg);
7343
7344 if (TARGET_ARM)
7345 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7346 else
7347 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7348
7349 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7350 LCT_PURE, /* LCT_CONST? */
7351 Pmode, 1, reg, Pmode);
7352
7353 insns = get_insns ();
7354 end_sequence ();
7355
7356 return insns;
7357 }
7358
7359 static rtx
7360 arm_tls_descseq_addr (rtx x, rtx reg)
7361 {
7362 rtx labelno = GEN_INT (pic_labelno++);
7363 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7364 rtx sum = gen_rtx_UNSPEC (Pmode,
7365 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7366 gen_rtx_CONST (VOIDmode, label),
7367 GEN_INT (!TARGET_ARM)),
7368 UNSPEC_TLS);
7369 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7370
7371 emit_insn (gen_tlscall (x, labelno));
7372 if (!reg)
7373 reg = gen_reg_rtx (SImode);
7374 else
7375 gcc_assert (REGNO (reg) != 0);
7376
7377 emit_move_insn (reg, reg0);
7378
7379 return reg;
7380 }
7381
7382 rtx
7383 legitimize_tls_address (rtx x, rtx reg)
7384 {
7385 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7386 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7387
7388 switch (model)
7389 {
7390 case TLS_MODEL_GLOBAL_DYNAMIC:
7391 if (TARGET_GNU2_TLS)
7392 {
7393 reg = arm_tls_descseq_addr (x, reg);
7394
7395 tp = arm_load_tp (NULL_RTX);
7396
7397 dest = gen_rtx_PLUS (Pmode, tp, reg);
7398 }
7399 else
7400 {
7401 /* Original scheme */
7402 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7403 dest = gen_reg_rtx (Pmode);
7404 emit_libcall_block (insns, dest, ret, x);
7405 }
7406 return dest;
7407
7408 case TLS_MODEL_LOCAL_DYNAMIC:
7409 if (TARGET_GNU2_TLS)
7410 {
7411 reg = arm_tls_descseq_addr (x, reg);
7412
7413 tp = arm_load_tp (NULL_RTX);
7414
7415 dest = gen_rtx_PLUS (Pmode, tp, reg);
7416 }
7417 else
7418 {
7419 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7420
7421 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7422 share the LDM result with other LD model accesses. */
7423 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7424 UNSPEC_TLS);
7425 dest = gen_reg_rtx (Pmode);
7426 emit_libcall_block (insns, dest, ret, eqv);
7427
7428 /* Load the addend. */
7429 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7430 GEN_INT (TLS_LDO32)),
7431 UNSPEC_TLS);
7432 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7433 dest = gen_rtx_PLUS (Pmode, dest, addend);
7434 }
7435 return dest;
7436
7437 case TLS_MODEL_INITIAL_EXEC:
7438 labelno = GEN_INT (pic_labelno++);
7439 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7440 label = gen_rtx_CONST (VOIDmode, label);
7441 sum = gen_rtx_UNSPEC (Pmode,
7442 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7443 GEN_INT (TARGET_ARM ? 8 : 4)),
7444 UNSPEC_TLS);
7445 reg = load_tls_operand (sum, reg);
7446
7447 if (TARGET_ARM)
7448 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7449 else if (TARGET_THUMB2)
7450 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7451 else
7452 {
7453 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7454 emit_move_insn (reg, gen_const_mem (SImode, reg));
7455 }
7456
7457 tp = arm_load_tp (NULL_RTX);
7458
7459 return gen_rtx_PLUS (Pmode, tp, reg);
7460
7461 case TLS_MODEL_LOCAL_EXEC:
7462 tp = arm_load_tp (NULL_RTX);
7463
7464 reg = gen_rtx_UNSPEC (Pmode,
7465 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7466 UNSPEC_TLS);
7467 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7468
7469 return gen_rtx_PLUS (Pmode, tp, reg);
7470
7471 default:
7472 abort ();
7473 }
7474 }
7475
7476 /* Try machine-dependent ways of modifying an illegitimate address
7477 to be legitimate. If we find one, return the new, valid address. */
7478 rtx
7479 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7480 {
7481 if (arm_tls_referenced_p (x))
7482 {
7483 rtx addend = NULL;
7484
7485 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7486 {
7487 addend = XEXP (XEXP (x, 0), 1);
7488 x = XEXP (XEXP (x, 0), 0);
7489 }
7490
7491 if (GET_CODE (x) != SYMBOL_REF)
7492 return x;
7493
7494 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7495
7496 x = legitimize_tls_address (x, NULL_RTX);
7497
7498 if (addend)
7499 {
7500 x = gen_rtx_PLUS (SImode, x, addend);
7501 orig_x = x;
7502 }
7503 else
7504 return x;
7505 }
7506
7507 if (!TARGET_ARM)
7508 {
7509 /* TODO: legitimize_address for Thumb2. */
7510 if (TARGET_THUMB2)
7511 return x;
7512 return thumb_legitimize_address (x, orig_x, mode);
7513 }
7514
7515 if (GET_CODE (x) == PLUS)
7516 {
7517 rtx xop0 = XEXP (x, 0);
7518 rtx xop1 = XEXP (x, 1);
7519
7520 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7521 xop0 = force_reg (SImode, xop0);
7522
7523 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7524 && !symbol_mentioned_p (xop1))
7525 xop1 = force_reg (SImode, xop1);
7526
7527 if (ARM_BASE_REGISTER_RTX_P (xop0)
7528 && CONST_INT_P (xop1))
7529 {
7530 HOST_WIDE_INT n, low_n;
7531 rtx base_reg, val;
7532 n = INTVAL (xop1);
7533
7534 /* VFP addressing modes actually allow greater offsets, but for
7535 now we just stick with the lowest common denominator. */
7536 if (mode == DImode
7537 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7538 {
7539 low_n = n & 0x0f;
7540 n &= ~0x0f;
7541 if (low_n > 4)
7542 {
7543 n += 16;
7544 low_n -= 16;
7545 }
7546 }
7547 else
7548 {
7549 low_n = ((mode) == TImode ? 0
7550 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7551 n -= low_n;
7552 }
7553
7554 base_reg = gen_reg_rtx (SImode);
7555 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7556 emit_move_insn (base_reg, val);
7557 x = plus_constant (Pmode, base_reg, low_n);
7558 }
7559 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7560 x = gen_rtx_PLUS (SImode, xop0, xop1);
7561 }
7562
7563 /* XXX We don't allow MINUS any more -- see comment in
7564 arm_legitimate_address_outer_p (). */
7565 else if (GET_CODE (x) == MINUS)
7566 {
7567 rtx xop0 = XEXP (x, 0);
7568 rtx xop1 = XEXP (x, 1);
7569
7570 if (CONSTANT_P (xop0))
7571 xop0 = force_reg (SImode, xop0);
7572
7573 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7574 xop1 = force_reg (SImode, xop1);
7575
7576 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7577 x = gen_rtx_MINUS (SImode, xop0, xop1);
7578 }
7579
7580 /* Make sure to take full advantage of the pre-indexed addressing mode
7581 with absolute addresses which often allows for the base register to
7582 be factorized for multiple adjacent memory references, and it might
7583 even allow for the minipool to be avoided entirely. */
7584 else if (CONST_INT_P (x) && optimize > 0)
7585 {
7586 unsigned int bits;
7587 HOST_WIDE_INT mask, base, index;
7588 rtx base_reg;
7589
7590 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7591 use an 8-bit index. So let's use a 12-bit index for SImode only and
7592 hope that arm_gen_constant will enable ldrb to use more bits. */
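/* For example, an SImode access to constant address 0x12344 is split
   by the code below into a base of 0x12000 (forced into a register)
   plus an index of 0x344.  */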
7593 bits = (mode == SImode) ? 12 : 8;
7594 mask = (1 << bits) - 1;
7595 base = INTVAL (x) & ~mask;
7596 index = INTVAL (x) & mask;
7597 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7598 {
7599 /* It'll most probably be more efficient to generate the base
7600 with more bits set and use a negative index instead. */
7601 base |= mask;
7602 index -= mask;
7603 }
7604 base_reg = force_reg (SImode, GEN_INT (base));
7605 x = plus_constant (Pmode, base_reg, index);
7606 }
7607
7608 if (flag_pic)
7609 {
7610 /* We need to find and carefully transform any SYMBOL and LABEL
7611 references, so go back to the original address expression. */
7612 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7613
7614 if (new_x != orig_x)
7615 x = new_x;
7616 }
7617
7618 return x;
7619 }
7620
7621
7622 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7623 to be legitimate. If we find one, return the new, valid address. */
7624 rtx
7625 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7626 {
7627 if (GET_CODE (x) == PLUS
7628 && CONST_INT_P (XEXP (x, 1))
7629 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7630 || INTVAL (XEXP (x, 1)) < 0))
7631 {
7632 rtx xop0 = XEXP (x, 0);
7633 rtx xop1 = XEXP (x, 1);
7634 HOST_WIDE_INT offset = INTVAL (xop1);
7635
7636 /* Try to fold the offset into a biasing of the base register and
7637 then offsetting that. Don't do this when optimizing for space
7638 since it can cause too many CSEs. */
7639 if (optimize_size && offset >= 0
7640 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7641 {
7642 HOST_WIDE_INT delta;
7643
7644 if (offset >= 256)
7645 delta = offset - (256 - GET_MODE_SIZE (mode));
7646 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7647 delta = 31 * GET_MODE_SIZE (mode);
7648 else
7649 delta = offset & (~31 * GET_MODE_SIZE (mode));
7650
7651 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7652 NULL_RTX);
7653 x = plus_constant (Pmode, xop0, delta);
7654 }
7655 else if (offset < 0 && offset > -256)
7656 /* Small negative offsets are best done with a subtract before the
7657 dereference; forcing these into a register normally takes two
7658 instructions. */
7659 x = force_operand (x, NULL_RTX);
7660 else
7661 {
7662 /* For the remaining cases, force the constant into a register. */
7663 xop1 = force_reg (SImode, xop1);
7664 x = gen_rtx_PLUS (SImode, xop0, xop1);
7665 }
7666 }
7667 else if (GET_CODE (x) == PLUS
7668 && s_register_operand (XEXP (x, 1), SImode)
7669 && !s_register_operand (XEXP (x, 0), SImode))
7670 {
7671 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7672
7673 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7674 }
7675
7676 if (flag_pic)
7677 {
7678 /* We need to find and carefully transform any SYMBOL and LABEL
7679 references, so go back to the original address expression. */
7680 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7681
7682 if (new_x != orig_x)
7683 x = new_x;
7684 }
7685
7686 return x;
7687 }
7688
7689 bool
7690 arm_legitimize_reload_address (rtx *p,
7691 enum machine_mode mode,
7692 int opnum, int type,
7693 int ind_levels ATTRIBUTE_UNUSED)
7694 {
7695 /* We must recognize output that we have already generated ourselves. */
7696 if (GET_CODE (*p) == PLUS
7697 && GET_CODE (XEXP (*p, 0)) == PLUS
7698 && REG_P (XEXP (XEXP (*p, 0), 0))
7699 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7700 && CONST_INT_P (XEXP (*p, 1)))
7701 {
7702 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7703 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7704 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7705 return true;
7706 }
7707
7708 if (GET_CODE (*p) == PLUS
7709 && REG_P (XEXP (*p, 0))
7710 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7711 /* If the base register is equivalent to a constant, let the generic
7712 code handle it. Otherwise we will run into problems if a future
7713 reload pass decides to rematerialize the constant. */
7714 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7715 && CONST_INT_P (XEXP (*p, 1)))
7716 {
7717 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7718 HOST_WIDE_INT low, high;
7719
7720 /* Detect coprocessor load/stores. */
7721 bool coproc_p = ((TARGET_HARD_FLOAT
7722 && TARGET_VFP
7723 && (mode == SFmode || mode == DFmode))
7724 || (TARGET_REALLY_IWMMXT
7725 && VALID_IWMMXT_REG_MODE (mode))
7726 || (TARGET_NEON
7727 && (VALID_NEON_DREG_MODE (mode)
7728 || VALID_NEON_QREG_MODE (mode))));
7729
7730 /* For some conditions, bail out when the lower two bits of the offset are nonzero (i.e. it is not word-aligned). */
7731 if ((val & 0x3) != 0
7732 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7733 && (coproc_p
7734 /* For DI, and DF under soft-float: */
7735 || ((mode == DImode || mode == DFmode)
7736 /* Without ldrd, we use stm/ldm, which does not
7737 fare well with unaligned offsets. */
7738 && (! TARGET_LDRD
7739 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7740 || TARGET_THUMB2))))
7741 return false;
7742
7743 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7744 where the (reg+high) part gets turned into a reload add insn,
7745 we try to decompose the index into high/low values that can often
7746 also lead to better reload CSE.
7747 For example:
7748 ldr r0, [r2, #4100] // Offset too large
7749 ldr r1, [r2, #4104] // Offset too large
7750
7751 is best reloaded as:
7752 add t1, r2, #4096
7753 ldr r0, [t1, #4]
7754 add t2, r2, #4096
7755 ldr r1, [t2, #8]
7756
7757 which post-reload CSE can simplify in most cases to eliminate the
7758 second add instruction:
7759 add t1, r2, #4096
7760 ldr r0, [t1, #4]
7761 ldr r1, [t1, #8]
7762
7763 The idea here is that we want to split out the bits of the constant
7764 as a mask, rather than by subtracting the maximum offset that the
7765 respective type of load/store used can handle.
7766
7767 A negative low offset can still be useful even if the overall
7768 offset is positive; sometimes this may lead to an immediate
7769 that can be constructed with fewer instructions.
7770 For example:
7771 ldr r0, [r2, #0x3FFFFC]
7772
7773 This is best reloaded as:
7774 add t1, r2, #0x400000
7775 ldr r0, [t1, #-4]
7776
7777 The trick for spotting this for a load insn with N bits of offset
7778 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7779 negative offset that is going to make bit N and all the bits below
7780 it become zero in the remainder part.
7781
7782 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7783 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7784 used in most cases of ARM load/store instructions. */
7785
7786 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7787 (((VAL) & ((1 << (N)) - 1)) \
7788 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7789 : 0)
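/* For example, SIGN_MAG_LOW_ADDR_BITS (0x2004, 12) evaluates to 4,
   giving a high part of 0x2000, while SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
   evaluates to -4, giving a high part of 0x400000 as in the example
   above.  */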
7790
7791 if (coproc_p)
7792 {
7793 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7794
7795 /* NEON quad-word load/stores are made of two double-word accesses,
7796 so the valid index range is reduced by 8. Treat as 9-bit range if
7797 we go over it. */
7798 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7799 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7800 }
7801 else if (GET_MODE_SIZE (mode) == 8)
7802 {
7803 if (TARGET_LDRD)
7804 low = (TARGET_THUMB2
7805 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7806 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7807 else
7808 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7809 to access doublewords. The supported load/store offsets are
7810 -8, -4, and 4, which we try to produce here. */
7811 low = ((val & 0xf) ^ 0x8) - 0x8;
7812 }
7813 else if (GET_MODE_SIZE (mode) < 8)
7814 {
7815 /* NEON element load/stores do not have an offset. */
7816 if (TARGET_NEON_FP16 && mode == HFmode)
7817 return false;
7818
7819 if (TARGET_THUMB2)
7820 {
7821 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7822 Try the wider 12-bit range first, and re-try if the result
7823 is out of range. */
7824 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7825 if (low < -255)
7826 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7827 }
7828 else
7829 {
7830 if (mode == HImode || mode == HFmode)
7831 {
7832 if (arm_arch4)
7833 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7834 else
7835 {
7836 /* The storehi/movhi_bytes fallbacks can use only
7837 [-4094,+4094] of the full ldrb/strb index range. */
7838 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7839 if (low == 4095 || low == -4095)
7840 return false;
7841 }
7842 }
7843 else
7844 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7845 }
7846 }
7847 else
7848 return false;
7849
7850 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7851 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7852 - (unsigned HOST_WIDE_INT) 0x80000000);
7853 /* Check for overflow or zero */
7854 if (low == 0 || high == 0 || (high + low != val))
7855 return false;
7856
7857 /* Reload the high part into a base reg; leave the low part
7858 in the mem.
7859 Note that replacing this gen_rtx_PLUS with plus_constant is
7860 wrong in this case because we rely on the
7861 (plus (plus reg c1) c2) structure being preserved so that
7862 XEXP (*p, 0) in push_reload below uses the correct term. */
7863 *p = gen_rtx_PLUS (GET_MODE (*p),
7864 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7865 GEN_INT (high)),
7866 GEN_INT (low));
7867 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7868 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7869 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7870 return true;
7871 }
7872
7873 return false;
7874 }
7875
7876 rtx
7877 thumb_legitimize_reload_address (rtx *x_p,
7878 enum machine_mode mode,
7879 int opnum, int type,
7880 int ind_levels ATTRIBUTE_UNUSED)
7881 {
7882 rtx x = *x_p;
7883
7884 if (GET_CODE (x) == PLUS
7885 && GET_MODE_SIZE (mode) < 4
7886 && REG_P (XEXP (x, 0))
7887 && XEXP (x, 0) == stack_pointer_rtx
7888 && CONST_INT_P (XEXP (x, 1))
7889 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7890 {
7891 rtx orig_x = x;
7892
7893 x = copy_rtx (x);
7894 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7895 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7896 return x;
7897 }
7898
7899 /* If both registers are hi-regs, then it's better to reload the
7900 entire expression rather than each register individually. That
7901 only requires one reload register rather than two. */
7902 if (GET_CODE (x) == PLUS
7903 && REG_P (XEXP (x, 0))
7904 && REG_P (XEXP (x, 1))
7905 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7906 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7907 {
7908 rtx orig_x = x;
7909
7910 x = copy_rtx (x);
7911 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7912 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7913 return x;
7914 }
7915
7916 return NULL;
7917 }
7918
7919 /* Test for various thread-local symbols. */
7920
7921 /* Helper for arm_tls_referenced_p. */
7922
7923 static int
7924 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7925 {
7926 if (GET_CODE (*x) == SYMBOL_REF)
7927 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7928
7929 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7930 TLS offsets, not real symbol references. */
7931 if (GET_CODE (*x) == UNSPEC
7932 && XINT (*x, 1) == UNSPEC_TLS)
7933 return -1;
7934
7935 return 0;
7936 }
7937
7938 /* Return TRUE if X contains any TLS symbol references. */
7939
7940 bool
7941 arm_tls_referenced_p (rtx x)
7942 {
7943 if (! TARGET_HAVE_TLS)
7944 return false;
7945
7946 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7947 }
7948
7949 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7950
7951 On the ARM, allow any integer (invalid ones are removed later by insn
7952 patterns), nice doubles and symbol_refs which refer to the function's
7953 constant pool XXX.
7954
7955 When generating pic allow anything. */
7956
7957 static bool
7958 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7959 {
7960 /* At present, we have no support for Neon structure constants, so forbid
7961 them here. It might be possible to handle simple cases like 0 and -1
7962 in future. */
7963 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7964 return false;
7965
7966 return flag_pic || !label_mentioned_p (x);
7967 }
7968
7969 static bool
7970 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7971 {
7972 return (CONST_INT_P (x)
7973 || CONST_DOUBLE_P (x)
7974 || CONSTANT_ADDRESS_P (x)
7975 || flag_pic);
7976 }
7977
7978 static bool
7979 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7980 {
7981 return (!arm_cannot_force_const_mem (mode, x)
7982 && (TARGET_32BIT
7983 ? arm_legitimate_constant_p_1 (mode, x)
7984 : thumb_legitimate_constant_p (mode, x)));
7985 }
7986
7987 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7988
7989 static bool
7990 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7991 {
7992 rtx base, offset;
7993
7994 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7995 {
7996 split_const (x, &base, &offset);
7997 if (GET_CODE (base) == SYMBOL_REF
7998 && !offset_within_block_p (base, INTVAL (offset)))
7999 return true;
8000 }
8001 return arm_tls_referenced_p (x);
8002 }
8003 \f
8004 #define REG_OR_SUBREG_REG(X) \
8005 (REG_P (X) \
8006 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8007
8008 #define REG_OR_SUBREG_RTX(X) \
8009 (REG_P (X) ? (X) : SUBREG_REG (X))
8010
8011 static inline int
8012 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8013 {
8014 enum machine_mode mode = GET_MODE (x);
8015 int total, words;
8016
8017 switch (code)
8018 {
8019 case ASHIFT:
8020 case ASHIFTRT:
8021 case LSHIFTRT:
8022 case ROTATERT:
8023 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8024
8025 case PLUS:
8026 case MINUS:
8027 case COMPARE:
8028 case NEG:
8029 case NOT:
8030 return COSTS_N_INSNS (1);
8031
8032 case MULT:
8033 if (CONST_INT_P (XEXP (x, 1)))
8034 {
8035 int cycles = 0;
8036 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8037
8038 while (i)
8039 {
8040 i >>= 2;
8041 cycles++;
8042 }
8043 return COSTS_N_INSNS (2) + cycles;
8044 }
8045 return COSTS_N_INSNS (1) + 16;
8046
8047 case SET:
8048 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8049 the mode. */
8050 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
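/* For example, under this approximation an SImode store from a register
   to memory costs COSTS_N_INSNS (1) + 4, and an SImode register-to-register
   move costs COSTS_N_INSNS (1).  */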
8051 return (COSTS_N_INSNS (words)
8052 + 4 * ((MEM_P (SET_SRC (x)))
8053 + MEM_P (SET_DEST (x))));
8054
8055 case CONST_INT:
8056 if (outer == SET)
8057 {
8058 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8059 return 0;
8060 if (thumb_shiftable_const (INTVAL (x)))
8061 return COSTS_N_INSNS (2);
8062 return COSTS_N_INSNS (3);
8063 }
8064 else if ((outer == PLUS || outer == COMPARE)
8065 && INTVAL (x) < 256 && INTVAL (x) > -256)
8066 return 0;
8067 else if ((outer == IOR || outer == XOR || outer == AND)
8068 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8069 return COSTS_N_INSNS (1);
8070 else if (outer == AND)
8071 {
8072 int i;
8073 /* This duplicates the tests in the andsi3 expander. */
8074 for (i = 9; i <= 31; i++)
8075 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8076 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8077 return COSTS_N_INSNS (2);
8078 }
8079 else if (outer == ASHIFT || outer == ASHIFTRT
8080 || outer == LSHIFTRT)
8081 return 0;
8082 return COSTS_N_INSNS (2);
8083
8084 case CONST:
8085 case CONST_DOUBLE:
8086 case LABEL_REF:
8087 case SYMBOL_REF:
8088 return COSTS_N_INSNS (3);
8089
8090 case UDIV:
8091 case UMOD:
8092 case DIV:
8093 case MOD:
8094 return 100;
8095
8096 case TRUNCATE:
8097 return 99;
8098
8099 case AND:
8100 case XOR:
8101 case IOR:
8102 /* XXX guess. */
8103 return 8;
8104
8105 case MEM:
8106 /* XXX another guess. */
8107 /* Memory costs quite a lot for the first word, but subsequent words
8108 load at the equivalent of a single insn each. */
8109 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8110 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8111 ? 4 : 0));
8112
8113 case IF_THEN_ELSE:
8114 /* XXX a guess. */
8115 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8116 return 14;
8117 return 2;
8118
8119 case SIGN_EXTEND:
8120 case ZERO_EXTEND:
8121 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8122 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8123
8124 if (mode == SImode)
8125 return total;
8126
8127 if (arm_arch6)
8128 return total + COSTS_N_INSNS (1);
8129
8130 /* Assume a two-shift sequence. Increase the cost slightly so
8131 we prefer actual shifts over an extend operation. */
8132 return total + 1 + COSTS_N_INSNS (2);
8133
8134 default:
8135 return 99;
8136 }
8137 }
8138
8139 static inline bool
8140 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8141 {
8142 enum machine_mode mode = GET_MODE (x);
8143 enum rtx_code subcode;
8144 rtx operand;
8145 enum rtx_code code = GET_CODE (x);
8146 *total = 0;
8147
8148 switch (code)
8149 {
8150 case MEM:
8151 /* Memory costs quite a lot for the first word, but subsequent words
8152 load at the equivalent of a single insn each. */
8153 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8154 return true;
8155
8156 case DIV:
8157 case MOD:
8158 case UDIV:
8159 case UMOD:
8160 if (TARGET_HARD_FLOAT && mode == SFmode)
8161 *total = COSTS_N_INSNS (2);
8162 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8163 *total = COSTS_N_INSNS (4);
8164 else
8165 *total = COSTS_N_INSNS (20);
8166 return false;
8167
8168 case ROTATE:
8169 if (REG_P (XEXP (x, 1)))
8170 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8171 else if (!CONST_INT_P (XEXP (x, 1)))
8172 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8173
8174 /* Fall through */
8175 case ROTATERT:
8176 if (mode != SImode)
8177 {
8178 *total += COSTS_N_INSNS (4);
8179 return true;
8180 }
8181
8182 /* Fall through */
8183 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8184 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8185 if (mode == DImode)
8186 {
8187 *total += COSTS_N_INSNS (3);
8188 return true;
8189 }
8190
8191 *total += COSTS_N_INSNS (1);
8192 /* Increase the cost of complex shifts because they aren't any faster,
8193 and reduce dual issue opportunities. */
8194 if (arm_tune_cortex_a9
8195 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8196 ++*total;
8197
8198 return true;
8199
8200 case MINUS:
8201 if (mode == DImode)
8202 {
8203 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8204 if (CONST_INT_P (XEXP (x, 0))
8205 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8206 {
8207 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8208 return true;
8209 }
8210
8211 if (CONST_INT_P (XEXP (x, 1))
8212 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8213 {
8214 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8215 return true;
8216 }
8217
8218 return false;
8219 }
8220
8221 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8222 {
8223 if (TARGET_HARD_FLOAT
8224 && (mode == SFmode
8225 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8226 {
8227 *total = COSTS_N_INSNS (1);
8228 if (CONST_DOUBLE_P (XEXP (x, 0))
8229 && arm_const_double_rtx (XEXP (x, 0)))
8230 {
8231 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8232 return true;
8233 }
8234
8235 if (CONST_DOUBLE_P (XEXP (x, 1))
8236 && arm_const_double_rtx (XEXP (x, 1)))
8237 {
8238 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8239 return true;
8240 }
8241
8242 return false;
8243 }
8244 *total = COSTS_N_INSNS (20);
8245 return false;
8246 }
8247
8248 *total = COSTS_N_INSNS (1);
8249 if (CONST_INT_P (XEXP (x, 0))
8250 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8251 {
8252 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8253 return true;
8254 }
8255
8256 subcode = GET_CODE (XEXP (x, 1));
8257 if (subcode == ASHIFT || subcode == ASHIFTRT
8258 || subcode == LSHIFTRT
8259 || subcode == ROTATE || subcode == ROTATERT)
8260 {
8261 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8262 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8263 return true;
8264 }
8265
8266 /* A shift as a part of RSB costs no more than RSB itself. */
8267 if (GET_CODE (XEXP (x, 0)) == MULT
8268 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8269 {
8270 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8271 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8272 return true;
8273 }
8274
8275 if (subcode == MULT
8276 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8277 {
8278 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8279 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8280 return true;
8281 }
8282
8283 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8284 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8285 {
8286 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8287 if (REG_P (XEXP (XEXP (x, 1), 0))
8288 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8289 *total += COSTS_N_INSNS (1);
8290
8291 return true;
8292 }
8293
8294 /* Fall through */
8295
8296 case PLUS:
8297 if (code == PLUS && arm_arch6 && mode == SImode
8298 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8299 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8300 {
8301 *total = COSTS_N_INSNS (1);
8302 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8303 0, speed);
8304 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8305 return true;
8306 }
8307
8308 /* MLA: All arguments must be registers. We filter out
8309 multiplication by a power of two, so that we fall down into
8310 the code below. */
8311 if (GET_CODE (XEXP (x, 0)) == MULT
8312 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8313 {
8314 /* The cost comes from the cost of the multiply. */
8315 return false;
8316 }
8317
8318 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8319 {
8320 if (TARGET_HARD_FLOAT
8321 && (mode == SFmode
8322 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8323 {
8324 *total = COSTS_N_INSNS (1);
8325 if (CONST_DOUBLE_P (XEXP (x, 1))
8326 && arm_const_double_rtx (XEXP (x, 1)))
8327 {
8328 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8329 return true;
8330 }
8331
8332 return false;
8333 }
8334
8335 *total = COSTS_N_INSNS (20);
8336 return false;
8337 }
8338
8339 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8340 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8341 {
8342 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8343 if (REG_P (XEXP (XEXP (x, 0), 0))
8344 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8345 *total += COSTS_N_INSNS (1);
8346 return true;
8347 }
8348
8349 /* Fall through */
8350
8351 case AND: case XOR: case IOR:
8352
8353 /* Normally the frame registers will be split into reg+const during
8354 reload, so it is a bad idea to combine them with other instructions,
8355 since then they might not be moved outside of loops. As a compromise
8356 we allow integration with ops that have a constant as their second
8357 operand. */
8358 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8359 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8360 && !CONST_INT_P (XEXP (x, 1)))
8361 *total = COSTS_N_INSNS (1);
8362
8363 if (mode == DImode)
8364 {
8365 *total += COSTS_N_INSNS (2);
8366 if (CONST_INT_P (XEXP (x, 1))
8367 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8368 {
8369 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8370 return true;
8371 }
8372
8373 return false;
8374 }
8375
8376 *total += COSTS_N_INSNS (1);
8377 if (CONST_INT_P (XEXP (x, 1))
8378 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8379 {
8380 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8381 return true;
8382 }
8383 subcode = GET_CODE (XEXP (x, 0));
8384 if (subcode == ASHIFT || subcode == ASHIFTRT
8385 || subcode == LSHIFTRT
8386 || subcode == ROTATE || subcode == ROTATERT)
8387 {
8388 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8389 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8390 return true;
8391 }
8392
8393 if (subcode == MULT
8394 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8395 {
8396 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8397 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8398 return true;
8399 }
8400
8401 if (subcode == UMIN || subcode == UMAX
8402 || subcode == SMIN || subcode == SMAX)
8403 {
8404 *total = COSTS_N_INSNS (3);
8405 return true;
8406 }
8407
8408 return false;
8409
8410 case MULT:
8411 /* This should have been handled by the CPU specific routines. */
8412 gcc_unreachable ();
8413
8414 case TRUNCATE:
8415 if (arm_arch3m && mode == SImode
8416 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8417 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8418 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8419 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8420 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8421 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8422 {
8423 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8424 return true;
8425 }
8426 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8427 return false;
8428
8429 case NEG:
8430 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8431 {
8432 if (TARGET_HARD_FLOAT
8433 && (mode == SFmode
8434 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8435 {
8436 *total = COSTS_N_INSNS (1);
8437 return false;
8438 }
8439 *total = COSTS_N_INSNS (2);
8440 return false;
8441 }
8442
8443 /* Fall through */
8444 case NOT:
8445 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8446 if (mode == SImode && code == NOT)
8447 {
8448 subcode = GET_CODE (XEXP (x, 0));
8449 if (subcode == ASHIFT || subcode == ASHIFTRT
8450 || subcode == LSHIFTRT
8451 || subcode == ROTATE || subcode == ROTATERT
8452 || (subcode == MULT
8453 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8454 {
8455 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8456 /* Register shifts cost an extra cycle. */
8457 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8458 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8459 subcode, 1, speed);
8460 return true;
8461 }
8462 }
8463
8464 return false;
8465
8466 case IF_THEN_ELSE:
8467 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8468 {
8469 *total = COSTS_N_INSNS (4);
8470 return true;
8471 }
8472
8473 operand = XEXP (x, 0);
8474
8475 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8476 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8477 && REG_P (XEXP (operand, 0))
8478 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8479 *total += COSTS_N_INSNS (1);
8480 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8481 + rtx_cost (XEXP (x, 2), code, 2, speed));
8482 return true;
8483
8484 case NE:
8485 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8486 {
8487 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8488 return true;
8489 }
8490 goto scc_insn;
8491
8492 case GE:
8493 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8494 && mode == SImode && XEXP (x, 1) == const0_rtx)
8495 {
8496 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8497 return true;
8498 }
8499 goto scc_insn;
8500
8501 case LT:
8502 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8503 && mode == SImode && XEXP (x, 1) == const0_rtx)
8504 {
8505 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8506 return true;
8507 }
8508 goto scc_insn;
8509
8510 case EQ:
8511 case GT:
8512 case LE:
8513 case GEU:
8514 case LTU:
8515 case GTU:
8516 case LEU:
8517 case UNORDERED:
8518 case ORDERED:
8519 case UNEQ:
8520 case UNGE:
8521 case UNLT:
8522 case UNGT:
8523 case UNLE:
8524 scc_insn:
8525 /* SCC insns. If the comparison has already been performed,
8526 they cost 2 instructions. Otherwise they need an additional
8527 comparison before them. */
8528 *total = COSTS_N_INSNS (2);
8529 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8530 {
8531 return true;
8532 }
8533
8534 /* Fall through */
8535 case COMPARE:
8536 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8537 {
8538 *total = 0;
8539 return true;
8540 }
8541
8542 *total += COSTS_N_INSNS (1);
8543 if (CONST_INT_P (XEXP (x, 1))
8544 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8545 {
8546 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8547 return true;
8548 }
8549
8550 subcode = GET_CODE (XEXP (x, 0));
8551 if (subcode == ASHIFT || subcode == ASHIFTRT
8552 || subcode == LSHIFTRT
8553 || subcode == ROTATE || subcode == ROTATERT)
8554 {
8555 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8556 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8557 return true;
8558 }
8559
8560 if (subcode == MULT
8561 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8562 {
8563 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8564 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8565 return true;
8566 }
8567
8568 return false;
8569
8570 case UMIN:
8571 case UMAX:
8572 case SMIN:
8573 case SMAX:
8574 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8575 if (!CONST_INT_P (XEXP (x, 1))
8576 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8577 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8578 return true;
8579
8580 case ABS:
8581 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8582 {
8583 if (TARGET_HARD_FLOAT
8584 && (mode == SFmode
8585 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8586 {
8587 *total = COSTS_N_INSNS (1);
8588 return false;
8589 }
8590 *total = COSTS_N_INSNS (20);
8591 return false;
8592 }
8593 *total = COSTS_N_INSNS (1);
8594 if (mode == DImode)
8595 *total += COSTS_N_INSNS (3);
8596 return false;
8597
8598 case SIGN_EXTEND:
8599 case ZERO_EXTEND:
8600 *total = 0;
8601 if (GET_MODE_CLASS (mode) == MODE_INT)
8602 {
8603 rtx op = XEXP (x, 0);
8604 enum machine_mode opmode = GET_MODE (op);
8605
8606 if (mode == DImode)
8607 *total += COSTS_N_INSNS (1);
8608
8609 if (opmode != SImode)
8610 {
8611 if (MEM_P (op))
8612 {
8613 /* If !arm_arch4, we use one of the extendhisi2_mem
8614 or movhi_bytes patterns for HImode. For a QImode
8615 sign extension, we first zero-extend from memory
8616 and then perform a shift sequence. */
8617 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8618 *total += COSTS_N_INSNS (2);
8619 }
8620 else if (arm_arch6)
8621 *total += COSTS_N_INSNS (1);
8622
8623 /* We don't have the necessary insn, so we need to perform some
8624 other operation. */
8625 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8626 /* An and with constant 255. */
8627 *total += COSTS_N_INSNS (1);
8628 else
8629 /* A shift sequence. Increase costs slightly to avoid
8630 combining two shifts into an extend operation. */
8631 *total += COSTS_N_INSNS (2) + 1;
8632 }
8633
8634 return false;
8635 }
8636
8637 switch (GET_MODE (XEXP (x, 0)))
8638 {
8639 case V8QImode:
8640 case V4HImode:
8641 case V2SImode:
8642 case V4QImode:
8643 case V2HImode:
8644 *total = COSTS_N_INSNS (1);
8645 return false;
8646
8647 default:
8648 gcc_unreachable ();
8649 }
8650 gcc_unreachable ();
8651
8652 case ZERO_EXTRACT:
8653 case SIGN_EXTRACT:
8654 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8655 return true;
8656
8657 case CONST_INT:
8658 if (const_ok_for_arm (INTVAL (x))
8659 || const_ok_for_arm (~INTVAL (x)))
8660 *total = COSTS_N_INSNS (1);
8661 else
8662 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8663 INTVAL (x), NULL_RTX,
8664 NULL_RTX, 0, 0));
8665 return true;
8666
8667 case CONST:
8668 case LABEL_REF:
8669 case SYMBOL_REF:
8670 *total = COSTS_N_INSNS (3);
8671 return true;
8672
8673 case HIGH:
8674 *total = COSTS_N_INSNS (1);
8675 return true;
8676
8677 case LO_SUM:
8678 *total = COSTS_N_INSNS (1);
8679 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8680 return true;
8681
8682 case CONST_DOUBLE:
8683 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8684 && (mode == SFmode || !TARGET_VFP_SINGLE))
8685 *total = COSTS_N_INSNS (1);
8686 else
8687 *total = COSTS_N_INSNS (4);
8688 return true;
8689
8690 case SET:
8691 /* The vec_extract patterns accept memory operands that require an
8692 address reload. Account for the cost of that reload to give the
8693 auto-inc-dec pass an incentive to try to replace them. */
8694 if (TARGET_NEON && MEM_P (SET_DEST (x))
8695 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8696 {
8697 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8698 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8699 *total += COSTS_N_INSNS (1);
8700 return true;
8701 }
8702 /* Likewise for the vec_set patterns. */
8703 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8704 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8705 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8706 {
8707 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8708 *total = rtx_cost (mem, code, 0, speed);
8709 if (!neon_vector_mem_operand (mem, 2, true))
8710 *total += COSTS_N_INSNS (1);
8711 return true;
8712 }
8713 return false;
8714
8715 case UNSPEC:
8716 /* We cost this the same as a memory access so that it can
8717 be hoisted out of loops. */
8718 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8719 {
8720 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8721 }
8722 return true;
8723
8724 case CONST_VECTOR:
8725 if (TARGET_NEON
8726 && TARGET_HARD_FLOAT
8727 && outer == SET
8728 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8729 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8730 *total = COSTS_N_INSNS (1);
8731 else
8732 *total = COSTS_N_INSNS (4);
8733 return true;
8734
8735 default:
8736 *total = COSTS_N_INSNS (4);
8737 return false;
8738 }
8739 }
8740
8741 /* Estimate the size cost of Thumb-1 instructions.
8742 For now most of the code is copied from thumb1_rtx_costs. We need more
8743 fine-grained tuning when we have more related test cases. */
8744 static inline int
8745 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8746 {
8747 enum machine_mode mode = GET_MODE (x);
8748 int words;
8749
8750 switch (code)
8751 {
8752 case ASHIFT:
8753 case ASHIFTRT:
8754 case LSHIFTRT:
8755 case ROTATERT:
8756 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8757
8758 case PLUS:
8759 case MINUS:
8760 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
8761 shiftsub1 patterns created by RTL expansion, especially for the
8762 expansion of multiplication. */
8763 if ((GET_CODE (XEXP (x, 0)) == MULT
8764 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8765 || (GET_CODE (XEXP (x, 1)) == MULT
8766 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8767 return COSTS_N_INSNS (2);
8768 /* Deliberately fall through for normal RTX. */
8769 case COMPARE:
8770 case NEG:
8771 case NOT:
8772 return COSTS_N_INSNS (1);
8773
8774 case MULT:
8775 if (CONST_INT_P (XEXP (x, 1)))
8776 {
8777 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8778 into a register first. */
8779 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8780 return COSTS_N_INSNS (1) + const_size;
8781 }
8782 return COSTS_N_INSNS (1);
8783
8784 case SET:
8785 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8786 the mode. */
8787 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8788 return (COSTS_N_INSNS (words)
8789 + 4 * ((MEM_P (SET_SRC (x)))
8790 + MEM_P (SET_DEST (x))));
8791
8792 case CONST_INT:
8793 if (outer == SET)
8794 {
8795 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8796 return COSTS_N_INSNS (1);
8797 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8798 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8799 return COSTS_N_INSNS (2);
8800 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8801 if (thumb_shiftable_const (INTVAL (x)))
8802 return COSTS_N_INSNS (2);
8803 return COSTS_N_INSNS (3);
8804 }
8805 else if ((outer == PLUS || outer == COMPARE)
8806 && INTVAL (x) < 256 && INTVAL (x) > -256)
8807 return 0;
8808 else if ((outer == IOR || outer == XOR || outer == AND)
8809 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8810 return COSTS_N_INSNS (1);
8811 else if (outer == AND)
8812 {
8813 int i;
8814 /* This duplicates the tests in the andsi3 expander. */
8815 for (i = 9; i <= 31; i++)
8816 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8817 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8818 return COSTS_N_INSNS (2);
8819 }
8820 else if (outer == ASHIFT || outer == ASHIFTRT
8821 || outer == LSHIFTRT)
8822 return 0;
8823 return COSTS_N_INSNS (2);
8824
8825 case CONST:
8826 case CONST_DOUBLE:
8827 case LABEL_REF:
8828 case SYMBOL_REF:
8829 return COSTS_N_INSNS (3);
8830
8831 case UDIV:
8832 case UMOD:
8833 case DIV:
8834 case MOD:
8835 return 100;
8836
8837 case TRUNCATE:
8838 return 99;
8839
8840 case AND:
8841 case XOR:
8842 case IOR:
8843 /* XXX guess. */
8844 return 8;
8845
8846 case MEM:
8847 /* XXX another guess. */
8848 /* Memory costs quite a lot for the first word, but subsequent words
8849 load at the equivalent of a single insn each. */
8850 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8851 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8852 ? 4 : 0));
8853
8854 case IF_THEN_ELSE:
8855 /* XXX a guess. */
8856 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8857 return 14;
8858 return 2;
8859
8860 case ZERO_EXTEND:
8861 /* XXX still guessing. */
8862 switch (GET_MODE (XEXP (x, 0)))
8863 {
8864 case QImode:
8865 return (1 + (mode == DImode ? 4 : 0)
8866 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8867
8868 case HImode:
8869 return (4 + (mode == DImode ? 4 : 0)
8870 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8871
8872 case SImode:
8873 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8874
8875 default:
8876 return 99;
8877 }
8878
8879 default:
8880 return 99;
8881 }
8882 }
8883
8884 /* RTX costs when optimizing for size. */
8885 static bool
8886 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8887 int *total)
8888 {
8889 enum machine_mode mode = GET_MODE (x);
8890 if (TARGET_THUMB1)
8891 {
8892 *total = thumb1_size_rtx_costs (x, code, outer_code);
8893 return true;
8894 }
8895
8896 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8897 switch (code)
8898 {
8899 case MEM:
8900 /* A memory access costs 1 insn if the mode is small or the address is
8901 a single register; otherwise it costs one insn per word. */
8902 if (REG_P (XEXP (x, 0)))
8903 *total = COSTS_N_INSNS (1);
8904 else if (flag_pic
8905 && GET_CODE (XEXP (x, 0)) == PLUS
8906 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8907 /* This will be split into two instructions.
8908 See arm.md:calculate_pic_address. */
8909 *total = COSTS_N_INSNS (2);
8910 else
8911 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8912 return true;
8913
8914 case DIV:
8915 case MOD:
8916 case UDIV:
8917 case UMOD:
8918 /* Needs a libcall, so it costs about this. */
8919 *total = COSTS_N_INSNS (2);
8920 return false;
8921
8922 case ROTATE:
8923 if (mode == SImode && REG_P (XEXP (x, 1)))
8924 {
8925 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8926 return true;
8927 }
8928 /* Fall through */
8929 case ROTATERT:
8930 case ASHIFT:
8931 case LSHIFTRT:
8932 case ASHIFTRT:
8933 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8934 {
8935 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8936 return true;
8937 }
8938 else if (mode == SImode)
8939 {
8940 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8941 /* Slightly disparage register shifts, but not by much. */
8942 if (!CONST_INT_P (XEXP (x, 1)))
8943 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8944 return true;
8945 }
8946
8947 /* Needs a libcall. */
8948 *total = COSTS_N_INSNS (2);
8949 return false;
8950
8951 case MINUS:
8952 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8953 && (mode == SFmode || !TARGET_VFP_SINGLE))
8954 {
8955 *total = COSTS_N_INSNS (1);
8956 return false;
8957 }
8958
8959 if (mode == SImode)
8960 {
8961 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8962 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8963
8964 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8965 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8966 || subcode1 == ROTATE || subcode1 == ROTATERT
8967 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8968 || subcode1 == ASHIFTRT)
8969 {
8970 /* It's just the cost of the two operands. */
8971 *total = 0;
8972 return false;
8973 }
8974
8975 *total = COSTS_N_INSNS (1);
8976 return false;
8977 }
8978
8979 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8980 return false;
8981
8982 case PLUS:
8983 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8984 && (mode == SFmode || !TARGET_VFP_SINGLE))
8985 {
8986 *total = COSTS_N_INSNS (1);
8987 return false;
8988 }
8989
8990 /* A shift as a part of ADD costs nothing. */
8991 if (GET_CODE (XEXP (x, 0)) == MULT
8992 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8993 {
8994 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8995 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8996 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8997 return true;
8998 }
8999
9000 /* Fall through */
9001 case AND: case XOR: case IOR:
9002 if (mode == SImode)
9003 {
9004 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9005
9006 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9007 || subcode == LSHIFTRT || subcode == ASHIFTRT
9008 || (code == AND && subcode == NOT))
9009 {
9010 /* It's just the cost of the two operands. */
9011 *total = 0;
9012 return false;
9013 }
9014 }
9015
9016 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9017 return false;
9018
9019 case MULT:
9020 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9021 return false;
9022
9023 case NEG:
9024 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9025 && (mode == SFmode || !TARGET_VFP_SINGLE))
9026 {
9027 *total = COSTS_N_INSNS (1);
9028 return false;
9029 }
9030
9031 /* Fall through */
9032 case NOT:
9033 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9034
9035 return false;
9036
9037 case IF_THEN_ELSE:
9038 *total = 0;
9039 return false;
9040
9041 case COMPARE:
9042 if (cc_register (XEXP (x, 0), VOIDmode))
9043 *total = 0;
9044 else
9045 *total = COSTS_N_INSNS (1);
9046 return false;
9047
9048 case ABS:
9049 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9050 && (mode == SFmode || !TARGET_VFP_SINGLE))
9051 *total = COSTS_N_INSNS (1);
9052 else
9053 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9054 return false;
9055
9056 case SIGN_EXTEND:
9057 case ZERO_EXTEND:
9058 return arm_rtx_costs_1 (x, outer_code, total, 0);
9059
9060 case CONST_INT:
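/* Illustrative note: const_ok_for_arm accepts values that fit the ARM
   immediate encoding (roughly an 8-bit value rotated right by an even
   amount), so 255 and 0xFF00 are cheap here, while a value such as 257
   ends up in the two-instruction estimate below.  */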
9061 if (const_ok_for_arm (INTVAL (x)))
9062 /* Loading the constant into a register costs one instruction, whether
9063 as a plain SET or because multiplication has no immediate operand. */
9064 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9065 ? 1 : 0);
9066 else if (const_ok_for_arm (~INTVAL (x)))
9067 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9068 else if (const_ok_for_arm (-INTVAL (x)))
9069 {
9070 if (outer_code == COMPARE || outer_code == PLUS
9071 || outer_code == MINUS)
9072 *total = 0;
9073 else
9074 *total = COSTS_N_INSNS (1);
9075 }
9076 else
9077 *total = COSTS_N_INSNS (2);
9078 return true;
9079
9080 case CONST:
9081 case LABEL_REF:
9082 case SYMBOL_REF:
9083 *total = COSTS_N_INSNS (2);
9084 return true;
9085
9086 case CONST_DOUBLE:
9087 *total = COSTS_N_INSNS (4);
9088 return true;
9089
9090 case CONST_VECTOR:
9091 if (TARGET_NEON
9092 && TARGET_HARD_FLOAT
9093 && outer_code == SET
9094 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9095 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9096 *total = COSTS_N_INSNS (1);
9097 else
9098 *total = COSTS_N_INSNS (4);
9099 return true;
9100
9101 case HIGH:
9102 case LO_SUM:
9103 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9104 cost of these slightly. */
9105 *total = COSTS_N_INSNS (1) + 1;
9106 return true;
9107
9108 case SET:
9109 return false;
9110
9111 default:
9112 if (mode != VOIDmode)
9113 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9114 else
9115 *total = COSTS_N_INSNS (4); /* Who knows? */
9116 return false;
9117 }
9118 }
9119
9120 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9121 operand, then return the operand that is being shifted. If the shift
9122 is not by a constant, then set SHIFT_REG to point to the operand.
9123 Return NULL if OP is not a shifter operand. */
9124 static rtx
9125 shifter_op_p (rtx op, rtx *shift_reg)
9126 {
9127 enum rtx_code code = GET_CODE (op);
9128
9129 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9130 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9131 return XEXP (op, 0);
9132 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9133 return XEXP (op, 0);
9134 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9135 || code == ASHIFTRT)
9136 {
9137 if (!CONST_INT_P (XEXP (op, 1)))
9138 *shift_reg = XEXP (op, 1);
9139 return XEXP (op, 0);
9140 }
9141
9142 return NULL;
9143 }
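/* Illustrative examples of the cases accepted above: (mult (reg) (const_int 8))
   is a multiply by a power of two and is treated as a left shift by 3;
   (ashiftrt (reg) (reg)) is a shift by a register and also sets *SHIFT_REG;
   a plain (reg), or a multiply by a non-power-of-two constant, returns NULL.  */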
9144
9145 static bool
9146 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9147 {
9148 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9149 gcc_assert (GET_CODE (x) == UNSPEC);
9150
9151 switch (XINT (x, 1))
9152 {
9153 case UNSPEC_UNALIGNED_LOAD:
9154 /* We can only do unaligned loads into the integer unit, and we can't
9155 use LDM or LDRD. */
9156 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9157 if (speed_p)
9158 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9159 + extra_cost->ldst.load_unaligned);
9160
9161 #ifdef NOT_YET
9162 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9163 ADDR_SPACE_GENERIC, speed_p);
9164 #endif
9165 return true;
9166
9167 case UNSPEC_UNALIGNED_STORE:
9168 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9169 if (speed_p)
9170 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9171 + extra_cost->ldst.store_unaligned);
9172
9173 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9174 #ifdef NOT_YET
9175 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9176 ADDR_SPACE_GENERIC, speed_p);
9177 #endif
9178 return true;
9179
9180 case UNSPEC_VRINTZ:
9181 case UNSPEC_VRINTP:
9182 case UNSPEC_VRINTM:
9183 case UNSPEC_VRINTR:
9184 case UNSPEC_VRINTX:
9185 case UNSPEC_VRINTA:
9186 *cost = COSTS_N_INSNS (1);
9187 if (speed_p)
9188 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9189
9190 return true;
9191 default:
9192 *cost = COSTS_N_INSNS (2);
9193 break;
9194 }
9195 return false;
9196 }
9197
9198 /* Cost of a libcall. We assume one insn per argument, an amount for the
9199 call (one insn for -Os) and then one for processing the result. */
9200 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
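/* Worked example from the definition above: a two-argument libcall is
   estimated at LIBCALL_COST (2) = COSTS_N_INSNS (20) when optimizing for
   speed, and COSTS_N_INSNS (4) when optimizing for size.  */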
9201
9202 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9203 do \
9204 { \
9205 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9206 if (shift_op != NULL \
9207 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9208 { \
9209 if (shift_reg) \
9210 { \
9211 if (speed_p) \
9212 *cost += extra_cost->alu.arith_shift_reg; \
9213 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9214 } \
9215 else if (speed_p) \
9216 *cost += extra_cost->alu.arith_shift; \
9217 \
9218 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9219 + rtx_cost (XEXP (x, 1 - IDX), \
9220 OP, 1, speed_p)); \
9221 return true; \
9222 } \
9223 } \
9224 while (0);
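/* Usage sketch: HANDLE_NARROW_SHIFT_ARITH (MINUS, 0), as expanded in the
   narrow-mode MINUS and PLUS cases below, tests whether operand 0 of X is a
   left-shift-like shifter operand and, if so, accumulates the shift and the
   other operand's costs and returns true from the enclosing function.  */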
9225
9226 /* RTX costs. Make an estimate of the cost of executing the operation
9227 X, which is contained within an operation with code OUTER_CODE.
9228 SPEED_P indicates whether the cost desired is the performance cost,
9229 or the size cost. The estimate is stored in COST and the return
9230 value is TRUE if the cost calculation is final, or FALSE if the
9231 caller should recurse through the operands of X to add additional
9232 costs.
9233
9234 We currently make no attempt to model the size savings of Thumb-2
9235 16-bit instructions. At the normal points in compilation where
9236 this code is called we have no measure of whether the condition
9237 flags are live or not, and thus no realistic way to determine what
9238 the size will eventually be. */
9239 static bool
9240 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9241 const struct cpu_cost_table *extra_cost,
9242 int *cost, bool speed_p)
9243 {
9244 enum machine_mode mode = GET_MODE (x);
9245
9246 if (TARGET_THUMB1)
9247 {
9248 if (speed_p)
9249 *cost = thumb1_rtx_costs (x, code, outer_code);
9250 else
9251 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9252 return true;
9253 }
9254
9255 switch (code)
9256 {
9257 case SET:
9258 *cost = 0;
9259 /* SET RTXs don't have a mode so we get it from the destination. */
9260 mode = GET_MODE (SET_DEST (x));
9261
9262 if (REG_P (SET_SRC (x))
9263 && REG_P (SET_DEST (x)))
9264 {
9265 /* Assume that most copies can be done with a single insn,
9266 unless we don't have HW FP, in which case everything
9267 larger than word mode will require two insns. */
9268 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9269 && GET_MODE_SIZE (mode) > 4)
9270 || mode == DImode)
9271 ? 2 : 1);
9272 /* Conditional register moves can be encoded
9273 in 16 bits in Thumb mode. */
9274 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9275 *cost >>= 1;
9276
9277 return true;
9278 }
9279
9280 if (CONST_INT_P (SET_SRC (x)))
9281 {
9282 /* Handle CONST_INT here, since the value doesn't have a mode
9283 and we would otherwise be unable to work out the true cost. */
9284 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9285 outer_code = SET;
9286 /* Slightly lower the cost of setting a core reg to a constant.
9287 This helps break up chains and allows for better scheduling. */
9288 if (REG_P (SET_DEST (x))
9289 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9290 *cost -= 1;
9291 x = SET_SRC (x);
9292 /* Immediate moves with an immediate in the range [0, 255] can be
9293 encoded in 16 bits in Thumb mode. */
9294 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9295 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9296 *cost >>= 1;
9297 goto const_int_cost;
9298 }
9299
9300 return false;
9301
9302 case MEM:
9303 /* A memory access costs 1 insn if the mode is small, or the address is
9304 a single register, otherwise it costs one insn per word. */
9305 if (REG_P (XEXP (x, 0)))
9306 *cost = COSTS_N_INSNS (1);
9307 else if (flag_pic
9308 && GET_CODE (XEXP (x, 0)) == PLUS
9309 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9310 /* This will be split into two instructions.
9311 See arm.md:calculate_pic_address. */
9312 *cost = COSTS_N_INSNS (2);
9313 else
9314 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9315
9316 /* For speed optimizations, add the costs of the address and
9317 accessing memory. */
9318 if (speed_p)
9319 #ifdef NOT_YET
9320 *cost += (extra_cost->ldst.load
9321 + arm_address_cost (XEXP (x, 0), mode,
9322 ADDR_SPACE_GENERIC, speed_p));
9323 #else
9324 *cost += extra_cost->ldst.load;
9325 #endif
9326 return true;
9327
9328 case PARALLEL:
9329 {
9330 /* Calculations of LDM costs are complex. We assume an initial cost
9331 (ldm_1st) which will load the number of registers mentioned in
9332 ldm_regs_per_insn_1st registers; then each additional
9333 ldm_regs_per_insn_subsequent registers cost one more insn. The
9334 formula for N regs is thus:
9335
9336 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9337 + ldm_regs_per_insn_subsequent - 1)
9338 / ldm_regs_per_insn_subsequent).
9339
9340 Additional costs may also be added for addressing. A similar
9341 formula is used for STM. */
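/* Illustrative instance with hypothetical tuning values: if
   ldm_regs_per_insn_1st is 2 and ldm_regs_per_insn_subsequent is 2, an
   LDM of five registers costs
   ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (2).  */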
9342
9343 bool is_ldm = load_multiple_operation (x, SImode);
9344 bool is_stm = store_multiple_operation (x, SImode);
9345
9346 *cost = COSTS_N_INSNS (1);
9347
9348 if (is_ldm || is_stm)
9349 {
9350 if (speed_p)
9351 {
9352 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9353 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9354 ? extra_cost->ldst.ldm_regs_per_insn_1st
9355 : extra_cost->ldst.stm_regs_per_insn_1st;
9356 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9357 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9358 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9359
9360 *cost += regs_per_insn_1st
9361 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9362 + regs_per_insn_sub - 1)
9363 / regs_per_insn_sub);
9364 return true;
9365 }
9366
9367 }
9368 return false;
9369 }
9370 case DIV:
9371 case UDIV:
9372 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9373 && (mode == SFmode || !TARGET_VFP_SINGLE))
9374 *cost = COSTS_N_INSNS (speed_p
9375 ? extra_cost->fp[mode != SFmode].div : 1);
9376 else if (mode == SImode && TARGET_IDIV)
9377 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9378 else
9379 *cost = LIBCALL_COST (2);
9380 return false; /* All arguments must be in registers. */
9381
9382 case MOD:
9383 case UMOD:
9384 *cost = LIBCALL_COST (2);
9385 return false; /* All arguments must be in registers. */
9386
9387 case ROTATE:
9388 if (mode == SImode && REG_P (XEXP (x, 1)))
9389 {
9390 *cost = (COSTS_N_INSNS (2)
9391 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9392 if (speed_p)
9393 *cost += extra_cost->alu.shift_reg;
9394 return true;
9395 }
9396 /* Fall through */
9397 case ROTATERT:
9398 case ASHIFT:
9399 case LSHIFTRT:
9400 case ASHIFTRT:
9401 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9402 {
9403 *cost = (COSTS_N_INSNS (3)
9404 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9405 if (speed_p)
9406 *cost += 2 * extra_cost->alu.shift;
9407 return true;
9408 }
9409 else if (mode == SImode)
9410 {
9411 *cost = (COSTS_N_INSNS (1)
9412 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9413 /* Slightly disparage register shifts at -Os, but not by much. */
9414 if (!CONST_INT_P (XEXP (x, 1)))
9415 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9416 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9417 return true;
9418 }
9419 else if (GET_MODE_CLASS (mode) == MODE_INT
9420 && GET_MODE_SIZE (mode) < 4)
9421 {
9422 if (code == ASHIFT)
9423 {
9424 *cost = (COSTS_N_INSNS (1)
9425 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9426 /* Slightly disparage register shifts at -Os, but not by
9427 much. */
9428 if (!CONST_INT_P (XEXP (x, 1)))
9429 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9430 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9431 }
9432 else if (code == LSHIFTRT || code == ASHIFTRT)
9433 {
9434 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9435 {
9436 /* Can use SBFX/UBFX. */
9437 *cost = COSTS_N_INSNS (1);
9438 if (speed_p)
9439 *cost += extra_cost->alu.bfx;
9440 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9441 }
9442 else
9443 {
9444 *cost = COSTS_N_INSNS (2);
9445 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9446 if (speed_p)
9447 {
9448 if (CONST_INT_P (XEXP (x, 1)))
9449 *cost += 2 * extra_cost->alu.shift;
9450 else
9451 *cost += (extra_cost->alu.shift
9452 + extra_cost->alu.shift_reg);
9453 }
9454 else
9455 /* Slightly disparage register shifts. */
9456 *cost += !CONST_INT_P (XEXP (x, 1));
9457 }
9458 }
9459 else /* Rotates. */
9460 {
9461 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9462 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9463 if (speed_p)
9464 {
9465 if (CONST_INT_P (XEXP (x, 1)))
9466 *cost += (2 * extra_cost->alu.shift
9467 + extra_cost->alu.log_shift);
9468 else
9469 *cost += (extra_cost->alu.shift
9470 + extra_cost->alu.shift_reg
9471 + extra_cost->alu.log_shift_reg);
9472 }
9473 }
9474 return true;
9475 }
9476
9477 *cost = LIBCALL_COST (2);
9478 return false;
9479
9480 case BSWAP:
9481 if (arm_arch6)
9482 {
9483 if (mode == SImode)
9484 {
9485 *cost = COSTS_N_INSNS (1);
9486 if (speed_p)
9487 *cost += extra_cost->alu.rev;
9488
9489 return false;
9490 }
9491 }
9492 else
9493 {
9494 /* No rev instruction available. Look at arm_legacy_rev
9495 and thumb_legacy_rev for the form of RTL used then. */
9496 if (TARGET_THUMB)
9497 {
9498 *cost = COSTS_N_INSNS (10);
9499
9500 if (speed_p)
9501 {
9502 *cost += 6 * extra_cost->alu.shift;
9503 *cost += 3 * extra_cost->alu.logical;
9504 }
9505 }
9506 else
9507 {
9508 *cost = COSTS_N_INSNS (5);
9509
9510 if (speed_p)
9511 {
9512 *cost += 2 * extra_cost->alu.shift;
9513 *cost += extra_cost->alu.arith_shift;
9514 *cost += 2 * extra_cost->alu.logical;
9515 }
9516 }
9517 return true;
9518 }
9519 return false;
9520
9521 case MINUS:
9522 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9523 && (mode == SFmode || !TARGET_VFP_SINGLE))
9524 {
9525 *cost = COSTS_N_INSNS (1);
9526 if (GET_CODE (XEXP (x, 0)) == MULT
9527 || GET_CODE (XEXP (x, 1)) == MULT)
9528 {
9529 rtx mul_op0, mul_op1, sub_op;
9530
9531 if (speed_p)
9532 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9533
9534 if (GET_CODE (XEXP (x, 0)) == MULT)
9535 {
9536 mul_op0 = XEXP (XEXP (x, 0), 0);
9537 mul_op1 = XEXP (XEXP (x, 0), 1);
9538 sub_op = XEXP (x, 1);
9539 }
9540 else
9541 {
9542 mul_op0 = XEXP (XEXP (x, 1), 0);
9543 mul_op1 = XEXP (XEXP (x, 1), 1);
9544 sub_op = XEXP (x, 0);
9545 }
9546
9547 /* The first operand of the multiply may be optionally
9548 negated. */
9549 if (GET_CODE (mul_op0) == NEG)
9550 mul_op0 = XEXP (mul_op0, 0);
9551
9552 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9553 + rtx_cost (mul_op1, code, 0, speed_p)
9554 + rtx_cost (sub_op, code, 0, speed_p));
9555
9556 return true;
9557 }
9558
9559 if (speed_p)
9560 *cost += extra_cost->fp[mode != SFmode].addsub;
9561 return false;
9562 }
9563
9564 if (mode == SImode)
9565 {
9566 rtx shift_by_reg = NULL;
9567 rtx shift_op;
9568 rtx non_shift_op;
9569
9570 *cost = COSTS_N_INSNS (1);
9571
9572 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9573 if (shift_op == NULL)
9574 {
9575 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9576 non_shift_op = XEXP (x, 0);
9577 }
9578 else
9579 non_shift_op = XEXP (x, 1);
9580
9581 if (shift_op != NULL)
9582 {
9583 if (shift_by_reg != NULL)
9584 {
9585 if (speed_p)
9586 *cost += extra_cost->alu.arith_shift_reg;
9587 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9588 }
9589 else if (speed_p)
9590 *cost += extra_cost->alu.arith_shift;
9591
9592 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9593 + rtx_cost (non_shift_op, code, 0, speed_p));
9594 return true;
9595 }
9596
9597 if (arm_arch_thumb2
9598 && GET_CODE (XEXP (x, 1)) == MULT)
9599 {
9600 /* MLS. */
9601 if (speed_p)
9602 *cost += extra_cost->mult[0].add;
9603 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9604 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9605 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9606 return true;
9607 }
9608
9609 if (CONST_INT_P (XEXP (x, 0)))
9610 {
9611 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9612 INTVAL (XEXP (x, 0)), NULL_RTX,
9613 NULL_RTX, 1, 0);
9614 *cost = COSTS_N_INSNS (insns);
9615 if (speed_p)
9616 *cost += insns * extra_cost->alu.arith;
9617 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9618 return true;
9619 }
9620
9621 return false;
9622 }
9623
9624 if (GET_MODE_CLASS (mode) == MODE_INT
9625 && GET_MODE_SIZE (mode) < 4)
9626 {
9627 rtx shift_op, shift_reg;
9628 shift_reg = NULL;
9629
9630 /* We check both sides of the MINUS for shifter operands since,
9631 unlike PLUS, it's not commutative. */
9632
9633 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9634 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9635
9636 /* Slightly disparage, as we might need to widen the result. */
9637 *cost = 1 + COSTS_N_INSNS (1);
9638 if (speed_p)
9639 *cost += extra_cost->alu.arith;
9640
9641 if (CONST_INT_P (XEXP (x, 0)))
9642 {
9643 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9644 return true;
9645 }
9646
9647 return false;
9648 }
9649
9650 if (mode == DImode)
9651 {
9652 *cost = COSTS_N_INSNS (2);
9653
9654 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9655 {
9656 rtx op1 = XEXP (x, 1);
9657
9658 if (speed_p)
9659 *cost += 2 * extra_cost->alu.arith;
9660
9661 if (GET_CODE (op1) == ZERO_EXTEND)
9662 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9663 else
9664 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9665 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9666 0, speed_p);
9667 return true;
9668 }
9669 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9670 {
9671 if (speed_p)
9672 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9673 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9674 0, speed_p)
9675 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9676 return true;
9677 }
9678 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9679 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9680 {
9681 if (speed_p)
9682 *cost += (extra_cost->alu.arith
9683 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9684 ? extra_cost->alu.arith
9685 : extra_cost->alu.arith_shift));
9686 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9687 + rtx_cost (XEXP (XEXP (x, 1), 0),
9688 GET_CODE (XEXP (x, 1)), 0, speed_p));
9689 return true;
9690 }
9691
9692 if (speed_p)
9693 *cost += 2 * extra_cost->alu.arith;
9694 return false;
9695 }
9696
9697 /* Vector mode? */
9698
9699 *cost = LIBCALL_COST (2);
9700 return false;
9701
9702 case PLUS:
9703 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9704 && (mode == SFmode || !TARGET_VFP_SINGLE))
9705 {
9706 *cost = COSTS_N_INSNS (1);
9707 if (GET_CODE (XEXP (x, 0)) == MULT)
9708 {
9709 rtx mul_op0, mul_op1, add_op;
9710
9711 if (speed_p)
9712 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9713
9714 mul_op0 = XEXP (XEXP (x, 0), 0);
9715 mul_op1 = XEXP (XEXP (x, 0), 1);
9716 add_op = XEXP (x, 1);
9717
9718 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9719 + rtx_cost (mul_op1, code, 0, speed_p)
9720 + rtx_cost (add_op, code, 0, speed_p));
9721
9722 return true;
9723 }
9724
9725 if (speed_p)
9726 *cost += extra_cost->fp[mode != SFmode].addsub;
9727 return false;
9728 }
9729 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9730 {
9731 *cost = LIBCALL_COST (2);
9732 return false;
9733 }
9734
9735 /* Narrow modes can be synthesized in SImode, but the range
9736 of useful sub-operations is limited. Check for shift operations
9737 on one of the operands. Only left shifts can be used in the
9738 narrow modes. */
9739 if (GET_MODE_CLASS (mode) == MODE_INT
9740 && GET_MODE_SIZE (mode) < 4)
9741 {
9742 rtx shift_op, shift_reg;
9743 shift_reg = NULL;
9744
9745 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9746
9747 if (CONST_INT_P (XEXP (x, 1)))
9748 {
9749 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9750 INTVAL (XEXP (x, 1)), NULL_RTX,
9751 NULL_RTX, 1, 0);
9752 *cost = COSTS_N_INSNS (insns);
9753 if (speed_p)
9754 *cost += insns * extra_cost->alu.arith;
9755 /* Slightly penalize a narrow operation as the result may
9756 need widening. */
9757 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9758 return true;
9759 }
9760
9761 /* Slightly penalize a narrow operation as the result may
9762 need widening. */
9763 *cost = 1 + COSTS_N_INSNS (1);
9764 if (speed_p)
9765 *cost += extra_cost->alu.arith;
9766
9767 return false;
9768 }
9769
9770 if (mode == SImode)
9771 {
9772 rtx shift_op, shift_reg;
9773
9774 *cost = COSTS_N_INSNS (1);
9775 if (TARGET_INT_SIMD
9776 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9777 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9778 {
9779 /* UXTA[BH] or SXTA[BH]. */
9780 if (speed_p)
9781 *cost += extra_cost->alu.extend_arith;
9782 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9783 speed_p)
9784 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9785 return true;
9786 }
9787
9788 shift_reg = NULL;
9789 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9790 if (shift_op != NULL)
9791 {
9792 if (shift_reg)
9793 {
9794 if (speed_p)
9795 *cost += extra_cost->alu.arith_shift_reg;
9796 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9797 }
9798 else if (speed_p)
9799 *cost += extra_cost->alu.arith_shift;
9800
9801 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9802 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9803 return true;
9804 }
9805 if (GET_CODE (XEXP (x, 0)) == MULT)
9806 {
9807 rtx mul_op = XEXP (x, 0);
9808
9809 *cost = COSTS_N_INSNS (1);
9810
9811 if (TARGET_DSP_MULTIPLY
9812 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9813 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9814 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9815 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9816 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9817 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9818 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9819 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9820 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9821 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9822 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9823 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9824 == 16))))))
9825 {
9826 /* SMLA[BT][BT]. */
9827 if (speed_p)
9828 *cost += extra_cost->mult[0].extend_add;
9829 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9830 SIGN_EXTEND, 0, speed_p)
9831 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9832 SIGN_EXTEND, 0, speed_p)
9833 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9834 return true;
9835 }
9836
9837 if (speed_p)
9838 *cost += extra_cost->mult[0].add;
9839 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9840 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9841 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9842 return true;
9843 }
9844 if (CONST_INT_P (XEXP (x, 1)))
9845 {
9846 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9847 INTVAL (XEXP (x, 1)), NULL_RTX,
9848 NULL_RTX, 1, 0);
9849 *cost = COSTS_N_INSNS (insns);
9850 if (speed_p)
9851 *cost += insns * extra_cost->alu.arith;
9852 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9853 return true;
9854 }
9855 return false;
9856 }
9857
9858 if (mode == DImode)
9859 {
9860 if (arm_arch3m
9861 && GET_CODE (XEXP (x, 0)) == MULT
9862 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9863 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9864 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9865 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9866 {
9867 *cost = COSTS_N_INSNS (1);
9868 if (speed_p)
9869 *cost += extra_cost->mult[1].extend_add;
9870 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9871 ZERO_EXTEND, 0, speed_p)
9872 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9873 ZERO_EXTEND, 0, speed_p)
9874 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9875 return true;
9876 }
9877
9878 *cost = COSTS_N_INSNS (2);
9879
9880 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9881 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9882 {
9883 if (speed_p)
9884 *cost += (extra_cost->alu.arith
9885 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9886 ? extra_cost->alu.arith
9887 : extra_cost->alu.arith_shift));
9888
9889 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9890 speed_p)
9891 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9892 return true;
9893 }
9894
9895 if (speed_p)
9896 *cost += 2 * extra_cost->alu.arith;
9897 return false;
9898 }
9899
9900 /* Vector mode? */
9901 *cost = LIBCALL_COST (2);
9902 return false;
9903 case IOR:
9904 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9905 {
9906 *cost = COSTS_N_INSNS (1);
9907 if (speed_p)
9908 *cost += extra_cost->alu.rev;
9909
9910 return true;
9911 }
9912 /* Fall through. */
9913 case AND: case XOR:
9914 if (mode == SImode)
9915 {
9916 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9917 rtx op0 = XEXP (x, 0);
9918 rtx shift_op, shift_reg;
9919
9920 *cost = COSTS_N_INSNS (1);
9921
9922 if (subcode == NOT
9923 && (code == AND
9924 || (code == IOR && TARGET_THUMB2)))
9925 op0 = XEXP (op0, 0);
9926
9927 shift_reg = NULL;
9928 shift_op = shifter_op_p (op0, &shift_reg);
9929 if (shift_op != NULL)
9930 {
9931 if (shift_reg)
9932 {
9933 if (speed_p)
9934 *cost += extra_cost->alu.log_shift_reg;
9935 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9936 }
9937 else if (speed_p)
9938 *cost += extra_cost->alu.log_shift;
9939
9940 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9941 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9942 return true;
9943 }
9944
9945 if (CONST_INT_P (XEXP (x, 1)))
9946 {
9947 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9948 INTVAL (XEXP (x, 1)), NULL_RTX,
9949 NULL_RTX, 1, 0);
9950
9951 *cost = COSTS_N_INSNS (insns);
9952 if (speed_p)
9953 *cost += insns * extra_cost->alu.logical;
9954 *cost += rtx_cost (op0, code, 0, speed_p);
9955 return true;
9956 }
9957
9958 if (speed_p)
9959 *cost += extra_cost->alu.logical;
9960 *cost += (rtx_cost (op0, code, 0, speed_p)
9961 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9962 return true;
9963 }
9964
9965 if (mode == DImode)
9966 {
9967 rtx op0 = XEXP (x, 0);
9968 enum rtx_code subcode = GET_CODE (op0);
9969
9970 *cost = COSTS_N_INSNS (2);
9971
9972 if (subcode == NOT
9973 && (code == AND
9974 || (code == IOR && TARGET_THUMB2)))
9975 op0 = XEXP (op0, 0);
9976
9977 if (GET_CODE (op0) == ZERO_EXTEND)
9978 {
9979 if (speed_p)
9980 *cost += 2 * extra_cost->alu.logical;
9981
9982 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9983 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9984 return true;
9985 }
9986 else if (GET_CODE (op0) == SIGN_EXTEND)
9987 {
9988 if (speed_p)
9989 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9990
9991 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9992 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9993 return true;
9994 }
9995
9996 if (speed_p)
9997 *cost += 2 * extra_cost->alu.logical;
9998
9999 return true;
10000 }
10001 /* Vector mode? */
10002
10003 *cost = LIBCALL_COST (2);
10004 return false;
10005
10006 case MULT:
10007 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10008 && (mode == SFmode || !TARGET_VFP_SINGLE))
10009 {
10010 rtx op0 = XEXP (x, 0);
10011
10012 *cost = COSTS_N_INSNS (1);
10013
10014 if (GET_CODE (op0) == NEG)
10015 op0 = XEXP (op0, 0);
10016
10017 if (speed_p)
10018 *cost += extra_cost->fp[mode != SFmode].mult;
10019
10020 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10021 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10022 return true;
10023 }
10024 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10025 {
10026 *cost = LIBCALL_COST (2);
10027 return false;
10028 }
10029
10030 if (mode == SImode)
10031 {
10032 *cost = COSTS_N_INSNS (1);
10033 if (TARGET_DSP_MULTIPLY
10034 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10035 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10036 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10037 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10038 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10039 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10040 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10041 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10042 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10043 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10044 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10045 && (INTVAL (XEXP (XEXP (x, 1), 1))
10046 == 16))))))
10047 {
10048 /* SMUL[TB][TB]. */
10049 if (speed_p)
10050 *cost += extra_cost->mult[0].extend;
10051 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10052 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10053 return true;
10054 }
10055 if (speed_p)
10056 *cost += extra_cost->mult[0].simple;
10057 return false;
10058 }
10059
10060 if (mode == DImode)
10061 {
10062 if (arm_arch3m
10063 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10064 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10065 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10066 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10067 {
10068 *cost = COSTS_N_INSNS (1);
10069 if (speed_p)
10070 *cost += extra_cost->mult[1].extend;
10071 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10072 ZERO_EXTEND, 0, speed_p)
10073 + rtx_cost (XEXP (XEXP (x, 1), 0),
10074 ZERO_EXTEND, 0, speed_p));
10075 return true;
10076 }
10077
10078 *cost = LIBCALL_COST (2);
10079 return false;
10080 }
10081
10082 /* Vector mode? */
10083 *cost = LIBCALL_COST (2);
10084 return false;
10085
10086 case NEG:
10087 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10088 && (mode == SFmode || !TARGET_VFP_SINGLE))
10089 {
10090 *cost = COSTS_N_INSNS (1);
10091 if (speed_p)
10092 *cost += extra_cost->fp[mode != SFmode].neg;
10093
10094 return false;
10095 }
10096 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10097 {
10098 *cost = LIBCALL_COST (1);
10099 return false;
10100 }
10101
10102 if (mode == SImode)
10103 {
10104 if (GET_CODE (XEXP (x, 0)) == ABS)
10105 {
10106 *cost = COSTS_N_INSNS (2);
10107 /* Assume the non-flag-changing variant. */
10108 if (speed_p)
10109 *cost += (extra_cost->alu.log_shift
10110 + extra_cost->alu.arith_shift);
10111 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10112 return true;
10113 }
10114
10115 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10116 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10117 {
10118 *cost = COSTS_N_INSNS (2);
10119 /* No extra cost for MOV imm and MVN imm. */
10120 /* If the comparison op is using the flags, there's no further
10121 cost, otherwise we need to add the cost of the comparison. */
10122 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10123 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10124 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10125 {
10126 *cost += (COSTS_N_INSNS (1)
10127 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10128 speed_p)
10129 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10130 speed_p));
10131 if (speed_p)
10132 *cost += extra_cost->alu.arith;
10133 }
10134 return true;
10135 }
10136 *cost = COSTS_N_INSNS (1);
10137 if (speed_p)
10138 *cost += extra_cost->alu.arith;
10139 return false;
10140 }
10141
10142 if (GET_MODE_CLASS (mode) == MODE_INT
10143 && GET_MODE_SIZE (mode) < 4)
10144 {
10145 /* Slightly disparage, as we might need an extend operation. */
10146 *cost = 1 + COSTS_N_INSNS (1);
10147 if (speed_p)
10148 *cost += extra_cost->alu.arith;
10149 return false;
10150 }
10151
10152 if (mode == DImode)
10153 {
10154 *cost = COSTS_N_INSNS (2);
10155 if (speed_p)
10156 *cost += 2 * extra_cost->alu.arith;
10157 return false;
10158 }
10159
10160 /* Vector mode? */
10161 *cost = LIBCALL_COST (1);
10162 return false;
10163
10164 case NOT:
10165 if (mode == SImode)
10166 {
10167 rtx shift_op;
10168 rtx shift_reg = NULL;
10169
10170 *cost = COSTS_N_INSNS (1);
10171 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10172
10173 if (shift_op)
10174 {
10175 if (shift_reg != NULL)
10176 {
10177 if (speed_p)
10178 *cost += extra_cost->alu.log_shift_reg;
10179 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10180 }
10181 else if (speed_p)
10182 *cost += extra_cost->alu.log_shift;
10183 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10184 return true;
10185 }
10186
10187 if (speed_p)
10188 *cost += extra_cost->alu.logical;
10189 return false;
10190 }
10191 if (mode == DImode)
10192 {
10193 *cost = COSTS_N_INSNS (2);
10194 return false;
10195 }
10196
10197 /* Vector mode? */
10198
10199 *cost += LIBCALL_COST (1);
10200 return false;
10201
10202 case IF_THEN_ELSE:
10203 {
10204 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10205 {
10206 *cost = COSTS_N_INSNS (4);
10207 return true;
10208 }
10209 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10210 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10211
10212 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10213 /* Assume that if one arm of the if_then_else is a register,
10214 that it will be tied with the result and eliminate the
10215 conditional insn. */
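/* For example, in (if_then_else (cond) (reg) (const_int 5)) only the
   constant move is expected to remain conditional, so only the cost of
   the constant arm is added.  */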
10216 if (REG_P (XEXP (x, 1)))
10217 *cost += op2cost;
10218 else if (REG_P (XEXP (x, 2)))
10219 *cost += op1cost;
10220 else
10221 {
10222 if (speed_p)
10223 {
10224 if (extra_cost->alu.non_exec_costs_exec)
10225 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10226 else
10227 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10228 }
10229 else
10230 *cost += op1cost + op2cost;
10231 }
10232 }
10233 return true;
10234
10235 case COMPARE:
10236 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10237 *cost = 0;
10238 else
10239 {
10240 enum machine_mode op0mode;
10241 /* We'll mostly assume that the cost of a compare is the cost of the
10242 LHS. However, there are some notable exceptions. */
10243
10244 /* Floating point compares are never done as side-effects. */
10245 op0mode = GET_MODE (XEXP (x, 0));
10246 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10247 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10248 {
10249 *cost = COSTS_N_INSNS (1);
10250 if (speed_p)
10251 *cost += extra_cost->fp[op0mode != SFmode].compare;
10252
10253 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10254 {
10255 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10256 return true;
10257 }
10258
10259 return false;
10260 }
10261 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10262 {
10263 *cost = LIBCALL_COST (2);
10264 return false;
10265 }
10266
10267 /* DImode compares normally take two insns. */
10268 if (op0mode == DImode)
10269 {
10270 *cost = COSTS_N_INSNS (2);
10271 if (speed_p)
10272 *cost += 2 * extra_cost->alu.arith;
10273 return false;
10274 }
10275
10276 if (op0mode == SImode)
10277 {
10278 rtx shift_op;
10279 rtx shift_reg;
10280
10281 if (XEXP (x, 1) == const0_rtx
10282 && !(REG_P (XEXP (x, 0))
10283 || (GET_CODE (XEXP (x, 0)) == SUBREG
10284 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10285 {
10286 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10287
10288 /* Multiply operations that set the flags are often
10289 significantly more expensive. */
10290 if (speed_p
10291 && GET_CODE (XEXP (x, 0)) == MULT
10292 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10293 *cost += extra_cost->mult[0].flag_setting;
10294
10295 if (speed_p
10296 && GET_CODE (XEXP (x, 0)) == PLUS
10297 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10298 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10299 0), 1), mode))
10300 *cost += extra_cost->mult[0].flag_setting;
10301 return true;
10302 }
10303
10304 shift_reg = NULL;
10305 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10306 if (shift_op != NULL)
10307 {
10308 *cost = COSTS_N_INSNS (1);
10309 if (shift_reg != NULL)
10310 {
10311 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10312 if (speed_p)
10313 *cost += extra_cost->alu.arith_shift_reg;
10314 }
10315 else if (speed_p)
10316 *cost += extra_cost->alu.arith_shift;
10317 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10318 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10319 return true;
10320 }
10321
10322 *cost = COSTS_N_INSNS (1);
10323 if (speed_p)
10324 *cost += extra_cost->alu.arith;
10325 if (CONST_INT_P (XEXP (x, 1))
10326 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10327 {
10328 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10329 return true;
10330 }
10331 return false;
10332 }
10333
10334 /* Vector mode? */
10335
10336 *cost = LIBCALL_COST (2);
10337 return false;
10338 }
10339 return true;
10340
10341 case EQ:
10342 case NE:
10343 case LT:
10344 case LE:
10345 case GT:
10346 case GE:
10347 case LTU:
10348 case LEU:
10349 case GEU:
10350 case GTU:
10351 case ORDERED:
10352 case UNORDERED:
10353 case UNEQ:
10354 case UNLE:
10355 case UNLT:
10356 case UNGE:
10357 case UNGT:
10358 case LTGT:
10359 if (outer_code == SET)
10360 {
10361 /* Is it a store-flag operation? */
10362 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10363 && XEXP (x, 1) == const0_rtx)
10364 {
10365 /* Thumb also needs an IT insn. */
10366 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10367 return true;
10368 }
10369 if (XEXP (x, 1) == const0_rtx)
10370 {
10371 switch (code)
10372 {
10373 case LT:
10374 /* LSR Rd, Rn, #31. */
10375 *cost = COSTS_N_INSNS (1);
10376 if (speed_p)
10377 *cost += extra_cost->alu.shift;
10378 break;
10379
10380 case EQ:
10381 /* RSBS T1, Rn, #0
10382 ADC Rd, Rn, T1. */
10383
10384 case NE:
10385 /* SUBS T1, Rn, #1
10386 SBC Rd, Rn, T1. */
10387 *cost = COSTS_N_INSNS (2);
10388 break;
10389
10390 case LE:
10391 /* RSBS T1, Rn, Rn, LSR #31
10392 ADC Rd, Rn, T1. */
10393 *cost = COSTS_N_INSNS (2);
10394 if (speed_p)
10395 *cost += extra_cost->alu.arith_shift;
10396 break;
10397
10398 case GT:
10399 /* RSB Rd, Rn, Rn, ASR #1
10400 LSR Rd, Rd, #31. */
10401 *cost = COSTS_N_INSNS (2);
10402 if (speed_p)
10403 *cost += (extra_cost->alu.arith_shift
10404 + extra_cost->alu.shift);
10405 break;
10406
10407 case GE:
10408 /* ASR Rd, Rn, #31
10409 ADD Rd, Rn, #1. */
10410 *cost = COSTS_N_INSNS (2);
10411 if (speed_p)
10412 *cost += extra_cost->alu.shift;
10413 break;
10414
10415 default:
10416 /* Remaining cases are either meaningless or would take
10417 three insns anyway. */
10418 *cost = COSTS_N_INSNS (3);
10419 break;
10420 }
10421 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10422 return true;
10423 }
10424 else
10425 {
10426 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10427 if (CONST_INT_P (XEXP (x, 1))
10428 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10429 {
10430 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10431 return true;
10432 }
10433
10434 return false;
10435 }
10436 }
10437 /* Not directly inside a set. If it involves the condition code
10438 register it must be the condition for a branch, cond_exec or
10439 I_T_E operation. Since the comparison is performed elsewhere
10440 this is just the control part which has no additional
10441 cost. */
10442 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10443 && XEXP (x, 1) == const0_rtx)
10444 {
10445 *cost = 0;
10446 return true;
10447 }
10448 return false;
10449
10450 case ABS:
10451 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10452 && (mode == SFmode || !TARGET_VFP_SINGLE))
10453 {
10454 *cost = COSTS_N_INSNS (1);
10455 if (speed_p)
10456 *cost += extra_cost->fp[mode != SFmode].neg;
10457
10458 return false;
10459 }
10460 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10461 {
10462 *cost = LIBCALL_COST (1);
10463 return false;
10464 }
10465
10466 if (mode == SImode)
10467 {
10468 *cost = COSTS_N_INSNS (1);
10469 if (speed_p)
10470 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10471 return false;
10472 }
10473 /* Vector mode? */
10474 *cost = LIBCALL_COST (1);
10475 return false;
10476
10477 case SIGN_EXTEND:
10478 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10479 && MEM_P (XEXP (x, 0)))
10480 {
10481 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10482
10483 if (mode == DImode)
10484 *cost += COSTS_N_INSNS (1);
10485
10486 if (!speed_p)
10487 return true;
10488
10489 if (GET_MODE (XEXP (x, 0)) == SImode)
10490 *cost += extra_cost->ldst.load;
10491 else
10492 *cost += extra_cost->ldst.load_sign_extend;
10493
10494 if (mode == DImode)
10495 *cost += extra_cost->alu.shift;
10496
10497 return true;
10498 }
10499
10500 /* Widening from less than 32-bits requires an extend operation. */
10501 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10502 {
10503 /* We have SXTB/SXTH. */
10504 *cost = COSTS_N_INSNS (1);
10505 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10506 if (speed_p)
10507 *cost += extra_cost->alu.extend;
10508 }
10509 else if (GET_MODE (XEXP (x, 0)) != SImode)
10510 {
10511 /* Needs two shifts. */
10512 *cost = COSTS_N_INSNS (2);
10513 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10514 if (speed_p)
10515 *cost += 2 * extra_cost->alu.shift;
10516 }
10517
10518 /* Widening beyond 32-bits requires one more insn. */
10519 if (mode == DImode)
10520 {
10521 *cost += COSTS_N_INSNS (1);
10522 if (speed_p)
10523 *cost += extra_cost->alu.shift;
10524 }
10525
10526 return true;
10527
10528 case ZERO_EXTEND:
10529 if ((arm_arch4
10530 || GET_MODE (XEXP (x, 0)) == SImode
10531 || GET_MODE (XEXP (x, 0)) == QImode)
10532 && MEM_P (XEXP (x, 0)))
10533 {
10534 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10535
10536 if (mode == DImode)
10537 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10538
10539 return true;
10540 }
10541
10542 /* Widening from less than 32-bits requires an extend operation. */
10543 if (GET_MODE (XEXP (x, 0)) == QImode)
10544 {
10545 /* UXTB can be a shorter instruction in Thumb2, but it might
10546 be slower than the AND Rd, Rn, #255 alternative. When
10547 optimizing for speed it should never be slower to use
10548 AND, and we don't really model 16-bit vs 32-bit insns
10549 here. */
10550 *cost = COSTS_N_INSNS (1);
10551 if (speed_p)
10552 *cost += extra_cost->alu.logical;
10553 }
10554 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10555 {
10556 /* We have UXTB/UXTH. */
10557 *cost = COSTS_N_INSNS (1);
10558 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10559 if (speed_p)
10560 *cost += extra_cost->alu.extend;
10561 }
10562 else if (GET_MODE (XEXP (x, 0)) != SImode)
10563 {
10564 /* Needs two shifts. It's marginally preferable to use
10565 shifts rather than two BIC instructions as the second
10566 shift may merge with a subsequent insn as a shifter
10567 op. */
10568 *cost = COSTS_N_INSNS (2);
10569 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10570 if (speed_p)
10571 *cost += 2 * extra_cost->alu.shift;
10572 }
10573 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10574 *cost = COSTS_N_INSNS (1);
10575
10576 /* Widening beyond 32-bits requires one more insn. */
10577 if (mode == DImode)
10578 {
10579 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10580 }
10581
10582 return true;
10583
10584 case CONST_INT:
10585 *cost = 0;
10586 /* CONST_INT has no mode, so we cannot tell for sure how many
10587 insns are really going to be needed. The best we can do is
10588 look at the value passed. If it fits in SImode, then assume
10589 that's the mode it will be used for. Otherwise assume it
10590 will be used in DImode. */
10591 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10592 mode = SImode;
10593 else
10594 mode = DImode;
10595
10596 /* Avoid blowing up in arm_gen_constant (). */
10597 if (!(outer_code == PLUS
10598 || outer_code == AND
10599 || outer_code == IOR
10600 || outer_code == XOR
10601 || outer_code == MINUS))
10602 outer_code = SET;
10603
10604 const_int_cost:
10605 if (mode == SImode)
10606 {
10607 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10608 INTVAL (x), NULL, NULL,
10609 0, 0));
10610 /* Extra costs? */
10611 }
10612 else
10613 {
10614 *cost += COSTS_N_INSNS (arm_gen_constant
10615 (outer_code, SImode, NULL,
10616 trunc_int_for_mode (INTVAL (x), SImode),
10617 NULL, NULL, 0, 0)
10618 + arm_gen_constant (outer_code, SImode, NULL,
10619 INTVAL (x) >> 32, NULL,
10620 NULL, 0, 0));
10621 /* Extra costs? */
10622 }
10623
10624 return true;
10625
10626 case CONST:
10627 case LABEL_REF:
10628 case SYMBOL_REF:
10629 if (speed_p)
10630 {
10631 if (arm_arch_thumb2 && !flag_pic)
10632 *cost = COSTS_N_INSNS (2);
10633 else
10634 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10635 }
10636 else
10637 *cost = COSTS_N_INSNS (2);
10638
10639 if (flag_pic)
10640 {
10641 *cost += COSTS_N_INSNS (1);
10642 if (speed_p)
10643 *cost += extra_cost->alu.arith;
10644 }
10645
10646 return true;
10647
10648 case CONST_FIXED:
10649 *cost = COSTS_N_INSNS (4);
10650 /* Fixme. */
10651 return true;
10652
10653 case CONST_DOUBLE:
10654 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10655 && (mode == SFmode || !TARGET_VFP_SINGLE))
10656 {
10657 if (vfp3_const_double_rtx (x))
10658 {
10659 *cost = COSTS_N_INSNS (1);
10660 if (speed_p)
10661 *cost += extra_cost->fp[mode == DFmode].fpconst;
10662 return true;
10663 }
10664
10665 if (speed_p)
10666 {
10667 *cost = COSTS_N_INSNS (1);
10668 if (mode == DFmode)
10669 *cost += extra_cost->ldst.loadd;
10670 else
10671 *cost += extra_cost->ldst.loadf;
10672 }
10673 else
10674 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10675
10676 return true;
10677 }
10678 *cost = COSTS_N_INSNS (4);
10679 return true;
10680
10681 case CONST_VECTOR:
10682 /* Fixme. */
10683 if (TARGET_NEON
10684 && TARGET_HARD_FLOAT
10685 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10686 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10687 *cost = COSTS_N_INSNS (1);
10688 else
10689 *cost = COSTS_N_INSNS (4);
10690 return true;
10691
10692 case HIGH:
10693 case LO_SUM:
10694 *cost = COSTS_N_INSNS (1);
10695 /* When optimizing for size, we prefer constant pool entries to
10696 MOVW/MOVT pairs, so bump the cost of these slightly. */
10697 if (!speed_p)
10698 *cost += 1;
10699 return true;
10700
10701 case CLZ:
10702 *cost = COSTS_N_INSNS (1);
10703 if (speed_p)
10704 *cost += extra_cost->alu.clz;
10705 return false;
10706
10707 case SMIN:
10708 if (XEXP (x, 1) == const0_rtx)
10709 {
10710 *cost = COSTS_N_INSNS (1);
10711 if (speed_p)
10712 *cost += extra_cost->alu.log_shift;
10713 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10714 return true;
10715 }
10716 /* Fall through. */
10717 case SMAX:
10718 case UMIN:
10719 case UMAX:
10720 *cost = COSTS_N_INSNS (2);
10721 return false;
10722
10723 case TRUNCATE:
10724 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10725 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10726 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10727 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10728 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10729 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10730 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10731 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10732 == ZERO_EXTEND))))
10733 {
10734 *cost = COSTS_N_INSNS (1);
10735 if (speed_p)
10736 *cost += extra_cost->mult[1].extend;
10737 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10738 speed_p)
10739 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10740 0, speed_p));
10741 return true;
10742 }
10743 *cost = LIBCALL_COST (1);
10744 return false;
10745
10746 case UNSPEC:
10747 return arm_unspec_cost (x, outer_code, speed_p, cost);
10748
10749 case PC:
10750 /* Reading the PC is like reading any other register. Writing it
10751 is more expensive, but we take that into account elsewhere. */
10752 *cost = 0;
10753 return true;
10754
10755 case ZERO_EXTRACT:
10756 /* TODO: Simple zero_extract of bottom bits using AND. */
10757 /* Fall through. */
10758 case SIGN_EXTRACT:
10759 if (arm_arch6
10760 && mode == SImode
10761 && CONST_INT_P (XEXP (x, 1))
10762 && CONST_INT_P (XEXP (x, 2)))
10763 {
10764 *cost = COSTS_N_INSNS (1);
10765 if (speed_p)
10766 *cost += extra_cost->alu.bfx;
10767 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10768 return true;
10769 }
10770 /* Without UBFX/SBFX, need to resort to shift operations. */
10771 *cost = COSTS_N_INSNS (2);
10772 if (speed_p)
10773 *cost += 2 * extra_cost->alu.shift;
10774 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10775 return true;
10776
10777 case FLOAT_EXTEND:
10778 if (TARGET_HARD_FLOAT)
10779 {
10780 *cost = COSTS_N_INSNS (1);
10781 if (speed_p)
10782 *cost += extra_cost->fp[mode == DFmode].widen;
10783 if (!TARGET_FPU_ARMV8
10784 && GET_MODE (XEXP (x, 0)) == HFmode)
10785 {
10786 /* Pre v8, widening HF->DF is a two-step process, first
10787 widening to SFmode. */
10788 *cost += COSTS_N_INSNS (1);
10789 if (speed_p)
10790 *cost += extra_cost->fp[0].widen;
10791 }
10792 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10793 return true;
10794 }
10795
10796 *cost = LIBCALL_COST (1);
10797 return false;
10798
10799 case FLOAT_TRUNCATE:
10800 if (TARGET_HARD_FLOAT)
10801 {
10802 *cost = COSTS_N_INSNS (1);
10803 if (speed_p)
10804 *cost += extra_cost->fp[mode == DFmode].narrow;
10805 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10806 return true;
10807 /* Vector modes? */
10808 }
10809 *cost = LIBCALL_COST (1);
10810 return false;
10811
10812 case FMA:
10813 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10814 {
10815 rtx op0 = XEXP (x, 0);
10816 rtx op1 = XEXP (x, 1);
10817 rtx op2 = XEXP (x, 2);
10818
10819 *cost = COSTS_N_INSNS (1);
10820
10821 /* vfms or vfnma. */
10822 if (GET_CODE (op0) == NEG)
10823 op0 = XEXP (op0, 0);
10824
10825 /* vfnms or vfnma. */
10826 if (GET_CODE (op2) == NEG)
10827 op2 = XEXP (op2, 0);
10828
10829 *cost += rtx_cost (op0, FMA, 0, speed_p);
10830 *cost += rtx_cost (op1, FMA, 1, speed_p);
10831 *cost += rtx_cost (op2, FMA, 2, speed_p);
10832
10833 if (speed_p)
10834 *cost += extra_cost->fp[mode == DFmode].fma;
10835
10836 return true;
10837 }
10838
10839 *cost = LIBCALL_COST (3);
10840 return false;
10841
10842 case FIX:
10843 case UNSIGNED_FIX:
10844 if (TARGET_HARD_FLOAT)
10845 {
10846 if (GET_MODE_CLASS (mode) == MODE_INT)
10847 {
10848 *cost = COSTS_N_INSNS (1);
10849 if (speed_p)
10850 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10851 /* Strip off the 'cost' of rounding towards zero. */
10852 if (GET_CODE (XEXP (x, 0)) == FIX)
10853 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10854 else
10855 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10856 /* ??? Increase the cost to deal with transferring from
10857 FP -> CORE registers? */
10858 return true;
10859 }
10860 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10861 && TARGET_FPU_ARMV8)
10862 {
10863 *cost = COSTS_N_INSNS (1);
10864 if (speed_p)
10865 *cost += extra_cost->fp[mode == DFmode].roundint;
10866 return false;
10867 }
10868 /* Vector costs? */
10869 }
10870 *cost = LIBCALL_COST (1);
10871 return false;
10872
10873 case FLOAT:
10874 case UNSIGNED_FLOAT:
10875 if (TARGET_HARD_FLOAT)
10876 {
10877 /* ??? Increase the cost to deal with transferring from CORE
10878 -> FP registers? */
10879 *cost = COSTS_N_INSNS (1);
10880 if (speed_p)
10881 *cost += extra_cost->fp[mode == DFmode].fromint;
10882 return false;
10883 }
10884 *cost = LIBCALL_COST (1);
10885 return false;
10886
10887 case CALL:
10888 *cost = COSTS_N_INSNS (1);
10889 return true;
10890
10891 case ASM_OPERANDS:
10892 {
10893 /* Just a guess. Guess number of instructions in the asm
10894 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10895 though (see PR60663). */
10896 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10897 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10898
10899 *cost = COSTS_N_INSNS (asm_length + num_operands);
10900 return true;
10901 }
10902 default:
10903 if (mode != VOIDmode)
10904 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10905 else
10906 *cost = COSTS_N_INSNS (4); /* Who knows? */
10907 return false;
10908 }
10909 }
10910
10911 #undef HANDLE_NARROW_SHIFT_ARITH
10912
10913 /* RTX costs. Dispatch to the appropriate cost routine for the current tuning, optimizing either for speed or for size. */
10914 static bool
10915 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10916 int *total, bool speed)
10917 {
10918 bool result;
10919
10920 if (TARGET_OLD_RTX_COSTS
10921 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10922 {
10923 /* Old way. (Deprecated.) */
10924 if (!speed)
10925 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10926 (enum rtx_code) outer_code, total);
10927 else
10928 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10929 (enum rtx_code) outer_code, total,
10930 speed);
10931 }
10932 else
10933 {
10934 /* New way. */
10935 if (current_tune->insn_extra_cost)
10936 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10937 (enum rtx_code) outer_code,
10938 current_tune->insn_extra_cost,
10939 total, speed);
10940 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10941 && current_tune->insn_extra_cost == NULL */
10942 else
10943 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10944 (enum rtx_code) outer_code,
10945 &generic_extra_costs, total, speed);
10946 }
10947
10948 if (dump_file && (dump_flags & TDF_DETAILS))
10949 {
10950 print_rtl_single (dump_file, x);
10951 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10952 *total, result ? "final" : "partial");
10953 }
10954 return result;
10955 }
10956
10957 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10958 supported on any "slowmul" cores, so it can be ignored. */
10959
10960 static bool
10961 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10962 int *total, bool speed)
10963 {
10964 enum machine_mode mode = GET_MODE (x);
10965
10966 if (TARGET_THUMB)
10967 {
10968 *total = thumb1_rtx_costs (x, code, outer_code);
10969 return true;
10970 }
10971
10972 switch (code)
10973 {
10974 case MULT:
10975 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10976 || mode == DImode)
10977 {
10978 *total = COSTS_N_INSNS (20);
10979 return false;
10980 }
10981
10982 if (CONST_INT_P (XEXP (x, 1)))
10983 {
10984 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10985 & (unsigned HOST_WIDE_INT) 0xffffffff);
10986 int cost, const_ok = const_ok_for_arm (i);
10987 int j, booth_unit_size;
10988
10989 /* Tune as appropriate. */
10990 cost = const_ok ? 4 : 8;
10991 booth_unit_size = 2;
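/* Illustrative arithmetic: the loop below charges one extra unit for each
   booth_unit_size-bit chunk of the constant that is still non-zero.  With
   2-bit steps, a multiply by 0xff (a valid immediate, base cost 4) runs
   the loop four times, giving COSTS_N_INSNS (8).  */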
10992 for (j = 0; i && j < 32; j += booth_unit_size)
10993 {
10994 i >>= booth_unit_size;
10995 cost++;
10996 }
10997
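/* Illustrative example: for the ARM-encodable constant 0xff00 the base
   cost is 4 and the eight 2-bit Booth steps above add another 8, so the
   statement below sets *total to COSTS_N_INSNS (12) before the cost of
   operand 0 is added in.  */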
10998 *total = COSTS_N_INSNS (cost);
10999 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11000 return true;
11001 }
11002
11003 *total = COSTS_N_INSNS (20);
11004 return false;
11005
11006 default:
11007 return arm_rtx_costs_1 (x, outer_code, total, speed);
11008 }
11009 }
11010
11011
11012 /* RTX cost for cores with a fast multiply unit (M variants). */
11013
11014 static bool
11015 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11016 int *total, bool speed)
11017 {
11018 enum machine_mode mode = GET_MODE (x);
11019
11020 if (TARGET_THUMB1)
11021 {
11022 *total = thumb1_rtx_costs (x, code, outer_code);
11023 return true;
11024 }
11025
11026 /* ??? Should Thumb-2 use different costs? */
11027 switch (code)
11028 {
11029 case MULT:
11030 /* There is no point basing this on the tuning, since it is always the
11031 fast variant if it exists at all. */
11032 if (mode == DImode
11033 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11034 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11035 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11036 {
11037 *total = COSTS_N_INSNS (2);
11038 return false;
11039 }
11040
11041
11042 if (mode == DImode)
11043 {
11044 *total = COSTS_N_INSNS (5);
11045 return false;
11046 }
11047
11048 if (CONST_INT_P (XEXP (x, 1)))
11049 {
11050 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11051 & (unsigned HOST_WIDE_INT) 0xffffffff);
11052 int cost, const_ok = const_ok_for_arm (i);
11053 int j, booth_unit_size;
11054
11055 /* Tune as appropriate. */
11056 cost = const_ok ? 4 : 8;
11057 booth_unit_size = 8;
11058 for (j = 0; i && j < 32; j += booth_unit_size)
11059 {
11060 i >>= booth_unit_size;
11061 cost++;
11062 }
11063
11064 *total = COSTS_N_INSNS (cost);
11065 return false;
11066 }
11067
11068 if (mode == SImode)
11069 {
11070 *total = COSTS_N_INSNS (4);
11071 return false;
11072 }
11073
11074 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11075 {
11076 if (TARGET_HARD_FLOAT
11077 && (mode == SFmode
11078 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11079 {
11080 *total = COSTS_N_INSNS (1);
11081 return false;
11082 }
11083 }
11084
11085 /* Requires a lib call. */
11086 *total = COSTS_N_INSNS (20);
11087 return false;
11088
11089 default:
11090 return arm_rtx_costs_1 (x, outer_code, total, speed);
11091 }
11092 }
11093
11094
11095 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11096 so it can be ignored. */
11097
11098 static bool
11099 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11100 int *total, bool speed)
11101 {
11102 enum machine_mode mode = GET_MODE (x);
11103
11104 if (TARGET_THUMB)
11105 {
11106 *total = thumb1_rtx_costs (x, code, outer_code);
11107 return true;
11108 }
11109
11110 switch (code)
11111 {
11112 case COMPARE:
11113 if (GET_CODE (XEXP (x, 0)) != MULT)
11114 return arm_rtx_costs_1 (x, outer_code, total, speed);
11115
11116 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11117 will stall until the multiplication is complete. */
11118 *total = COSTS_N_INSNS (3);
11119 return false;
11120
11121 case MULT:
11122 /* There is no point basing this on the tuning, since it is always the
11123 fast variant if it exists at all. */
11124 if (mode == DImode
11125 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11126 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11127 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11128 {
11129 *total = COSTS_N_INSNS (2);
11130 return false;
11131 }
11132
11133
11134 if (mode == DImode)
11135 {
11136 *total = COSTS_N_INSNS (5);
11137 return false;
11138 }
11139
11140 if (CONST_INT_P (XEXP (x, 1)))
11141 {
11142 /* If operand 1 is a constant we can more accurately
11143 calculate the cost of the multiply. The multiplier can
11144 retire 15 bits on the first cycle and a further 12 on the
11145 second. We do, of course, have to load the constant into
11146 a register first. */
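/* Illustrative example: for the constant 0x00012345, (i & 0xffff8000)
   is non-zero but (i & 0xf8000000) is zero, so the code below charges
   one extra cycle on top of the general overhead, giving
   COSTS_N_INSNS (2).  A positive constant that fits in the low 15 bits
   costs just COSTS_N_INSNS (1).  */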
11147 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11148 /* There's a general overhead of one cycle. */
11149 int cost = 1;
11150 unsigned HOST_WIDE_INT masked_const;
11151
11152 if (i & 0x80000000)
11153 i = ~i;
11154
11155 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11156
11157 masked_const = i & 0xffff8000;
11158 if (masked_const != 0)
11159 {
11160 cost++;
11161 masked_const = i & 0xf8000000;
11162 if (masked_const != 0)
11163 cost++;
11164 }
11165 *total = COSTS_N_INSNS (cost);
11166 return false;
11167 }
11168
11169 if (mode == SImode)
11170 {
11171 *total = COSTS_N_INSNS (3);
11172 return false;
11173 }
11174
11175 /* Requires a lib call. */
11176 *total = COSTS_N_INSNS (20);
11177 return false;
11178
11179 default:
11180 return arm_rtx_costs_1 (x, outer_code, total, speed);
11181 }
11182 }
11183
11184
11185 /* RTX costs for 9e (and later) cores. */
11186
11187 static bool
11188 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11189 int *total, bool speed)
11190 {
11191 enum machine_mode mode = GET_MODE (x);
11192
11193 if (TARGET_THUMB1)
11194 {
11195 switch (code)
11196 {
11197 case MULT:
11198 *total = COSTS_N_INSNS (3);
11199 return true;
11200
11201 default:
11202 *total = thumb1_rtx_costs (x, code, outer_code);
11203 return true;
11204 }
11205 }
11206
11207 switch (code)
11208 {
11209 case MULT:
11210 /* There is no point basing this on the tuning, since it is always the
11211 fast variant if it exists at all. */
11212 if (mode == DImode
11213 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11214 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11215 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11216 {
11217 *total = COSTS_N_INSNS (2);
11218 return false;
11219 }
11220
11221
11222 if (mode == DImode)
11223 {
11224 *total = COSTS_N_INSNS (5);
11225 return false;
11226 }
11227
11228 if (mode == SImode)
11229 {
11230 *total = COSTS_N_INSNS (2);
11231 return false;
11232 }
11233
11234 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11235 {
11236 if (TARGET_HARD_FLOAT
11237 && (mode == SFmode
11238 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11239 {
11240 *total = COSTS_N_INSNS (1);
11241 return false;
11242 }
11243 }
11244
11245 *total = COSTS_N_INSNS (20);
11246 return false;
11247
11248 default:
11249 return arm_rtx_costs_1 (x, outer_code, total, speed);
11250 }
11251 }
11252 /* All address computations that can be done are free, but rtx cost returns
11253 the same for practically all of them. So we weight the different types
11254 of address here in the order (most pref first):
11255 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
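/* For example, under the weighting below a post-increment address scores 0,
   reg + constant (e.g. [r3, #8]) scores 2, reg + shifted reg scores 3,
   reg + reg scores 4, a bare register scores 6, and a label or symbol
   reference scores 10; lower values are preferred.  */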
11256 static inline int
11257 arm_arm_address_cost (rtx x)
11258 {
11259 enum rtx_code c = GET_CODE (x);
11260
11261 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11262 return 0;
11263 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11264 return 10;
11265
11266 if (c == PLUS)
11267 {
11268 if (CONST_INT_P (XEXP (x, 1)))
11269 return 2;
11270
11271 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11272 return 3;
11273
11274 return 4;
11275 }
11276
11277 return 6;
11278 }
11279
11280 static inline int
11281 arm_thumb_address_cost (rtx x)
11282 {
11283 enum rtx_code c = GET_CODE (x);
11284
11285 if (c == REG)
11286 return 1;
11287 if (c == PLUS
11288 && REG_P (XEXP (x, 0))
11289 && CONST_INT_P (XEXP (x, 1)))
11290 return 1;
11291
11292 return 2;
11293 }
11294
11295 static int
11296 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11297 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11298 {
11299 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11300 }
11301
11302 /* Adjust cost hook for XScale. */
11303 static bool
11304 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11305 {
11306 /* Some true dependencies can have a higher cost depending
11307 on precisely how certain input operands are used. */
11308 if (REG_NOTE_KIND(link) == 0
11309 && recog_memoized (insn) >= 0
11310 && recog_memoized (dep) >= 0)
11311 {
11312 int shift_opnum = get_attr_shift (insn);
11313 enum attr_type attr_type = get_attr_type (dep);
11314
11315 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11316 operand for INSN. If we have a shifted input operand and the
11317 instruction we depend on is another ALU instruction, then we may
11318 have to account for an additional stall. */
11319 if (shift_opnum != 0
11320 && (attr_type == TYPE_ALU_SHIFT_IMM
11321 || attr_type == TYPE_ALUS_SHIFT_IMM
11322 || attr_type == TYPE_LOGIC_SHIFT_IMM
11323 || attr_type == TYPE_LOGICS_SHIFT_IMM
11324 || attr_type == TYPE_ALU_SHIFT_REG
11325 || attr_type == TYPE_ALUS_SHIFT_REG
11326 || attr_type == TYPE_LOGIC_SHIFT_REG
11327 || attr_type == TYPE_LOGICS_SHIFT_REG
11328 || attr_type == TYPE_MOV_SHIFT
11329 || attr_type == TYPE_MVN_SHIFT
11330 || attr_type == TYPE_MOV_SHIFT_REG
11331 || attr_type == TYPE_MVN_SHIFT_REG))
11332 {
11333 rtx shifted_operand;
11334 int opno;
11335
11336 /* Get the shifted operand. */
11337 extract_insn (insn);
11338 shifted_operand = recog_data.operand[shift_opnum];
11339
11340 /* Iterate over all the operands in DEP. If we write an operand
11341 that overlaps with SHIFTED_OPERAND, then we have to increase the
11342 cost of this dependency. */
11343 extract_insn (dep);
11344 preprocess_constraints (dep);
11345 for (opno = 0; opno < recog_data.n_operands; opno++)
11346 {
11347 /* We can ignore strict inputs. */
11348 if (recog_data.operand_type[opno] == OP_IN)
11349 continue;
11350
11351 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11352 shifted_operand))
11353 {
11354 *cost = 2;
11355 return false;
11356 }
11357 }
11358 }
11359 }
11360 return true;
11361 }
11362
11363 /* Adjust cost hook for Cortex A9. */
11364 static bool
11365 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11366 {
11367 switch (REG_NOTE_KIND (link))
11368 {
11369 case REG_DEP_ANTI:
11370 *cost = 0;
11371 return false;
11372
11373 case REG_DEP_TRUE:
11374 case REG_DEP_OUTPUT:
11375 if (recog_memoized (insn) >= 0
11376 && recog_memoized (dep) >= 0)
11377 {
11378 if (GET_CODE (PATTERN (insn)) == SET)
11379 {
11380 if (GET_MODE_CLASS
11381 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11382 || GET_MODE_CLASS
11383 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11384 {
11385 enum attr_type attr_type_insn = get_attr_type (insn);
11386 enum attr_type attr_type_dep = get_attr_type (dep);
11387
11388 /* By default all dependencies of the form
11389 s0 = s0 <op> s1
11390 s0 = s0 <op> s2
11391 have an extra latency of 1 cycle because
11392 of the input and output dependency in this
11393 case. However, this gets modeled as a true
11394 dependency and hence all these checks. */
11395 if (REG_P (SET_DEST (PATTERN (insn)))
11396 && REG_P (SET_DEST (PATTERN (dep)))
11397 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11398 SET_DEST (PATTERN (dep))))
11399 {
11400 /* FMACS is a special case where the dependent
11401 instruction can be issued 3 cycles before
11402 the normal latency in case of an output
11403 dependency. */
11404 if ((attr_type_insn == TYPE_FMACS
11405 || attr_type_insn == TYPE_FMACD)
11406 && (attr_type_dep == TYPE_FMACS
11407 || attr_type_dep == TYPE_FMACD))
11408 {
11409 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11410 *cost = insn_default_latency (dep) - 3;
11411 else
11412 *cost = insn_default_latency (dep);
11413 return false;
11414 }
11415 else
11416 {
11417 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11418 *cost = insn_default_latency (dep) + 1;
11419 else
11420 *cost = insn_default_latency (dep);
11421 }
11422 return false;
11423 }
11424 }
11425 }
11426 }
11427 break;
11428
11429 default:
11430 gcc_unreachable ();
11431 }
11432
11433 return true;
11434 }
11435
11436 /* Adjust cost hook for FA726TE. */
11437 static bool
11438 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11439 {
11440 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11441 has a penalty of 3. */
11442 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11443 && recog_memoized (insn) >= 0
11444 && recog_memoized (dep) >= 0
11445 && get_attr_conds (dep) == CONDS_SET)
11446 {
11447 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11448 if (get_attr_conds (insn) == CONDS_USE
11449 && get_attr_type (insn) != TYPE_BRANCH)
11450 {
11451 *cost = 3;
11452 return false;
11453 }
11454
11455 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11456 || get_attr_conds (insn) == CONDS_USE)
11457 {
11458 *cost = 0;
11459 return false;
11460 }
11461 }
11462
11463 return true;
11464 }
11465
11466 /* Implement TARGET_REGISTER_MOVE_COST.
11467
11468 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11469 it is typically more expensive than a single memory access. We set
11470 the cost to less than two memory accesses so that floating
11471 point to integer conversion does not go through memory. */
11472
11473 int
11474 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11475 reg_class_t from, reg_class_t to)
11476 {
11477 if (TARGET_32BIT)
11478 {
11479 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11480 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11481 return 15;
11482 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11483 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11484 return 4;
11485 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11486 return 20;
11487 else
11488 return 2;
11489 }
11490 else
11491 {
11492 if (from == HI_REGS || to == HI_REGS)
11493 return 4;
11494 else
11495 return 2;
11496 }
11497 }
11498
11499 /* Implement TARGET_MEMORY_MOVE_COST. */
11500
11501 int
11502 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11503 bool in ATTRIBUTE_UNUSED)
11504 {
11505 if (TARGET_32BIT)
11506 return 10;
11507 else
11508 {
11509 if (GET_MODE_SIZE (mode) < 4)
11510 return 8;
11511 else
11512 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11513 }
11514 }
11515
11516 /* Vectorizer cost model implementation. */
11517
11518 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11519 static int
11520 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11521 tree vectype,
11522 int misalign ATTRIBUTE_UNUSED)
11523 {
11524 unsigned elements;
11525
11526 switch (type_of_cost)
11527 {
11528 case scalar_stmt:
11529 return current_tune->vec_costs->scalar_stmt_cost;
11530
11531 case scalar_load:
11532 return current_tune->vec_costs->scalar_load_cost;
11533
11534 case scalar_store:
11535 return current_tune->vec_costs->scalar_store_cost;
11536
11537 case vector_stmt:
11538 return current_tune->vec_costs->vec_stmt_cost;
11539
11540 case vector_load:
11541 return current_tune->vec_costs->vec_align_load_cost;
11542
11543 case vector_store:
11544 return current_tune->vec_costs->vec_store_cost;
11545
11546 case vec_to_scalar:
11547 return current_tune->vec_costs->vec_to_scalar_cost;
11548
11549 case scalar_to_vec:
11550 return current_tune->vec_costs->scalar_to_vec_cost;
11551
11552 case unaligned_load:
11553 return current_tune->vec_costs->vec_unalign_load_cost;
11554
11555 case unaligned_store:
11556 return current_tune->vec_costs->vec_unalign_store_cost;
11557
11558 case cond_branch_taken:
11559 return current_tune->vec_costs->cond_taken_branch_cost;
11560
11561 case cond_branch_not_taken:
11562 return current_tune->vec_costs->cond_not_taken_branch_cost;
11563
11564 case vec_perm:
11565 case vec_promote_demote:
11566 return current_tune->vec_costs->vec_stmt_cost;
11567
11568 case vec_construct:
11569 elements = TYPE_VECTOR_SUBPARTS (vectype);
11570 return elements / 2 + 1;
11571
11572 default:
11573 gcc_unreachable ();
11574 }
11575 }
11576
11577 /* Implement targetm.vectorize.add_stmt_cost. */
11578
11579 static unsigned
11580 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11581 struct _stmt_vec_info *stmt_info, int misalign,
11582 enum vect_cost_model_location where)
11583 {
11584 unsigned *cost = (unsigned *) data;
11585 unsigned retval = 0;
11586
11587 if (flag_vect_cost_model)
11588 {
11589 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11590 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11591
11592 /* Statements in an inner loop relative to the loop being
11593 vectorized are weighted more heavily. The value here is
11594 arbitrary and could potentially be improved with analysis. */
11595 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11596 count *= 50; /* FIXME. */
11597
11598 retval = (unsigned) (count * stmt_cost);
11599 cost[where] += retval;
11600 }
11601
11602 return retval;
11603 }
11604
11605 /* Return true if and only if this insn can dual-issue only as older. */
11606 static bool
11607 cortexa7_older_only (rtx insn)
11608 {
11609 if (recog_memoized (insn) < 0)
11610 return false;
11611
11612 switch (get_attr_type (insn))
11613 {
11614 case TYPE_ALU_REG:
11615 case TYPE_ALUS_REG:
11616 case TYPE_LOGIC_REG:
11617 case TYPE_LOGICS_REG:
11618 case TYPE_ADC_REG:
11619 case TYPE_ADCS_REG:
11620 case TYPE_ADR:
11621 case TYPE_BFM:
11622 case TYPE_REV:
11623 case TYPE_MVN_REG:
11624 case TYPE_SHIFT_IMM:
11625 case TYPE_SHIFT_REG:
11626 case TYPE_LOAD_BYTE:
11627 case TYPE_LOAD1:
11628 case TYPE_STORE1:
11629 case TYPE_FFARITHS:
11630 case TYPE_FADDS:
11631 case TYPE_FFARITHD:
11632 case TYPE_FADDD:
11633 case TYPE_FMOV:
11634 case TYPE_F_CVT:
11635 case TYPE_FCMPS:
11636 case TYPE_FCMPD:
11637 case TYPE_FCONSTS:
11638 case TYPE_FCONSTD:
11639 case TYPE_FMULS:
11640 case TYPE_FMACS:
11641 case TYPE_FMULD:
11642 case TYPE_FMACD:
11643 case TYPE_FDIVS:
11644 case TYPE_FDIVD:
11645 case TYPE_F_MRC:
11646 case TYPE_F_MRRC:
11647 case TYPE_F_FLAG:
11648 case TYPE_F_LOADS:
11649 case TYPE_F_STORES:
11650 return true;
11651 default:
11652 return false;
11653 }
11654 }
11655
11656 /* Return true if and only if this insn can dual-issue as younger. */
11657 static bool
11658 cortexa7_younger (FILE *file, int verbose, rtx insn)
11659 {
11660 if (recog_memoized (insn) < 0)
11661 {
11662 if (verbose > 5)
11663 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11664 return false;
11665 }
11666
11667 switch (get_attr_type (insn))
11668 {
11669 case TYPE_ALU_IMM:
11670 case TYPE_ALUS_IMM:
11671 case TYPE_LOGIC_IMM:
11672 case TYPE_LOGICS_IMM:
11673 case TYPE_EXTEND:
11674 case TYPE_MVN_IMM:
11675 case TYPE_MOV_IMM:
11676 case TYPE_MOV_REG:
11677 case TYPE_MOV_SHIFT:
11678 case TYPE_MOV_SHIFT_REG:
11679 case TYPE_BRANCH:
11680 case TYPE_CALL:
11681 return true;
11682 default:
11683 return false;
11684 }
11685 }
11686
11687
11688 /* Look for an instruction that can dual issue only as an older
11689 instruction, and move it in front of any instructions that can
11690 dual-issue as younger, while preserving the relative order of all
11691 other instructions in the ready list. This is a heuristic to help
11692 dual-issue in later cycles, by postponing issue of more flexible
11693 instructions. This heuristic may affect dual issue opportunities
11694 in the current cycle. */
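/* Hypothetical example: if the ready list, starting from the insn that
   would issue first, holds { mov r0, #1 (younger) ; add r1, r2, r3
   (older-only) }, the add is moved in front of the mov, keeping the mov
   available to pair as the younger half of a dual issue in a later
   cycle.  */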
11695 static void
11696 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11697 int clock)
11698 {
11699 int i;
11700 int first_older_only = -1, first_younger = -1;
11701
11702 if (verbose > 5)
11703 fprintf (file,
11704 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11705 clock,
11706 *n_readyp);
11707
11708 /* Traverse the ready list from the head (the instruction to issue
11709 first), looking for the first instruction that can issue as
11710 younger and the first instruction that can dual-issue only as
11711 older. */
11712 for (i = *n_readyp - 1; i >= 0; i--)
11713 {
11714 rtx insn = ready[i];
11715 if (cortexa7_older_only (insn))
11716 {
11717 first_older_only = i;
11718 if (verbose > 5)
11719 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11720 break;
11721 }
11722 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11723 first_younger = i;
11724 }
11725
11726 /* Nothing to reorder because either no younger insn was found, or the
11727 insn that can dual-issue only as older appears before any insn that
11728 can dual-issue as younger. */
11729 if (first_younger == -1)
11730 {
11731 if (verbose > 5)
11732 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11733 return;
11734 }
11735
11736 /* Nothing to reorder because no older-only insn in the ready list. */
11737 if (first_older_only == -1)
11738 {
11739 if (verbose > 5)
11740 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11741 return;
11742 }
11743
11744 /* Move first_older_only insn before first_younger. */
11745 if (verbose > 5)
11746 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11747 INSN_UID (ready[first_older_only]),
11748 INSN_UID (ready[first_younger]));
11749 rtx first_older_only_insn = ready[first_older_only];
11750 for (i = first_older_only; i < first_younger; i++)
11751 {
11752 ready[i] = ready[i+1];
11753 }
11754
11755 ready[i] = first_older_only_insn;
11756 return;
11757 }
11758
11759 /* Implement TARGET_SCHED_REORDER. */
11760 static int
11761 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11762 int clock)
11763 {
11764 switch (arm_tune)
11765 {
11766 case cortexa7:
11767 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11768 break;
11769 default:
11770 /* Do nothing for other cores. */
11771 break;
11772 }
11773
11774 return arm_issue_rate ();
11775 }
11776
11777 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11778 It corrects the value of COST based on the relationship between
11779 INSN and DEP through the dependence LINK. It returns the new
11780 value. There is a per-core adjust_cost hook to adjust scheduler costs
11781 and the per-core hook can choose to completely override the generic
11782 adjust_cost function. Only put bits of code into arm_adjust_cost that
11783 are common across all cores. */
11784 static int
11785 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11786 {
11787 rtx i_pat, d_pat;
11788
11789 /* When generating Thumb-1 code, we want to place flag-setting operations
11790 close to a conditional branch which depends on them, so that we can
11791 omit the comparison. */
11792 if (TARGET_THUMB1
11793 && REG_NOTE_KIND (link) == 0
11794 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11795 && recog_memoized (dep) >= 0
11796 && get_attr_conds (dep) == CONDS_SET)
11797 return 0;
11798
11799 if (current_tune->sched_adjust_cost != NULL)
11800 {
11801 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11802 return cost;
11803 }
11804
11805 /* XXX Is this strictly true? */
11806 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11807 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11808 return 0;
11809
11810 /* Call insns don't incur a stall, even if they follow a load. */
11811 if (REG_NOTE_KIND (link) == 0
11812 && CALL_P (insn))
11813 return 1;
11814
11815 if ((i_pat = single_set (insn)) != NULL
11816 && MEM_P (SET_SRC (i_pat))
11817 && (d_pat = single_set (dep)) != NULL
11818 && MEM_P (SET_DEST (d_pat)))
11819 {
11820 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11821 /* This is a load after a store; there is no conflict if the load reads
11822 from a cached area. Assume that loads from the stack, and from the
11823 constant pool are cached, and that others will miss. This is a
11824 hack. */
11825
11826 if ((GET_CODE (src_mem) == SYMBOL_REF
11827 && CONSTANT_POOL_ADDRESS_P (src_mem))
11828 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11829 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11830 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11831 return 1;
11832 }
11833
11834 return cost;
11835 }
11836
11837 int
11838 arm_max_conditional_execute (void)
11839 {
11840 return max_insns_skipped;
11841 }
11842
11843 static int
11844 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11845 {
11846 if (TARGET_32BIT)
11847 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11848 else
11849 return (optimize > 0) ? 2 : 0;
11850 }
11851
11852 static int
11853 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11854 {
11855 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11856 }
11857
11858 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11859 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11860 sequences of non-executed instructions in IT blocks probably take the same
11861 amount of time as executed instructions (and the IT instruction itself takes
11862 space in icache). This function was experimentally determined to give good
11863 results on a popular embedded benchmark. */
11864
11865 static int
11866 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11867 {
11868 return (TARGET_32BIT && speed_p) ? 1
11869 : arm_default_branch_cost (speed_p, predictable_p);
11870 }
11871
11872 static bool fp_consts_inited = false;
11873
11874 static REAL_VALUE_TYPE value_fp0;
11875
11876 static void
11877 init_fp_table (void)
11878 {
11879 REAL_VALUE_TYPE r;
11880
11881 r = REAL_VALUE_ATOF ("0", DFmode);
11882 value_fp0 = r;
11883 fp_consts_inited = true;
11884 }
11885
11886 /* Return TRUE if rtx X is a valid immediate FP constant. */
11887 int
11888 arm_const_double_rtx (rtx x)
11889 {
11890 REAL_VALUE_TYPE r;
11891
11892 if (!fp_consts_inited)
11893 init_fp_table ();
11894
11895 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11896 if (REAL_VALUE_MINUS_ZERO (r))
11897 return 0;
11898
11899 if (REAL_VALUES_EQUAL (r, value_fp0))
11900 return 1;
11901
11902 return 0;
11903 }
11904
11905 /* VFPv3 has a fairly wide range of representable immediates, formed from
11906 "quarter-precision" floating-point values. These can be evaluated using this
11907 formula (with ^ for exponentiation):
11908
11909 -1^s * n * 2^-r
11910
11911 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11912 16 <= n <= 31 and 0 <= r <= 7.
11913
11914 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11915
11916 - A (most-significant) is the sign bit.
11917 - BCD are the exponent (encoded as r XOR 3).
11918 - EFGH are the mantissa (encoded as n - 16).
11919 */
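/* Worked example: 1.0 = 16 * 2^-4, so s = 0, n = 16 and r = 4, which
   encodes as (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70, the index
   returned by vfp3_const_double_index below.  */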
11920
11921 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11922 fconst[sd] instruction, or -1 if X isn't suitable. */
11923 static int
11924 vfp3_const_double_index (rtx x)
11925 {
11926 REAL_VALUE_TYPE r, m;
11927 int sign, exponent;
11928 unsigned HOST_WIDE_INT mantissa, mant_hi;
11929 unsigned HOST_WIDE_INT mask;
11930 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11931 bool fail;
11932
11933 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11934 return -1;
11935
11936 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11937
11938 /* We can't represent these things, so detect them first. */
11939 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11940 return -1;
11941
11942 /* Extract sign, exponent and mantissa. */
11943 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11944 r = real_value_abs (&r);
11945 exponent = REAL_EXP (&r);
11946 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11947 highest (sign) bit, with a fixed binary point at bit point_pos.
11948 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11949 bits for the mantissa, this may fail (low bits would be lost). */
11950 real_ldexp (&m, &r, point_pos - exponent);
11951 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11952 mantissa = w.elt (0);
11953 mant_hi = w.elt (1);
11954
11955 /* If there are bits set in the low part of the mantissa, we can't
11956 represent this value. */
11957 if (mantissa != 0)
11958 return -1;
11959
11960 /* Now make it so that mantissa contains the most-significant bits, and move
11961 the point_pos to indicate that the least-significant bits have been
11962 discarded. */
11963 point_pos -= HOST_BITS_PER_WIDE_INT;
11964 mantissa = mant_hi;
11965
11966 /* We can permit four significant bits of mantissa only, plus a high bit
11967 which is always 1. */
11968 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11969 if ((mantissa & mask) != 0)
11970 return -1;
11971
11972 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11973 mantissa >>= point_pos - 5;
11974
11975 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11976 floating-point immediate zero with Neon using an integer-zero load, but
11977 that case is handled elsewhere.) */
11978 if (mantissa == 0)
11979 return -1;
11980
11981 gcc_assert (mantissa >= 16 && mantissa <= 31);
11982
11983 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11984 normalized significands are in the range [1, 2). (Our mantissa is shifted
11985 left 4 places at this point relative to normalized IEEE754 values). GCC
11986 internally uses [0.5, 1) (see real.c), so the exponent returned from
11987 REAL_EXP must be altered. */
11988 exponent = 5 - exponent;
11989
11990 if (exponent < 0 || exponent > 7)
11991 return -1;
11992
11993 /* Sign, mantissa and exponent are now in the correct form to plug into the
11994 formula described in the comment above. */
11995 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11996 }
11997
11998 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11999 int
12000 vfp3_const_double_rtx (rtx x)
12001 {
12002 if (!TARGET_VFP3)
12003 return 0;
12004
12005 return vfp3_const_double_index (x) != -1;
12006 }
12007
12008 /* Recognize immediates which can be used in various Neon instructions. Legal
12009 immediates are described by the following table (for VMVN variants, the
12010 bitwise inverse of the constant shown is recognized. In either case, VMOV
12011 is output and the correct instruction to use for a given constant is chosen
12012 by the assembler). The constant shown is replicated across all elements of
12013 the destination vector.
12014
12015 insn elems variant constant (binary)
12016 ---- ----- ------- -----------------
12017 vmov i32 0 00000000 00000000 00000000 abcdefgh
12018 vmov i32 1 00000000 00000000 abcdefgh 00000000
12019 vmov i32 2 00000000 abcdefgh 00000000 00000000
12020 vmov i32 3 abcdefgh 00000000 00000000 00000000
12021 vmov i16 4 00000000 abcdefgh
12022 vmov i16 5 abcdefgh 00000000
12023 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12024 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12025 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12026 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12027 vmvn i16 10 00000000 abcdefgh
12028 vmvn i16 11 abcdefgh 00000000
12029 vmov i32 12 00000000 00000000 abcdefgh 11111111
12030 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12031 vmov i32 14 00000000 abcdefgh 11111111 11111111
12032 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12033 vmov i8 16 abcdefgh
12034 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12035 eeeeeeee ffffffff gggggggg hhhhhhhh
12036 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12037 vmov f32 19 00000000 00000000 00000000 00000000
12038
12039 For case 18, B = !b. Representable values are exactly those accepted by
12040 vfp3_const_double_index, but are output as floating-point numbers rather
12041 than indices.
12042
12043 For case 19, we will change it to vmov.i32 when assembling.
12044
12045 Variants 0-5 (inclusive) may also be used as immediates for the second
12046 operand of VORR/VBIC instructions.
12047
12048 The INVERSE argument causes the bitwise inverse of the given operand to be
12049 recognized instead (used for recognizing legal immediates for the VAND/VORN
12050 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12051 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12052 output, rather than the real insns vbic/vorr).
12053
12054 INVERSE makes no difference to the recognition of float vectors.
12055
12056 The return value is the variant of immediate as shown in the above table, or
12057 -1 if the given value doesn't match any of the listed patterns.
12058 */
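/* For example, a V4SImode constant with every element equal to 0x000000ab
   matches variant 0 (the first "vmov i32" row of the table above):
   neon_valid_immediate returns 0, sets *ELEMENTWIDTH to 32 and
   *MODCONST to 0xab.  */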
12059 static int
12060 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
12061 rtx *modconst, int *elementwidth)
12062 {
12063 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12064 matches = 1; \
12065 for (i = 0; i < idx; i += (STRIDE)) \
12066 if (!(TEST)) \
12067 matches = 0; \
12068 if (matches) \
12069 { \
12070 immtype = (CLASS); \
12071 elsize = (ELSIZE); \
12072 break; \
12073 }
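/* The CHECK macro above walks the byte image of the constant (bytes[],
   filled in further down) in steps of STRIDE, evaluating TEST at each
   position; if TEST holds for every sampled byte, the candidate variant
   CLASS with element size ELSIZE is selected and the do-while matcher
   below is exited.  */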
12074
12075 unsigned int i, elsize = 0, idx = 0, n_elts;
12076 unsigned int innersize;
12077 unsigned char bytes[16];
12078 int immtype = -1, matches;
12079 unsigned int invmask = inverse ? 0xff : 0;
12080 bool vector = GET_CODE (op) == CONST_VECTOR;
12081
12082 if (vector)
12083 {
12084 n_elts = CONST_VECTOR_NUNITS (op);
12085 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12086 }
12087 else
12088 {
12089 n_elts = 1;
12090 if (mode == VOIDmode)
12091 mode = DImode;
12092 innersize = GET_MODE_SIZE (mode);
12093 }
12094
12095 /* Vectors of float constants. */
12096 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12097 {
12098 rtx el0 = CONST_VECTOR_ELT (op, 0);
12099 REAL_VALUE_TYPE r0;
12100
12101 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12102 return -1;
12103
12104 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12105
12106 for (i = 1; i < n_elts; i++)
12107 {
12108 rtx elt = CONST_VECTOR_ELT (op, i);
12109 REAL_VALUE_TYPE re;
12110
12111 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12112
12113 if (!REAL_VALUES_EQUAL (r0, re))
12114 return -1;
12115 }
12116
12117 if (modconst)
12118 *modconst = CONST_VECTOR_ELT (op, 0);
12119
12120 if (elementwidth)
12121 *elementwidth = 0;
12122
12123 if (el0 == CONST0_RTX (GET_MODE (el0)))
12124 return 19;
12125 else
12126 return 18;
12127 }
12128
12129 /* Splat vector constant out into a byte vector. */
12130 for (i = 0; i < n_elts; i++)
12131 {
12132 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12133 unsigned HOST_WIDE_INT elpart;
12134 unsigned int part, parts;
12135
12136 if (CONST_INT_P (el))
12137 {
12138 elpart = INTVAL (el);
12139 parts = 1;
12140 }
12141 else if (CONST_DOUBLE_P (el))
12142 {
12143 elpart = CONST_DOUBLE_LOW (el);
12144 parts = 2;
12145 }
12146 else
12147 gcc_unreachable ();
12148
12149 for (part = 0; part < parts; part++)
12150 {
12151 unsigned int byte;
12152 for (byte = 0; byte < innersize; byte++)
12153 {
12154 bytes[idx++] = (elpart & 0xff) ^ invmask;
12155 elpart >>= BITS_PER_UNIT;
12156 }
12157 if (CONST_DOUBLE_P (el))
12158 elpart = CONST_DOUBLE_HIGH (el);
12159 }
12160 }
12161
12162 /* Sanity check. */
12163 gcc_assert (idx == GET_MODE_SIZE (mode));
12164
12165 do
12166 {
12167 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12168 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12169
12170 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12171 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12172
12173 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12174 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12175
12176 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12177 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12178
12179 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12180
12181 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12182
12183 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12184 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12185
12186 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12187 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12188
12189 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12190 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12191
12192 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12193 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12194
12195 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12196
12197 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12198
12199 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12200 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12201
12202 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12203 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12204
12205 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12206 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12207
12208 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12209 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12210
12211 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12212
12213 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12214 && bytes[i] == bytes[(i + 8) % idx]);
12215 }
12216 while (0);
12217
12218 if (immtype == -1)
12219 return -1;
12220
12221 if (elementwidth)
12222 *elementwidth = elsize;
12223
12224 if (modconst)
12225 {
12226 unsigned HOST_WIDE_INT imm = 0;
12227
12228 /* Un-invert bytes of recognized vector, if necessary. */
12229 if (invmask != 0)
12230 for (i = 0; i < idx; i++)
12231 bytes[i] ^= invmask;
12232
12233 if (immtype == 17)
12234 {
12235 /* FIXME: Broken on 32-bit H_W_I hosts. */
12236 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12237
12238 for (i = 0; i < 8; i++)
12239 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12240 << (i * BITS_PER_UNIT);
12241
12242 *modconst = GEN_INT (imm);
12243 }
12244 else
12245 {
12246 unsigned HOST_WIDE_INT imm = 0;
12247
12248 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12249 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12250
12251 *modconst = GEN_INT (imm);
12252 }
12253 }
12254
12255 return immtype;
12256 #undef CHECK
12257 }
12258
12259 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12260 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12261 float elements), and a modified constant (whatever should be output for a
12262 VMOV) in *MODCONST. */
12263
12264 int
12265 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12266 rtx *modconst, int *elementwidth)
12267 {
12268 rtx tmpconst;
12269 int tmpwidth;
12270 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12271
12272 if (retval == -1)
12273 return 0;
12274
12275 if (modconst)
12276 *modconst = tmpconst;
12277
12278 if (elementwidth)
12279 *elementwidth = tmpwidth;
12280
12281 return 1;
12282 }
12283
12284 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12285 the immediate is valid, write a constant suitable for using as an operand
12286 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12287 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12288
12289 int
12290 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12291 rtx *modconst, int *elementwidth)
12292 {
12293 rtx tmpconst;
12294 int tmpwidth;
12295 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12296
12297 if (retval < 0 || retval > 5)
12298 return 0;
12299
12300 if (modconst)
12301 *modconst = tmpconst;
12302
12303 if (elementwidth)
12304 *elementwidth = tmpwidth;
12305
12306 return 1;
12307 }
12308
12309 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12310 the immediate is valid, write a constant suitable for using as an operand
12311 to VSHR/VSHL to *MODCONST and the corresponding element width to
12312 *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left or a right shift,
12313 because the two have different limitations. */
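/* For example, with V8HImode (16-bit elements) the code below accepts
   immediates 0..15 for a left shift (VSHL) and 1..16 for a right shift
   (VSHR).  */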
12314
12315 int
12316 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12317 rtx *modconst, int *elementwidth,
12318 bool isleftshift)
12319 {
12320 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12321 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12322 unsigned HOST_WIDE_INT last_elt = 0;
12323 unsigned HOST_WIDE_INT maxshift;
12324
12325 /* Split vector constant out into a byte vector. */
12326 for (i = 0; i < n_elts; i++)
12327 {
12328 rtx el = CONST_VECTOR_ELT (op, i);
12329 unsigned HOST_WIDE_INT elpart;
12330
12331 if (CONST_INT_P (el))
12332 elpart = INTVAL (el);
12333 else if (CONST_DOUBLE_P (el))
12334 return 0;
12335 else
12336 gcc_unreachable ();
12337
12338 if (i != 0 && elpart != last_elt)
12339 return 0;
12340
12341 last_elt = elpart;
12342 }
12343
12344 /* Shift less than element size. */
12345 maxshift = innersize * 8;
12346
12347 if (isleftshift)
12348 {
12349 /* Left shift immediate value can be from 0 to <size>-1. */
12350 if (last_elt >= maxshift)
12351 return 0;
12352 }
12353 else
12354 {
12355 /* Right shift immediate value can be from 1 to <size>. */
12356 if (last_elt == 0 || last_elt > maxshift)
12357 return 0;
12358 }
12359
12360 if (elementwidth)
12361 *elementwidth = innersize * 8;
12362
12363 if (modconst)
12364 *modconst = CONST_VECTOR_ELT (op, 0);
12365
12366 return 1;
12367 }
12368
12369 /* Return a string suitable for output of Neon immediate logic operation
12370 MNEM. */
12371
12372 char *
12373 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12374 int inverse, int quad)
12375 {
12376 int width, is_valid;
12377 static char templ[40];
12378
12379 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12380
12381 gcc_assert (is_valid != 0);
12382
12383 if (quad)
12384 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12385 else
12386 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12387
12388 return templ;
12389 }
12390
12391 /* Return a string suitable for output of Neon immediate shift operation
12392 (VSHR or VSHL) MNEM. */
12393
12394 char *
12395 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12396 enum machine_mode mode, int quad,
12397 bool isleftshift)
12398 {
12399 int width, is_valid;
12400 static char templ[40];
12401
12402 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12403 gcc_assert (is_valid != 0);
12404
12405 if (quad)
12406 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12407 else
12408 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12409
12410 return templ;
12411 }
12412
12413 /* Output a sequence of pairwise operations to implement a reduction.
12414 NOTE: We do "too much work" here, because pairwise operations work on two
12415 registers-worth of operands in one go. Unfortunately, I don't think we can
12416 exploit those extra calculations to do the full operation in fewer steps.
12417 Although all vector elements of the result but the first are ignored, we
12418 actually calculate the same result in each of the elements. An alternative
12419 such as initially loading a vector with zero to use as each of the second
12420 operands would use up an additional register and take an extra instruction,
12421 for no particular gain. */
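/* For example, reducing a V4SFmode vector takes two pairwise steps: the
   loop below runs with i = 2 and then i = 1, and the final step writes
   its result directly into OP0.  */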
12422
12423 void
12424 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12425 rtx (*reduc) (rtx, rtx, rtx))
12426 {
12427 enum machine_mode inner = GET_MODE_INNER (mode);
12428 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12429 rtx tmpsum = op1;
12430
12431 for (i = parts / 2; i >= 1; i /= 2)
12432 {
12433 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12434 emit_insn (reduc (dest, tmpsum, tmpsum));
12435 tmpsum = dest;
12436 }
12437 }
12438
12439 /* If VALS is a vector constant that can be loaded into a register
12440 using VDUP, generate instructions to do so and return an RTX to
12441 assign to the register. Otherwise return NULL_RTX. */
12442
12443 static rtx
12444 neon_vdup_constant (rtx vals)
12445 {
12446 enum machine_mode mode = GET_MODE (vals);
12447 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12448 int n_elts = GET_MODE_NUNITS (mode);
12449 bool all_same = true;
12450 rtx x;
12451 int i;
12452
12453 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12454 return NULL_RTX;
12455
12456 for (i = 0; i < n_elts; ++i)
12457 {
12458 x = XVECEXP (vals, 0, i);
12459 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12460 all_same = false;
12461 }
12462
12463 if (!all_same)
12464 /* The elements are not all the same. We could handle repeating
12465 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12466 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12467 vdup.i16). */
12468 return NULL_RTX;
12469
12470 /* We can load this constant by using VDUP and a constant in a
12471 single ARM register. This will be cheaper than a vector
12472 load. */
12473
12474 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12475 return gen_rtx_VEC_DUPLICATE (mode, x);
12476 }
12477
12478 /* Generate code to load VALS, which is a PARALLEL containing only
12479 constants (for vec_init) or CONST_VECTOR, efficiently into a
12480 register. Returns an RTX to copy into the register, or NULL_RTX
12481 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12482
12483 rtx
12484 neon_make_constant (rtx vals)
12485 {
12486 enum machine_mode mode = GET_MODE (vals);
12487 rtx target;
12488 rtx const_vec = NULL_RTX;
12489 int n_elts = GET_MODE_NUNITS (mode);
12490 int n_const = 0;
12491 int i;
12492
12493 if (GET_CODE (vals) == CONST_VECTOR)
12494 const_vec = vals;
12495 else if (GET_CODE (vals) == PARALLEL)
12496 {
12497 /* A CONST_VECTOR must contain only CONST_INTs and
12498 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12499 Only store valid constants in a CONST_VECTOR. */
12500 for (i = 0; i < n_elts; ++i)
12501 {
12502 rtx x = XVECEXP (vals, 0, i);
12503 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12504 n_const++;
12505 }
12506 if (n_const == n_elts)
12507 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12508 }
12509 else
12510 gcc_unreachable ();
12511
12512 if (const_vec != NULL
12513 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12514 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12515 return const_vec;
12516 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12517 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12518 pipeline cycle; creating the constant takes one or two ARM
12519 pipeline cycles. */
12520 return target;
12521 else if (const_vec != NULL_RTX)
12522 /* Load from constant pool. On Cortex-A8 this takes two cycles
12523 (for either double or quad vectors). We can not take advantage
12524 of single-cycle VLD1 because we need a PC-relative addressing
12525 mode. */
12526 return const_vec;
12527 else
12528 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12529 We can not construct an initializer. */
12530 return NULL_RTX;
12531 }
12532
12533 /* Initialize vector TARGET to VALS. */
12534
12535 void
12536 neon_expand_vector_init (rtx target, rtx vals)
12537 {
12538 enum machine_mode mode = GET_MODE (target);
12539 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12540 int n_elts = GET_MODE_NUNITS (mode);
12541 int n_var = 0, one_var = -1;
12542 bool all_same = true;
12543 rtx x, mem;
12544 int i;
12545
12546 for (i = 0; i < n_elts; ++i)
12547 {
12548 x = XVECEXP (vals, 0, i);
12549 if (!CONSTANT_P (x))
12550 ++n_var, one_var = i;
12551
12552 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12553 all_same = false;
12554 }
12555
12556 if (n_var == 0)
12557 {
12558 rtx constant = neon_make_constant (vals);
12559 if (constant != NULL_RTX)
12560 {
12561 emit_move_insn (target, constant);
12562 return;
12563 }
12564 }
12565
12566 /* Splat a single non-constant element if we can. */
12567 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12568 {
12569 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12570 emit_insn (gen_rtx_SET (VOIDmode, target,
12571 gen_rtx_VEC_DUPLICATE (mode, x)));
12572 return;
12573 }
12574
12575 /* One field is non-constant. Load constant then overwrite varying
12576 field. This is more efficient than using the stack. */
12577 if (n_var == 1)
12578 {
12579 rtx copy = copy_rtx (vals);
12580 rtx index = GEN_INT (one_var);
12581
12582 /* Load constant part of vector, substitute neighboring value for
12583 varying element. */
12584 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12585 neon_expand_vector_init (target, copy);
12586
12587 /* Insert variable. */
12588 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12589 switch (mode)
12590 {
12591 case V8QImode:
12592 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12593 break;
12594 case V16QImode:
12595 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12596 break;
12597 case V4HImode:
12598 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12599 break;
12600 case V8HImode:
12601 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12602 break;
12603 case V2SImode:
12604 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12605 break;
12606 case V4SImode:
12607 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12608 break;
12609 case V2SFmode:
12610 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12611 break;
12612 case V4SFmode:
12613 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12614 break;
12615 case V2DImode:
12616 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12617 break;
12618 default:
12619 gcc_unreachable ();
12620 }
12621 return;
12622 }
12623
12624 /* Construct the vector in memory one field at a time
12625 and load the whole vector. */
12626 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12627 for (i = 0; i < n_elts; i++)
12628 emit_move_insn (adjust_address_nv (mem, inner_mode,
12629 i * GET_MODE_SIZE (inner_mode)),
12630 XVECEXP (vals, 0, i));
12631 emit_move_insn (target, mem);
12632 }
12633
12634 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12635 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12636 reported source locations are bogus. */
12637
12638 static void
12639 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12640 const char *err)
12641 {
12642 HOST_WIDE_INT lane;
12643
12644 gcc_assert (CONST_INT_P (operand));
12645
12646 lane = INTVAL (operand);
12647
12648 if (lane < low || lane >= high)
12649 error (err);
12650 }
12651
12652 /* Bounds-check lanes. */
12653
12654 void
12655 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12656 {
12657 bounds_check (operand, low, high, "lane out of range");
12658 }
12659
12660 /* Bounds-check constants. */
12661
12662 void
12663 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12664 {
12665 bounds_check (operand, low, high, "constant out of range");
12666 }
12667
12668 HOST_WIDE_INT
12669 neon_element_bits (enum machine_mode mode)
12670 {
12671 if (mode == DImode)
12672 return GET_MODE_BITSIZE (mode);
12673 else
12674 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12675 }
12676
12677 \f
12678 /* Predicates for `match_operand' and `match_operator'. */
12679
12680 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12681 WB is true if full writeback address modes are allowed and is false
12682 if limited writeback address modes (POST_INC and PRE_DEC) are
12683 allowed. */
12684
12685 int
12686 arm_coproc_mem_operand (rtx op, bool wb)
12687 {
12688 rtx ind;
12689
12690 /* Reject eliminable registers. */
12691 if (! (reload_in_progress || reload_completed || lra_in_progress)
12692 && ( reg_mentioned_p (frame_pointer_rtx, op)
12693 || reg_mentioned_p (arg_pointer_rtx, op)
12694 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12695 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12696 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12697 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12698 return FALSE;
12699
12700 /* Constants are converted into offsets from labels. */
12701 if (!MEM_P (op))
12702 return FALSE;
12703
12704 ind = XEXP (op, 0);
12705
12706 if (reload_completed
12707 && (GET_CODE (ind) == LABEL_REF
12708 || (GET_CODE (ind) == CONST
12709 && GET_CODE (XEXP (ind, 0)) == PLUS
12710 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12711 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12712 return TRUE;
12713
12714 /* Match: (mem (reg)). */
12715 if (REG_P (ind))
12716 return arm_address_register_rtx_p (ind, 0);
12717
12718 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12719 acceptable in any case (subject to verification by
12720 arm_address_register_rtx_p). We need WB to be true to accept
12721 PRE_INC and POST_DEC. */
12722 if (GET_CODE (ind) == POST_INC
12723 || GET_CODE (ind) == PRE_DEC
12724 || (wb
12725 && (GET_CODE (ind) == PRE_INC
12726 || GET_CODE (ind) == POST_DEC)))
12727 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12728
12729 if (wb
12730 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12731 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12732 && GET_CODE (XEXP (ind, 1)) == PLUS
12733 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12734 ind = XEXP (ind, 1);
12735
12736 /* Match:
12737 (plus (reg)
12738 (const)). */
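/* For example, [rN, #1020] and [rN, #-1020] satisfy the checks below,
   whereas [rN, #1024] (out of range) and [rN, #2] (not a multiple of 4)
   do not.  */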
12739 if (GET_CODE (ind) == PLUS
12740 && REG_P (XEXP (ind, 0))
12741 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12742 && CONST_INT_P (XEXP (ind, 1))
12743 && INTVAL (XEXP (ind, 1)) > -1024
12744 && INTVAL (XEXP (ind, 1)) < 1024
12745 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12746 return TRUE;
12747
12748 return FALSE;
12749 }
12750
12751 /* Return TRUE if OP is a memory operand which we can load or store a vector
12752 to/from. TYPE is one of the following values:
12753 0 - Vector load/store (vldr)
12754 1 - Core registers (ldm)
12755 2 - Element/structure loads (vld1)
12756 */
12757 int
12758 neon_vector_mem_operand (rtx op, int type, bool strict)
12759 {
12760 rtx ind;
12761
12762 /* Reject eliminable registers. */
12763 if (! (reload_in_progress || reload_completed)
12764 && ( reg_mentioned_p (frame_pointer_rtx, op)
12765 || reg_mentioned_p (arg_pointer_rtx, op)
12766 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12767 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12768 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12769 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12770 return !strict;
12771
12772 /* Constants are converted into offsets from labels. */
12773 if (!MEM_P (op))
12774 return FALSE;
12775
12776 ind = XEXP (op, 0);
12777
12778 if (reload_completed
12779 && (GET_CODE (ind) == LABEL_REF
12780 || (GET_CODE (ind) == CONST
12781 && GET_CODE (XEXP (ind, 0)) == PLUS
12782 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12783 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12784 return TRUE;
12785
12786 /* Match: (mem (reg)). */
12787 if (REG_P (ind))
12788 return arm_address_register_rtx_p (ind, 0);
12789
12790 /* Allow post-increment with Neon registers. */
12791 if ((type != 1 && GET_CODE (ind) == POST_INC)
12792 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12793 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12794
12795 /* FIXME: vld1 allows register post-modify. */
12796
12797 /* Match:
12798 (plus (reg)
12799 (const)). */
12800 if (type == 0
12801 && GET_CODE (ind) == PLUS
12802 && REG_P (XEXP (ind, 0))
12803 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12804 && CONST_INT_P (XEXP (ind, 1))
12805 && INTVAL (XEXP (ind, 1)) > -1024
12806 /* For quad modes, we restrict the constant offset to be slightly less
12807 than what the instruction format permits. We have no such constraint
12808 on double mode offsets. (This must match arm_legitimate_index_p.) */
12809 && (INTVAL (XEXP (ind, 1))
12810 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12811 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12812 return TRUE;
12813
12814 return FALSE;
12815 }
12816
12817 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12818 type. */
12819 int
12820 neon_struct_mem_operand (rtx op)
12821 {
12822 rtx ind;
12823
12824 /* Reject eliminable registers. */
12825 if (! (reload_in_progress || reload_completed)
12826 && ( reg_mentioned_p (frame_pointer_rtx, op)
12827 || reg_mentioned_p (arg_pointer_rtx, op)
12828 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12829 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12830 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12831 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12832 return FALSE;
12833
12834 /* Constants are converted into offsets from labels. */
12835 if (!MEM_P (op))
12836 return FALSE;
12837
12838 ind = XEXP (op, 0);
12839
12840 if (reload_completed
12841 && (GET_CODE (ind) == LABEL_REF
12842 || (GET_CODE (ind) == CONST
12843 && GET_CODE (XEXP (ind, 0)) == PLUS
12844 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12845 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12846 return TRUE;
12847
12848 /* Match: (mem (reg)). */
12849 if (REG_P (ind))
12850 return arm_address_register_rtx_p (ind, 0);
12851
12852 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12853 if (GET_CODE (ind) == POST_INC
12854 || GET_CODE (ind) == PRE_DEC)
12855 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12856
12857 return FALSE;
12858 }
12859
12860 /* Return true if X is a register that will be eliminated later on. */
12861 int
12862 arm_eliminable_register (rtx x)
12863 {
12864 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12865 || REGNO (x) == ARG_POINTER_REGNUM
12866 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12867 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12868 }
12869
12870 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12871 coprocessor registers.  Otherwise return NO_REGS. */
12872
12873 enum reg_class
12874 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12875 {
12876 if (mode == HFmode)
12877 {
12878 if (!TARGET_NEON_FP16)
12879 return GENERAL_REGS;
12880 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12881 return NO_REGS;
12882 return GENERAL_REGS;
12883 }
12884
12885 /* The neon move patterns handle all legitimate vector and struct
12886 addresses. */
12887 if (TARGET_NEON
12888 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12889 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12890 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12891 || VALID_NEON_STRUCT_MODE (mode)))
12892 return NO_REGS;
12893
12894 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12895 return NO_REGS;
12896
12897 return GENERAL_REGS;
12898 }
12899
12900 /* Values which must be returned in the most-significant end of the return
12901 register. */
12902
12903 static bool
12904 arm_return_in_msb (const_tree valtype)
12905 {
12906 return (TARGET_AAPCS_BASED
12907 && BYTES_BIG_ENDIAN
12908 && (AGGREGATE_TYPE_P (valtype)
12909 || TREE_CODE (valtype) == COMPLEX_TYPE
12910 || FIXED_POINT_TYPE_P (valtype)));
12911 }
12912
12913 /* Return TRUE if X references a SYMBOL_REF. */
12914 int
12915 symbol_mentioned_p (rtx x)
12916 {
12917 const char * fmt;
12918 int i;
12919
12920 if (GET_CODE (x) == SYMBOL_REF)
12921 return 1;
12922
12923 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12924 are constant offsets, not symbols. */
12925 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12926 return 0;
12927
12928 fmt = GET_RTX_FORMAT (GET_CODE (x));
12929
12930 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12931 {
12932 if (fmt[i] == 'E')
12933 {
12934 int j;
12935
12936 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12937 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12938 return 1;
12939 }
12940 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12941 return 1;
12942 }
12943
12944 return 0;
12945 }
12946
12947 /* Return TRUE if X references a LABEL_REF. */
12948 int
12949 label_mentioned_p (rtx x)
12950 {
12951 const char * fmt;
12952 int i;
12953
12954 if (GET_CODE (x) == LABEL_REF)
12955 return 1;
12956
12957 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12958 instruction, but they are constant offsets, not symbols. */
12959 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12960 return 0;
12961
12962 fmt = GET_RTX_FORMAT (GET_CODE (x));
12963 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12964 {
12965 if (fmt[i] == 'E')
12966 {
12967 int j;
12968
12969 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12970 if (label_mentioned_p (XVECEXP (x, i, j)))
12971 return 1;
12972 }
12973 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12974 return 1;
12975 }
12976
12977 return 0;
12978 }
12979
12980 int
12981 tls_mentioned_p (rtx x)
12982 {
12983 switch (GET_CODE (x))
12984 {
12985 case CONST:
12986 return tls_mentioned_p (XEXP (x, 0));
12987
12988 case UNSPEC:
12989 if (XINT (x, 1) == UNSPEC_TLS)
12990 return 1;
12991
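/* Fall through.  */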
12992 default:
12993 return 0;
12994 }
12995 }
12996
12997 /* Must not copy any rtx that uses a pc-relative address. */
12998
12999 static int
13000 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
13001 {
13002 if (GET_CODE (*x) == UNSPEC
13003 && (XINT (*x, 1) == UNSPEC_PIC_BASE
13004 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
13005 return 1;
13006 return 0;
13007 }
13008
13009 static bool
13010 arm_cannot_copy_insn_p (rtx insn)
13011 {
13012 /* The tls call insn cannot be copied, as it is paired with a data
13013 word. */
13014 if (recog_memoized (insn) == CODE_FOR_tlscall)
13015 return true;
13016
13017 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
13018 }
13019
13020 enum rtx_code
13021 minmax_code (rtx x)
13022 {
13023 enum rtx_code code = GET_CODE (x);
13024
13025 switch (code)
13026 {
13027 case SMAX:
13028 return GE;
13029 case SMIN:
13030 return LE;
13031 case UMIN:
13032 return LEU;
13033 case UMAX:
13034 return GEU;
13035 default:
13036 gcc_unreachable ();
13037 }
13038 }
13039
13040 /* Match pair of min/max operators that can be implemented via usat/ssat. */
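/* For example (illustrative): bounds [0, 255] give *MASK == 8 and
   *SIGNED_SAT == false (a "usat #8" range), while bounds [-128, 127] give
   *MASK == 8 and *SIGNED_SAT == true (an "ssat #8" range).  */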
13041
13042 bool
13043 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13044 int *mask, bool *signed_sat)
13045 {
13046 /* The high bound must be a power of two minus one. */
13047 int log = exact_log2 (INTVAL (hi_bound) + 1);
13048 if (log == -1)
13049 return false;
13050
13051 /* The low bound is either zero (for usat) or one less than the
13052 negation of the high bound (for ssat). */
13053 if (INTVAL (lo_bound) == 0)
13054 {
13055 if (mask)
13056 *mask = log;
13057 if (signed_sat)
13058 *signed_sat = false;
13059
13060 return true;
13061 }
13062
13063 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13064 {
13065 if (mask)
13066 *mask = log + 1;
13067 if (signed_sat)
13068 *signed_sat = true;
13069
13070 return true;
13071 }
13072
13073 return false;
13074 }
13075
13076 /* Return 1 if memory locations are adjacent. */
13077 int
13078 adjacent_mem_locations (rtx a, rtx b)
13079 {
13080 /* We don't guarantee to preserve the order of these memory refs. */
13081 if (volatile_refs_p (a) || volatile_refs_p (b))
13082 return 0;
13083
13084 if ((REG_P (XEXP (a, 0))
13085 || (GET_CODE (XEXP (a, 0)) == PLUS
13086 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13087 && (REG_P (XEXP (b, 0))
13088 || (GET_CODE (XEXP (b, 0)) == PLUS
13089 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13090 {
13091 HOST_WIDE_INT val0 = 0, val1 = 0;
13092 rtx reg0, reg1;
13093 int val_diff;
13094
13095 if (GET_CODE (XEXP (a, 0)) == PLUS)
13096 {
13097 reg0 = XEXP (XEXP (a, 0), 0);
13098 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13099 }
13100 else
13101 reg0 = XEXP (a, 0);
13102
13103 if (GET_CODE (XEXP (b, 0)) == PLUS)
13104 {
13105 reg1 = XEXP (XEXP (b, 0), 0);
13106 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13107 }
13108 else
13109 reg1 = XEXP (b, 0);
13110
13111 /* Don't accept any offset that will require multiple
13112 instructions to handle, since this would cause the
13113 arith_adjacentmem pattern to output an overlong sequence. */
13114 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13115 return 0;
13116
13117 /* Don't allow an eliminable register: register elimination can make
13118 the offset too large. */
13119 if (arm_eliminable_register (reg0))
13120 return 0;
13121
13122 val_diff = val1 - val0;
13123
13124 if (arm_ld_sched)
13125 {
13126 /* If the target has load delay slots, then there's no benefit
13127 to using an ldm instruction unless the offset is zero and
13128 we are optimizing for size. */
13129 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13130 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13131 && (val_diff == 4 || val_diff == -4));
13132 }
13133
13134 return ((REGNO (reg0) == REGNO (reg1))
13135 && (val_diff == 4 || val_diff == -4));
13136 }
13137
13138 return 0;
13139 }
13140
13141 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13142 for load operations, false for store operations. CONSECUTIVE is true
13143 if the register numbers in the operation must be consecutive in the register
13144 bank. RETURN_PC is true if the value is to be loaded into the PC.
13145 The pattern we are trying to match for load is:
13146 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13147 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13148 :
13149 :
13150 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13151 ]
13152 where
13153 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13154 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13155 3. If consecutive is TRUE, then for kth register being loaded,
13156 REGNO (R_dk) = REGNO (R_d0) + k.
13157 The pattern for store is similar. */
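/* For instance, a load-multiple such as "ldmia r0, {r1, r2}" is represented
   roughly as
     [(set (reg r1) (mem (reg r0)))
      (set (reg r2) (mem (plus (reg r0) (const_int 4))))]
   and, with MODE == SImode and CONSECUTIVE false, would be accepted here
   (a sketch only; the real RTL carries modes on every reg and mem).  */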
13158 bool
13159 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
13160 bool consecutive, bool return_pc)
13161 {
13162 HOST_WIDE_INT count = XVECLEN (op, 0);
13163 rtx reg, mem, addr;
13164 unsigned regno;
13165 unsigned first_regno;
13166 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13167 rtx elt;
13168 bool addr_reg_in_reglist = false;
13169 bool update = false;
13170 int reg_increment;
13171 int offset_adj;
13172 int regs_per_val;
13173
13174 /* If not in SImode, then registers must be consecutive
13175 (e.g., VLDM instructions for DFmode). */
13176 gcc_assert ((mode == SImode) || consecutive);
13177 /* Setting return_pc for stores is illegal. */
13178 gcc_assert (!return_pc || load);
13179
13180 /* Set up the increments and the regs per val based on the mode. */
13181 reg_increment = GET_MODE_SIZE (mode);
13182 regs_per_val = reg_increment / 4;
13183 offset_adj = return_pc ? 1 : 0;
13184
13185 if (count <= 1
13186 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13187 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13188 return false;
13189
13190 /* Check if this is a write-back. */
13191 elt = XVECEXP (op, 0, offset_adj);
13192 if (GET_CODE (SET_SRC (elt)) == PLUS)
13193 {
13194 i++;
13195 base = 1;
13196 update = true;
13197
13198 /* The offset adjustment must be the number of registers being
13199 popped times the size of a single register. */
13200 if (!REG_P (SET_DEST (elt))
13201 || !REG_P (XEXP (SET_SRC (elt), 0))
13202 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13203 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13204 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13205 ((count - 1 - offset_adj) * reg_increment))
13206 return false;
13207 }
13208
13209 i = i + offset_adj;
13210 base = base + offset_adj;
13211 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13212 success depends on the type: VLDM can do just one reg,
13213 LDM must do at least two. */
13214 if ((count <= i) && (mode == SImode))
13215 return false;
13216
13217 elt = XVECEXP (op, 0, i - 1);
13218 if (GET_CODE (elt) != SET)
13219 return false;
13220
13221 if (load)
13222 {
13223 reg = SET_DEST (elt);
13224 mem = SET_SRC (elt);
13225 }
13226 else
13227 {
13228 reg = SET_SRC (elt);
13229 mem = SET_DEST (elt);
13230 }
13231
13232 if (!REG_P (reg) || !MEM_P (mem))
13233 return false;
13234
13235 regno = REGNO (reg);
13236 first_regno = regno;
13237 addr = XEXP (mem, 0);
13238 if (GET_CODE (addr) == PLUS)
13239 {
13240 if (!CONST_INT_P (XEXP (addr, 1)))
13241 return false;
13242
13243 offset = INTVAL (XEXP (addr, 1));
13244 addr = XEXP (addr, 0);
13245 }
13246
13247 if (!REG_P (addr))
13248 return false;
13249
13250 /* Don't allow SP to be loaded unless it is also the base register. It
13251 guarantees that SP is reset correctly when an LDM instruction
13252 is interrupted. Otherwise, we might end up with a corrupt stack. */
13253 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13254 return false;
13255
13256 for (; i < count; i++)
13257 {
13258 elt = XVECEXP (op, 0, i);
13259 if (GET_CODE (elt) != SET)
13260 return false;
13261
13262 if (load)
13263 {
13264 reg = SET_DEST (elt);
13265 mem = SET_SRC (elt);
13266 }
13267 else
13268 {
13269 reg = SET_SRC (elt);
13270 mem = SET_DEST (elt);
13271 }
13272
13273 if (!REG_P (reg)
13274 || GET_MODE (reg) != mode
13275 || REGNO (reg) <= regno
13276 || (consecutive
13277 && (REGNO (reg) !=
13278 (unsigned int) (first_regno + regs_per_val * (i - base))))
13279 /* Don't allow SP to be loaded unless it is also the base register. It
13280 guarantees that SP is reset correctly when an LDM instruction
13281 is interrupted. Otherwise, we might end up with a corrupt stack. */
13282 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13283 || !MEM_P (mem)
13284 || GET_MODE (mem) != mode
13285 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13286 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13287 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13288 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13289 offset + (i - base) * reg_increment))
13290 && (!REG_P (XEXP (mem, 0))
13291 || offset + (i - base) * reg_increment != 0)))
13292 return false;
13293
13294 regno = REGNO (reg);
13295 if (regno == REGNO (addr))
13296 addr_reg_in_reglist = true;
13297 }
13298
13299 if (load)
13300 {
13301 if (update && addr_reg_in_reglist)
13302 return false;
13303
13304 /* For Thumb-1, address register is always modified - either by write-back
13305 or by explicit load. If the pattern does not describe an update,
13306 then the address register must be in the list of loaded registers. */
13307 if (TARGET_THUMB1)
13308 return update || addr_reg_in_reglist;
13309 }
13310
13311 return true;
13312 }
13313
13314 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13315 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13316 instruction. ADD_OFFSET is nonzero if the base address register needs
13317 to be modified with an add instruction before we can use it. */
13318
13319 static bool
13320 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13321 int nops, HOST_WIDE_INT add_offset)
13322 {
13323 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13324 if the offset isn't small enough. The reason 2 ldrs are faster
13325 is because these ARMs are able to do more than one cache access
13326 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13327 whilst the ARM8 has a double bandwidth cache. This means that
13328 these cores can do both an instruction fetch and a data fetch in
13329 a single cycle, so the trick of calculating the address into a
13330 scratch register (one of the result regs) and then doing a load
13331 multiple actually becomes slower (and no smaller in code size).
13332 That is the transformation
13333
13334 ldr rd1, [rbase + offset]
13335 ldr rd2, [rbase + offset + 4]
13336
13337 to
13338
13339 add rd1, rbase, offset
13340 ldmia rd1, {rd1, rd2}
13341
13342 produces worse code -- '3 cycles + any stalls on rd2' instead of
13343 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13344 access per cycle, the first sequence could never complete in less
13345 than 6 cycles, whereas the ldm sequence would only take 5 and
13346 would make better use of sequential accesses if not hitting the
13347 cache.
13348
13349 We cheat here and test 'arm_ld_sched' which we currently know to
13350 only be true for the ARM8, ARM9 and StrongARM. If this ever
13351 changes, then the test below needs to be reworked. */
13352 if (nops == 2 && arm_ld_sched && add_offset != 0)
13353 return false;
13354
13355 /* XScale has load-store double instructions, but they have stricter
13356 alignment requirements than load-store multiple, so we cannot
13357 use them.
13358
13359 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13360 the pipeline until completion.
13361
13362 NREGS CYCLES
13363 1 3
13364 2 4
13365 3 5
13366 4 6
13367
13368 An ldr instruction takes 1-3 cycles, but does not block the
13369 pipeline.
13370
13371 NREGS CYCLES
13372 1 1-3
13373 2 2-6
13374 3 3-9
13375 4 4-12
13376
13377 Best case ldr will always win. However, the more ldr instructions
13378 we issue, the less likely we are to be able to schedule them well.
13379 Using ldr instructions also increases code size.
13380
13381 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13382 for counts of 3 or 4 regs. */
13383 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13384 return false;
13385 return true;
13386 }
13387
13388 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13389 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13390 an array ORDER which describes the sequence to use when accessing the
13391 offsets that produces an ascending order. In this sequence, each
13392 offset must be larger by exactly 4 than the previous one. ORDER[0]
13393 must have been filled in with the lowest offset by the caller.
13394 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13395 we use to verify that ORDER produces an ascending order of registers.
13396 Return true if it was possible to construct such an order, false if
13397 not. */
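/* A small worked example: with NOPS == 4 and UNSORTED_OFFSETS == {8, 0, 4, 12},
   the caller sets ORDER[0] = 1 (the index of offset 0) and this function fills
   in ORDER == {1, 2, 0, 3}, i.e. offsets 0, 4, 8, 12 in ascending order.  */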
13398
13399 static bool
13400 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13401 int *unsorted_regs)
13402 {
13403 int i;
13404 for (i = 1; i < nops; i++)
13405 {
13406 int j;
13407
13408 order[i] = order[i - 1];
13409 for (j = 0; j < nops; j++)
13410 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13411 {
13412 /* We must find exactly one offset that is higher than the
13413 previous one by 4. */
13414 if (order[i] != order[i - 1])
13415 return false;
13416 order[i] = j;
13417 }
13418 if (order[i] == order[i - 1])
13419 return false;
13420 /* The register numbers must be ascending. */
13421 if (unsorted_regs != NULL
13422 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13423 return false;
13424 }
13425 return true;
13426 }
13427
13428 /* Used to determine in a peephole whether a sequence of load
13429 instructions can be changed into a load-multiple instruction.
13430 NOPS is the number of separate load instructions we are examining. The
13431 first NOPS entries in OPERANDS are the destination registers, the
13432 next NOPS entries are memory operands. If this function is
13433 successful, *BASE is set to the common base register of the memory
13434 accesses; *LOAD_OFFSET is set to the first memory location's offset
13435 from that base register.
13436 REGS is an array filled in with the destination register numbers.
13437 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13438 insn numbers to an ascending order of loads.  If CHECK_REGS is true,
13439 the sequence of registers in REGS matches the loads from ascending memory
13440 locations, and the function verifies that the register numbers are
13441 themselves ascending. If CHECK_REGS is false, the register numbers
13442 are stored in the order they are found in the operands. */
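/* For example, the pair "ldr r1, [r5, #4]; ldr r2, [r5, #8]" gives *BASE == 5,
   *LOAD_OFFSET == 4 and (on ARM) returns ldm_case 2, an ldmib candidate;
   a lowest offset of 0 gives case 1 (ldmia), a highest offset of 0 gives
   case 3 (ldmda), -4 gives case 4 (ldmdb), and any other offset reachable
   with a single add gives case 5.  */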
13443 static int
13444 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13445 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13446 {
13447 int unsorted_regs[MAX_LDM_STM_OPS];
13448 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13449 int order[MAX_LDM_STM_OPS];
13450 rtx base_reg_rtx = NULL;
13451 int base_reg = -1;
13452 int i, ldm_case;
13453
13454 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13455 easily extended if required. */
13456 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13457
13458 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13459
13460 /* Loop over the operands and check that the memory references are
13461 suitable (i.e. immediate offsets from the same base register). At
13462 the same time, extract the target register, and the memory
13463 offsets. */
13464 for (i = 0; i < nops; i++)
13465 {
13466 rtx reg;
13467 rtx offset;
13468
13469 /* Convert a subreg of a mem into the mem itself. */
13470 if (GET_CODE (operands[nops + i]) == SUBREG)
13471 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13472
13473 gcc_assert (MEM_P (operands[nops + i]));
13474
13475 /* Don't reorder volatile memory references; it doesn't seem worth
13476 looking for the case where the order is ok anyway. */
13477 if (MEM_VOLATILE_P (operands[nops + i]))
13478 return 0;
13479
13480 offset = const0_rtx;
13481
13482 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13483 || (GET_CODE (reg) == SUBREG
13484 && REG_P (reg = SUBREG_REG (reg))))
13485 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13486 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13487 || (GET_CODE (reg) == SUBREG
13488 && REG_P (reg = SUBREG_REG (reg))))
13489 && (CONST_INT_P (offset
13490 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13491 {
13492 if (i == 0)
13493 {
13494 base_reg = REGNO (reg);
13495 base_reg_rtx = reg;
13496 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13497 return 0;
13498 }
13499 else if (base_reg != (int) REGNO (reg))
13500 /* Not addressed from the same base register. */
13501 return 0;
13502
13503 unsorted_regs[i] = (REG_P (operands[i])
13504 ? REGNO (operands[i])
13505 : REGNO (SUBREG_REG (operands[i])));
13506
13507 /* If it isn't an integer register, or if it overwrites the
13508 base register but isn't the last insn in the list, then
13509 we can't do this. */
13510 if (unsorted_regs[i] < 0
13511 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13512 || unsorted_regs[i] > 14
13513 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13514 return 0;
13515
13516 /* Don't allow SP to be loaded unless it is also the base
13517 register. It guarantees that SP is reset correctly when
13518 an LDM instruction is interrupted. Otherwise, we might
13519 end up with a corrupt stack. */
13520 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13521 return 0;
13522
13523 unsorted_offsets[i] = INTVAL (offset);
13524 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13525 order[0] = i;
13526 }
13527 else
13528 /* Not a suitable memory address. */
13529 return 0;
13530 }
13531
13532 /* All the useful information has now been extracted from the
13533 operands into unsorted_regs and unsorted_offsets; additionally,
13534 order[0] has been set to the lowest offset in the list. Sort
13535 the offsets into order, verifying that they are adjacent, and
13536 check that the register numbers are ascending. */
13537 if (!compute_offset_order (nops, unsorted_offsets, order,
13538 check_regs ? unsorted_regs : NULL))
13539 return 0;
13540
13541 if (saved_order)
13542 memcpy (saved_order, order, sizeof order);
13543
13544 if (base)
13545 {
13546 *base = base_reg;
13547
13548 for (i = 0; i < nops; i++)
13549 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13550
13551 *load_offset = unsorted_offsets[order[0]];
13552 }
13553
13554 if (TARGET_THUMB1
13555 && !peep2_reg_dead_p (nops, base_reg_rtx))
13556 return 0;
13557
13558 if (unsorted_offsets[order[0]] == 0)
13559 ldm_case = 1; /* ldmia */
13560 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13561 ldm_case = 2; /* ldmib */
13562 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13563 ldm_case = 3; /* ldmda */
13564 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13565 ldm_case = 4; /* ldmdb */
13566 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13567 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13568 ldm_case = 5;
13569 else
13570 return 0;
13571
13572 if (!multiple_operation_profitable_p (false, nops,
13573 ldm_case == 5
13574 ? unsorted_offsets[order[0]] : 0))
13575 return 0;
13576
13577 return ldm_case;
13578 }
13579
13580 /* Used to determine in a peephole whether a sequence of store instructions can
13581 be changed into a store-multiple instruction.
13582 NOPS is the number of separate store instructions we are examining.
13583 NOPS_TOTAL is the total number of instructions recognized by the peephole
13584 pattern.
13585 The first NOPS entries in OPERANDS are the source registers, the next
13586 NOPS entries are memory operands. If this function is successful, *BASE is
13587 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13588 to the first memory location's offset from that base register. REGS is an
13589 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13590 likewise filled with the corresponding rtx's.
13591 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13592 numbers to an ascending order of stores.
13593 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13594 from ascending memory locations, and the function verifies that the register
13595 numbers are themselves ascending. If CHECK_REGS is false, the register
13596 numbers are stored in the order they are found in the operands. */
13597 static int
13598 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13599 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13600 HOST_WIDE_INT *load_offset, bool check_regs)
13601 {
13602 int unsorted_regs[MAX_LDM_STM_OPS];
13603 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13604 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13605 int order[MAX_LDM_STM_OPS];
13606 int base_reg = -1;
13607 rtx base_reg_rtx = NULL;
13608 int i, stm_case;
13609
13610 /* Write back of base register is currently only supported for Thumb 1. */
13611 int base_writeback = TARGET_THUMB1;
13612
13613 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13614 easily extended if required. */
13615 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13616
13617 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13618
13619 /* Loop over the operands and check that the memory references are
13620 suitable (i.e. immediate offsets from the same base register). At
13621 the same time, extract the target register, and the memory
13622 offsets. */
13623 for (i = 0; i < nops; i++)
13624 {
13625 rtx reg;
13626 rtx offset;
13627
13628 /* Convert a subreg of a mem into the mem itself. */
13629 if (GET_CODE (operands[nops + i]) == SUBREG)
13630 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13631
13632 gcc_assert (MEM_P (operands[nops + i]));
13633
13634 /* Don't reorder volatile memory references; it doesn't seem worth
13635 looking for the case where the order is ok anyway. */
13636 if (MEM_VOLATILE_P (operands[nops + i]))
13637 return 0;
13638
13639 offset = const0_rtx;
13640
13641 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13642 || (GET_CODE (reg) == SUBREG
13643 && REG_P (reg = SUBREG_REG (reg))))
13644 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13645 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13646 || (GET_CODE (reg) == SUBREG
13647 && REG_P (reg = SUBREG_REG (reg))))
13648 && (CONST_INT_P (offset
13649 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13650 {
13651 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13652 ? operands[i] : SUBREG_REG (operands[i]));
13653 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13654
13655 if (i == 0)
13656 {
13657 base_reg = REGNO (reg);
13658 base_reg_rtx = reg;
13659 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13660 return 0;
13661 }
13662 else if (base_reg != (int) REGNO (reg))
13663 /* Not addressed from the same base register. */
13664 return 0;
13665
13666 /* If it isn't an integer register, then we can't do this. */
13667 if (unsorted_regs[i] < 0
13668 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13669 /* The effects are unpredictable if the base register is
13670 both updated and stored. */
13671 || (base_writeback && unsorted_regs[i] == base_reg)
13672 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13673 || unsorted_regs[i] > 14)
13674 return 0;
13675
13676 unsorted_offsets[i] = INTVAL (offset);
13677 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13678 order[0] = i;
13679 }
13680 else
13681 /* Not a suitable memory address. */
13682 return 0;
13683 }
13684
13685 /* All the useful information has now been extracted from the
13686 operands into unsorted_regs and unsorted_offsets; additionally,
13687 order[0] has been set to the lowest offset in the list. Sort
13688 the offsets into order, verifying that they are adjacent, and
13689 check that the register numbers are ascending. */
13690 if (!compute_offset_order (nops, unsorted_offsets, order,
13691 check_regs ? unsorted_regs : NULL))
13692 return 0;
13693
13694 if (saved_order)
13695 memcpy (saved_order, order, sizeof order);
13696
13697 if (base)
13698 {
13699 *base = base_reg;
13700
13701 for (i = 0; i < nops; i++)
13702 {
13703 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13704 if (reg_rtxs)
13705 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13706 }
13707
13708 *load_offset = unsorted_offsets[order[0]];
13709 }
13710
13711 if (TARGET_THUMB1
13712 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13713 return 0;
13714
13715 if (unsorted_offsets[order[0]] == 0)
13716 stm_case = 1; /* stmia */
13717 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13718 stm_case = 2; /* stmib */
13719 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13720 stm_case = 3; /* stmda */
13721 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13722 stm_case = 4; /* stmdb */
13723 else
13724 return 0;
13725
13726 if (!multiple_operation_profitable_p (false, nops, 0))
13727 return 0;
13728
13729 return stm_case;
13730 }
13731 \f
13732 /* Routines for use in generating RTL. */
13733
13734 /* Generate a load-multiple instruction. COUNT is the number of loads in
13735 the instruction; REGS and MEMS are arrays containing the operands.
13736 BASEREG is the base register to be used in addressing the memory operands.
13737 WBACK_OFFSET, if nonzero, is the amount by which the instruction should
13738 update the base register. */
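/* As a sketch: with COUNT == 2, REGS == {4, 5}, MEMS addressing successive
   words at rN and WBACK_OFFSET == 8, the PARALLEL built below corresponds to
   "ldmia rN!, {r4, r5}"; if the ldm is judged unprofitable, a plain sequence
   of moves (plus an add for the write-back) is emitted instead.  */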
13739
13740 static rtx
13741 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13742 HOST_WIDE_INT wback_offset)
13743 {
13744 int i = 0, j;
13745 rtx result;
13746
13747 if (!multiple_operation_profitable_p (false, count, 0))
13748 {
13749 rtx seq;
13750
13751 start_sequence ();
13752
13753 for (i = 0; i < count; i++)
13754 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13755
13756 if (wback_offset != 0)
13757 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13758
13759 seq = get_insns ();
13760 end_sequence ();
13761
13762 return seq;
13763 }
13764
13765 result = gen_rtx_PARALLEL (VOIDmode,
13766 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13767 if (wback_offset != 0)
13768 {
13769 XVECEXP (result, 0, 0)
13770 = gen_rtx_SET (VOIDmode, basereg,
13771 plus_constant (Pmode, basereg, wback_offset));
13772 i = 1;
13773 count++;
13774 }
13775
13776 for (j = 0; i < count; i++, j++)
13777 XVECEXP (result, 0, i)
13778 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13779
13780 return result;
13781 }
13782
13783 /* Generate a store-multiple instruction. COUNT is the number of stores in
13784 the instruction; REGS and MEMS are arrays containing the operands.
13785 BASEREG is the base register to be used in addressing the memory operands.
13786 WBACK_OFFSET, if nonzero, is the amount by which the instruction should
13787 update the base register. */
13788
13789 static rtx
13790 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13791 HOST_WIDE_INT wback_offset)
13792 {
13793 int i = 0, j;
13794 rtx result;
13795
13796 if (GET_CODE (basereg) == PLUS)
13797 basereg = XEXP (basereg, 0);
13798
13799 if (!multiple_operation_profitable_p (false, count, 0))
13800 {
13801 rtx seq;
13802
13803 start_sequence ();
13804
13805 for (i = 0; i < count; i++)
13806 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13807
13808 if (wback_offset != 0)
13809 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13810
13811 seq = get_insns ();
13812 end_sequence ();
13813
13814 return seq;
13815 }
13816
13817 result = gen_rtx_PARALLEL (VOIDmode,
13818 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13819 if (wback_offset != 0)
13820 {
13821 XVECEXP (result, 0, 0)
13822 = gen_rtx_SET (VOIDmode, basereg,
13823 plus_constant (Pmode, basereg, wback_offset));
13824 i = 1;
13825 count++;
13826 }
13827
13828 for (j = 0; i < count; i++, j++)
13829 XVECEXP (result, 0, i)
13830 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13831
13832 return result;
13833 }
13834
13835 /* Generate either a load-multiple or a store-multiple instruction. This
13836 function can be used in situations where we can start with a single MEM
13837 rtx and adjust its address upwards.
13838 COUNT is the number of operations in the instruction, not counting a
13839 possible update of the base register. REGS is an array containing the
13840 register operands.
13841 BASEREG is the base register to be used in addressing the memory operands,
13842 which are constructed from BASEMEM.
13843 WRITE_BACK specifies whether the generated instruction should include an
13844 update of the base register.
13845 OFFSETP is used to pass an offset to and from this function; this offset
13846 is not used when constructing the address (instead BASEMEM should have an
13847 appropriate offset in its address), it is used only for setting
13848 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13849
13850 static rtx
13851 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13852 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13853 {
13854 rtx mems[MAX_LDM_STM_OPS];
13855 HOST_WIDE_INT offset = *offsetp;
13856 int i;
13857
13858 gcc_assert (count <= MAX_LDM_STM_OPS);
13859
13860 if (GET_CODE (basereg) == PLUS)
13861 basereg = XEXP (basereg, 0);
13862
13863 for (i = 0; i < count; i++)
13864 {
13865 rtx addr = plus_constant (Pmode, basereg, i * 4);
13866 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13867 offset += 4;
13868 }
13869
13870 if (write_back)
13871 *offsetp = offset;
13872
13873 if (is_load)
13874 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13875 write_back ? 4 * count : 0);
13876 else
13877 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13878 write_back ? 4 * count : 0);
13879 }
13880
13881 rtx
13882 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13883 rtx basemem, HOST_WIDE_INT *offsetp)
13884 {
13885 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13886 offsetp);
13887 }
13888
13889 rtx
13890 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13891 rtx basemem, HOST_WIDE_INT *offsetp)
13892 {
13893 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13894 offsetp);
13895 }
13896
13897 /* Called from a peephole2 expander to turn a sequence of loads into an
13898 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13899 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13900 is true if we can reorder the registers because their values are subsequently
13901 used commutatively.
13902 Returns true iff we could generate a new instruction. */
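/* For example, when the peephole matches "ldr r3, [r0]; ldr r2, [r0, #4]" and
   SORT_REGS is true, the register list is sorted so that we can emit
   "ldmia r0, {r2, r3}"; the two destination values end up swapped, which is
   why the caller must only allow this when the results are used commutatively.  */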
13903
13904 bool
13905 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13906 {
13907 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13908 rtx mems[MAX_LDM_STM_OPS];
13909 int i, j, base_reg;
13910 rtx base_reg_rtx;
13911 HOST_WIDE_INT offset;
13912 int write_back = FALSE;
13913 int ldm_case;
13914 rtx addr;
13915
13916 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13917 &base_reg, &offset, !sort_regs);
13918
13919 if (ldm_case == 0)
13920 return false;
13921
13922 if (sort_regs)
13923 for (i = 0; i < nops - 1; i++)
13924 for (j = i + 1; j < nops; j++)
13925 if (regs[i] > regs[j])
13926 {
13927 int t = regs[i];
13928 regs[i] = regs[j];
13929 regs[j] = t;
13930 }
13931 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13932
13933 if (TARGET_THUMB1)
13934 {
13935 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13936 gcc_assert (ldm_case == 1 || ldm_case == 5);
13937 write_back = TRUE;
13938 }
13939
13940 if (ldm_case == 5)
13941 {
13942 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13943 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13944 offset = 0;
13945 if (!TARGET_THUMB1)
13946 {
13947 base_reg = regs[0];
13948 base_reg_rtx = newbase;
13949 }
13950 }
13951
13952 for (i = 0; i < nops; i++)
13953 {
13954 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13955 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13956 SImode, addr, 0);
13957 }
13958 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13959 write_back ? offset + i * 4 : 0));
13960 return true;
13961 }
13962
13963 /* Called from a peephole2 expander to turn a sequence of stores into an
13964 STM instruction. OPERANDS are the operands found by the peephole matcher;
13965 NOPS indicates how many separate stores we are trying to combine.
13966 Returns true iff we could generate a new instruction. */
13967
13968 bool
13969 gen_stm_seq (rtx *operands, int nops)
13970 {
13971 int i;
13972 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13973 rtx mems[MAX_LDM_STM_OPS];
13974 int base_reg;
13975 rtx base_reg_rtx;
13976 HOST_WIDE_INT offset;
13977 int write_back = FALSE;
13978 int stm_case;
13979 rtx addr;
13980 bool base_reg_dies;
13981
13982 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13983 mem_order, &base_reg, &offset, true);
13984
13985 if (stm_case == 0)
13986 return false;
13987
13988 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13989
13990 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13991 if (TARGET_THUMB1)
13992 {
13993 gcc_assert (base_reg_dies);
13994 write_back = TRUE;
13995 }
13996
13997 if (stm_case == 5)
13998 {
13999 gcc_assert (base_reg_dies);
14000 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14001 offset = 0;
14002 }
14003
14004 addr = plus_constant (Pmode, base_reg_rtx, offset);
14005
14006 for (i = 0; i < nops; i++)
14007 {
14008 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14009 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14010 SImode, addr, 0);
14011 }
14012 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14013 write_back ? offset + i * 4 : 0));
14014 return true;
14015 }
14016
14017 /* Called from a peephole2 expander to turn a sequence of stores that are
14018 preceded by constant loads into an STM instruction. OPERANDS are the
14019 operands found by the peephole matcher; NOPS indicates how many
14020 separate stores we are trying to combine; there are 2 * NOPS
14021 instructions in the peephole.
14022 Returns true iff we could generate a new instruction. */
14023
14024 bool
14025 gen_const_stm_seq (rtx *operands, int nops)
14026 {
14027 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14028 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14029 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14030 rtx mems[MAX_LDM_STM_OPS];
14031 int base_reg;
14032 rtx base_reg_rtx;
14033 HOST_WIDE_INT offset;
14034 int write_back = FALSE;
14035 int stm_case;
14036 rtx addr;
14037 bool base_reg_dies;
14038 int i, j;
14039 HARD_REG_SET allocated;
14040
14041 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14042 mem_order, &base_reg, &offset, false);
14043
14044 if (stm_case == 0)
14045 return false;
14046
14047 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14048
14049 /* If the same register is used more than once, try to find a free
14050 register. */
14051 CLEAR_HARD_REG_SET (allocated);
14052 for (i = 0; i < nops; i++)
14053 {
14054 for (j = i + 1; j < nops; j++)
14055 if (regs[i] == regs[j])
14056 {
14057 rtx t = peep2_find_free_register (0, nops * 2,
14058 TARGET_THUMB1 ? "l" : "r",
14059 SImode, &allocated);
14060 if (t == NULL_RTX)
14061 return false;
14062 reg_rtxs[i] = t;
14063 regs[i] = REGNO (t);
14064 }
14065 }
14066
14067 /* Compute an ordering that maps the register numbers to an ascending
14068 sequence. */
14069 reg_order[0] = 0;
14070 for (i = 0; i < nops; i++)
14071 if (regs[i] < regs[reg_order[0]])
14072 reg_order[0] = i;
14073
14074 for (i = 1; i < nops; i++)
14075 {
14076 int this_order = reg_order[i - 1];
14077 for (j = 0; j < nops; j++)
14078 if (regs[j] > regs[reg_order[i - 1]]
14079 && (this_order == reg_order[i - 1]
14080 || regs[j] < regs[this_order]))
14081 this_order = j;
14082 reg_order[i] = this_order;
14083 }
14084
14085 /* Ensure that registers that must be live after the instruction end
14086 up with the correct value. */
14087 for (i = 0; i < nops; i++)
14088 {
14089 int this_order = reg_order[i];
14090 if ((this_order != mem_order[i]
14091 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14092 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14093 return false;
14094 }
14095
14096 /* Load the constants. */
14097 for (i = 0; i < nops; i++)
14098 {
14099 rtx op = operands[2 * nops + mem_order[i]];
14100 sorted_regs[i] = regs[reg_order[i]];
14101 emit_move_insn (reg_rtxs[reg_order[i]], op);
14102 }
14103
14104 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14105
14106 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14107 if (TARGET_THUMB1)
14108 {
14109 gcc_assert (base_reg_dies);
14110 write_back = TRUE;
14111 }
14112
14113 if (stm_case == 5)
14114 {
14115 gcc_assert (base_reg_dies);
14116 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14117 offset = 0;
14118 }
14119
14120 addr = plus_constant (Pmode, base_reg_rtx, offset);
14121
14122 for (i = 0; i < nops; i++)
14123 {
14124 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14125 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14126 SImode, addr, 0);
14127 }
14128 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14129 write_back ? offset + i * 4 : 0));
14130 return true;
14131 }
14132
14133 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14134 unaligned copies on processors which support unaligned semantics for those
14135 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14136 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14137 An interleave factor of 1 (the minimum) will perform no interleaving.
14138 Load/store multiple are used for aligned addresses where possible. */
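/* Roughly speaking, with INTERLEAVE_FACTOR == 2 and both buffers unaligned,
   each 8-byte chunk is copied as load/load/store/store using the unaligned
   ldr/str patterns, any halfword tail with ldrh/strh and any final byte with
   ldrb/strb; with an aligned source or destination, ldm/stm is used for the
   word-sized part instead.  */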
14139
14140 static void
14141 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14142 HOST_WIDE_INT length,
14143 unsigned int interleave_factor)
14144 {
14145 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14146 int *regnos = XALLOCAVEC (int, interleave_factor);
14147 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14148 HOST_WIDE_INT i, j;
14149 HOST_WIDE_INT remaining = length, words;
14150 rtx halfword_tmp = NULL, byte_tmp = NULL;
14151 rtx dst, src;
14152 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14153 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14154 HOST_WIDE_INT srcoffset, dstoffset;
14155 HOST_WIDE_INT src_autoinc, dst_autoinc;
14156 rtx mem, addr;
14157
14158 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14159
14160 /* Use hard registers if we have aligned source or destination so we can use
14161 load/store multiple with contiguous registers. */
14162 if (dst_aligned || src_aligned)
14163 for (i = 0; i < interleave_factor; i++)
14164 regs[i] = gen_rtx_REG (SImode, i);
14165 else
14166 for (i = 0; i < interleave_factor; i++)
14167 regs[i] = gen_reg_rtx (SImode);
14168
14169 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14170 src = copy_addr_to_reg (XEXP (srcbase, 0));
14171
14172 srcoffset = dstoffset = 0;
14173
14174 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14175 For copying the last bytes we want to subtract this offset again. */
14176 src_autoinc = dst_autoinc = 0;
14177
14178 for (i = 0; i < interleave_factor; i++)
14179 regnos[i] = i;
14180
14181 /* Copy BLOCK_SIZE_BYTES chunks. */
14182
14183 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14184 {
14185 /* Load words. */
14186 if (src_aligned && interleave_factor > 1)
14187 {
14188 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14189 TRUE, srcbase, &srcoffset));
14190 src_autoinc += UNITS_PER_WORD * interleave_factor;
14191 }
14192 else
14193 {
14194 for (j = 0; j < interleave_factor; j++)
14195 {
14196 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14197 - src_autoinc));
14198 mem = adjust_automodify_address (srcbase, SImode, addr,
14199 srcoffset + j * UNITS_PER_WORD);
14200 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14201 }
14202 srcoffset += block_size_bytes;
14203 }
14204
14205 /* Store words. */
14206 if (dst_aligned && interleave_factor > 1)
14207 {
14208 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14209 TRUE, dstbase, &dstoffset));
14210 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14211 }
14212 else
14213 {
14214 for (j = 0; j < interleave_factor; j++)
14215 {
14216 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14217 - dst_autoinc));
14218 mem = adjust_automodify_address (dstbase, SImode, addr,
14219 dstoffset + j * UNITS_PER_WORD);
14220 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14221 }
14222 dstoffset += block_size_bytes;
14223 }
14224
14225 remaining -= block_size_bytes;
14226 }
14227
14228 /* Copy any whole words left (note these aren't interleaved with any
14229 subsequent halfword/byte load/stores in the interests of simplicity). */
14230
14231 words = remaining / UNITS_PER_WORD;
14232
14233 gcc_assert (words < interleave_factor);
14234
14235 if (src_aligned && words > 1)
14236 {
14237 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14238 &srcoffset));
14239 src_autoinc += UNITS_PER_WORD * words;
14240 }
14241 else
14242 {
14243 for (j = 0; j < words; j++)
14244 {
14245 addr = plus_constant (Pmode, src,
14246 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14247 mem = adjust_automodify_address (srcbase, SImode, addr,
14248 srcoffset + j * UNITS_PER_WORD);
14249 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14250 }
14251 srcoffset += words * UNITS_PER_WORD;
14252 }
14253
14254 if (dst_aligned && words > 1)
14255 {
14256 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14257 &dstoffset));
14258 dst_autoinc += words * UNITS_PER_WORD;
14259 }
14260 else
14261 {
14262 for (j = 0; j < words; j++)
14263 {
14264 addr = plus_constant (Pmode, dst,
14265 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14266 mem = adjust_automodify_address (dstbase, SImode, addr,
14267 dstoffset + j * UNITS_PER_WORD);
14268 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14269 }
14270 dstoffset += words * UNITS_PER_WORD;
14271 }
14272
14273 remaining -= words * UNITS_PER_WORD;
14274
14275 gcc_assert (remaining < 4);
14276
14277 /* Copy a halfword if necessary. */
14278
14279 if (remaining >= 2)
14280 {
14281 halfword_tmp = gen_reg_rtx (SImode);
14282
14283 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14284 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14285 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14286
14287 /* Either write out immediately, or delay until we've loaded the last
14288 byte, depending on interleave factor. */
14289 if (interleave_factor == 1)
14290 {
14291 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14292 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14293 emit_insn (gen_unaligned_storehi (mem,
14294 gen_lowpart (HImode, halfword_tmp)));
14295 halfword_tmp = NULL;
14296 dstoffset += 2;
14297 }
14298
14299 remaining -= 2;
14300 srcoffset += 2;
14301 }
14302
14303 gcc_assert (remaining < 2);
14304
14305 /* Copy last byte. */
14306
14307 if ((remaining & 1) != 0)
14308 {
14309 byte_tmp = gen_reg_rtx (SImode);
14310
14311 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14312 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14313 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14314
14315 if (interleave_factor == 1)
14316 {
14317 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14318 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14319 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14320 byte_tmp = NULL;
14321 dstoffset++;
14322 }
14323
14324 remaining--;
14325 srcoffset++;
14326 }
14327
14328 /* Store last halfword if we haven't done so already. */
14329
14330 if (halfword_tmp)
14331 {
14332 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14333 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14334 emit_insn (gen_unaligned_storehi (mem,
14335 gen_lowpart (HImode, halfword_tmp)));
14336 dstoffset += 2;
14337 }
14338
14339 /* Likewise for last byte. */
14340
14341 if (byte_tmp)
14342 {
14343 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14344 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14345 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14346 dstoffset++;
14347 }
14348
14349 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14350 }
14351
14352 /* From mips_adjust_block_mem:
14353
14354 Helper function for doing a loop-based block operation on memory
14355 reference MEM. Each iteration of the loop will operate on LENGTH
14356 bytes of MEM.
14357
14358 Create a new base register for use within the loop and point it to
14359 the start of MEM. Create a new memory reference that uses this
14360 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14361
14362 static void
14363 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14364 rtx *loop_mem)
14365 {
14366 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14367
14368 /* Although the new mem does not refer to a known location,
14369 it does keep up to LENGTH bytes of alignment. */
14370 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14371 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14372 }
14373
14374 /* From mips_block_move_loop:
14375
14376 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14377 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14378 the memory regions do not overlap. */
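/* The emitted structure is, in outline:
     src_reg = &src; dest_reg = &dest;
     final_src = src_reg + (length - leftover);
   loop:
     copy BYTES_PER_ITER bytes (straight-line, as above);
     src_reg += BYTES_PER_ITER; dest_reg += BYTES_PER_ITER;
     if (src_reg != final_src) goto loop;
   followed by a straight-line copy of any LEFTOVER bytes.  */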
14379
14380 static void
14381 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14382 unsigned int interleave_factor,
14383 HOST_WIDE_INT bytes_per_iter)
14384 {
14385 rtx label, src_reg, dest_reg, final_src, test;
14386 HOST_WIDE_INT leftover;
14387
14388 leftover = length % bytes_per_iter;
14389 length -= leftover;
14390
14391 /* Create registers and memory references for use within the loop. */
14392 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14393 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14394
14395 /* Calculate the value that SRC_REG should have after the last iteration of
14396 the loop. */
14397 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14398 0, 0, OPTAB_WIDEN);
14399
14400 /* Emit the start of the loop. */
14401 label = gen_label_rtx ();
14402 emit_label (label);
14403
14404 /* Emit the loop body. */
14405 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14406 interleave_factor);
14407
14408 /* Move on to the next block. */
14409 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14410 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14411
14412 /* Emit the loop condition. */
14413 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14414 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14415
14416 /* Mop up any left-over bytes. */
14417 if (leftover)
14418 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14419 }
14420
14421 /* Emit a block move when either the source or destination is unaligned (not
14422 aligned to a four-byte boundary). This may need further tuning depending on
14423 core type, optimize_size setting, etc. */
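/* Concretely (see the code below): when optimizing for size we use an
   interleave factor of 2 and 8 bytes per loop iteration if either buffer is
   word-aligned, otherwise 1 and 4, and only loop for copies longer than 12
   bytes; when optimizing for speed we use a factor of 4, 16 bytes per
   iteration, and loop for copies longer than 32 bytes.  */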
14424
14425 static int
14426 arm_movmemqi_unaligned (rtx *operands)
14427 {
14428 HOST_WIDE_INT length = INTVAL (operands[2]);
14429
14430 if (optimize_size)
14431 {
14432 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14433 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14434 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14435 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14436 or dst_aligned though: allow more interleaving in those cases since the
14437 resulting code can be smaller. */
14438 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14439 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14440
14441 if (length > 12)
14442 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14443 interleave_factor, bytes_per_iter);
14444 else
14445 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14446 interleave_factor);
14447 }
14448 else
14449 {
14450 /* Note that the loop created by arm_block_move_unaligned_loop may be
14451 subject to loop unrolling, which makes tuning this condition a little
14452 redundant. */
14453 if (length > 32)
14454 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14455 else
14456 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14457 }
14458
14459 return 1;
14460 }
14461
14462 int
14463 arm_gen_movmemqi (rtx *operands)
14464 {
14465 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14466 HOST_WIDE_INT srcoffset, dstoffset;
14467 int i;
14468 rtx src, dst, srcbase, dstbase;
14469 rtx part_bytes_reg = NULL;
14470 rtx mem;
14471
14472 if (!CONST_INT_P (operands[2])
14473 || !CONST_INT_P (operands[3])
14474 || INTVAL (operands[2]) > 64)
14475 return 0;
14476
14477 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14478 return arm_movmemqi_unaligned (operands);
14479
14480 if (INTVAL (operands[3]) & 3)
14481 return 0;
14482
14483 dstbase = operands[0];
14484 srcbase = operands[1];
14485
14486 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14487 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14488
14489 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14490 out_words_to_go = INTVAL (operands[2]) / 4;
14491 last_bytes = INTVAL (operands[2]) & 3;
14492 dstoffset = srcoffset = 0;
14493
14494 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14495 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14496
14497 for (i = 0; in_words_to_go >= 2; i+=4)
14498 {
14499 if (in_words_to_go > 4)
14500 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14501 TRUE, srcbase, &srcoffset));
14502 else
14503 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14504 src, FALSE, srcbase,
14505 &srcoffset));
14506
14507 if (out_words_to_go)
14508 {
14509 if (out_words_to_go > 4)
14510 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14511 TRUE, dstbase, &dstoffset));
14512 else if (out_words_to_go != 1)
14513 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14514 out_words_to_go, dst,
14515 (last_bytes == 0
14516 ? FALSE : TRUE),
14517 dstbase, &dstoffset));
14518 else
14519 {
14520 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14521 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14522 if (last_bytes != 0)
14523 {
14524 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14525 dstoffset += 4;
14526 }
14527 }
14528 }
14529
14530 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14531 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14532 }
14533
14534 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14535 if (out_words_to_go)
14536 {
14537 rtx sreg;
14538
14539 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14540 sreg = copy_to_reg (mem);
14541
14542 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14543 emit_move_insn (mem, sreg);
14544 in_words_to_go--;
14545
14546 gcc_assert (!in_words_to_go); /* Sanity check */
14547 }
14548
14549 if (in_words_to_go)
14550 {
14551 gcc_assert (in_words_to_go > 0);
14552
14553 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14554 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14555 }
14556
14557 gcc_assert (!last_bytes || part_bytes_reg);
14558
14559 if (BYTES_BIG_ENDIAN && last_bytes)
14560 {
14561 rtx tmp = gen_reg_rtx (SImode);
14562
14563 /* The bytes we want are in the top end of the word. */
14564 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14565 GEN_INT (8 * (4 - last_bytes))));
14566 part_bytes_reg = tmp;
14567
14568 while (last_bytes)
14569 {
14570 mem = adjust_automodify_address (dstbase, QImode,
14571 plus_constant (Pmode, dst,
14572 last_bytes - 1),
14573 dstoffset + last_bytes - 1);
14574 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14575
14576 if (--last_bytes)
14577 {
14578 tmp = gen_reg_rtx (SImode);
14579 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14580 part_bytes_reg = tmp;
14581 }
14582 }
14583
14584 }
14585 else
14586 {
14587 if (last_bytes > 1)
14588 {
14589 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14590 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14591 last_bytes -= 2;
14592 if (last_bytes)
14593 {
14594 rtx tmp = gen_reg_rtx (SImode);
14595 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14596 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14597 part_bytes_reg = tmp;
14598 dstoffset += 2;
14599 }
14600 }
14601
14602 if (last_bytes)
14603 {
14604 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14605 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14606 }
14607 }
14608
14609 return 1;
14610 }
14611
14612 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14613 by mode size. */
14614 inline static rtx
14615 next_consecutive_mem (rtx mem)
14616 {
14617 enum machine_mode mode = GET_MODE (mem);
14618 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14619 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14620
14621 return adjust_automodify_address (mem, mode, addr, offset);
14622 }
14623
14624 /* Copy using LDRD/STRD instructions whenever possible.
14625 Returns true upon success. */
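/* Illustrative only, derived from the code below: a 15-byte, word-aligned
   copy is emitted as an LDRD/STRD pair for the first 8 bytes, an LDR/STR
   for the next 4, an LDRH/STRH for the next 2, and a final byte copy for
   the remaining byte.  */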
14626 bool
14627 gen_movmem_ldrd_strd (rtx *operands)
14628 {
14629 unsigned HOST_WIDE_INT len;
14630 HOST_WIDE_INT align;
14631 rtx src, dst, base;
14632 rtx reg0;
14633 bool src_aligned, dst_aligned;
14634 bool src_volatile, dst_volatile;
14635
14636 gcc_assert (CONST_INT_P (operands[2]));
14637 gcc_assert (CONST_INT_P (operands[3]));
14638
14639 len = UINTVAL (operands[2]);
14640 if (len > 64)
14641 return false;
14642
14643 /* Maximum alignment we can assume for both src and dst buffers. */
14644 align = INTVAL (operands[3]);
14645
14646 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14647 return false;
14648
14649 /* Place src and dst addresses in registers
14650 and update the corresponding mem rtx. */
14651 dst = operands[0];
14652 dst_volatile = MEM_VOLATILE_P (dst);
14653 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14654 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14655 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14656
14657 src = operands[1];
14658 src_volatile = MEM_VOLATILE_P (src);
14659 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14660 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14661 src = adjust_automodify_address (src, VOIDmode, base, 0);
14662
14663 if (!unaligned_access && !(src_aligned && dst_aligned))
14664 return false;
14665
14666 if (src_volatile || dst_volatile)
14667 return false;
14668
14669 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14670 if (!(dst_aligned || src_aligned))
14671 return arm_gen_movmemqi (operands);
14672
14673 src = adjust_address (src, DImode, 0);
14674 dst = adjust_address (dst, DImode, 0);
14675 while (len >= 8)
14676 {
14677 len -= 8;
14678 reg0 = gen_reg_rtx (DImode);
14679 if (src_aligned)
14680 emit_move_insn (reg0, src);
14681 else
14682 emit_insn (gen_unaligned_loaddi (reg0, src));
14683
14684 if (dst_aligned)
14685 emit_move_insn (dst, reg0);
14686 else
14687 emit_insn (gen_unaligned_storedi (dst, reg0));
14688
14689 src = next_consecutive_mem (src);
14690 dst = next_consecutive_mem (dst);
14691 }
14692
14693 gcc_assert (len < 8);
14694 if (len >= 4)
14695 {
14696 /* More than a word but less than a double-word to copy. Copy a word. */
14697 reg0 = gen_reg_rtx (SImode);
14698 src = adjust_address (src, SImode, 0);
14699 dst = adjust_address (dst, SImode, 0);
14700 if (src_aligned)
14701 emit_move_insn (reg0, src);
14702 else
14703 emit_insn (gen_unaligned_loadsi (reg0, src));
14704
14705 if (dst_aligned)
14706 emit_move_insn (dst, reg0);
14707 else
14708 emit_insn (gen_unaligned_storesi (dst, reg0));
14709
14710 src = next_consecutive_mem (src);
14711 dst = next_consecutive_mem (dst);
14712 len -= 4;
14713 }
14714
14715 if (len == 0)
14716 return true;
14717
14718 /* Copy the remaining bytes. */
14719 if (len >= 2)
14720 {
14721 dst = adjust_address (dst, HImode, 0);
14722 src = adjust_address (src, HImode, 0);
14723 reg0 = gen_reg_rtx (SImode);
14724 if (src_aligned)
14725 emit_insn (gen_zero_extendhisi2 (reg0, src));
14726 else
14727 emit_insn (gen_unaligned_loadhiu (reg0, src));
14728
14729 if (dst_aligned)
14730 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14731 else
14732 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14733
14734 src = next_consecutive_mem (src);
14735 dst = next_consecutive_mem (dst);
14736 if (len == 2)
14737 return true;
14738 }
14739
14740 dst = adjust_address (dst, QImode, 0);
14741 src = adjust_address (src, QImode, 0);
14742 reg0 = gen_reg_rtx (QImode);
14743 emit_move_insn (reg0, src);
14744 emit_move_insn (dst, reg0);
14745 return true;
14746 }
14747
14748 /* Select a dominance comparison mode if possible for a test of the general
14749 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14750 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14751 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14752 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14753 In all cases OP will be either EQ or NE, but we don't need to know which
14754 here. If we are unable to support a dominance comparison we return
14755 CC mode. This will then fail to match for the RTL expressions that
14756 generate this call. */
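/* For example, with COND_OR == DOM_CC_X_OR_Y and the pair (a == b) || (a <= b),
   COND1 == EQ dominates COND2 == LE, so the code below returns CC_DLEmode and
   the combined test reduces to a single LE comparison.  */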
14757 enum machine_mode
14758 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14759 {
14760 enum rtx_code cond1, cond2;
14761 int swapped = 0;
14762
14763 /* Currently we will probably get the wrong result if the individual
14764 comparisons are not simple. This also ensures that it is safe to
14765 reverse a comparison if necessary. */
14766 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14767 != CCmode)
14768 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14769 != CCmode))
14770 return CCmode;
14771
14772 /* The if_then_else variant of this tests the second condition if the
14773 first passes, but is true if the first fails. Reverse the first
14774 condition to get a true "inclusive-or" expression. */
14775 if (cond_or == DOM_CC_NX_OR_Y)
14776 cond1 = reverse_condition (cond1);
14777
14778 /* If the comparisons are not equal, and one doesn't dominate the other,
14779 then we can't do this. */
14780 if (cond1 != cond2
14781 && !comparison_dominates_p (cond1, cond2)
14782 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14783 return CCmode;
14784
14785 if (swapped)
14786 {
14787 enum rtx_code temp = cond1;
14788 cond1 = cond2;
14789 cond2 = temp;
14790 }
14791
14792 switch (cond1)
14793 {
14794 case EQ:
14795 if (cond_or == DOM_CC_X_AND_Y)
14796 return CC_DEQmode;
14797
14798 switch (cond2)
14799 {
14800 case EQ: return CC_DEQmode;
14801 case LE: return CC_DLEmode;
14802 case LEU: return CC_DLEUmode;
14803 case GE: return CC_DGEmode;
14804 case GEU: return CC_DGEUmode;
14805 default: gcc_unreachable ();
14806 }
14807
14808 case LT:
14809 if (cond_or == DOM_CC_X_AND_Y)
14810 return CC_DLTmode;
14811
14812 switch (cond2)
14813 {
14814 case LT:
14815 return CC_DLTmode;
14816 case LE:
14817 return CC_DLEmode;
14818 case NE:
14819 return CC_DNEmode;
14820 default:
14821 gcc_unreachable ();
14822 }
14823
14824 case GT:
14825 if (cond_or == DOM_CC_X_AND_Y)
14826 return CC_DGTmode;
14827
14828 switch (cond2)
14829 {
14830 case GT:
14831 return CC_DGTmode;
14832 case GE:
14833 return CC_DGEmode;
14834 case NE:
14835 return CC_DNEmode;
14836 default:
14837 gcc_unreachable ();
14838 }
14839
14840 case LTU:
14841 if (cond_or == DOM_CC_X_AND_Y)
14842 return CC_DLTUmode;
14843
14844 switch (cond2)
14845 {
14846 case LTU:
14847 return CC_DLTUmode;
14848 case LEU:
14849 return CC_DLEUmode;
14850 case NE:
14851 return CC_DNEmode;
14852 default:
14853 gcc_unreachable ();
14854 }
14855
14856 case GTU:
14857 if (cond_or == DOM_CC_X_AND_Y)
14858 return CC_DGTUmode;
14859
14860 switch (cond2)
14861 {
14862 case GTU:
14863 return CC_DGTUmode;
14864 case GEU:
14865 return CC_DGEUmode;
14866 case NE:
14867 return CC_DNEmode;
14868 default:
14869 gcc_unreachable ();
14870 }
14871
14872 /* The remaining cases only occur when both comparisons are the
14873 same. */
14874 case NE:
14875 gcc_assert (cond1 == cond2);
14876 return CC_DNEmode;
14877
14878 case LE:
14879 gcc_assert (cond1 == cond2);
14880 return CC_DLEmode;
14881
14882 case GE:
14883 gcc_assert (cond1 == cond2);
14884 return CC_DGEmode;
14885
14886 case LEU:
14887 gcc_assert (cond1 == cond2);
14888 return CC_DLEUmode;
14889
14890 case GEU:
14891 gcc_assert (cond1 == cond2);
14892 return CC_DGEUmode;
14893
14894 default:
14895 gcc_unreachable ();
14896 }
14897 }
14898
14899 enum machine_mode
14900 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14901 {
14902 /* All floating point compares return CCFP if it is an equality
14903 comparison, and CCFPE otherwise. */
14904 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14905 {
14906 switch (op)
14907 {
14908 case EQ:
14909 case NE:
14910 case UNORDERED:
14911 case ORDERED:
14912 case UNLT:
14913 case UNLE:
14914 case UNGT:
14915 case UNGE:
14916 case UNEQ:
14917 case LTGT:
14918 return CCFPmode;
14919
14920 case LT:
14921 case LE:
14922 case GT:
14923 case GE:
14924 return CCFPEmode;
14925
14926 default:
14927 gcc_unreachable ();
14928 }
14929 }
14930
14931 /* A compare with a shifted operand. Because of canonicalization, the
14932 comparison will have to be swapped when we emit the assembler. */
14933 if (GET_MODE (y) == SImode
14934 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14935 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14936 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14937 || GET_CODE (x) == ROTATERT))
14938 return CC_SWPmode;
14939
14940 /* This operation is performed swapped, but since we only rely on the Z
14941 flag we don't need an additional mode. */
14942 if (GET_MODE (y) == SImode
14943 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14944 && GET_CODE (x) == NEG
14945 && (op == EQ || op == NE))
14946 return CC_Zmode;
14947
14948 /* This is a special case that is used by combine to allow a
14949 comparison of a shifted byte load to be split into a zero-extend
14950 followed by a comparison of the shifted integer (only valid for
14951 equalities and unsigned inequalities). */
14952 if (GET_MODE (x) == SImode
14953 && GET_CODE (x) == ASHIFT
14954 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14955 && GET_CODE (XEXP (x, 0)) == SUBREG
14956 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14957 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14958 && (op == EQ || op == NE
14959 || op == GEU || op == GTU || op == LTU || op == LEU)
14960 && CONST_INT_P (y))
14961 return CC_Zmode;
14962
14963 /* A construct for a conditional compare, if the false arm contains
14964 0, then both conditions must be true, otherwise either condition
14965 must be true. Not all conditions are possible, so CCmode is
14966 returned if it can't be done. */
14967 if (GET_CODE (x) == IF_THEN_ELSE
14968 && (XEXP (x, 2) == const0_rtx
14969 || XEXP (x, 2) == const1_rtx)
14970 && COMPARISON_P (XEXP (x, 0))
14971 && COMPARISON_P (XEXP (x, 1)))
14972 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14973 INTVAL (XEXP (x, 2)));
14974
14975 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14976 if (GET_CODE (x) == AND
14977 && (op == EQ || op == NE)
14978 && COMPARISON_P (XEXP (x, 0))
14979 && COMPARISON_P (XEXP (x, 1)))
14980 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14981 DOM_CC_X_AND_Y);
14982
14983 if (GET_CODE (x) == IOR
14984 && (op == EQ || op == NE)
14985 && COMPARISON_P (XEXP (x, 0))
14986 && COMPARISON_P (XEXP (x, 1)))
14987 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14988 DOM_CC_X_OR_Y);
14989
14990 /* An operation (on Thumb) where we want to test for a single bit.
14991 This is done by shifting that bit up into the top bit of a
14992 scratch register; we can then branch on the sign bit. */
14993 if (TARGET_THUMB1
14994 && GET_MODE (x) == SImode
14995 && (op == EQ || op == NE)
14996 && GET_CODE (x) == ZERO_EXTRACT
14997 && XEXP (x, 1) == const1_rtx)
14998 return CC_Nmode;
14999
15000 /* An operation that sets the condition codes as a side-effect, the
15001 V flag is not set correctly, so we can only use comparisons where
15002 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15003 instead.) */
15004 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15005 if (GET_MODE (x) == SImode
15006 && y == const0_rtx
15007 && (op == EQ || op == NE || op == LT || op == GE)
15008 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15009 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15010 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15011 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15012 || GET_CODE (x) == LSHIFTRT
15013 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15014 || GET_CODE (x) == ROTATERT
15015 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15016 return CC_NOOVmode;
15017
15018 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15019 return CC_Zmode;
15020
15021 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15022 && GET_CODE (x) == PLUS
15023 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15024 return CC_Cmode;
15025
15026 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15027 {
15028 switch (op)
15029 {
15030 case EQ:
15031 case NE:
15032 /* A DImode comparison against zero can be implemented by
15033 or'ing the two halves together. */
15034 if (y == const0_rtx)
15035 return CC_Zmode;
15036
15037 /* We can do an equality test in three Thumb instructions. */
15038 if (!TARGET_32BIT)
15039 return CC_Zmode;
15040
15041 /* FALLTHROUGH */
15042
15043 case LTU:
15044 case LEU:
15045 case GTU:
15046 case GEU:
15047 /* DImode unsigned comparisons can be implemented by cmp +
15048 cmpeq without a scratch register. Not worth doing in
15049 Thumb-2. */
15050 if (TARGET_32BIT)
15051 return CC_CZmode;
15052
15053 /* FALLTHROUGH */
15054
15055 case LT:
15056 case LE:
15057 case GT:
15058 case GE:
15059 /* DImode signed and unsigned comparisons can be implemented
15060 by cmp + sbcs with a scratch register, but that does not
15061 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15062 gcc_assert (op != EQ && op != NE);
15063 return CC_NCVmode;
15064
15065 default:
15066 gcc_unreachable ();
15067 }
15068 }
15069
15070 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15071 return GET_MODE (x);
15072
15073 return CCmode;
15074 }
15075
15076 /* X and Y are two things to compare using CODE.  Emit the compare insn and
15077 return the rtx for the CC register in the proper mode.  SCRATCH is a
15078 scratch register that may be needed for DImode comparisons after reload.  */
15079 rtx
15080 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15081 {
15082 enum machine_mode mode;
15083 rtx cc_reg;
15084 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15085
15086 /* We might have X as a constant, Y as a register because of the predicates
15087 used for cmpdi. If so, force X to a register here. */
15088 if (dimode_comparison && !REG_P (x))
15089 x = force_reg (DImode, x);
15090
15091 mode = SELECT_CC_MODE (code, x, y);
15092 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15093
15094 if (dimode_comparison
15095 && mode != CC_CZmode)
15096 {
15097 rtx clobber, set;
15098
15099 /* To compare two non-zero values for equality, XOR them and
15100 then compare against zero. Not used for ARM mode; there
15101 CC_CZmode is cheaper. */
15102 if (mode == CC_Zmode && y != const0_rtx)
15103 {
15104 gcc_assert (!reload_completed);
15105 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15106 y = const0_rtx;
15107 }
15108
15109 /* A scratch register is required. */
15110 if (reload_completed)
15111 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15112 else
15113 scratch = gen_rtx_SCRATCH (SImode);
15114
15115 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15116 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15117 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15118 }
15119 else
15120 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15121
15122 return cc_reg;
15123 }
15124
15125 /* Generate a sequence of insns that will generate the correct return
15126 address mask depending on the physical architecture that the program
15127 is running on. */
15128 rtx
15129 arm_gen_return_addr_mask (void)
15130 {
15131 rtx reg = gen_reg_rtx (Pmode);
15132
15133 emit_insn (gen_return_addr_mask (reg));
15134 return reg;
15135 }
15136
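/* Handle loading a half-word from memory during reload by synthesizing it as
   two byte loads (using part of the DImode scratch in operands[2]) that are
   then shifted and OR'd together into operands[0].  */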
15137 void
15138 arm_reload_in_hi (rtx *operands)
15139 {
15140 rtx ref = operands[1];
15141 rtx base, scratch;
15142 HOST_WIDE_INT offset = 0;
15143
15144 if (GET_CODE (ref) == SUBREG)
15145 {
15146 offset = SUBREG_BYTE (ref);
15147 ref = SUBREG_REG (ref);
15148 }
15149
15150 if (REG_P (ref))
15151 {
15152 /* We have a pseudo which has been spilt onto the stack; there
15153 are two cases here: the first where there is a simple
15154 stack-slot replacement and a second where the stack-slot is
15155 out of range, or is used as a subreg. */
15156 if (reg_equiv_mem (REGNO (ref)))
15157 {
15158 ref = reg_equiv_mem (REGNO (ref));
15159 base = find_replacement (&XEXP (ref, 0));
15160 }
15161 else
15162 /* The slot is out of range, or was dressed up in a SUBREG. */
15163 base = reg_equiv_address (REGNO (ref));
15164 }
15165 else
15166 base = find_replacement (&XEXP (ref, 0));
15167
15168 /* Handle the case where the address is too complex to be offset by 1. */
15169 if (GET_CODE (base) == MINUS
15170 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15171 {
15172 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15173
15174 emit_set_insn (base_plus, base);
15175 base = base_plus;
15176 }
15177 else if (GET_CODE (base) == PLUS)
15178 {
15179 /* The addend must be CONST_INT, or we would have dealt with it above. */
15180 HOST_WIDE_INT hi, lo;
15181
15182 offset += INTVAL (XEXP (base, 1));
15183 base = XEXP (base, 0);
15184
15185 /* Rework the address into a legal sequence of insns. */
15186 /* Valid range for lo is -4095 -> 4095 */
15187 lo = (offset >= 0
15188 ? (offset & 0xfff)
15189 : -((-offset) & 0xfff));
15190
15191 /* Corner case, if lo is the max offset then we would be out of range
15192 once we have added the additional 1 below, so bump the msb into the
15193 pre-loading insn(s). */
15194 if (lo == 4095)
15195 lo &= 0x7ff;
15196
15197 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15198 ^ (HOST_WIDE_INT) 0x80000000)
15199 - (HOST_WIDE_INT) 0x80000000);
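/* For example, offset 0x1234 splits into lo = 0x234 and hi = 0x1000,
   while offset -0x1234 splits into lo = -0x234 and hi = -0x1000.  */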
15200
15201 gcc_assert (hi + lo == offset);
15202
15203 if (hi != 0)
15204 {
15205 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15206
15207 /* Get the base address; addsi3 knows how to handle constants
15208 that require more than one insn. */
15209 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15210 base = base_plus;
15211 offset = lo;
15212 }
15213 }
15214
15215 /* Operands[2] may overlap operands[0] (though it won't overlap
15216 operands[1]), that's why we asked for a DImode reg -- so we can
15217 use the bit that does not overlap. */
15218 if (REGNO (operands[2]) == REGNO (operands[0]))
15219 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15220 else
15221 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15222
15223 emit_insn (gen_zero_extendqisi2 (scratch,
15224 gen_rtx_MEM (QImode,
15225 plus_constant (Pmode, base,
15226 offset))));
15227 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15228 gen_rtx_MEM (QImode,
15229 plus_constant (Pmode, base,
15230 offset + 1))));
15231 if (!BYTES_BIG_ENDIAN)
15232 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15233 gen_rtx_IOR (SImode,
15234 gen_rtx_ASHIFT
15235 (SImode,
15236 gen_rtx_SUBREG (SImode, operands[0], 0),
15237 GEN_INT (8)),
15238 scratch));
15239 else
15240 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15241 gen_rtx_IOR (SImode,
15242 gen_rtx_ASHIFT (SImode, scratch,
15243 GEN_INT (8)),
15244 gen_rtx_SUBREG (SImode, operands[0], 0)));
15245 }
15246
15247 /* Handle storing a half-word to memory during reload by synthesizing as two
15248 byte stores. Take care not to clobber the input values until after we
15249 have moved them somewhere safe. This code assumes that if the DImode
15250 scratch in operands[2] overlaps either the input value or output address
15251 in some way, then that value must die in this insn (we absolutely need
15252 two scratch registers for some corner cases). */
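/* In outline, for a little-endian target the sequence emitted below is:
   strb outval, [base, #offset]
   mov scratch, outval, lsr #8
   strb scratch, [base, #offset + 1]
   with the two byte stores swapped for big-endian targets.  */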
15253 void
15254 arm_reload_out_hi (rtx *operands)
15255 {
15256 rtx ref = operands[0];
15257 rtx outval = operands[1];
15258 rtx base, scratch;
15259 HOST_WIDE_INT offset = 0;
15260
15261 if (GET_CODE (ref) == SUBREG)
15262 {
15263 offset = SUBREG_BYTE (ref);
15264 ref = SUBREG_REG (ref);
15265 }
15266
15267 if (REG_P (ref))
15268 {
15269 /* We have a pseudo which has been spilt onto the stack; there
15270 are two cases here: the first where there is a simple
15271 stack-slot replacement and a second where the stack-slot is
15272 out of range, or is used as a subreg. */
15273 if (reg_equiv_mem (REGNO (ref)))
15274 {
15275 ref = reg_equiv_mem (REGNO (ref));
15276 base = find_replacement (&XEXP (ref, 0));
15277 }
15278 else
15279 /* The slot is out of range, or was dressed up in a SUBREG. */
15280 base = reg_equiv_address (REGNO (ref));
15281 }
15282 else
15283 base = find_replacement (&XEXP (ref, 0));
15284
15285 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15286
15287 /* Handle the case where the address is too complex to be offset by 1. */
15288 if (GET_CODE (base) == MINUS
15289 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15290 {
15291 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15292
15293 /* Be careful not to destroy OUTVAL. */
15294 if (reg_overlap_mentioned_p (base_plus, outval))
15295 {
15296 /* Updating base_plus might destroy outval, see if we can
15297 swap the scratch and base_plus. */
15298 if (!reg_overlap_mentioned_p (scratch, outval))
15299 {
15300 rtx tmp = scratch;
15301 scratch = base_plus;
15302 base_plus = tmp;
15303 }
15304 else
15305 {
15306 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15307
15308 /* Be conservative and copy OUTVAL into the scratch now,
15309 this should only be necessary if outval is a subreg
15310 of something larger than a word. */
15311 /* XXX Might this clobber base? I can't see how it can,
15312 since scratch is known to overlap with OUTVAL, and
15313 must be wider than a word. */
15314 emit_insn (gen_movhi (scratch_hi, outval));
15315 outval = scratch_hi;
15316 }
15317 }
15318
15319 emit_set_insn (base_plus, base);
15320 base = base_plus;
15321 }
15322 else if (GET_CODE (base) == PLUS)
15323 {
15324 /* The addend must be CONST_INT, or we would have dealt with it above. */
15325 HOST_WIDE_INT hi, lo;
15326
15327 offset += INTVAL (XEXP (base, 1));
15328 base = XEXP (base, 0);
15329
15330 /* Rework the address into a legal sequence of insns. */
15331 /* Valid range for lo is -4095 -> 4095 */
15332 lo = (offset >= 0
15333 ? (offset & 0xfff)
15334 : -((-offset) & 0xfff));
15335
15336 /* Corner case, if lo is the max offset then we would be out of range
15337 once we have added the additional 1 below, so bump the msb into the
15338 pre-loading insn(s). */
15339 if (lo == 4095)
15340 lo &= 0x7ff;
15341
15342 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15343 ^ (HOST_WIDE_INT) 0x80000000)
15344 - (HOST_WIDE_INT) 0x80000000);
15345
15346 gcc_assert (hi + lo == offset);
15347
15348 if (hi != 0)
15349 {
15350 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15351
15352 /* Be careful not to destroy OUTVAL. */
15353 if (reg_overlap_mentioned_p (base_plus, outval))
15354 {
15355 /* Updating base_plus might destroy outval, see if we
15356 can swap the scratch and base_plus. */
15357 if (!reg_overlap_mentioned_p (scratch, outval))
15358 {
15359 rtx tmp = scratch;
15360 scratch = base_plus;
15361 base_plus = tmp;
15362 }
15363 else
15364 {
15365 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15366
15367 /* Be conservative and copy outval into scratch now,
15368 this should only be necessary if outval is a
15369 subreg of something larger than a word. */
15370 /* XXX Might this clobber base? I can't see how it
15371 can, since scratch is known to overlap with
15372 outval. */
15373 emit_insn (gen_movhi (scratch_hi, outval));
15374 outval = scratch_hi;
15375 }
15376 }
15377
15378 /* Get the base address; addsi3 knows how to handle constants
15379 that require more than one insn. */
15380 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15381 base = base_plus;
15382 offset = lo;
15383 }
15384 }
15385
15386 if (BYTES_BIG_ENDIAN)
15387 {
15388 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15389 plus_constant (Pmode, base,
15390 offset + 1)),
15391 gen_lowpart (QImode, outval)));
15392 emit_insn (gen_lshrsi3 (scratch,
15393 gen_rtx_SUBREG (SImode, outval, 0),
15394 GEN_INT (8)));
15395 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15396 offset)),
15397 gen_lowpart (QImode, scratch)));
15398 }
15399 else
15400 {
15401 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15402 offset)),
15403 gen_lowpart (QImode, outval)));
15404 emit_insn (gen_lshrsi3 (scratch,
15405 gen_rtx_SUBREG (SImode, outval, 0),
15406 GEN_INT (8)));
15407 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15408 plus_constant (Pmode, base,
15409 offset + 1)),
15410 gen_lowpart (QImode, scratch)));
15411 }
15412 }
15413
15414 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15415 (padded to the size of a word) should be passed in a register. */
15416
15417 static bool
15418 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15419 {
15420 if (TARGET_AAPCS_BASED)
15421 return must_pass_in_stack_var_size (mode, type);
15422 else
15423 return must_pass_in_stack_var_size_or_pad (mode, type);
15424 }
15425
15426
15427 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15428 Return true if an argument passed on the stack should be padded upwards,
15429 i.e. if the least-significant byte has useful data.
15430 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15431 aggregate types are placed in the lowest memory address. */
15432
15433 bool
15434 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15435 {
15436 if (!TARGET_AAPCS_BASED)
15437 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15438
15439 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15440 return false;
15441
15442 return true;
15443 }
15444
15445
15446 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15447 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15448 register has useful data, and return the opposite if the most
15449 significant byte does. */
15450
15451 bool
15452 arm_pad_reg_upward (enum machine_mode mode,
15453 tree type, int first ATTRIBUTE_UNUSED)
15454 {
15455 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15456 {
15457 /* For AAPCS, small aggregates, small fixed-point types,
15458 and small complex types are always padded upwards. */
15459 if (type)
15460 {
15461 if ((AGGREGATE_TYPE_P (type)
15462 || TREE_CODE (type) == COMPLEX_TYPE
15463 || FIXED_POINT_TYPE_P (type))
15464 && int_size_in_bytes (type) <= 4)
15465 return true;
15466 }
15467 else
15468 {
15469 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15470 && GET_MODE_SIZE (mode) <= 4)
15471 return true;
15472 }
15473 }
15474
15475 /* Otherwise, use default padding. */
15476 return !BYTES_BIG_ENDIAN;
15477 }
15478
15479 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15480 assuming that the address in the base register is word aligned. */
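/* For example, offset 252 is accepted in both ARM and Thumb-2 state,
   offset 1020 only in Thumb-2 state, and offset 2 only in ARM state
   (Thumb-2 requires a multiple of 4).  */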
15481 bool
15482 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15483 {
15484 HOST_WIDE_INT max_offset;
15485
15486 /* Offset must be a multiple of 4 in Thumb mode. */
15487 if (TARGET_THUMB2 && ((offset & 3) != 0))
15488 return false;
15489
15490 if (TARGET_THUMB2)
15491 max_offset = 1020;
15492 else if (TARGET_ARM)
15493 max_offset = 255;
15494 else
15495 return false;
15496
15497 return ((offset <= max_offset) && (offset >= -max_offset));
15498 }
15499
15500 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15501 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15502 Assumes that the address in the base register RN is word aligned. Pattern
15503 guarantees that both memory accesses use the same base register,
15504 the offsets are constants within the range, and the gap between the offsets is 4.
15505 If reload is complete, then check that the registers are legal. WBACK indicates whether
15506 address is updated. LOAD indicates whether memory access is load or store. */
15507 bool
15508 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15509 bool wback, bool load)
15510 {
15511 unsigned int t, t2, n;
15512
15513 if (!reload_completed)
15514 return true;
15515
15516 if (!offset_ok_for_ldrd_strd (offset))
15517 return false;
15518
15519 t = REGNO (rt);
15520 t2 = REGNO (rt2);
15521 n = REGNO (rn);
15522
15523 if ((TARGET_THUMB2)
15524 && ((wback && (n == t || n == t2))
15525 || (t == SP_REGNUM)
15526 || (t == PC_REGNUM)
15527 || (t2 == SP_REGNUM)
15528 || (t2 == PC_REGNUM)
15529 || (!load && (n == PC_REGNUM))
15530 || (load && (t == t2))
15531 /* Triggers Cortex-M3 LDRD errata. */
15532 || (!wback && load && fix_cm3_ldrd && (n == t))))
15533 return false;
15534
15535 if ((TARGET_ARM)
15536 && ((wback && (n == t || n == t2))
15537 || (t2 == PC_REGNUM)
15538 || (t % 2 != 0) /* First destination register is not even. */
15539 || (t2 != t + 1)
15540 /* PC can be used as base register (for offset addressing only),
15541 but it is deprecated. */
15542 || (n == PC_REGNUM)))
15543 return false;
15544
15545 return true;
15546 }
15547
15548 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15549 operand MEM's address contains an immediate offset from the base
15550 register and has no side effects, in which case it sets BASE and
15551 OFFSET accordingly. */
15552 static bool
15553 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15554 {
15555 rtx addr;
15556
15557 gcc_assert (base != NULL && offset != NULL);
15558
15559 /* TODO: Handle more general memory operand patterns, such as
15560 PRE_DEC and PRE_INC. */
15561
15562 if (side_effects_p (mem))
15563 return false;
15564
15565 /* Can't deal with subregs. */
15566 if (GET_CODE (mem) == SUBREG)
15567 return false;
15568
15569 gcc_assert (MEM_P (mem));
15570
15571 *offset = const0_rtx;
15572
15573 addr = XEXP (mem, 0);
15574
15575 /* If addr isn't valid for DImode, then we can't handle it. */
15576 if (!arm_legitimate_address_p (DImode, addr,
15577 reload_in_progress || reload_completed))
15578 return false;
15579
15580 if (REG_P (addr))
15581 {
15582 *base = addr;
15583 return true;
15584 }
15585 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15586 {
15587 *base = XEXP (addr, 0);
15588 *offset = XEXP (addr, 1);
15589 return (REG_P (*base) && CONST_INT_P (*offset));
15590 }
15591
15592 return false;
15593 }
15594
15595 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15596
15597 /* Called from a peephole2 to replace two word-size accesses with a
15598 single LDRD/STRD instruction. Returns true iff we can generate a
15599 new instruction sequence. That is, both accesses use the same base
15600 register and the gap between constant offsets is 4. This function
15601 may reorder its operands to match ldrd/strd RTL templates.
15602 OPERANDS are the operands found by the peephole matcher;
15603 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15604 corresponding memory operands. LOAD indicates whether the access
15605 is a load or a store. CONST_STORE indicates a store of constant
15606 integer values held in OPERANDS[4,5], and assumes that the pattern
15607 is 4 insns long, for the purpose of checking dead registers.
15608 COMMUTE indicates that register operands may be reordered. */
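/* Illustrative only: in the load case the peephole using this function turns
   ldr r0, [r2]
   ldr r1, [r2, #4]
   into a single
   ldrd r0, r1, [r2]
   provided the register and offset checks below succeed.  */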
15609 bool
15610 gen_operands_ldrd_strd (rtx *operands, bool load,
15611 bool const_store, bool commute)
15612 {
15613 int nops = 2;
15614 HOST_WIDE_INT offsets[2], offset;
15615 rtx base = NULL_RTX;
15616 rtx cur_base, cur_offset, tmp;
15617 int i, gap;
15618 HARD_REG_SET regset;
15619
15620 gcc_assert (!const_store || !load);
15621 /* Check that the memory references are immediate offsets from the
15622 same base register. Extract the base register, the destination
15623 registers, and the corresponding memory offsets. */
15624 for (i = 0; i < nops; i++)
15625 {
15626 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15627 return false;
15628
15629 if (i == 0)
15630 base = cur_base;
15631 else if (REGNO (base) != REGNO (cur_base))
15632 return false;
15633
15634 offsets[i] = INTVAL (cur_offset);
15635 if (GET_CODE (operands[i]) == SUBREG)
15636 {
15637 tmp = SUBREG_REG (operands[i]);
15638 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15639 operands[i] = tmp;
15640 }
15641 }
15642
15643 /* Make sure there is no dependency between the individual loads. */
15644 if (load && REGNO (operands[0]) == REGNO (base))
15645 return false; /* RAW */
15646
15647 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15648 return false; /* WAW */
15649
15650 /* If the same input register is used in both stores
15651 when storing different constants, try to find a free register.
15652 For example, the code
15653 mov r0, 0
15654 str r0, [r2]
15655 mov r0, 1
15656 str r0, [r2, #4]
15657 can be transformed into
15658 mov r1, 0
15659 strd r1, r0, [r2]
15660 in Thumb mode assuming that r1 is free. */
15661 if (const_store
15662 && REGNO (operands[0]) == REGNO (operands[1])
15663 && INTVAL (operands[4]) != INTVAL (operands[5]))
15664 {
15665 if (TARGET_THUMB2)
15666 {
15667 CLEAR_HARD_REG_SET (regset);
15668 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15669 if (tmp == NULL_RTX)
15670 return false;
15671
15672 /* Use the new register in the first load to ensure that
15673 if the original input register is not dead after peephole,
15674 then it will have the correct constant value. */
15675 operands[0] = tmp;
15676 }
15677 else if (TARGET_ARM)
15678 {
15679 return false; /* ??? This early return disables the ARM-state handling below; the rest of this block is currently unreachable. */
15680 int regno = REGNO (operands[0]);
15681 if (!peep2_reg_dead_p (4, operands[0]))
15682 {
15683 /* When the input register is even and is not dead after the
15684 pattern, it has to hold the second constant but we cannot
15685 form a legal STRD in ARM mode with this register as the second
15686 register. */
15687 if (regno % 2 == 0)
15688 return false;
15689
15690 /* Is regno-1 free? */
15691 SET_HARD_REG_SET (regset);
15692 CLEAR_HARD_REG_BIT(regset, regno - 1);
15693 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15694 if (tmp == NULL_RTX)
15695 return false;
15696 /* At least a word, but less than a double-word, left to copy. Copy a word. */
15697 operands[0] = tmp;
15698 }
15699 else
15700 {
15701 /* Find a DImode register. */
15702 CLEAR_HARD_REG_SET (regset);
15703 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15704 if (tmp != NULL_RTX)
15705 {
15706 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15707 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15708 }
15709 else
15710 {
15711 /* Can we use the input register to form a DI register? */
15712 SET_HARD_REG_SET (regset);
15713 CLEAR_HARD_REG_BIT(regset,
15714 regno % 2 == 0 ? regno + 1 : regno - 1);
15715 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15716 if (tmp == NULL_RTX)
15717 return false;
15718 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15719 }
15720 }
15721
15722 gcc_assert (operands[0] != NULL_RTX);
15723 gcc_assert (operands[1] != NULL_RTX);
15724 gcc_assert (REGNO (operands[0]) % 2 == 0);
15725 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15726 }
15727 }
15728
15729 /* Make sure the instructions are ordered with lower memory access first. */
15730 if (offsets[0] > offsets[1])
15731 {
15732 gap = offsets[0] - offsets[1];
15733 offset = offsets[1];
15734
15735 /* Swap the instructions such that lower memory is accessed first. */
15736 SWAP_RTX (operands[0], operands[1]);
15737 SWAP_RTX (operands[2], operands[3]);
15738 if (const_store)
15739 SWAP_RTX (operands[4], operands[5]);
15740 }
15741 else
15742 {
15743 gap = offsets[1] - offsets[0];
15744 offset = offsets[0];
15745 }
15746
15747 /* Make sure accesses are to consecutive memory locations. */
15748 if (gap != 4)
15749 return false;
15750
15751 /* Make sure we generate legal instructions. */
15752 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15753 false, load))
15754 return true;
15755
15756 /* In Thumb state, where the registers are almost unconstrained, there
15757 is little hope of fixing it by reordering. */
15758 if (TARGET_THUMB2)
15759 return false;
15760
15761 if (load && commute)
15762 {
15763 /* Try reordering registers. */
15764 SWAP_RTX (operands[0], operands[1]);
15765 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15766 false, load))
15767 return true;
15768 }
15769
15770 if (const_store)
15771 {
15772 /* If input registers are dead after this pattern, they can be
15773 reordered or replaced by other registers that are free in the
15774 current pattern. */
15775 if (!peep2_reg_dead_p (4, operands[0])
15776 || !peep2_reg_dead_p (4, operands[1]))
15777 return false;
15778
15779 /* Try to reorder the input registers. */
15780 /* For example, the code
15781 mov r0, 0
15782 mov r1, 1
15783 str r1, [r2]
15784 str r0, [r2, #4]
15785 can be transformed into
15786 mov r1, 0
15787 mov r0, 1
15788 strd r0, [r2]
15789 */
15790 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15791 false, false))
15792 {
15793 SWAP_RTX (operands[0], operands[1]);
15794 return true;
15795 }
15796
15797 /* Try to find a free DI register. */
15798 CLEAR_HARD_REG_SET (regset);
15799 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15800 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15801 while (true)
15802 {
15803 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15804 if (tmp == NULL_RTX)
15805 return false;
15806
15807 /* DREG must be an even-numbered register in DImode.
15808 Split it into SI registers. */
15809 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15810 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15811 gcc_assert (operands[0] != NULL_RTX);
15812 gcc_assert (operands[1] != NULL_RTX);
15813 gcc_assert (REGNO (operands[0]) % 2 == 0);
15814 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15815
15816 return (operands_ok_ldrd_strd (operands[0], operands[1],
15817 base, offset,
15818 false, load));
15819 }
15820 }
15821
15822 return false;
15823 }
15824 #undef SWAP_RTX
15825
15826
15827
15828 \f
15829 /* Print a symbolic form of X to the debug file, F. */
15830 static void
15831 arm_print_value (FILE *f, rtx x)
15832 {
15833 switch (GET_CODE (x))
15834 {
15835 case CONST_INT:
15836 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15837 return;
15838
15839 case CONST_DOUBLE:
15840 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15841 return;
15842
15843 case CONST_VECTOR:
15844 {
15845 int i;
15846
15847 fprintf (f, "<");
15848 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15849 {
15850 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15851 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15852 fputc (',', f);
15853 }
15854 fprintf (f, ">");
15855 }
15856 return;
15857
15858 case CONST_STRING:
15859 fprintf (f, "\"%s\"", XSTR (x, 0));
15860 return;
15861
15862 case SYMBOL_REF:
15863 fprintf (f, "`%s'", XSTR (x, 0));
15864 return;
15865
15866 case LABEL_REF:
15867 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15868 return;
15869
15870 case CONST:
15871 arm_print_value (f, XEXP (x, 0));
15872 return;
15873
15874 case PLUS:
15875 arm_print_value (f, XEXP (x, 0));
15876 fprintf (f, "+");
15877 arm_print_value (f, XEXP (x, 1));
15878 return;
15879
15880 case PC:
15881 fprintf (f, "pc");
15882 return;
15883
15884 default:
15885 fprintf (f, "????");
15886 return;
15887 }
15888 }
15889 \f
15890 /* Routines for manipulation of the constant pool. */
15891
15892 /* Arm instructions cannot load a large constant directly into a
15893 register; they have to come from a pc relative load. The constant
15894 must therefore be placed in the addressable range of the pc
15895 relative load. Depending on the precise pc relative load
15896 instruction the range is somewhere between 256 bytes and 4k. This
15897 means that we often have to dump a constant inside a function, and
15898 generate code to branch around it.
15899
15900 It is important to minimize this, since the branches will slow
15901 things down and make the code larger.
15902
15903 Normally we can hide the table after an existing unconditional
15904 branch so that there is no interruption of the flow, but in the
15905 worst case the code looks like this:
15906
15907 ldr rn, L1
15908 ...
15909 b L2
15910 align
15911 L1: .long value
15912 L2:
15913 ...
15914
15915 ldr rn, L3
15916 ...
15917 b L4
15918 align
15919 L3: .long value
15920 L4:
15921 ...
15922
15923 We fix this by performing a scan after scheduling, which notices
15924 which instructions need to have their operands fetched from the
15925 constant table and builds the table.
15926
15927 The algorithm starts by building a table of all the constants that
15928 need fixing up and all the natural barriers in the function (places
15929 where a constant table can be dropped without breaking the flow).
15930 For each fixup we note how far the pc-relative replacement will be
15931 able to reach and the offset of the instruction into the function.
15932
15933 Having built the table we then group the fixes together to form
15934 tables that are as large as possible (subject to addressing
15935 constraints) and emit each table of constants after the last
15936 barrier that is within range of all the instructions in the group.
15937 If a group does not contain a barrier, then we forcibly create one
15938 by inserting a jump instruction into the flow. Once the table has
15939 been inserted, the insns are then modified to reference the
15940 relevant entry in the pool.
15941
15942 Possible enhancements to the algorithm (not implemented) are:
15943
15944 1) For some processors and object formats, there may be benefit in
15945 aligning the pools to the start of cache lines; this alignment
15946 would need to be taken into account when calculating addressability
15947 of a pool. */
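/* As a rough guide to the ranges involved: an ARM-state LDR (literal) can
   reach about 4K bytes either side of the pc, whereas some coprocessor and
   Thumb loads reach considerably less; the forwards/backwards ranges
   recorded for each fix reflect these per-instruction limits.  */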
15948
15949 /* These typedefs are located at the start of this file, so that
15950 they can be used in the prototypes there. This comment is to
15951 remind readers of that fact so that the following structures
15952 can be understood more easily.
15953
15954 typedef struct minipool_node Mnode;
15955 typedef struct minipool_fixup Mfix; */
15956
15957 struct minipool_node
15958 {
15959 /* Doubly linked chain of entries. */
15960 Mnode * next;
15961 Mnode * prev;
15962 /* The maximum offset into the code that this entry can be placed. While
15963 pushing fixes for forward references, all entries are sorted in order
15964 of increasing max_address. */
15965 HOST_WIDE_INT max_address;
15966 /* Similarly for an entry inserted for a backwards ref. */
15967 HOST_WIDE_INT min_address;
15968 /* The number of fixes referencing this entry. This can become zero
15969 if we "unpush" an entry. In this case we ignore the entry when we
15970 come to emit the code. */
15971 int refcount;
15972 /* The offset from the start of the minipool. */
15973 HOST_WIDE_INT offset;
15974 /* The value in table. */
15975 rtx value;
15976 /* The mode of value. */
15977 enum machine_mode mode;
15978 /* The size of the value. With iWMMXt enabled
15979 sizes > 4 also imply an alignment of 8 bytes. */
15980 int fix_size;
15981 };
15982
15983 struct minipool_fixup
15984 {
15985 Mfix * next;
15986 rtx insn;
15987 HOST_WIDE_INT address;
15988 rtx * loc;
15989 enum machine_mode mode;
15990 int fix_size;
15991 rtx value;
15992 Mnode * minipool;
15993 HOST_WIDE_INT forwards;
15994 HOST_WIDE_INT backwards;
15995 };
15996
15997 /* Fixes less than a word need padding out to a word boundary. */
15998 #define MINIPOOL_FIX_SIZE(mode) \
15999 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
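/* For example, a QImode or HImode constant still occupies 4 bytes in the
   pool, while DImode and vector constants keep their natural size.  */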
16000
16001 static Mnode * minipool_vector_head;
16002 static Mnode * minipool_vector_tail;
16003 static rtx minipool_vector_label;
16004 static int minipool_pad;
16005
16006 /* The linked list of all minipool fixes required for this function. */
16007 Mfix * minipool_fix_head;
16008 Mfix * minipool_fix_tail;
16009 /* The fix entry for the current minipool, once it has been placed. */
16010 Mfix * minipool_barrier;
16011
16012 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16013 #define JUMP_TABLES_IN_TEXT_SECTION 0
16014 #endif
16015
16016 static HOST_WIDE_INT
16017 get_jump_table_size (rtx insn)
16018 {
16019 /* ADDR_VECs only take room if read-only data goes into the text
16020 section. */
16021 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16022 {
16023 rtx body = PATTERN (insn);
16024 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16025 HOST_WIDE_INT size;
16026 HOST_WIDE_INT modesize;
16027
16028 modesize = GET_MODE_SIZE (GET_MODE (body));
16029 size = modesize * XVECLEN (body, elt);
16030 switch (modesize)
16031 {
16032 case 1:
16033 /* Round up size of TBB table to a halfword boundary. */
16034 size = (size + 1) & ~(HOST_WIDE_INT)1;
16035 break;
16036 case 2:
16037 /* No padding necessary for TBH. */
16038 break;
16039 case 4:
16040 /* Add two bytes for alignment on Thumb. */
16041 if (TARGET_THUMB)
16042 size += 2;
16043 break;
16044 default:
16045 gcc_unreachable ();
16046 }
16047 return size;
16048 }
16049
16050 return 0;
16051 }
16052
16053 /* Return the maximum amount of padding that will be inserted before
16054 label LABEL. */
16055
16056 static HOST_WIDE_INT
16057 get_label_padding (rtx label)
16058 {
16059 HOST_WIDE_INT align, min_insn_size;
16060
16061 align = 1 << label_to_alignment (label);
16062 min_insn_size = TARGET_THUMB ? 2 : 4;
16063 return align > min_insn_size ? align - min_insn_size : 0;
16064 }
16065
16066 /* Move a minipool fix MP from its current location to before MAX_MP.
16067 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16068 constraints may need updating. */
16069 static Mnode *
16070 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16071 HOST_WIDE_INT max_address)
16072 {
16073 /* The code below assumes these are different. */
16074 gcc_assert (mp != max_mp);
16075
16076 if (max_mp == NULL)
16077 {
16078 if (max_address < mp->max_address)
16079 mp->max_address = max_address;
16080 }
16081 else
16082 {
16083 if (max_address > max_mp->max_address - mp->fix_size)
16084 mp->max_address = max_mp->max_address - mp->fix_size;
16085 else
16086 mp->max_address = max_address;
16087
16088 /* Unlink MP from its current position. Since max_mp is non-null,
16089 mp->prev must be non-null. */
16090 mp->prev->next = mp->next;
16091 if (mp->next != NULL)
16092 mp->next->prev = mp->prev;
16093 else
16094 minipool_vector_tail = mp->prev;
16095
16096 /* Re-insert it before MAX_MP. */
16097 mp->next = max_mp;
16098 mp->prev = max_mp->prev;
16099 max_mp->prev = mp;
16100
16101 if (mp->prev != NULL)
16102 mp->prev->next = mp;
16103 else
16104 minipool_vector_head = mp;
16105 }
16106
16107 /* Save the new entry. */
16108 max_mp = mp;
16109
16110 /* Scan over the preceding entries and adjust their addresses as
16111 required. */
16112 while (mp->prev != NULL
16113 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16114 {
16115 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16116 mp = mp->prev;
16117 }
16118
16119 return max_mp;
16120 }
16121
16122 /* Add a constant to the minipool for a forward reference. Returns the
16123 node added or NULL if the constant will not fit in this pool. */
16124 static Mnode *
16125 add_minipool_forward_ref (Mfix *fix)
16126 {
16127 /* If set, max_mp is the first pool_entry that has a lower
16128 constraint than the one we are trying to add. */
16129 Mnode * max_mp = NULL;
16130 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16131 Mnode * mp;
16132
16133 /* If the minipool starts before the end of FIX->INSN then this FIX
16134 can not be placed into the current pool. Furthermore, adding the
16135 new constant pool entry may cause the pool to start FIX_SIZE bytes
16136 earlier. */
16137 if (minipool_vector_head &&
16138 (fix->address + get_attr_length (fix->insn)
16139 >= minipool_vector_head->max_address - fix->fix_size))
16140 return NULL;
16141
16142 /* Scan the pool to see if a constant with the same value has
16143 already been added. While we are doing this, also note the
16144 location where we must insert the constant if it doesn't already
16145 exist. */
16146 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16147 {
16148 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16149 && fix->mode == mp->mode
16150 && (!LABEL_P (fix->value)
16151 || (CODE_LABEL_NUMBER (fix->value)
16152 == CODE_LABEL_NUMBER (mp->value)))
16153 && rtx_equal_p (fix->value, mp->value))
16154 {
16155 /* More than one fix references this entry. */
16156 mp->refcount++;
16157 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16158 }
16159
16160 /* Note the insertion point if necessary. */
16161 if (max_mp == NULL
16162 && mp->max_address > max_address)
16163 max_mp = mp;
16164
16165 /* If we are inserting an 8-byte aligned quantity and
16166 we have not already found an insertion point, then
16167 make sure that all such 8-byte aligned quantities are
16168 placed at the start of the pool. */
16169 if (ARM_DOUBLEWORD_ALIGN
16170 && max_mp == NULL
16171 && fix->fix_size >= 8
16172 && mp->fix_size < 8)
16173 {
16174 max_mp = mp;
16175 max_address = mp->max_address;
16176 }
16177 }
16178
16179 /* The value is not currently in the minipool, so we need to create
16180 a new entry for it. If MAX_MP is NULL, the entry will be put on
16181 the end of the list since the placement is less constrained than
16182 any existing entry. Otherwise, we insert the new fix before
16183 MAX_MP and, if necessary, adjust the constraints on the other
16184 entries. */
16185 mp = XNEW (Mnode);
16186 mp->fix_size = fix->fix_size;
16187 mp->mode = fix->mode;
16188 mp->value = fix->value;
16189 mp->refcount = 1;
16190 /* Not yet required for a backwards ref. */
16191 mp->min_address = -65536;
16192
16193 if (max_mp == NULL)
16194 {
16195 mp->max_address = max_address;
16196 mp->next = NULL;
16197 mp->prev = minipool_vector_tail;
16198
16199 if (mp->prev == NULL)
16200 {
16201 minipool_vector_head = mp;
16202 minipool_vector_label = gen_label_rtx ();
16203 }
16204 else
16205 mp->prev->next = mp;
16206
16207 minipool_vector_tail = mp;
16208 }
16209 else
16210 {
16211 if (max_address > max_mp->max_address - mp->fix_size)
16212 mp->max_address = max_mp->max_address - mp->fix_size;
16213 else
16214 mp->max_address = max_address;
16215
16216 mp->next = max_mp;
16217 mp->prev = max_mp->prev;
16218 max_mp->prev = mp;
16219 if (mp->prev != NULL)
16220 mp->prev->next = mp;
16221 else
16222 minipool_vector_head = mp;
16223 }
16224
16225 /* Save the new entry. */
16226 max_mp = mp;
16227
16228 /* Scan over the preceding entries and adjust their addresses as
16229 required. */
16230 while (mp->prev != NULL
16231 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16232 {
16233 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16234 mp = mp->prev;
16235 }
16236
16237 return max_mp;
16238 }
16239
16240 static Mnode *
16241 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16242 HOST_WIDE_INT min_address)
16243 {
16244 HOST_WIDE_INT offset;
16245
16246 /* The code below assumes these are different. */
16247 gcc_assert (mp != min_mp);
16248
16249 if (min_mp == NULL)
16250 {
16251 if (min_address > mp->min_address)
16252 mp->min_address = min_address;
16253 }
16254 else
16255 {
16256 /* We will adjust this below if it is too loose. */
16257 mp->min_address = min_address;
16258
16259 /* Unlink MP from its current position. Since min_mp is non-null,
16260 mp->next must be non-null. */
16261 mp->next->prev = mp->prev;
16262 if (mp->prev != NULL)
16263 mp->prev->next = mp->next;
16264 else
16265 minipool_vector_head = mp->next;
16266
16267 /* Reinsert it after MIN_MP. */
16268 mp->prev = min_mp;
16269 mp->next = min_mp->next;
16270 min_mp->next = mp;
16271 if (mp->next != NULL)
16272 mp->next->prev = mp;
16273 else
16274 minipool_vector_tail = mp;
16275 }
16276
16277 min_mp = mp;
16278
16279 offset = 0;
16280 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16281 {
16282 mp->offset = offset;
16283 if (mp->refcount > 0)
16284 offset += mp->fix_size;
16285
16286 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16287 mp->next->min_address = mp->min_address + mp->fix_size;
16288 }
16289
16290 return min_mp;
16291 }
16292
16293 /* Add a constant to the minipool for a backward reference. Returns the
16294 node added or NULL if the constant will not fit in this pool.
16295
16296 Note that the code for insertion for a backwards reference can be
16297 somewhat confusing because the calculated offsets for each fix do
16298 not take into account the size of the pool (which is still under
16299 construction). */
16300 static Mnode *
16301 add_minipool_backward_ref (Mfix *fix)
16302 {
16303 /* If set, min_mp is the last pool_entry that has a lower constraint
16304 than the one we are trying to add. */
16305 Mnode *min_mp = NULL;
16306 /* This can be negative, since it is only a constraint. */
16307 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16308 Mnode *mp;
16309
16310 /* If we can't reach the current pool from this insn, or if we can't
16311 insert this entry at the end of the pool without pushing other
16312 fixes out of range, then we don't try. This ensures that we
16313 can't fail later on. */
16314 if (min_address >= minipool_barrier->address
16315 || (minipool_vector_tail->min_address + fix->fix_size
16316 >= minipool_barrier->address))
16317 return NULL;
16318
16319 /* Scan the pool to see if a constant with the same value has
16320 already been added. While we are doing this, also note the
16321 location where we must insert the constant if it doesn't already
16322 exist. */
16323 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16324 {
16325 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16326 && fix->mode == mp->mode
16327 && (!LABEL_P (fix->value)
16328 || (CODE_LABEL_NUMBER (fix->value)
16329 == CODE_LABEL_NUMBER (mp->value)))
16330 && rtx_equal_p (fix->value, mp->value)
16331 /* Check that there is enough slack to move this entry to the
16332 end of the table (this is conservative). */
16333 && (mp->max_address
16334 > (minipool_barrier->address
16335 + minipool_vector_tail->offset
16336 + minipool_vector_tail->fix_size)))
16337 {
16338 mp->refcount++;
16339 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16340 }
16341
16342 if (min_mp != NULL)
16343 mp->min_address += fix->fix_size;
16344 else
16345 {
16346 /* Note the insertion point if necessary. */
16347 if (mp->min_address < min_address)
16348 {
16349 /* For now, we do not allow the insertion of 8-byte alignment
16350 requiring nodes anywhere but at the start of the pool. */
16351 if (ARM_DOUBLEWORD_ALIGN
16352 && fix->fix_size >= 8 && mp->fix_size < 8)
16353 return NULL;
16354 else
16355 min_mp = mp;
16356 }
16357 else if (mp->max_address
16358 < minipool_barrier->address + mp->offset + fix->fix_size)
16359 {
16360 /* Inserting before this entry would push the fix beyond
16361 its maximum address (which can happen if we have
16362 re-located a forwards fix); force the new fix to come
16363 after it. */
16364 if (ARM_DOUBLEWORD_ALIGN
16365 && fix->fix_size >= 8 && mp->fix_size < 8)
16366 return NULL;
16367 else
16368 {
16369 min_mp = mp;
16370 min_address = mp->min_address + fix->fix_size;
16371 }
16372 }
16373 /* Do not insert a non-8-byte aligned quantity before 8-byte
16374 aligned quantities. */
16375 else if (ARM_DOUBLEWORD_ALIGN
16376 && fix->fix_size < 8
16377 && mp->fix_size >= 8)
16378 {
16379 min_mp = mp;
16380 min_address = mp->min_address + fix->fix_size;
16381 }
16382 }
16383 }
16384
16385 /* We need to create a new entry. */
16386 mp = XNEW (Mnode);
16387 mp->fix_size = fix->fix_size;
16388 mp->mode = fix->mode;
16389 mp->value = fix->value;
16390 mp->refcount = 1;
16391 mp->max_address = minipool_barrier->address + 65536;
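/* Only the minimum address really constrains a backward reference, so the
   entry's maximum is set comfortably beyond the pool range of any load;
   the 65536 above simply plays the role of "unbounded" here.  */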
16392
16393 mp->min_address = min_address;
16394
16395 if (min_mp == NULL)
16396 {
16397 mp->prev = NULL;
16398 mp->next = minipool_vector_head;
16399
16400 if (mp->next == NULL)
16401 {
16402 minipool_vector_tail = mp;
16403 minipool_vector_label = gen_label_rtx ();
16404 }
16405 else
16406 mp->next->prev = mp;
16407
16408 minipool_vector_head = mp;
16409 }
16410 else
16411 {
16412 mp->next = min_mp->next;
16413 mp->prev = min_mp;
16414 min_mp->next = mp;
16415
16416 if (mp->next != NULL)
16417 mp->next->prev = mp;
16418 else
16419 minipool_vector_tail = mp;
16420 }
16421
16422 /* Save the new entry. */
16423 min_mp = mp;
16424
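/* If the new entry has a predecessor, step back to it so that the loop
   below recomputes the new entry's offset and minimum address from its
   neighbour; otherwise the new entry is now the head and starts at
   offset zero.  */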
16425 if (mp->prev)
16426 mp = mp->prev;
16427 else
16428 mp->offset = 0;
16429
16430 /* Scan over the following entries and adjust their offsets. */
16431 while (mp->next != NULL)
16432 {
16433 if (mp->next->min_address < mp->min_address + mp->fix_size)
16434 mp->next->min_address = mp->min_address + mp->fix_size;
16435
16436 if (mp->refcount)
16437 mp->next->offset = mp->offset + mp->fix_size;
16438 else
16439 mp->next->offset = mp->offset;
16440
16441 mp = mp->next;
16442 }
16443
16444 return min_mp;
16445 }
16446
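/* Record BARRIER as the barrier the pool will be placed at and assign each
   pool entry its byte offset from the start of the pool.  Entries whose
   refcount has dropped to zero take up no space.  */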
16447 static void
16448 assign_minipool_offsets (Mfix *barrier)
16449 {
16450 HOST_WIDE_INT offset = 0;
16451 Mnode *mp;
16452
16453 minipool_barrier = barrier;
16454
16455 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16456 {
16457 mp->offset = offset;
16458
16459 if (mp->refcount > 0)
16460 offset += mp->fix_size;
16461 }
16462 }
16463
16464 /* Output the literal table. */
16465 static void
16466 dump_minipool (rtx scan)
16467 {
16468 Mnode * mp;
16469 Mnode * nmp;
16470 int align64 = 0;
16471
16472 if (ARM_DOUBLEWORD_ALIGN)
16473 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16474 if (mp->refcount > 0 && mp->fix_size >= 8)
16475 {
16476 align64 = 1;
16477 break;
16478 }
16479
16480 if (dump_file)
16481 fprintf (dump_file,
16482 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16483 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16484
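/* Emit a fresh label, the 4- or 8-byte alignment the pool needs, and then
   the label through which all of the pool's entries are addressed.  */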
16485 scan = emit_label_after (gen_label_rtx (), scan);
16486 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16487 scan = emit_label_after (minipool_vector_label, scan);
16488
16489 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16490 {
16491 if (mp->refcount > 0)
16492 {
16493 if (dump_file)
16494 {
16495 fprintf (dump_file,
16496 ";; Offset %u, min %ld, max %ld ",
16497 (unsigned) mp->offset, (unsigned long) mp->min_address,
16498 (unsigned long) mp->max_address);
16499 arm_print_value (dump_file, mp->value);
16500 fputc ('\n', dump_file);
16501 }
16502
16503 switch (mp->fix_size)
16504 {
16505 #ifdef HAVE_consttable_1
16506 case 1:
16507 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16508 break;
16509
16510 #endif
16511 #ifdef HAVE_consttable_2
16512 case 2:
16513 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16514 break;
16515
16516 #endif
16517 #ifdef HAVE_consttable_4
16518 case 4:
16519 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16520 break;
16521
16522 #endif
16523 #ifdef HAVE_consttable_8
16524 case 8:
16525 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16526 break;
16527
16528 #endif
16529 #ifdef HAVE_consttable_16
16530 case 16:
16531 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16532 break;
16533
16534 #endif
16535 default:
16536 gcc_unreachable ();
16537 }
16538 }
16539
16540 nmp = mp->next;
16541 free (mp);
16542 }
16543
16544 minipool_vector_head = minipool_vector_tail = NULL;
16545 scan = emit_insn_after (gen_consttable_end (), scan);
16546 scan = emit_barrier_after (scan);
16547 }
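
/* Purely as an illustrative sketch (label names and values invented), the
   code emitted around a dumped pool ends up looking roughly like:

	b	.Lafter		@ jump inserted by create_fix_barrier
   .Lpool:			@ minipool_vector_label
	.word	0x12345678	@ a 4-byte consttable entry
	.word	some_symbol	@ another entry
   .Lafter:

   with each referencing insn rewritten by arm_reorg into a pc-relative
   load such as "ldr rN, .Lpool+4".  The exact directives depend on the
   target assembler and on the consttable patterns in the .md file.  */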
16548
16549 /* Return the cost of forcibly inserting a barrier after INSN. */
16550 static int
16551 arm_barrier_cost (rtx insn)
16552 {
16553 /* Basing the location of the pool on the loop depth is preferable,
16554 but at the moment, the basic block information seems to be
16555 corrupted by this stage of the compilation. */
16556 int base_cost = 50;
16557 rtx next = next_nonnote_insn (insn);
16558
16559 if (next != NULL && LABEL_P (next))
16560 base_cost -= 20;
16561
16562 switch (GET_CODE (insn))
16563 {
16564 case CODE_LABEL:
16565 /* It will always be better to place the table before the label, rather
16566 than after it. */
16567 return 50;
16568
16569 case INSN:
16570 case CALL_INSN:
16571 return base_cost;
16572
16573 case JUMP_INSN:
16574 return base_cost - 10;
16575
16576 default:
16577 return base_cost + 10;
16578 }
16579 }
16580
16581 /* Find the best place in the insn stream in the range
16582 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16583 Create the barrier by inserting a jump and add a new fix entry for
16584 it. */
16585 static Mfix *
16586 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16587 {
16588 HOST_WIDE_INT count = 0;
16589 rtx barrier;
16590 rtx from = fix->insn;
16591 /* The instruction after which we will insert the jump. */
16592 rtx selected = NULL;
16593 int selected_cost;
16594 /* The address at which the jump instruction will be placed. */
16595 HOST_WIDE_INT selected_address;
16596 Mfix * new_fix;
16597 HOST_WIDE_INT max_count = max_address - fix->address;
16598 rtx label = gen_label_rtx ();
16599
16600 selected_cost = arm_barrier_cost (from);
16601 selected_address = fix->address;
16602
16603 while (from && count < max_count)
16604 {
16605 rtx tmp;
16606 int new_cost;
16607
16608 /* This code shouldn't have been called if there was a natural barrier
16609 within range. */
16610 gcc_assert (!BARRIER_P (from));
16611
16612 /* Count the length of this insn. This must stay in sync with the
16613 code that pushes minipool fixes. */
16614 if (LABEL_P (from))
16615 count += get_label_padding (from);
16616 else
16617 count += get_attr_length (from);
16618
16619 /* If there is a jump table, add its length. */
16620 if (tablejump_p (from, NULL, &tmp))
16621 {
16622 count += get_jump_table_size (tmp);
16623
16624 /* Jump tables aren't in a basic block, so base the cost on
16625 the dispatch insn. If we select this location, we will
16626 still put the pool after the table. */
16627 new_cost = arm_barrier_cost (from);
16628
16629 if (count < max_count
16630 && (!selected || new_cost <= selected_cost))
16631 {
16632 selected = tmp;
16633 selected_cost = new_cost;
16634 selected_address = fix->address + count;
16635 }
16636
16637 /* Continue after the dispatch table. */
16638 from = NEXT_INSN (tmp);
16639 continue;
16640 }
16641
16642 new_cost = arm_barrier_cost (from);
16643
16644 if (count < max_count
16645 && (!selected || new_cost <= selected_cost))
16646 {
16647 selected = from;
16648 selected_cost = new_cost;
16649 selected_address = fix->address + count;
16650 }
16651
16652 from = NEXT_INSN (from);
16653 }
16654
16655 /* Make sure that we found a place to insert the jump. */
16656 gcc_assert (selected);
16657
16658 /* Make sure we do not split a call and its corresponding
16659 CALL_ARG_LOCATION note. */
16660 if (CALL_P (selected))
16661 {
16662 rtx next = NEXT_INSN (selected);
16663 if (next && NOTE_P (next)
16664 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16665 selected = next;
16666 }
16667
16668 /* Create a new JUMP_INSN that branches around a barrier. */
16669 from = emit_jump_insn_after (gen_jump (label), selected);
16670 JUMP_LABEL (from) = label;
16671 barrier = emit_barrier_after (from);
16672 emit_label_after (label, barrier);
16673
16674 /* Create a minipool barrier entry for the new barrier. */
16675 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16676 new_fix->insn = barrier;
16677 new_fix->address = selected_address;
16678 new_fix->next = fix->next;
16679 fix->next = new_fix;
16680
16681 return new_fix;
16682 }
16683
16684 /* Record that there is a natural barrier in the insn stream at
16685 ADDRESS. */
16686 static void
16687 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16688 {
16689 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16690
16691 fix->insn = insn;
16692 fix->address = address;
16693
16694 fix->next = NULL;
16695 if (minipool_fix_head != NULL)
16696 minipool_fix_tail->next = fix;
16697 else
16698 minipool_fix_head = fix;
16699
16700 minipool_fix_tail = fix;
16701 }
16702
16703 /* Record INSN, which will need fixing up to load a value from the
16704 minipool. ADDRESS is the offset of the insn since the start of the
16705 function; LOC is a pointer to the part of the insn which requires
16706 fixing; VALUE is the constant that must be loaded, which is of type
16707 MODE. */
16708 static void
16709 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16710 enum machine_mode mode, rtx value)
16711 {
16712 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16713
16714 fix->insn = insn;
16715 fix->address = address;
16716 fix->loc = loc;
16717 fix->mode = mode;
16718 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16719 fix->value = value;
16720 fix->forwards = get_attr_pool_range (insn);
16721 fix->backwards = get_attr_neg_pool_range (insn);
16722 fix->minipool = NULL;
16723
16724 /* If an insn doesn't have a range defined for it, then it isn't
16725 expecting to be reworked by this code. Better to stop now than
16726 to generate duff assembly code. */
16727 gcc_assert (fix->forwards || fix->backwards);
16728
16729 /* If an entry requires 8-byte alignment then assume all constant pools
16730 require 4 bytes of padding. Trying to do this later on a per-pool
16731 basis is awkward because existing pool entries have to be modified. */
16732 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16733 minipool_pad = 4;
16734
16735 if (dump_file)
16736 {
16737 fprintf (dump_file,
16738 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16739 GET_MODE_NAME (mode),
16740 INSN_UID (insn), (unsigned long) address,
16741 -1 * (long)fix->backwards, (long)fix->forwards);
16742 arm_print_value (dump_file, fix->value);
16743 fprintf (dump_file, "\n");
16744 }
16745
16746 /* Add it to the chain of fixes. */
16747 fix->next = NULL;
16748
16749 if (minipool_fix_head != NULL)
16750 minipool_fix_tail->next = fix;
16751 else
16752 minipool_fix_head = fix;
16753
16754 minipool_fix_tail = fix;
16755 }
16756
16757 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16758 constant inline. Returns 99 if we always want to synthesize the value
16759 (i.e. when literal pools are disabled). */
16760 int
16761 arm_max_const_double_inline_cost ()
16762 {
16763 /* Let the value get synthesized to avoid the use of literal pools. */
16764 if (arm_disable_literal_pool)
16765 return 99;
16766
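/* Allow up to three insns of inline synthesis when optimizing for size or
   on cores with load scheduling, otherwise up to four; constants that would
   cost more than this are expected to come from a literal pool instead.  */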
16767 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16768 }
16769
16770 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16771 Returns the number of insns needed, or 99 if we don't know how to
16772 do it. */
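/* For example, a DImode value both of whose 32-bit halves are valid ARM
   immediates (say, 1 in each half) costs 1 + 1 = 2 insns.  */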
16773 int
16774 arm_const_double_inline_cost (rtx val)
16775 {
16776 rtx lowpart, highpart;
16777 enum machine_mode mode;
16778
16779 mode = GET_MODE (val);
16780
16781 if (mode == VOIDmode)
16782 mode = DImode;
16783
16784 gcc_assert (GET_MODE_SIZE (mode) == 8);
16785
16786 lowpart = gen_lowpart (SImode, val);
16787 highpart = gen_highpart_mode (SImode, mode, val);
16788
16789 gcc_assert (CONST_INT_P (lowpart));
16790 gcc_assert (CONST_INT_P (highpart));
16791
16792 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16793 NULL_RTX, NULL_RTX, 0, 0)
16794 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16795 NULL_RTX, NULL_RTX, 0, 0));
16796 }
16797
16798 /* Return true if it is worthwhile to split a 64-bit constant into two
16799 32-bit operations. This is the case if optimizing for size, or
16800 if we have load delay slots, or if one 32-bit part can be done with
16801 a single data operation. */
16802 bool
16803 arm_const_double_by_parts (rtx val)
16804 {
16805 enum machine_mode mode = GET_MODE (val);
16806 rtx part;
16807
16808 if (optimize_size || arm_ld_sched)
16809 return true;
16810
16811 if (mode == VOIDmode)
16812 mode = DImode;
16813
16814 part = gen_highpart_mode (SImode, mode, val);
16815
16816 gcc_assert (CONST_INT_P (part));
16817
16818 if (const_ok_for_arm (INTVAL (part))
16819 || const_ok_for_arm (~INTVAL (part)))
16820 return true;
16821
16822 part = gen_lowpart (SImode, val);
16823
16824 gcc_assert (CONST_INT_P (part));
16825
16826 if (const_ok_for_arm (INTVAL (part))
16827 || const_ok_for_arm (~INTVAL (part)))
16828 return true;
16829
16830 return false;
16831 }
16832
16833 /* Return true if it is possible to inline both the high and low parts
16834 of a 64-bit constant into 32-bit data processing instructions. */
16835 bool
16836 arm_const_double_by_immediates (rtx val)
16837 {
16838 enum machine_mode mode = GET_MODE (val);
16839 rtx part;
16840
16841 if (mode == VOIDmode)
16842 mode = DImode;
16843
16844 part = gen_highpart_mode (SImode, mode, val);
16845
16846 gcc_assert (CONST_INT_P (part));
16847
16848 if (!const_ok_for_arm (INTVAL (part)))
16849 return false;
16850
16851 part = gen_lowpart (SImode, val);
16852
16853 gcc_assert (CONST_INT_P (part));
16854
16855 if (!const_ok_for_arm (INTVAL (part)))
16856 return false;
16857
16858 return true;
16859 }
16860
16861 /* Scan INSN and note any of its operands that need fixing.
16862 If DO_PUSHES is false we do not actually push any of the fixups
16863 needed. */
16864 static void
16865 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16866 {
16867 int opno;
16868
16869 extract_insn (insn);
16870
16871 if (!constrain_operands (1))
16872 fatal_insn_not_found (insn);
16873
16874 if (recog_data.n_alternatives == 0)
16875 return;
16876
16877 /* Fill in recog_op_alt with information about the constraints of
16878 this insn. */
16879 preprocess_constraints (insn);
16880
16881 const operand_alternative *op_alt = which_op_alt ();
16882 for (opno = 0; opno < recog_data.n_operands; opno++)
16883 {
16884 /* Things we need to fix can only occur in inputs. */
16885 if (recog_data.operand_type[opno] != OP_IN)
16886 continue;
16887
16888 /* If this alternative is a memory reference, then any mention
16889 of constants in this alternative is really to fool reload
16890 into allowing us to accept one there. We need to fix them up
16891 now so that we output the right code. */
16892 if (op_alt[opno].memory_ok)
16893 {
16894 rtx op = recog_data.operand[opno];
16895
16896 if (CONSTANT_P (op))
16897 {
16898 if (do_pushes)
16899 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16900 recog_data.operand_mode[opno], op);
16901 }
16902 else if (MEM_P (op)
16903 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16904 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16905 {
16906 if (do_pushes)
16907 {
16908 rtx cop = avoid_constant_pool_reference (op);
16909
16910 /* Casting the address of something to a mode narrower
16911 than a word can cause avoid_constant_pool_reference()
16912 to return the pool reference itself. That's no good to
16913 us here. Let's just hope that we can use the
16914 constant pool value directly. */
16915 if (op == cop)
16916 cop = get_pool_constant (XEXP (op, 0));
16917
16918 push_minipool_fix (insn, address,
16919 recog_data.operand_loc[opno],
16920 recog_data.operand_mode[opno], cop);
16921 }
16922
16923 }
16924 }
16925 }
16926
16927 return;
16928 }
16929
16930 /* Rewrite a move insn into a subtract of 0 if the condition codes will
16931 be useful in the next conditional jump insn. */
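/* Sketch of the intended effect (not a literal transcript of the output):
   a "mov Rd, Rs" whose destination or source is later compared against
   zero by the basic block's final cbranchsi4 is rewritten as
   "Rd = Rs - 0", a flag-setting subtract, so that the later conditional
   branch can reuse the condition codes instead of needing a separate
   compare.  */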
16932
16933 static void
16934 thumb1_reorg (void)
16935 {
16936 basic_block bb;
16937
16938 FOR_EACH_BB_FN (bb, cfun)
16939 {
16940 rtx dest, src;
16941 rtx pat, op0, set = NULL;
16942 rtx prev, insn = BB_END (bb);
16943 bool insn_clobbered = false;
16944
16945 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
16946 insn = PREV_INSN (insn);
16947
16948 /* Find the last cbranchsi4_insn in basic block BB. */
16949 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16950 continue;
16951
16952 /* Get the register with which we are comparing. */
16953 pat = PATTERN (insn);
16954 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16955
16956 /* Find the first flag setting insn before INSN in basic block BB. */
16957 gcc_assert (insn != BB_HEAD (bb));
16958 for (prev = PREV_INSN (insn);
16959 (!insn_clobbered
16960 && prev != BB_HEAD (bb)
16961 && (NOTE_P (prev)
16962 || DEBUG_INSN_P (prev)
16963 || ((set = single_set (prev)) != NULL
16964 && get_attr_conds (prev) == CONDS_NOCOND)));
16965 prev = PREV_INSN (prev))
16966 {
16967 if (reg_set_p (op0, prev))
16968 insn_clobbered = true;
16969 }
16970
16971 /* Skip if op0 is clobbered by an insn other than prev. */
16972 if (insn_clobbered)
16973 continue;
16974
16975 if (!set)
16976 continue;
16977
16978 dest = SET_DEST (set);
16979 src = SET_SRC (set);
16980 if (!low_register_operand (dest, SImode)
16981 || !low_register_operand (src, SImode))
16982 continue;
16983
16984 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16985 in INSN. Both src and dest of the move insn are checked. */
16986 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16987 {
16988 dest = copy_rtx (dest);
16989 src = copy_rtx (src);
16990 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16991 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16992 INSN_CODE (prev) = -1;
16993 /* Set test register in INSN to dest. */
16994 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
16995 INSN_CODE (insn) = -1;
16996 }
16997 }
16998 }
16999
17000 /* Convert instructions to their cc-clobbering variant if possible, since
17001 that allows us to use smaller encodings. */
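/* For example, when the condition codes are known to be dead at that point,
   a 32-bit "add r0, r1, r2" can be replaced by the flag-setting
   "adds r0, r1, r2", which has a 16-bit encoding; the pass records this by
   adding a clobber of CC to the insn's pattern.  */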
17002
17003 static void
17004 thumb2_reorg (void)
17005 {
17006 basic_block bb;
17007 regset_head live;
17008
17009 INIT_REG_SET (&live);
17010
17011 /* We are freeing block_for_insn in the toplev to keep compatibility
17012 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17013 compute_bb_for_insn ();
17014 df_analyze ();
17015
17016 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17017
17018 FOR_EACH_BB_FN (bb, cfun)
17019 {
17020 if (current_tune->disparage_flag_setting_t16_encodings
17021 && optimize_bb_for_speed_p (bb))
17022 continue;
17023
17024 rtx insn;
17025 Convert_Action action = SKIP;
17026 Convert_Action action_for_partial_flag_setting
17027 = (current_tune->disparage_partial_flag_setting_t16_encodings
17028 && optimize_bb_for_speed_p (bb))
17029 ? SKIP : CONV;
17030
17031 COPY_REG_SET (&live, DF_LR_OUT (bb));
17032 df_simulate_initialize_backwards (bb, &live);
17033 FOR_BB_INSNS_REVERSE (bb, insn)
17034 {
17035 if (NONJUMP_INSN_P (insn)
17036 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17037 && GET_CODE (PATTERN (insn)) == SET)
17038 {
17039 action = SKIP;
17040 rtx pat = PATTERN (insn);
17041 rtx dst = XEXP (pat, 0);
17042 rtx src = XEXP (pat, 1);
17043 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17044
17045 if (!OBJECT_P (src))
17046 op0 = XEXP (src, 0);
17047
17048 if (BINARY_P (src))
17049 op1 = XEXP (src, 1);
17050
17051 if (low_register_operand (dst, SImode))
17052 {
17053 switch (GET_CODE (src))
17054 {
17055 case PLUS:
17056 /* Adding two registers and storing the result
17057 in the first source is already a 16-bit
17058 operation. */
17059 if (rtx_equal_p (dst, op0)
17060 && register_operand (op1, SImode))
17061 break;
17062
17063 if (low_register_operand (op0, SImode))
17064 {
17065 /* ADDS <Rd>,<Rn>,<Rm> */
17066 if (low_register_operand (op1, SImode))
17067 action = CONV;
17068 /* ADDS <Rdn>,#<imm8> */
17069 /* SUBS <Rdn>,#<imm8> */
17070 else if (rtx_equal_p (dst, op0)
17071 && CONST_INT_P (op1)
17072 && IN_RANGE (INTVAL (op1), -255, 255))
17073 action = CONV;
17074 /* ADDS <Rd>,<Rn>,#<imm3> */
17075 /* SUBS <Rd>,<Rn>,#<imm3> */
17076 else if (CONST_INT_P (op1)
17077 && IN_RANGE (INTVAL (op1), -7, 7))
17078 action = CONV;
17079 }
17080 /* ADCS <Rd>, <Rn> */
17081 else if (GET_CODE (XEXP (src, 0)) == PLUS
17082 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17083 && low_register_operand (XEXP (XEXP (src, 0), 1),
17084 SImode)
17085 && COMPARISON_P (op1)
17086 && cc_register (XEXP (op1, 0), VOIDmode)
17087 && maybe_get_arm_condition_code (op1) == ARM_CS
17088 && XEXP (op1, 1) == const0_rtx)
17089 action = CONV;
17090 break;
17091
17092 case MINUS:
17093 /* RSBS <Rd>,<Rn>,#0
17094 Not handled here: see NEG below. */
17095 /* SUBS <Rd>,<Rn>,#<imm3>
17096 SUBS <Rdn>,#<imm8>
17097 Not handled here: see PLUS above. */
17098 /* SUBS <Rd>,<Rn>,<Rm> */
17099 if (low_register_operand (op0, SImode)
17100 && low_register_operand (op1, SImode))
17101 action = CONV;
17102 break;
17103
17104 case MULT:
17105 /* MULS <Rdm>,<Rn>,<Rdm>
17106 As an exception to the rule, this is only used
17107 when optimizing for size since MULS is slow on all
17108 known implementations. We do not even want to use
17109 MULS in cold code, if optimizing for speed, so we
17110 test the global flag here. */
17111 if (!optimize_size)
17112 break;
17113 /* else fall through. */
17114 case AND:
17115 case IOR:
17116 case XOR:
17117 /* ANDS <Rdn>,<Rm> */
17118 if (rtx_equal_p (dst, op0)
17119 && low_register_operand (op1, SImode))
17120 action = action_for_partial_flag_setting;
17121 else if (rtx_equal_p (dst, op1)
17122 && low_register_operand (op0, SImode))
17123 action = action_for_partial_flag_setting == SKIP
17124 ? SKIP : SWAP_CONV;
17125 break;
17126
17127 case ASHIFTRT:
17128 case ASHIFT:
17129 case LSHIFTRT:
17130 /* ASRS <Rdn>,<Rm> */
17131 /* LSRS <Rdn>,<Rm> */
17132 /* LSLS <Rdn>,<Rm> */
17133 if (rtx_equal_p (dst, op0)
17134 && low_register_operand (op1, SImode))
17135 action = action_for_partial_flag_setting;
17136 /* ASRS <Rd>,<Rm>,#<imm5> */
17137 /* LSRS <Rd>,<Rm>,#<imm5> */
17138 /* LSLS <Rd>,<Rm>,#<imm5> */
17139 else if (low_register_operand (op0, SImode)
17140 && CONST_INT_P (op1)
17141 && IN_RANGE (INTVAL (op1), 0, 31))
17142 action = action_for_partial_flag_setting;
17143 break;
17144
17145 case ROTATERT:
17146 /* RORS <Rdn>,<Rm> */
17147 if (rtx_equal_p (dst, op0)
17148 && low_register_operand (op1, SImode))
17149 action = action_for_partial_flag_setting;
17150 break;
17151
17152 case NOT:
17153 /* MVNS <Rd>,<Rm> */
17154 if (low_register_operand (op0, SImode))
17155 action = action_for_partial_flag_setting;
17156 break;
17157
17158 case NEG:
17159 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17160 if (low_register_operand (op0, SImode))
17161 action = CONV;
17162 break;
17163
17164 case CONST_INT:
17165 /* MOVS <Rd>,#<imm8> */
17166 if (CONST_INT_P (src)
17167 && IN_RANGE (INTVAL (src), 0, 255))
17168 action = action_for_partial_flag_setting;
17169 break;
17170
17171 case REG:
17172 /* MOVS and MOV<c> with registers have different
17173 encodings, so are not relevant here. */
17174 break;
17175
17176 default:
17177 break;
17178 }
17179 }
17180
17181 if (action != SKIP)
17182 {
17183 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17184 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17185 rtvec vec;
17186
17187 if (action == SWAP_CONV)
17188 {
17189 src = copy_rtx (src);
17190 XEXP (src, 0) = op1;
17191 XEXP (src, 1) = op0;
17192 pat = gen_rtx_SET (VOIDmode, dst, src);
17193 vec = gen_rtvec (2, pat, clobber);
17194 }
17195 else /* action == CONV */
17196 vec = gen_rtvec (2, pat, clobber);
17197
17198 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17199 INSN_CODE (insn) = -1;
17200 }
17201 }
17202
17203 if (NONDEBUG_INSN_P (insn))
17204 df_simulate_one_insn_backwards (bb, insn, &live);
17205 }
17206 }
17207
17208 CLEAR_REG_SET (&live);
17209 }
17210
17211 /* Gcc puts the pool in the wrong place for ARM, since we can only
17212 load addresses a limited distance around the pc. We do some
17213 special munging to move the constant pool values to the correct
17214 point in the code. */
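/* The overall strategy: first walk the function recording a "fix" (address,
   allowed forward/backward pool range, value) for every insn that needs a
   constant, together with every existing barrier; then repeatedly gather as
   many consecutive fixes as will fit into one pool, place that pool at a
   natural barrier when one is in range or create one with create_fix_barrier
   otherwise, assign offsets, retry out-of-range fixes as backward references,
   rewrite the insns into pc-relative loads and emit the pool with
   dump_minipool.  */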
17215 static void
17216 arm_reorg (void)
17217 {
17218 rtx insn;
17219 HOST_WIDE_INT address = 0;
17220 Mfix * fix;
17221
17222 if (TARGET_THUMB1)
17223 thumb1_reorg ();
17224 else if (TARGET_THUMB2)
17225 thumb2_reorg ();
17226
17227 /* Ensure all insns that must be split have been split at this point.
17228 Otherwise, the pool placement code below may compute incorrect
17229 insn lengths. Note that when optimizing, all insns have already
17230 been split at this point. */
17231 if (!optimize)
17232 split_all_insns_noflow ();
17233
17234 minipool_fix_head = minipool_fix_tail = NULL;
17235
17236 /* The first insn must always be a note, or the code below won't
17237 scan it properly. */
17238 insn = get_insns ();
17239 gcc_assert (NOTE_P (insn));
17240 minipool_pad = 0;
17241
17242 /* Scan all the insns and record the operands that will need fixing. */
17243 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17244 {
17245 if (BARRIER_P (insn))
17246 push_minipool_barrier (insn, address);
17247 else if (INSN_P (insn))
17248 {
17249 rtx table;
17250
17251 note_invalid_constants (insn, address, true);
17252 address += get_attr_length (insn);
17253
17254 /* If the insn is a vector jump, add the size of the table
17255 and skip the table. */
17256 if (tablejump_p (insn, NULL, &table))
17257 {
17258 address += get_jump_table_size (table);
17259 insn = table;
17260 }
17261 }
17262 else if (LABEL_P (insn))
17263 /* Add the worst-case padding due to alignment. We don't add
17264 the _current_ padding because the minipool insertions
17265 themselves might change it. */
17266 address += get_label_padding (insn);
17267 }
17268
17269 fix = minipool_fix_head;
17270
17271 /* Now scan the fixups and perform the required changes. */
17272 while (fix)
17273 {
17274 Mfix * ftmp;
17275 Mfix * fdel;
17276 Mfix * last_added_fix;
17277 Mfix * last_barrier = NULL;
17278 Mfix * this_fix;
17279
17280 /* Skip any further barriers before the next fix. */
17281 while (fix && BARRIER_P (fix->insn))
17282 fix = fix->next;
17283
17284 /* No more fixes. */
17285 if (fix == NULL)
17286 break;
17287
17288 last_added_fix = NULL;
17289
17290 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17291 {
17292 if (BARRIER_P (ftmp->insn))
17293 {
17294 if (ftmp->address >= minipool_vector_head->max_address)
17295 break;
17296
17297 last_barrier = ftmp;
17298 }
17299 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17300 break;
17301
17302 last_added_fix = ftmp; /* Keep track of the last fix added. */
17303 }
17304
17305 /* If we found a barrier, drop back to that; any fixes that we
17306 could have reached but come after the barrier will now go in
17307 the next mini-pool. */
17308 if (last_barrier != NULL)
17309 {
17310 /* Reduce the refcount for those fixes that won't go into this
17311 pool after all. */
17312 for (fdel = last_barrier->next;
17313 fdel && fdel != ftmp;
17314 fdel = fdel->next)
17315 {
17316 fdel->minipool->refcount--;
17317 fdel->minipool = NULL;
17318 }
17319
17320 ftmp = last_barrier;
17321 }
17322 else
17323 {
17324 /* ftmp is the first fix that we can't fit into this pool and
17325 there are no natural barriers that we could use. Insert a
17326 new barrier in the code somewhere between the previous
17327 fix and this one, and arrange to jump around it. */
17328 HOST_WIDE_INT max_address;
17329
17330 /* The last item on the list of fixes must be a barrier, so
17331 we can never run off the end of the list of fixes without
17332 last_barrier being set. */
17333 gcc_assert (ftmp);
17334
17335 max_address = minipool_vector_head->max_address;
17336 /* Check that there isn't another fix that is in range that
17337 we couldn't fit into this pool because the pool was
17338 already too large: we need to put the pool before such an
17339 instruction. The pool itself may come just after the
17340 fix because create_fix_barrier also allows space for a
17341 jump instruction. */
17342 if (ftmp->address < max_address)
17343 max_address = ftmp->address + 1;
17344
17345 last_barrier = create_fix_barrier (last_added_fix, max_address);
17346 }
17347
17348 assign_minipool_offsets (last_barrier);
17349
17350 while (ftmp)
17351 {
17352 if (!BARRIER_P (ftmp->insn)
17353 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17354 == NULL))
17355 break;
17356
17357 ftmp = ftmp->next;
17358 }
17359
17360 /* Scan over the fixes we have identified for this pool, fixing them
17361 up and adding the constants to the pool itself. */
17362 for (this_fix = fix; this_fix && ftmp != this_fix;
17363 this_fix = this_fix->next)
17364 if (!BARRIER_P (this_fix->insn))
17365 {
17366 rtx addr
17367 = plus_constant (Pmode,
17368 gen_rtx_LABEL_REF (VOIDmode,
17369 minipool_vector_label),
17370 this_fix->minipool->offset);
17371 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17372 }
17373
17374 dump_minipool (last_barrier->insn);
17375 fix = ftmp;
17376 }
17377
17378 /* From now on we must synthesize any constants that we can't handle
17379 directly. This can happen if the RTL gets split during final
17380 instruction generation. */
17381 cfun->machine->after_arm_reorg = 1;
17382
17383 /* Free the minipool memory. */
17384 obstack_free (&minipool_obstack, minipool_startobj);
17385 }
17386 \f
17387 /* Routines to output assembly language. */
17388
17389 /* If the rtx is the correct value then return the string of the number.
17390 In this way we can ensure that valid double constants are generated even
17391 when cross compiling. */
17392 const char *
17393 fp_immediate_constant (rtx x)
17394 {
17395 REAL_VALUE_TYPE r;
17396
17397 if (!fp_consts_inited)
17398 init_fp_table ();
17399
17400 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17401
17402 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17403 return "0";
17404 }
17405
17406 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17407 static const char *
17408 fp_const_from_val (REAL_VALUE_TYPE *r)
17409 {
17410 if (!fp_consts_inited)
17411 init_fp_table ();
17412
17413 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17414 return "0";
17415 }
17416
17417 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17418 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
17419 is in the list, and UPDATE is true iff the list contains an explicit
17420 update of the base register. */
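/* For example, a pop of {r4, r5, pc} from SP with writeback comes out
   roughly as "pop {r4, r5, pc}" under unified syntax and as
   "ldmfd sp!, {r4, r5, pc}" otherwise.  */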
17421 void
17422 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17423 bool update)
17424 {
17425 int i;
17426 char pattern[100];
17427 int offset;
17428 const char *conditional;
17429 int num_saves = XVECLEN (operands[0], 0);
17430 unsigned int regno;
17431 unsigned int regno_base = REGNO (operands[1]);
17432
17433 offset = 0;
17434 offset += update ? 1 : 0;
17435 offset += return_pc ? 1 : 0;
17436
17437 /* Is the base register in the list? */
17438 for (i = offset; i < num_saves; i++)
17439 {
17440 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17441 /* If SP is in the list, then the base register must be SP. */
17442 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17443 /* If base register is in the list, there must be no explicit update. */
17444 if (regno == regno_base)
17445 gcc_assert (!update);
17446 }
17447
17448 conditional = reverse ? "%?%D0" : "%?%d0";
17449 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17450 {
17451 /* Output pop (not stmfd) because it has a shorter encoding. */
17452 gcc_assert (update);
17453 sprintf (pattern, "pop%s\t{", conditional);
17454 }
17455 else
17456 {
17457 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17458 It's just a convention; their semantics are identical. */
17459 if (regno_base == SP_REGNUM)
17460 sprintf (pattern, "ldm%sfd\t", conditional);
17461 else if (TARGET_UNIFIED_ASM)
17462 sprintf (pattern, "ldmia%s\t", conditional);
17463 else
17464 sprintf (pattern, "ldm%sia\t", conditional);
17465
17466 strcat (pattern, reg_names[regno_base]);
17467 if (update)
17468 strcat (pattern, "!, {");
17469 else
17470 strcat (pattern, ", {");
17471 }
17472
17473 /* Output the first destination register. */
17474 strcat (pattern,
17475 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17476
17477 /* Output the rest of the destination registers. */
17478 for (i = offset + 1; i < num_saves; i++)
17479 {
17480 strcat (pattern, ", ");
17481 strcat (pattern,
17482 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17483 }
17484
17485 strcat (pattern, "}");
17486
17487 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17488 strcat (pattern, "^");
17489
17490 output_asm_insn (pattern, &cond);
17491 }
17492
17493
17494 /* Output the assembly for a store multiple. */
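/* For instance, pushing three D registers starting at d8 produces
   something like "fstmfdd sp!, {d8, d9, d10}".  */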
17495
17496 const char *
17497 vfp_output_fstmd (rtx * operands)
17498 {
17499 char pattern[100];
17500 int p;
17501 int base;
17502 int i;
17503
17504 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17505 p = strlen (pattern);
17506
17507 gcc_assert (REG_P (operands[1]));
17508
17509 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17510 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17511 {
17512 p += sprintf (&pattern[p], ", d%d", base + i);
17513 }
17514 strcpy (&pattern[p], "}");
17515
17516 output_asm_insn (pattern, operands);
17517 return "";
17518 }
17519
17520
17521 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17522 number of bytes pushed. */
17523
17524 static int
17525 vfp_emit_fstmd (int base_reg, int count)
17526 {
17527 rtx par;
17528 rtx dwarf;
17529 rtx tmp, reg;
17530 int i;
17531
17532 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17533 register pairs are stored by a store multiple insn. We avoid this
17534 by pushing an extra pair. */
17535 if (count == 2 && !arm_arch6)
17536 {
17537 if (base_reg == LAST_VFP_REGNUM - 3)
17538 base_reg -= 2;
17539 count++;
17540 }
17541
17542 /* FSTMD may not store more than 16 doubleword registers at once. Split
17543 larger stores into multiple parts (up to a maximum of two, in
17544 practice). */
17545 if (count > 16)
17546 {
17547 int saved;
17548 /* NOTE: base_reg is an internal register number, so each D register
17549 counts as 2. */
17550 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17551 saved += vfp_emit_fstmd (base_reg, 16);
17552 return saved;
17553 }
17554
17555 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17556 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
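/* PAR is the pattern that is actually emitted: a store-multiple expressed
   as an UNSPEC with a pre-modify of the stack pointer.  DWARF re-describes
   the same effect for the unwinder as an explicit stack adjustment followed
   by one store per register, and is attached below as a
   REG_FRAME_RELATED_EXPR note.  */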
17557
17558 reg = gen_rtx_REG (DFmode, base_reg);
17559 base_reg += 2;
17560
17561 XVECEXP (par, 0, 0)
17562 = gen_rtx_SET (VOIDmode,
17563 gen_frame_mem
17564 (BLKmode,
17565 gen_rtx_PRE_MODIFY (Pmode,
17566 stack_pointer_rtx,
17567 plus_constant
17568 (Pmode, stack_pointer_rtx,
17569 - (count * 8)))
17570 ),
17571 gen_rtx_UNSPEC (BLKmode,
17572 gen_rtvec (1, reg),
17573 UNSPEC_PUSH_MULT));
17574
17575 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17576 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17577 RTX_FRAME_RELATED_P (tmp) = 1;
17578 XVECEXP (dwarf, 0, 0) = tmp;
17579
17580 tmp = gen_rtx_SET (VOIDmode,
17581 gen_frame_mem (DFmode, stack_pointer_rtx),
17582 reg);
17583 RTX_FRAME_RELATED_P (tmp) = 1;
17584 XVECEXP (dwarf, 0, 1) = tmp;
17585
17586 for (i = 1; i < count; i++)
17587 {
17588 reg = gen_rtx_REG (DFmode, base_reg);
17589 base_reg += 2;
17590 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17591
17592 tmp = gen_rtx_SET (VOIDmode,
17593 gen_frame_mem (DFmode,
17594 plus_constant (Pmode,
17595 stack_pointer_rtx,
17596 i * 8)),
17597 reg);
17598 RTX_FRAME_RELATED_P (tmp) = 1;
17599 XVECEXP (dwarf, 0, i + 1) = tmp;
17600 }
17601
17602 par = emit_insn (par);
17603 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17604 RTX_FRAME_RELATED_P (par) = 1;
17605
17606 return count * 8;
17607 }
17608
17609 /* Emit a call instruction with pattern PAT. ADDR is the address of
17610 the call target. */
17611
17612 void
17613 arm_emit_call_insn (rtx pat, rtx addr)
17614 {
17615 rtx insn;
17616
17617 insn = emit_call_insn (pat);
17618
17619 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17620 If the call might use such an entry, add a use of the PIC register
17621 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17622 if (TARGET_VXWORKS_RTP
17623 && flag_pic
17624 && GET_CODE (addr) == SYMBOL_REF
17625 && (SYMBOL_REF_DECL (addr)
17626 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17627 : !SYMBOL_REF_LOCAL_P (addr)))
17628 {
17629 require_pic_register ();
17630 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17631 }
17632 }
17633
17634 /* Output a 'call' insn. */
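/* The sequence emitted is "mov lr, pc" followed by either "bx <reg>"
   (when interworking or on ARMv4T) or "mov pc, <reg>": the classic
   pre-blx idiom for an indirect call.  A call through lr itself is first
   moved into ip.  */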
17635 const char *
17636 output_call (rtx *operands)
17637 {
17638 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17639
17640 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17641 if (REGNO (operands[0]) == LR_REGNUM)
17642 {
17643 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17644 output_asm_insn ("mov%?\t%0, %|lr", operands);
17645 }
17646
17647 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17648
17649 if (TARGET_INTERWORK || arm_arch4t)
17650 output_asm_insn ("bx%?\t%0", operands);
17651 else
17652 output_asm_insn ("mov%?\t%|pc, %0", operands);
17653
17654 return "";
17655 }
17656
17657 /* Output a 'call' insn that is a reference in memory. This is
17658 disabled for ARMv5 and we prefer a blx instead because otherwise
17659 there's a significant performance overhead. */
17660 const char *
17661 output_call_mem (rtx *operands)
17662 {
17663 gcc_assert (!arm_arch5);
17664 if (TARGET_INTERWORK)
17665 {
17666 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17667 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17668 output_asm_insn ("bx%?\t%|ip", operands);
17669 }
17670 else if (regno_use_in (LR_REGNUM, operands[0]))
17671 {
17672 /* LR is used in the memory address. We load the address in the
17673 first instruction. It's safe to use IP as the target of the
17674 load since the call will kill it anyway. */
17675 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17676 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17677 if (arm_arch4t)
17678 output_asm_insn ("bx%?\t%|ip", operands);
17679 else
17680 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17681 }
17682 else
17683 {
17684 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17685 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17686 }
17687
17688 return "";
17689 }
17690
17691
17692 /* Output a move from arm registers to arm registers of a long double
17693 OPERANDS[0] is the destination.
17694 OPERANDS[1] is the source. */
17695 const char *
17696 output_mov_long_double_arm_from_arm (rtx *operands)
17697 {
17698 /* We have to be careful here because the two might overlap. */
17699 int dest_start = REGNO (operands[0]);
17700 int src_start = REGNO (operands[1]);
17701 rtx ops[2];
17702 int i;
17703
17704 if (dest_start < src_start)
17705 {
17706 for (i = 0; i < 3; i++)
17707 {
17708 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17709 ops[1] = gen_rtx_REG (SImode, src_start + i);
17710 output_asm_insn ("mov%?\t%0, %1", ops);
17711 }
17712 }
17713 else
17714 {
17715 for (i = 2; i >= 0; i--)
17716 {
17717 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17718 ops[1] = gen_rtx_REG (SImode, src_start + i);
17719 output_asm_insn ("mov%?\t%0, %1", ops);
17720 }
17721 }
17722
17723 return "";
17724 }
17725
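/* Emit DEST := SRC as a pair of SImode sets: for a constant, the low 16 bits
   first and then, if non-zero, the high 16 bits via a ZERO_EXTRACT (on cores
   with movw/movt this becomes that pair); for a symbolic SRC, a HIGH set
   followed by a LO_SUM.  */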
17726 void
17727 arm_emit_movpair (rtx dest, rtx src)
17728 {
17729 /* If the src is an immediate, simplify it. */
17730 if (CONST_INT_P (src))
17731 {
17732 HOST_WIDE_INT val = INTVAL (src);
17733 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17734 if ((val >> 16) & 0x0000ffff)
17735 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17736 GEN_INT (16)),
17737 GEN_INT ((val >> 16) & 0x0000ffff));
17738 return;
17739 }
17740 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17741 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17742 }
17743
17744 /* Output a move between double words. It must be REG<-MEM
17745 or MEM<-REG. */
17746 const char *
17747 output_move_double (rtx *operands, bool emit, int *count)
17748 {
17749 enum rtx_code code0 = GET_CODE (operands[0]);
17750 enum rtx_code code1 = GET_CODE (operands[1]);
17751 rtx otherops[3];
17752 if (count)
17753 *count = 1;
17754
17755 /* The only case when this might happen is when
17756 you are looking at the length of a DImode instruction
17757 that has an invalid constant in it. */
17758 if (code0 == REG && code1 != MEM)
17759 {
17760 gcc_assert (!emit);
17761 *count = 2;
17762 return "";
17763 }
17764
17765 if (code0 == REG)
17766 {
17767 unsigned int reg0 = REGNO (operands[0]);
17768
17769 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17770
17771 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17772
17773 switch (GET_CODE (XEXP (operands[1], 0)))
17774 {
17775 case REG:
17776
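/* When the Cortex-M3 ldrd erratum workaround (fix_cm3_ldrd) is in effect
   and the base register is also the first destination register, fall back
   to ldm instead of ldrd.  */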
17777 if (emit)
17778 {
17779 if (TARGET_LDRD
17780 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17781 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17782 else
17783 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17784 }
17785 break;
17786
17787 case PRE_INC:
17788 gcc_assert (TARGET_LDRD);
17789 if (emit)
17790 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17791 break;
17792
17793 case PRE_DEC:
17794 if (emit)
17795 {
17796 if (TARGET_LDRD)
17797 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17798 else
17799 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17800 }
17801 break;
17802
17803 case POST_INC:
17804 if (emit)
17805 {
17806 if (TARGET_LDRD)
17807 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17808 else
17809 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17810 }
17811 break;
17812
17813 case POST_DEC:
17814 gcc_assert (TARGET_LDRD);
17815 if (emit)
17816 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17817 break;
17818
17819 case PRE_MODIFY:
17820 case POST_MODIFY:
17821 /* Auto-increment addressing modes should never have overlapping
17822 base and destination registers, and overlapping index registers
17823 are already prohibited, so this doesn't need to worry about
17824 fix_cm3_ldrd. */
17825 otherops[0] = operands[0];
17826 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17827 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17828
17829 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17830 {
17831 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17832 {
17833 /* Registers overlap so split out the increment. */
17834 if (emit)
17835 {
17836 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17837 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17838 }
17839 if (count)
17840 *count = 2;
17841 }
17842 else
17843 {
17844 /* Use a single insn if we can.
17845 FIXME: IWMMXT allows offsets larger than ldrd can
17846 handle, fix these up with a pair of ldr. */
17847 if (TARGET_THUMB2
17848 || !CONST_INT_P (otherops[2])
17849 || (INTVAL (otherops[2]) > -256
17850 && INTVAL (otherops[2]) < 256))
17851 {
17852 if (emit)
17853 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17854 }
17855 else
17856 {
17857 if (emit)
17858 {
17859 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17860 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17861 }
17862 if (count)
17863 *count = 2;
17864
17865 }
17866 }
17867 }
17868 else
17869 {
17870 /* Use a single insn if we can.
17871 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17872 fix these up with a pair of ldr. */
17873 if (TARGET_THUMB2
17874 || !CONST_INT_P (otherops[2])
17875 || (INTVAL (otherops[2]) > -256
17876 && INTVAL (otherops[2]) < 256))
17877 {
17878 if (emit)
17879 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17880 }
17881 else
17882 {
17883 if (emit)
17884 {
17885 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17886 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17887 }
17888 if (count)
17889 *count = 2;
17890 }
17891 }
17892 break;
17893
17894 case LABEL_REF:
17895 case CONST:
17896 /* We might be able to use ldrd %0, %1 here. However, the range is
17897 different to ldr/adr, and it is broken on some ARMv7-M
17898 implementations. */
17899 /* Use the second register of the pair to avoid problematic
17900 overlap. */
17901 otherops[1] = operands[1];
17902 if (emit)
17903 output_asm_insn ("adr%?\t%0, %1", otherops);
17904 operands[1] = otherops[0];
17905 if (emit)
17906 {
17907 if (TARGET_LDRD)
17908 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17909 else
17910 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17911 }
17912
17913 if (count)
17914 *count = 2;
17915 break;
17916
17917 /* ??? This needs checking for thumb2. */
17918 default:
17919 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17920 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17921 {
17922 otherops[0] = operands[0];
17923 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17924 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17925
17926 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17927 {
17928 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17929 {
17930 switch ((int) INTVAL (otherops[2]))
17931 {
17932 case -8:
17933 if (emit)
17934 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17935 return "";
17936 case -4:
17937 if (TARGET_THUMB2)
17938 break;
17939 if (emit)
17940 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17941 return "";
17942 case 4:
17943 if (TARGET_THUMB2)
17944 break;
17945 if (emit)
17946 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17947 return "";
17948 }
17949 }
17950 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17951 operands[1] = otherops[0];
17952 if (TARGET_LDRD
17953 && (REG_P (otherops[2])
17954 || TARGET_THUMB2
17955 || (CONST_INT_P (otherops[2])
17956 && INTVAL (otherops[2]) > -256
17957 && INTVAL (otherops[2]) < 256)))
17958 {
17959 if (reg_overlap_mentioned_p (operands[0],
17960 otherops[2]))
17961 {
17962 rtx tmp;
17963 /* Swap base and index registers over to
17964 avoid a conflict. */
17965 tmp = otherops[1];
17966 otherops[1] = otherops[2];
17967 otherops[2] = tmp;
17968 }
17969 /* If both registers conflict, it will usually
17970 have been fixed by a splitter. */
17971 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17972 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17973 {
17974 if (emit)
17975 {
17976 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17977 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17978 }
17979 if (count)
17980 *count = 2;
17981 }
17982 else
17983 {
17984 otherops[0] = operands[0];
17985 if (emit)
17986 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
17987 }
17988 return "";
17989 }
17990
17991 if (CONST_INT_P (otherops[2]))
17992 {
17993 if (emit)
17994 {
17995 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17996 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17997 else
17998 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17999 }
18000 }
18001 else
18002 {
18003 if (emit)
18004 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18005 }
18006 }
18007 else
18008 {
18009 if (emit)
18010 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18011 }
18012
18013 if (count)
18014 *count = 2;
18015
18016 if (TARGET_LDRD)
18017 return "ldr%(d%)\t%0, [%1]";
18018
18019 return "ldm%(ia%)\t%1, %M0";
18020 }
18021 else
18022 {
18023 otherops[1] = adjust_address (operands[1], SImode, 4);
18024 /* Take care of overlapping base/data reg. */
18025 if (reg_mentioned_p (operands[0], operands[1]))
18026 {
18027 if (emit)
18028 {
18029 output_asm_insn ("ldr%?\t%0, %1", otherops);
18030 output_asm_insn ("ldr%?\t%0, %1", operands);
18031 }
18032 if (count)
18033 *count = 2;
18034
18035 }
18036 else
18037 {
18038 if (emit)
18039 {
18040 output_asm_insn ("ldr%?\t%0, %1", operands);
18041 output_asm_insn ("ldr%?\t%0, %1", otherops);
18042 }
18043 if (count)
18044 *count = 2;
18045 }
18046 }
18047 }
18048 }
18049 else
18050 {
18051 /* Constraints should ensure this. */
18052 gcc_assert (code0 == MEM && code1 == REG);
18053 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18054 || (TARGET_ARM && TARGET_LDRD));
18055
18056 switch (GET_CODE (XEXP (operands[0], 0)))
18057 {
18058 case REG:
18059 if (emit)
18060 {
18061 if (TARGET_LDRD)
18062 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18063 else
18064 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18065 }
18066 break;
18067
18068 case PRE_INC:
18069 gcc_assert (TARGET_LDRD);
18070 if (emit)
18071 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18072 break;
18073
18074 case PRE_DEC:
18075 if (emit)
18076 {
18077 if (TARGET_LDRD)
18078 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18079 else
18080 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18081 }
18082 break;
18083
18084 case POST_INC:
18085 if (emit)
18086 {
18087 if (TARGET_LDRD)
18088 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18089 else
18090 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18091 }
18092 break;
18093
18094 case POST_DEC:
18095 gcc_assert (TARGET_LDRD);
18096 if (emit)
18097 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18098 break;
18099
18100 case PRE_MODIFY:
18101 case POST_MODIFY:
18102 otherops[0] = operands[1];
18103 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18104 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18105
18106 /* IWMMXT allows offsets larger than ldrd can handle,
18107 fix these up with a pair of ldr. */
18108 if (!TARGET_THUMB2
18109 && CONST_INT_P (otherops[2])
18110 && (INTVAL(otherops[2]) <= -256
18111 || INTVAL(otherops[2]) >= 256))
18112 {
18113 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18114 {
18115 if (emit)
18116 {
18117 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18118 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18119 }
18120 if (count)
18121 *count = 2;
18122 }
18123 else
18124 {
18125 if (emit)
18126 {
18127 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18128 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18129 }
18130 if (count)
18131 *count = 2;
18132 }
18133 }
18134 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18135 {
18136 if (emit)
18137 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18138 }
18139 else
18140 {
18141 if (emit)
18142 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18143 }
18144 break;
18145
18146 case PLUS:
18147 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18148 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18149 {
18150 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18151 {
18152 case -8:
18153 if (emit)
18154 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18155 return "";
18156
18157 case -4:
18158 if (TARGET_THUMB2)
18159 break;
18160 if (emit)
18161 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18162 return "";
18163
18164 case 4:
18165 if (TARGET_THUMB2)
18166 break;
18167 if (emit)
18168 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18169 return "";
18170 }
18171 }
18172 if (TARGET_LDRD
18173 && (REG_P (otherops[2])
18174 || TARGET_THUMB2
18175 || (CONST_INT_P (otherops[2])
18176 && INTVAL (otherops[2]) > -256
18177 && INTVAL (otherops[2]) < 256)))
18178 {
18179 otherops[0] = operands[1];
18180 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18181 if (emit)
18182 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18183 return "";
18184 }
18185 /* Fall through */
18186
18187 default:
18188 otherops[0] = adjust_address (operands[0], SImode, 4);
18189 otherops[1] = operands[1];
18190 if (emit)
18191 {
18192 output_asm_insn ("str%?\t%1, %0", operands);
18193 output_asm_insn ("str%?\t%H1, %0", otherops);
18194 }
18195 if (count)
18196 *count = 2;
18197 }
18198 }
18199
18200 return "";
18201 }
18202
18203 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18204 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18205
18206 const char *
18207 output_move_quad (rtx *operands)
18208 {
18209 if (REG_P (operands[0]))
18210 {
18211 /* Load, or reg->reg move. */
18212
18213 if (MEM_P (operands[1]))
18214 {
18215 switch (GET_CODE (XEXP (operands[1], 0)))
18216 {
18217 case REG:
18218 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18219 break;
18220
18221 case LABEL_REF:
18222 case CONST:
18223 output_asm_insn ("adr%?\t%0, %1", operands);
18224 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18225 break;
18226
18227 default:
18228 gcc_unreachable ();
18229 }
18230 }
18231 else
18232 {
18233 rtx ops[2];
18234 int dest, src, i;
18235
18236 gcc_assert (REG_P (operands[1]));
18237
18238 dest = REGNO (operands[0]);
18239 src = REGNO (operands[1]);
18240
18241 /* This seems pretty dumb, but hopefully GCC won't try to do it
18242 very often. */
18243 if (dest < src)
18244 for (i = 0; i < 4; i++)
18245 {
18246 ops[0] = gen_rtx_REG (SImode, dest + i);
18247 ops[1] = gen_rtx_REG (SImode, src + i);
18248 output_asm_insn ("mov%?\t%0, %1", ops);
18249 }
18250 else
18251 for (i = 3; i >= 0; i--)
18252 {
18253 ops[0] = gen_rtx_REG (SImode, dest + i);
18254 ops[1] = gen_rtx_REG (SImode, src + i);
18255 output_asm_insn ("mov%?\t%0, %1", ops);
18256 }
18257 }
18258 }
18259 else
18260 {
18261 gcc_assert (MEM_P (operands[0]));
18262 gcc_assert (REG_P (operands[1]));
18263 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18264
18265 switch (GET_CODE (XEXP (operands[0], 0)))
18266 {
18267 case REG:
18268 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18269 break;
18270
18271 default:
18272 gcc_unreachable ();
18273 }
18274 }
18275
18276 return "";
18277 }
18278
18279 /* Output a VFP load or store instruction. */
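/* For a DFmode load from a simple address this emits an "fldd" of the form
   "fldd<cond> %P0, %1"; the PRE_DEC and POST_INC addresses use the
   fldm/fstm multiple forms with base-register writeback.  */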
18280
18281 const char *
18282 output_move_vfp (rtx *operands)
18283 {
18284 rtx reg, mem, addr, ops[2];
18285 int load = REG_P (operands[0]);
18286 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18287 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18288 const char *templ;
18289 char buff[50];
18290 enum machine_mode mode;
18291
18292 reg = operands[!load];
18293 mem = operands[load];
18294
18295 mode = GET_MODE (reg);
18296
18297 gcc_assert (REG_P (reg));
18298 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18299 gcc_assert (mode == SFmode
18300 || mode == DFmode
18301 || mode == SImode
18302 || mode == DImode
18303 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18304 gcc_assert (MEM_P (mem));
18305
18306 addr = XEXP (mem, 0);
18307
18308 switch (GET_CODE (addr))
18309 {
18310 case PRE_DEC:
18311 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18312 ops[0] = XEXP (addr, 0);
18313 ops[1] = reg;
18314 break;
18315
18316 case POST_INC:
18317 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18318 ops[0] = XEXP (addr, 0);
18319 ops[1] = reg;
18320 break;
18321
18322 default:
18323 templ = "f%s%c%%?\t%%%s0, %%1%s";
18324 ops[0] = reg;
18325 ops[1] = mem;
18326 break;
18327 }
18328
18329 sprintf (buff, templ,
18330 load ? "ld" : "st",
18331 dp ? 'd' : 's',
18332 dp ? "P" : "",
18333 integer_p ? "\t%@ int" : "");
18334 output_asm_insn (buff, ops);
18335
18336 return "";
18337 }
18338
18339 /* Output a Neon double-word or quad-word load or store, or a load
18340 or store for larger structure modes.
18341
18342 WARNING: The ordering of elements is weird in big-endian mode,
18343 because the EABI requires that vectors stored in memory appear
18344 as though they were stored by a VSTM instruction.
18345 GCC RTL defines element ordering based on in-memory order.
18346 This can be different from the architectural ordering of elements
18347 within a NEON register. The intrinsics defined in arm_neon.h use the
18348 NEON register element ordering, not the GCC RTL element ordering.
18349
18350 For example, the in-memory ordering of a big-endian quadword
18351 vector with 16-bit elements when stored from register pair {d0,d1}
18352 will be (lowest address first, d0[N] is NEON register element N):
18353
18354 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18355
18356 When necessary, quadword registers (dN, dN+1) are moved to ARM
18357 registers from rN in the order:
18358
18359 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18360
18361 So that STM/LDM can be used on vectors in ARM registers, and the
18362 same memory layout will result as if VSTM/VLDM were used.
18363
18364 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18365 possible, which allows use of appropriate alignment tags.
18366 Note that the choice of "64" is independent of the actual vector
18367 element size; this size simply ensures that the behavior is
18368 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18369
18370 Due to limitations of those instructions, use of VST1.64/VLD1.64
18371 is not possible if:
18372 - the address contains PRE_DEC, or
18373 - the mode refers to more than 4 double-word registers
18374
18375 In those cases, it would be possible to replace VSTM/VLDM by a
18376 sequence of instructions; this is not currently implemented since
18377 this is not certain to actually improve performance. */
18378
18379 const char *
18380 output_move_neon (rtx *operands)
18381 {
18382 rtx reg, mem, addr, ops[2];
18383 int regno, nregs, load = REG_P (operands[0]);
18384 const char *templ;
18385 char buff[50];
18386 enum machine_mode mode;
18387
18388 reg = operands[!load];
18389 mem = operands[load];
18390
18391 mode = GET_MODE (reg);
18392
18393 gcc_assert (REG_P (reg));
18394 regno = REGNO (reg);
18395 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18396 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18397 || NEON_REGNO_OK_FOR_QUAD (regno));
18398 gcc_assert (VALID_NEON_DREG_MODE (mode)
18399 || VALID_NEON_QREG_MODE (mode)
18400 || VALID_NEON_STRUCT_MODE (mode));
18401 gcc_assert (MEM_P (mem));
18402
18403 addr = XEXP (mem, 0);
18404
18405 /* Strip off const from addresses like (const (plus (...))). */
18406 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18407 addr = XEXP (addr, 0);
18408
18409 switch (GET_CODE (addr))
18410 {
18411 case POST_INC:
18412 /* We have to use vldm / vstm for too-large modes. */
18413 if (nregs > 4)
18414 {
18415 templ = "v%smia%%?\t%%0!, %%h1";
18416 ops[0] = XEXP (addr, 0);
18417 }
18418 else
18419 {
18420 templ = "v%s1.64\t%%h1, %%A0";
18421 ops[0] = mem;
18422 }
18423 ops[1] = reg;
18424 break;
18425
18426 case PRE_DEC:
18427 /* We have to use vldm / vstm in this case, since there is no
18428 pre-decrement form of the vld1 / vst1 instructions. */
18429 templ = "v%smdb%%?\t%%0!, %%h1";
18430 ops[0] = XEXP (addr, 0);
18431 ops[1] = reg;
18432 break;
18433
18434 case POST_MODIFY:
18435 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18436 gcc_unreachable ();
18437
18438 case LABEL_REF:
18439 case PLUS:
18440 {
18441 int i;
18442 int overlap = -1;
18443 for (i = 0; i < nregs; i++)
18444 {
18445 /* We're only using DImode here because it's a convenient size. */
18446 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18447 ops[1] = adjust_address (mem, DImode, 8 * i);
18448 if (reg_overlap_mentioned_p (ops[0], mem))
18449 {
18450 gcc_assert (overlap == -1);
18451 overlap = i;
18452 }
18453 else
18454 {
18455 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18456 output_asm_insn (buff, ops);
18457 }
18458 }
18459 if (overlap != -1)
18460 {
18461 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18462 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18463 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18464 output_asm_insn (buff, ops);
18465 }
18466
18467 return "";
18468 }
18469
18470 default:
18471 /* We have to use vldm / vstm for too-large modes. */
18472 if (nregs > 4)
18473 templ = "v%smia%%?\t%%m0, %%h1";
18474 else
18475 templ = "v%s1.64\t%%h1, %%A0";
18476
18477 ops[0] = mem;
18478 ops[1] = reg;
18479 }
18480
18481 sprintf (buff, templ, load ? "ld" : "st");
18482 output_asm_insn (buff, ops);
18483
18484 return "";
18485 }
18486
18487 /* Compute and return the length of neon_mov<mode>, where <mode> is
18488 one of VSTRUCT modes: EI, OI, CI or XI. */
18489 int
18490 arm_attr_length_move_neon (rtx insn)
18491 {
18492 rtx reg, mem, addr;
18493 int load;
18494 enum machine_mode mode;
18495
18496 extract_insn_cached (insn);
18497
18498 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18499 {
18500 mode = GET_MODE (recog_data.operand[0]);
18501 switch (mode)
18502 {
18503 case EImode:
18504 case OImode:
18505 return 8;
18506 case CImode:
18507 return 12;
18508 case XImode:
18509 return 16;
18510 default:
18511 gcc_unreachable ();
18512 }
18513 }
18514
18515 load = REG_P (recog_data.operand[0]);
18516 reg = recog_data.operand[!load];
18517 mem = recog_data.operand[load];
18518
18519 gcc_assert (MEM_P (mem));
18520
18521 mode = GET_MODE (reg);
18522 addr = XEXP (mem, 0);
18523
18524 /* Strip off const from addresses like (const (plus (...))). */
18525 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18526 addr = XEXP (addr, 0);
18527
18528 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18529 {
18530 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18531 return insns * 4;
18532 }
18533 else
18534 return 4;
18535 }
18536
18537 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18538 return zero. */
18539
18540 int
18541 arm_address_offset_is_imm (rtx insn)
18542 {
18543 rtx mem, addr;
18544
18545 extract_insn_cached (insn);
18546
18547 if (REG_P (recog_data.operand[0]))
18548 return 0;
18549
18550 mem = recog_data.operand[0];
18551
18552 gcc_assert (MEM_P (mem));
18553
18554 addr = XEXP (mem, 0);
18555
18556 if (REG_P (addr)
18557 || (GET_CODE (addr) == PLUS
18558 && REG_P (XEXP (addr, 0))
18559 && CONST_INT_P (XEXP (addr, 1))))
18560 return 1;
18561 else
18562 return 0;
18563 }
18564
18565 /* Output an ADD r, s, #n where n may be too big for one instruction.
18566 If adding zero in place (same source and destination register), output nothing. */
18567 const char *
18568 output_add_immediate (rtx *operands)
18569 {
18570 HOST_WIDE_INT n = INTVAL (operands[2]);
18571
18572 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18573 {
18574 if (n < 0)
18575 output_multi_immediate (operands,
18576 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18577 -n);
18578 else
18579 output_multi_immediate (operands,
18580 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18581 n);
18582 }
18583
18584 return "";
18585 }
18586
18587 /* Output a multiple immediate operation.
18588 OPERANDS is the vector of operands referred to in the output patterns.
18589 INSTR1 is the output pattern to use for the first constant.
18590 INSTR2 is the output pattern to use for subsequent constants.
18591 IMMED_OP is the index of the constant slot in OPERANDS.
18592 N is the constant value. */
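/* For example (with hypothetical operands), splitting N = 0x12345 using
   the "add" patterns passed in by output_add_immediate emits three
   instructions, each immediate being an 8-bit chunk aligned on a 2-bit
   boundary:

       add     r0, r1, #0x45
       add     r0, r0, #0x2300
       add     r0, r0, #0x10000

   since 0x45 + 0x2300 + 0x10000 == 0x12345.  */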
18593 static const char *
18594 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18595 int immed_op, HOST_WIDE_INT n)
18596 {
18597 #if HOST_BITS_PER_WIDE_INT > 32
18598 n &= 0xffffffff;
18599 #endif
18600
18601 if (n == 0)
18602 {
18603 /* Quick and easy output. */
18604 operands[immed_op] = const0_rtx;
18605 output_asm_insn (instr1, operands);
18606 }
18607 else
18608 {
18609 int i;
18610 const char * instr = instr1;
18611
18612 /* Note that n is never zero here (which would give no output). */
18613 for (i = 0; i < 32; i += 2)
18614 {
18615 if (n & (3 << i))
18616 {
18617 operands[immed_op] = GEN_INT (n & (255 << i));
18618 output_asm_insn (instr, operands);
18619 instr = instr2;
18620 i += 6;
18621 }
18622 }
18623 }
18624
18625 return "";
18626 }
18627
18628 /* Return the name of a shifter operation. */
18629 static const char *
18630 arm_shift_nmem(enum rtx_code code)
18631 {
18632 switch (code)
18633 {
18634 case ASHIFT:
18635 return ARM_LSL_NAME;
18636
18637 case ASHIFTRT:
18638 return "asr";
18639
18640 case LSHIFTRT:
18641 return "lsr";
18642
18643 case ROTATERT:
18644 return "ror";
18645
18646 default:
18647 abort();
18648 }
18649 }
18650
18651 /* Return the appropriate ARM instruction for the operation code.
18652 The returned result should not be overwritten. OP is the rtx of the
18653 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18654 was shifted. */
18655 const char *
18656 arithmetic_instr (rtx op, int shift_first_arg)
18657 {
18658 switch (GET_CODE (op))
18659 {
18660 case PLUS:
18661 return "add";
18662
18663 case MINUS:
18664 return shift_first_arg ? "rsb" : "sub";
18665
18666 case IOR:
18667 return "orr";
18668
18669 case XOR:
18670 return "eor";
18671
18672 case AND:
18673 return "and";
18674
18675 case ASHIFT:
18676 case ASHIFTRT:
18677 case LSHIFTRT:
18678 case ROTATERT:
18679 return arm_shift_nmem(GET_CODE(op));
18680
18681 default:
18682 gcc_unreachable ();
18683 }
18684 }
18685
18686 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18687 for the operation code. The returned result should not be overwritten.
18688 OP is the rtx code of the shift.
18689 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18690 constant shift amount otherwise. */
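/* For example (a sketch): for (ashift (reg) (const_int 3)) this returns
   the LSL mnemonic with *AMOUNTP set to 3, and for
   (mult (reg) (const_int 8)) it also returns the LSL mnemonic with
   *AMOUNTP set to 3, since multiplication by a power of two is handled
   as a left shift.  */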
18691 static const char *
18692 shift_op (rtx op, HOST_WIDE_INT *amountp)
18693 {
18694 const char * mnem;
18695 enum rtx_code code = GET_CODE (op);
18696
18697 switch (code)
18698 {
18699 case ROTATE:
18700 if (!CONST_INT_P (XEXP (op, 1)))
18701 {
18702 output_operand_lossage ("invalid shift operand");
18703 return NULL;
18704 }
18705
18706 code = ROTATERT;
18707 *amountp = 32 - INTVAL (XEXP (op, 1));
18708 mnem = "ror";
18709 break;
18710
18711 case ASHIFT:
18712 case ASHIFTRT:
18713 case LSHIFTRT:
18714 case ROTATERT:
18715 mnem = arm_shift_nmem(code);
18716 if (CONST_INT_P (XEXP (op, 1)))
18717 {
18718 *amountp = INTVAL (XEXP (op, 1));
18719 }
18720 else if (REG_P (XEXP (op, 1)))
18721 {
18722 *amountp = -1;
18723 return mnem;
18724 }
18725 else
18726 {
18727 output_operand_lossage ("invalid shift operand");
18728 return NULL;
18729 }
18730 break;
18731
18732 case MULT:
18733 /* We never have to worry about the amount being other than a
18734 power of 2, since this case can never be reloaded from a reg. */
18735 if (!CONST_INT_P (XEXP (op, 1)))
18736 {
18737 output_operand_lossage ("invalid shift operand");
18738 return NULL;
18739 }
18740
18741 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18742
18743 /* Amount must be a power of two. */
18744 if (*amountp & (*amountp - 1))
18745 {
18746 output_operand_lossage ("invalid shift operand");
18747 return NULL;
18748 }
18749
18750 *amountp = int_log2 (*amountp);
18751 return ARM_LSL_NAME;
18752
18753 default:
18754 output_operand_lossage ("invalid shift operand");
18755 return NULL;
18756 }
18757
18758 /* This is not 100% correct, but follows from the desire to merge
18759 multiplication by a power of 2 with the recognizer for a
18760 shift. >=32 is not a valid shift for "lsl", so we must try to
18761 output a shift that produces the correct arithmetical result.
18762 Using lsr #32 is identical except for the fact that the carry bit
18763 is not set correctly if we set the flags; but we never use the
18764 carry bit from such an operation, so we can ignore that. */
18765 if (code == ROTATERT)
18766 /* Rotate is just modulo 32. */
18767 *amountp &= 31;
18768 else if (*amountp != (*amountp & 31))
18769 {
18770 if (code == ASHIFT)
18771 mnem = "lsr";
18772 *amountp = 32;
18773 }
18774
18775 /* Shifts of 0 are no-ops. */
18776 if (*amountp == 0)
18777 return NULL;
18778
18779 return mnem;
18780 }
18781
18782 /* Obtain the shift count from POWER, which must be a power of two. */
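/* For example, int_log2 (1) == 0 and int_log2 (8) == 3.  */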
18783
18784 static HOST_WIDE_INT
18785 int_log2 (HOST_WIDE_INT power)
18786 {
18787 HOST_WIDE_INT shift = 0;
18788
18789 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18790 {
18791 gcc_assert (shift <= 31);
18792 shift++;
18793 }
18794
18795 return shift;
18796 }
18797
18798 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18799 because /bin/as is horribly restrictive. The judgement about
18800 whether or not each character is 'printable' (and can be output as
18801 is) or not (and must be printed with an octal escape) must be made
18802 with reference to the *host* character set -- the situation is
18803 similar to that discussed in the comments above pp_c_char in
18804 c-pretty-print.c. */
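/* As a small illustration (a sketch): for the three input bytes
   'a', '"' and 0, the directive emitted would be

       .ascii  "a\"\000"

   with a fresh .ascii directive started whenever MAX_ASCII_LEN
   characters have accumulated on the current line.  */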
18805
18806 #define MAX_ASCII_LEN 51
18807
18808 void
18809 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18810 {
18811 int i;
18812 int len_so_far = 0;
18813
18814 fputs ("\t.ascii\t\"", stream);
18815
18816 for (i = 0; i < len; i++)
18817 {
18818 int c = p[i];
18819
18820 if (len_so_far >= MAX_ASCII_LEN)
18821 {
18822 fputs ("\"\n\t.ascii\t\"", stream);
18823 len_so_far = 0;
18824 }
18825
18826 if (ISPRINT (c))
18827 {
18828 if (c == '\\' || c == '\"')
18829 {
18830 putc ('\\', stream);
18831 len_so_far++;
18832 }
18833 putc (c, stream);
18834 len_so_far++;
18835 }
18836 else
18837 {
18838 fprintf (stream, "\\%03o", c);
18839 len_so_far += 4;
18840 }
18841 }
18842
18843 fputs ("\"\n", stream);
18844 }
18845 \f
18846 /* Compute the register save mask for registers 0 through 12
18847 inclusive. This code is used by arm_compute_save_reg_mask. */
18848
18849 static unsigned long
18850 arm_compute_save_reg0_reg12_mask (void)
18851 {
18852 unsigned long func_type = arm_current_func_type ();
18853 unsigned long save_reg_mask = 0;
18854 unsigned int reg;
18855
18856 if (IS_INTERRUPT (func_type))
18857 {
18858 unsigned int max_reg;
18859 /* Interrupt functions must not corrupt any registers,
18860 even call clobbered ones. If this is a leaf function
18861 we can just examine the registers used by the RTL, but
18862 otherwise we have to assume that whatever function is
18863 called might clobber anything, and so we have to save
18864 all the call-clobbered registers as well. */
18865 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18866 /* FIQ handlers have registers r8 - r12 banked, so
18867 we only need to check r0 - r7. Normal ISRs only
18868 bank r14 and r15, so we must check up to r12.
18869 r13 is the stack pointer which is always preserved,
18870 so we do not need to consider it here. */
18871 max_reg = 7;
18872 else
18873 max_reg = 12;
18874
18875 for (reg = 0; reg <= max_reg; reg++)
18876 if (df_regs_ever_live_p (reg)
18877 || (! crtl->is_leaf && call_used_regs[reg]))
18878 save_reg_mask |= (1 << reg);
18879
18880 /* Also save the pic base register if necessary. */
18881 if (flag_pic
18882 && !TARGET_SINGLE_PIC_BASE
18883 && arm_pic_register != INVALID_REGNUM
18884 && crtl->uses_pic_offset_table)
18885 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18886 }
18887 else if (IS_VOLATILE(func_type))
18888 {
18889 /* For noreturn functions we historically omitted register saves
18890 altogether. However this really messes up debugging. As a
18891 compromise, save just the frame pointers. Combined with the link
18892 register saved elsewhere this should be sufficient to get
18893 a backtrace. */
18894 if (frame_pointer_needed)
18895 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18896 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18897 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18898 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18899 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18900 }
18901 else
18902 {
18903 /* In the normal case we only need to save those registers
18904 which are call saved and which are used by this function. */
18905 for (reg = 0; reg <= 11; reg++)
18906 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18907 save_reg_mask |= (1 << reg);
18908
18909 /* Handle the frame pointer as a special case. */
18910 if (frame_pointer_needed)
18911 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18912
18913 /* If we aren't loading the PIC register,
18914 don't stack it even though it may be live. */
18915 if (flag_pic
18916 && !TARGET_SINGLE_PIC_BASE
18917 && arm_pic_register != INVALID_REGNUM
18918 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18919 || crtl->uses_pic_offset_table))
18920 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18921
18922 /* The prologue will copy SP into R0, so save it. */
18923 if (IS_STACKALIGN (func_type))
18924 save_reg_mask |= 1;
18925 }
18926
18927 /* Save registers so the exception handler can modify them. */
18928 if (crtl->calls_eh_return)
18929 {
18930 unsigned int i;
18931
18932 for (i = 0; ; i++)
18933 {
18934 reg = EH_RETURN_DATA_REGNO (i);
18935 if (reg == INVALID_REGNUM)
18936 break;
18937 save_reg_mask |= 1 << reg;
18938 }
18939 }
18940
18941 return save_reg_mask;
18942 }
18943
18944 /* Return true if r3 is live at the start of the function. */
18945
18946 static bool
18947 arm_r3_live_at_start_p (void)
18948 {
18949 /* Just look at cfg info, which is still close enough to correct at this
18950 point. This gives false positives for broken functions that might use
18951 uninitialized data that happens to be allocated in r3, but who cares? */
18952 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
18953 }
18954
18955 /* Compute the number of bytes used to store the static chain register on the
18956 stack, above the stack frame. We need to know this accurately to get the
18957 alignment of the rest of the stack frame correct. */
18958
18959 static int
18960 arm_compute_static_chain_stack_bytes (void)
18961 {
18962 /* See the defining assertion in arm_expand_prologue. */
18963 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18964 && IS_NESTED (arm_current_func_type ())
18965 && arm_r3_live_at_start_p ()
18966 && crtl->args.pretend_args_size == 0)
18967 return 4;
18968
18969 return 0;
18970 }
18971
18972 /* Compute a bit mask of which registers need to be
18973 saved on the stack for the current function.
18974 This is used by arm_get_frame_offsets, which may add extra registers. */
18975
18976 static unsigned long
18977 arm_compute_save_reg_mask (void)
18978 {
18979 unsigned int save_reg_mask = 0;
18980 unsigned long func_type = arm_current_func_type ();
18981 unsigned int reg;
18982
18983 if (IS_NAKED (func_type))
18984 /* This should never really happen. */
18985 return 0;
18986
18987 /* If we are creating a stack frame, then we must save the frame pointer,
18988 IP (which will hold the old stack pointer), LR and the PC. */
18989 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18990 save_reg_mask |=
18991 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18992 | (1 << IP_REGNUM)
18993 | (1 << LR_REGNUM)
18994 | (1 << PC_REGNUM);
18995
18996 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18997
18998 /* Decide if we need to save the link register.
18999 Interrupt routines have their own banked link register,
19000 so they never need to save it.
19001 Otherwise if we do not use the link register we do not need to save
19002 it. If we are pushing other registers onto the stack however, we
19003 can save an instruction in the epilogue by pushing the link register
19004 now and then popping it back into the PC. This incurs extra memory
19005 accesses though, so we only do it when optimizing for size, and only
19006 if we know that we will not need a fancy return sequence. */
19007 if (df_regs_ever_live_p (LR_REGNUM)
19008 || (save_reg_mask
19009 && optimize_size
19010 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19011 && !crtl->calls_eh_return))
19012 save_reg_mask |= 1 << LR_REGNUM;
19013
19014 if (cfun->machine->lr_save_eliminated)
19015 save_reg_mask &= ~ (1 << LR_REGNUM);
19016
19017 if (TARGET_REALLY_IWMMXT
19018 && ((bit_count (save_reg_mask)
19019 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19020 arm_compute_static_chain_stack_bytes())
19021 ) % 2) != 0)
19022 {
19023 /* The total number of registers that are going to be pushed
19024 onto the stack is odd. We need to ensure that the stack
19025 is 64-bit aligned before we start to save iWMMXt registers,
19026 and also before we start to create locals. (A local variable
19027 might be a double or long long which we will load/store using
19028 an iWMMXt instruction). Therefore we need to push another
19029 ARM register, so that the stack will be 64-bit aligned. We
19030 try to avoid using the arg registers (r0 - r3) as they might be
19031 used to pass values in a tail call. */
19032 for (reg = 4; reg <= 12; reg++)
19033 if ((save_reg_mask & (1 << reg)) == 0)
19034 break;
19035
19036 if (reg <= 12)
19037 save_reg_mask |= (1 << reg);
19038 else
19039 {
19040 cfun->machine->sibcall_blocked = 1;
19041 save_reg_mask |= (1 << 3);
19042 }
19043 }
19044
19045 /* We may need to push an additional register for use in initializing the
19046 PIC base register. */
19047 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19048 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19049 {
19050 reg = thumb_find_work_register (1 << 4);
19051 if (!call_used_regs[reg])
19052 save_reg_mask |= (1 << reg);
19053 }
19054
19055 return save_reg_mask;
19056 }
19057
19058
19059 /* Compute a bit mask of which registers need to be
19060 saved on the stack for the current function. */
19061 static unsigned long
19062 thumb1_compute_save_reg_mask (void)
19063 {
19064 unsigned long mask;
19065 unsigned reg;
19066
19067 mask = 0;
19068 for (reg = 0; reg < 12; reg ++)
19069 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19070 mask |= 1 << reg;
19071
19072 if (flag_pic
19073 && !TARGET_SINGLE_PIC_BASE
19074 && arm_pic_register != INVALID_REGNUM
19075 && crtl->uses_pic_offset_table)
19076 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19077
19078 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19079 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19080 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19081
19082 /* LR will also be pushed if any lo regs are pushed. */
19083 if (mask & 0xff || thumb_force_lr_save ())
19084 mask |= (1 << LR_REGNUM);
19085
19086 /* Make sure we have a low work register if we need one.
19087 We will need one if we are going to push a high register,
19088 but we are not currently intending to push a low register. */
19089 if ((mask & 0xff) == 0
19090 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19091 {
19092 /* Use thumb_find_work_register to choose which register
19093 we will use. If the register is live then we will
19094 have to push it. Use LAST_LO_REGNUM as our fallback
19095 choice for the register to select. */
19096 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19097 /* Make sure the register returned by thumb_find_work_register is
19098 not part of the return value. */
19099 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19100 reg = LAST_LO_REGNUM;
19101
19102 if (! call_used_regs[reg])
19103 mask |= 1 << reg;
19104 }
19105
19106 /* The 504 below is 8 bytes less than 512 because there are two possible
19107 alignment words. We can't tell here if they will be present or not so we
19108 have to play it safe and assume that they are. */
19109 if ((CALLER_INTERWORKING_SLOT_SIZE +
19110 ROUND_UP_WORD (get_frame_size ()) +
19111 crtl->outgoing_args_size) >= 504)
19112 {
19113 /* This is the same as the code in thumb1_expand_prologue() which
19114 determines which register to use for stack decrement. */
19115 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19116 if (mask & (1 << reg))
19117 break;
19118
19119 if (reg > LAST_LO_REGNUM)
19120 {
19121 /* Make sure we have a register available for stack decrement. */
19122 mask |= 1 << LAST_LO_REGNUM;
19123 }
19124 }
19125
19126 return mask;
19127 }
19128
19129
19130 /* Return the number of bytes required to save VFP registers. */
19131 static int
19132 arm_get_vfp_saved_size (void)
19133 {
19134 unsigned int regno;
19135 int count;
19136 int saved;
19137
19138 saved = 0;
19139 /* Space for saved VFP registers. */
19140 if (TARGET_HARD_FLOAT && TARGET_VFP)
19141 {
19142 count = 0;
19143 for (regno = FIRST_VFP_REGNUM;
19144 regno < LAST_VFP_REGNUM;
19145 regno += 2)
19146 {
19147 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19148 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19149 {
19150 if (count > 0)
19151 {
19152 /* Workaround ARM10 VFPr1 bug. */
19153 if (count == 2 && !arm_arch6)
19154 count++;
19155 saved += count * 8;
19156 }
19157 count = 0;
19158 }
19159 else
19160 count++;
19161 }
19162 if (count > 0)
19163 {
19164 if (count == 2 && !arm_arch6)
19165 count++;
19166 saved += count * 8;
19167 }
19168 }
19169 return saved;
19170 }
19171
19172
19173 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19174 everything bar the final return instruction. If simple_return is true,
19175 then do not output the epilogue, because it has already been emitted in RTL. */
19176 const char *
19177 output_return_instruction (rtx operand, bool really_return, bool reverse,
19178 bool simple_return)
19179 {
19180 char conditional[10];
19181 char instr[100];
19182 unsigned reg;
19183 unsigned long live_regs_mask;
19184 unsigned long func_type;
19185 arm_stack_offsets *offsets;
19186
19187 func_type = arm_current_func_type ();
19188
19189 if (IS_NAKED (func_type))
19190 return "";
19191
19192 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19193 {
19194 /* If this function was declared non-returning, and we have
19195 found a tail call, then we have to trust that the called
19196 function won't return. */
19197 if (really_return)
19198 {
19199 rtx ops[2];
19200
19201 /* Otherwise, trap an attempted return by aborting. */
19202 ops[0] = operand;
19203 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19204 : "abort");
19205 assemble_external_libcall (ops[1]);
19206 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19207 }
19208
19209 return "";
19210 }
19211
19212 gcc_assert (!cfun->calls_alloca || really_return);
19213
19214 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19215
19216 cfun->machine->return_used_this_function = 1;
19217
19218 offsets = arm_get_frame_offsets ();
19219 live_regs_mask = offsets->saved_regs_mask;
19220
19221 if (!simple_return && live_regs_mask)
19222 {
19223 const char * return_reg;
19224
19225 /* If we do not have any special requirements for function exit
19226 (e.g. interworking) then we can load the return address
19227 directly into the PC. Otherwise we must load it into LR. */
19228 if (really_return
19229 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19230 return_reg = reg_names[PC_REGNUM];
19231 else
19232 return_reg = reg_names[LR_REGNUM];
19233
19234 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19235 {
19236 /* There are three possible reasons for the IP register
19237 being saved: 1) a stack frame was created, in which case
19238 IP contains the old stack pointer; 2) an ISR routine
19239 corrupted it; or 3) it was saved to align the stack on
19240 iWMMXt. In case 1, restore IP into SP; otherwise just
19241 restore IP. */
19242 if (frame_pointer_needed)
19243 {
19244 live_regs_mask &= ~ (1 << IP_REGNUM);
19245 live_regs_mask |= (1 << SP_REGNUM);
19246 }
19247 else
19248 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19249 }
19250
19251 /* On some ARM architectures it is faster to use LDR rather than
19252 LDM to load a single register. On other architectures, the
19253 cost is the same. In 26 bit mode, or for exception handlers,
19254 we have to use LDM to load the PC so that the CPSR is also
19255 restored. */
19256 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19257 if (live_regs_mask == (1U << reg))
19258 break;
19259
19260 if (reg <= LAST_ARM_REGNUM
19261 && (reg != LR_REGNUM
19262 || ! really_return
19263 || ! IS_INTERRUPT (func_type)))
19264 {
19265 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19266 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19267 }
19268 else
19269 {
19270 char *p;
19271 int first = 1;
19272
19273 /* Generate the load multiple instruction to restore the
19274 registers. Note we can get here, even if
19275 frame_pointer_needed is true, but only if sp already
19276 points to the base of the saved core registers. */
19277 if (live_regs_mask & (1 << SP_REGNUM))
19278 {
19279 unsigned HOST_WIDE_INT stack_adjust;
19280
19281 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19282 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19283
19284 if (stack_adjust && arm_arch5 && TARGET_ARM)
19285 if (TARGET_UNIFIED_ASM)
19286 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19287 else
19288 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19289 else
19290 {
19291 /* If we can't use ldmib (SA110 bug),
19292 then try to pop r3 instead. */
19293 if (stack_adjust)
19294 live_regs_mask |= 1 << 3;
19295
19296 if (TARGET_UNIFIED_ASM)
19297 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19298 else
19299 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19300 }
19301 }
19302 else
19303 if (TARGET_UNIFIED_ASM)
19304 sprintf (instr, "pop%s\t{", conditional);
19305 else
19306 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19307
19308 p = instr + strlen (instr);
19309
19310 for (reg = 0; reg <= SP_REGNUM; reg++)
19311 if (live_regs_mask & (1 << reg))
19312 {
19313 int l = strlen (reg_names[reg]);
19314
19315 if (first)
19316 first = 0;
19317 else
19318 {
19319 memcpy (p, ", ", 2);
19320 p += 2;
19321 }
19322
19323 memcpy (p, "%|", 2);
19324 memcpy (p + 2, reg_names[reg], l);
19325 p += l + 2;
19326 }
19327
19328 if (live_regs_mask & (1 << LR_REGNUM))
19329 {
19330 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19331 /* If returning from an interrupt, restore the CPSR. */
19332 if (IS_INTERRUPT (func_type))
19333 strcat (p, "^");
19334 }
19335 else
19336 strcpy (p, "}");
19337 }
19338
19339 output_asm_insn (instr, & operand);
19340
19341 /* See if we need to generate an extra instruction to
19342 perform the actual function return. */
19343 if (really_return
19344 && func_type != ARM_FT_INTERWORKED
19345 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19346 {
19347 /* The return has already been handled
19348 by loading the LR into the PC. */
19349 return "";
19350 }
19351 }
19352
19353 if (really_return)
19354 {
19355 switch ((int) ARM_FUNC_TYPE (func_type))
19356 {
19357 case ARM_FT_ISR:
19358 case ARM_FT_FIQ:
19359 /* ??? This is wrong for unified assembly syntax. */
19360 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19361 break;
19362
19363 case ARM_FT_INTERWORKED:
19364 sprintf (instr, "bx%s\t%%|lr", conditional);
19365 break;
19366
19367 case ARM_FT_EXCEPTION:
19368 /* ??? This is wrong for unified assembly syntax. */
19369 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19370 break;
19371
19372 default:
19373 /* Use bx if it's available. */
19374 if (arm_arch5 || arm_arch4t)
19375 sprintf (instr, "bx%s\t%%|lr", conditional);
19376 else
19377 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19378 break;
19379 }
19380
19381 output_asm_insn (instr, & operand);
19382 }
19383
19384 return "";
19385 }
19386
19387 /* Write the function name into the code section, directly preceding
19388 the function prologue.
19389
19390 Code will be output similar to this:
19391 t0
19392 .ascii "arm_poke_function_name", 0
19393 .align
19394 t1
19395 .word 0xff000000 + (t1 - t0)
19396 arm_poke_function_name
19397 mov ip, sp
19398 stmfd sp!, {fp, ip, lr, pc}
19399 sub fp, ip, #4
19400
19401 When performing a stack backtrace, code can inspect the value
19402 of 'pc' stored at 'fp' + 0. If the trace function then looks
19403 at location pc - 12 and the top 8 bits are set, then we know
19404 that there is a function name embedded immediately preceding this
19405 location, whose length is ((pc[-3]) & ~0xff000000).
19406
19407 We assume that pc is declared as a pointer to an unsigned long.
19408
19409 It is of no benefit to output the function name if we are assembling
19410 a leaf function. These function types will not contain a stack
19411 backtrace structure; therefore it is not possible to determine the
19412 function name. */
19413 void
19414 arm_poke_function_name (FILE *stream, const char *name)
19415 {
19416 unsigned long alignlength;
19417 unsigned long length;
19418 rtx x;
19419
19420 length = strlen (name) + 1;
19421 alignlength = ROUND_UP_WORD (length);
19422
19423 ASM_OUTPUT_ASCII (stream, name, length);
19424 ASM_OUTPUT_ALIGN (stream, 2);
19425 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19426 assemble_aligned_integer (UNITS_PER_WORD, x);
19427 }
19428
19429 /* Place some comments into the assembler stream
19430 describing the current function. */
19431 static void
19432 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19433 {
19434 unsigned long func_type;
19435
19436 /* ??? Do we want to print some of the below anyway? */
19437 if (TARGET_THUMB1)
19438 return;
19439
19440 /* Sanity check. */
19441 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19442
19443 func_type = arm_current_func_type ();
19444
19445 switch ((int) ARM_FUNC_TYPE (func_type))
19446 {
19447 default:
19448 case ARM_FT_NORMAL:
19449 break;
19450 case ARM_FT_INTERWORKED:
19451 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19452 break;
19453 case ARM_FT_ISR:
19454 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19455 break;
19456 case ARM_FT_FIQ:
19457 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19458 break;
19459 case ARM_FT_EXCEPTION:
19460 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19461 break;
19462 }
19463
19464 if (IS_NAKED (func_type))
19465 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19466
19467 if (IS_VOLATILE (func_type))
19468 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19469
19470 if (IS_NESTED (func_type))
19471 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19472 if (IS_STACKALIGN (func_type))
19473 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19474
19475 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19476 crtl->args.size,
19477 crtl->args.pretend_args_size, frame_size);
19478
19479 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19480 frame_pointer_needed,
19481 cfun->machine->uses_anonymous_args);
19482
19483 if (cfun->machine->lr_save_eliminated)
19484 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19485
19486 if (crtl->calls_eh_return)
19487 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19488
19489 }
19490
19491 static void
19492 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19493 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19494 {
19495 arm_stack_offsets *offsets;
19496
19497 if (TARGET_THUMB1)
19498 {
19499 int regno;
19500
19501 /* Emit any call-via-reg trampolines that are needed for v4t support
19502 of call_reg and call_value_reg type insns. */
19503 for (regno = 0; regno < LR_REGNUM; regno++)
19504 {
19505 rtx label = cfun->machine->call_via[regno];
19506
19507 if (label != NULL)
19508 {
19509 switch_to_section (function_section (current_function_decl));
19510 targetm.asm_out.internal_label (asm_out_file, "L",
19511 CODE_LABEL_NUMBER (label));
19512 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19513 }
19514 }
19515
19516 /* ??? Probably not safe to set this here, since it assumes that a
19517 function will be emitted as assembly immediately after we generate
19518 RTL for it. This does not happen for inline functions. */
19519 cfun->machine->return_used_this_function = 0;
19520 }
19521 else /* TARGET_32BIT */
19522 {
19523 /* We need to take into account any stack-frame rounding. */
19524 offsets = arm_get_frame_offsets ();
19525
19526 gcc_assert (!use_return_insn (FALSE, NULL)
19527 || (cfun->machine->return_used_this_function != 0)
19528 || offsets->saved_regs == offsets->outgoing_args
19529 || frame_pointer_needed);
19530 }
19531 }
19532
19533 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19534 STR and STRD. If an even number of registers is being pushed, an
19535 STRD pattern is created for each register pair. If an
19536 odd number of registers is pushed, emit an initial STR followed by
19537 as many STRD instructions as are needed. This works best when the
19538 stack is initially 64-bit aligned (the normal case), since it
19539 ensures that each STRD is also 64-bit aligned. */
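/* For instance (a sketch with a hypothetical register set): pushing
   {r4, r5, r6} is an odd count, so the emitted sequence corresponds to

       str     r4, [sp, #-12]!
       strd    r5, r6, [sp, #4]

   where the initial STR performs the whole stack allocation and the
   following STRD lands on a 64-bit aligned address.  */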
19540 static void
19541 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19542 {
19543 int num_regs = 0;
19544 int i;
19545 int regno;
19546 rtx par = NULL_RTX;
19547 rtx dwarf = NULL_RTX;
19548 rtx tmp;
19549 bool first = true;
19550
19551 num_regs = bit_count (saved_regs_mask);
19552
19553 /* Must be at least one register to save, and can't save SP or PC. */
19554 gcc_assert (num_regs > 0 && num_regs <= 14);
19555 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19556 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19557
19558 /* Create sequence for DWARF info. All the frame-related data for
19559 debugging is held in this wrapper. */
19560 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19561
19562 /* Describe the stack adjustment. */
19563 tmp = gen_rtx_SET (VOIDmode,
19564 stack_pointer_rtx,
19565 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19566 RTX_FRAME_RELATED_P (tmp) = 1;
19567 XVECEXP (dwarf, 0, 0) = tmp;
19568
19569 /* Find the first register. */
19570 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19571 ;
19572
19573 i = 0;
19574
19575 /* If there's an odd number of registers to push, start off by
19576 pushing a single register. This ensures that subsequent strd
19577 operations are dword aligned (assuming that SP was originally
19578 64-bit aligned). */
19579 if ((num_regs & 1) != 0)
19580 {
19581 rtx reg, mem, insn;
19582
19583 reg = gen_rtx_REG (SImode, regno);
19584 if (num_regs == 1)
19585 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19586 stack_pointer_rtx));
19587 else
19588 mem = gen_frame_mem (Pmode,
19589 gen_rtx_PRE_MODIFY
19590 (Pmode, stack_pointer_rtx,
19591 plus_constant (Pmode, stack_pointer_rtx,
19592 -4 * num_regs)));
19593
19594 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19595 RTX_FRAME_RELATED_P (tmp) = 1;
19596 insn = emit_insn (tmp);
19597 RTX_FRAME_RELATED_P (insn) = 1;
19598 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19599 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19600 reg);
19601 RTX_FRAME_RELATED_P (tmp) = 1;
19602 i++;
19603 regno++;
19604 XVECEXP (dwarf, 0, i) = tmp;
19605 first = false;
19606 }
19607
19608 while (i < num_regs)
19609 if (saved_regs_mask & (1 << regno))
19610 {
19611 rtx reg1, reg2, mem1, mem2;
19612 rtx tmp0, tmp1, tmp2;
19613 int regno2;
19614
19615 /* Find the register to pair with this one. */
19616 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19617 regno2++)
19618 ;
19619
19620 reg1 = gen_rtx_REG (SImode, regno);
19621 reg2 = gen_rtx_REG (SImode, regno2);
19622
19623 if (first)
19624 {
19625 rtx insn;
19626
19627 first = false;
19628 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19629 stack_pointer_rtx,
19630 -4 * num_regs));
19631 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19632 stack_pointer_rtx,
19633 -4 * (num_regs - 1)));
19634 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19635 plus_constant (Pmode, stack_pointer_rtx,
19636 -4 * (num_regs)));
19637 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19638 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19639 RTX_FRAME_RELATED_P (tmp0) = 1;
19640 RTX_FRAME_RELATED_P (tmp1) = 1;
19641 RTX_FRAME_RELATED_P (tmp2) = 1;
19642 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19643 XVECEXP (par, 0, 0) = tmp0;
19644 XVECEXP (par, 0, 1) = tmp1;
19645 XVECEXP (par, 0, 2) = tmp2;
19646 insn = emit_insn (par);
19647 RTX_FRAME_RELATED_P (insn) = 1;
19648 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19649 }
19650 else
19651 {
19652 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19653 stack_pointer_rtx,
19654 4 * i));
19655 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19656 stack_pointer_rtx,
19657 4 * (i + 1)));
19658 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19659 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19660 RTX_FRAME_RELATED_P (tmp1) = 1;
19661 RTX_FRAME_RELATED_P (tmp2) = 1;
19662 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19663 XVECEXP (par, 0, 0) = tmp1;
19664 XVECEXP (par, 0, 1) = tmp2;
19665 emit_insn (par);
19666 }
19667
19668 /* Create unwind information. This is an approximation. */
19669 tmp1 = gen_rtx_SET (VOIDmode,
19670 gen_frame_mem (Pmode,
19671 plus_constant (Pmode,
19672 stack_pointer_rtx,
19673 4 * i)),
19674 reg1);
19675 tmp2 = gen_rtx_SET (VOIDmode,
19676 gen_frame_mem (Pmode,
19677 plus_constant (Pmode,
19678 stack_pointer_rtx,
19679 4 * (i + 1))),
19680 reg2);
19681
19682 RTX_FRAME_RELATED_P (tmp1) = 1;
19683 RTX_FRAME_RELATED_P (tmp2) = 1;
19684 XVECEXP (dwarf, 0, i + 1) = tmp1;
19685 XVECEXP (dwarf, 0, i + 2) = tmp2;
19686 i += 2;
19687 regno = regno2 + 1;
19688 }
19689 else
19690 regno++;
19691
19692 return;
19693 }
19694
19695 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19696 whenever possible; otherwise it emits single-word stores. The first store
19697 also allocates stack space for all saved registers, using pre-indexed
19698 addressing with writeback. All other stores use offset addressing. If no STRD
19699 can be emitted, this function emits a sequence of single-word stores,
19700 and not an STM as before, because single-word stores provide more
19701 scheduling freedom and can be turned into an STM by peephole optimizations. */
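/* For instance (a sketch with a hypothetical register set): for
   {r4, r5, r7} this emits, conceptually,

       strd    r4, r5, [sp, #-12]!
       str     r7, [sp, #8]

   r4/r5 form an even/odd pair suitable for ARM-mode STRD, while r7 has
   no partner and is stored with a plain STR at its offset.  */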
19702 static void
19703 arm_emit_strd_push (unsigned long saved_regs_mask)
19704 {
19705 int num_regs = 0;
19706 int i, j, dwarf_index = 0;
19707 int offset = 0;
19708 rtx dwarf = NULL_RTX;
19709 rtx insn = NULL_RTX;
19710 rtx tmp, mem;
19711
19712 /* TODO: More efficient code can be emitted by changing the
19713 layout, e.g., first push all pairs that can use STRD to keep the
19714 stack aligned, and then push all other registers. */
19715 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19716 if (saved_regs_mask & (1 << i))
19717 num_regs++;
19718
19719 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19720 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19721 gcc_assert (num_regs > 0);
19722
19723 /* Create sequence for DWARF info. */
19724 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19725
19726 /* For dwarf info, we generate explicit stack update. */
19727 tmp = gen_rtx_SET (VOIDmode,
19728 stack_pointer_rtx,
19729 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19730 RTX_FRAME_RELATED_P (tmp) = 1;
19731 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19732
19733 /* Save registers. */
19734 offset = - 4 * num_regs;
19735 j = 0;
19736 while (j <= LAST_ARM_REGNUM)
19737 if (saved_regs_mask & (1 << j))
19738 {
19739 if ((j % 2 == 0)
19740 && (saved_regs_mask & (1 << (j + 1))))
19741 {
19742 /* The current register and the next register form a register
19743 pair for which STRD can be generated. */
19744 if (offset < 0)
19745 {
19746 /* Allocate stack space for all saved registers. */
19747 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19748 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19749 mem = gen_frame_mem (DImode, tmp);
19750 offset = 0;
19751 }
19752 else if (offset > 0)
19753 mem = gen_frame_mem (DImode,
19754 plus_constant (Pmode,
19755 stack_pointer_rtx,
19756 offset));
19757 else
19758 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19759
19760 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19761 RTX_FRAME_RELATED_P (tmp) = 1;
19762 tmp = emit_insn (tmp);
19763
19764 /* Record the first store insn. */
19765 if (dwarf_index == 1)
19766 insn = tmp;
19767
19768 /* Generate dwarf info. */
19769 mem = gen_frame_mem (SImode,
19770 plus_constant (Pmode,
19771 stack_pointer_rtx,
19772 offset));
19773 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19774 RTX_FRAME_RELATED_P (tmp) = 1;
19775 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19776
19777 mem = gen_frame_mem (SImode,
19778 plus_constant (Pmode,
19779 stack_pointer_rtx,
19780 offset + 4));
19781 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19782 RTX_FRAME_RELATED_P (tmp) = 1;
19783 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19784
19785 offset += 8;
19786 j += 2;
19787 }
19788 else
19789 {
19790 /* Emit a single word store. */
19791 if (offset < 0)
19792 {
19793 /* Allocate stack space for all saved registers. */
19794 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19795 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19796 mem = gen_frame_mem (SImode, tmp);
19797 offset = 0;
19798 }
19799 else if (offset > 0)
19800 mem = gen_frame_mem (SImode,
19801 plus_constant (Pmode,
19802 stack_pointer_rtx,
19803 offset));
19804 else
19805 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19806
19807 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19808 RTX_FRAME_RELATED_P (tmp) = 1;
19809 tmp = emit_insn (tmp);
19810
19811 /* Record the first store insn. */
19812 if (dwarf_index == 1)
19813 insn = tmp;
19814
19815 /* Generate dwarf info. */
19816 mem = gen_frame_mem (SImode,
19817 plus_constant(Pmode,
19818 stack_pointer_rtx,
19819 offset));
19820 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19821 RTX_FRAME_RELATED_P (tmp) = 1;
19822 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19823
19824 offset += 4;
19825 j += 1;
19826 }
19827 }
19828 else
19829 j++;
19830
19831 /* Attach dwarf info to the first insn we generate. */
19832 gcc_assert (insn != NULL_RTX);
19833 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19834 RTX_FRAME_RELATED_P (insn) = 1;
19835 }
19836
19837 /* Generate and emit an insn that we will recognize as a push_multi.
19838 Unfortunately, since this insn does not reflect very well the actual
19839 semantics of the operation, we need to annotate the insn for the benefit
19840 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19841 MASK for registers that should be annotated for DWARF2 frame unwind
19842 information. */
19843 static rtx
19844 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19845 {
19846 int num_regs = 0;
19847 int num_dwarf_regs = 0;
19848 int i, j;
19849 rtx par;
19850 rtx dwarf;
19851 int dwarf_par_index;
19852 rtx tmp, reg;
19853
19854 /* We don't record the PC in the dwarf frame information. */
19855 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19856
19857 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19858 {
19859 if (mask & (1 << i))
19860 num_regs++;
19861 if (dwarf_regs_mask & (1 << i))
19862 num_dwarf_regs++;
19863 }
19864
19865 gcc_assert (num_regs && num_regs <= 16);
19866 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19867
19868 /* For the body of the insn we are going to generate an UNSPEC in
19869 parallel with several USEs. This allows the insn to be recognized
19870 by the push_multi pattern in the arm.md file.
19871
19872 The body of the insn looks something like this:
19873
19874 (parallel [
19875 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19876 (const_int:SI <num>)))
19877 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19878 (use (reg:SI XX))
19879 (use (reg:SI YY))
19880 ...
19881 ])
19882
19883 For the frame note however, we try to be more explicit and actually
19884 show each register being stored into the stack frame, plus a (single)
19885 decrement of the stack pointer. We do it this way in order to be
19886 friendly to the stack unwinding code, which only wants to see a single
19887 stack decrement per instruction. The RTL we generate for the note looks
19888 something like this:
19889
19890 (sequence [
19891 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19892 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19893 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19894 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19895 ...
19896 ])
19897
19898 FIXME: In an ideal world the PRE_MODIFY would not exist and
19899 instead we'd have a parallel expression detailing all
19900 the stores to the various memory addresses so that debug
19901 information is more up-to-date. Remember however while writing
19902 this to take care of the constraints with the push instruction.
19903
19904 Note also that this has to be taken care of for the VFP registers.
19905
19906 For more see PR43399. */
19907
19908 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19909 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19910 dwarf_par_index = 1;
19911
19912 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19913 {
19914 if (mask & (1 << i))
19915 {
19916 reg = gen_rtx_REG (SImode, i);
19917
19918 XVECEXP (par, 0, 0)
19919 = gen_rtx_SET (VOIDmode,
19920 gen_frame_mem
19921 (BLKmode,
19922 gen_rtx_PRE_MODIFY (Pmode,
19923 stack_pointer_rtx,
19924 plus_constant
19925 (Pmode, stack_pointer_rtx,
19926 -4 * num_regs))
19927 ),
19928 gen_rtx_UNSPEC (BLKmode,
19929 gen_rtvec (1, reg),
19930 UNSPEC_PUSH_MULT));
19931
19932 if (dwarf_regs_mask & (1 << i))
19933 {
19934 tmp = gen_rtx_SET (VOIDmode,
19935 gen_frame_mem (SImode, stack_pointer_rtx),
19936 reg);
19937 RTX_FRAME_RELATED_P (tmp) = 1;
19938 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19939 }
19940
19941 break;
19942 }
19943 }
19944
19945 for (j = 1, i++; j < num_regs; i++)
19946 {
19947 if (mask & (1 << i))
19948 {
19949 reg = gen_rtx_REG (SImode, i);
19950
19951 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19952
19953 if (dwarf_regs_mask & (1 << i))
19954 {
19955 tmp
19956 = gen_rtx_SET (VOIDmode,
19957 gen_frame_mem
19958 (SImode,
19959 plus_constant (Pmode, stack_pointer_rtx,
19960 4 * j)),
19961 reg);
19962 RTX_FRAME_RELATED_P (tmp) = 1;
19963 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19964 }
19965
19966 j++;
19967 }
19968 }
19969
19970 par = emit_insn (par);
19971
19972 tmp = gen_rtx_SET (VOIDmode,
19973 stack_pointer_rtx,
19974 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19975 RTX_FRAME_RELATED_P (tmp) = 1;
19976 XVECEXP (dwarf, 0, 0) = tmp;
19977
19978 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19979
19980 return par;
19981 }
19982
19983 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19984 SIZE is the offset to be adjusted.
19985 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19986 static void
19987 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19988 {
19989 rtx dwarf;
19990
19991 RTX_FRAME_RELATED_P (insn) = 1;
19992 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
19993 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19994 }
19995
19996 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19997 SAVED_REGS_MASK shows which registers need to be restored.
19998
19999 Unfortunately, since this insn does not reflect very well the actual
20000 semantics of the operation, we need to annotate the insn for the benefit
20001 of DWARF2 frame unwind information. */
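/* For example (a sketch), a SAVED_REGS_MASK covering just {r4, r5}
   produces a parallel equivalent to "pop {r4, r5}":

     (parallel [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 8)))
                (set (reg:SI r4) (mem:SI (reg:SI sp)))
                (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))])

   together with REG_CFA_RESTORE notes for r4 and r5 and a
   REG_CFA_ADJUST_CFA note for the 8-byte stack adjustment.  */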
20002 static void
20003 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20004 {
20005 int num_regs = 0;
20006 int i, j;
20007 rtx par;
20008 rtx dwarf = NULL_RTX;
20009 rtx tmp, reg;
20010 bool return_in_pc;
20011 int offset_adj;
20012 int emit_update;
20013
20014 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20015 offset_adj = return_in_pc ? 1 : 0;
20016 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20017 if (saved_regs_mask & (1 << i))
20018 num_regs++;
20019
20020 gcc_assert (num_regs && num_regs <= 16);
20021
20022 /* If SP is in reglist, then we don't emit SP update insn. */
20023 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20024
20025 /* The parallel needs to hold num_regs SETs
20026 and one SET for the stack update. */
20027 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20028
20029 if (return_in_pc)
20030 {
20031 tmp = ret_rtx;
20032 XVECEXP (par, 0, 0) = tmp;
20033 }
20034
20035 if (emit_update)
20036 {
20037 /* Increment the stack pointer, based on there being
20038 num_regs 4-byte registers to restore. */
20039 tmp = gen_rtx_SET (VOIDmode,
20040 stack_pointer_rtx,
20041 plus_constant (Pmode,
20042 stack_pointer_rtx,
20043 4 * num_regs));
20044 RTX_FRAME_RELATED_P (tmp) = 1;
20045 XVECEXP (par, 0, offset_adj) = tmp;
20046 }
20047
20048 /* Now restore every reg, which may include PC. */
20049 for (j = 0, i = 0; j < num_regs; i++)
20050 if (saved_regs_mask & (1 << i))
20051 {
20052 reg = gen_rtx_REG (SImode, i);
20053 if ((num_regs == 1) && emit_update && !return_in_pc)
20054 {
20055 /* Emit single load with writeback. */
20056 tmp = gen_frame_mem (SImode,
20057 gen_rtx_POST_INC (Pmode,
20058 stack_pointer_rtx));
20059 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20060 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20061 return;
20062 }
20063
20064 tmp = gen_rtx_SET (VOIDmode,
20065 reg,
20066 gen_frame_mem
20067 (SImode,
20068 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20069 RTX_FRAME_RELATED_P (tmp) = 1;
20070 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20071
20072 /* We need to maintain a sequence for DWARF info too. As dwarf info
20073 should not have PC, skip PC. */
20074 if (i != PC_REGNUM)
20075 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20076
20077 j++;
20078 }
20079
20080 if (return_in_pc)
20081 par = emit_jump_insn (par);
20082 else
20083 par = emit_insn (par);
20084
20085 REG_NOTES (par) = dwarf;
20086 if (!return_in_pc)
20087 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20088 stack_pointer_rtx, stack_pointer_rtx);
20089 }
20090
20091 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20092 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20093
20094 Unfortunately, since this insn does not reflect very well the actual
20095 semantics of the operation, we need to annotate the insn for the benefit
20096 of DWARF2 frame unwind information. */
20097 static void
20098 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20099 {
20100 int i, j;
20101 rtx par;
20102 rtx dwarf = NULL_RTX;
20103 rtx tmp, reg;
20104
20105 gcc_assert (num_regs && num_regs <= 32);
20106
20107 /* Workaround ARM10 VFPr1 bug. */
20108 if (num_regs == 2 && !arm_arch6)
20109 {
20110 if (first_reg == 15)
20111 first_reg--;
20112
20113 num_regs++;
20114 }
20115
20116 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20117 there could be up to 32 D-registers to restore.
20118 If there are more than 16 D-registers, make two recursive calls,
20119 each of which emits one pop_multi instruction. */
20120 if (num_regs > 16)
20121 {
20122 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20123 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20124 return;
20125 }
20126
20127 /* The parallel needs to hold num_regs SETs
20128 and one SET for the stack update. */
20129 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20130
20131 /* Increment the stack pointer, based on there being
20132 num_regs 8-byte registers to restore. */
20133 tmp = gen_rtx_SET (VOIDmode,
20134 base_reg,
20135 plus_constant (Pmode, base_reg, 8 * num_regs));
20136 RTX_FRAME_RELATED_P (tmp) = 1;
20137 XVECEXP (par, 0, 0) = tmp;
20138
20139 /* Now show every reg that will be restored, using a SET for each. */
20140 for (j = 0, i=first_reg; j < num_regs; i += 2)
20141 {
20142 reg = gen_rtx_REG (DFmode, i);
20143
20144 tmp = gen_rtx_SET (VOIDmode,
20145 reg,
20146 gen_frame_mem
20147 (DFmode,
20148 plus_constant (Pmode, base_reg, 8 * j)));
20149 RTX_FRAME_RELATED_P (tmp) = 1;
20150 XVECEXP (par, 0, j + 1) = tmp;
20151
20152 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20153
20154 j++;
20155 }
20156
20157 par = emit_insn (par);
20158 REG_NOTES (par) = dwarf;
20159
20160 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP. */
20161 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20162 {
20163 RTX_FRAME_RELATED_P (par) = 1;
20164 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20165 }
20166 else
20167 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20168 base_reg, base_reg);
20169 }
20170
20171 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20172 even number of registers is being popped, multiple LDRD patterns are created
20173 for all register pairs. If an odd number of registers is popped, the last
20174 register is loaded using an LDR pattern. */
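/* For example (a sketch with a hypothetical register set): popping
   {r4, r5, r6, pc} corresponds roughly to

       ldrd    r4, r5, [sp]
       add     sp, sp, #8
       pop     {r6, pc}

   i.e. the complete pairs are restored with LDRD, the stack pointer is
   then adjusted, and the remaining register together with the PC is
   handled by arm_emit_multi_reg_pop.  */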
20175 static void
20176 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20177 {
20178 int num_regs = 0;
20179 int i, j;
20180 rtx par = NULL_RTX;
20181 rtx dwarf = NULL_RTX;
20182 rtx tmp, reg, tmp1;
20183 bool return_in_pc;
20184
20185 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20186 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20187 if (saved_regs_mask & (1 << i))
20188 num_regs++;
20189
20190 gcc_assert (num_regs && num_regs <= 16);
20191
20192 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20193 to be popped. So, if num_regs is even, now it will become odd,
20194 and we can generate pop with PC. If num_regs is odd, it will be
20195 even now, and ldr with return can be generated for PC. */
20196 if (return_in_pc)
20197 num_regs--;
20198
20199 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20200
20201 /* Var j iterates over all the registers to gather all the registers in
20202 saved_regs_mask. Var i gives index of saved registers in stack frame.
20203 A PARALLEL RTX of register-pair is created here, so that pattern for
20204 LDRD can be matched. As PC is always last register to be popped, and
20205 we have already decremented num_regs if PC, we don't have to worry
20206 about PC in this loop. */
20207 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20208 if (saved_regs_mask & (1 << j))
20209 {
20210 /* Create RTX for memory load. */
20211 reg = gen_rtx_REG (SImode, j);
20212 tmp = gen_rtx_SET (SImode,
20213 reg,
20214 gen_frame_mem (SImode,
20215 plus_constant (Pmode,
20216 stack_pointer_rtx, 4 * i)));
20217 RTX_FRAME_RELATED_P (tmp) = 1;
20218
20219 if (i % 2 == 0)
20220 {
20221 /* When saved-register index (i) is even, the RTX to be emitted is
20222 yet to be created. Hence create it first. The LDRD pattern we
20223 are generating is :
20224 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20225 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20226 where target registers need not be consecutive. */
20227 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20228 dwarf = NULL_RTX;
20229 }
20230
20231 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20232 added as 0th element and if i is odd, reg_i is added as 1st element
20233 of LDRD pattern shown above. */
20234 XVECEXP (par, 0, (i % 2)) = tmp;
20235 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20236
20237 if ((i % 2) == 1)
20238 {
20239 /* When saved-register index (i) is odd, RTXs for both the registers
20240 to be loaded are generated in above given LDRD pattern, and the
20241 pattern can be emitted now. */
20242 par = emit_insn (par);
20243 REG_NOTES (par) = dwarf;
20244 RTX_FRAME_RELATED_P (par) = 1;
20245 }
20246
20247 i++;
20248 }
20249
20250 /* If the number of registers pushed is odd and return_in_pc is false, or the
20251 number of registers is even and return_in_pc is true, the last register is
20252 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20253 then emit an LDR with post-increment. */
20254
20255 /* Increment the stack pointer, based on there being
20256 num_regs 4-byte registers to restore. */
20257 tmp = gen_rtx_SET (VOIDmode,
20258 stack_pointer_rtx,
20259 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20260 RTX_FRAME_RELATED_P (tmp) = 1;
20261 tmp = emit_insn (tmp);
20262 if (!return_in_pc)
20263 {
20264 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20265 stack_pointer_rtx, stack_pointer_rtx);
20266 }
20267
20268 dwarf = NULL_RTX;
20269
20270 if (((num_regs % 2) == 1 && !return_in_pc)
20271 || ((num_regs % 2) == 0 && return_in_pc))
20272 {
20273 /* Scan for the single register to be popped. Skip until the saved
20274 register is found. */
20275 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20276
20277 /* Gen LDR with post increment here. */
20278 tmp1 = gen_rtx_MEM (SImode,
20279 gen_rtx_POST_INC (SImode,
20280 stack_pointer_rtx));
20281 set_mem_alias_set (tmp1, get_frame_alias_set ());
20282
20283 reg = gen_rtx_REG (SImode, j);
20284 tmp = gen_rtx_SET (SImode, reg, tmp1);
20285 RTX_FRAME_RELATED_P (tmp) = 1;
20286 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20287
20288 if (return_in_pc)
20289 {
20290 /* If return_in_pc, j must be PC_REGNUM. */
20291 gcc_assert (j == PC_REGNUM);
20292 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20293 XVECEXP (par, 0, 0) = ret_rtx;
20294 XVECEXP (par, 0, 1) = tmp;
20295 par = emit_jump_insn (par);
20296 }
20297 else
20298 {
20299 par = emit_insn (tmp);
20300 REG_NOTES (par) = dwarf;
20301 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20302 stack_pointer_rtx, stack_pointer_rtx);
20303 }
20304
20305 }
20306 else if ((num_regs % 2) == 1 && return_in_pc)
20307 {
20308 /* There are 2 registers to be popped. So, generate the pattern
20309 pop_multiple_with_stack_update_and_return to pop in PC. */
20310 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20311 }
20312
20313 return;
20314 }
20315
20316 /* LDRD in ARM mode needs consecutive registers as operands. This function
20317 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20318 offset addressing and then generates one separate stack update. This provides
20319 more scheduling freedom, compared to writeback on every load. However,
20320 if the function returns using load into PC directly
20321 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20322 before the last load. TODO: Add a peephole optimization to recognize
20323 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20324 peephole optimization to merge the load at stack-offset zero
20325 with the stack update instruction using load with writeback
20326 in post-index addressing mode. */
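/* As a rough illustration (assuming the exact register set below), popping
   {r4, r5, r6, r7, r8} with this routine would be expected to produce
   something like:

        ldrd    r4, r5, [sp]
        ldrd    r6, r7, [sp, #8]
        ldr     r8, [sp, #16]
        add     sp, sp, #20

   i.e. offset addressing throughout, followed by a single stack update.  */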
20327 static void
20328 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20329 {
20330 int j = 0;
20331 int offset = 0;
20332 rtx par = NULL_RTX;
20333 rtx dwarf = NULL_RTX;
20334 rtx tmp, mem;
20335
20336 /* Restore saved registers. */
20337 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20338 j = 0;
20339 while (j <= LAST_ARM_REGNUM)
20340 if (saved_regs_mask & (1 << j))
20341 {
20342 if ((j % 2) == 0
20343 && (saved_regs_mask & (1 << (j + 1)))
20344 && (j + 1) != PC_REGNUM)
20345 {
20346 /* Current register and next register form register pair for which
20347 LDRD can be generated. PC is always the last register popped, and
20348 we handle it separately. */
20349 if (offset > 0)
20350 mem = gen_frame_mem (DImode,
20351 plus_constant (Pmode,
20352 stack_pointer_rtx,
20353 offset));
20354 else
20355 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20356
20357 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20358 tmp = emit_insn (tmp);
20359 RTX_FRAME_RELATED_P (tmp) = 1;
20360
20361 /* Generate dwarf info. */
20362
20363 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20364 gen_rtx_REG (SImode, j),
20365 NULL_RTX);
20366 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20367 gen_rtx_REG (SImode, j + 1),
20368 dwarf);
20369
20370 REG_NOTES (tmp) = dwarf;
20371
20372 offset += 8;
20373 j += 2;
20374 }
20375 else if (j != PC_REGNUM)
20376 {
20377 /* Emit a single word load. */
20378 if (offset > 0)
20379 mem = gen_frame_mem (SImode,
20380 plus_constant (Pmode,
20381 stack_pointer_rtx,
20382 offset));
20383 else
20384 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20385
20386 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20387 tmp = emit_insn (tmp);
20388 RTX_FRAME_RELATED_P (tmp) = 1;
20389
20390 /* Generate dwarf info. */
20391 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20392 gen_rtx_REG (SImode, j),
20393 NULL_RTX);
20394
20395 offset += 4;
20396 j += 1;
20397 }
20398 else /* j == PC_REGNUM */
20399 j++;
20400 }
20401 else
20402 j++;
20403
20404 /* Update the stack. */
20405 if (offset > 0)
20406 {
20407 tmp = gen_rtx_SET (Pmode,
20408 stack_pointer_rtx,
20409 plus_constant (Pmode,
20410 stack_pointer_rtx,
20411 offset));
20412 tmp = emit_insn (tmp);
20413 arm_add_cfa_adjust_cfa_note (tmp, offset,
20414 stack_pointer_rtx, stack_pointer_rtx);
20415 offset = 0;
20416 }
20417
20418 if (saved_regs_mask & (1 << PC_REGNUM))
20419 {
20420 /* Only PC is to be popped. */
20421 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20422 XVECEXP (par, 0, 0) = ret_rtx;
20423 tmp = gen_rtx_SET (SImode,
20424 gen_rtx_REG (SImode, PC_REGNUM),
20425 gen_frame_mem (SImode,
20426 gen_rtx_POST_INC (SImode,
20427 stack_pointer_rtx)));
20428 RTX_FRAME_RELATED_P (tmp) = 1;
20429 XVECEXP (par, 0, 1) = tmp;
20430 par = emit_jump_insn (par);
20431
20432 /* Generate dwarf info. */
20433 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20434 gen_rtx_REG (SImode, PC_REGNUM),
20435 NULL_RTX);
20436 REG_NOTES (par) = dwarf;
20437 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20438 stack_pointer_rtx, stack_pointer_rtx);
20439 }
20440 }
20441
20442 /* Calculate the size of the return value that is passed in registers. */
20443 static unsigned
20444 arm_size_return_regs (void)
20445 {
20446 enum machine_mode mode;
20447
20448 if (crtl->return_rtx != 0)
20449 mode = GET_MODE (crtl->return_rtx);
20450 else
20451 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20452
20453 return GET_MODE_SIZE (mode);
20454 }
20455
20456 /* Return true if the current function needs to save/restore LR. */
20457 static bool
20458 thumb_force_lr_save (void)
20459 {
20460 return !cfun->machine->lr_save_eliminated
20461 && (!leaf_function_p ()
20462 || thumb_far_jump_used_p ()
20463 || df_regs_ever_live_p (LR_REGNUM));
20464 }
20465
20466 /* We do not know whether r3 will be available, because
20467 an indirect tail call is happening in this
20468 particular case. */
20469 static bool
20470 is_indirect_tailcall_p (rtx call)
20471 {
20472 rtx pat = PATTERN (call);
20473
20474 /* Indirect tail call. */
20475 pat = XVECEXP (pat, 0, 0);
20476 if (GET_CODE (pat) == SET)
20477 pat = SET_SRC (pat);
20478
20479 pat = XEXP (XEXP (pat, 0), 0);
20480 return REG_P (pat);
20481 }
20482
20483 /* Return true if r3 is used by any of the tail call insns in the
20484 current function. */
20485 static bool
20486 any_sibcall_could_use_r3 (void)
20487 {
20488 edge_iterator ei;
20489 edge e;
20490
20491 if (!crtl->tail_call_emit)
20492 return false;
20493 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20494 if (e->flags & EDGE_SIBCALL)
20495 {
20496 rtx call = BB_END (e->src);
20497 if (!CALL_P (call))
20498 call = prev_nonnote_nondebug_insn (call);
20499 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20500 if (find_regno_fusage (call, USE, 3)
20501 || is_indirect_tailcall_p (call))
20502 return true;
20503 }
20504 return false;
20505 }
20506
20507
20508 /* Compute the distance from register FROM to register TO.
20509 These can be the arg pointer (26), the soft frame pointer (25),
20510 the stack pointer (13) or the hard frame pointer (11).
20511 In thumb mode r7 is used as the soft frame pointer, if needed.
20512 Typical stack layout looks like this:
20513
20514        old stack pointer -> |    |
20515                              ----
20516                             |    | \
20517                             |    |   saved arguments for
20518                             |    |   vararg functions
20519                             |    | /
20520                              ----
20521    hard FP & arg pointer -> |    | \
20522                             |    |   stack
20523                             |    |   frame
20524                             |    | /
20525                              ----
20526                             |    | \
20527                             |    |   call saved
20528                             |    |   registers
20529       soft frame pointer -> |    | /
20530                              ----
20531                             |    | \
20532                             |    |   local
20533                             |    |   variables
20534      locals base pointer -> |    | /
20535                              ----
20536                             |    | \
20537                             |    |   outgoing
20538                             |    |   arguments
20539    current stack pointer -> |    | /
20540                              ----
20541
20542 For a given function some or all of these stack components
20543 may not be needed, giving rise to the possibility of
20544 eliminating some of the registers.
20545
20546 The values returned by this function must reflect the behavior
20547 of arm_expand_prologue() and arm_compute_save_reg_mask().
20548
20549 The sign of the number returned reflects the direction of stack
20550 growth, so the values are positive for all eliminations except
20551 from the soft frame pointer to the hard frame pointer.
20552
20553 SFP may point just inside the local variables block to ensure correct
20554 alignment. */
20555
20556
20557 /* Calculate stack offsets. These are used to calculate register elimination
20558 offsets and in prologue/epilogue code. Also calculates which registers
20559 should be saved. */
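/* As a rough worked example (ignoring the caller-interworking slot and any
   static chain): an ARM function that saves {r4, r5, fp, lr}, has 16 bytes
   of locals and no outgoing arguments would end up with saved_args == 0,
   saved_regs == 16, soft_frame == 16, locals_base == 32 and
   outgoing_args == 32.  */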
20560
20561 static arm_stack_offsets *
20562 arm_get_frame_offsets (void)
20563 {
20564 struct arm_stack_offsets *offsets;
20565 unsigned long func_type;
20566 int leaf;
20567 int saved;
20568 int core_saved;
20569 HOST_WIDE_INT frame_size;
20570 int i;
20571
20572 offsets = &cfun->machine->stack_offsets;
20573
20574 /* We need to know if we are a leaf function. Unfortunately, it
20575 is possible to be called after start_sequence has been called,
20576 which causes get_insns to return the insns for the sequence,
20577 not the function, which will cause leaf_function_p to return
20578 the incorrect result.
20579
20580 We only need to know about leaf functions once reload has completed, and the
20581 frame size cannot be changed after that time, so we can safely
20582 use the cached value. */
20583
20584 if (reload_completed)
20585 return offsets;
20586
20587 /* Initially this is the size of the local variables. It will be translated
20588 into an offset once we have determined the size of preceding data. */
20589 frame_size = ROUND_UP_WORD (get_frame_size ());
20590
20591 leaf = leaf_function_p ();
20592
20593 /* Space for variadic functions. */
20594 offsets->saved_args = crtl->args.pretend_args_size;
20595
20596 /* In Thumb mode this is incorrect, but never used. */
20597 offsets->frame
20598 = (offsets->saved_args
20599 + arm_compute_static_chain_stack_bytes ()
20600 + (frame_pointer_needed ? 4 : 0));
20601
20602 if (TARGET_32BIT)
20603 {
20604 unsigned int regno;
20605
20606 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20607 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20608 saved = core_saved;
20609
20610 /* We know that SP will be doubleword aligned on entry, and we must
20611 preserve that condition at any subroutine call. We also require the
20612 soft frame pointer to be doubleword aligned. */
20613
20614 if (TARGET_REALLY_IWMMXT)
20615 {
20616 /* Check for the call-saved iWMMXt registers. */
20617 for (regno = FIRST_IWMMXT_REGNUM;
20618 regno <= LAST_IWMMXT_REGNUM;
20619 regno++)
20620 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20621 saved += 8;
20622 }
20623
20624 func_type = arm_current_func_type ();
20625 /* Space for saved VFP registers. */
20626 if (! IS_VOLATILE (func_type)
20627 && TARGET_HARD_FLOAT && TARGET_VFP)
20628 saved += arm_get_vfp_saved_size ();
20629 }
20630 else /* TARGET_THUMB1 */
20631 {
20632 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20633 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20634 saved = core_saved;
20635 if (TARGET_BACKTRACE)
20636 saved += 16;
20637 }
20638
20639 /* Saved registers include the stack frame. */
20640 offsets->saved_regs
20641 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20642 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20643
20644 /* A leaf function does not need any stack alignment if it has nothing
20645 on the stack. */
20646 if (leaf && frame_size == 0
20647 /* However if it calls alloca(), we have a dynamically allocated
20648 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20649 && ! cfun->calls_alloca)
20650 {
20651 offsets->outgoing_args = offsets->soft_frame;
20652 offsets->locals_base = offsets->soft_frame;
20653 return offsets;
20654 }
20655
20656 /* Ensure SFP has the correct alignment. */
20657 if (ARM_DOUBLEWORD_ALIGN
20658 && (offsets->soft_frame & 7))
20659 {
20660 offsets->soft_frame += 4;
20661 /* Try to align stack by pushing an extra reg. Don't bother doing this
20662 when there is a stack frame as the alignment will be rolled into
20663 the normal stack adjustment. */
20664 if (frame_size + crtl->outgoing_args_size == 0)
20665 {
20666 int reg = -1;
20667
20668 /* If it is safe to use r3, then do so. This sometimes
20669 generates better code on Thumb-2 by avoiding the need to
20670 use 32-bit push/pop instructions. */
20671 if (! any_sibcall_could_use_r3 ()
20672 && arm_size_return_regs () <= 12
20673 && (offsets->saved_regs_mask & (1 << 3)) == 0
20674 && (TARGET_THUMB2
20675 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20676 {
20677 reg = 3;
20678 }
20679 else
20680 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20681 {
20682 /* Avoid fixed registers; they may be changed at
20683 arbitrary times so it's unsafe to restore them
20684 during the epilogue. */
20685 if (!fixed_regs[i]
20686 && (offsets->saved_regs_mask & (1 << i)) == 0)
20687 {
20688 reg = i;
20689 break;
20690 }
20691 }
20692
20693 if (reg != -1)
20694 {
20695 offsets->saved_regs += 4;
20696 offsets->saved_regs_mask |= (1 << reg);
20697 }
20698 }
20699 }
20700
20701 offsets->locals_base = offsets->soft_frame + frame_size;
20702 offsets->outgoing_args = (offsets->locals_base
20703 + crtl->outgoing_args_size);
20704
20705 if (ARM_DOUBLEWORD_ALIGN)
20706 {
20707 /* Ensure SP remains doubleword aligned. */
20708 if (offsets->outgoing_args & 7)
20709 offsets->outgoing_args += 4;
20710 gcc_assert (!(offsets->outgoing_args & 7));
20711 }
20712
20713 return offsets;
20714 }
20715
20716
20717 /* Calculate the relative offsets for the different stack pointers. Positive
20718 offsets are in the direction of stack growth. */
20719
20720 HOST_WIDE_INT
20721 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20722 {
20723 arm_stack_offsets *offsets;
20724
20725 offsets = arm_get_frame_offsets ();
20726
20727 /* OK, now we have enough information to compute the distances.
20728 There must be an entry in these switch tables for each pair
20729 of registers in ELIMINABLE_REGS, even if some of the entries
20730 seem to be redundant or useless. */
20731 switch (from)
20732 {
20733 case ARG_POINTER_REGNUM:
20734 switch (to)
20735 {
20736 case THUMB_HARD_FRAME_POINTER_REGNUM:
20737 return 0;
20738
20739 case FRAME_POINTER_REGNUM:
20740 /* This is the reverse of the soft frame pointer
20741 to hard frame pointer elimination below. */
20742 return offsets->soft_frame - offsets->saved_args;
20743
20744 case ARM_HARD_FRAME_POINTER_REGNUM:
20745 /* This is only non-zero in the case where the static chain register
20746 is stored above the frame. */
20747 return offsets->frame - offsets->saved_args - 4;
20748
20749 case STACK_POINTER_REGNUM:
20750 /* If nothing has been pushed on the stack at all
20751 then this will return -4. This *is* correct! */
20752 return offsets->outgoing_args - (offsets->saved_args + 4);
20753
20754 default:
20755 gcc_unreachable ();
20756 }
20757 gcc_unreachable ();
20758
20759 case FRAME_POINTER_REGNUM:
20760 switch (to)
20761 {
20762 case THUMB_HARD_FRAME_POINTER_REGNUM:
20763 return 0;
20764
20765 case ARM_HARD_FRAME_POINTER_REGNUM:
20766 /* The hard frame pointer points to the top entry in the
20767 stack frame. The soft frame pointer to the bottom entry
20768 in the stack frame. If there is no stack frame at all,
20769 then they are identical. */
20770
20771 return offsets->frame - offsets->soft_frame;
20772
20773 case STACK_POINTER_REGNUM:
20774 return offsets->outgoing_args - offsets->soft_frame;
20775
20776 default:
20777 gcc_unreachable ();
20778 }
20779 gcc_unreachable ();
20780
20781 default:
20782 /* You cannot eliminate from the stack pointer.
20783 In theory you could eliminate from the hard frame
20784 pointer to the stack pointer, but this will never
20785 happen, since if a stack frame is not needed the
20786 hard frame pointer will never be used. */
20787 gcc_unreachable ();
20788 }
20789 }
20790
20791 /* Given FROM and TO register numbers, say whether this elimination is
20792 allowed. Frame pointer elimination is automatically handled.
20793
20794 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20795 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20796 pointer, we must eliminate FRAME_POINTER_REGNUM into
20797 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20798 ARG_POINTER_REGNUM. */
20799
20800 bool
20801 arm_can_eliminate (const int from, const int to)
20802 {
20803 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20804 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20805 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20806 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20807 true);
20808 }
20809
20810 /* Emit RTL to save coprocessor registers on function entry. Returns the
20811 number of bytes pushed. */
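/* As a rough illustration: if only d8-d15 are live and call-saved, the VFP
   loop below typically collapses to a single vfp_emit_fstmd call covering
   those eight registers, i.e. roughly a "vpush {d8-d15}" in UAL syntax.  */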
20812
20813 static int
20814 arm_save_coproc_regs(void)
20815 {
20816 int saved_size = 0;
20817 unsigned reg;
20818 unsigned start_reg;
20819 rtx insn;
20820
20821 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20822 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20823 {
20824 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20825 insn = gen_rtx_MEM (V2SImode, insn);
20826 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20827 RTX_FRAME_RELATED_P (insn) = 1;
20828 saved_size += 8;
20829 }
20830
20831 if (TARGET_HARD_FLOAT && TARGET_VFP)
20832 {
20833 start_reg = FIRST_VFP_REGNUM;
20834
20835 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20836 {
20837 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20838 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20839 {
20840 if (start_reg != reg)
20841 saved_size += vfp_emit_fstmd (start_reg,
20842 (reg - start_reg) / 2);
20843 start_reg = reg + 2;
20844 }
20845 }
20846 if (start_reg != reg)
20847 saved_size += vfp_emit_fstmd (start_reg,
20848 (reg - start_reg) / 2);
20849 }
20850 return saved_size;
20851 }
20852
20853
20854 /* Set the Thumb frame pointer from the stack pointer. */
20855
20856 static void
20857 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20858 {
20859 HOST_WIDE_INT amount;
20860 rtx insn, dwarf;
20861
20862 amount = offsets->outgoing_args - offsets->locals_base;
20863 if (amount < 1024)
20864 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20865 stack_pointer_rtx, GEN_INT (amount)));
20866 else
20867 {
20868 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20869 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20870 expects the first two operands to be the same. */
20871 if (TARGET_THUMB2)
20872 {
20873 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20874 stack_pointer_rtx,
20875 hard_frame_pointer_rtx));
20876 }
20877 else
20878 {
20879 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20880 hard_frame_pointer_rtx,
20881 stack_pointer_rtx));
20882 }
20883 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20884 plus_constant (Pmode, stack_pointer_rtx, amount));
20885 RTX_FRAME_RELATED_P (dwarf) = 1;
20886 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20887 }
20888
20889 RTX_FRAME_RELATED_P (insn) = 1;
20890 }
20891
20892 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20893 function. */
20894 void
20895 arm_expand_prologue (void)
20896 {
20897 rtx amount;
20898 rtx insn;
20899 rtx ip_rtx;
20900 unsigned long live_regs_mask;
20901 unsigned long func_type;
20902 int fp_offset = 0;
20903 int saved_pretend_args = 0;
20904 int saved_regs = 0;
20905 unsigned HOST_WIDE_INT args_to_push;
20906 arm_stack_offsets *offsets;
20907
20908 func_type = arm_current_func_type ();
20909
20910 /* Naked functions don't have prologues. */
20911 if (IS_NAKED (func_type))
20912 return;
20913
20914 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20915 args_to_push = crtl->args.pretend_args_size;
20916
20917 /* Compute which registers we will have to save onto the stack. */
20918 offsets = arm_get_frame_offsets ();
20919 live_regs_mask = offsets->saved_regs_mask;
20920
20921 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20922
20923 if (IS_STACKALIGN (func_type))
20924 {
20925 rtx r0, r1;
20926
20927 /* Handle a word-aligned stack pointer. We generate the following:
20928
20929 mov r0, sp
20930 bic r1, r0, #7
20931 mov sp, r1
20932 <save and restore r0 in normal prologue/epilogue>
20933 mov sp, r0
20934 bx lr
20935
20936 The unwinder doesn't need to know about the stack realignment.
20937 Just tell it we saved SP in r0. */
20938 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20939
20940 r0 = gen_rtx_REG (SImode, 0);
20941 r1 = gen_rtx_REG (SImode, 1);
20942
20943 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20944 RTX_FRAME_RELATED_P (insn) = 1;
20945 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20946
20947 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20948
20949 /* ??? The CFA changes here, which may cause GDB to conclude that it
20950 has entered a different function. That said, the unwind info is
20951 correct, individually, before and after this instruction because
20952 we've described the save of SP, which will override the default
20953 handling of SP as restoring from the CFA. */
20954 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20955 }
20956
20957 /* For APCS frames, if the IP register is clobbered
20958 when creating the frame, save that register in a
20959 special way. */
20960 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20961 {
20962 if (IS_INTERRUPT (func_type))
20963 {
20964 /* Interrupt functions must not corrupt any registers.
20965 Creating a frame pointer however, corrupts the IP
20966 register, so we must push it first. */
20967 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
20968
20969 /* Do not set RTX_FRAME_RELATED_P on this insn.
20970 The dwarf stack unwinding code only wants to see one
20971 stack decrement per function, and this is not it. If
20972 this instruction is labeled as being part of the frame
20973 creation sequence then dwarf2out_frame_debug_expr will
20974 die when it encounters the assignment of IP to FP
20975 later on, since the use of SP here establishes SP as
20976 the CFA register and not IP.
20977
20978 Anyway this instruction is not really part of the stack
20979 frame creation although it is part of the prologue. */
20980 }
20981 else if (IS_NESTED (func_type))
20982 {
20983 /* The static chain register is the same as the IP register
20984 used as a scratch register during stack frame creation.
20985 To get around this, we need to find somewhere to store IP
20986 whilst the frame is being created. We try the following
20987 places in order:
20988
20989 1. The last argument register r3 if it is available.
20990 2. A slot on the stack above the frame if there are no
20991 arguments to push onto the stack.
20992 3. Register r3 again, after pushing the argument registers
20993 onto the stack, if this is a varargs function.
20994 4. The last slot on the stack created for the arguments to
20995 push, if this isn't a varargs function.
20996
20997 Note - we only need to tell the dwarf2 backend about the SP
20998 adjustment in the second variant; the static chain register
20999 doesn't need to be unwound, as it doesn't contain a value
21000 inherited from the caller. */
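/* As an illustration of variant 2 below: the store ends up as a
   pre-decrement of SP (roughly "str ip, [sp, #-4]!"), and only the 4-byte
   SP adjustment is described to the dwarf backend.  */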
21001
21002 if (!arm_r3_live_at_start_p ())
21003 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21004 else if (args_to_push == 0)
21005 {
21006 rtx addr, dwarf;
21007
21008 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21009 saved_regs += 4;
21010
21011 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21012 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21013 fp_offset = 4;
21014
21015 /* Just tell the dwarf backend that we adjusted SP. */
21016 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21017 plus_constant (Pmode, stack_pointer_rtx,
21018 -fp_offset));
21019 RTX_FRAME_RELATED_P (insn) = 1;
21020 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21021 }
21022 else
21023 {
21024 /* Store the args on the stack. */
21025 if (cfun->machine->uses_anonymous_args)
21026 {
21027 insn
21028 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21029 (0xf0 >> (args_to_push / 4)) & 0xf);
21030 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21031 saved_pretend_args = 1;
21032 }
21033 else
21034 {
21035 rtx addr, dwarf;
21036
21037 if (args_to_push == 4)
21038 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21039 else
21040 addr
21041 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21042 plus_constant (Pmode,
21043 stack_pointer_rtx,
21044 -args_to_push));
21045
21046 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21047
21048 /* Just tell the dwarf backend that we adjusted SP. */
21049 dwarf
21050 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21051 plus_constant (Pmode, stack_pointer_rtx,
21052 -args_to_push));
21053 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21054 }
21055
21056 RTX_FRAME_RELATED_P (insn) = 1;
21057 fp_offset = args_to_push;
21058 args_to_push = 0;
21059 }
21060 }
21061
21062 insn = emit_set_insn (ip_rtx,
21063 plus_constant (Pmode, stack_pointer_rtx,
21064 fp_offset));
21065 RTX_FRAME_RELATED_P (insn) = 1;
21066 }
21067
21068 if (args_to_push)
21069 {
21070 /* Push the argument registers, or reserve space for them. */
21071 if (cfun->machine->uses_anonymous_args)
21072 insn = emit_multi_reg_push
21073 ((0xf0 >> (args_to_push / 4)) & 0xf,
21074 (0xf0 >> (args_to_push / 4)) & 0xf);
21075 else
21076 insn = emit_insn
21077 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21078 GEN_INT (- args_to_push)));
21079 RTX_FRAME_RELATED_P (insn) = 1;
21080 }
21081
21082 /* If this is an interrupt service routine, and the link register
21083 is going to be pushed, and we're not generating the extra
21084 push of IP (needed when a frame is needed and the frame layout is APCS),
21085 subtracting four from LR now will mean that the function return
21086 can be done with a single instruction. */
21087 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21088 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21089 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21090 && TARGET_ARM)
21091 {
21092 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21093
21094 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21095 }
21096
21097 if (live_regs_mask)
21098 {
21099 unsigned long dwarf_regs_mask = live_regs_mask;
21100
21101 saved_regs += bit_count (live_regs_mask) * 4;
21102 if (optimize_size && !frame_pointer_needed
21103 && saved_regs == offsets->saved_regs - offsets->saved_args)
21104 {
21105 /* If no coprocessor registers are being pushed and we don't have
21106 to worry about a frame pointer then push extra registers to
21107 create the stack frame. This is done in a way that does not
21108 alter the frame layout, so it is independent of the epilogue. */
21109 int n;
21110 int frame;
21111 n = 0;
21112 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21113 n++;
21114 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21115 if (frame && n * 4 >= frame)
21116 {
21117 n = frame / 4;
21118 live_regs_mask |= (1 << n) - 1;
21119 saved_regs += frame;
21120 }
21121 }
21122
21123 if (TARGET_LDRD
21124 && current_tune->prefer_ldrd_strd
21125 && !optimize_function_for_size_p (cfun))
21126 {
21127 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21128 if (TARGET_THUMB2)
21129 thumb2_emit_strd_push (live_regs_mask);
21130 else if (TARGET_ARM
21131 && !TARGET_APCS_FRAME
21132 && !IS_INTERRUPT (func_type))
21133 arm_emit_strd_push (live_regs_mask);
21134 else
21135 {
21136 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21137 RTX_FRAME_RELATED_P (insn) = 1;
21138 }
21139 }
21140 else
21141 {
21142 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21143 RTX_FRAME_RELATED_P (insn) = 1;
21144 }
21145 }
21146
21147 if (! IS_VOLATILE (func_type))
21148 saved_regs += arm_save_coproc_regs ();
21149
21150 if (frame_pointer_needed && TARGET_ARM)
21151 {
21152 /* Create the new frame pointer. */
21153 if (TARGET_APCS_FRAME)
21154 {
21155 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21156 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21157 RTX_FRAME_RELATED_P (insn) = 1;
21158
21159 if (IS_NESTED (func_type))
21160 {
21161 /* Recover the static chain register. */
21162 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21163 insn = gen_rtx_REG (SImode, 3);
21164 else
21165 {
21166 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21167 insn = gen_frame_mem (SImode, insn);
21168 }
21169 emit_set_insn (ip_rtx, insn);
21170 /* Add a USE to stop propagate_one_insn() from barfing. */
21171 emit_insn (gen_force_register_use (ip_rtx));
21172 }
21173 }
21174 else
21175 {
21176 insn = GEN_INT (saved_regs - 4);
21177 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21178 stack_pointer_rtx, insn));
21179 RTX_FRAME_RELATED_P (insn) = 1;
21180 }
21181 }
21182
21183 if (flag_stack_usage_info)
21184 current_function_static_stack_size
21185 = offsets->outgoing_args - offsets->saved_args;
21186
21187 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21188 {
21189 /* This add can produce multiple insns for a large constant, so we
21190 need to get tricky. */
21191 rtx last = get_last_insn ();
21192
21193 amount = GEN_INT (offsets->saved_args + saved_regs
21194 - offsets->outgoing_args);
21195
21196 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21197 amount));
21198 do
21199 {
21200 last = last ? NEXT_INSN (last) : get_insns ();
21201 RTX_FRAME_RELATED_P (last) = 1;
21202 }
21203 while (last != insn);
21204
21205 /* If the frame pointer is needed, emit a special barrier that
21206 will prevent the scheduler from moving stores to the frame
21207 before the stack adjustment. */
21208 if (frame_pointer_needed)
21209 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21210 hard_frame_pointer_rtx));
21211 }
21212
21213
21214 if (frame_pointer_needed && TARGET_THUMB2)
21215 thumb_set_frame_pointer (offsets);
21216
21217 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21218 {
21219 unsigned long mask;
21220
21221 mask = live_regs_mask;
21222 mask &= THUMB2_WORK_REGS;
21223 if (!IS_NESTED (func_type))
21224 mask |= (1 << IP_REGNUM);
21225 arm_load_pic_register (mask);
21226 }
21227
21228 /* If we are profiling, make sure no instructions are scheduled before
21229 the call to mcount. Similarly if the user has requested no
21230 scheduling in the prolog. Similarly if we want non-call exceptions
21231 using the EABI unwinder, to prevent faulting instructions from being
21232 swapped with a stack adjustment. */
21233 if (crtl->profile || !TARGET_SCHED_PROLOG
21234 || (arm_except_unwind_info (&global_options) == UI_TARGET
21235 && cfun->can_throw_non_call_exceptions))
21236 emit_insn (gen_blockage ());
21237
21238 /* If the link register is being kept alive, with the return address in it,
21239 then make sure that it does not get reused by the ce2 pass. */
21240 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21241 cfun->machine->lr_save_eliminated = 1;
21242 }
21243 \f
21244 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21245 static void
21246 arm_print_condition (FILE *stream)
21247 {
21248 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21249 {
21250 /* Branch conversion is not implemented for Thumb-2. */
21251 if (TARGET_THUMB)
21252 {
21253 output_operand_lossage ("predicated Thumb instruction");
21254 return;
21255 }
21256 if (current_insn_predicate != NULL)
21257 {
21258 output_operand_lossage
21259 ("predicated instruction in conditional sequence");
21260 return;
21261 }
21262
21263 fputs (arm_condition_codes[arm_current_cc], stream);
21264 }
21265 else if (current_insn_predicate)
21266 {
21267 enum arm_cond_code code;
21268
21269 if (TARGET_THUMB1)
21270 {
21271 output_operand_lossage ("predicated Thumb instruction");
21272 return;
21273 }
21274
21275 code = get_arm_condition_code (current_insn_predicate);
21276 fputs (arm_condition_codes[code], stream);
21277 }
21278 }
21279
21280
21281 /* Globally reserved letters: acln
21282 Punctuation letters currently used: @_|?().!#
21283 Lower case letters currently used: bcdefhimpqtvwxyz
21284 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21285 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21286
21287 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21288
21289 If CODE is 'd', then the X is a condition operand and the instruction
21290 should only be executed if the condition is true.
21291 if CODE is 'D', then the X is a condition operand and the instruction
21292 should only be executed if the condition is false: however, if the mode
21293 of the comparison is CCFPEmode, then always execute the instruction -- we
21294 do this because in these circumstances !GE does not necessarily imply LT;
21295 in these cases the instruction pattern will take care to make sure that
21296 an instruction containing %d will follow, thereby undoing the effects of
21297 doing this instruction unconditionally.
21298 If CODE is 'N' then X is a floating point operand that must be negated
21299 before output.
21300 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21301 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
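/* Purely illustrative examples of the above: with X = (const_int 5), '%B'
   prints -6 (the sign-extended bitwise inverse); with X a DImode register
   starting at r4, '%M' prints "{r4-r5}".  */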
21302 static void
21303 arm_print_operand (FILE *stream, rtx x, int code)
21304 {
21305 switch (code)
21306 {
21307 case '@':
21308 fputs (ASM_COMMENT_START, stream);
21309 return;
21310
21311 case '_':
21312 fputs (user_label_prefix, stream);
21313 return;
21314
21315 case '|':
21316 fputs (REGISTER_PREFIX, stream);
21317 return;
21318
21319 case '?':
21320 arm_print_condition (stream);
21321 return;
21322
21323 case '(':
21324 /* Nothing in unified syntax, otherwise the current condition code. */
21325 if (!TARGET_UNIFIED_ASM)
21326 arm_print_condition (stream);
21327 break;
21328
21329 case ')':
21330 /* The current condition code in unified syntax, otherwise nothing. */
21331 if (TARGET_UNIFIED_ASM)
21332 arm_print_condition (stream);
21333 break;
21334
21335 case '.':
21336 /* The current condition code for a condition code setting instruction.
21337 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21338 if (TARGET_UNIFIED_ASM)
21339 {
21340 fputc('s', stream);
21341 arm_print_condition (stream);
21342 }
21343 else
21344 {
21345 arm_print_condition (stream);
21346 fputc('s', stream);
21347 }
21348 return;
21349
21350 case '!':
21351 /* If the instruction is conditionally executed then print
21352 the current condition code, otherwise print 's'. */
21353 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21354 if (current_insn_predicate)
21355 arm_print_condition (stream);
21356 else
21357 fputc('s', stream);
21358 break;
21359
21360 /* %# is a "break" sequence. It doesn't output anything, but is used to
21361 separate e.g. operand numbers from following text, if that text consists
21362 of further digits which we don't want to be part of the operand
21363 number. */
21364 case '#':
21365 return;
21366
21367 case 'N':
21368 {
21369 REAL_VALUE_TYPE r;
21370 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21371 r = real_value_negate (&r);
21372 fprintf (stream, "%s", fp_const_from_val (&r));
21373 }
21374 return;
21375
21376 /* An integer or symbol address without a preceding # sign. */
21377 case 'c':
21378 switch (GET_CODE (x))
21379 {
21380 case CONST_INT:
21381 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21382 break;
21383
21384 case SYMBOL_REF:
21385 output_addr_const (stream, x);
21386 break;
21387
21388 case CONST:
21389 if (GET_CODE (XEXP (x, 0)) == PLUS
21390 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21391 {
21392 output_addr_const (stream, x);
21393 break;
21394 }
21395 /* Fall through. */
21396
21397 default:
21398 output_operand_lossage ("Unsupported operand for code '%c'", code);
21399 }
21400 return;
21401
21402 /* An integer that we want to print in HEX. */
21403 case 'x':
21404 switch (GET_CODE (x))
21405 {
21406 case CONST_INT:
21407 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21408 break;
21409
21410 default:
21411 output_operand_lossage ("Unsupported operand for code '%c'", code);
21412 }
21413 return;
21414
21415 case 'B':
21416 if (CONST_INT_P (x))
21417 {
21418 HOST_WIDE_INT val;
21419 val = ARM_SIGN_EXTEND (~INTVAL (x));
21420 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21421 }
21422 else
21423 {
21424 putc ('~', stream);
21425 output_addr_const (stream, x);
21426 }
21427 return;
21428
21429 case 'b':
21430 /* Print the log2 of a CONST_INT. */
21431 {
21432 HOST_WIDE_INT val;
21433
21434 if (!CONST_INT_P (x)
21435 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21436 output_operand_lossage ("Unsupported operand for code '%c'", code);
21437 else
21438 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21439 }
21440 return;
21441
21442 case 'L':
21443 /* The low 16 bits of an immediate constant. */
21444 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21445 return;
21446
21447 case 'i':
21448 fprintf (stream, "%s", arithmetic_instr (x, 1));
21449 return;
21450
21451 case 'I':
21452 fprintf (stream, "%s", arithmetic_instr (x, 0));
21453 return;
21454
21455 case 'S':
21456 {
21457 HOST_WIDE_INT val;
21458 const char *shift;
21459
21460 shift = shift_op (x, &val);
21461
21462 if (shift)
21463 {
21464 fprintf (stream, ", %s ", shift);
21465 if (val == -1)
21466 arm_print_operand (stream, XEXP (x, 1), 0);
21467 else
21468 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21469 }
21470 }
21471 return;
21472
21473 /* An explanation of the 'Q', 'R' and 'H' register operands:
21474
21475 In a pair of registers containing a DI or DF value the 'Q'
21476 operand returns the register number of the register containing
21477 the least significant part of the value. The 'R' operand returns
21478 the register number of the register containing the most
21479 significant part of the value.
21480
21481 The 'H' operand returns the higher of the two register numbers.
21482 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21483 same as the 'Q' operand, since the most significant part of the
21484 value is held in the lower number register. The reverse is true
21485 on systems where WORDS_BIG_ENDIAN is false.
21486
21487 The purpose of these operands is to distinguish between cases
21488 where the endian-ness of the values is important (for example
21489 when they are added together), and cases where the endian-ness
21490 is irrelevant, but the order of register operations is important.
21491 For example when loading a value from memory into a register
21492 pair, the endian-ness does not matter. Provided that the value
21493 from the lower memory address is put into the lower numbered
21494 register, and the value from the higher address is put into the
21495 higher numbered register, the load will work regardless of whether
21496 the value being loaded is big-wordian or little-wordian. The
21497 order of the two register loads can matter however, if the address
21498 of the memory location is actually held in one of the registers
21499 being overwritten by the load.
21500
21501 The 'Q' and 'R' constraints are also available for 64-bit
21502 constants. */
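/* For instance, on a little-endian target (WORDS_BIG_ENDIAN false) with X a
   DImode value in r4/r5: '%Q' prints r4, '%R' prints r5 and '%H' also
   prints r5.  */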
21503 case 'Q':
21504 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21505 {
21506 rtx part = gen_lowpart (SImode, x);
21507 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21508 return;
21509 }
21510
21511 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21512 {
21513 output_operand_lossage ("invalid operand for code '%c'", code);
21514 return;
21515 }
21516
21517 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21518 return;
21519
21520 case 'R':
21521 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21522 {
21523 enum machine_mode mode = GET_MODE (x);
21524 rtx part;
21525
21526 if (mode == VOIDmode)
21527 mode = DImode;
21528 part = gen_highpart_mode (SImode, mode, x);
21529 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21530 return;
21531 }
21532
21533 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21534 {
21535 output_operand_lossage ("invalid operand for code '%c'", code);
21536 return;
21537 }
21538
21539 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21540 return;
21541
21542 case 'H':
21543 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21544 {
21545 output_operand_lossage ("invalid operand for code '%c'", code);
21546 return;
21547 }
21548
21549 asm_fprintf (stream, "%r", REGNO (x) + 1);
21550 return;
21551
21552 case 'J':
21553 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21554 {
21555 output_operand_lossage ("invalid operand for code '%c'", code);
21556 return;
21557 }
21558
21559 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21560 return;
21561
21562 case 'K':
21563 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21564 {
21565 output_operand_lossage ("invalid operand for code '%c'", code);
21566 return;
21567 }
21568
21569 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21570 return;
21571
21572 case 'm':
21573 asm_fprintf (stream, "%r",
21574 REG_P (XEXP (x, 0))
21575 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21576 return;
21577
21578 case 'M':
21579 asm_fprintf (stream, "{%r-%r}",
21580 REGNO (x),
21581 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21582 return;
21583
21584 /* Like 'M', but writing doubleword vector registers, for use by Neon
21585 insns. */
21586 case 'h':
21587 {
21588 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21589 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21590 if (numregs == 1)
21591 asm_fprintf (stream, "{d%d}", regno);
21592 else
21593 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21594 }
21595 return;
21596
21597 case 'd':
21598 /* CONST_TRUE_RTX means always -- that's the default. */
21599 if (x == const_true_rtx)
21600 return;
21601
21602 if (!COMPARISON_P (x))
21603 {
21604 output_operand_lossage ("invalid operand for code '%c'", code);
21605 return;
21606 }
21607
21608 fputs (arm_condition_codes[get_arm_condition_code (x)],
21609 stream);
21610 return;
21611
21612 case 'D':
21613 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21614 want to do that. */
21615 if (x == const_true_rtx)
21616 {
21617 output_operand_lossage ("instruction never executed");
21618 return;
21619 }
21620 if (!COMPARISON_P (x))
21621 {
21622 output_operand_lossage ("invalid operand for code '%c'", code);
21623 return;
21624 }
21625
21626 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21627 (get_arm_condition_code (x))],
21628 stream);
21629 return;
21630
21631 case 's':
21632 case 'V':
21633 case 'W':
21634 case 'X':
21635 case 'Y':
21636 case 'Z':
21637 /* Former Maverick support, removed after GCC-4.7. */
21638 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21639 return;
21640
21641 case 'U':
21642 if (!REG_P (x)
21643 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21644 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21645 /* Bad value for wCG register number. */
21646 {
21647 output_operand_lossage ("invalid operand for code '%c'", code);
21648 return;
21649 }
21650
21651 else
21652 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21653 return;
21654
21655 /* Print an iWMMXt control register name. */
21656 case 'w':
21657 if (!CONST_INT_P (x)
21658 || INTVAL (x) < 0
21659 || INTVAL (x) >= 16)
21660 /* Bad value for wC register number. */
21661 {
21662 output_operand_lossage ("invalid operand for code '%c'", code);
21663 return;
21664 }
21665
21666 else
21667 {
21668 static const char * wc_reg_names [16] =
21669 {
21670 "wCID", "wCon", "wCSSF", "wCASF",
21671 "wC4", "wC5", "wC6", "wC7",
21672 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21673 "wC12", "wC13", "wC14", "wC15"
21674 };
21675
21676 fputs (wc_reg_names [INTVAL (x)], stream);
21677 }
21678 return;
21679
21680 /* Print the high single-precision register of a VFP double-precision
21681 register. */
21682 case 'p':
21683 {
21684 enum machine_mode mode = GET_MODE (x);
21685 int regno;
21686
21687 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21688 {
21689 output_operand_lossage ("invalid operand for code '%c'", code);
21690 return;
21691 }
21692
21693 regno = REGNO (x);
21694 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21695 {
21696 output_operand_lossage ("invalid operand for code '%c'", code);
21697 return;
21698 }
21699
21700 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21701 }
21702 return;
21703
21704 /* Print a VFP/Neon double precision or quad precision register name. */
21705 case 'P':
21706 case 'q':
21707 {
21708 enum machine_mode mode = GET_MODE (x);
21709 int is_quad = (code == 'q');
21710 int regno;
21711
21712 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21713 {
21714 output_operand_lossage ("invalid operand for code '%c'", code);
21715 return;
21716 }
21717
21718 if (!REG_P (x)
21719 || !IS_VFP_REGNUM (REGNO (x)))
21720 {
21721 output_operand_lossage ("invalid operand for code '%c'", code);
21722 return;
21723 }
21724
21725 regno = REGNO (x);
21726 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21727 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21728 {
21729 output_operand_lossage ("invalid operand for code '%c'", code);
21730 return;
21731 }
21732
21733 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21734 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21735 }
21736 return;
21737
21738 /* These two codes print the low/high doubleword register of a Neon quad
21739 register, respectively. For pair-structure types, can also print
21740 low/high quadword registers. */
21741 case 'e':
21742 case 'f':
21743 {
21744 enum machine_mode mode = GET_MODE (x);
21745 int regno;
21746
21747 if ((GET_MODE_SIZE (mode) != 16
21748 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21749 {
21750 output_operand_lossage ("invalid operand for code '%c'", code);
21751 return;
21752 }
21753
21754 regno = REGNO (x);
21755 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21756 {
21757 output_operand_lossage ("invalid operand for code '%c'", code);
21758 return;
21759 }
21760
21761 if (GET_MODE_SIZE (mode) == 16)
21762 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21763 + (code == 'f' ? 1 : 0));
21764 else
21765 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21766 + (code == 'f' ? 1 : 0));
21767 }
21768 return;
21769
21770 /* Print a VFPv3 floating-point constant, represented as an integer
21771 index. */
21772 case 'G':
21773 {
21774 int index = vfp3_const_double_index (x);
21775 gcc_assert (index != -1);
21776 fprintf (stream, "%d", index);
21777 }
21778 return;
21779
21780 /* Print bits representing opcode features for Neon.
21781
21782 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21783 and polynomials as unsigned.
21784
21785 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21786
21787 Bit 2 is 1 for rounding functions, 0 otherwise. */
21788
21789 /* Identify the type as 's', 'u', 'p' or 'f'. */
21790 case 'T':
21791 {
21792 HOST_WIDE_INT bits = INTVAL (x);
21793 fputc ("uspf"[bits & 3], stream);
21794 }
21795 return;
21796
21797 /* Likewise, but signed and unsigned integers are both 'i'. */
21798 case 'F':
21799 {
21800 HOST_WIDE_INT bits = INTVAL (x);
21801 fputc ("iipf"[bits & 3], stream);
21802 }
21803 return;
21804
21805 /* As for 'T', but emit 'u' instead of 'p'. */
21806 case 't':
21807 {
21808 HOST_WIDE_INT bits = INTVAL (x);
21809 fputc ("usuf"[bits & 3], stream);
21810 }
21811 return;
21812
21813 /* Bit 2: rounding (vs none). */
21814 case 'O':
21815 {
21816 HOST_WIDE_INT bits = INTVAL (x);
21817 fputs ((bits & 4) != 0 ? "r" : "", stream);
21818 }
21819 return;
21820
21821 /* Memory operand for vld1/vst1 instruction. */
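/* For example, a 16-byte access through r2 whose known alignment is at
   least 128 bits is printed as "[r2:128]" (or "[r2:128]!" with
   post-increment); an access aligned only to 32 bits gets no hint.  */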
21822 case 'A':
21823 {
21824 rtx addr;
21825 bool postinc = FALSE;
21826 unsigned align, memsize, align_bits;
21827
21828 gcc_assert (MEM_P (x));
21829 addr = XEXP (x, 0);
21830 if (GET_CODE (addr) == POST_INC)
21831 {
21832 postinc = 1;
21833 addr = XEXP (addr, 0);
21834 }
21835 asm_fprintf (stream, "[%r", REGNO (addr));
21836
21837 /* We know the alignment of this access, so we can emit a hint in the
21838 instruction (for some alignments) as an aid to the memory subsystem
21839 of the target. */
21840 align = MEM_ALIGN (x) >> 3;
21841 memsize = MEM_SIZE (x);
21842
21843 /* Only certain alignment specifiers are supported by the hardware. */
21844 if (memsize == 32 && (align % 32) == 0)
21845 align_bits = 256;
21846 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21847 align_bits = 128;
21848 else if (memsize >= 8 && (align % 8) == 0)
21849 align_bits = 64;
21850 else
21851 align_bits = 0;
21852
21853 if (align_bits != 0)
21854 asm_fprintf (stream, ":%d", align_bits);
21855
21856 asm_fprintf (stream, "]");
21857
21858 if (postinc)
21859 fputs("!", stream);
21860 }
21861 return;
21862
21863 case 'C':
21864 {
21865 rtx addr;
21866
21867 gcc_assert (MEM_P (x));
21868 addr = XEXP (x, 0);
21869 gcc_assert (REG_P (addr));
21870 asm_fprintf (stream, "[%r]", REGNO (addr));
21871 }
21872 return;
21873
21874 /* Translate an S register number into a D register number and element index. */
21875 case 'y':
21876 {
21877 enum machine_mode mode = GET_MODE (x);
21878 int regno;
21879
21880 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21881 {
21882 output_operand_lossage ("invalid operand for code '%c'", code);
21883 return;
21884 }
21885
21886 regno = REGNO (x);
21887 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21888 {
21889 output_operand_lossage ("invalid operand for code '%c'", code);
21890 return;
21891 }
21892
21893 regno = regno - FIRST_VFP_REGNUM;
21894 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21895 }
21896 return;
21897
21898 case 'v':
21899 gcc_assert (CONST_DOUBLE_P (x));
21900 int result;
21901 result = vfp3_const_double_for_fract_bits (x);
21902 if (result == 0)
21903 result = vfp3_const_double_for_bits (x);
21904 fprintf (stream, "#%d", result);
21905 return;
21906
21907 /* Register specifier for vld1.16/vst1.16. Translate the S register
21908 number into a D register number and element index. */
21909 case 'z':
21910 {
21911 enum machine_mode mode = GET_MODE (x);
21912 int regno;
21913
21914 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21915 {
21916 output_operand_lossage ("invalid operand for code '%c'", code);
21917 return;
21918 }
21919
21920 regno = REGNO (x);
21921 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21922 {
21923 output_operand_lossage ("invalid operand for code '%c'", code);
21924 return;
21925 }
21926
21927 regno = regno - FIRST_VFP_REGNUM;
21928 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21929 }
21930 return;
21931
21932 default:
21933 if (x == 0)
21934 {
21935 output_operand_lossage ("missing operand");
21936 return;
21937 }
21938
21939 switch (GET_CODE (x))
21940 {
21941 case REG:
21942 asm_fprintf (stream, "%r", REGNO (x));
21943 break;
21944
21945 case MEM:
21946 output_memory_reference_mode = GET_MODE (x);
21947 output_address (XEXP (x, 0));
21948 break;
21949
21950 case CONST_DOUBLE:
21951 if (TARGET_NEON)
21952 {
21953 char fpstr[20];
21954 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21955 sizeof (fpstr), 0, 1);
21956 fprintf (stream, "#%s", fpstr);
21957 }
21958 else
21959 fprintf (stream, "#%s", fp_immediate_constant (x));
21960 break;
21961
21962 default:
21963 gcc_assert (GET_CODE (x) != NEG);
21964 fputc ('#', stream);
21965 if (GET_CODE (x) == HIGH)
21966 {
21967 fputs (":lower16:", stream);
21968 x = XEXP (x, 0);
21969 }
21970
21971 output_addr_const (stream, x);
21972 break;
21973 }
21974 }
21975 }
21976 \f
21977 /* Target hook for printing a memory address. */
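/* Rough examples of the 32-bit forms handled below: (reg r3) prints as
   "[r3]", (plus (reg r3) (const_int 8)) as "[r3, #8]", (pre_dec (reg r3))
   as "[r3, #-4]!" for a word-sized access, and (post_inc (reg r3)) as
   "[r3], #4".  */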
21978 static void
21979 arm_print_operand_address (FILE *stream, rtx x)
21980 {
21981 if (TARGET_32BIT)
21982 {
21983 int is_minus = GET_CODE (x) == MINUS;
21984
21985 if (REG_P (x))
21986 asm_fprintf (stream, "[%r]", REGNO (x));
21987 else if (GET_CODE (x) == PLUS || is_minus)
21988 {
21989 rtx base = XEXP (x, 0);
21990 rtx index = XEXP (x, 1);
21991 HOST_WIDE_INT offset = 0;
21992 if (!REG_P (base)
21993 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21994 {
21995 /* Ensure that BASE is a register. */
21996 /* (one of them must be). */
21997 /* Also ensure the SP is not used as an index register. */
21998 rtx temp = base;
21999 base = index;
22000 index = temp;
22001 }
22002 switch (GET_CODE (index))
22003 {
22004 case CONST_INT:
22005 offset = INTVAL (index);
22006 if (is_minus)
22007 offset = -offset;
22008 asm_fprintf (stream, "[%r, #%wd]",
22009 REGNO (base), offset);
22010 break;
22011
22012 case REG:
22013 asm_fprintf (stream, "[%r, %s%r]",
22014 REGNO (base), is_minus ? "-" : "",
22015 REGNO (index));
22016 break;
22017
22018 case MULT:
22019 case ASHIFTRT:
22020 case LSHIFTRT:
22021 case ASHIFT:
22022 case ROTATERT:
22023 {
22024 asm_fprintf (stream, "[%r, %s%r",
22025 REGNO (base), is_minus ? "-" : "",
22026 REGNO (XEXP (index, 0)));
22027 arm_print_operand (stream, index, 'S');
22028 fputs ("]", stream);
22029 break;
22030 }
22031
22032 default:
22033 gcc_unreachable ();
22034 }
22035 }
22036 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22037 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22038 {
22039 extern enum machine_mode output_memory_reference_mode;
22040
22041 gcc_assert (REG_P (XEXP (x, 0)));
22042
22043 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22044 asm_fprintf (stream, "[%r, #%s%d]!",
22045 REGNO (XEXP (x, 0)),
22046 GET_CODE (x) == PRE_DEC ? "-" : "",
22047 GET_MODE_SIZE (output_memory_reference_mode));
22048 else
22049 asm_fprintf (stream, "[%r], #%s%d",
22050 REGNO (XEXP (x, 0)),
22051 GET_CODE (x) == POST_DEC ? "-" : "",
22052 GET_MODE_SIZE (output_memory_reference_mode));
22053 }
22054 else if (GET_CODE (x) == PRE_MODIFY)
22055 {
22056 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22057 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22058 asm_fprintf (stream, "#%wd]!",
22059 INTVAL (XEXP (XEXP (x, 1), 1)));
22060 else
22061 asm_fprintf (stream, "%r]!",
22062 REGNO (XEXP (XEXP (x, 1), 1)));
22063 }
22064 else if (GET_CODE (x) == POST_MODIFY)
22065 {
22066 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22067 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22068 asm_fprintf (stream, "#%wd",
22069 INTVAL (XEXP (XEXP (x, 1), 1)));
22070 else
22071 asm_fprintf (stream, "%r",
22072 REGNO (XEXP (XEXP (x, 1), 1)));
22073 }
22074 else output_addr_const (stream, x);
22075 }
22076 else
22077 {
22078 if (REG_P (x))
22079 asm_fprintf (stream, "[%r]", REGNO (x));
22080 else if (GET_CODE (x) == POST_INC)
22081 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22082 else if (GET_CODE (x) == PLUS)
22083 {
22084 gcc_assert (REG_P (XEXP (x, 0)));
22085 if (CONST_INT_P (XEXP (x, 1)))
22086 asm_fprintf (stream, "[%r, #%wd]",
22087 REGNO (XEXP (x, 0)),
22088 INTVAL (XEXP (x, 1)));
22089 else
22090 asm_fprintf (stream, "[%r, %r]",
22091 REGNO (XEXP (x, 0)),
22092 REGNO (XEXP (x, 1)));
22093 }
22094 else
22095 output_addr_const (stream, x);
22096 }
22097 }
22098 \f
22099 /* Target hook for indicating whether a punctuation character for
22100 TARGET_PRINT_OPERAND is valid. */
22101 static bool
22102 arm_print_operand_punct_valid_p (unsigned char code)
22103 {
22104 return (code == '@' || code == '|' || code == '.'
22105 || code == '(' || code == ')' || code == '#'
22106 || (TARGET_32BIT && (code == '?'))
22107 || (TARGET_THUMB2 && (code == '!'))
22108 || (TARGET_THUMB && (code == '_')));
22109 }
22110 \f
22111 /* Target hook for assembling integer objects. The ARM version needs to
22112 handle word-sized values specially. */
22113 static bool
22114 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22115 {
22116 enum machine_mode mode;
22117
22118 if (size == UNITS_PER_WORD && aligned_p)
22119 {
22120 fputs ("\t.word\t", asm_out_file);
22121 output_addr_const (asm_out_file, x);
22122
22123 /* Mark symbols as position independent. We only do this in the
22124 .text segment, not in the .data segment. */
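/* For instance (a sketch; "foo" is a made-up symbol), when emitting a
   constant-table entry under -fPIC this produces

       .word   foo(GOTOFF)

   for a local, text-relative symbol, and "(GOT)" instead when the data
   is not text-relative or the symbol is not known to be local.  */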
22125 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22126 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22127 {
22128 /* See legitimize_pic_address for an explanation of the
22129 TARGET_VXWORKS_RTP check. */
22130 if (!arm_pic_data_is_text_relative
22131 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22132 fputs ("(GOT)", asm_out_file);
22133 else
22134 fputs ("(GOTOFF)", asm_out_file);
22135 }
22136 fputc ('\n', asm_out_file);
22137 return true;
22138 }
22139
22140 mode = GET_MODE (x);
22141
22142 if (arm_vector_mode_supported_p (mode))
22143 {
22144 int i, units;
22145
22146 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22147
22148 units = CONST_VECTOR_NUNITS (x);
22149 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22150
22151 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22152 for (i = 0; i < units; i++)
22153 {
22154 rtx elt = CONST_VECTOR_ELT (x, i);
22155 assemble_integer
22156 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22157 }
22158 else
22159 for (i = 0; i < units; i++)
22160 {
22161 rtx elt = CONST_VECTOR_ELT (x, i);
22162 REAL_VALUE_TYPE rval;
22163
22164 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22165
22166 assemble_real
22167 (rval, GET_MODE_INNER (mode),
22168 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22169 }
22170
22171 return true;
22172 }
22173
22174 return default_assemble_integer (x, size, aligned_p);
22175 }
22176
22177 static void
22178 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22179 {
22180 section *s;
22181
22182 if (!TARGET_AAPCS_BASED)
22183 {
22184 (is_ctor ?
22185 default_named_section_asm_out_constructor
22186 : default_named_section_asm_out_destructor) (symbol, priority);
22187 return;
22188 }
22189
22190 /* Put these in the .init_array section, using a special relocation. */
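/* As an illustration (the priority value is made up): a constructor with
   priority 101 is placed in a section named ".init_array.00101", and the
   code below then emits an alignment directive followed by

       .word   my_ctor(target1)

   where "my_ctor" stands for the actual symbol.  */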
22191 if (priority != DEFAULT_INIT_PRIORITY)
22192 {
22193 char buf[18];
22194 sprintf (buf, "%s.%.5u",
22195 is_ctor ? ".init_array" : ".fini_array",
22196 priority);
22197 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22198 }
22199 else if (is_ctor)
22200 s = ctors_section;
22201 else
22202 s = dtors_section;
22203
22204 switch_to_section (s);
22205 assemble_align (POINTER_SIZE);
22206 fputs ("\t.word\t", asm_out_file);
22207 output_addr_const (asm_out_file, symbol);
22208 fputs ("(target1)\n", asm_out_file);
22209 }
22210
22211 /* Add a function to the list of static constructors. */
22212
22213 static void
22214 arm_elf_asm_constructor (rtx symbol, int priority)
22215 {
22216 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22217 }
22218
22219 /* Add a function to the list of static destructors. */
22220
22221 static void
22222 arm_elf_asm_destructor (rtx symbol, int priority)
22223 {
22224 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22225 }
22226 \f
22227 /* A finite state machine takes care of noticing whether or not instructions
22228 can be conditionally executed, and thus decrease execution time and code
22229 size by deleting branch instructions. The fsm is controlled by
22230 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22231
22232 /* The states of the fsm controlling condition codes are:
22233 0: normal, do nothing special
22234 1: make ASM_OUTPUT_OPCODE not output this instruction
22235 2: make ASM_OUTPUT_OPCODE not output this instruction
22236 3: make instructions conditional
22237 4: make instructions conditional
22238
22239 State transitions (state->state by whom under condition):
22240 0 -> 1 final_prescan_insn if the `target' is a label
22241 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22242 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22243 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22244 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22245 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22246 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22247 (the target insn is arm_target_insn).
22248
22249 If the jump clobbers the conditions then we use states 2 and 4.
22250
22251 A similar thing can be done with conditional return insns.
22252
22253 XXX In case the `target' is an unconditional branch, this conditionalising
22254 of the instructions always reduces code size, but not always execution
22255 time. But then, I want to reduce the code size to somewhere near what
22256 /bin/cc produces. */
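/* A sketch of the transformation (the assembly is illustrative only):

       cmp   r0, #0                     cmp   r0, #0
       beq   .L1                 ==>    addne r1, r1, #1
       add   r1, r1, #1                 subne r2, r2, #4
       sub   r2, r2, #4
     .L1:                             .L1:

   The branch over the skipped insns is deleted and those insns are
   executed conditionally under the inverse condition instead.  */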
22257
22258 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22259 instructions. When a COND_EXEC instruction is seen the subsequent
22260 instructions are scanned so that multiple conditional instructions can be
22261 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22262 specify the length and true/false mask for the IT block. These will be
22263 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
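/* For example (illustrative), three COND_EXEC insns where the first two
   use condition EQ and the third the inverse condition give
   arm_condexec_mask = 0b011 and arm_condexec_masklen = 3, and
   thumb2_asm_output_opcode below prints the block as

       itte  eq
       addeq r0, r0, #1
       moveq r1, #0
       subne r2, r2, #1                                               */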
22264
22265 /* Returns the index of the ARM condition code string in
22266 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22267 COMPARISON should be an rtx like `(eq (...) (...))'. */
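/* A minimal example: for a comparison such as

     (eq (reg:CC_Z CC_REGNUM) (const_int 0))

   the mode is CC_Zmode and the result is ARM_EQ, while an LTGT
   comparison in CCFPmode has no ARM encoding and yields ARM_NV.  */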
22268
22269 enum arm_cond_code
22270 maybe_get_arm_condition_code (rtx comparison)
22271 {
22272 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
22273 enum arm_cond_code code;
22274 enum rtx_code comp_code = GET_CODE (comparison);
22275
22276 if (GET_MODE_CLASS (mode) != MODE_CC)
22277 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22278 XEXP (comparison, 1));
22279
22280 switch (mode)
22281 {
22282 case CC_DNEmode: code = ARM_NE; goto dominance;
22283 case CC_DEQmode: code = ARM_EQ; goto dominance;
22284 case CC_DGEmode: code = ARM_GE; goto dominance;
22285 case CC_DGTmode: code = ARM_GT; goto dominance;
22286 case CC_DLEmode: code = ARM_LE; goto dominance;
22287 case CC_DLTmode: code = ARM_LT; goto dominance;
22288 case CC_DGEUmode: code = ARM_CS; goto dominance;
22289 case CC_DGTUmode: code = ARM_HI; goto dominance;
22290 case CC_DLEUmode: code = ARM_LS; goto dominance;
22291 case CC_DLTUmode: code = ARM_CC;
22292
22293 dominance:
22294 if (comp_code == EQ)
22295 return ARM_INVERSE_CONDITION_CODE (code);
22296 if (comp_code == NE)
22297 return code;
22298 return ARM_NV;
22299
22300 case CC_NOOVmode:
22301 switch (comp_code)
22302 {
22303 case NE: return ARM_NE;
22304 case EQ: return ARM_EQ;
22305 case GE: return ARM_PL;
22306 case LT: return ARM_MI;
22307 default: return ARM_NV;
22308 }
22309
22310 case CC_Zmode:
22311 switch (comp_code)
22312 {
22313 case NE: return ARM_NE;
22314 case EQ: return ARM_EQ;
22315 default: return ARM_NV;
22316 }
22317
22318 case CC_Nmode:
22319 switch (comp_code)
22320 {
22321 case NE: return ARM_MI;
22322 case EQ: return ARM_PL;
22323 default: return ARM_NV;
22324 }
22325
22326 case CCFPEmode:
22327 case CCFPmode:
22328 /* We can handle all cases except UNEQ and LTGT. */
22329 switch (comp_code)
22330 {
22331 case GE: return ARM_GE;
22332 case GT: return ARM_GT;
22333 case LE: return ARM_LS;
22334 case LT: return ARM_MI;
22335 case NE: return ARM_NE;
22336 case EQ: return ARM_EQ;
22337 case ORDERED: return ARM_VC;
22338 case UNORDERED: return ARM_VS;
22339 case UNLT: return ARM_LT;
22340 case UNLE: return ARM_LE;
22341 case UNGT: return ARM_HI;
22342 case UNGE: return ARM_PL;
22343 /* UNEQ and LTGT do not have a representation. */
22344 case UNEQ: /* Fall through. */
22345 case LTGT: /* Fall through. */
22346 default: return ARM_NV;
22347 }
22348
22349 case CC_SWPmode:
22350 switch (comp_code)
22351 {
22352 case NE: return ARM_NE;
22353 case EQ: return ARM_EQ;
22354 case GE: return ARM_LE;
22355 case GT: return ARM_LT;
22356 case LE: return ARM_GE;
22357 case LT: return ARM_GT;
22358 case GEU: return ARM_LS;
22359 case GTU: return ARM_CC;
22360 case LEU: return ARM_CS;
22361 case LTU: return ARM_HI;
22362 default: return ARM_NV;
22363 }
22364
22365 case CC_Cmode:
22366 switch (comp_code)
22367 {
22368 case LTU: return ARM_CS;
22369 case GEU: return ARM_CC;
22370 default: return ARM_NV;
22371 }
22372
22373 case CC_CZmode:
22374 switch (comp_code)
22375 {
22376 case NE: return ARM_NE;
22377 case EQ: return ARM_EQ;
22378 case GEU: return ARM_CS;
22379 case GTU: return ARM_HI;
22380 case LEU: return ARM_LS;
22381 case LTU: return ARM_CC;
22382 default: return ARM_NV;
22383 }
22384
22385 case CC_NCVmode:
22386 switch (comp_code)
22387 {
22388 case GE: return ARM_GE;
22389 case LT: return ARM_LT;
22390 case GEU: return ARM_CS;
22391 case LTU: return ARM_CC;
22392 default: return ARM_NV;
22393 }
22394
22395 case CCmode:
22396 switch (comp_code)
22397 {
22398 case NE: return ARM_NE;
22399 case EQ: return ARM_EQ;
22400 case GE: return ARM_GE;
22401 case GT: return ARM_GT;
22402 case LE: return ARM_LE;
22403 case LT: return ARM_LT;
22404 case GEU: return ARM_CS;
22405 case GTU: return ARM_HI;
22406 case LEU: return ARM_LS;
22407 case LTU: return ARM_CC;
22408 default: return ARM_NV;
22409 }
22410
22411 default: gcc_unreachable ();
22412 }
22413 }
22414
22415 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22416 static enum arm_cond_code
22417 get_arm_condition_code (rtx comparison)
22418 {
22419 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22420 gcc_assert (code != ARM_NV);
22421 return code;
22422 }
22423
22424 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22425 instructions. */
22426 void
22427 thumb2_final_prescan_insn (rtx insn)
22428 {
22429 rtx first_insn = insn;
22430 rtx body = PATTERN (insn);
22431 rtx predicate;
22432 enum arm_cond_code code;
22433 int n;
22434 int mask;
22435 int max;
22436
22437 /* max_insns_skipped in the tune was already taken into account in the
22438 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22439 just emit IT blocks as large as we can.  It does not make sense to split
22440 the IT blocks. */
22441 max = MAX_INSN_PER_IT_BLOCK;
22442
22443 /* Remove the previous insn from the count of insns to be output. */
22444 if (arm_condexec_count)
22445 arm_condexec_count--;
22446
22447 /* Nothing to do if we are already inside a conditional block. */
22448 if (arm_condexec_count)
22449 return;
22450
22451 if (GET_CODE (body) != COND_EXEC)
22452 return;
22453
22454 /* Conditional jumps are implemented directly. */
22455 if (JUMP_P (insn))
22456 return;
22457
22458 predicate = COND_EXEC_TEST (body);
22459 arm_current_cc = get_arm_condition_code (predicate);
22460
22461 n = get_attr_ce_count (insn);
22462 arm_condexec_count = 1;
22463 arm_condexec_mask = (1 << n) - 1;
22464 arm_condexec_masklen = n;
22465 /* See if subsequent instructions can be combined into the same block. */
22466 for (;;)
22467 {
22468 insn = next_nonnote_insn (insn);
22469
22470 /* Jumping into the middle of an IT block is illegal, so a label or
22471 barrier terminates the block. */
22472 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22473 break;
22474
22475 body = PATTERN (insn);
22476 /* USE and CLOBBER aren't really insns, so just skip them. */
22477 if (GET_CODE (body) == USE
22478 || GET_CODE (body) == CLOBBER)
22479 continue;
22480
22481 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22482 if (GET_CODE (body) != COND_EXEC)
22483 break;
22484 /* Maximum number of conditionally executed instructions in a block. */
22485 n = get_attr_ce_count (insn);
22486 if (arm_condexec_masklen + n > max)
22487 break;
22488
22489 predicate = COND_EXEC_TEST (body);
22490 code = get_arm_condition_code (predicate);
22491 mask = (1 << n) - 1;
22492 if (arm_current_cc == code)
22493 arm_condexec_mask |= (mask << arm_condexec_masklen);
22494 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22495 break;
22496
22497 arm_condexec_count++;
22498 arm_condexec_masklen += n;
22499
22500 /* A jump must be the last instruction in a conditional block. */
22501 if (JUMP_P (insn))
22502 break;
22503 }
22504 /* Restore recog_data (getting the attributes of other insns can
22505 destroy this array, but final.c assumes that it remains intact
22506 across this call). */
22507 extract_constrain_insn_cached (first_insn);
22508 }
22509
22510 void
22511 arm_final_prescan_insn (rtx insn)
22512 {
22513 /* BODY will hold the body of INSN. */
22514 rtx body = PATTERN (insn);
22515
22516 /* This will be 1 if trying to repeat the trick, and things need to be
22517 reversed if it appears to fail. */
22518 int reverse = 0;
22519
22520 /* If we start with a return insn, we only succeed if we find another one. */
22521 int seeking_return = 0;
22522 enum rtx_code return_code = UNKNOWN;
22523
22524 /* START_INSN will hold the insn from where we start looking. This is the
22525 first insn after the following code_label if REVERSE is true. */
22526 rtx start_insn = insn;
22527
22528 /* If in state 4, check if the target branch is reached, in order to
22529 change back to state 0. */
22530 if (arm_ccfsm_state == 4)
22531 {
22532 if (insn == arm_target_insn)
22533 {
22534 arm_target_insn = NULL;
22535 arm_ccfsm_state = 0;
22536 }
22537 return;
22538 }
22539
22540 /* If in state 3, it is possible to repeat the trick, if this insn is an
22541 unconditional branch to a label, and immediately following this branch
22542 is the previous target label which is only used once, and the label this
22543 branch jumps to is not too far off. */
22544 if (arm_ccfsm_state == 3)
22545 {
22546 if (simplejump_p (insn))
22547 {
22548 start_insn = next_nonnote_insn (start_insn);
22549 if (BARRIER_P (start_insn))
22550 {
22551 /* XXX Isn't this always a barrier? */
22552 start_insn = next_nonnote_insn (start_insn);
22553 }
22554 if (LABEL_P (start_insn)
22555 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22556 && LABEL_NUSES (start_insn) == 1)
22557 reverse = TRUE;
22558 else
22559 return;
22560 }
22561 else if (ANY_RETURN_P (body))
22562 {
22563 start_insn = next_nonnote_insn (start_insn);
22564 if (BARRIER_P (start_insn))
22565 start_insn = next_nonnote_insn (start_insn);
22566 if (LABEL_P (start_insn)
22567 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22568 && LABEL_NUSES (start_insn) == 1)
22569 {
22570 reverse = TRUE;
22571 seeking_return = 1;
22572 return_code = GET_CODE (body);
22573 }
22574 else
22575 return;
22576 }
22577 else
22578 return;
22579 }
22580
22581 gcc_assert (!arm_ccfsm_state || reverse);
22582 if (!JUMP_P (insn))
22583 return;
22584
22585 /* This jump might be paralleled with a clobber of the condition codes;
22586 the jump should always come first.  */
22587 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22588 body = XVECEXP (body, 0, 0);
22589
22590 if (reverse
22591 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22592 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22593 {
22594 int insns_skipped;
22595 int fail = FALSE, succeed = FALSE;
22596 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22597 int then_not_else = TRUE;
22598 rtx this_insn = start_insn, label = 0;
22599
22600 /* Register the insn jumped to. */
22601 if (reverse)
22602 {
22603 if (!seeking_return)
22604 label = XEXP (SET_SRC (body), 0);
22605 }
22606 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22607 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22608 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22609 {
22610 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22611 then_not_else = FALSE;
22612 }
22613 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22614 {
22615 seeking_return = 1;
22616 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22617 }
22618 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22619 {
22620 seeking_return = 1;
22621 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22622 then_not_else = FALSE;
22623 }
22624 else
22625 gcc_unreachable ();
22626
22627 /* See how many insns this branch skips, and what kind of insns. If all
22628 insns are okay, and the label or unconditional branch to the same
22629 label is not too far away, succeed. */
22630 for (insns_skipped = 0;
22631 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22632 {
22633 rtx scanbody;
22634
22635 this_insn = next_nonnote_insn (this_insn);
22636 if (!this_insn)
22637 break;
22638
22639 switch (GET_CODE (this_insn))
22640 {
22641 case CODE_LABEL:
22642 /* Succeed if it is the target label, otherwise fail since
22643 control falls in from somewhere else. */
22644 if (this_insn == label)
22645 {
22646 arm_ccfsm_state = 1;
22647 succeed = TRUE;
22648 }
22649 else
22650 fail = TRUE;
22651 break;
22652
22653 case BARRIER:
22654 /* Succeed if the following insn is the target label.
22655 Otherwise fail.
22656 If return insns are used then the last insn in a function
22657 will be a barrier. */
22658 this_insn = next_nonnote_insn (this_insn);
22659 if (this_insn && this_insn == label)
22660 {
22661 arm_ccfsm_state = 1;
22662 succeed = TRUE;
22663 }
22664 else
22665 fail = TRUE;
22666 break;
22667
22668 case CALL_INSN:
22669 /* The AAPCS says that conditional calls should not be
22670 used since they make interworking inefficient (the
22671 linker can't transform BL<cond> into BLX). That's
22672 only a problem if the machine has BLX. */
22673 if (arm_arch5)
22674 {
22675 fail = TRUE;
22676 break;
22677 }
22678
22679 /* Succeed if the following insn is the target label, or
22680 if the following two insns are a barrier and the
22681 target label. */
22682 this_insn = next_nonnote_insn (this_insn);
22683 if (this_insn && BARRIER_P (this_insn))
22684 this_insn = next_nonnote_insn (this_insn);
22685
22686 if (this_insn && this_insn == label
22687 && insns_skipped < max_insns_skipped)
22688 {
22689 arm_ccfsm_state = 1;
22690 succeed = TRUE;
22691 }
22692 else
22693 fail = TRUE;
22694 break;
22695
22696 case JUMP_INSN:
22697 /* If this is an unconditional branch to the same label, succeed.
22698 If it is to another label, do nothing. If it is conditional,
22699 fail. */
22700 /* XXX Probably, the tests for SET and the PC are
22701 unnecessary. */
22702
22703 scanbody = PATTERN (this_insn);
22704 if (GET_CODE (scanbody) == SET
22705 && GET_CODE (SET_DEST (scanbody)) == PC)
22706 {
22707 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22708 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22709 {
22710 arm_ccfsm_state = 2;
22711 succeed = TRUE;
22712 }
22713 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22714 fail = TRUE;
22715 }
22716 /* Fail if a conditional return is undesirable (e.g. on a
22717 StrongARM), but still allow this if optimizing for size. */
22718 else if (GET_CODE (scanbody) == return_code
22719 && !use_return_insn (TRUE, NULL)
22720 && !optimize_size)
22721 fail = TRUE;
22722 else if (GET_CODE (scanbody) == return_code)
22723 {
22724 arm_ccfsm_state = 2;
22725 succeed = TRUE;
22726 }
22727 else if (GET_CODE (scanbody) == PARALLEL)
22728 {
22729 switch (get_attr_conds (this_insn))
22730 {
22731 case CONDS_NOCOND:
22732 break;
22733 default:
22734 fail = TRUE;
22735 break;
22736 }
22737 }
22738 else
22739 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22740
22741 break;
22742
22743 case INSN:
22744 /* Instructions using or affecting the condition codes make it
22745 fail. */
22746 scanbody = PATTERN (this_insn);
22747 if (!(GET_CODE (scanbody) == SET
22748 || GET_CODE (scanbody) == PARALLEL)
22749 || get_attr_conds (this_insn) != CONDS_NOCOND)
22750 fail = TRUE;
22751 break;
22752
22753 default:
22754 break;
22755 }
22756 }
22757 if (succeed)
22758 {
22759 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22760 arm_target_label = CODE_LABEL_NUMBER (label);
22761 else
22762 {
22763 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22764
22765 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22766 {
22767 this_insn = next_nonnote_insn (this_insn);
22768 gcc_assert (!this_insn
22769 || (!BARRIER_P (this_insn)
22770 && !LABEL_P (this_insn)));
22771 }
22772 if (!this_insn)
22773 {
22774 /* Oh dear!  We ran off the end; give up. */
22775 extract_constrain_insn_cached (insn);
22776 arm_ccfsm_state = 0;
22777 arm_target_insn = NULL;
22778 return;
22779 }
22780 arm_target_insn = this_insn;
22781 }
22782
22783 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22784 what it was. */
22785 if (!reverse)
22786 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22787
22788 if (reverse || then_not_else)
22789 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22790 }
22791
22792 /* Restore recog_data (getting the attributes of other insns can
22793 destroy this array, but final.c assumes that it remains intact
22794 across this call). */
22795 extract_constrain_insn_cached (insn);
22796 }
22797 }
22798
22799 /* Output IT instructions. */
22800 void
22801 thumb2_asm_output_opcode (FILE * stream)
22802 {
22803 char buff[5];
22804 int n;
22805
22806 if (arm_condexec_mask)
22807 {
22808 for (n = 0; n < arm_condexec_masklen; n++)
22809 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22810 buff[n] = 0;
22811 asm_fprintf (stream, "i%s\t%s\n\t", buff,
22812 arm_condition_codes[arm_current_cc]);
22813 arm_condexec_mask = 0;
22814 }
22815 }
22816
22817 /* Returns true if REGNO is a valid register
22818 for holding a quantity of mode MODE. */
22819 int
22820 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22821 {
22822 if (GET_MODE_CLASS (mode) == MODE_CC)
22823 return (regno == CC_REGNUM
22824 || (TARGET_HARD_FLOAT && TARGET_VFP
22825 && regno == VFPCC_REGNUM));
22826
22827 if (TARGET_THUMB1)
22828 /* For the Thumb we only allow values bigger than SImode in
22829 registers 0 - 6, so that there is always a second low
22830 register available to hold the upper part of the value.
22831 We probably ought to ensure that the register is the
22832 start of an even-numbered register pair. */
22833 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22834
22835 if (TARGET_HARD_FLOAT && TARGET_VFP
22836 && IS_VFP_REGNUM (regno))
22837 {
22838 if (mode == SFmode || mode == SImode)
22839 return VFP_REGNO_OK_FOR_SINGLE (regno);
22840
22841 if (mode == DFmode)
22842 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22843
22844 /* VFP registers can hold HFmode values, but there is no point in
22845 putting them there unless we have hardware conversion insns. */
22846 if (mode == HFmode)
22847 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22848
22849 if (TARGET_NEON)
22850 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22851 || (VALID_NEON_QREG_MODE (mode)
22852 && NEON_REGNO_OK_FOR_QUAD (regno))
22853 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22854 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22855 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22856 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22857 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22858
22859 return FALSE;
22860 }
22861
22862 if (TARGET_REALLY_IWMMXT)
22863 {
22864 if (IS_IWMMXT_GR_REGNUM (regno))
22865 return mode == SImode;
22866
22867 if (IS_IWMMXT_REGNUM (regno))
22868 return VALID_IWMMXT_REG_MODE (mode);
22869 }
22870
22871 /* We allow almost any value to be stored in the general registers.
22872 Restrict doubleword quantities to even register pairs in ARM state
22873 so that we can use ldrd. Do not allow very large Neon structure
22874 opaque modes in general registers; they would use too many. */
22875 if (regno <= LAST_ARM_REGNUM)
22876 {
22877 if (ARM_NUM_REGS (mode) > 4)
22878 return FALSE;
22879
22880 if (TARGET_THUMB2)
22881 return TRUE;
22882
22883 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
22884 }
22885
22886 if (regno == FRAME_POINTER_REGNUM
22887 || regno == ARG_POINTER_REGNUM)
22888 /* We only allow integers in the fake hard registers. */
22889 return GET_MODE_CLASS (mode) == MODE_INT;
22890
22891 return FALSE;
22892 }
22893
22894 /* Implement MODES_TIEABLE_P. */
22895
22896 bool
22897 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22898 {
22899 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22900 return true;
22901
22902 /* We specifically want to allow elements of "structure" modes to
22903 be tieable to the structure. This more general condition allows
22904 other rarer situations too. */
22905 if (TARGET_NEON
22906 && (VALID_NEON_DREG_MODE (mode1)
22907 || VALID_NEON_QREG_MODE (mode1)
22908 || VALID_NEON_STRUCT_MODE (mode1))
22909 && (VALID_NEON_DREG_MODE (mode2)
22910 || VALID_NEON_QREG_MODE (mode2)
22911 || VALID_NEON_STRUCT_MODE (mode2)))
22912 return true;
22913
22914 return false;
22915 }
22916
22917 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22918 not used in arm mode. */
22919
22920 enum reg_class
22921 arm_regno_class (int regno)
22922 {
22923 if (TARGET_THUMB1)
22924 {
22925 if (regno == STACK_POINTER_REGNUM)
22926 return STACK_REG;
22927 if (regno == CC_REGNUM)
22928 return CC_REG;
22929 if (regno < 8)
22930 return LO_REGS;
22931 return HI_REGS;
22932 }
22933
22934 if (TARGET_THUMB2 && regno < 8)
22935 return LO_REGS;
22936
22937 if ( regno <= LAST_ARM_REGNUM
22938 || regno == FRAME_POINTER_REGNUM
22939 || regno == ARG_POINTER_REGNUM)
22940 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22941
22942 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22943 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22944
22945 if (IS_VFP_REGNUM (regno))
22946 {
22947 if (regno <= D7_VFP_REGNUM)
22948 return VFP_D0_D7_REGS;
22949 else if (regno <= LAST_LO_VFP_REGNUM)
22950 return VFP_LO_REGS;
22951 else
22952 return VFP_HI_REGS;
22953 }
22954
22955 if (IS_IWMMXT_REGNUM (regno))
22956 return IWMMXT_REGS;
22957
22958 if (IS_IWMMXT_GR_REGNUM (regno))
22959 return IWMMXT_GR_REGS;
22960
22961 return NO_REGS;
22962 }
22963
22964 /* Handle a special case when computing the offset
22965 of an argument from the frame pointer. */
22966 int
22967 arm_debugger_arg_offset (int value, rtx addr)
22968 {
22969 rtx insn;
22970
22971 /* We are only interested if dbxout_parms() failed to compute the offset. */
22972 if (value != 0)
22973 return 0;
22974
22975 /* We can only cope with the case where the address is held in a register. */
22976 if (!REG_P (addr))
22977 return 0;
22978
22979 /* If we are using the frame pointer to point at the argument, then
22980 an offset of 0 is correct. */
22981 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22982 return 0;
22983
22984 /* If we are using the stack pointer to point at the
22985 argument, then an offset of 0 is correct. */
22986 /* ??? Check this is consistent with thumb2 frame layout. */
22987 if ((TARGET_THUMB || !frame_pointer_needed)
22988 && REGNO (addr) == SP_REGNUM)
22989 return 0;
22990
22991 /* Oh dear. The argument is pointed to by a register rather
22992 than being held in a register, or being stored at a known
22993 offset from the frame pointer. Since GDB only understands
22994 those two kinds of argument we must translate the address
22995 held in the register into an offset from the frame pointer.
22996 We do this by searching through the insns for the function
22997 looking to see where this register gets its value. If the
22998 register is initialized from the frame pointer plus an offset
22999 then we are in luck and we can continue, otherwise we give up.
23000
23001 This code is exercised by producing debugging information
23002 for a function with arguments like this:
23003
23004 double func (double a, double b, int c, double d) {return d;}
23005
23006 Without this code the stab for parameter 'd' will be set to
23007 an offset of 0 from the frame pointer, rather than 8. */
23008
23009 /* The if() statement says:
23010
23011 If the insn is a normal instruction
23012 and if the insn is setting the value in a register
23013 and if the register being set is the register holding the address of the argument
23014 and if the address is computed by an addition
23015 that involves adding to a register
23016 which is the frame pointer
23017 a constant integer
23018
23019 then... */
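/* Concretely (a sketch with made-up register numbers), the loop below
   is looking for an insn of the form

     (set (reg:SI 3)
          (plus:SI (reg:SI 11)        ; hard frame pointer
                   (const_int 8)))

   where (reg:SI 3) is ADDR, in which case 8 becomes the offset.  */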
23020
23021 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23022 {
23023 if ( NONJUMP_INSN_P (insn)
23024 && GET_CODE (PATTERN (insn)) == SET
23025 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23026 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23027 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23028 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23029 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23030 )
23031 {
23032 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23033
23034 break;
23035 }
23036 }
23037
23038 if (value == 0)
23039 {
23040 debug_rtx (addr);
23041 warning (0, "unable to compute real location of stacked parameter");
23042 value = 8; /* XXX magic hack */
23043 }
23044
23045 return value;
23046 }
23047 \f
23048 typedef enum {
23049 T_V8QI,
23050 T_V4HI,
23051 T_V4HF,
23052 T_V2SI,
23053 T_V2SF,
23054 T_DI,
23055 T_V16QI,
23056 T_V8HI,
23057 T_V4SI,
23058 T_V4SF,
23059 T_V2DI,
23060 T_TI,
23061 T_EI,
23062 T_OI,
23063 T_MAX /* Size of enum. Keep last. */
23064 } neon_builtin_type_mode;
23065
23066 #define TYPE_MODE_BIT(X) (1 << (X))
23067
23068 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23069 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23070 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23071 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23072 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23073 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23074
23075 #define v8qi_UP T_V8QI
23076 #define v4hi_UP T_V4HI
23077 #define v4hf_UP T_V4HF
23078 #define v2si_UP T_V2SI
23079 #define v2sf_UP T_V2SF
23080 #define di_UP T_DI
23081 #define v16qi_UP T_V16QI
23082 #define v8hi_UP T_V8HI
23083 #define v4si_UP T_V4SI
23084 #define v4sf_UP T_V4SF
23085 #define v2di_UP T_V2DI
23086 #define ti_UP T_TI
23087 #define ei_UP T_EI
23088 #define oi_UP T_OI
23089
23090 #define UP(X) X##_UP
23091
23092 typedef enum {
23093 NEON_BINOP,
23094 NEON_TERNOP,
23095 NEON_UNOP,
23096 NEON_BSWAP,
23097 NEON_GETLANE,
23098 NEON_SETLANE,
23099 NEON_CREATE,
23100 NEON_RINT,
23101 NEON_DUP,
23102 NEON_DUPLANE,
23103 NEON_COMBINE,
23104 NEON_SPLIT,
23105 NEON_LANEMUL,
23106 NEON_LANEMULL,
23107 NEON_LANEMULH,
23108 NEON_LANEMAC,
23109 NEON_SCALARMUL,
23110 NEON_SCALARMULL,
23111 NEON_SCALARMULH,
23112 NEON_SCALARMAC,
23113 NEON_CONVERT,
23114 NEON_FLOAT_WIDEN,
23115 NEON_FLOAT_NARROW,
23116 NEON_FIXCONV,
23117 NEON_SELECT,
23118 NEON_REINTERP,
23119 NEON_VTBL,
23120 NEON_VTBX,
23121 NEON_LOAD1,
23122 NEON_LOAD1LANE,
23123 NEON_STORE1,
23124 NEON_STORE1LANE,
23125 NEON_LOADSTRUCT,
23126 NEON_LOADSTRUCTLANE,
23127 NEON_STORESTRUCT,
23128 NEON_STORESTRUCTLANE,
23129 NEON_LOGICBINOP,
23130 NEON_SHIFTINSERT,
23131 NEON_SHIFTIMM,
23132 NEON_SHIFTACC
23133 } neon_itype;
23134
23135 typedef struct {
23136 const char *name;
23137 const neon_itype itype;
23138 const neon_builtin_type_mode mode;
23139 const enum insn_code code;
23140 unsigned int fcode;
23141 } neon_builtin_datum;
23142
23143 #define CF(N,X) CODE_FOR_neon_##N##X
23144
23145 #define VAR1(T, N, A) \
23146 {#N, NEON_##T, UP (A), CF (N, A), 0}
23147 #define VAR2(T, N, A, B) \
23148 VAR1 (T, N, A), \
23149 {#N, NEON_##T, UP (B), CF (N, B), 0}
23150 #define VAR3(T, N, A, B, C) \
23151 VAR2 (T, N, A, B), \
23152 {#N, NEON_##T, UP (C), CF (N, C), 0}
23153 #define VAR4(T, N, A, B, C, D) \
23154 VAR3 (T, N, A, B, C), \
23155 {#N, NEON_##T, UP (D), CF (N, D), 0}
23156 #define VAR5(T, N, A, B, C, D, E) \
23157 VAR4 (T, N, A, B, C, D), \
23158 {#N, NEON_##T, UP (E), CF (N, E), 0}
23159 #define VAR6(T, N, A, B, C, D, E, F) \
23160 VAR5 (T, N, A, B, C, D, E), \
23161 {#N, NEON_##T, UP (F), CF (N, F), 0}
23162 #define VAR7(T, N, A, B, C, D, E, F, G) \
23163 VAR6 (T, N, A, B, C, D, E, F), \
23164 {#N, NEON_##T, UP (G), CF (N, G), 0}
23165 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23166 VAR7 (T, N, A, B, C, D, E, F, G), \
23167 {#N, NEON_##T, UP (H), CF (N, H), 0}
23168 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23169 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23170 {#N, NEON_##T, UP (I), CF (N, I), 0}
23171 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23172 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23173 {#N, NEON_##T, UP (J), CF (N, J), 0}
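/* For instance (a hypothetical entry), a line such as

     VAR2 (BINOP, vadd, v8qi, v16qi)

   in arm_neon_builtins.def expands to the two initializers

     {"vadd", NEON_BINOP, T_V8QI,  CODE_FOR_neon_vaddv8qi,  0},
     {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0}

   with the fcode field filled in later by arm_init_neon_builtins.  */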
23174
23175 /* The NEON builtin data can be found in arm_neon_builtins.def.
23176 The mode entries in the following table correspond to the "key" type of the
23177 instruction variant, i.e. equivalent to that which would be specified after
23178 the assembler mnemonic, which usually refers to the last vector operand.
23179 (Signed/unsigned/polynomial types are not differentiated between though, and
23180 are all mapped onto the same mode for a given element size.) The modes
23181 listed per instruction should be the same as those defined for that
23182 instruction's pattern in neon.md. */
23183
23184 static neon_builtin_datum neon_builtin_data[] =
23185 {
23186 #include "arm_neon_builtins.def"
23187 };
23188
23189 #undef CF
23190 #undef VAR1
23191 #undef VAR2
23192 #undef VAR3
23193 #undef VAR4
23194 #undef VAR5
23195 #undef VAR6
23196 #undef VAR7
23197 #undef VAR8
23198 #undef VAR9
23199 #undef VAR10
23200
23201 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23202 #define VAR1(T, N, A) \
23203 CF (N, A)
23204 #define VAR2(T, N, A, B) \
23205 VAR1 (T, N, A), \
23206 CF (N, B)
23207 #define VAR3(T, N, A, B, C) \
23208 VAR2 (T, N, A, B), \
23209 CF (N, C)
23210 #define VAR4(T, N, A, B, C, D) \
23211 VAR3 (T, N, A, B, C), \
23212 CF (N, D)
23213 #define VAR5(T, N, A, B, C, D, E) \
23214 VAR4 (T, N, A, B, C, D), \
23215 CF (N, E)
23216 #define VAR6(T, N, A, B, C, D, E, F) \
23217 VAR5 (T, N, A, B, C, D, E), \
23218 CF (N, F)
23219 #define VAR7(T, N, A, B, C, D, E, F, G) \
23220 VAR6 (T, N, A, B, C, D, E, F), \
23221 CF (N, G)
23222 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23223 VAR7 (T, N, A, B, C, D, E, F, G), \
23224 CF (N, H)
23225 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23226 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23227 CF (N, I)
23228 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23229 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23230 CF (N, J)
23231 enum arm_builtins
23232 {
23233 ARM_BUILTIN_GETWCGR0,
23234 ARM_BUILTIN_GETWCGR1,
23235 ARM_BUILTIN_GETWCGR2,
23236 ARM_BUILTIN_GETWCGR3,
23237
23238 ARM_BUILTIN_SETWCGR0,
23239 ARM_BUILTIN_SETWCGR1,
23240 ARM_BUILTIN_SETWCGR2,
23241 ARM_BUILTIN_SETWCGR3,
23242
23243 ARM_BUILTIN_WZERO,
23244
23245 ARM_BUILTIN_WAVG2BR,
23246 ARM_BUILTIN_WAVG2HR,
23247 ARM_BUILTIN_WAVG2B,
23248 ARM_BUILTIN_WAVG2H,
23249
23250 ARM_BUILTIN_WACCB,
23251 ARM_BUILTIN_WACCH,
23252 ARM_BUILTIN_WACCW,
23253
23254 ARM_BUILTIN_WMACS,
23255 ARM_BUILTIN_WMACSZ,
23256 ARM_BUILTIN_WMACU,
23257 ARM_BUILTIN_WMACUZ,
23258
23259 ARM_BUILTIN_WSADB,
23260 ARM_BUILTIN_WSADBZ,
23261 ARM_BUILTIN_WSADH,
23262 ARM_BUILTIN_WSADHZ,
23263
23264 ARM_BUILTIN_WALIGNI,
23265 ARM_BUILTIN_WALIGNR0,
23266 ARM_BUILTIN_WALIGNR1,
23267 ARM_BUILTIN_WALIGNR2,
23268 ARM_BUILTIN_WALIGNR3,
23269
23270 ARM_BUILTIN_TMIA,
23271 ARM_BUILTIN_TMIAPH,
23272 ARM_BUILTIN_TMIABB,
23273 ARM_BUILTIN_TMIABT,
23274 ARM_BUILTIN_TMIATB,
23275 ARM_BUILTIN_TMIATT,
23276
23277 ARM_BUILTIN_TMOVMSKB,
23278 ARM_BUILTIN_TMOVMSKH,
23279 ARM_BUILTIN_TMOVMSKW,
23280
23281 ARM_BUILTIN_TBCSTB,
23282 ARM_BUILTIN_TBCSTH,
23283 ARM_BUILTIN_TBCSTW,
23284
23285 ARM_BUILTIN_WMADDS,
23286 ARM_BUILTIN_WMADDU,
23287
23288 ARM_BUILTIN_WPACKHSS,
23289 ARM_BUILTIN_WPACKWSS,
23290 ARM_BUILTIN_WPACKDSS,
23291 ARM_BUILTIN_WPACKHUS,
23292 ARM_BUILTIN_WPACKWUS,
23293 ARM_BUILTIN_WPACKDUS,
23294
23295 ARM_BUILTIN_WADDB,
23296 ARM_BUILTIN_WADDH,
23297 ARM_BUILTIN_WADDW,
23298 ARM_BUILTIN_WADDSSB,
23299 ARM_BUILTIN_WADDSSH,
23300 ARM_BUILTIN_WADDSSW,
23301 ARM_BUILTIN_WADDUSB,
23302 ARM_BUILTIN_WADDUSH,
23303 ARM_BUILTIN_WADDUSW,
23304 ARM_BUILTIN_WSUBB,
23305 ARM_BUILTIN_WSUBH,
23306 ARM_BUILTIN_WSUBW,
23307 ARM_BUILTIN_WSUBSSB,
23308 ARM_BUILTIN_WSUBSSH,
23309 ARM_BUILTIN_WSUBSSW,
23310 ARM_BUILTIN_WSUBUSB,
23311 ARM_BUILTIN_WSUBUSH,
23312 ARM_BUILTIN_WSUBUSW,
23313
23314 ARM_BUILTIN_WAND,
23315 ARM_BUILTIN_WANDN,
23316 ARM_BUILTIN_WOR,
23317 ARM_BUILTIN_WXOR,
23318
23319 ARM_BUILTIN_WCMPEQB,
23320 ARM_BUILTIN_WCMPEQH,
23321 ARM_BUILTIN_WCMPEQW,
23322 ARM_BUILTIN_WCMPGTUB,
23323 ARM_BUILTIN_WCMPGTUH,
23324 ARM_BUILTIN_WCMPGTUW,
23325 ARM_BUILTIN_WCMPGTSB,
23326 ARM_BUILTIN_WCMPGTSH,
23327 ARM_BUILTIN_WCMPGTSW,
23328
23329 ARM_BUILTIN_TEXTRMSB,
23330 ARM_BUILTIN_TEXTRMSH,
23331 ARM_BUILTIN_TEXTRMSW,
23332 ARM_BUILTIN_TEXTRMUB,
23333 ARM_BUILTIN_TEXTRMUH,
23334 ARM_BUILTIN_TEXTRMUW,
23335 ARM_BUILTIN_TINSRB,
23336 ARM_BUILTIN_TINSRH,
23337 ARM_BUILTIN_TINSRW,
23338
23339 ARM_BUILTIN_WMAXSW,
23340 ARM_BUILTIN_WMAXSH,
23341 ARM_BUILTIN_WMAXSB,
23342 ARM_BUILTIN_WMAXUW,
23343 ARM_BUILTIN_WMAXUH,
23344 ARM_BUILTIN_WMAXUB,
23345 ARM_BUILTIN_WMINSW,
23346 ARM_BUILTIN_WMINSH,
23347 ARM_BUILTIN_WMINSB,
23348 ARM_BUILTIN_WMINUW,
23349 ARM_BUILTIN_WMINUH,
23350 ARM_BUILTIN_WMINUB,
23351
23352 ARM_BUILTIN_WMULUM,
23353 ARM_BUILTIN_WMULSM,
23354 ARM_BUILTIN_WMULUL,
23355
23356 ARM_BUILTIN_PSADBH,
23357 ARM_BUILTIN_WSHUFH,
23358
23359 ARM_BUILTIN_WSLLH,
23360 ARM_BUILTIN_WSLLW,
23361 ARM_BUILTIN_WSLLD,
23362 ARM_BUILTIN_WSRAH,
23363 ARM_BUILTIN_WSRAW,
23364 ARM_BUILTIN_WSRAD,
23365 ARM_BUILTIN_WSRLH,
23366 ARM_BUILTIN_WSRLW,
23367 ARM_BUILTIN_WSRLD,
23368 ARM_BUILTIN_WRORH,
23369 ARM_BUILTIN_WRORW,
23370 ARM_BUILTIN_WRORD,
23371 ARM_BUILTIN_WSLLHI,
23372 ARM_BUILTIN_WSLLWI,
23373 ARM_BUILTIN_WSLLDI,
23374 ARM_BUILTIN_WSRAHI,
23375 ARM_BUILTIN_WSRAWI,
23376 ARM_BUILTIN_WSRADI,
23377 ARM_BUILTIN_WSRLHI,
23378 ARM_BUILTIN_WSRLWI,
23379 ARM_BUILTIN_WSRLDI,
23380 ARM_BUILTIN_WRORHI,
23381 ARM_BUILTIN_WRORWI,
23382 ARM_BUILTIN_WRORDI,
23383
23384 ARM_BUILTIN_WUNPCKIHB,
23385 ARM_BUILTIN_WUNPCKIHH,
23386 ARM_BUILTIN_WUNPCKIHW,
23387 ARM_BUILTIN_WUNPCKILB,
23388 ARM_BUILTIN_WUNPCKILH,
23389 ARM_BUILTIN_WUNPCKILW,
23390
23391 ARM_BUILTIN_WUNPCKEHSB,
23392 ARM_BUILTIN_WUNPCKEHSH,
23393 ARM_BUILTIN_WUNPCKEHSW,
23394 ARM_BUILTIN_WUNPCKEHUB,
23395 ARM_BUILTIN_WUNPCKEHUH,
23396 ARM_BUILTIN_WUNPCKEHUW,
23397 ARM_BUILTIN_WUNPCKELSB,
23398 ARM_BUILTIN_WUNPCKELSH,
23399 ARM_BUILTIN_WUNPCKELSW,
23400 ARM_BUILTIN_WUNPCKELUB,
23401 ARM_BUILTIN_WUNPCKELUH,
23402 ARM_BUILTIN_WUNPCKELUW,
23403
23404 ARM_BUILTIN_WABSB,
23405 ARM_BUILTIN_WABSH,
23406 ARM_BUILTIN_WABSW,
23407
23408 ARM_BUILTIN_WADDSUBHX,
23409 ARM_BUILTIN_WSUBADDHX,
23410
23411 ARM_BUILTIN_WABSDIFFB,
23412 ARM_BUILTIN_WABSDIFFH,
23413 ARM_BUILTIN_WABSDIFFW,
23414
23415 ARM_BUILTIN_WADDCH,
23416 ARM_BUILTIN_WADDCW,
23417
23418 ARM_BUILTIN_WAVG4,
23419 ARM_BUILTIN_WAVG4R,
23420
23421 ARM_BUILTIN_WMADDSX,
23422 ARM_BUILTIN_WMADDUX,
23423
23424 ARM_BUILTIN_WMADDSN,
23425 ARM_BUILTIN_WMADDUN,
23426
23427 ARM_BUILTIN_WMULWSM,
23428 ARM_BUILTIN_WMULWUM,
23429
23430 ARM_BUILTIN_WMULWSMR,
23431 ARM_BUILTIN_WMULWUMR,
23432
23433 ARM_BUILTIN_WMULWL,
23434
23435 ARM_BUILTIN_WMULSMR,
23436 ARM_BUILTIN_WMULUMR,
23437
23438 ARM_BUILTIN_WQMULM,
23439 ARM_BUILTIN_WQMULMR,
23440
23441 ARM_BUILTIN_WQMULWM,
23442 ARM_BUILTIN_WQMULWMR,
23443
23444 ARM_BUILTIN_WADDBHUSM,
23445 ARM_BUILTIN_WADDBHUSL,
23446
23447 ARM_BUILTIN_WQMIABB,
23448 ARM_BUILTIN_WQMIABT,
23449 ARM_BUILTIN_WQMIATB,
23450 ARM_BUILTIN_WQMIATT,
23451
23452 ARM_BUILTIN_WQMIABBN,
23453 ARM_BUILTIN_WQMIABTN,
23454 ARM_BUILTIN_WQMIATBN,
23455 ARM_BUILTIN_WQMIATTN,
23456
23457 ARM_BUILTIN_WMIABB,
23458 ARM_BUILTIN_WMIABT,
23459 ARM_BUILTIN_WMIATB,
23460 ARM_BUILTIN_WMIATT,
23461
23462 ARM_BUILTIN_WMIABBN,
23463 ARM_BUILTIN_WMIABTN,
23464 ARM_BUILTIN_WMIATBN,
23465 ARM_BUILTIN_WMIATTN,
23466
23467 ARM_BUILTIN_WMIAWBB,
23468 ARM_BUILTIN_WMIAWBT,
23469 ARM_BUILTIN_WMIAWTB,
23470 ARM_BUILTIN_WMIAWTT,
23471
23472 ARM_BUILTIN_WMIAWBBN,
23473 ARM_BUILTIN_WMIAWBTN,
23474 ARM_BUILTIN_WMIAWTBN,
23475 ARM_BUILTIN_WMIAWTTN,
23476
23477 ARM_BUILTIN_WMERGE,
23478
23479 ARM_BUILTIN_CRC32B,
23480 ARM_BUILTIN_CRC32H,
23481 ARM_BUILTIN_CRC32W,
23482 ARM_BUILTIN_CRC32CB,
23483 ARM_BUILTIN_CRC32CH,
23484 ARM_BUILTIN_CRC32CW,
23485
23486 ARM_BUILTIN_GET_FPSCR,
23487 ARM_BUILTIN_SET_FPSCR,
23488
23489 #undef CRYPTO1
23490 #undef CRYPTO2
23491 #undef CRYPTO3
23492
23493 #define CRYPTO1(L, U, M1, M2) \
23494 ARM_BUILTIN_CRYPTO_##U,
23495 #define CRYPTO2(L, U, M1, M2, M3) \
23496 ARM_BUILTIN_CRYPTO_##U,
23497 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23498 ARM_BUILTIN_CRYPTO_##U,
23499
23500 #include "crypto.def"
23501
23502 #undef CRYPTO1
23503 #undef CRYPTO2
23504 #undef CRYPTO3
23505
23506 #include "arm_neon_builtins.def"
23507
23508 ,ARM_BUILTIN_MAX
23509 };
23510
23511 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
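/* In other words, the codes expanded from arm_neon_builtins.def occupy
   the tail of the enum, so (as a sketch) the I-th entry of
   neon_builtin_data[] gets function code ARM_BUILTIN_NEON_BASE + I;
   arm_init_neon_builtins relies on this when assigning d->fcode.  */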
23512
23513 #undef CF
23514 #undef VAR1
23515 #undef VAR2
23516 #undef VAR3
23517 #undef VAR4
23518 #undef VAR5
23519 #undef VAR6
23520 #undef VAR7
23521 #undef VAR8
23522 #undef VAR9
23523 #undef VAR10
23524
23525 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23526
23527 #define NUM_DREG_TYPES 5
23528 #define NUM_QREG_TYPES 6
23529
23530 static void
23531 arm_init_neon_builtins (void)
23532 {
23533 unsigned int i, fcode;
23534 tree decl;
23535
23536 tree neon_intQI_type_node;
23537 tree neon_intHI_type_node;
23538 tree neon_floatHF_type_node;
23539 tree neon_polyQI_type_node;
23540 tree neon_polyHI_type_node;
23541 tree neon_intSI_type_node;
23542 tree neon_intDI_type_node;
23543 tree neon_intUTI_type_node;
23544 tree neon_float_type_node;
23545
23546 tree intQI_pointer_node;
23547 tree intHI_pointer_node;
23548 tree intSI_pointer_node;
23549 tree intDI_pointer_node;
23550 tree float_pointer_node;
23551
23552 tree const_intQI_node;
23553 tree const_intHI_node;
23554 tree const_intSI_node;
23555 tree const_intDI_node;
23556 tree const_float_node;
23557
23558 tree const_intQI_pointer_node;
23559 tree const_intHI_pointer_node;
23560 tree const_intSI_pointer_node;
23561 tree const_intDI_pointer_node;
23562 tree const_float_pointer_node;
23563
23564 tree V8QI_type_node;
23565 tree V4HI_type_node;
23566 tree V4UHI_type_node;
23567 tree V4HF_type_node;
23568 tree V2SI_type_node;
23569 tree V2USI_type_node;
23570 tree V2SF_type_node;
23571 tree V16QI_type_node;
23572 tree V8HI_type_node;
23573 tree V8UHI_type_node;
23574 tree V4SI_type_node;
23575 tree V4USI_type_node;
23576 tree V4SF_type_node;
23577 tree V2DI_type_node;
23578 tree V2UDI_type_node;
23579
23580 tree intUQI_type_node;
23581 tree intUHI_type_node;
23582 tree intUSI_type_node;
23583 tree intUDI_type_node;
23584
23585 tree intEI_type_node;
23586 tree intOI_type_node;
23587 tree intCI_type_node;
23588 tree intXI_type_node;
23589
23590 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23591 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23592 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23593
23594 /* Create distinguished type nodes for NEON vector element types,
23595 and pointers to values of such types, so we can detect them later. */
23596 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23597 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23598 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23599 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23600 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23601 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23602 neon_float_type_node = make_node (REAL_TYPE);
23603 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23604 layout_type (neon_float_type_node);
23605 neon_floatHF_type_node = make_node (REAL_TYPE);
23606 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23607 layout_type (neon_floatHF_type_node);
23608
23609 /* Define typedefs which exactly correspond to the modes we are basing vector
23610 types on. If you change these names you'll need to change
23611 the table used by arm_mangle_type too. */
23612 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23613 "__builtin_neon_qi");
23614 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23615 "__builtin_neon_hi");
23616 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23617 "__builtin_neon_hf");
23618 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23619 "__builtin_neon_si");
23620 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23621 "__builtin_neon_sf");
23622 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23623 "__builtin_neon_di");
23624 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23625 "__builtin_neon_poly8");
23626 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23627 "__builtin_neon_poly16");
23628
23629 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23630 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23631 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23632 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23633 float_pointer_node = build_pointer_type (neon_float_type_node);
23634
23635 /* Next create constant-qualified versions of the above types. */
23636 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23637 TYPE_QUAL_CONST);
23638 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23639 TYPE_QUAL_CONST);
23640 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23641 TYPE_QUAL_CONST);
23642 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23643 TYPE_QUAL_CONST);
23644 const_float_node = build_qualified_type (neon_float_type_node,
23645 TYPE_QUAL_CONST);
23646
23647 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23648 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23649 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23650 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23651 const_float_pointer_node = build_pointer_type (const_float_node);
23652
23653 /* Unsigned integer types for various mode sizes. */
23654 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23655 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23656 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23657 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23658 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23659 /* Now create vector types based on our NEON element types. */
23660 /* 64-bit vectors. */
23661 V8QI_type_node =
23662 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23663 V4HI_type_node =
23664 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23665 V4UHI_type_node =
23666 build_vector_type_for_mode (intUHI_type_node, V4HImode);
23667 V4HF_type_node =
23668 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23669 V2SI_type_node =
23670 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23671 V2USI_type_node =
23672 build_vector_type_for_mode (intUSI_type_node, V2SImode);
23673 V2SF_type_node =
23674 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23675 /* 128-bit vectors. */
23676 V16QI_type_node =
23677 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23678 V8HI_type_node =
23679 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23680 V8UHI_type_node =
23681 build_vector_type_for_mode (intUHI_type_node, V8HImode);
23682 V4SI_type_node =
23683 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23684 V4USI_type_node =
23685 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23686 V4SF_type_node =
23687 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23688 V2DI_type_node =
23689 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23690 V2UDI_type_node =
23691 build_vector_type_for_mode (intUDI_type_node, V2DImode);
23692
23693
23694 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23695 "__builtin_neon_uqi");
23696 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23697 "__builtin_neon_uhi");
23698 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23699 "__builtin_neon_usi");
23700 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23701 "__builtin_neon_udi");
23702 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23703 "__builtin_neon_poly64");
23704 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23705 "__builtin_neon_poly128");
23706
23707 /* Opaque integer types for structures of vectors. */
23708 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23709 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23710 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23711 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23712
23713 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23714 "__builtin_neon_ti");
23715 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23716 "__builtin_neon_ei");
23717 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23718 "__builtin_neon_oi");
23719 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23720 "__builtin_neon_ci");
23721 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23722 "__builtin_neon_xi");
23723
23724 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23725 {
23726
23727 tree V16UQI_type_node =
23728 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23729
23730 tree v16uqi_ftype_v16uqi
23731 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23732
23733 tree v16uqi_ftype_v16uqi_v16uqi
23734 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23735 V16UQI_type_node, NULL_TREE);
23736
23737 tree v4usi_ftype_v4usi
23738 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23739
23740 tree v4usi_ftype_v4usi_v4usi
23741 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23742 V4USI_type_node, NULL_TREE);
23743
23744 tree v4usi_ftype_v4usi_v4usi_v4usi
23745 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23746 V4USI_type_node, V4USI_type_node, NULL_TREE);
23747
23748 tree uti_ftype_udi_udi
23749 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23750 intUDI_type_node, NULL_TREE);
23751
23752 #undef CRYPTO1
23753 #undef CRYPTO2
23754 #undef CRYPTO3
23755 #undef C
23756 #undef N
23757 #undef CF
23758 #undef FT1
23759 #undef FT2
23760 #undef FT3
23761
23762 #define C(U) \
23763 ARM_BUILTIN_CRYPTO_##U
23764 #define N(L) \
23765 "__builtin_arm_crypto_"#L
23766 #define FT1(R, A) \
23767 R##_ftype_##A
23768 #define FT2(R, A1, A2) \
23769 R##_ftype_##A1##_##A2
23770 #define FT3(R, A1, A2, A3) \
23771 R##_ftype_##A1##_##A2##_##A3
23772 #define CRYPTO1(L, U, R, A) \
23773 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23774 C (U), BUILT_IN_MD, \
23775 NULL, NULL_TREE);
23776 #define CRYPTO2(L, U, R, A1, A2) \
23777 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23778 C (U), BUILT_IN_MD, \
23779 NULL, NULL_TREE);
23780
23781 #define CRYPTO3(L, U, R, A1, A2, A3) \
23782 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23783 C (U), BUILT_IN_MD, \
23784 NULL, NULL_TREE);
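/* For instance (a hypothetical entry), a crypto.def line of the form

     CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi)

   would register "__builtin_arm_crypto_aesd" with type
   v16uqi_ftype_v16uqi_v16uqi under the code ARM_BUILTIN_CRYPTO_AESD.  */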
23785 #include "crypto.def"
23786
23787 #undef CRYPTO1
23788 #undef CRYPTO2
23789 #undef CRYPTO3
23790 #undef C
23791 #undef N
23792 #undef FT1
23793 #undef FT2
23794 #undef FT3
23795 }
23796 dreg_types[0] = V8QI_type_node;
23797 dreg_types[1] = V4HI_type_node;
23798 dreg_types[2] = V2SI_type_node;
23799 dreg_types[3] = V2SF_type_node;
23800 dreg_types[4] = neon_intDI_type_node;
23801
23802 qreg_types[0] = V16QI_type_node;
23803 qreg_types[1] = V8HI_type_node;
23804 qreg_types[2] = V4SI_type_node;
23805 qreg_types[3] = V4SF_type_node;
23806 qreg_types[4] = V2DI_type_node;
23807 qreg_types[5] = neon_intUTI_type_node;
23808
23809 for (i = 0; i < NUM_QREG_TYPES; i++)
23810 {
23811 int j;
23812 for (j = 0; j < NUM_QREG_TYPES; j++)
23813 {
23814 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
23815 reinterp_ftype_dreg[i][j]
23816 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23817
23818 reinterp_ftype_qreg[i][j]
23819 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23820 }
23821 }
23822
23823 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23824 i < ARRAY_SIZE (neon_builtin_data);
23825 i++, fcode++)
23826 {
23827 neon_builtin_datum *d = &neon_builtin_data[i];
23828
23829 const char* const modenames[] = {
23830 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23831 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23832 "ti", "ei", "oi"
23833 };
23834 char namebuf[60];
23835 tree ftype = NULL;
23836 int is_load = 0, is_store = 0;
23837
23838 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23839
23840 d->fcode = fcode;
23841
23842 switch (d->itype)
23843 {
23844 case NEON_LOAD1:
23845 case NEON_LOAD1LANE:
23846 case NEON_LOADSTRUCT:
23847 case NEON_LOADSTRUCTLANE:
23848 is_load = 1;
23849 /* Fall through. */
23850 case NEON_STORE1:
23851 case NEON_STORE1LANE:
23852 case NEON_STORESTRUCT:
23853 case NEON_STORESTRUCTLANE:
23854 if (!is_load)
23855 is_store = 1;
23856 /* Fall through. */
23857 case NEON_UNOP:
23858 case NEON_RINT:
23859 case NEON_BINOP:
23860 case NEON_LOGICBINOP:
23861 case NEON_SHIFTINSERT:
23862 case NEON_TERNOP:
23863 case NEON_GETLANE:
23864 case NEON_SETLANE:
23865 case NEON_CREATE:
23866 case NEON_DUP:
23867 case NEON_DUPLANE:
23868 case NEON_SHIFTIMM:
23869 case NEON_SHIFTACC:
23870 case NEON_COMBINE:
23871 case NEON_SPLIT:
23872 case NEON_CONVERT:
23873 case NEON_FIXCONV:
23874 case NEON_LANEMUL:
23875 case NEON_LANEMULL:
23876 case NEON_LANEMULH:
23877 case NEON_LANEMAC:
23878 case NEON_SCALARMUL:
23879 case NEON_SCALARMULL:
23880 case NEON_SCALARMULH:
23881 case NEON_SCALARMAC:
23882 case NEON_SELECT:
23883 case NEON_VTBL:
23884 case NEON_VTBX:
23885 {
23886 int k;
23887 tree return_type = void_type_node, args = void_list_node;
23888
23889 /* Build a function type directly from the insn_data for
23890 this builtin. The build_function_type() function takes
23891 care of removing duplicates for us. */
23892 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
23893 {
23894 tree eltype;
23895
23896 if (is_load && k == 1)
23897 {
23898 /* Neon load patterns always have the memory
23899 operand in the operand 1 position. */
23900 gcc_assert (insn_data[d->code].operand[k].predicate
23901 == neon_struct_operand);
23902
23903 switch (d->mode)
23904 {
23905 case T_V8QI:
23906 case T_V16QI:
23907 eltype = const_intQI_pointer_node;
23908 break;
23909
23910 case T_V4HI:
23911 case T_V8HI:
23912 eltype = const_intHI_pointer_node;
23913 break;
23914
23915 case T_V2SI:
23916 case T_V4SI:
23917 eltype = const_intSI_pointer_node;
23918 break;
23919
23920 case T_V2SF:
23921 case T_V4SF:
23922 eltype = const_float_pointer_node;
23923 break;
23924
23925 case T_DI:
23926 case T_V2DI:
23927 eltype = const_intDI_pointer_node;
23928 break;
23929
23930 default: gcc_unreachable ();
23931 }
23932 }
23933 else if (is_store && k == 0)
23934 {
23935 /* Similarly, Neon store patterns use operand 0 as
23936 the memory location to store to. */
23937 gcc_assert (insn_data[d->code].operand[k].predicate
23938 == neon_struct_operand);
23939
23940 switch (d->mode)
23941 {
23942 case T_V8QI:
23943 case T_V16QI:
23944 eltype = intQI_pointer_node;
23945 break;
23946
23947 case T_V4HI:
23948 case T_V8HI:
23949 eltype = intHI_pointer_node;
23950 break;
23951
23952 case T_V2SI:
23953 case T_V4SI:
23954 eltype = intSI_pointer_node;
23955 break;
23956
23957 case T_V2SF:
23958 case T_V4SF:
23959 eltype = float_pointer_node;
23960 break;
23961
23962 case T_DI:
23963 case T_V2DI:
23964 eltype = intDI_pointer_node;
23965 break;
23966
23967 default: gcc_unreachable ();
23968 }
23969 }
23970 else
23971 {
23972 switch (insn_data[d->code].operand[k].mode)
23973 {
23974 case VOIDmode: eltype = void_type_node; break;
23975 /* Scalars. */
23976 case QImode: eltype = neon_intQI_type_node; break;
23977 case HImode: eltype = neon_intHI_type_node; break;
23978 case SImode: eltype = neon_intSI_type_node; break;
23979 case SFmode: eltype = neon_float_type_node; break;
23980 case DImode: eltype = neon_intDI_type_node; break;
23981 case TImode: eltype = intTI_type_node; break;
23982 case EImode: eltype = intEI_type_node; break;
23983 case OImode: eltype = intOI_type_node; break;
23984 case CImode: eltype = intCI_type_node; break;
23985 case XImode: eltype = intXI_type_node; break;
23986 /* 64-bit vectors. */
23987 case V8QImode: eltype = V8QI_type_node; break;
23988 case V4HImode: eltype = V4HI_type_node; break;
23989 case V2SImode: eltype = V2SI_type_node; break;
23990 case V2SFmode: eltype = V2SF_type_node; break;
23991 /* 128-bit vectors. */
23992 case V16QImode: eltype = V16QI_type_node; break;
23993 case V8HImode: eltype = V8HI_type_node; break;
23994 case V4SImode: eltype = V4SI_type_node; break;
23995 case V4SFmode: eltype = V4SF_type_node; break;
23996 case V2DImode: eltype = V2DI_type_node; break;
23997 default: gcc_unreachable ();
23998 }
23999 }
24000
24001 if (k == 0 && !is_store)
24002 return_type = eltype;
24003 else
24004 args = tree_cons (NULL_TREE, eltype, args);
24005 }
24006
24007 ftype = build_function_type (return_type, args);
24008 }
24009 break;
24010
24011 case NEON_REINTERP:
24012 {
24013 /* We iterate over NUM_DREG_TYPES doubleword types,
24014 then NUM_QREG_TYPES quadword types.
24015 V4HF is not a type used in reinterpret, so we translate
24016 d->mode to the correct index in reinterp_ftype_dreg. */
24017 bool qreg_p
24018 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
24019 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
24020 % NUM_QREG_TYPES;
24021 switch (insn_data[d->code].operand[0].mode)
24022 {
24023 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
24024 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
24025 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
24026 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
24027 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
24028 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
24029 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
24030 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
24031 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
24032 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
24033 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
24034 default: gcc_unreachable ();
24035 }
24036 }
24037 break;
24038 case NEON_FLOAT_WIDEN:
24039 {
24040 tree eltype = NULL_TREE;
24041 tree return_type = NULL_TREE;
24042
24043 switch (insn_data[d->code].operand[1].mode)
24044 {
24045 case V4HFmode:
24046 eltype = V4HF_type_node;
24047 return_type = V4SF_type_node;
24048 break;
24049 default: gcc_unreachable ();
24050 }
24051 ftype = build_function_type_list (return_type, eltype, NULL);
24052 break;
24053 }
24054 case NEON_FLOAT_NARROW:
24055 {
24056 tree eltype = NULL_TREE;
24057 tree return_type = NULL_TREE;
24058
24059 switch (insn_data[d->code].operand[1].mode)
24060 {
24061 case V4SFmode:
24062 eltype = V4SF_type_node;
24063 return_type = V4HF_type_node;
24064 break;
24065 default: gcc_unreachable ();
24066 }
24067 ftype = build_function_type_list (return_type, eltype, NULL);
24068 break;
24069 }
24070 case NEON_BSWAP:
24071 {
24072 tree eltype = NULL_TREE;
24073 switch (insn_data[d->code].operand[1].mode)
24074 {
24075 case V4HImode:
24076 eltype = V4UHI_type_node;
24077 break;
24078 case V8HImode:
24079 eltype = V8UHI_type_node;
24080 break;
24081 case V2SImode:
24082 eltype = V2USI_type_node;
24083 break;
24084 case V4SImode:
24085 eltype = V4USI_type_node;
24086 break;
24087 case V2DImode:
24088 eltype = V2UDI_type_node;
24089 break;
24090 default: gcc_unreachable ();
24091 }
24092 ftype = build_function_type_list (eltype, eltype, NULL);
24093 break;
24094 }
24095 default:
24096 gcc_unreachable ();
24097 }
24098
24099 gcc_assert (ftype != NULL);
24100
24101 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24102
24103 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24104 NULL_TREE);
24105 arm_builtin_decls[fcode] = decl;
24106 }
24107 }
24108
24109 #undef NUM_DREG_TYPES
24110 #undef NUM_QREG_TYPES
24111
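/* Helper for registering a single builtin: if any of the feature bits
   in MASK are enabled in insn_flags, create a builtin named NAME with
   function type TYPE and function code CODE, and cache the resulting
   decl in arm_builtin_decls so that arm_builtin_decl can return it.  */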
24112 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24113 do \
24114 { \
24115 if ((MASK) & insn_flags) \
24116 { \
24117 tree bdecl; \
24118 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24119 BUILT_IN_MD, NULL, NULL_TREE); \
24120 arm_builtin_decls[CODE] = bdecl; \
24121 } \
24122 } \
24123 while (0)
24124
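/* Description of a single builtin that maps directly onto an insn
   pattern: the feature flags required for it to exist (mask), the
   pattern to expand to (icode), the user-visible name, the
   arm_builtins code, and an optional comparison code and flag.  */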
24125 struct builtin_description
24126 {
24127 const unsigned int mask;
24128 const enum insn_code icode;
24129 const char * const name;
24130 const enum arm_builtins code;
24131 const enum rtx_code comparison;
24132 const unsigned int flag;
24133 };
24134
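/* Builtins that expand to a two-operand (binary) instruction pattern.
   arm_init_iwmmxt_builtins walks this table and registers every named
   iWMMXt entry whose feature mask is enabled.  */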
24135 static const struct builtin_description bdesc_2arg[] =
24136 {
24137 #define IWMMXT_BUILTIN(code, string, builtin) \
24138 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24139 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24140
24141 #define IWMMXT2_BUILTIN(code, string, builtin) \
24142 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24143 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24144
24145 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24146 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24147 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24148 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24149 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24150 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24151 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24152 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24153 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24154 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24155 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24156 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24157 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24158 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24159 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24160 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24161 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24162 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24163 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24164 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24165 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24166 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24167 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24168 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24169 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24170 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24171 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24172 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24173 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24174 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24175 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24176 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24177 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24178 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24179 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24180 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24181 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24182 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24183 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24184 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24185 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24186 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24187 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24188 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24189 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24190 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24191 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24192 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24193 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24194 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24195 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24196 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24197 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24198 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24199 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24200 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24201 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24202 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24203 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24204 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24205 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24206 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24207 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24208 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24209 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24210 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24211 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24212 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24213 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24214 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24215 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24216 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24217 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24218 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24219 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24220 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24221 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24222 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24223
24224 #define IWMMXT_BUILTIN2(code, builtin) \
24225 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24226
24227 #define IWMMXT2_BUILTIN2(code, builtin) \
24228 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24229
24230 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24231 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24232 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24233 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24234 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24235 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24236 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24237 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24238 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24239 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24240
24241
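/* Builtins for reading and writing the VFP floating-point status and
   control register (FPSCR).  */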
24242 #define FP_BUILTIN(L, U) \
24243 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24244 UNKNOWN, 0},
24245
24246 FP_BUILTIN (get_fpscr, GET_FPSCR)
24247 FP_BUILTIN (set_fpscr, SET_FPSCR)
24248 #undef FP_BUILTIN
24249
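/* Table entries for the ARMv8 CRC32 builtins; the decls themselves are
   created in arm_init_crc32_builtins below.  */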
24250 #define CRC32_BUILTIN(L, U) \
24251 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24252 UNKNOWN, 0},
24253 CRC32_BUILTIN (crc32b, CRC32B)
24254 CRC32_BUILTIN (crc32h, CRC32H)
24255 CRC32_BUILTIN (crc32w, CRC32W)
24256 CRC32_BUILTIN (crc32cb, CRC32CB)
24257 CRC32_BUILTIN (crc32ch, CRC32CH)
24258 CRC32_BUILTIN (crc32cw, CRC32CW)
24259 #undef CRC32_BUILTIN
24260
24261
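/* The crypto builtins are all listed once in crypto.def.  Including
   that file with exactly one of CRYPTO1/CRYPTO2/CRYPTO3 expanding to
   CRYPTO_BUILTIN picks out the entries of the matching arity for the
   table being built.  */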
24262 #define CRYPTO_BUILTIN(L, U) \
24263 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24264 UNKNOWN, 0},
24265 #undef CRYPTO1
24266 #undef CRYPTO2
24267 #undef CRYPTO3
24268 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24269 #define CRYPTO1(L, U, R, A)
24270 #define CRYPTO3(L, U, R, A1, A2, A3)
24271 #include "crypto.def"
24272 #undef CRYPTO1
24273 #undef CRYPTO2
24274 #undef CRYPTO3
24275
24276 };
24277
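/* Builtins that expand to a single-operand (unary) instruction
   pattern.  */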
24278 static const struct builtin_description bdesc_1arg[] =
24279 {
24280 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24281 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24282 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24283 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24284 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24285 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24286 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24287 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24288 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24289 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24290 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24291 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24292 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24293 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24294 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24295 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24296 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24297 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24298 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24299 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24300 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24301 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24302 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24303 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24304
24305 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24306 #define CRYPTO2(L, U, R, A1, A2)
24307 #define CRYPTO3(L, U, R, A1, A2, A3)
24308 #include "crypto.def"
24309 #undef CRYPTO1
24310 #undef CRYPTO2
24311 #undef CRYPTO3
24312 };
24313
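/* Builtins (currently only the three-operand crypto ones) that expand
   to a three-operand instruction pattern.  */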
24314 static const struct builtin_description bdesc_3arg[] =
24315 {
24316 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24317 #define CRYPTO1(L, U, R, A)
24318 #define CRYPTO2(L, U, R, A1, A2)
24319 #include "crypto.def"
24320 #undef CRYPTO1
24321 #undef CRYPTO2
24322 #undef CRYPTO3
24323 };
24324 #undef CRYPTO_BUILTIN
24325
24326 /* Set up all the iWMMXt builtins. This is not called if
24327 TARGET_IWMMXT is zero. */
24328
24329 static void
24330 arm_init_iwmmxt_builtins (void)
24331 {
24332 const struct builtin_description * d;
24333 size_t i;
24334
24335 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24336 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24337 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24338
24339 tree v8qi_ftype_v8qi_v8qi_int
24340 = build_function_type_list (V8QI_type_node,
24341 V8QI_type_node, V8QI_type_node,
24342 integer_type_node, NULL_TREE);
24343 tree v4hi_ftype_v4hi_int
24344 = build_function_type_list (V4HI_type_node,
24345 V4HI_type_node, integer_type_node, NULL_TREE);
24346 tree v2si_ftype_v2si_int
24347 = build_function_type_list (V2SI_type_node,
24348 V2SI_type_node, integer_type_node, NULL_TREE);
24349 tree v2si_ftype_di_di
24350 = build_function_type_list (V2SI_type_node,
24351 long_long_integer_type_node,
24352 long_long_integer_type_node,
24353 NULL_TREE);
24354 tree di_ftype_di_int
24355 = build_function_type_list (long_long_integer_type_node,
24356 long_long_integer_type_node,
24357 integer_type_node, NULL_TREE);
24358 tree di_ftype_di_int_int
24359 = build_function_type_list (long_long_integer_type_node,
24360 long_long_integer_type_node,
24361 integer_type_node,
24362 integer_type_node, NULL_TREE);
24363 tree int_ftype_v8qi
24364 = build_function_type_list (integer_type_node,
24365 V8QI_type_node, NULL_TREE);
24366 tree int_ftype_v4hi
24367 = build_function_type_list (integer_type_node,
24368 V4HI_type_node, NULL_TREE);
24369 tree int_ftype_v2si
24370 = build_function_type_list (integer_type_node,
24371 V2SI_type_node, NULL_TREE);
24372 tree int_ftype_v8qi_int
24373 = build_function_type_list (integer_type_node,
24374 V8QI_type_node, integer_type_node, NULL_TREE);
24375 tree int_ftype_v4hi_int
24376 = build_function_type_list (integer_type_node,
24377 V4HI_type_node, integer_type_node, NULL_TREE);
24378 tree int_ftype_v2si_int
24379 = build_function_type_list (integer_type_node,
24380 V2SI_type_node, integer_type_node, NULL_TREE);
24381 tree v8qi_ftype_v8qi_int_int
24382 = build_function_type_list (V8QI_type_node,
24383 V8QI_type_node, integer_type_node,
24384 integer_type_node, NULL_TREE);
24385 tree v4hi_ftype_v4hi_int_int
24386 = build_function_type_list (V4HI_type_node,
24387 V4HI_type_node, integer_type_node,
24388 integer_type_node, NULL_TREE);
24389 tree v2si_ftype_v2si_int_int
24390 = build_function_type_list (V2SI_type_node,
24391 V2SI_type_node, integer_type_node,
24392 integer_type_node, NULL_TREE);
24393 /* Miscellaneous. */
24394 tree v8qi_ftype_v4hi_v4hi
24395 = build_function_type_list (V8QI_type_node,
24396 V4HI_type_node, V4HI_type_node, NULL_TREE);
24397 tree v4hi_ftype_v2si_v2si
24398 = build_function_type_list (V4HI_type_node,
24399 V2SI_type_node, V2SI_type_node, NULL_TREE);
24400 tree v8qi_ftype_v4hi_v8qi
24401 = build_function_type_list (V8QI_type_node,
24402 V4HI_type_node, V8QI_type_node, NULL_TREE);
24403 tree v2si_ftype_v4hi_v4hi
24404 = build_function_type_list (V2SI_type_node,
24405 V4HI_type_node, V4HI_type_node, NULL_TREE);
24406 tree v2si_ftype_v8qi_v8qi
24407 = build_function_type_list (V2SI_type_node,
24408 V8QI_type_node, V8QI_type_node, NULL_TREE);
24409 tree v4hi_ftype_v4hi_di
24410 = build_function_type_list (V4HI_type_node,
24411 V4HI_type_node, long_long_integer_type_node,
24412 NULL_TREE);
24413 tree v2si_ftype_v2si_di
24414 = build_function_type_list (V2SI_type_node,
24415 V2SI_type_node, long_long_integer_type_node,
24416 NULL_TREE);
24417 tree di_ftype_void
24418 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24419 tree int_ftype_void
24420 = build_function_type_list (integer_type_node, NULL_TREE);
24421 tree di_ftype_v8qi
24422 = build_function_type_list (long_long_integer_type_node,
24423 V8QI_type_node, NULL_TREE);
24424 tree di_ftype_v4hi
24425 = build_function_type_list (long_long_integer_type_node,
24426 V4HI_type_node, NULL_TREE);
24427 tree di_ftype_v2si
24428 = build_function_type_list (long_long_integer_type_node,
24429 V2SI_type_node, NULL_TREE);
24430 tree v2si_ftype_v4hi
24431 = build_function_type_list (V2SI_type_node,
24432 V4HI_type_node, NULL_TREE);
24433 tree v4hi_ftype_v8qi
24434 = build_function_type_list (V4HI_type_node,
24435 V8QI_type_node, NULL_TREE);
24436 tree v8qi_ftype_v8qi
24437 = build_function_type_list (V8QI_type_node,
24438 V8QI_type_node, NULL_TREE);
24439 tree v4hi_ftype_v4hi
24440 = build_function_type_list (V4HI_type_node,
24441 V4HI_type_node, NULL_TREE);
24442 tree v2si_ftype_v2si
24443 = build_function_type_list (V2SI_type_node,
24444 V2SI_type_node, NULL_TREE);
24445
24446 tree di_ftype_di_v4hi_v4hi
24447 = build_function_type_list (long_long_unsigned_type_node,
24448 long_long_unsigned_type_node,
24449 V4HI_type_node, V4HI_type_node,
24450 NULL_TREE);
24451
24452 tree di_ftype_v4hi_v4hi
24453 = build_function_type_list (long_long_unsigned_type_node,
24454 V4HI_type_node, V4HI_type_node,
24455 NULL_TREE);
24456
24457 tree v2si_ftype_v2si_v4hi_v4hi
24458 = build_function_type_list (V2SI_type_node,
24459 V2SI_type_node, V4HI_type_node,
24460 V4HI_type_node, NULL_TREE);
24461
24462 tree v2si_ftype_v2si_v8qi_v8qi
24463 = build_function_type_list (V2SI_type_node,
24464 V2SI_type_node, V8QI_type_node,
24465 V8QI_type_node, NULL_TREE);
24466
24467 tree di_ftype_di_v2si_v2si
24468 = build_function_type_list (long_long_unsigned_type_node,
24469 long_long_unsigned_type_node,
24470 V2SI_type_node, V2SI_type_node,
24471 NULL_TREE);
24472
24473 tree di_ftype_di_di_int
24474 = build_function_type_list (long_long_unsigned_type_node,
24475 long_long_unsigned_type_node,
24476 long_long_unsigned_type_node,
24477 integer_type_node, NULL_TREE);
24478
24479 tree void_ftype_int
24480 = build_function_type_list (void_type_node,
24481 integer_type_node, NULL_TREE);
24482
24483 tree v8qi_ftype_char
24484 = build_function_type_list (V8QI_type_node,
24485 signed_char_type_node, NULL_TREE);
24486
24487 tree v4hi_ftype_short
24488 = build_function_type_list (V4HI_type_node,
24489 short_integer_type_node, NULL_TREE);
24490
24491 tree v2si_ftype_int
24492 = build_function_type_list (V2SI_type_node,
24493 integer_type_node, NULL_TREE);
24494
24495 /* Normal vector binops. */
24496 tree v8qi_ftype_v8qi_v8qi
24497 = build_function_type_list (V8QI_type_node,
24498 V8QI_type_node, V8QI_type_node, NULL_TREE);
24499 tree v4hi_ftype_v4hi_v4hi
24500 = build_function_type_list (V4HI_type_node,
24501 V4HI_type_node, V4HI_type_node, NULL_TREE);
24502 tree v2si_ftype_v2si_v2si
24503 = build_function_type_list (V2SI_type_node,
24504 V2SI_type_node, V2SI_type_node, NULL_TREE);
24505 tree di_ftype_di_di
24506 = build_function_type_list (long_long_unsigned_type_node,
24507 long_long_unsigned_type_node,
24508 long_long_unsigned_type_node,
24509 NULL_TREE);
24510
24511 /* Add all builtins that are more or less simple operations on two
24512 operands. */
24513 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24514 {
24515 /* Use one of the operands; the target can have a different mode for
24516 mask-generating compares. */
24517 enum machine_mode mode;
24518 tree type;
24519
24520 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24521 continue;
24522
24523 mode = insn_data[d->icode].operand[1].mode;
24524
24525 switch (mode)
24526 {
24527 case V8QImode:
24528 type = v8qi_ftype_v8qi_v8qi;
24529 break;
24530 case V4HImode:
24531 type = v4hi_ftype_v4hi_v4hi;
24532 break;
24533 case V2SImode:
24534 type = v2si_ftype_v2si_v2si;
24535 break;
24536 case DImode:
24537 type = di_ftype_di_di;
24538 break;
24539
24540 default:
24541 gcc_unreachable ();
24542 }
24543
24544 def_mbuiltin (d->mask, d->name, type, d->code);
24545 }
24546
24547 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
24548 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24549 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24550 ARM_BUILTIN_ ## CODE)
24551
24552 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24553 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24554 ARM_BUILTIN_ ## CODE)
24555
24556 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24557 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24558 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24559 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24560 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24561 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24562 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24563 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24564 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24565
24566 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24567 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24568 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24569 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24570 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24571 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24572
24573 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24574 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24575 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24576 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24577 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24578 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24579
24580 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24581 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24582 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24583 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24584 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24585 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24586
24587 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24588 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24589 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24590 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24591 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24592 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24593
24594 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24595
24596 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24597 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24598 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24599 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24600 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24601 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24602 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24603 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24604 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24605 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24606
24607 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24608 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24609 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24610 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24611 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24612 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24613 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24614 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24615 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24616
24617 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24618 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24619 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24620
24621 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24622 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24623 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24624
24625 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24626 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24627
24628 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24629 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24630 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24631 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24632 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24633 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24634
24635 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24636 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24637 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24638 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24639 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24640 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24641 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24642 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24643 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24644 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24645 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24646 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24647
24648 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24649 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24650 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24651 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24652
24653 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24654 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24655 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24656 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24657 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24658 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24659 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24660
24661 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24662 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24663 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24664
24665 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24666 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24667 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24668 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24669
24670 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24671 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24672 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24673 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24674
24675 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24676 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24677 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24678 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24679
24680 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24681 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24682 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24683 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24684
24685 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24686 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24687 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24688 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24689
24690 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24691 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24692 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24693 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24694
24695 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24696
24697 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24698 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24699 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24700
24701 #undef iwmmx_mbuiltin
24702 #undef iwmmx2_mbuiltin
24703 }
24704
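/* Create the 16-bit floating-point type and register it with the
   front end under the name __fp16.  */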
24705 static void
24706 arm_init_fp16_builtins (void)
24707 {
24708 tree fp16_type = make_node (REAL_TYPE);
24709 TYPE_PRECISION (fp16_type) = 16;
24710 layout_type (fp16_type);
24711 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
24712 }
24713
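/* Register the CRC32 builtins __builtin_arm_crc32{b,h,w} and
   __builtin_arm_crc32c{b,h,w}.  Each takes an unsigned 32-bit
   accumulator and an unsigned 8-, 16- or 32-bit data value and
   returns an unsigned 32-bit result, e.g.
     crc = __builtin_arm_crc32b (crc, byte);  */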
24714 static void
24715 arm_init_crc32_builtins ()
24716 {
24717 tree si_ftype_si_qi
24718 = build_function_type_list (unsigned_intSI_type_node,
24719 unsigned_intSI_type_node,
24720 unsigned_intQI_type_node, NULL_TREE);
24721 tree si_ftype_si_hi
24722 = build_function_type_list (unsigned_intSI_type_node,
24723 unsigned_intSI_type_node,
24724 unsigned_intHI_type_node, NULL_TREE);
24725 tree si_ftype_si_si
24726 = build_function_type_list (unsigned_intSI_type_node,
24727 unsigned_intSI_type_node,
24728 unsigned_intSI_type_node, NULL_TREE);
24729
24730 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24731 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24732 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24733 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24734 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24735 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24736 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24737 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24738 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24739 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24740 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24741 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24742 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24743 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24744 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24745 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24746 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24747 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
24748 }
24749
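/* Set up all of the target-specific builtins for the features that
   are enabled: iWMMXt, Neon, __fp16, CRC32 and the VFP FPSCR
   accessors.  */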
24750 static void
24751 arm_init_builtins (void)
24752 {
24753 if (TARGET_REALLY_IWMMXT)
24754 arm_init_iwmmxt_builtins ();
24755
24756 if (TARGET_NEON)
24757 arm_init_neon_builtins ();
24758
24759 if (arm_fp16_format)
24760 arm_init_fp16_builtins ();
24761
24762 if (TARGET_CRC32)
24763 arm_init_crc32_builtins ();
24764
24765 if (TARGET_VFP)
24766 {
24767 tree ftype_set_fpscr
24768 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
24769 tree ftype_get_fpscr
24770 = build_function_type_list (unsigned_type_node, NULL);
24771
24772 arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]
24773 = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr,
24774 ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
24775 arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]
24776 = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
24777 ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
24778 }
24779 }
24780
24781 /* Return the ARM builtin for CODE. */
24782
24783 static tree
24784 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24785 {
24786 if (code >= ARM_BUILTIN_MAX)
24787 return error_mark_node;
24788
24789 return arm_builtin_decls[code];
24790 }
24791
24792 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24793
24794 static const char *
24795 arm_invalid_parameter_type (const_tree t)
24796 {
24797 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24798 return N_("function parameters cannot have __fp16 type");
24799 return NULL;
24800 }
24801
24802 /* Implement TARGET_INVALID_RETURN_TYPE. */
24803
24804 static const char *
24805 arm_invalid_return_type (const_tree t)
24806 {
24807 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24808 return N_("functions cannot return __fp16 type");
24809 return NULL;
24810 }
24811
24812 /* Implement TARGET_PROMOTED_TYPE. */
24813
24814 static tree
24815 arm_promoted_type (const_tree t)
24816 {
24817 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24818 return float_type_node;
24819 return NULL_TREE;
24820 }
24821
24822 /* Implement TARGET_CONVERT_TO_TYPE.
24823 Specifically, this hook implements the peculiarity of the ARM
24824 half-precision floating-point C semantics that requires conversions between
24825 __fp16 and double to go through an intermediate conversion to float. */
24826
24827 static tree
24828 arm_convert_to_type (tree type, tree expr)
24829 {
24830 tree fromtype = TREE_TYPE (expr);
24831 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24832 return NULL_TREE;
24833 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24834 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24835 return convert (type, convert (float_type_node, expr));
24836 return NULL_TREE;
24837 }
24838
24839 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24840 This simply adds HFmode as a supported mode; even though we don't
24841 implement arithmetic on this type directly, it's supported by
24842 optabs conversions, much the way the double-word arithmetic is
24843 special-cased in the default hook. */
24844
24845 static bool
24846 arm_scalar_mode_supported_p (enum machine_mode mode)
24847 {
24848 if (mode == HFmode)
24849 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24850 else if (ALL_FIXED_POINT_MODE_P (mode))
24851 return true;
24852 else
24853 return default_scalar_mode_supported_p (mode);
24854 }
24855
24856 /* Errors in the source file can cause expand_expr to return const0_rtx
24857 where we expect a vector. To avoid crashing, use one of the vector
24858 clear instructions. */
24859
24860 static rtx
24861 safe_vector_operand (rtx x, enum machine_mode mode)
24862 {
24863 if (x != const0_rtx)
24864 return x;
24865 x = gen_reg_rtx (mode);
24866
24867 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
24868 : gen_rtx_SUBREG (DImode, x, 0)));
24869 return x;
24870 }
24871
24872 /* Function to expand ternary builtins. */
24873 static rtx
24874 arm_expand_ternop_builtin (enum insn_code icode,
24875 tree exp, rtx target)
24876 {
24877 rtx pat;
24878 tree arg0 = CALL_EXPR_ARG (exp, 0);
24879 tree arg1 = CALL_EXPR_ARG (exp, 1);
24880 tree arg2 = CALL_EXPR_ARG (exp, 2);
24881
24882 rtx op0 = expand_normal (arg0);
24883 rtx op1 = expand_normal (arg1);
24884 rtx op2 = expand_normal (arg2);
24885 rtx op3 = NULL_RTX;
24886
24887 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24888 lane operand depending on endianness. */
24889 bool builtin_sha1cpm_p = false;
24890
24891 if (insn_data[icode].n_operands == 5)
24892 {
24893 gcc_assert (icode == CODE_FOR_crypto_sha1c
24894 || icode == CODE_FOR_crypto_sha1p
24895 || icode == CODE_FOR_crypto_sha1m);
24896 builtin_sha1cpm_p = true;
24897 }
24898 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24899 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24900 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24901 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
24902
24903
24904 if (VECTOR_MODE_P (mode0))
24905 op0 = safe_vector_operand (op0, mode0);
24906 if (VECTOR_MODE_P (mode1))
24907 op1 = safe_vector_operand (op1, mode1);
24908 if (VECTOR_MODE_P (mode2))
24909 op2 = safe_vector_operand (op2, mode2);
24910
24911 if (! target
24912 || GET_MODE (target) != tmode
24913 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24914 target = gen_reg_rtx (tmode);
24915
24916 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24917 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
24918 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
24919
24920 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24921 op0 = copy_to_mode_reg (mode0, op0);
24922 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24923 op1 = copy_to_mode_reg (mode1, op1);
24924 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24925 op2 = copy_to_mode_reg (mode2, op2);
24926 if (builtin_sha1cpm_p)
24927 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24928
24929 if (builtin_sha1cpm_p)
24930 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
24931 else
24932 pat = GEN_FCN (icode) (target, op0, op1, op2);
24933 if (! pat)
24934 return 0;
24935 emit_insn (pat);
24936 return target;
24937 }
24938
24939 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24940
24941 static rtx
24942 arm_expand_binop_builtin (enum insn_code icode,
24943 tree exp, rtx target)
24944 {
24945 rtx pat;
24946 tree arg0 = CALL_EXPR_ARG (exp, 0);
24947 tree arg1 = CALL_EXPR_ARG (exp, 1);
24948 rtx op0 = expand_normal (arg0);
24949 rtx op1 = expand_normal (arg1);
24950 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24951 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24952 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24953
24954 if (VECTOR_MODE_P (mode0))
24955 op0 = safe_vector_operand (op0, mode0);
24956 if (VECTOR_MODE_P (mode1))
24957 op1 = safe_vector_operand (op1, mode1);
24958
24959 if (! target
24960 || GET_MODE (target) != tmode
24961 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24962 target = gen_reg_rtx (tmode);
24963
24964 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24965 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
24966
24967 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24968 op0 = copy_to_mode_reg (mode0, op0);
24969 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24970 op1 = copy_to_mode_reg (mode1, op1);
24971
24972 pat = GEN_FCN (icode) (target, op0, op1);
24973 if (! pat)
24974 return 0;
24975 emit_insn (pat);
24976 return target;
24977 }
24978
24979 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24980
24981 static rtx
24982 arm_expand_unop_builtin (enum insn_code icode,
24983 tree exp, rtx target, int do_load)
24984 {
24985 rtx pat;
24986 tree arg0 = CALL_EXPR_ARG (exp, 0);
24987 rtx op0 = expand_normal (arg0);
24988 rtx op1 = NULL_RTX;
24989 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24990 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24991 bool builtin_sha1h_p = false;
24992
24993 if (insn_data[icode].n_operands == 3)
24994 {
24995 gcc_assert (icode == CODE_FOR_crypto_sha1h);
24996 builtin_sha1h_p = true;
24997 }
24998
24999 if (! target
25000 || GET_MODE (target) != tmode
25001 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25002 target = gen_reg_rtx (tmode);
25003 if (do_load)
25004 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
25005 else
25006 {
25007 if (VECTOR_MODE_P (mode0))
25008 op0 = safe_vector_operand (op0, mode0);
25009
25010 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25011 op0 = copy_to_mode_reg (mode0, op0);
25012 }
25013 if (builtin_sha1h_p)
25014 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
25015
25016 if (builtin_sha1h_p)
25017 pat = GEN_FCN (icode) (target, op0, op1);
25018 else
25019 pat = GEN_FCN (icode) (target, op0);
25020 if (! pat)
25021 return 0;
25022 emit_insn (pat);
25023 return target;
25024 }
25025
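/* How arm_expand_neon_args should handle each argument of a Neon
   builtin: copy it into a register, require it to be a constant,
   treat it as a memory reference, or (NEON_ARG_STOP) end of the
   argument list.  */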
25026 typedef enum {
25027 NEON_ARG_COPY_TO_REG,
25028 NEON_ARG_CONSTANT,
25029 NEON_ARG_MEMORY,
25030 NEON_ARG_STOP
25031 } builtin_arg;
25032
25033 #define NEON_MAX_BUILTIN_ARGS 5
25034
25035 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
25036 and return an expression for the accessed memory.
25037
25038 The intrinsic function operates on a block of registers that has
25039 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
25040 function references the memory at EXP of type TYPE and in mode
25041 MEM_MODE; this mode may be BLKmode if no more suitable mode is
25042 available. */
25043
25044 static tree
25045 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
25046 enum machine_mode reg_mode,
25047 neon_builtin_type_mode type_mode)
25048 {
25049 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
25050 tree elem_type, upper_bound, array_type;
25051
25052 /* Work out the size of the register block in bytes. */
25053 reg_size = GET_MODE_SIZE (reg_mode);
25054
25055 /* Work out the size of each vector in bytes. */
25056 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
25057 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
25058
25059 /* Work out how many vectors there are. */
25060 gcc_assert (reg_size % vector_size == 0);
25061 nvectors = reg_size / vector_size;
25062
25063 /* Work out the type of each element. */
25064 gcc_assert (POINTER_TYPE_P (type));
25065 elem_type = TREE_TYPE (type);
25066
25067 /* Work out how many elements are being loaded or stored.
25068 MEM_MODE == REG_MODE implies a one-to-one mapping between register
25069 and memory elements; anything else implies a lane load or store. */
25070 if (mem_mode == reg_mode)
25071 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
25072 else
25073 nelems = nvectors;
25074
25075 /* Create a type that describes the full access. */
25076 upper_bound = build_int_cst (size_type_node, nelems - 1);
25077 array_type = build_array_type (elem_type, build_index_type (upper_bound));
25078
25079 /* Dereference EXP using that type. */
25080 return fold_build2 (MEM_REF, array_type, exp,
25081 build_int_cst (build_pointer_type (array_type), 0));
25082 }
25083
25084 /* Worker for arm_expand_neon_builtin: expand the args per the builtin_arg list, then emit instruction ICODE.  */
25085 static rtx
25086 arm_expand_neon_args (rtx target, int icode, int have_retval,
25087 neon_builtin_type_mode type_mode,
25088 tree exp, int fcode, ...)
25089 {
25090 va_list ap;
25091 rtx pat;
25092 tree arg[NEON_MAX_BUILTIN_ARGS];
25093 rtx op[NEON_MAX_BUILTIN_ARGS];
25094 tree arg_type;
25095 tree formals;
25096 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25097 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25098 enum machine_mode other_mode;
25099 int argc = 0;
25100 int opno;
25101
25102 if (have_retval
25103 && (!target
25104 || GET_MODE (target) != tmode
25105 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25106 target = gen_reg_rtx (tmode);
25107
25108 va_start (ap, fcode);
25109
25110 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25111
25112 for (;;)
25113 {
25114 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25115
25116 if (thisarg == NEON_ARG_STOP)
25117 break;
25118 else
25119 {
25120 opno = argc + have_retval;
25121 mode[argc] = insn_data[icode].operand[opno].mode;
25122 arg[argc] = CALL_EXPR_ARG (exp, argc);
25123 arg_type = TREE_VALUE (formals);
25124 if (thisarg == NEON_ARG_MEMORY)
25125 {
25126 other_mode = insn_data[icode].operand[1 - opno].mode;
25127 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25128 mode[argc], other_mode,
25129 type_mode);
25130 }
25131
25132 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that a MEM_P
25133 rtx is returned. */
25134 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25135 (thisarg == NEON_ARG_MEMORY
25136 ? EXPAND_MEMORY : EXPAND_NORMAL));
25137
25138 switch (thisarg)
25139 {
25140 case NEON_ARG_COPY_TO_REG:
25141 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25142 if (!(*insn_data[icode].operand[opno].predicate)
25143 (op[argc], mode[argc]))
25144 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25145 break;
25146
25147 case NEON_ARG_CONSTANT:
25148 /* FIXME: This error message is somewhat unhelpful. */
25149 if (!(*insn_data[icode].operand[opno].predicate)
25150 (op[argc], mode[argc]))
25151 error ("argument must be a constant");
25152 break;
25153
25154 case NEON_ARG_MEMORY:
25155 /* Check if expand failed. */
25156 if (op[argc] == const0_rtx)
25157 return 0;
25158 gcc_assert (MEM_P (op[argc]));
25159 PUT_MODE (op[argc], mode[argc]);
25160 /* ??? arm_neon.h uses the same built-in functions for signed
25161 and unsigned accesses, casting where necessary. This isn't
25162 alias safe. */
25163 set_mem_alias_set (op[argc], 0);
25164 if (!(*insn_data[icode].operand[opno].predicate)
25165 (op[argc], mode[argc]))
25166 op[argc] = (replace_equiv_address
25167 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25168 break;
25169
25170 case NEON_ARG_STOP:
25171 gcc_unreachable ();
25172 }
25173
25174 argc++;
25175 formals = TREE_CHAIN (formals);
25176 }
25177 }
25178
25179 va_end (ap);
25180
25181 if (have_retval)
25182 switch (argc)
25183 {
25184 case 1:
25185 pat = GEN_FCN (icode) (target, op[0]);
25186 break;
25187
25188 case 2:
25189 pat = GEN_FCN (icode) (target, op[0], op[1]);
25190 break;
25191
25192 case 3:
25193 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25194 break;
25195
25196 case 4:
25197 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25198 break;
25199
25200 case 5:
25201 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25202 break;
25203
25204 default:
25205 gcc_unreachable ();
25206 }
25207 else
25208 switch (argc)
25209 {
25210 case 1:
25211 pat = GEN_FCN (icode) (op[0]);
25212 break;
25213
25214 case 2:
25215 pat = GEN_FCN (icode) (op[0], op[1]);
25216 break;
25217
25218 case 3:
25219 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25220 break;
25221
25222 case 4:
25223 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25224 break;
25225
25226 case 5:
25227 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25228 break;
25229
25230 default:
25231 gcc_unreachable ();
25232 }
25233
25234 if (!pat)
25235 return 0;
25236
25237 emit_insn (pat);
25238
25239 return target;
25240 }
25241
25242 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25243 constants defined per-instruction or per instruction-variant. Instead, the
25244 required info is looked up in the table neon_builtin_data. */
25245 static rtx
25246 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25247 {
25248 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25249 neon_itype itype = d->itype;
25250 enum insn_code icode = d->code;
25251 neon_builtin_type_mode type_mode = d->mode;
25252
25253 switch (itype)
25254 {
25255 case NEON_UNOP:
25256 case NEON_CONVERT:
25257 case NEON_DUPLANE:
25258 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25259 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25260
25261 case NEON_BINOP:
25262 case NEON_SETLANE:
25263 case NEON_SCALARMUL:
25264 case NEON_SCALARMULL:
25265 case NEON_SCALARMULH:
25266 case NEON_SHIFTINSERT:
25267 case NEON_LOGICBINOP:
25268 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25269 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25270 NEON_ARG_STOP);
25271
25272 case NEON_TERNOP:
25273 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25274 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25275 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25276
25277 case NEON_GETLANE:
25278 case NEON_FIXCONV:
25279 case NEON_SHIFTIMM:
25280 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25281 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25282 NEON_ARG_STOP);
25283
25284 case NEON_CREATE:
25285 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25286 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25287
25288 case NEON_DUP:
25289 case NEON_RINT:
25290 case NEON_SPLIT:
25291 case NEON_FLOAT_WIDEN:
25292 case NEON_FLOAT_NARROW:
25293 case NEON_BSWAP:
25294 case NEON_REINTERP:
25295 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25296 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25297
25298 case NEON_COMBINE:
25299 case NEON_VTBL:
25300 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25301 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25302
25303 case NEON_LANEMUL:
25304 case NEON_LANEMULL:
25305 case NEON_LANEMULH:
25306 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25307 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25308 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25309
25310 case NEON_LANEMAC:
25311 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25312 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25313 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25314
25315 case NEON_SHIFTACC:
25316 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25317 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25318 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25319
25320 case NEON_SCALARMAC:
25321 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25322 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25323 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25324
25325 case NEON_SELECT:
25326 case NEON_VTBX:
25327 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25328 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25329 NEON_ARG_STOP);
25330
25331 case NEON_LOAD1:
25332 case NEON_LOADSTRUCT:
25333 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25334 NEON_ARG_MEMORY, NEON_ARG_STOP);
25335
25336 case NEON_LOAD1LANE:
25337 case NEON_LOADSTRUCTLANE:
25338 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25339 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25340 NEON_ARG_STOP);
25341
25342 case NEON_STORE1:
25343 case NEON_STORESTRUCT:
25344 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25345 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25346
25347 case NEON_STORE1LANE:
25348 case NEON_STORESTRUCTLANE:
25349 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25350 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25351 NEON_ARG_STOP);
25352 }
25353
25354 gcc_unreachable ();
25355 }
25356
25357 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25358 void
25359 neon_reinterpret (rtx dest, rtx src)
25360 {
25361 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25362 }
25363
25364 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25365 not to early-clobber SRC registers in the process.
25366
25367 We assume that the operands described by SRC and DEST represent a
25368 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25369 number of components into which the copy has been decomposed. */
25370 void
25371 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25372 {
25373 unsigned int i;
25374
25375 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25376 || REGNO (operands[0]) < REGNO (operands[1]))
25377 {
25378 for (i = 0; i < count; i++)
25379 {
25380 operands[2 * i] = dest[i];
25381 operands[2 * i + 1] = src[i];
25382 }
25383 }
25384 else
25385 {
25386 for (i = 0; i < count; i++)
25387 {
25388 operands[2 * i] = dest[count - i - 1];
25389 operands[2 * i + 1] = src[count - i - 1];
25390 }
25391 }
25392 }
25393
25394 /* Split operands into moves from op[1] + op[2] into op[0]. */
25395
25396 void
25397 neon_split_vcombine (rtx operands[3])
25398 {
25399 unsigned int dest = REGNO (operands[0]);
25400 unsigned int src1 = REGNO (operands[1]);
25401 unsigned int src2 = REGNO (operands[2]);
25402 enum machine_mode halfmode = GET_MODE (operands[1]);
25403 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25404 rtx destlo, desthi;
25405
25406 if (src1 == dest && src2 == dest + halfregs)
25407 {
25408 /* No-op move. Can't split to nothing; emit something. */
25409 emit_note (NOTE_INSN_DELETED);
25410 return;
25411 }
25412
25413 /* Preserve register attributes for variable tracking. */
25414 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25415 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25416 GET_MODE_SIZE (halfmode));
25417
25418 /* Special case of reversed high/low parts. Use VSWP. */
25419 if (src2 == dest && src1 == dest + halfregs)
25420 {
25421 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25422 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25423 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25424 return;
25425 }
25426
25427 if (!reg_overlap_mentioned_p (operands[2], destlo))
25428 {
25429 /* Try to avoid unnecessary moves if part of the result
25430 is in the right place already. */
25431 if (src1 != dest)
25432 emit_move_insn (destlo, operands[1]);
25433 if (src2 != dest + halfregs)
25434 emit_move_insn (desthi, operands[2]);
25435 }
25436 else
25437 {
25438 if (src2 != dest + halfregs)
25439 emit_move_insn (desthi, operands[2]);
25440 if (src1 != dest)
25441 emit_move_insn (destlo, operands[1]);
25442 }
25443 }
25444
25445 /* Expand an expression EXP that calls a built-in function,
25446 with result going to TARGET if that's convenient
25447 (and in mode MODE if that's convenient).
25448 SUBTARGET may be used as the target for computing one of EXP's operands.
25449 IGNORE is nonzero if the value is to be ignored. */
25450
25451 static rtx
25452 arm_expand_builtin (tree exp,
25453 rtx target,
25454 rtx subtarget ATTRIBUTE_UNUSED,
25455 enum machine_mode mode ATTRIBUTE_UNUSED,
25456 int ignore ATTRIBUTE_UNUSED)
25457 {
25458 const struct builtin_description * d;
25459 enum insn_code icode;
25460 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25461 tree arg0;
25462 tree arg1;
25463 tree arg2;
25464 rtx op0;
25465 rtx op1;
25466 rtx op2;
25467 rtx pat;
25468 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25469 size_t i;
25470 enum machine_mode tmode;
25471 enum machine_mode mode0;
25472 enum machine_mode mode1;
25473 enum machine_mode mode2;
25474 int opint;
25475 int selector;
25476 int mask;
25477 int imm;
25478
25479 if (fcode >= ARM_BUILTIN_NEON_BASE)
25480 return arm_expand_neon_builtin (fcode, exp, target);
25481
25482 switch (fcode)
25483 {
25484 case ARM_BUILTIN_GET_FPSCR:
25485 case ARM_BUILTIN_SET_FPSCR:
25486 if (fcode == ARM_BUILTIN_GET_FPSCR)
25487 {
25488 icode = CODE_FOR_get_fpscr;
25489 target = gen_reg_rtx (SImode);
25490 pat = GEN_FCN (icode) (target);
25491 }
25492 else
25493 {
25494 target = NULL_RTX;
25495 icode = CODE_FOR_set_fpscr;
25496 arg0 = CALL_EXPR_ARG (exp, 0);
25497 op0 = expand_normal (arg0);
25498 pat = GEN_FCN (icode) (op0);
25499 }
25500 emit_insn (pat);
25501 return target;
25502
25503 case ARM_BUILTIN_TEXTRMSB:
25504 case ARM_BUILTIN_TEXTRMUB:
25505 case ARM_BUILTIN_TEXTRMSH:
25506 case ARM_BUILTIN_TEXTRMUH:
25507 case ARM_BUILTIN_TEXTRMSW:
25508 case ARM_BUILTIN_TEXTRMUW:
25509 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25510 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25511 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25512 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25513 : CODE_FOR_iwmmxt_textrmw);
25514
25515 arg0 = CALL_EXPR_ARG (exp, 0);
25516 arg1 = CALL_EXPR_ARG (exp, 1);
25517 op0 = expand_normal (arg0);
25518 op1 = expand_normal (arg1);
25519 tmode = insn_data[icode].operand[0].mode;
25520 mode0 = insn_data[icode].operand[1].mode;
25521 mode1 = insn_data[icode].operand[2].mode;
25522
25523 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25524 op0 = copy_to_mode_reg (mode0, op0);
25525 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25526 {
25527 /* @@@ better error message */
25528 error ("selector must be an immediate");
25529 return gen_reg_rtx (tmode);
25530 }
25531
25532 opint = INTVAL (op1);
25533 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25534 {
25535 if (opint > 7 || opint < 0)
25536 error ("the range of selector should be in 0 to 7");
25537 }
25538 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25539 {
25540 if (opint > 3 || opint < 0)
25541 error ("the range of selector should be in 0 to 3");
25542 }
25543 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25544 {
25545 if (opint > 1 || opint < 0)
25546 error ("the range of selector should be in 0 to 1");
25547 }
25548
25549 if (target == 0
25550 || GET_MODE (target) != tmode
25551 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25552 target = gen_reg_rtx (tmode);
25553 pat = GEN_FCN (icode) (target, op0, op1);
25554 if (! pat)
25555 return 0;
25556 emit_insn (pat);
25557 return target;
25558
25559 case ARM_BUILTIN_WALIGNI:
25560 /* If op2 is an immediate, use waligni; otherwise use walignr. */
25561 arg0 = CALL_EXPR_ARG (exp, 0);
25562 arg1 = CALL_EXPR_ARG (exp, 1);
25563 arg2 = CALL_EXPR_ARG (exp, 2);
25564 op0 = expand_normal (arg0);
25565 op1 = expand_normal (arg1);
25566 op2 = expand_normal (arg2);
25567 if (CONST_INT_P (op2))
25568 {
25569 icode = CODE_FOR_iwmmxt_waligni;
25570 tmode = insn_data[icode].operand[0].mode;
25571 mode0 = insn_data[icode].operand[1].mode;
25572 mode1 = insn_data[icode].operand[2].mode;
25573 mode2 = insn_data[icode].operand[3].mode;
25574 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25575 op0 = copy_to_mode_reg (mode0, op0);
25576 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25577 op1 = copy_to_mode_reg (mode1, op1);
25578 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25579 selector = INTVAL (op2);
25580 if (selector > 7 || selector < 0)
25581 error ("the range of selector should be in 0 to 7");
25582 }
25583 else
25584 {
25585 icode = CODE_FOR_iwmmxt_walignr;
25586 tmode = insn_data[icode].operand[0].mode;
25587 mode0 = insn_data[icode].operand[1].mode;
25588 mode1 = insn_data[icode].operand[2].mode;
25589 mode2 = insn_data[icode].operand[3].mode;
25590 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25591 op0 = copy_to_mode_reg (mode0, op0);
25592 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25593 op1 = copy_to_mode_reg (mode1, op1);
25594 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25595 op2 = copy_to_mode_reg (mode2, op2);
25596 }
25597 if (target == 0
25598 || GET_MODE (target) != tmode
25599 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25600 target = gen_reg_rtx (tmode);
25601 pat = GEN_FCN (icode) (target, op0, op1, op2);
25602 if (!pat)
25603 return 0;
25604 emit_insn (pat);
25605 return target;
25606
25607 case ARM_BUILTIN_TINSRB:
25608 case ARM_BUILTIN_TINSRH:
25609 case ARM_BUILTIN_TINSRW:
25610 case ARM_BUILTIN_WMERGE:
25611 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25612 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25613 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25614 : CODE_FOR_iwmmxt_tinsrw);
25615 arg0 = CALL_EXPR_ARG (exp, 0);
25616 arg1 = CALL_EXPR_ARG (exp, 1);
25617 arg2 = CALL_EXPR_ARG (exp, 2);
25618 op0 = expand_normal (arg0);
25619 op1 = expand_normal (arg1);
25620 op2 = expand_normal (arg2);
25621 tmode = insn_data[icode].operand[0].mode;
25622 mode0 = insn_data[icode].operand[1].mode;
25623 mode1 = insn_data[icode].operand[2].mode;
25624 mode2 = insn_data[icode].operand[3].mode;
25625
25626 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25627 op0 = copy_to_mode_reg (mode0, op0);
25628 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25629 op1 = copy_to_mode_reg (mode1, op1);
25630 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25631 {
25632 error ("selector must be an immediate");
25633 return const0_rtx;
25634 }
25635 if (icode == CODE_FOR_iwmmxt_wmerge)
25636 {
25637 selector = INTVAL (op2);
25638 if (selector > 7 || selector < 0)
25639 error ("the range of selector should be in 0 to 7");
25640 }
25641 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25642 || (icode == CODE_FOR_iwmmxt_tinsrh)
25643 || (icode == CODE_FOR_iwmmxt_tinsrw))
25644 {
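/* The iwmmxt_tinsr* patterns take a one-hot lane mask rather than an
   element index, so convert the selector into (1 << selector) once it
   has been range-checked.  */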
25645 mask = 0x01;
25646 selector = INTVAL (op2);
25647 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25648 error ("the selector must be in the range 0 to 7");
25649 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25650 error ("the selector must be in the range 0 to 3");
25651 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25652 error ("the selector must be in the range 0 to 1");
25653 mask <<= selector;
25654 op2 = GEN_INT (mask);
25655 }
25656 if (target == 0
25657 || GET_MODE (target) != tmode
25658 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25659 target = gen_reg_rtx (tmode);
25660 pat = GEN_FCN (icode) (target, op0, op1, op2);
25661 if (! pat)
25662 return 0;
25663 emit_insn (pat);
25664 return target;
25665
25666 case ARM_BUILTIN_SETWCGR0:
25667 case ARM_BUILTIN_SETWCGR1:
25668 case ARM_BUILTIN_SETWCGR2:
25669 case ARM_BUILTIN_SETWCGR3:
25670 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25671 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25672 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25673 : CODE_FOR_iwmmxt_setwcgr3);
25674 arg0 = CALL_EXPR_ARG (exp, 0);
25675 op0 = expand_normal (arg0);
25676 mode0 = insn_data[icode].operand[0].mode;
25677 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25678 op0 = copy_to_mode_reg (mode0, op0);
25679 pat = GEN_FCN (icode) (op0);
25680 if (!pat)
25681 return 0;
25682 emit_insn (pat);
25683 return 0;
25684
25685 case ARM_BUILTIN_GETWCGR0:
25686 case ARM_BUILTIN_GETWCGR1:
25687 case ARM_BUILTIN_GETWCGR2:
25688 case ARM_BUILTIN_GETWCGR3:
25689 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25690 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25691 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25692 : CODE_FOR_iwmmxt_getwcgr3);
25693 tmode = insn_data[icode].operand[0].mode;
25694 if (target == 0
25695 || GET_MODE (target) != tmode
25696 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25697 target = gen_reg_rtx (tmode);
25698 pat = GEN_FCN (icode) (target);
25699 if (!pat)
25700 return 0;
25701 emit_insn (pat);
25702 return target;
25703
25704 case ARM_BUILTIN_WSHUFH:
25705 icode = CODE_FOR_iwmmxt_wshufh;
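/* The wshufh control operand must be an immediate byte; each pair of
   bits selects the source halfword for one result lane, hence the
   0 to 255 range check below.  */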
25706 arg0 = CALL_EXPR_ARG (exp, 0);
25707 arg1 = CALL_EXPR_ARG (exp, 1);
25708 op0 = expand_normal (arg0);
25709 op1 = expand_normal (arg1);
25710 tmode = insn_data[icode].operand[0].mode;
25711 mode1 = insn_data[icode].operand[1].mode;
25712 mode2 = insn_data[icode].operand[2].mode;
25713
25714 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25715 op0 = copy_to_mode_reg (mode1, op0);
25716 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25717 {
25718 error ("mask must be an immediate");
25719 return const0_rtx;
25720 }
25721 selector = INTVAL (op1);
25722 if (selector < 0 || selector > 255)
25723 error ("the range of mask should be in 0 to 255");
25724 if (target == 0
25725 || GET_MODE (target) != tmode
25726 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25727 target = gen_reg_rtx (tmode);
25728 pat = GEN_FCN (icode) (target, op0, op1);
25729 if (! pat)
25730 return 0;
25731 emit_insn (pat);
25732 return target;
25733
25734 case ARM_BUILTIN_WMADDS:
25735 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25736 case ARM_BUILTIN_WMADDSX:
25737 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25738 case ARM_BUILTIN_WMADDSN:
25739 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25740 case ARM_BUILTIN_WMADDU:
25741 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25742 case ARM_BUILTIN_WMADDUX:
25743 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25744 case ARM_BUILTIN_WMADDUN:
25745 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25746 case ARM_BUILTIN_WSADBZ:
25747 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25748 case ARM_BUILTIN_WSADHZ:
25749 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25750
25751 /* Several three-argument builtins. */
25752 case ARM_BUILTIN_WMACS:
25753 case ARM_BUILTIN_WMACU:
25754 case ARM_BUILTIN_TMIA:
25755 case ARM_BUILTIN_TMIAPH:
25756 case ARM_BUILTIN_TMIATT:
25757 case ARM_BUILTIN_TMIATB:
25758 case ARM_BUILTIN_TMIABT:
25759 case ARM_BUILTIN_TMIABB:
25760 case ARM_BUILTIN_WQMIABB:
25761 case ARM_BUILTIN_WQMIABT:
25762 case ARM_BUILTIN_WQMIATB:
25763 case ARM_BUILTIN_WQMIATT:
25764 case ARM_BUILTIN_WQMIABBN:
25765 case ARM_BUILTIN_WQMIABTN:
25766 case ARM_BUILTIN_WQMIATBN:
25767 case ARM_BUILTIN_WQMIATTN:
25768 case ARM_BUILTIN_WMIABB:
25769 case ARM_BUILTIN_WMIABT:
25770 case ARM_BUILTIN_WMIATB:
25771 case ARM_BUILTIN_WMIATT:
25772 case ARM_BUILTIN_WMIABBN:
25773 case ARM_BUILTIN_WMIABTN:
25774 case ARM_BUILTIN_WMIATBN:
25775 case ARM_BUILTIN_WMIATTN:
25776 case ARM_BUILTIN_WMIAWBB:
25777 case ARM_BUILTIN_WMIAWBT:
25778 case ARM_BUILTIN_WMIAWTB:
25779 case ARM_BUILTIN_WMIAWTT:
25780 case ARM_BUILTIN_WMIAWBBN:
25781 case ARM_BUILTIN_WMIAWBTN:
25782 case ARM_BUILTIN_WMIAWTBN:
25783 case ARM_BUILTIN_WMIAWTTN:
25784 case ARM_BUILTIN_WSADB:
25785 case ARM_BUILTIN_WSADH:
25786 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25787 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25788 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25789 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25790 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25791 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25792 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25793 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25794 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25795 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25796 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25797 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25798 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25799 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25800 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25801 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25802 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25803 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25804 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25805 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25806 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25807 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25808 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25809 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25810 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25811 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25812 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25813 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25814 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25815 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25816 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25817 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25818 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25819 : CODE_FOR_iwmmxt_wsadh);
25820 arg0 = CALL_EXPR_ARG (exp, 0);
25821 arg1 = CALL_EXPR_ARG (exp, 1);
25822 arg2 = CALL_EXPR_ARG (exp, 2);
25823 op0 = expand_normal (arg0);
25824 op1 = expand_normal (arg1);
25825 op2 = expand_normal (arg2);
25826 tmode = insn_data[icode].operand[0].mode;
25827 mode0 = insn_data[icode].operand[1].mode;
25828 mode1 = insn_data[icode].operand[2].mode;
25829 mode2 = insn_data[icode].operand[3].mode;
25830
25831 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25832 op0 = copy_to_mode_reg (mode0, op0);
25833 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25834 op1 = copy_to_mode_reg (mode1, op1);
25835 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25836 op2 = copy_to_mode_reg (mode2, op2);
25837 if (target == 0
25838 || GET_MODE (target) != tmode
25839 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25840 target = gen_reg_rtx (tmode);
25841 pat = GEN_FCN (icode) (target, op0, op1, op2);
25842 if (! pat)
25843 return 0;
25844 emit_insn (pat);
25845 return target;
25846
25847 case ARM_BUILTIN_WZERO:
25848 target = gen_reg_rtx (DImode);
25849 emit_insn (gen_iwmmxt_clrdi (target));
25850 return target;
25851
25852 case ARM_BUILTIN_WSRLHI:
25853 case ARM_BUILTIN_WSRLWI:
25854 case ARM_BUILTIN_WSRLDI:
25855 case ARM_BUILTIN_WSLLHI:
25856 case ARM_BUILTIN_WSLLWI:
25857 case ARM_BUILTIN_WSLLDI:
25858 case ARM_BUILTIN_WSRAHI:
25859 case ARM_BUILTIN_WSRAWI:
25860 case ARM_BUILTIN_WSRADI:
25861 case ARM_BUILTIN_WRORHI:
25862 case ARM_BUILTIN_WRORWI:
25863 case ARM_BUILTIN_WRORDI:
25864 case ARM_BUILTIN_WSRLH:
25865 case ARM_BUILTIN_WSRLW:
25866 case ARM_BUILTIN_WSRLD:
25867 case ARM_BUILTIN_WSLLH:
25868 case ARM_BUILTIN_WSLLW:
25869 case ARM_BUILTIN_WSLLD:
25870 case ARM_BUILTIN_WSRAH:
25871 case ARM_BUILTIN_WSRAW:
25872 case ARM_BUILTIN_WSRAD:
25873 case ARM_BUILTIN_WRORH:
25874 case ARM_BUILTIN_WRORW:
25875 case ARM_BUILTIN_WRORD:
25876 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
25877 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
25878 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
25879 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
25880 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
25881 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
25882 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
25883 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
25884 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
25885 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
25886 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
25887 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
25888 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
25889 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
25890 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
25891 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
25892 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
25893 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
25894 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
25895 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
25896 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
25897 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
25898 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
25899 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
25900 : CODE_FOR_nothing);
25901 arg1 = CALL_EXPR_ARG (exp, 1);
25902 op1 = expand_normal (arg1);
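/* A VOIDmode value here means OP1 is a CONST_INT, i.e. the shift or
   rotate count was given as a compile-time constant, so its range can
   be checked immediately.  */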
25903 if (GET_MODE (op1) == VOIDmode)
25904 {
25905 imm = INTVAL (op1);
25906 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
25907 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
25908 && (imm < 0 || imm > 32))
25909 {
25910 if (fcode == ARM_BUILTIN_WRORHI)
25911 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi16 in your code");
25912 else if (fcode == ARM_BUILTIN_WRORWI)
25913 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi32 in your code");
25914 else if (fcode == ARM_BUILTIN_WRORH)
25915 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi16 in your code");
25916 else
25917 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi32 in your code");
25918 }
25919 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
25920 && (imm < 0 || imm > 64))
25921 {
25922 if (fcode == ARM_BUILTIN_WRORDI)
25923 error ("the count must be in the range 0 to 64; please check the intrinsic _mm_rori_si64 in your code");
25924 else
25925 error ("the count must be in the range 0 to 64; please check the intrinsic _mm_ror_si64 in your code");
25926 }
25927 else if (imm < 0)
25928 {
25929 if (fcode == ARM_BUILTIN_WSRLHI)
25930 error ("the count must not be negative; please check the intrinsic _mm_srli_pi16 in your code");
25931 else if (fcode == ARM_BUILTIN_WSRLWI)
25932 error ("the count must not be negative; please check the intrinsic _mm_srli_pi32 in your code");
25933 else if (fcode == ARM_BUILTIN_WSRLDI)
25934 error ("the count must not be negative; please check the intrinsic _mm_srli_si64 in your code");
25935 else if (fcode == ARM_BUILTIN_WSLLHI)
25936 error ("the count must not be negative; please check the intrinsic _mm_slli_pi16 in your code");
25937 else if (fcode == ARM_BUILTIN_WSLLWI)
25938 error ("the count must not be negative; please check the intrinsic _mm_slli_pi32 in your code");
25939 else if (fcode == ARM_BUILTIN_WSLLDI)
25940 error ("the count must not be negative; please check the intrinsic _mm_slli_si64 in your code");
25941 else if (fcode == ARM_BUILTIN_WSRAHI)
25942 error ("the count must not be negative; please check the intrinsic _mm_srai_pi16 in your code");
25943 else if (fcode == ARM_BUILTIN_WSRAWI)
25944 error ("the count must not be negative; please check the intrinsic _mm_srai_pi32 in your code");
25945 else if (fcode == ARM_BUILTIN_WSRADI)
25946 error ("the count must not be negative; please check the intrinsic _mm_srai_si64 in your code");
25947 else if (fcode == ARM_BUILTIN_WSRLH)
25948 error ("the count must not be negative; please check the intrinsic _mm_srl_pi16 in your code");
25949 else if (fcode == ARM_BUILTIN_WSRLW)
25950 error ("the count must not be negative; please check the intrinsic _mm_srl_pi32 in your code");
25951 else if (fcode == ARM_BUILTIN_WSRLD)
25952 error ("the count must not be negative; please check the intrinsic _mm_srl_si64 in your code");
25953 else if (fcode == ARM_BUILTIN_WSLLH)
25954 error ("the count must not be negative; please check the intrinsic _mm_sll_pi16 in your code");
25955 else if (fcode == ARM_BUILTIN_WSLLW)
25956 error ("the count must not be negative; please check the intrinsic _mm_sll_pi32 in your code");
25957 else if (fcode == ARM_BUILTIN_WSLLD)
25958 error ("the count must not be negative; please check the intrinsic _mm_sll_si64 in your code");
25959 else if (fcode == ARM_BUILTIN_WSRAH)
25960 error ("the count must not be negative; please check the intrinsic _mm_sra_pi16 in your code");
25961 else if (fcode == ARM_BUILTIN_WSRAW)
25962 error ("the count must not be negative; please check the intrinsic _mm_sra_pi32 in your code");
25963 else
25964 error ("the count must not be negative; please check the intrinsic _mm_sra_si64 in your code");
25965 }
25966 }
25967 return arm_expand_binop_builtin (icode, exp, target);
25968
25969 default:
25970 break;
25971 }
25972
25973 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
25974 if (d->code == (const enum arm_builtins) fcode)
25975 return arm_expand_binop_builtin (d->icode, exp, target);
25976
25977 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
25978 if (d->code == (const enum arm_builtins) fcode)
25979 return arm_expand_unop_builtin (d->icode, exp, target, 0);
25980
25981 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
25982 if (d->code == (const enum arm_builtins) fcode)
25983 return arm_expand_ternop_builtin (d->icode, exp, target);
25984
25985 /* @@@ Should really do something sensible here. */
25986 return NULL_RTX;
25987 }
25988 \f
25989 /* Return the number (counting from 0) of
25990 the least significant set bit in MASK. */
25991
25992 inline static int
25993 number_of_first_bit_set (unsigned mask)
25994 {
25995 return ctz_hwi (mask);
25996 }
25997
25998 /* Like emit_multi_reg_push, but allowing for a different set of
25999 registers to be described as saved. MASK is the set of registers
26000 to be saved; REAL_REGS is the set of registers to be described as
26001 saved. If REAL_REGS is 0, only describe the stack adjustment. */
26002
26003 static rtx
26004 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26005 {
26006 unsigned long regno;
26007 rtx par[10], tmp, reg, insn;
26008 int i, j;
26009
26010 /* Build the parallel of the registers actually being stored. */
26011 for (i = 0; mask; ++i, mask &= mask - 1)
26012 {
26013 regno = ctz_hwi (mask);
26014 reg = gen_rtx_REG (SImode, regno);
26015
26016 if (i == 0)
26017 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26018 else
26019 tmp = gen_rtx_USE (VOIDmode, reg);
26020
26021 par[i] = tmp;
26022 }
26023
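/* Turn the first element into the actual store: a block move to the
   pre-decremented stack address whose source is the UNSPEC_PUSH_MULT
   wrapping the first register.  The remaining elements stay as USEs
   naming the other pushed registers.  */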
26024 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26025 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26026 tmp = gen_frame_mem (BLKmode, tmp);
26027 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
26028 par[0] = tmp;
26029
26030 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26031 insn = emit_insn (tmp);
26032
26033 /* Always build the stack adjustment note for unwind info. */
26034 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26035 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
26036 par[0] = tmp;
26037
26038 /* Build the parallel of the registers recorded as saved for unwind. */
26039 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26040 {
26041 regno = ctz_hwi (real_regs);
26042 reg = gen_rtx_REG (SImode, regno);
26043
26044 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26045 tmp = gen_frame_mem (SImode, tmp);
26046 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
26047 RTX_FRAME_RELATED_P (tmp) = 1;
26048 par[j + 1] = tmp;
26049 }
26050
26051 if (j == 0)
26052 tmp = par[0];
26053 else
26054 {
26055 RTX_FRAME_RELATED_P (par[0]) = 1;
26056 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26057 }
26058
26059 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26060
26061 return insn;
26062 }
26063
26064 /* Emit code to push or pop registers to or from the stack. F is the
26065 assembly file. MASK is the registers to pop. */
26066 static void
26067 thumb_pop (FILE *f, unsigned long mask)
26068 {
26069 int regno;
26070 int lo_mask = mask & 0xFF;
26071 int pushed_words = 0;
26072
26073 gcc_assert (mask);
26074
26075 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26076 {
26077 /* Special case.  Do not generate a POP PC statement here; do it in
26078 thumb_exit ().  */
26079 thumb_exit (f, -1);
26080 return;
26081 }
26082
26083 fprintf (f, "\tpop\t{");
26084
26085 /* Look at the low registers first. */
26086 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26087 {
26088 if (lo_mask & 1)
26089 {
26090 asm_fprintf (f, "%r", regno);
26091
26092 if ((lo_mask & ~1) != 0)
26093 fprintf (f, ", ");
26094
26095 pushed_words++;
26096 }
26097 }
26098
26099 if (mask & (1 << PC_REGNUM))
26100 {
26101 /* Catch popping the PC. */
26102 if (TARGET_INTERWORK || TARGET_BACKTRACE
26103 || crtl->calls_eh_return)
26104 {
26105 /* The PC is never popped directly; instead
26106 it is popped into r3 and then BX is used. */
26107 fprintf (f, "}\n");
26108
26109 thumb_exit (f, -1);
26110
26111 return;
26112 }
26113 else
26114 {
26115 if (mask & 0xFF)
26116 fprintf (f, ", ");
26117
26118 asm_fprintf (f, "%r", PC_REGNUM);
26119 }
26120 }
26121
26122 fprintf (f, "}\n");
26123 }
26124
26125 /* Generate code to return from a thumb function.
26126 If 'reg_containing_return_addr' is -1, then the return address is
26127 actually on the stack, at the stack pointer. */
26128 static void
26129 thumb_exit (FILE *f, int reg_containing_return_addr)
26130 {
26131 unsigned regs_available_for_popping;
26132 unsigned regs_to_pop;
26133 int pops_needed;
26134 unsigned available;
26135 unsigned required;
26136 enum machine_mode mode;
26137 int size;
26138 int restore_a4 = FALSE;
26139
26140 /* Compute the registers we need to pop. */
26141 regs_to_pop = 0;
26142 pops_needed = 0;
26143
26144 if (reg_containing_return_addr == -1)
26145 {
26146 regs_to_pop |= 1 << LR_REGNUM;
26147 ++pops_needed;
26148 }
26149
26150 if (TARGET_BACKTRACE)
26151 {
26152 /* Restore the (ARM) frame pointer and stack pointer. */
26153 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26154 pops_needed += 2;
26155 }
26156
26157 /* If there is nothing to pop then just emit the BX instruction and
26158 return. */
26159 if (pops_needed == 0)
26160 {
26161 if (crtl->calls_eh_return)
26162 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26163
26164 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26165 return;
26166 }
26167 /* Otherwise if we are not supporting interworking and we have not created
26168 a backtrace structure and the function was not entered in ARM mode then
26169 just pop the return address straight into the PC. */
26170 else if (!TARGET_INTERWORK
26171 && !TARGET_BACKTRACE
26172 && !is_called_in_ARM_mode (current_function_decl)
26173 && !crtl->calls_eh_return)
26174 {
26175 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26176 return;
26177 }
26178
26179 /* Find out how many of the (return) argument registers we can corrupt. */
26180 regs_available_for_popping = 0;
26181
26182 /* If returning via __builtin_eh_return, the bottom three registers
26183 all contain information needed for the return. */
26184 if (crtl->calls_eh_return)
26185 size = 12;
26186 else
26187 {
26188 /* Deduce the registers used from the function's return value,
26189 where possible.  This is more reliable than examining
26190 df_regs_ever_live_p () because that will be set if the register is
26191 ever used in the function, not just if the register is used
26192 to hold a return value. */
26193
26194 if (crtl->return_rtx != 0)
26195 mode = GET_MODE (crtl->return_rtx);
26196 else
26197 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26198
26199 size = GET_MODE_SIZE (mode);
26200
26201 if (size == 0)
26202 {
26203 /* In a void function we can use any argument register.
26204 In a function that returns a structure on the stack
26205 we can use the second and third argument registers. */
26206 if (mode == VOIDmode)
26207 regs_available_for_popping =
26208 (1 << ARG_REGISTER (1))
26209 | (1 << ARG_REGISTER (2))
26210 | (1 << ARG_REGISTER (3));
26211 else
26212 regs_available_for_popping =
26213 (1 << ARG_REGISTER (2))
26214 | (1 << ARG_REGISTER (3));
26215 }
26216 else if (size <= 4)
26217 regs_available_for_popping =
26218 (1 << ARG_REGISTER (2))
26219 | (1 << ARG_REGISTER (3));
26220 else if (size <= 8)
26221 regs_available_for_popping =
26222 (1 << ARG_REGISTER (3));
26223 }
26224
26225 /* Match registers to be popped with registers into which we pop them. */
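/* Each iteration clears the lowest set bit of both masks
   (x &= ~(x & -x)), pairing one register that must be popped with one
   register we are allowed to pop it into.  */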
26226 for (available = regs_available_for_popping,
26227 required = regs_to_pop;
26228 required != 0 && available != 0;
26229 available &= ~(available & - available),
26230 required &= ~(required & - required))
26231 -- pops_needed;
26232
26233 /* If we have any popping registers left over, remove them. */
26234 if (available > 0)
26235 regs_available_for_popping &= ~available;
26236
26237 /* Otherwise if we need another popping register we can use
26238 the fourth argument register. */
26239 else if (pops_needed)
26240 {
26241 /* If we have not found any free argument registers and
26242 reg a4 contains the return address, we must move it. */
26243 if (regs_available_for_popping == 0
26244 && reg_containing_return_addr == LAST_ARG_REGNUM)
26245 {
26246 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26247 reg_containing_return_addr = LR_REGNUM;
26248 }
26249 else if (size > 12)
26250 {
26251 /* Register a4 is being used to hold part of the return value,
26252 but we have dire need of a free, low register. */
26253 restore_a4 = TRUE;
26254
26255 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26256 }
26257
26258 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26259 {
26260 /* The fourth argument register is available. */
26261 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26262
26263 --pops_needed;
26264 }
26265 }
26266
26267 /* Pop as many registers as we can. */
26268 thumb_pop (f, regs_available_for_popping);
26269
26270 /* Process the registers we popped. */
26271 if (reg_containing_return_addr == -1)
26272 {
26273 /* The return address was popped into the lowest numbered register. */
26274 regs_to_pop &= ~(1 << LR_REGNUM);
26275
26276 reg_containing_return_addr =
26277 number_of_first_bit_set (regs_available_for_popping);
26278
26279 /* Remove this register from the mask of available registers, so that
26280 the return address will not be corrupted by further pops. */
26281 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26282 }
26283
26284 /* If we popped other registers then handle them here. */
26285 if (regs_available_for_popping)
26286 {
26287 int frame_pointer;
26288
26289 /* Work out which register currently contains the frame pointer. */
26290 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26291
26292 /* Move it into the correct place. */
26293 asm_fprintf (f, "\tmov\t%r, %r\n",
26294 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26295
26296 /* (Temporarily) remove it from the mask of popped registers. */
26297 regs_available_for_popping &= ~(1 << frame_pointer);
26298 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26299
26300 if (regs_available_for_popping)
26301 {
26302 int stack_pointer;
26303
26304 /* We popped the stack pointer as well,
26305 find the register that contains it. */
26306 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26307
26308 /* Move it into the stack register. */
26309 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26310
26311 /* At this point we have popped all necessary registers, so
26312 do not worry about restoring regs_available_for_popping
26313 to its correct value:
26314
26315 assert (pops_needed == 0)
26316 assert (regs_available_for_popping == (1 << frame_pointer))
26317 assert (regs_to_pop == (1 << STACK_POINTER)) */
26318 }
26319 else
26320 {
26321 /* Since we have just moved the popped value into the frame
26322 pointer, the popping register is available for reuse, and
26323 we know that we still have the stack pointer left to pop. */
26324 regs_available_for_popping |= (1 << frame_pointer);
26325 }
26326 }
26327
26328 /* If we still have registers left on the stack, but we no longer have
26329 any registers into which we can pop them, then we must move the return
26330 address into the link register and make available the register that
26331 contained it. */
26332 if (regs_available_for_popping == 0 && pops_needed > 0)
26333 {
26334 regs_available_for_popping |= 1 << reg_containing_return_addr;
26335
26336 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26337 reg_containing_return_addr);
26338
26339 reg_containing_return_addr = LR_REGNUM;
26340 }
26341
26342 /* If we have registers left on the stack then pop some more.
26343 We know that at most we will want to pop FP and SP. */
26344 if (pops_needed > 0)
26345 {
26346 int popped_into;
26347 int move_to;
26348
26349 thumb_pop (f, regs_available_for_popping);
26350
26351 /* We have popped either FP or SP.
26352 Move whichever one it is into the correct register. */
26353 popped_into = number_of_first_bit_set (regs_available_for_popping);
26354 move_to = number_of_first_bit_set (regs_to_pop);
26355
26356 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26357
26358 regs_to_pop &= ~(1 << move_to);
26359
26360 --pops_needed;
26361 }
26362
26363 /* If we still have not popped everything then we must have only
26364 had one register available to us and we are now popping the SP. */
26365 if (pops_needed > 0)
26366 {
26367 int popped_into;
26368
26369 thumb_pop (f, regs_available_for_popping);
26370
26371 popped_into = number_of_first_bit_set (regs_available_for_popping);
26372
26373 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26374 /*
26375 assert (regs_to_pop == (1 << STACK_POINTER))
26376 assert (pops_needed == 1)
26377 */
26378 }
26379
26380 /* If necessary restore the a4 register. */
26381 if (restore_a4)
26382 {
26383 if (reg_containing_return_addr != LR_REGNUM)
26384 {
26385 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26386 reg_containing_return_addr = LR_REGNUM;
26387 }
26388
26389 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26390 }
26391
26392 if (crtl->calls_eh_return)
26393 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26394
26395 /* Return to caller. */
26396 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26397 }
26398 \f
26399 /* Scan INSN just before assembler is output for it.
26400 For Thumb-1, we track the status of the condition codes; this
26401 information is used in the cbranchsi4_insn pattern. */
26402 void
26403 thumb1_final_prescan_insn (rtx insn)
26404 {
26405 if (flag_print_asm_name)
26406 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26407 INSN_ADDRESSES (INSN_UID (insn)));
26408 /* Don't overwrite the previous setter when we get to a cbranch. */
26409 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26410 {
26411 enum attr_conds conds;
26412
26413 if (cfun->machine->thumb1_cc_insn)
26414 {
26415 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26416 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26417 CC_STATUS_INIT;
26418 }
26419 conds = get_attr_conds (insn);
26420 if (conds == CONDS_SET)
26421 {
26422 rtx set = single_set (insn);
26423 cfun->machine->thumb1_cc_insn = insn;
26424 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26425 cfun->machine->thumb1_cc_op1 = const0_rtx;
26426 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26427 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26428 {
26429 rtx src1 = XEXP (SET_SRC (set), 1);
26430 if (src1 == const0_rtx)
26431 cfun->machine->thumb1_cc_mode = CCmode;
26432 }
26433 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26434 {
26435 /* Record the src register operand instead of dest because
26436 cprop_hardreg pass propagates src. */
26437 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26438 }
26439 }
26440 else if (conds != CONDS_NOCOND)
26441 cfun->machine->thumb1_cc_insn = NULL_RTX;
26442 }
26443
26444 /* Check if an unexpected far jump is used.  */
26445 if (cfun->machine->lr_save_eliminated
26446 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26447 internal_error ("unexpected thumb1 far jump");
26448 }
26449
26450 int
26451 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26452 {
26453 unsigned HOST_WIDE_INT mask = 0xff;
26454 int i;
26455
26456 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26457 if (val == 0) /* XXX */
26458 return 0;
26459
26460 for (i = 0; i < 25; i++)
26461 if ((val & (mask << i)) == val)
26462 return 1;
26463
26464 return 0;
26465 }
26466
26467 /* Returns nonzero if the current function contains,
26468 or might contain, a far jump. */
26469 static int
26470 thumb_far_jump_used_p (void)
26471 {
26472 rtx insn;
26473 bool far_jump = false;
26474 unsigned int func_size = 0;
26475
26476 /* This test is only important for leaf functions. */
26477 /* assert (!leaf_function_p ()); */
26478
26479 /* If we have already decided that far jumps may be used,
26480 do not bother checking again, and always return true even if
26481 it turns out that they are not being used. Once we have made
26482 the decision that far jumps are present (and that hence the link
26483 register will be pushed onto the stack) we cannot go back on it. */
26484 if (cfun->machine->far_jump_used)
26485 return 1;
26486
26487 /* If this function is not being called from the prologue/epilogue
26488 generation code then it must be being called from the
26489 INITIAL_ELIMINATION_OFFSET macro. */
26490 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26491 {
26492 /* In this case we know that we are being asked about the elimination
26493 of the arg pointer register. If that register is not being used,
26494 then there are no arguments on the stack, and we do not have to
26495 worry that a far jump might force the prologue to push the link
26496 register, changing the stack offsets. In this case we can just
26497 return false, since the presence of far jumps in the function will
26498 not affect stack offsets.
26499
26500 If the arg pointer is live (or if it was live, but has now been
26501 eliminated and so set to dead) then we do have to test to see if
26502 the function might contain a far jump. This test can lead to some
26503 false negatives, since before reload is completed, the length of
26504 branch instructions is not known, so gcc defaults to returning their
26505 longest length, which in turn sets the far jump attribute to true.
26506
26507 A false negative will not result in bad code being generated, but it
26508 will result in a needless push and pop of the link register. We
26509 hope that this does not occur too often.
26510
26511 If we need doubleword stack alignment this could affect the other
26512 elimination offsets so we can't risk getting it wrong. */
26513 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26514 cfun->machine->arg_pointer_live = 1;
26515 else if (!cfun->machine->arg_pointer_live)
26516 return 0;
26517 }
26518
26519 /* We should not change far_jump_used during or after reload, as there is
26520 no chance to change stack frame layout. */
26521 if (reload_in_progress || reload_completed)
26522 return 0;
26523
26524 /* Check to see if the function contains a branch
26525 insn with the far jump attribute set. */
26526 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26527 {
26528 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26529 {
26530 far_jump = true;
26531 }
26532 func_size += get_attr_length (insn);
26533 }
26534
26535 /* Attribute far_jump will always be true for thumb1 before
26536 the shorten_branch pass.  So checking the far_jump attribute before
26537 shorten_branch isn't very useful.
26538
26539 The following heuristic tries to estimate more accurately whether a far
26540 jump will actually be needed.  It is very conservative, as there is no
26541 way to roll back a decision not to use far jumps.
26542
26543 The Thumb-1 long branch offset range is -2048 to 2046.  In the worst
26544 case each 2-byte insn is associated with a 4-byte constant pool entry.
26545 Using function size 2048/3 as the threshold is conservative enough. */
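/* For example, 683 bytes of 2-byte insns could in the worst case grow to
   roughly 683 * 3 = 2049 bytes once their constant pool entries are
   counted, which is already outside the branch range above.  */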
26546 if (far_jump)
26547 {
26548 if ((func_size * 3) >= 2048)
26549 {
26550 /* Record the fact that we have decided that
26551 the function does use far jumps. */
26552 cfun->machine->far_jump_used = 1;
26553 return 1;
26554 }
26555 }
26556
26557 return 0;
26558 }
26559
26560 /* Return nonzero if FUNC must be entered in ARM mode. */
26561 int
26562 is_called_in_ARM_mode (tree func)
26563 {
26564 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26565
26566 /* Ignore the problem about functions whose address is taken. */
26567 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26568 return TRUE;
26569
26570 #ifdef ARM_PE
26571 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26572 #else
26573 return FALSE;
26574 #endif
26575 }
26576
26577 /* Given the stack offsets and register mask in OFFSETS, decide how
26578 many additional registers to push instead of subtracting a constant
26579 from SP. For epilogues the principle is the same except we use pop.
26580 FOR_PROLOGUE indicates which we're generating. */
26581 static int
26582 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26583 {
26584 HOST_WIDE_INT amount;
26585 unsigned long live_regs_mask = offsets->saved_regs_mask;
26586 /* Extract a mask of the ones we can give to the Thumb's push/pop
26587 instruction. */
26588 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26589 /* Then count how many other high registers will need to be pushed. */
26590 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26591 int n_free, reg_base, size;
26592
26593 if (!for_prologue && frame_pointer_needed)
26594 amount = offsets->locals_base - offsets->saved_regs;
26595 else
26596 amount = offsets->outgoing_args - offsets->saved_regs;
26597
26598 /* If the stack frame size is 512 exactly, we can save one load
26599 instruction, which should make this a win even when optimizing
26600 for speed. */
26601 if (!optimize_size && amount != 512)
26602 return 0;
26603
26604 /* Can't do this if there are high registers to push. */
26605 if (high_regs_pushed != 0)
26606 return 0;
26607
26608 /* Shouldn't do it in the prologue if no registers would normally
26609 be pushed at all. In the epilogue, also allow it if we'll have
26610 a pop insn for the PC. */
26611 if (l_mask == 0
26612 && (for_prologue
26613 || TARGET_BACKTRACE
26614 || (live_regs_mask & 1 << LR_REGNUM) == 0
26615 || TARGET_INTERWORK
26616 || crtl->args.pretend_args_size != 0))
26617 return 0;
26618
26619 /* Don't do this if thumb_expand_prologue wants to emit instructions
26620 between the push and the stack frame allocation. */
26621 if (for_prologue
26622 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26623 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26624 return 0;
26625
26626 reg_base = 0;
26627 n_free = 0;
26628 if (!for_prologue)
26629 {
26630 size = arm_size_return_regs ();
26631 reg_base = ARM_NUM_INTS (size);
26632 live_regs_mask >>= reg_base;
26633 }
26634
26635 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26636 && (for_prologue || call_used_regs[reg_base + n_free]))
26637 {
26638 live_regs_mask >>= 1;
26639 n_free++;
26640 }
26641
26642 if (n_free == 0)
26643 return 0;
26644 gcc_assert (amount / 4 * 4 == amount);
26645
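/* Either push just enough extra registers to bring the remaining
   adjustment below 512, so that a single SUB immediate can finish the
   job, or absorb the whole adjustment with pushes if it is small
   enough.  */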
26646 if (amount >= 512 && (amount - n_free * 4) < 512)
26647 return (amount - 508) / 4;
26648 if (amount <= n_free * 4)
26649 return amount / 4;
26650 return 0;
26651 }
26652
26653 /* The bits which aren't usefully expanded as rtl. */
26654 const char *
26655 thumb1_unexpanded_epilogue (void)
26656 {
26657 arm_stack_offsets *offsets;
26658 int regno;
26659 unsigned long live_regs_mask = 0;
26660 int high_regs_pushed = 0;
26661 int extra_pop;
26662 int had_to_push_lr;
26663 int size;
26664
26665 if (cfun->machine->return_used_this_function != 0)
26666 return "";
26667
26668 if (IS_NAKED (arm_current_func_type ()))
26669 return "";
26670
26671 offsets = arm_get_frame_offsets ();
26672 live_regs_mask = offsets->saved_regs_mask;
26673 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26674
26675 /* Deduce the registers used from the function's return value, where possible.
26676 This is more reliable than examining df_regs_ever_live_p () because that
26677 will be set if the register is ever used in the function, not just if
26678 the register is used to hold a return value. */
26679 size = arm_size_return_regs ();
26680
26681 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26682 if (extra_pop > 0)
26683 {
26684 unsigned long extra_mask = (1 << extra_pop) - 1;
26685 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26686 }
26687
26688 /* The prologue may have pushed some high registers to use as
26689 work registers.  For example, the testsuite file:
26690 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26691 compiles to produce:
26692 push {r4, r5, r6, r7, lr}
26693 mov r7, r9
26694 mov r6, r8
26695 push {r6, r7}
26696 as part of the prologue.  We have to undo that pushing here. */
26697
26698 if (high_regs_pushed)
26699 {
26700 unsigned long mask = live_regs_mask & 0xff;
26701 int next_hi_reg;
26702
26703 /* The available low registers depend on the size of the value we are
26704 returning. */
26705 if (size <= 12)
26706 mask |= 1 << 3;
26707 if (size <= 8)
26708 mask |= 1 << 2;
26709
26710 if (mask == 0)
26711 /* Oh dear! We have no low registers into which we can pop
26712 high registers! */
26713 internal_error
26714 ("no low registers available for popping high registers");
26715
26716 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26717 if (live_regs_mask & (1 << next_hi_reg))
26718 break;
26719
26720 while (high_regs_pushed)
26721 {
26722 /* Find lo register(s) into which the high register(s) can
26723 be popped. */
26724 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26725 {
26726 if (mask & (1 << regno))
26727 high_regs_pushed--;
26728 if (high_regs_pushed == 0)
26729 break;
26730 }
26731
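/* Keep only the low registers up to and including REGNO; any higher
   bits in MASK were not matched with a high register on this
   iteration.  */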
26732 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26733
26734 /* Pop the values into the low register(s). */
26735 thumb_pop (asm_out_file, mask);
26736
26737 /* Move the value(s) into the high registers. */
26738 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26739 {
26740 if (mask & (1 << regno))
26741 {
26742 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26743 regno);
26744
26745 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26746 if (live_regs_mask & (1 << next_hi_reg))
26747 break;
26748 }
26749 }
26750 }
26751 live_regs_mask &= ~0x0f00;
26752 }
26753
26754 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26755 live_regs_mask &= 0xff;
26756
26757 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26758 {
26759 /* Pop the return address into the PC. */
26760 if (had_to_push_lr)
26761 live_regs_mask |= 1 << PC_REGNUM;
26762
26763 /* Either no argument registers were pushed or a backtrace
26764 structure was created which includes an adjusted stack
26765 pointer, so just pop everything. */
26766 if (live_regs_mask)
26767 thumb_pop (asm_out_file, live_regs_mask);
26768
26769 /* We have either just popped the return address into the
26770 PC or it was kept in LR for the entire function.
26771 Note that thumb_pop has already called thumb_exit if the
26772 PC was in the list. */
26773 if (!had_to_push_lr)
26774 thumb_exit (asm_out_file, LR_REGNUM);
26775 }
26776 else
26777 {
26778 /* Pop everything but the return address. */
26779 if (live_regs_mask)
26780 thumb_pop (asm_out_file, live_regs_mask);
26781
26782 if (had_to_push_lr)
26783 {
26784 if (size > 12)
26785 {
26786 /* We have no free low regs, so save one. */
26787 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26788 LAST_ARG_REGNUM);
26789 }
26790
26791 /* Get the return address into a temporary register. */
26792 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26793
26794 if (size > 12)
26795 {
26796 /* Move the return address to lr. */
26797 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26798 LAST_ARG_REGNUM);
26799 /* Restore the low register. */
26800 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26801 IP_REGNUM);
26802 regno = LR_REGNUM;
26803 }
26804 else
26805 regno = LAST_ARG_REGNUM;
26806 }
26807 else
26808 regno = LR_REGNUM;
26809
26810 /* Remove the argument registers that were pushed onto the stack. */
26811 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26812 SP_REGNUM, SP_REGNUM,
26813 crtl->args.pretend_args_size);
26814
26815 thumb_exit (asm_out_file, regno);
26816 }
26817
26818 return "";
26819 }
26820
26821 /* Functions to save and restore machine-specific function data. */
26822 static struct machine_function *
26823 arm_init_machine_status (void)
26824 {
26825 struct machine_function *machine;
26826 machine = ggc_cleared_alloc<machine_function> ();
26827
26828 #if ARM_FT_UNKNOWN != 0
26829 machine->func_type = ARM_FT_UNKNOWN;
26830 #endif
26831 return machine;
26832 }
26833
26834 /* Return an RTX indicating where the return address to the
26835 calling function can be found. */
26836 rtx
26837 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26838 {
26839 if (count != 0)
26840 return NULL_RTX;
26841
26842 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26843 }
26844
26845 /* Do anything needed before RTL is emitted for each function. */
26846 void
26847 arm_init_expanders (void)
26848 {
26849 /* Arrange to initialize and mark the machine per-function status. */
26850 init_machine_status = arm_init_machine_status;
26851
26852 /* This is to stop the combine pass optimizing away the alignment
26853 adjustment of va_arg. */
26854 /* ??? It is claimed that this should not be necessary. */
26855 if (cfun)
26856 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26857 }
26858
26859
26860 /* Like arm_compute_initial_elimination_offset.  Simpler because there
26861 isn't an ABI-specified frame pointer for Thumb.  Instead, we set it
26862 to point at the base of the local variables after static stack
26863 space for a function has been allocated. */
26864
26865 HOST_WIDE_INT
26866 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26867 {
26868 arm_stack_offsets *offsets;
26869
26870 offsets = arm_get_frame_offsets ();
26871
26872 switch (from)
26873 {
26874 case ARG_POINTER_REGNUM:
26875 switch (to)
26876 {
26877 case STACK_POINTER_REGNUM:
26878 return offsets->outgoing_args - offsets->saved_args;
26879
26880 case FRAME_POINTER_REGNUM:
26881 return offsets->soft_frame - offsets->saved_args;
26882
26883 case ARM_HARD_FRAME_POINTER_REGNUM:
26884 return offsets->saved_regs - offsets->saved_args;
26885
26886 case THUMB_HARD_FRAME_POINTER_REGNUM:
26887 return offsets->locals_base - offsets->saved_args;
26888
26889 default:
26890 gcc_unreachable ();
26891 }
26892 break;
26893
26894 case FRAME_POINTER_REGNUM:
26895 switch (to)
26896 {
26897 case STACK_POINTER_REGNUM:
26898 return offsets->outgoing_args - offsets->soft_frame;
26899
26900 case ARM_HARD_FRAME_POINTER_REGNUM:
26901 return offsets->saved_regs - offsets->soft_frame;
26902
26903 case THUMB_HARD_FRAME_POINTER_REGNUM:
26904 return offsets->locals_base - offsets->soft_frame;
26905
26906 default:
26907 gcc_unreachable ();
26908 }
26909 break;
26910
26911 default:
26912 gcc_unreachable ();
26913 }
26914 }
26915
26916 /* Generate the function's prologue. */
26917
26918 void
26919 thumb1_expand_prologue (void)
26920 {
26921 rtx insn;
26922
26923 HOST_WIDE_INT amount;
26924 arm_stack_offsets *offsets;
26925 unsigned long func_type;
26926 int regno;
26927 unsigned long live_regs_mask;
26928 unsigned long l_mask;
26929 unsigned high_regs_pushed = 0;
26930
26931 func_type = arm_current_func_type ();
26932
26933 /* Naked functions don't have prologues. */
26934 if (IS_NAKED (func_type))
26935 return;
26936
26937 if (IS_INTERRUPT (func_type))
26938 {
26939 error ("interrupt Service Routines cannot be coded in Thumb mode");
26940 return;
26941 }
26942
26943 if (is_called_in_ARM_mode (current_function_decl))
26944 emit_insn (gen_prologue_thumb1_interwork ());
26945
26946 offsets = arm_get_frame_offsets ();
26947 live_regs_mask = offsets->saved_regs_mask;
26948
26949 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26950 l_mask = live_regs_mask & 0x40ff;
26951 /* Then count how many other high registers will need to be pushed. */
26952 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26953
26954 if (crtl->args.pretend_args_size)
26955 {
26956 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26957
26958 if (cfun->machine->uses_anonymous_args)
26959 {
26960 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26961 unsigned long mask;
26962
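/* Build a mask of the highest NUM_PUSHES argument registers.  For
   instance, with two words of pretend arguments this selects r2 and
   r3.  */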
26963 mask = 1ul << (LAST_ARG_REGNUM + 1);
26964 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26965
26966 insn = thumb1_emit_multi_reg_push (mask, 0);
26967 }
26968 else
26969 {
26970 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26971 stack_pointer_rtx, x));
26972 }
26973 RTX_FRAME_RELATED_P (insn) = 1;
26974 }
26975
26976 if (TARGET_BACKTRACE)
26977 {
26978 HOST_WIDE_INT offset = 0;
26979 unsigned work_register;
26980 rtx work_reg, x, arm_hfp_rtx;
26981
26982 /* We have been asked to create a stack backtrace structure.
26983 The code looks like this:
26984
26985 0 .align 2
26986 0 func:
26987 0 sub SP, #16 Reserve space for 4 registers.
26988 2 push {R7} Push low registers.
26989 4 add R7, SP, #20 Get the stack pointer before the push.
26990 6 str R7, [SP, #8] Store the stack pointer
26991 (before reserving the space).
26992 8 mov R7, PC Get hold of the start of this code + 12.
26993 10 str R7, [SP, #16] Store it.
26994 12 mov R7, FP Get hold of the current frame pointer.
26995 14 str R7, [SP, #4] Store it.
26996 16 mov R7, LR Get hold of the current return address.
26997 18 str R7, [SP, #12] Store it.
26998 20 add R7, SP, #16 Point at the start of the
26999 backtrace structure.
27000 22 mov FP, R7 Put this value into the frame pointer. */
27001
27002 work_register = thumb_find_work_register (live_regs_mask);
27003 work_reg = gen_rtx_REG (SImode, work_register);
27004 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27005
27006 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27007 stack_pointer_rtx, GEN_INT (-16)));
27008 RTX_FRAME_RELATED_P (insn) = 1;
27009
27010 if (l_mask)
27011 {
27012 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27013 RTX_FRAME_RELATED_P (insn) = 1;
27014
27015 offset = bit_count (l_mask) * UNITS_PER_WORD;
27016 }
27017
27018 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27019 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27020
27021 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27022 x = gen_frame_mem (SImode, x);
27023 emit_move_insn (x, work_reg);
27024
27025 /* Make sure that the instruction fetching the PC is in the right place
27026 to calculate "start of backtrace creation code + 12". */
27027 /* ??? The stores using the common WORK_REG ought to be enough to
27028 prevent the scheduler from doing anything weird. Failing that
27029 we could always move all of the following into an UNSPEC_VOLATILE. */
27030 if (l_mask)
27031 {
27032 x = gen_rtx_REG (SImode, PC_REGNUM);
27033 emit_move_insn (work_reg, x);
27034
27035 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27036 x = gen_frame_mem (SImode, x);
27037 emit_move_insn (x, work_reg);
27038
27039 emit_move_insn (work_reg, arm_hfp_rtx);
27040
27041 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27042 x = gen_frame_mem (SImode, x);
27043 emit_move_insn (x, work_reg);
27044 }
27045 else
27046 {
27047 emit_move_insn (work_reg, arm_hfp_rtx);
27048
27049 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27050 x = gen_frame_mem (SImode, x);
27051 emit_move_insn (x, work_reg);
27052
27053 x = gen_rtx_REG (SImode, PC_REGNUM);
27054 emit_move_insn (work_reg, x);
27055
27056 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27057 x = gen_frame_mem (SImode, x);
27058 emit_move_insn (x, work_reg);
27059 }
27060
27061 x = gen_rtx_REG (SImode, LR_REGNUM);
27062 emit_move_insn (work_reg, x);
27063
27064 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27065 x = gen_frame_mem (SImode, x);
27066 emit_move_insn (x, work_reg);
27067
27068 x = GEN_INT (offset + 12);
27069 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27070
27071 emit_move_insn (arm_hfp_rtx, work_reg);
27072 }
27073 /* Optimization: If we are not pushing any low registers but we are going
27074 to push some high registers then delay our first push. This will just
27075 be a push of LR and we can combine it with the push of the first high
27076 register. */
27077 else if ((l_mask & 0xff) != 0
27078 || (high_regs_pushed == 0 && l_mask))
27079 {
27080 unsigned long mask = l_mask;
27081 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27082 insn = thumb1_emit_multi_reg_push (mask, mask);
27083 RTX_FRAME_RELATED_P (insn) = 1;
27084 }
27085
27086 if (high_regs_pushed)
27087 {
27088 unsigned pushable_regs;
27089 unsigned next_hi_reg;
27090 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27091 : crtl->args.info.nregs;
27092 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27093
27094 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27095 if (live_regs_mask & (1 << next_hi_reg))
27096 break;
27097
27098 /* Mask out registers that are used for passing arguments, even if they
27099 could otherwise be pushed; using them to stash the high registers
27100 would clobber live argument values.  */
27101 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
27102
27103 if (pushable_regs == 0)
27104 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27105
27106 while (high_regs_pushed > 0)
27107 {
27108 unsigned long real_regs_mask = 0;
27109
27110 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
27111 {
27112 if (pushable_regs & (1 << regno))
27113 {
27114 emit_move_insn (gen_rtx_REG (SImode, regno),
27115 gen_rtx_REG (SImode, next_hi_reg));
27116
27117 high_regs_pushed --;
27118 real_regs_mask |= (1 << next_hi_reg);
27119
27120 if (high_regs_pushed)
27121 {
27122 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27123 next_hi_reg --)
27124 if (live_regs_mask & (1 << next_hi_reg))
27125 break;
27126 }
27127 else
27128 {
27129 pushable_regs &= ~((1 << regno) - 1);
27130 break;
27131 }
27132 }
27133 }
27134
27135 /* If we had to find a work register and we have not yet
27136 saved the LR then add it to the list of regs to push. */
27137 if (l_mask == (1 << LR_REGNUM))
27138 {
27139 pushable_regs |= l_mask;
27140 real_regs_mask |= l_mask;
27141 l_mask = 0;
27142 }
27143
27144 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27145 RTX_FRAME_RELATED_P (insn) = 1;
27146 }
27147 }
27148
27149 /* Load the pic register before setting the frame pointer,
27150 so we can use r7 as a temporary work register. */
27151 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27152 arm_load_pic_register (live_regs_mask);
27153
27154 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27155 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27156 stack_pointer_rtx);
27157
27158 if (flag_stack_usage_info)
27159 current_function_static_stack_size
27160 = offsets->outgoing_args - offsets->saved_args;
27161
27162 amount = offsets->outgoing_args - offsets->saved_regs;
27163 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27164 if (amount)
27165 {
27166 if (amount < 512)
27167 {
27168 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27169 GEN_INT (- amount)));
27170 RTX_FRAME_RELATED_P (insn) = 1;
27171 }
27172 else
27173 {
27174 rtx reg, dwarf;
27175
27176 /* The stack decrement is too big for an immediate value in a single
27177 insn. In theory we could issue multiple subtracts, but after
27178 three of them it becomes more space efficient to place the full
27179 value in the constant pool and load into a register. (Also the
27180 ARM debugger really likes to see only one stack decrement per
27181 function). So instead we look for a scratch register into which
27182 we can load the decrement, and then we subtract this from the
27183 stack pointer. Unfortunately on the thumb the only available
27184 scratch registers are the argument registers, and we cannot use
27185 these as they may hold arguments to the function. Instead we
27186 attempt to locate a call preserved register which is used by this
27187 function. If we can find one, then we know that it will have
27188 been pushed at the start of the prologue and so we can corrupt
27189 it now. */
27190 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27191 if (live_regs_mask & (1 << regno))
27192 break;
27193
27194 gcc_assert (regno <= LAST_LO_REGNUM);
27195
27196 reg = gen_rtx_REG (SImode, regno);
27197
27198 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27199
27200 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27201 stack_pointer_rtx, reg));
27202
27203 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27204 plus_constant (Pmode, stack_pointer_rtx,
27205 -amount));
27206 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27207 RTX_FRAME_RELATED_P (insn) = 1;
27208 }
27209 }
27210
27211 if (frame_pointer_needed)
27212 thumb_set_frame_pointer (offsets);
27213
27214 /* If we are profiling, make sure no instructions are scheduled before
27215 the call to mcount. Similarly if the user has requested no
27216 scheduling in the prolog. Similarly if we want non-call exceptions
27217 using the EABI unwinder, to prevent faulting instructions from being
27218 swapped with a stack adjustment. */
27219 if (crtl->profile || !TARGET_SCHED_PROLOG
27220 || (arm_except_unwind_info (&global_options) == UI_TARGET
27221 && cfun->can_throw_non_call_exceptions))
27222 emit_insn (gen_blockage ());
27223
27224 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27225 if (live_regs_mask & 0xff)
27226 cfun->machine->lr_save_eliminated = 0;
27227 }
27228
27229 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
27230 single POP instruction can be generated. LR should be replaced by PC.
27231 All the checks required are already done by USE_RETURN_INSN (). Hence,
27232 all we really need to check here is whether a single register or
27233 multiple registers are being returned. */
27234 void
27235 thumb2_expand_return (bool simple_return)
27236 {
27237 int i, num_regs;
27238 unsigned long saved_regs_mask;
27239 arm_stack_offsets *offsets;
27240
27241 offsets = arm_get_frame_offsets ();
27242 saved_regs_mask = offsets->saved_regs_mask;
27243
27244 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27245 if (saved_regs_mask & (1 << i))
27246 num_regs++;
27247
27248 if (!simple_return && saved_regs_mask)
27249 {
27250 if (num_regs == 1)
27251 {
27252 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27253 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27254 rtx addr = gen_rtx_MEM (SImode,
27255 gen_rtx_POST_INC (SImode,
27256 stack_pointer_rtx));
27257 set_mem_alias_set (addr, get_frame_alias_set ());
27258 XVECEXP (par, 0, 0) = ret_rtx;
27259 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27260 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27261 emit_jump_insn (par);
27262 }
27263 else
27264 {
27265 saved_regs_mask &= ~ (1 << LR_REGNUM);
27266 saved_regs_mask |= (1 << PC_REGNUM);
27267 arm_emit_multi_reg_pop (saved_regs_mask);
27268 }
27269 }
27270 else
27271 {
27272 emit_jump_insn (simple_return_rtx);
27273 }
27274 }
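/* A rough sketch of the code this expands to (assembly shown only for
   illustration): with a single saved register the PARALLEL above becomes a
   return combined with a post-incremented load of the PC, i.e. roughly

       ldr  pc, [sp], #4

   while the multi-register case goes through arm_emit_multi_reg_pop and
   ends up as a "pop {..., pc}" with LR's stack slot loaded into PC.  */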
27275
27276 void
27277 thumb1_expand_epilogue (void)
27278 {
27279 HOST_WIDE_INT amount;
27280 arm_stack_offsets *offsets;
27281 int regno;
27282
27283 /* Naked functions don't have prologues. */
27284 if (IS_NAKED (arm_current_func_type ()))
27285 return;
27286
27287 offsets = arm_get_frame_offsets ();
27288 amount = offsets->outgoing_args - offsets->saved_regs;
27289
27290 if (frame_pointer_needed)
27291 {
27292 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27293 amount = offsets->locals_base - offsets->saved_regs;
27294 }
27295 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27296
27297 gcc_assert (amount >= 0);
27298 if (amount)
27299 {
27300 emit_insn (gen_blockage ());
27301
27302 if (amount < 512)
27303 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27304 GEN_INT (amount)));
27305 else
27306 {
27307 /* r3 is always free in the epilogue. */
27308 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27309
27310 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27311 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27312 }
27313 }
27314
27315 /* Emit a USE (stack_pointer_rtx), so that
27316 the stack adjustment will not be deleted. */
27317 emit_insn (gen_force_register_use (stack_pointer_rtx));
27318
27319 if (crtl->profile || !TARGET_SCHED_PROLOG)
27320 emit_insn (gen_blockage ());
27321
27322 /* Emit a clobber for each register that will be restored in the epilogue,
27323 so that flow2 will get register lifetimes correct. */
27324 for (regno = 0; regno < 13; regno++)
27325 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27326 emit_clobber (gen_rtx_REG (SImode, regno));
27327
27328 if (! df_regs_ever_live_p (LR_REGNUM))
27329 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27330 }
27331
27332 /* Epilogue code for APCS frame. */
27333 static void
27334 arm_expand_epilogue_apcs_frame (bool really_return)
27335 {
27336 unsigned long func_type;
27337 unsigned long saved_regs_mask;
27338 int num_regs = 0;
27339 int i;
27340 int floats_from_frame = 0;
27341 arm_stack_offsets *offsets;
27342
27343 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27344 func_type = arm_current_func_type ();
27345
27346 /* Get frame offsets for ARM. */
27347 offsets = arm_get_frame_offsets ();
27348 saved_regs_mask = offsets->saved_regs_mask;
27349
27350 /* Find the offset of the floating-point save area in the frame. */
27351 floats_from_frame
27352 = (offsets->saved_args
27353 + arm_compute_static_chain_stack_bytes ()
27354 - offsets->frame);
27355
27356 /* Compute how many core registers are saved and how far away the floats are. */
27357 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27358 if (saved_regs_mask & (1 << i))
27359 {
27360 num_regs++;
27361 floats_from_frame += 4;
27362 }
27363
27364 if (TARGET_HARD_FLOAT && TARGET_VFP)
27365 {
27366 int start_reg;
27367 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27368
27369 /* The offset is from IP_REGNUM. */
27370 int saved_size = arm_get_vfp_saved_size ();
27371 if (saved_size > 0)
27372 {
27373 rtx insn;
27374 floats_from_frame += saved_size;
27375 insn = emit_insn (gen_addsi3 (ip_rtx,
27376 hard_frame_pointer_rtx,
27377 GEN_INT (-floats_from_frame)));
27378 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27379 ip_rtx, hard_frame_pointer_rtx);
27380 }
27381
27382 /* Generate VFP register multi-pop. */
27383 start_reg = FIRST_VFP_REGNUM;
27384
27385 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27386 /* Look for a case where a reg does not need restoring. */
27387 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27388 && (!df_regs_ever_live_p (i + 1)
27389 || call_used_regs[i + 1]))
27390 {
27391 if (start_reg != i)
27392 arm_emit_vfp_multi_reg_pop (start_reg,
27393 (i - start_reg) / 2,
27394 gen_rtx_REG (SImode,
27395 IP_REGNUM));
27396 start_reg = i + 2;
27397 }
27398
27399 /* Restore the remaining regs that we have discovered (or possibly
27400 even all of them, if the conditional in the for loop never
27401 fired). */
27402 if (start_reg != i)
27403 arm_emit_vfp_multi_reg_pop (start_reg,
27404 (i - start_reg) / 2,
27405 gen_rtx_REG (SImode, IP_REGNUM));
27406 }
27407
27408 if (TARGET_IWMMXT)
27409 {
27410 /* The frame pointer is guaranteed to be non-double-word aligned, as
27411 it is set to double-word-aligned old_stack_pointer - 4. */
27412 rtx insn;
27413 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27414
27415 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27416 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27417 {
27418 rtx addr = gen_frame_mem (V2SImode,
27419 plus_constant (Pmode, hard_frame_pointer_rtx,
27420 - lrm_count * 4));
27421 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27422 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27423 gen_rtx_REG (V2SImode, i),
27424 NULL_RTX);
27425 lrm_count += 2;
27426 }
27427 }
27428
27429 /* saved_regs_mask should contain IP, which holds the old stack pointer
27430 saved at the time the frame was created. Since SP and IP are adjacent
27431 registers, we can restore the value directly into SP. */
27432 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27433 saved_regs_mask &= ~(1 << IP_REGNUM);
27434 saved_regs_mask |= (1 << SP_REGNUM);
27435
27436 /* There are two registers left in saved_regs_mask - LR and PC. We
27437 only need to restore LR (the return address), but to
27438 save time we can load it directly into PC, unless we need a
27439 special function exit sequence, or we are not really returning. */
27440 if (really_return
27441 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27442 && !crtl->calls_eh_return)
27443 /* Delete LR from the register mask, so that the value of LR saved on
27444 the stack is loaded into the PC instead. */
27445 saved_regs_mask &= ~(1 << LR_REGNUM);
27446 else
27447 saved_regs_mask &= ~(1 << PC_REGNUM);
27448
27449 num_regs = bit_count (saved_regs_mask);
27450 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27451 {
27452 rtx insn;
27453 emit_insn (gen_blockage ());
27454 /* Unwind the stack to just below the saved registers. */
27455 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27456 hard_frame_pointer_rtx,
27457 GEN_INT (- 4 * num_regs)));
27458
27459 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27460 stack_pointer_rtx, hard_frame_pointer_rtx);
27461 }
27462
27463 arm_emit_multi_reg_pop (saved_regs_mask);
27464
27465 if (IS_INTERRUPT (func_type))
27466 {
27467 /* Interrupt handlers will have pushed the
27468 IP onto the stack, so restore it now. */
27469 rtx insn;
27470 rtx addr = gen_rtx_MEM (SImode,
27471 gen_rtx_POST_INC (SImode,
27472 stack_pointer_rtx));
27473 set_mem_alias_set (addr, get_frame_alias_set ());
27474 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27475 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27476 gen_rtx_REG (SImode, IP_REGNUM),
27477 NULL_RTX);
27478 }
27479
27480 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27481 return;
27482
27483 if (crtl->calls_eh_return)
27484 emit_insn (gen_addsi3 (stack_pointer_rtx,
27485 stack_pointer_rtx,
27486 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27487
27488 if (IS_STACKALIGN (func_type))
27489 /* Restore the original stack pointer. Before prologue, the stack was
27490 realigned and the original stack pointer saved in r0. For details,
27491 see comment in arm_expand_prologue. */
27492 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27493
27494 emit_jump_insn (simple_return_rtx);
27495 }
27496
27497 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27498 function is not a sibcall. */
27499 void
27500 arm_expand_epilogue (bool really_return)
27501 {
27502 unsigned long func_type;
27503 unsigned long saved_regs_mask;
27504 int num_regs = 0;
27505 int i;
27506 int amount;
27507 arm_stack_offsets *offsets;
27508
27509 func_type = arm_current_func_type ();
27510
27511 /* Naked functions don't have an epilogue. Hence, generate the return pattern
27512 and let output_return_instruction take care of any instruction emission. */
27513 if (IS_NAKED (func_type)
27514 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27515 {
27516 if (really_return)
27517 emit_jump_insn (simple_return_rtx);
27518 return;
27519 }
27520
27521 /* If we are throwing an exception, then we really must be doing a
27522 return, so we can't tail-call. */
27523 gcc_assert (!crtl->calls_eh_return || really_return);
27524
27525 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27526 {
27527 arm_expand_epilogue_apcs_frame (really_return);
27528 return;
27529 }
27530
27531 /* Get frame offsets for ARM. */
27532 offsets = arm_get_frame_offsets ();
27533 saved_regs_mask = offsets->saved_regs_mask;
27534 num_regs = bit_count (saved_regs_mask);
27535
27536 if (frame_pointer_needed)
27537 {
27538 rtx insn;
27539 /* Restore stack pointer if necessary. */
27540 if (TARGET_ARM)
27541 {
27542 /* In ARM mode, frame pointer points to first saved register.
27543 Restore stack pointer to last saved register. */
27544 amount = offsets->frame - offsets->saved_regs;
27545
27546 /* Force out any pending memory operations that reference stacked data
27547 before stack de-allocation occurs. */
27548 emit_insn (gen_blockage ());
27549 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27550 hard_frame_pointer_rtx,
27551 GEN_INT (amount)));
27552 arm_add_cfa_adjust_cfa_note (insn, amount,
27553 stack_pointer_rtx,
27554 hard_frame_pointer_rtx);
27555
27556 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27557 deleted. */
27558 emit_insn (gen_force_register_use (stack_pointer_rtx));
27559 }
27560 else
27561 {
27562 /* In Thumb-2 mode, the frame pointer points to the last saved
27563 register. */
27564 amount = offsets->locals_base - offsets->saved_regs;
27565 if (amount)
27566 {
27567 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27568 hard_frame_pointer_rtx,
27569 GEN_INT (amount)));
27570 arm_add_cfa_adjust_cfa_note (insn, amount,
27571 hard_frame_pointer_rtx,
27572 hard_frame_pointer_rtx);
27573 }
27574
27575 /* Force out any pending memory operations that reference stacked data
27576 before stack de-allocation occurs. */
27577 emit_insn (gen_blockage ());
27578 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27579 hard_frame_pointer_rtx));
27580 arm_add_cfa_adjust_cfa_note (insn, 0,
27581 stack_pointer_rtx,
27582 hard_frame_pointer_rtx);
27583 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27584 deleted. */
27585 emit_insn (gen_force_register_use (stack_pointer_rtx));
27586 }
27587 }
27588 else
27589 {
27590 /* Pop off outgoing args and local frame to adjust stack pointer to
27591 last saved register. */
27592 amount = offsets->outgoing_args - offsets->saved_regs;
27593 if (amount)
27594 {
27595 rtx tmp;
27596 /* Force out any pending memory operations that reference stacked data
27597 before stack de-allocation occurs. */
27598 emit_insn (gen_blockage ());
27599 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27600 stack_pointer_rtx,
27601 GEN_INT (amount)));
27602 arm_add_cfa_adjust_cfa_note (tmp, amount,
27603 stack_pointer_rtx, stack_pointer_rtx);
27604 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27605 not deleted. */
27606 emit_insn (gen_force_register_use (stack_pointer_rtx));
27607 }
27608 }
27609
27610 if (TARGET_HARD_FLOAT && TARGET_VFP)
27611 {
27612 /* Generate VFP register multi-pop. */
27613 int end_reg = LAST_VFP_REGNUM + 1;
27614
27615 /* Scan the registers in reverse order. We need to match
27616 any groupings made in the prologue and generate matching
27617 vldm operations. The need to match groups is because,
27618 unlike pop, vldm can only do consecutive regs. */
27619 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27620 /* Look for a case where a reg does not need restoring. */
27621 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27622 && (!df_regs_ever_live_p (i + 1)
27623 || call_used_regs[i + 1]))
27624 {
27625 /* Restore the regs discovered so far (from reg+2 to
27626 end_reg). */
27627 if (end_reg > i + 2)
27628 arm_emit_vfp_multi_reg_pop (i + 2,
27629 (end_reg - (i + 2)) / 2,
27630 stack_pointer_rtx);
27631 end_reg = i;
27632 }
27633
27634 /* Restore the remaining regs that we have discovered (or possibly
27635 even all of them, if the conditional in the for loop never
27636 fired). */
27637 if (end_reg > i + 2)
27638 arm_emit_vfp_multi_reg_pop (i + 2,
27639 (end_reg - (i + 2)) / 2,
27640 stack_pointer_rtx);
27641 }
27642
27643 if (TARGET_IWMMXT)
27644 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27645 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27646 {
27647 rtx insn;
27648 rtx addr = gen_rtx_MEM (V2SImode,
27649 gen_rtx_POST_INC (SImode,
27650 stack_pointer_rtx));
27651 set_mem_alias_set (addr, get_frame_alias_set ());
27652 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27653 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27654 gen_rtx_REG (V2SImode, i),
27655 NULL_RTX);
27656 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27657 stack_pointer_rtx, stack_pointer_rtx);
27658 }
27659
27660 if (saved_regs_mask)
27661 {
27662 rtx insn;
27663 bool return_in_pc = false;
27664
27665 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27666 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27667 && !IS_STACKALIGN (func_type)
27668 && really_return
27669 && crtl->args.pretend_args_size == 0
27670 && saved_regs_mask & (1 << LR_REGNUM)
27671 && !crtl->calls_eh_return)
27672 {
27673 saved_regs_mask &= ~(1 << LR_REGNUM);
27674 saved_regs_mask |= (1 << PC_REGNUM);
27675 return_in_pc = true;
27676 }
27677
27678 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27679 {
27680 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27681 if (saved_regs_mask & (1 << i))
27682 {
27683 rtx addr = gen_rtx_MEM (SImode,
27684 gen_rtx_POST_INC (SImode,
27685 stack_pointer_rtx));
27686 set_mem_alias_set (addr, get_frame_alias_set ());
27687
27688 if (i == PC_REGNUM)
27689 {
27690 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27691 XVECEXP (insn, 0, 0) = ret_rtx;
27692 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27693 gen_rtx_REG (SImode, i),
27694 addr);
27695 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27696 insn = emit_jump_insn (insn);
27697 }
27698 else
27699 {
27700 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27701 addr));
27702 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27703 gen_rtx_REG (SImode, i),
27704 NULL_RTX);
27705 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27706 stack_pointer_rtx,
27707 stack_pointer_rtx);
27708 }
27709 }
27710 }
27711 else
27712 {
27713 if (TARGET_LDRD
27714 && current_tune->prefer_ldrd_strd
27715 && !optimize_function_for_size_p (cfun))
27716 {
27717 if (TARGET_THUMB2)
27718 thumb2_emit_ldrd_pop (saved_regs_mask);
27719 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27720 arm_emit_ldrd_pop (saved_regs_mask);
27721 else
27722 arm_emit_multi_reg_pop (saved_regs_mask);
27723 }
27724 else
27725 arm_emit_multi_reg_pop (saved_regs_mask);
27726 }
27727
27728 if (return_in_pc == true)
27729 return;
27730 }
27731
27732 if (crtl->args.pretend_args_size)
27733 {
27734 int i, j;
27735 rtx dwarf = NULL_RTX;
27736 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27737 stack_pointer_rtx,
27738 GEN_INT (crtl->args.pretend_args_size)));
27739
27740 RTX_FRAME_RELATED_P (tmp) = 1;
27741
27742 if (cfun->machine->uses_anonymous_args)
27743 {
27744 /* Restore the pretend args. See arm_expand_prologue for how the
27745 pretend args are saved on the stack. */
27746 int num_regs = crtl->args.pretend_args_size / 4;
27747 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27748 for (j = 0, i = 0; j < num_regs; i++)
27749 if (saved_regs_mask & (1 << i))
27750 {
27751 rtx reg = gen_rtx_REG (SImode, i);
27752 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27753 j++;
27754 }
27755 REG_NOTES (tmp) = dwarf;
27756 }
27757 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27758 stack_pointer_rtx, stack_pointer_rtx);
27759 }
27760
27761 if (!really_return)
27762 return;
27763
27764 if (crtl->calls_eh_return)
27765 emit_insn (gen_addsi3 (stack_pointer_rtx,
27766 stack_pointer_rtx,
27767 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27768
27769 if (IS_STACKALIGN (func_type))
27770 /* Restore the original stack pointer. Before prologue, the stack was
27771 realigned and the original stack pointer saved in r0. For details,
27772 see comment in arm_expand_prologue. */
27773 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27774
27775 emit_jump_insn (simple_return_rtx);
27776 }
27777
27778 /* Implementation of insn prologue_thumb1_interwork. This is the first
27779 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27780
27781 const char *
27782 thumb1_output_interwork (void)
27783 {
27784 const char * name;
27785 FILE *f = asm_out_file;
27786
27787 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27788 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27789 == SYMBOL_REF);
27790 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27791
27792 /* Generate code sequence to switch us into Thumb mode. */
27793 /* The .code 32 directive has already been emitted by
27794 ASM_DECLARE_FUNCTION_NAME. */
27795 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27796 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27797
27798 /* Generate a label, so that the debugger will notice the
27799 change in instruction sets. This label is also used by
27800 the assembler to bypass the ARM code when this function
27801 is called from a Thumb encoded function elsewhere in the
27802 same file. Hence the definition of STUB_NAME here must
27803 agree with the definition in gas/config/tc-arm.c. */
27804
27805 #define STUB_NAME ".real_start_of"
27806
27807 fprintf (f, "\t.code\t16\n");
27808 #ifdef ARM_PE
27809 if (arm_dllexport_name_p (name))
27810 name = arm_strip_name_encoding (name);
27811 #endif
27812 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27813 fprintf (f, "\t.thumb_func\n");
27814 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27815
27816 return "";
27817 }
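/* An illustrative sketch of the sequence emitted above.  The PC read by the
   ORR is the address of the ORR plus 8, i.e. the address of the Thumb code
   that follows the BX, and setting bit 0 makes the BX switch to Thumb state:

       orr  ip, pc, #1
       bx   ip
       .code 16
       .thumb_func
   .real_start_of<name>:
       ...Thumb-encoded body...  */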
27818
27819 /* Handle the case of a double word load into a low register from
27820 a computed memory address. The computed address may involve a
27821 register which is overwritten by the load. */
27822 const char *
27823 thumb_load_double_from_address (rtx *operands)
27824 {
27825 rtx addr;
27826 rtx base;
27827 rtx offset;
27828 rtx arg1;
27829 rtx arg2;
27830
27831 gcc_assert (REG_P (operands[0]));
27832 gcc_assert (MEM_P (operands[1]));
27833
27834 /* Get the memory address. */
27835 addr = XEXP (operands[1], 0);
27836
27837 /* Work out how the memory address is computed. */
27838 switch (GET_CODE (addr))
27839 {
27840 case REG:
27841 operands[2] = adjust_address (operands[1], SImode, 4);
27842
27843 if (REGNO (operands[0]) == REGNO (addr))
27844 {
27845 output_asm_insn ("ldr\t%H0, %2", operands);
27846 output_asm_insn ("ldr\t%0, %1", operands);
27847 }
27848 else
27849 {
27850 output_asm_insn ("ldr\t%0, %1", operands);
27851 output_asm_insn ("ldr\t%H0, %2", operands);
27852 }
27853 break;
27854
27855 case CONST:
27856 /* Compute <address> + 4 for the high order load. */
27857 operands[2] = adjust_address (operands[1], SImode, 4);
27858
27859 output_asm_insn ("ldr\t%0, %1", operands);
27860 output_asm_insn ("ldr\t%H0, %2", operands);
27861 break;
27862
27863 case PLUS:
27864 arg1 = XEXP (addr, 0);
27865 arg2 = XEXP (addr, 1);
27866
27867 if (CONSTANT_P (arg1))
27868 base = arg2, offset = arg1;
27869 else
27870 base = arg1, offset = arg2;
27871
27872 gcc_assert (REG_P (base));
27873
27874 /* Catch the case of <address> = <reg> + <reg> */
27875 if (REG_P (offset))
27876 {
27877 int reg_offset = REGNO (offset);
27878 int reg_base = REGNO (base);
27879 int reg_dest = REGNO (operands[0]);
27880
27881 /* Add the base and offset registers together into the
27882 higher destination register. */
27883 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27884 reg_dest + 1, reg_base, reg_offset);
27885
27886 /* Load the lower destination register from the address in
27887 the higher destination register. */
27888 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27889 reg_dest, reg_dest + 1);
27890
27891 /* Load the higher destination register from its own address
27892 plus 4. */
27893 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27894 reg_dest + 1, reg_dest + 1);
27895 }
27896 else
27897 {
27898 /* Compute <address> + 4 for the high order load. */
27899 operands[2] = adjust_address (operands[1], SImode, 4);
27900
27901 /* If the computed address is held in the low order register
27902 then load the high order register first, otherwise always
27903 load the low order register first. */
27904 if (REGNO (operands[0]) == REGNO (base))
27905 {
27906 output_asm_insn ("ldr\t%H0, %2", operands);
27907 output_asm_insn ("ldr\t%0, %1", operands);
27908 }
27909 else
27910 {
27911 output_asm_insn ("ldr\t%0, %1", operands);
27912 output_asm_insn ("ldr\t%H0, %2", operands);
27913 }
27914 }
27915 break;
27916
27917 case LABEL_REF:
27918 /* With no registers to worry about we can just load the value
27919 directly. */
27920 operands[2] = adjust_address (operands[1], SImode, 4);
27921
27922 output_asm_insn ("ldr\t%H0, %2", operands);
27923 output_asm_insn ("ldr\t%0, %1", operands);
27924 break;
27925
27926 default:
27927 gcc_unreachable ();
27928 }
27929
27930 return "";
27931 }
27932
27933 const char *
27934 thumb_output_move_mem_multiple (int n, rtx *operands)
27935 {
27936 rtx tmp;
27937
27938 switch (n)
27939 {
27940 case 2:
27941 if (REGNO (operands[4]) > REGNO (operands[5]))
27942 {
27943 tmp = operands[4];
27944 operands[4] = operands[5];
27945 operands[5] = tmp;
27946 }
27947 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27948 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27949 break;
27950
27951 case 3:
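/* The three exchanges below form a small sorting network that puts the
   three transfer registers into ascending order, since the register
   lists of ldmia/stmia must be ascending.  */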
27952 if (REGNO (operands[4]) > REGNO (operands[5]))
27953 {
27954 tmp = operands[4];
27955 operands[4] = operands[5];
27956 operands[5] = tmp;
27957 }
27958 if (REGNO (operands[5]) > REGNO (operands[6]))
27959 {
27960 tmp = operands[5];
27961 operands[5] = operands[6];
27962 operands[6] = tmp;
27963 }
27964 if (REGNO (operands[4]) > REGNO (operands[5]))
27965 {
27966 tmp = operands[4];
27967 operands[4] = operands[5];
27968 operands[5] = tmp;
27969 }
27970
27971 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27972 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27973 break;
27974
27975 default:
27976 gcc_unreachable ();
27977 }
27978
27979 return "";
27980 }
27981
27982 /* Output a call-via instruction for thumb state. */
27983 const char *
27984 thumb_call_via_reg (rtx reg)
27985 {
27986 int regno = REGNO (reg);
27987 rtx *labelp;
27988
27989 gcc_assert (regno < LR_REGNUM);
27990
27991 /* If we are in the normal text section we can use a single instance
27992 per compilation unit. If we are doing function sections, then we need
27993 an entry per section, since we can't rely on reachability. */
27994 if (in_section == text_section)
27995 {
27996 thumb_call_reg_needed = 1;
27997
27998 if (thumb_call_via_label[regno] == NULL)
27999 thumb_call_via_label[regno] = gen_label_rtx ();
28000 labelp = thumb_call_via_label + regno;
28001 }
28002 else
28003 {
28004 if (cfun->machine->call_via[regno] == NULL)
28005 cfun->machine->call_via[regno] = gen_label_rtx ();
28006 labelp = cfun->machine->call_via + regno;
28007 }
28008
28009 output_asm_insn ("bl\t%a0", labelp);
28010 return "";
28011 }
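/* The "bl %a0" above targets a one-instruction trampoline of the form

       .LthumbcallN:
           bx   rN

   emitted later at the chosen label (see arm_file_end for the text-section
   case); ".LthumbcallN" here is only an illustrative name.  */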
28012
28013 /* Routines for generating rtl. */
28014 void
28015 thumb_expand_movmemqi (rtx *operands)
28016 {
28017 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28018 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28019 HOST_WIDE_INT len = INTVAL (operands[2]);
28020 HOST_WIDE_INT offset = 0;
28021
28022 while (len >= 12)
28023 {
28024 emit_insn (gen_movmem12b (out, in, out, in));
28025 len -= 12;
28026 }
28027
28028 if (len >= 8)
28029 {
28030 emit_insn (gen_movmem8b (out, in, out, in));
28031 len -= 8;
28032 }
28033
28034 if (len >= 4)
28035 {
28036 rtx reg = gen_reg_rtx (SImode);
28037 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28038 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28039 len -= 4;
28040 offset += 4;
28041 }
28042
28043 if (len >= 2)
28044 {
28045 rtx reg = gen_reg_rtx (HImode);
28046 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28047 plus_constant (Pmode, in,
28048 offset))));
28049 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28050 offset)),
28051 reg));
28052 len -= 2;
28053 offset += 2;
28054 }
28055
28056 if (len)
28057 {
28058 rtx reg = gen_reg_rtx (QImode);
28059 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28060 plus_constant (Pmode, in,
28061 offset))));
28062 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28063 offset)),
28064 reg));
28065 }
28066 }
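/* A worked example: a 23-byte copy is expanded as one 12-byte block move,
   one 8-byte block move, then a halfword and a final byte
   (23 = 12 + 8 + 2 + 1); the block moves update the pointer registers and
   the tail loads use the explicit OFFSET.  */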
28067
28068 void
28069 thumb_reload_out_hi (rtx *operands)
28070 {
28071 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28072 }
28073
28074 /* Handle reading a half-word from memory during reload. */
28075 void
28076 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
28077 {
28078 gcc_unreachable ();
28079 }
28080
28081 /* Return the length of a function name prefix
28082 that starts with the character 'c'. */
28083 static int
28084 arm_get_strip_length (int c)
28085 {
28086 switch (c)
28087 {
28088 ARM_NAME_ENCODING_LENGTHS
28089 default: return 0;
28090 }
28091 }
28092
28093 /* Return a pointer to a function's name with any
28094 and all prefix encodings stripped from it. */
28095 const char *
28096 arm_strip_name_encoding (const char *name)
28097 {
28098 int skip;
28099
28100 while ((skip = arm_get_strip_length (* name)))
28101 name += skip;
28102
28103 return name;
28104 }
28105
28106 /* If there is a '*' anywhere in the name's prefix, then
28107 emit the stripped name verbatim, otherwise prepend an
28108 underscore if leading underscores are being used. */
28109 void
28110 arm_asm_output_labelref (FILE *stream, const char *name)
28111 {
28112 int skip;
28113 int verbatim = 0;
28114
28115 while ((skip = arm_get_strip_length (* name)))
28116 {
28117 verbatim |= (*name == '*');
28118 name += skip;
28119 }
28120
28121 if (verbatim)
28122 fputs (name, stream);
28123 else
28124 asm_fprintf (stream, "%U%s", name);
28125 }
28126
28127 /* This function is used to emit an EABI tag and its associated value.
28128 We emit the numerical value of the tag in case the assembler does not
28129 support textual tags (e.g. gas prior to 2.20). If requested we include
28130 the tag name in a comment so that anyone reading the assembler output
28131 will know which tag is being set.
28132
28133 This function is not static because arm-c.c needs it too. */
28134
28135 void
28136 arm_emit_eabi_attribute (const char *name, int num, int val)
28137 {
28138 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28139 if (flag_verbose_asm || flag_debug_asm)
28140 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28141 asm_fprintf (asm_out_file, "\n");
28142 }
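/* For example (an output sketch, assuming -fverbose-asm), the call

       arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, 2);

   emits a line of the form

       .eabi_attribute 30, 2	@ Tag_ABI_optimization_goals

   where "@" is ASM_COMMENT_START on ARM targets.  */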
28143
28144 static void
28145 arm_file_start (void)
28146 {
28147 int val;
28148
28149 if (TARGET_UNIFIED_ASM)
28150 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28151
28152 if (TARGET_BPABI)
28153 {
28154 const char *fpu_name;
28155 if (arm_selected_arch)
28156 {
28157 /* armv7ve doesn't support any extensions. */
28158 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28159 {
28160 /* Keep backward compatibility for assemblers
28161 which don't support armv7ve. */
28162 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28163 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28164 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28165 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28166 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28167 }
28168 else
28169 {
28170 const char* pos = strchr (arm_selected_arch->name, '+');
28171 if (pos)
28172 {
28173 char buf[15];
28174 gcc_assert (strlen (arm_selected_arch->name)
28175 <= sizeof (buf) / sizeof (*pos));
28176 strncpy (buf, arm_selected_arch->name,
28177 (pos - arm_selected_arch->name) * sizeof (*pos));
28178 buf[pos - arm_selected_arch->name] = '\0';
28179 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28180 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28181 }
28182 else
28183 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28184 }
28185 }
28186 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28187 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28188 else
28189 {
28190 const char* truncated_name
28191 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28192 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28193 }
28194
28195 if (TARGET_SOFT_FLOAT)
28196 {
28197 fpu_name = "softvfp";
28198 }
28199 else
28200 {
28201 fpu_name = arm_fpu_desc->name;
28202 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28203 {
28204 if (TARGET_HARD_FLOAT)
28205 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28206 if (TARGET_HARD_FLOAT_ABI)
28207 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28208 }
28209 }
28210 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28211
28212 /* Some of these attributes only apply when the corresponding features
28213 are used. However we don't have any easy way of figuring this out.
28214 Conservatively record the setting that would have been used. */
28215
28216 if (flag_rounding_math)
28217 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28218
28219 if (!flag_unsafe_math_optimizations)
28220 {
28221 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28222 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28223 }
28224 if (flag_signaling_nans)
28225 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28226
28227 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28228 flag_finite_math_only ? 1 : 3);
28229
28230 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28231 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28232 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28233 flag_short_enums ? 1 : 2);
28234
28235 /* Tag_ABI_optimization_goals. */
28236 if (optimize_size)
28237 val = 4;
28238 else if (optimize >= 2)
28239 val = 2;
28240 else if (optimize)
28241 val = 1;
28242 else
28243 val = 6;
28244 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28245
28246 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28247 unaligned_access);
28248
28249 if (arm_fp16_format)
28250 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28251 (int) arm_fp16_format);
28252
28253 if (arm_lang_output_object_attributes_hook)
28254 arm_lang_output_object_attributes_hook();
28255 }
28256
28257 default_file_start ();
28258 }
28259
28260 static void
28261 arm_file_end (void)
28262 {
28263 int regno;
28264
28265 if (NEED_INDICATE_EXEC_STACK)
28266 /* Add .note.GNU-stack. */
28267 file_end_indicate_exec_stack ();
28268
28269 if (! thumb_call_reg_needed)
28270 return;
28271
28272 switch_to_section (text_section);
28273 asm_fprintf (asm_out_file, "\t.code 16\n");
28274 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28275
28276 for (regno = 0; regno < LR_REGNUM; regno++)
28277 {
28278 rtx label = thumb_call_via_label[regno];
28279
28280 if (label != 0)
28281 {
28282 targetm.asm_out.internal_label (asm_out_file, "L",
28283 CODE_LABEL_NUMBER (label));
28284 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28285 }
28286 }
28287 }
28288
28289 #ifndef ARM_PE
28290 /* Symbols in the text segment can be accessed without indirecting via the
28291 constant pool; it may take an extra binary operation, but this is still
28292 faster than indirecting via memory. Don't do this when not optimizing,
28293 since we won't be calculating all of the offsets necessary to do this
28294 simplification. */
28295
28296 static void
28297 arm_encode_section_info (tree decl, rtx rtl, int first)
28298 {
28299 if (optimize > 0 && TREE_CONSTANT (decl))
28300 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28301
28302 default_encode_section_info (decl, rtl, first);
28303 }
28304 #endif /* !ARM_PE */
28305
28306 static void
28307 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28308 {
28309 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28310 && !strcmp (prefix, "L"))
28311 {
28312 arm_ccfsm_state = 0;
28313 arm_target_insn = NULL;
28314 }
28315 default_internal_label (stream, prefix, labelno);
28316 }
28317
28318 /* Output code to add DELTA to the first argument, and then jump
28319 to FUNCTION. Used for C++ multiple inheritance. */
28320 static void
28321 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28322 HOST_WIDE_INT delta,
28323 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28324 tree function)
28325 {
28326 static int thunk_label = 0;
28327 char label[256];
28328 char labelpc[256];
28329 int mi_delta = delta;
28330 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28331 int shift = 0;
28332 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28333 ? 1 : 0);
28334 if (mi_delta < 0)
28335 mi_delta = - mi_delta;
28336
28337 final_start_function (emit_barrier (), file, 1);
28338
28339 if (TARGET_THUMB1)
28340 {
28341 int labelno = thunk_label++;
28342 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28343 /* Thunks are entered in ARM mode when available. */
28344 if (TARGET_THUMB1_ONLY)
28345 {
28346 /* push r3 so we can use it as a temporary. */
28347 /* TODO: Omit this save if r3 is not used. */
28348 fputs ("\tpush {r3}\n", file);
28349 fputs ("\tldr\tr3, ", file);
28350 }
28351 else
28352 {
28353 fputs ("\tldr\tr12, ", file);
28354 }
28355 assemble_name (file, label);
28356 fputc ('\n', file);
28357 if (flag_pic)
28358 {
28359 /* If we are generating PIC, the ldr instruction below loads
28360 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28361 the address of the add + 8, so we have:
28362
28363 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28364 = target + 1.
28365
28366 Note that we have "+ 1" because some versions of GNU ld
28367 don't set the low bit of the result for R_ARM_REL32
28368 relocations against thumb function symbols.
28369 On ARMv6M this is +4, not +8. */
28370 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28371 assemble_name (file, labelpc);
28372 fputs (":\n", file);
28373 if (TARGET_THUMB1_ONLY)
28374 {
28375 /* This is 2 insns after the start of the thunk, so we know it
28376 is 4-byte aligned. */
28377 fputs ("\tadd\tr3, pc, r3\n", file);
28378 fputs ("\tmov r12, r3\n", file);
28379 }
28380 else
28381 fputs ("\tadd\tr12, pc, r12\n", file);
28382 }
28383 else if (TARGET_THUMB1_ONLY)
28384 fputs ("\tmov r12, r3\n", file);
28385 }
28386 if (TARGET_THUMB1_ONLY)
28387 {
28388 if (mi_delta > 255)
28389 {
28390 fputs ("\tldr\tr3, ", file);
28391 assemble_name (file, label);
28392 fputs ("+4\n", file);
28393 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28394 mi_op, this_regno, this_regno);
28395 }
28396 else if (mi_delta != 0)
28397 {
28398 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28399 mi_op, this_regno, this_regno,
28400 mi_delta);
28401 }
28402 }
28403 else
28404 {
28405 /* TODO: Use movw/movt for large constants when available. */
28406 while (mi_delta != 0)
28407 {
28408 if ((mi_delta & (3 << shift)) == 0)
28409 shift += 2;
28410 else
28411 {
28412 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28413 mi_op, this_regno, this_regno,
28414 mi_delta & (0xff << shift));
28415 mi_delta &= ~(0xff << shift);
28416 shift += 8;
28417 }
28418 }
28419 }
28420 if (TARGET_THUMB1)
28421 {
28422 if (TARGET_THUMB1_ONLY)
28423 fputs ("\tpop\t{r3}\n", file);
28424
28425 fprintf (file, "\tbx\tr12\n");
28426 ASM_OUTPUT_ALIGN (file, 2);
28427 assemble_name (file, label);
28428 fputs (":\n", file);
28429 if (flag_pic)
28430 {
28431 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28432 rtx tem = XEXP (DECL_RTL (function), 0);
28433 tem = plus_constant (GET_MODE (tem), tem, -7);
28434 tem = gen_rtx_MINUS (GET_MODE (tem),
28435 tem,
28436 gen_rtx_SYMBOL_REF (Pmode,
28437 ggc_strdup (labelpc)));
28438 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28439 }
28440 else
28441 /* Output ".word .LTHUNKn". */
28442 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28443
28444 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28445 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28446 }
28447 else
28448 {
28449 fputs ("\tb\t", file);
28450 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28451 if (NEED_PLT_RELOC)
28452 fputs ("(PLT)", file);
28453 fputc ('\n', file);
28454 }
28455
28456 final_end_function ();
28457 }
28458
28459 int
28460 arm_emit_vector_const (FILE *file, rtx x)
28461 {
28462 int i;
28463 const char * pattern;
28464
28465 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28466
28467 switch (GET_MODE (x))
28468 {
28469 case V2SImode: pattern = "%08x"; break;
28470 case V4HImode: pattern = "%04x"; break;
28471 case V8QImode: pattern = "%02x"; break;
28472 default: gcc_unreachable ();
28473 }
28474
28475 fprintf (file, "0x");
28476 for (i = CONST_VECTOR_NUNITS (x); i--;)
28477 {
28478 rtx element;
28479
28480 element = CONST_VECTOR_ELT (x, i);
28481 fprintf (file, pattern, INTVAL (element));
28482 }
28483
28484 return 1;
28485 }
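/* Worked example: for a V4HImode constant with elements {1, 2, 3, 4}
   (element 0 listed first), the loop walks from the highest element down
   and prints

       0x0004000300020001

   so element 0 lands in the least significant digits.  */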
28486
28487 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28488 HFmode constant pool entries are actually loaded with ldr. */
28489 void
28490 arm_emit_fp16_const (rtx c)
28491 {
28492 REAL_VALUE_TYPE r;
28493 long bits;
28494
28495 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28496 bits = real_to_target (NULL, &r, HFmode);
28497 if (WORDS_BIG_ENDIAN)
28498 assemble_zeros (2);
28499 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28500 if (!WORDS_BIG_ENDIAN)
28501 assemble_zeros (2);
28502 }
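/* Data layout example: the half-precision bit pattern of 1.0 is 0x3c00, so
   on a little-endian target this emits the 2-byte value 0x3c00 followed by
   two bytes of zero padding; with WORDS_BIG_ENDIAN the padding is emitted
   first.  */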
28503
28504 const char *
28505 arm_output_load_gr (rtx *operands)
28506 {
28507 rtx reg;
28508 rtx offset;
28509 rtx wcgr;
28510 rtx sum;
28511
28512 if (!MEM_P (operands [1])
28513 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28514 || !REG_P (reg = XEXP (sum, 0))
28515 || !CONST_INT_P (offset = XEXP (sum, 1))
28516 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28517 return "wldrw%?\t%0, %1";
28518
28519 /* Fix up an out-of-range load of a GR register. */
28520 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28521 wcgr = operands[0];
28522 operands[0] = reg;
28523 output_asm_insn ("ldr%?\t%0, %1", operands);
28524
28525 operands[0] = wcgr;
28526 operands[1] = reg;
28527 output_asm_insn ("tmcr%?\t%0, %1", operands);
28528 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28529
28530 return "";
28531 }
28532
28533 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28534
28535 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28536 named arg and all anonymous args onto the stack.
28537 XXX I know the prologue shouldn't be pushing registers, but it is faster
28538 that way. */
28539
28540 static void
28541 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28542 enum machine_mode mode,
28543 tree type,
28544 int *pretend_size,
28545 int second_time ATTRIBUTE_UNUSED)
28546 {
28547 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28548 int nregs;
28549
28550 cfun->machine->uses_anonymous_args = 1;
28551 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28552 {
28553 nregs = pcum->aapcs_ncrn;
28554 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28555 nregs++;
28556 }
28557 else
28558 nregs = pcum->nregs;
28559
28560 if (nregs < NUM_ARG_REGS)
28561 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28562 }
28563
28564 /* We can't rely on the caller doing the proper promotion when
28565 using APCS or ATPCS. */
28566
28567 static bool
28568 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28569 {
28570 return !TARGET_AAPCS_BASED;
28571 }
28572
28573 static enum machine_mode
28574 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28575 enum machine_mode mode,
28576 int *punsignedp ATTRIBUTE_UNUSED,
28577 const_tree fntype ATTRIBUTE_UNUSED,
28578 int for_return ATTRIBUTE_UNUSED)
28579 {
28580 if (GET_MODE_CLASS (mode) == MODE_INT
28581 && GET_MODE_SIZE (mode) < 4)
28582 return SImode;
28583
28584 return mode;
28585 }
28586
28587 /* AAPCS based ABIs use short enums by default. */
28588
28589 static bool
28590 arm_default_short_enums (void)
28591 {
28592 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28593 }
28594
28595
28596 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28597
28598 static bool
28599 arm_align_anon_bitfield (void)
28600 {
28601 return TARGET_AAPCS_BASED;
28602 }
28603
28604
28605 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28606
28607 static tree
28608 arm_cxx_guard_type (void)
28609 {
28610 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28611 }
28612
28613
28614 /* The EABI says test the least significant bit of a guard variable. */
28615
28616 static bool
28617 arm_cxx_guard_mask_bit (void)
28618 {
28619 return TARGET_AAPCS_BASED;
28620 }
28621
28622
28623 /* The EABI specifies that all array cookies are 8 bytes long. */
28624
28625 static tree
28626 arm_get_cookie_size (tree type)
28627 {
28628 tree size;
28629
28630 if (!TARGET_AAPCS_BASED)
28631 return default_cxx_get_cookie_size (type);
28632
28633 size = build_int_cst (sizetype, 8);
28634 return size;
28635 }
28636
28637
28638 /* The EABI says that array cookies should also contain the element size. */
28639
28640 static bool
28641 arm_cookie_has_size (void)
28642 {
28643 return TARGET_AAPCS_BASED;
28644 }
28645
28646
28647 /* The EABI says constructors and destructors should return a pointer to
28648 the object constructed/destroyed. */
28649
28650 static bool
28651 arm_cxx_cdtor_returns_this (void)
28652 {
28653 return TARGET_AAPCS_BASED;
28654 }
28655
28656 /* The EABI says that an inline function may never be the key
28657 method. */
28658
28659 static bool
28660 arm_cxx_key_method_may_be_inline (void)
28661 {
28662 return !TARGET_AAPCS_BASED;
28663 }
28664
28665 static void
28666 arm_cxx_determine_class_data_visibility (tree decl)
28667 {
28668 if (!TARGET_AAPCS_BASED
28669 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28670 return;
28671
28672 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28673 is exported. However, on systems without dynamic vague linkage,
28674 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28675 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28676 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28677 else
28678 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28679 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28680 }
28681
28682 static bool
28683 arm_cxx_class_data_always_comdat (void)
28684 {
28685 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28686 vague linkage if the class has no key function. */
28687 return !TARGET_AAPCS_BASED;
28688 }
28689
28690
28691 /* The EABI says __aeabi_atexit should be used to register static
28692 destructors. */
28693
28694 static bool
28695 arm_cxx_use_aeabi_atexit (void)
28696 {
28697 return TARGET_AAPCS_BASED;
28698 }
28699
28700
28701 void
28702 arm_set_return_address (rtx source, rtx scratch)
28703 {
28704 arm_stack_offsets *offsets;
28705 HOST_WIDE_INT delta;
28706 rtx addr;
28707 unsigned long saved_regs;
28708
28709 offsets = arm_get_frame_offsets ();
28710 saved_regs = offsets->saved_regs_mask;
28711
28712 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28713 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28714 else
28715 {
28716 if (frame_pointer_needed)
28717 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28718 else
28719 {
28720 /* LR will be the first saved register. */
28721 delta = offsets->outgoing_args - (offsets->frame + 4);
28722
28723
28724 if (delta >= 4096)
28725 {
28726 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28727 GEN_INT (delta & ~4095)));
28728 addr = scratch;
28729 delta &= 4095;
28730 }
28731 else
28732 addr = stack_pointer_rtx;
28733
28734 addr = plus_constant (Pmode, addr, delta);
28735 }
28736 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28737 }
28738 }
28739
28740
28741 void
28742 thumb_set_return_address (rtx source, rtx scratch)
28743 {
28744 arm_stack_offsets *offsets;
28745 HOST_WIDE_INT delta;
28746 HOST_WIDE_INT limit;
28747 int reg;
28748 rtx addr;
28749 unsigned long mask;
28750
28751 emit_use (source);
28752
28753 offsets = arm_get_frame_offsets ();
28754 mask = offsets->saved_regs_mask;
28755 if (mask & (1 << LR_REGNUM))
28756 {
28757 limit = 1024;
28758 /* Find the saved regs. */
28759 if (frame_pointer_needed)
28760 {
28761 delta = offsets->soft_frame - offsets->saved_args;
28762 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28763 if (TARGET_THUMB1)
28764 limit = 128;
28765 }
28766 else
28767 {
28768 delta = offsets->outgoing_args - offsets->saved_args;
28769 reg = SP_REGNUM;
28770 }
28771 /* Allow for the stack frame. */
28772 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28773 delta -= 16;
28774 /* The link register is always the first saved register. */
28775 delta -= 4;
28776
28777 /* Construct the address. */
28778 addr = gen_rtx_REG (SImode, reg);
28779 if (delta > limit)
28780 {
28781 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28782 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28783 addr = scratch;
28784 }
28785 else
28786 addr = plus_constant (Pmode, addr, delta);
28787
28788 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28789 }
28790 else
28791 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28792 }
28793
28794 /* Implements target hook vector_mode_supported_p. */
28795 bool
28796 arm_vector_mode_supported_p (enum machine_mode mode)
28797 {
28798 /* Neon also supports V2SImode, etc. listed in the clause below. */
28799 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28800 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28801 return true;
28802
28803 if ((TARGET_NEON || TARGET_IWMMXT)
28804 && ((mode == V2SImode)
28805 || (mode == V4HImode)
28806 || (mode == V8QImode)))
28807 return true;
28808
28809 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28810 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28811 || mode == V2HAmode))
28812 return true;
28813
28814 return false;
28815 }
28816
28817 /* Implements target hook array_mode_supported_p. */
28818
28819 static bool
28820 arm_array_mode_supported_p (enum machine_mode mode,
28821 unsigned HOST_WIDE_INT nelems)
28822 {
28823 if (TARGET_NEON
28824 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28825 && (nelems >= 2 && nelems <= 4))
28826 return true;
28827
28828 return false;
28829 }
28830
28831 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28832 registers when autovectorizing for Neon, at least until multiple vector
28833 widths are supported properly by the middle-end. */
28834
28835 static enum machine_mode
28836 arm_preferred_simd_mode (enum machine_mode mode)
28837 {
28838 if (TARGET_NEON)
28839 switch (mode)
28840 {
28841 case SFmode:
28842 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28843 case SImode:
28844 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28845 case HImode:
28846 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28847 case QImode:
28848 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28849 case DImode:
28850 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28851 return V2DImode;
28852 break;
28853
28854 default:;
28855 }
28856
28857 if (TARGET_REALLY_IWMMXT)
28858 switch (mode)
28859 {
28860 case SImode:
28861 return V2SImode;
28862 case HImode:
28863 return V4HImode;
28864 case QImode:
28865 return V8QImode;
28866
28867 default:;
28868 }
28869
28870 return word_mode;
28871 }
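/* For instance, with Neon enabled a loop over 32-bit integers is
   vectorized with V4SImode (one quad-word vector of four elements);
   -mvectorize-with-neon-double narrows that choice to V2SImode.  */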
28872
28873 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28874
28875 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28876 using r0-r4 for function arguments, r7 for the stack frame, and not have
28877 enough left over to do doubleword arithmetic. For Thumb-2 all the
28878 potentially problematic instructions accept high registers so this is not
28879 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28880 that require many low registers. */
28881 static bool
28882 arm_class_likely_spilled_p (reg_class_t rclass)
28883 {
28884 if ((TARGET_THUMB1 && rclass == LO_REGS)
28885 || rclass == CC_REG)
28886 return true;
28887
28888 return false;
28889 }
28890
28891 /* Implements target hook small_register_classes_for_mode_p. */
28892 bool
28893 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
28894 {
28895 return TARGET_THUMB1;
28896 }
28897
28898 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28899 ARM insns and therefore guarantee that the shift count is modulo 256.
28900 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28901 guarantee no particular behavior for out-of-range counts. */
28902
28903 static unsigned HOST_WIDE_INT
28904 arm_shift_truncation_mask (enum machine_mode mode)
28905 {
28906 return mode == SImode ? 255 : 0;
28907 }
28908
28909
28910 /* Map internal gcc register numbers to DWARF2 register numbers. */
28911
28912 unsigned int
28913 arm_dbx_register_number (unsigned int regno)
28914 {
28915 if (regno < 16)
28916 return regno;
28917
28918 if (IS_VFP_REGNUM (regno))
28919 {
28920 /* See comment in arm_dwarf_register_span. */
28921 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28922 return 64 + regno - FIRST_VFP_REGNUM;
28923 else
28924 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28925 }
28926
28927 if (IS_IWMMXT_GR_REGNUM (regno))
28928 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28929
28930 if (IS_IWMMXT_REGNUM (regno))
28931 return 112 + regno - FIRST_IWMMXT_REGNUM;
28932
28933 gcc_unreachable ();
28934 }
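/* Example mappings implied by the code above: core registers map to
   themselves, the first single-precision VFP register maps to DWARF
   register 64 (and the last single-precision register to 95), while
   double-only VFP registers fall in the 256-287 range used for D0-D31.  */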
28935
28936 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28937 GCC models them as 64 32-bit registers, so we need to describe this to
28938 the DWARF generation code. Other registers can use the default. */
28939 static rtx
28940 arm_dwarf_register_span (rtx rtl)
28941 {
28942 enum machine_mode mode;
28943 unsigned regno;
28944 rtx parts[16];
28945 int nregs;
28946 int i;
28947
28948 regno = REGNO (rtl);
28949 if (!IS_VFP_REGNUM (regno))
28950 return NULL_RTX;
28951
28952 /* XXX FIXME: The EABI defines two VFP register ranges:
28953 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28954 256-287: D0-D31
28955 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28956 corresponding D register. Until GDB supports this, we shall use the
28957 legacy encodings. We also use these encodings for D0-D15 for
28958 compatibility with older debuggers. */
28959 mode = GET_MODE (rtl);
28960 if (GET_MODE_SIZE (mode) < 8)
28961 return NULL_RTX;
28962
28963 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28964 {
28965 nregs = GET_MODE_SIZE (mode) / 4;
28966 for (i = 0; i < nregs; i += 2)
28967 if (TARGET_BIG_END)
28968 {
28969 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28970 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28971 }
28972 else
28973 {
28974 parts[i] = gen_rtx_REG (SImode, regno + i);
28975 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28976 }
28977 }
28978 else
28979 {
28980 nregs = GET_MODE_SIZE (mode) / 8;
28981 for (i = 0; i < nregs; i++)
28982 parts[i] = gen_rtx_REG (DImode, regno + i);
28983 }
28984
28985 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
28986 }
28987
28988 #if ARM_UNWIND_INFO
28989 /* Emit unwind directives for a store-multiple instruction or stack pointer
28990 push during alignment.
28991 These should only ever be generated by the function prologue code, so
28992 expect them to have a particular form.
28993 The store-multiple instruction sometimes pushes pc as the last register,
28994 although it should not be tracked in the unwind information; for -Os it
28995 sometimes pushes some dummy registers before the first register that needs
28996 to be tracked in the unwind information. Such dummy registers are there
28997 just to avoid a separate stack adjustment, and will not be restored in the
28998 epilogue. */
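/* As a rough illustration, a prologue "push {r4, r5, r6, lr}" is described by
   ".save {r4, r5, r6, lr}" and a "vpush {d8, d9}" by ".vsave {d8, d9}"; the
   -Os dummy-register and pushed-pc cases described above additionally emit
   ".pad" directives for the extra stack adjustment.  */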
28999
29000 static void
29001 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29002 {
29003 int i;
29004 HOST_WIDE_INT offset;
29005 HOST_WIDE_INT nregs;
29006 int reg_size;
29007 unsigned reg;
29008 unsigned lastreg;
29009 unsigned padfirst = 0, padlast = 0;
29010 rtx e;
29011
29012 e = XVECEXP (p, 0, 0);
29013 gcc_assert (GET_CODE (e) == SET);
29014
29015 /* First insn will adjust the stack pointer. */
29016 gcc_assert (GET_CODE (e) == SET
29017 && REG_P (SET_DEST (e))
29018 && REGNO (SET_DEST (e)) == SP_REGNUM
29019 && GET_CODE (SET_SRC (e)) == PLUS);
29020
29021 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29022 nregs = XVECLEN (p, 0) - 1;
29023 gcc_assert (nregs);
29024
29025 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29026 if (reg < 16)
29027 {
29028 /* For -Os dummy registers can be pushed at the beginning to
29029 avoid separate stack pointer adjustment. */
29030 e = XVECEXP (p, 0, 1);
29031 e = XEXP (SET_DEST (e), 0);
29032 if (GET_CODE (e) == PLUS)
29033 padfirst = INTVAL (XEXP (e, 1));
29034 gcc_assert (padfirst == 0 || optimize_size);
29035 /* The function prologue may also push pc, but that push is not annotated,
29036 as pc is never restored. We turn this into a stack pointer adjustment. */
29037 e = XVECEXP (p, 0, nregs);
29038 e = XEXP (SET_DEST (e), 0);
29039 if (GET_CODE (e) == PLUS)
29040 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29041 else
29042 padlast = offset - 4;
29043 gcc_assert (padlast == 0 || padlast == 4);
29044 if (padlast == 4)
29045 fprintf (asm_out_file, "\t.pad #4\n");
29046 reg_size = 4;
29047 fprintf (asm_out_file, "\t.save {");
29048 }
29049 else if (IS_VFP_REGNUM (reg))
29050 {
29051 reg_size = 8;
29052 fprintf (asm_out_file, "\t.vsave {");
29053 }
29054 else
29055 /* Unknown register type. */
29056 gcc_unreachable ();
29057
29058 /* If the stack increment doesn't match the size of the saved registers,
29059 something has gone horribly wrong. */
29060 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29061
29062 offset = padfirst;
29063 lastreg = 0;
29064 /* The remaining insns will describe the stores. */
29065 for (i = 1; i <= nregs; i++)
29066 {
29067 /* Expect (set (mem <addr>) (reg)).
29068 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29069 e = XVECEXP (p, 0, i);
29070 gcc_assert (GET_CODE (e) == SET
29071 && MEM_P (SET_DEST (e))
29072 && REG_P (SET_SRC (e)));
29073
29074 reg = REGNO (SET_SRC (e));
29075 gcc_assert (reg >= lastreg);
29076
29077 if (i != 1)
29078 fprintf (asm_out_file, ", ");
29079 /* We can't use %r for vfp because we need to use the
29080 double precision register names. */
29081 if (IS_VFP_REGNUM (reg))
29082 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29083 else
29084 asm_fprintf (asm_out_file, "%r", reg);
29085
29086 #ifdef ENABLE_CHECKING
29087 /* Check that the addresses are consecutive. */
29088 e = XEXP (SET_DEST (e), 0);
29089 if (GET_CODE (e) == PLUS)
29090 gcc_assert (REG_P (XEXP (e, 0))
29091 && REGNO (XEXP (e, 0)) == SP_REGNUM
29092 && CONST_INT_P (XEXP (e, 1))
29093 && offset == INTVAL (XEXP (e, 1)));
29094 else
29095 gcc_assert (i == 1
29096 && REG_P (e)
29097 && REGNO (e) == SP_REGNUM);
29098 offset += reg_size;
29099 #endif
29100 }
29101 fprintf (asm_out_file, "}\n");
29102 if (padfirst)
29103 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29104 }
29105
29106 /* Emit unwind directives for a SET. */
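/* For instance, a single-register push (set (mem (pre_dec sp)) (reg r4))
   becomes ".save {r4}", a stack decrement (set sp (plus sp (const_int -16)))
   becomes ".pad #16", and a frame pointer set-up such as
   (set fp (plus sp (const_int 8))) becomes a ".setfp" directive carrying
   that offset.  */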
29107
29108 static void
29109 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29110 {
29111 rtx e0;
29112 rtx e1;
29113 unsigned reg;
29114
29115 e0 = XEXP (p, 0);
29116 e1 = XEXP (p, 1);
29117 switch (GET_CODE (e0))
29118 {
29119 case MEM:
29120 /* Pushing a single register. */
29121 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29122 || !REG_P (XEXP (XEXP (e0, 0), 0))
29123 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29124 abort ();
29125
29126 asm_fprintf (asm_out_file, "\t.save ");
29127 if (IS_VFP_REGNUM (REGNO (e1)))
29128 asm_fprintf(asm_out_file, "{d%d}\n",
29129 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29130 else
29131 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29132 break;
29133
29134 case REG:
29135 if (REGNO (e0) == SP_REGNUM)
29136 {
29137 /* A stack increment. */
29138 if (GET_CODE (e1) != PLUS
29139 || !REG_P (XEXP (e1, 0))
29140 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29141 || !CONST_INT_P (XEXP (e1, 1)))
29142 abort ();
29143
29144 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29145 -INTVAL (XEXP (e1, 1)));
29146 }
29147 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29148 {
29149 HOST_WIDE_INT offset;
29150
29151 if (GET_CODE (e1) == PLUS)
29152 {
29153 if (!REG_P (XEXP (e1, 0))
29154 || !CONST_INT_P (XEXP (e1, 1)))
29155 abort ();
29156 reg = REGNO (XEXP (e1, 0));
29157 offset = INTVAL (XEXP (e1, 1));
29158 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29159 HARD_FRAME_POINTER_REGNUM, reg,
29160 offset);
29161 }
29162 else if (REG_P (e1))
29163 {
29164 reg = REGNO (e1);
29165 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29166 HARD_FRAME_POINTER_REGNUM, reg);
29167 }
29168 else
29169 abort ();
29170 }
29171 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29172 {
29173 /* Move from sp to reg. */
29174 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29175 }
29176 else if (GET_CODE (e1) == PLUS
29177 && REG_P (XEXP (e1, 0))
29178 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29179 && CONST_INT_P (XEXP (e1, 1)))
29180 {
29181 /* Set reg to offset from sp. */
29182 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29183 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29184 }
29185 else
29186 abort ();
29187 break;
29188
29189 default:
29190 abort ();
29191 }
29192 }
29193
29194
29195 /* Emit unwind directives for the given insn. */
29196
29197 static void
29198 arm_unwind_emit (FILE * asm_out_file, rtx insn)
29199 {
29200 rtx note, pat;
29201 bool handled_one = false;
29202
29203 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29204 return;
29205
29206 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29207 && (TREE_NOTHROW (current_function_decl)
29208 || crtl->all_throwers_are_sibcalls))
29209 return;
29210
29211 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29212 return;
29213
29214 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29215 {
29216 switch (REG_NOTE_KIND (note))
29217 {
29218 case REG_FRAME_RELATED_EXPR:
29219 pat = XEXP (note, 0);
29220 goto found;
29221
29222 case REG_CFA_REGISTER:
29223 pat = XEXP (note, 0);
29224 if (pat == NULL)
29225 {
29226 pat = PATTERN (insn);
29227 if (GET_CODE (pat) == PARALLEL)
29228 pat = XVECEXP (pat, 0, 0);
29229 }
29230
29231 /* Only emitted for IS_STACKALIGN re-alignment. */
29232 {
29233 rtx dest, src;
29234 unsigned reg;
29235
29236 src = SET_SRC (pat);
29237 dest = SET_DEST (pat);
29238
29239 gcc_assert (src == stack_pointer_rtx);
29240 reg = REGNO (dest);
29241 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29242 reg + 0x90, reg);
29243 }
29244 handled_one = true;
29245 break;
29246
29247 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29248 to get correct DWARF information for shrink-wrapping. We should not
29249 emit unwind information for it because these notes are used either for
29250 pretend arguments or to adjust sp and restore registers from the
29251 stack. */
29252 case REG_CFA_DEF_CFA:
29253 case REG_CFA_ADJUST_CFA:
29254 case REG_CFA_RESTORE:
29255 return;
29256
29257 case REG_CFA_EXPRESSION:
29258 case REG_CFA_OFFSET:
29259 /* ??? Only handling here what we actually emit. */
29260 gcc_unreachable ();
29261
29262 default:
29263 break;
29264 }
29265 }
29266 if (handled_one)
29267 return;
29268 pat = PATTERN (insn);
29269 found:
29270
29271 switch (GET_CODE (pat))
29272 {
29273 case SET:
29274 arm_unwind_emit_set (asm_out_file, pat);
29275 break;
29276
29277 case SEQUENCE:
29278 /* Store multiple. */
29279 arm_unwind_emit_sequence (asm_out_file, pat);
29280 break;
29281
29282 default:
29283 abort();
29284 }
29285 }
29286
29287
29288 /* Output a reference from a function exception table to the type_info
29289 object X. The EABI specifies that the symbol should be relocated by
29290 an R_ARM_TARGET2 relocation. */
29291
29292 static bool
29293 arm_output_ttype (rtx x)
29294 {
29295 fputs ("\t.word\t", asm_out_file);
29296 output_addr_const (asm_out_file, x);
29297 /* Use special relocations for symbol references. */
29298 if (!CONST_INT_P (x))
29299 fputs ("(TARGET2)", asm_out_file);
29300 fputc ('\n', asm_out_file);
29301
29302 return TRUE;
29303 }
29304
29305 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29306
29307 static void
29308 arm_asm_emit_except_personality (rtx personality)
29309 {
29310 fputs ("\t.personality\t", asm_out_file);
29311 output_addr_const (asm_out_file, personality);
29312 fputc ('\n', asm_out_file);
29313 }
29314
29315 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29316
29317 static void
29318 arm_asm_init_sections (void)
29319 {
29320 exception_section = get_unnamed_section (0, output_section_asm_op,
29321 "\t.handlerdata");
29322 }
29323 #endif /* ARM_UNWIND_INFO */
29324
29325 /* Output unwind directives for the start/end of a function. */
29326
29327 void
29328 arm_output_fn_unwind (FILE * f, bool prologue)
29329 {
29330 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29331 return;
29332
29333 if (prologue)
29334 fputs ("\t.fnstart\n", f);
29335 else
29336 {
29337 /* If this function will never be unwound, then mark it as such.
29338 The same condition is used in arm_unwind_emit to suppress
29339 the frame annotations. */
29340 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29341 && (TREE_NOTHROW (current_function_decl)
29342 || crtl->all_throwers_are_sibcalls))
29343 fputs("\t.cantunwind\n", f);
29344
29345 fputs ("\t.fnend\n", f);
29346 }
29347 }
29348
29349 static bool
29350 arm_emit_tls_decoration (FILE *fp, rtx x)
29351 {
29352 enum tls_reloc reloc;
29353 rtx val;
29354
29355 val = XVECEXP (x, 0, 0);
29356 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29357
29358 output_addr_const (fp, val);
29359
29360 switch (reloc)
29361 {
29362 case TLS_GD32:
29363 fputs ("(tlsgd)", fp);
29364 break;
29365 case TLS_LDM32:
29366 fputs ("(tlsldm)", fp);
29367 break;
29368 case TLS_LDO32:
29369 fputs ("(tlsldo)", fp);
29370 break;
29371 case TLS_IE32:
29372 fputs ("(gottpoff)", fp);
29373 break;
29374 case TLS_LE32:
29375 fputs ("(tpoff)", fp);
29376 break;
29377 case TLS_DESCSEQ:
29378 fputs ("(tlsdesc)", fp);
29379 break;
29380 default:
29381 gcc_unreachable ();
29382 }
29383
29384 switch (reloc)
29385 {
29386 case TLS_GD32:
29387 case TLS_LDM32:
29388 case TLS_IE32:
29389 case TLS_DESCSEQ:
29390 fputs (" + (. - ", fp);
29391 output_addr_const (fp, XVECEXP (x, 0, 2));
29392 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
29393 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29394 output_addr_const (fp, XVECEXP (x, 0, 3));
29395 fputc (')', fp);
29396 break;
29397 default:
29398 break;
29399 }
29400
29401 return TRUE;
29402 }
29403
29404 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29405
29406 static void
29407 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29408 {
29409 gcc_assert (size == 4);
29410 fputs ("\t.word\t", file);
29411 output_addr_const (file, x);
29412 fputs ("(tlsldo)", file);
29413 }
29414
29415 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29416
29417 static bool
29418 arm_output_addr_const_extra (FILE *fp, rtx x)
29419 {
29420 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29421 return arm_emit_tls_decoration (fp, x);
29422 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29423 {
29424 char label[256];
29425 int labelno = INTVAL (XVECEXP (x, 0, 0));
29426
29427 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29428 assemble_name_raw (fp, label);
29429
29430 return TRUE;
29431 }
29432 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29433 {
29434 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29435 if (GOT_PCREL)
29436 fputs ("+.", fp);
29437 fputs ("-(", fp);
29438 output_addr_const (fp, XVECEXP (x, 0, 0));
29439 fputc (')', fp);
29440 return TRUE;
29441 }
29442 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29443 {
29444 output_addr_const (fp, XVECEXP (x, 0, 0));
29445 if (GOT_PCREL)
29446 fputs ("+.", fp);
29447 fputs ("-(", fp);
29448 output_addr_const (fp, XVECEXP (x, 0, 1));
29449 fputc (')', fp);
29450 return TRUE;
29451 }
29452 else if (GET_CODE (x) == CONST_VECTOR)
29453 return arm_emit_vector_const (fp, x);
29454
29455 return FALSE;
29456 }
29457
29458 /* Output assembly for a shift instruction.
29459 SET_FLAGS determines how the instruction modifies the condition codes.
29460 0 - Do not set condition codes.
29461 1 - Set condition codes.
29462 2 - Use smallest instruction. */
29463 const char *
29464 arm_output_shift(rtx * operands, int set_flags)
29465 {
29466 char pattern[100];
29467 static const char flag_chars[3] = {'?', '.', '!'};
29468 const char *shift;
29469 HOST_WIDE_INT val;
29470 char c;
29471
29472 c = flag_chars[set_flags];
29473 if (TARGET_UNIFIED_ASM)
29474 {
29475 shift = shift_op(operands[3], &val);
29476 if (shift)
29477 {
29478 if (val != -1)
29479 operands[2] = GEN_INT(val);
29480 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29481 }
29482 else
29483 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29484 }
29485 else
29486 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29487 output_asm_insn (pattern, operands);
29488 return "";
29489 }
29490
29491 /* Output assembly for a WMMX immediate shift instruction. */
29492 const char *
29493 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29494 {
29495 int shift = INTVAL (operands[2]);
29496 char templ[50];
29497 enum machine_mode opmode = GET_MODE (operands[0]);
29498
29499 gcc_assert (shift >= 0);
29500
29501 /* Handle shift values that would be out of range in the register versions:
29502 greater than 63 (D qualifier), 31 (W qualifier) or 15 (H qualifier). */
29503 if (((opmode == V4HImode) && (shift > 15))
29504 || ((opmode == V2SImode) && (shift > 31))
29505 || ((opmode == DImode) && (shift > 63)))
29506 {
29507 if (wror_or_wsra)
29508 {
29509 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29510 output_asm_insn (templ, operands);
29511 if (opmode == DImode)
29512 {
29513 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29514 output_asm_insn (templ, operands);
29515 }
29516 }
29517 else
29518 {
29519 /* The destination register will contain all zeros. */
29520 sprintf (templ, "wzero\t%%0");
29521 output_asm_insn (templ, operands);
29522 }
29523 return "";
29524 }
29525
29526 if ((opmode == DImode) && (shift > 32))
29527 {
29528 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29529 output_asm_insn (templ, operands);
29530 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29531 output_asm_insn (templ, operands);
29532 }
29533 else
29534 {
29535 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29536 output_asm_insn (templ, operands);
29537 }
29538 return "";
29539 }
29540
29541 /* Output assembly for a WMMX tinsr instruction. */
29542 const char *
29543 arm_output_iwmmxt_tinsr (rtx *operands)
29544 {
29545 int mask = INTVAL (operands[3]);
29546 int i;
29547 char templ[50];
29548 int units = mode_nunits[GET_MODE (operands[0])];
29549 gcc_assert ((mask & (mask - 1)) == 0);
29550 for (i = 0; i < units; ++i)
29551 {
29552 if ((mask & 0x01) == 1)
29553 {
29554 break;
29555 }
29556 mask >>= 1;
29557 }
29558 gcc_assert (i < units);
29559 {
29560 switch (GET_MODE (operands[0]))
29561 {
29562 case V8QImode:
29563 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29564 break;
29565 case V4HImode:
29566 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29567 break;
29568 case V2SImode:
29569 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29570 break;
29571 default:
29572 gcc_unreachable ();
29573 break;
29574 }
29575 output_asm_insn (templ, operands);
29576 }
29577 return "";
29578 }
29579
29580 /* Output a Thumb-1 casesi dispatch sequence. */
29581 const char *
29582 thumb1_output_casesi (rtx *operands)
29583 {
29584 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
29585
29586 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29587
29588 switch (GET_MODE(diff_vec))
29589 {
29590 case QImode:
29591 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29592 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29593 case HImode:
29594 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29595 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29596 case SImode:
29597 return "bl\t%___gnu_thumb1_case_si";
29598 default:
29599 gcc_unreachable ();
29600 }
29601 }
29602
29603 /* Output a Thumb-2 casesi instruction. */
29604 const char *
29605 thumb2_output_casesi (rtx *operands)
29606 {
29607 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
29608
29609 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29610
29611 output_asm_insn ("cmp\t%0, %1", operands);
29612 output_asm_insn ("bhi\t%l3", operands);
29613 switch (GET_MODE(diff_vec))
29614 {
29615 case QImode:
29616 return "tbb\t[%|pc, %0]";
29617 case HImode:
29618 return "tbh\t[%|pc, %0, lsl #1]";
29619 case SImode:
29620 if (flag_pic)
29621 {
29622 output_asm_insn ("adr\t%4, %l2", operands);
29623 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29624 output_asm_insn ("add\t%4, %4, %5", operands);
29625 return "bx\t%4";
29626 }
29627 else
29628 {
29629 output_asm_insn ("adr\t%4, %l2", operands);
29630 return "ldr\t%|pc, [%4, %0, lsl #2]";
29631 }
29632 default:
29633 gcc_unreachable ();
29634 }
29635 }
29636
29637 /* Most ARM cores are single issue, but some newer ones can issue several
29638 instructions per cycle. The scheduler descriptions rely on this being correct. */
29639 static int
29640 arm_issue_rate (void)
29641 {
29642 switch (arm_tune)
29643 {
29644 case cortexa15:
29645 case cortexa57:
29646 return 3;
29647
29648 case cortexr4:
29649 case cortexr4f:
29650 case cortexr5:
29651 case genericv7a:
29652 case cortexa5:
29653 case cortexa7:
29654 case cortexa8:
29655 case cortexa9:
29656 case cortexa12:
29657 case cortexa53:
29658 case fa726te:
29659 case marvell_pj4:
29660 return 2;
29661
29662 default:
29663 return 1;
29664 }
29665 }
29666
29667 /* A table and a function to perform ARM-specific name mangling for
29668 NEON vector types in order to conform to the AAPCS (see "Procedure
29669 Call Standard for the ARM Architecture", Appendix A). To qualify
29670 for emission with the mangled names defined in that document, a
29671 vector type must not only be of the correct mode but also be
29672 composed of NEON vector element types (e.g. __builtin_neon_qi). */
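/* For example, a vector type whose elements are __builtin_neon_si and whose
   mode is V4SImode (int32x4_t from arm_neon.h) mangles as
   "17__simd128_int32_t" by the table below, so a C++ function
   "void f (int32x4_t)" mangles as "_Z1f17__simd128_int32_t".  */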
29673 typedef struct
29674 {
29675 enum machine_mode mode;
29676 const char *element_type_name;
29677 const char *aapcs_name;
29678 } arm_mangle_map_entry;
29679
29680 static arm_mangle_map_entry arm_mangle_map[] = {
29681 /* 64-bit containerized types. */
29682 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29683 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29684 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29685 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29686 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29687 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29688 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29689 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29690 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29691 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29692
29693 /* 128-bit containerized types. */
29694 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29695 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29696 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29697 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29698 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29699 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29700 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29701 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29702 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29703 { VOIDmode, NULL, NULL }
29704 };
29705
29706 const char *
29707 arm_mangle_type (const_tree type)
29708 {
29709 arm_mangle_map_entry *pos = arm_mangle_map;
29710
29711 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29712 has to be mangled as if it is in the "std" namespace. */
29713 if (TARGET_AAPCS_BASED
29714 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29715 return "St9__va_list";
29716
29717 /* Half-precision float. */
29718 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29719 return "Dh";
29720
29721 if (TREE_CODE (type) != VECTOR_TYPE)
29722 return NULL;
29723
29724 /* Check the mode of the vector type, and the name of the vector
29725 element type, against the table. */
29726 while (pos->mode != VOIDmode)
29727 {
29728 tree elt_type = TREE_TYPE (type);
29729
29730 if (pos->mode == TYPE_MODE (type)
29731 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29732 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29733 pos->element_type_name))
29734 return pos->aapcs_name;
29735
29736 pos++;
29737 }
29738
29739 /* Use the default mangling for unrecognized (possibly user-defined)
29740 vector types. */
29741 return NULL;
29742 }
29743
29744 /* Order of allocation of core registers for Thumb: this allocation is
29745 written over the corresponding initial entries of the array
29746 initialized with REG_ALLOC_ORDER. We allocate all low registers
29747 first. Saving and restoring a low register is usually cheaper than
29748 using a call-clobbered high register. */
29749
29750 static const int thumb_core_reg_alloc_order[] =
29751 {
29752 3, 2, 1, 0, 4, 5, 6, 7,
29753 14, 12, 8, 9, 10, 11
29754 };
29755
29756 /* Adjust register allocation order when compiling for Thumb. */
29757
29758 void
29759 arm_order_regs_for_local_alloc (void)
29760 {
29761 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29762 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29763 if (TARGET_THUMB)
29764 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29765 sizeof (thumb_core_reg_alloc_order));
29766 }
29767
29768 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29769
29770 bool
29771 arm_frame_pointer_required (void)
29772 {
29773 return (cfun->has_nonlocal_label
29774 || SUBTARGET_FRAME_POINTER_REQUIRED
29775 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29776 }
29777
29778 /* Only Thumb-1 lacks support for conditional execution, so return true if
29779 the target is not Thumb-1. */
29780 static bool
29781 arm_have_conditional_execution (void)
29782 {
29783 return !TARGET_THUMB1;
29784 }
29785
29786 tree
29787 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29788 {
29789 enum machine_mode in_mode, out_mode;
29790 int in_n, out_n;
29791
29792 if (TREE_CODE (type_out) != VECTOR_TYPE
29793 || TREE_CODE (type_in) != VECTOR_TYPE)
29794 return NULL_TREE;
29795
29796 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29797 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29798 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29799 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29800
29801 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29802 decl of the vectorized builtin for the appropriate vector mode.
29803 NULL_TREE is returned if no such builtin is available. */
29804 #undef ARM_CHECK_BUILTIN_MODE
29805 #define ARM_CHECK_BUILTIN_MODE(C) \
29806 (TARGET_NEON && TARGET_FPU_ARMV8 \
29807 && flag_unsafe_math_optimizations \
29808 && ARM_CHECK_BUILTIN_MODE_1 (C))
29809
29810 #undef ARM_CHECK_BUILTIN_MODE_1
29811 #define ARM_CHECK_BUILTIN_MODE_1(C) \
29812 (out_mode == SFmode && out_n == C \
29813 && in_mode == SFmode && in_n == C)
29814
29815 #undef ARM_FIND_VRINT_VARIANT
29816 #define ARM_FIND_VRINT_VARIANT(N) \
29817 (ARM_CHECK_BUILTIN_MODE (2) \
29818 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29819 : (ARM_CHECK_BUILTIN_MODE (4) \
29820 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29821 : NULL_TREE))
29822
29823 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29824 {
29825 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29826 switch (fn)
29827 {
29828 case BUILT_IN_FLOORF:
29829 return ARM_FIND_VRINT_VARIANT (vrintm);
29830 case BUILT_IN_CEILF:
29831 return ARM_FIND_VRINT_VARIANT (vrintp);
29832 case BUILT_IN_TRUNCF:
29833 return ARM_FIND_VRINT_VARIANT (vrintz);
29834 case BUILT_IN_ROUNDF:
29835 return ARM_FIND_VRINT_VARIANT (vrinta);
29836 #undef ARM_CHECK_BUILTIN_MODE
29837 #define ARM_CHECK_BUILTIN_MODE(C, N) \
29838 (out_mode == N##Imode && out_n == C \
29839 && in_mode == N##Imode && in_n == C)
29840 case BUILT_IN_BSWAP16:
29841 if (ARM_CHECK_BUILTIN_MODE (4, H))
29842 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
29843 else if (ARM_CHECK_BUILTIN_MODE (8, H))
29844 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
29845 else
29846 return NULL_TREE;
29847 case BUILT_IN_BSWAP32:
29848 if (ARM_CHECK_BUILTIN_MODE (2, S))
29849 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
29850 else if (ARM_CHECK_BUILTIN_MODE (4, S))
29851 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
29852 else
29853 return NULL_TREE;
29854 case BUILT_IN_BSWAP64:
29855 if (ARM_CHECK_BUILTIN_MODE (2, D))
29856 return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
29857 else
29858 return NULL_TREE;
29859
29860 default:
29861 return NULL_TREE;
29862 }
29863 }
29864 return NULL_TREE;
29865 }
29866 #undef ARM_CHECK_BUILTIN_MODE
29867 #undef ARM_FIND_VRINT_VARIANT
29868
29869 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
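/* For example, a 128-bit NEON vector type has a natural size of 128 bits,
   but when TARGET_AAPCS_BASED its alignment is capped at 64 bits (8 bytes).  */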
29870 static HOST_WIDE_INT
29871 arm_vector_alignment (const_tree type)
29872 {
29873 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29874
29875 if (TARGET_AAPCS_BASED)
29876 align = MIN (align, 64);
29877
29878 return align;
29879 }
29880
29881 static unsigned int
29882 arm_autovectorize_vector_sizes (void)
29883 {
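  /* The return value is a bitmask of vector sizes in bytes: 16 | 8 lets the
     vectorizer try both 128-bit and 64-bit vectors, while 0 means only the
     preferred SIMD mode is considered.  */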
29884 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
29885 }
29886
29887 static bool
29888 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29889 {
29890 /* Vectors which aren't in packed structures will not be less aligned than
29891 the natural alignment of their element type, so this is safe. */
29892 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29893 return !is_packed;
29894
29895 return default_builtin_vector_alignment_reachable (type, is_packed);
29896 }
29897
29898 static bool
29899 arm_builtin_support_vector_misalignment (enum machine_mode mode,
29900 const_tree type, int misalignment,
29901 bool is_packed)
29902 {
29903 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29904 {
29905 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29906
29907 if (is_packed)
29908 return align == 1;
29909
29910 /* If the misalignment is unknown, we should be able to handle the access
29911 so long as it is not to a member of a packed data structure. */
29912 if (misalignment == -1)
29913 return true;
29914
29915 /* Return true if the misalignment is a multiple of the natural alignment
29916 of the vector's element type. This is probably always going to be
29917 true in practice, since we've already established that this isn't a
29918 packed access. */
29919 return ((misalignment % align) == 0);
29920 }
29921
29922 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29923 is_packed);
29924 }
29925
29926 static void
29927 arm_conditional_register_usage (void)
29928 {
29929 int regno;
29930
29931 if (TARGET_THUMB1 && optimize_size)
29932 {
29933 /* When optimizing for size on Thumb-1, it's better not
29934 to use the HI regs, because of the overhead of
29935 stacking them. */
29936 for (regno = FIRST_HI_REGNUM;
29937 regno <= LAST_HI_REGNUM; ++regno)
29938 fixed_regs[regno] = call_used_regs[regno] = 1;
29939 }
29940
29941 /* The link register can be clobbered by any branch insn,
29942 but we have no way to track that at present, so mark
29943 it as unavailable. */
29944 if (TARGET_THUMB1)
29945 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29946
29947 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
29948 {
29949 /* VFPv3 registers are disabled when earlier VFP
29950 versions are selected due to the definition of
29951 LAST_VFP_REGNUM. */
29952 for (regno = FIRST_VFP_REGNUM;
29953 regno <= LAST_VFP_REGNUM; ++ regno)
29954 {
29955 fixed_regs[regno] = 0;
29956 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29957 || regno >= FIRST_VFP_REGNUM + 32;
29958 }
29959 }
29960
29961 if (TARGET_REALLY_IWMMXT)
29962 {
29963 regno = FIRST_IWMMXT_GR_REGNUM;
29964 /* The 2002/10/09 revision of the XScale ABI has wCG0
29965 and wCG1 as call-preserved registers. The 2002/11/21
29966 revision changed this so that all wCG registers are
29967 scratch registers. */
29968 for (regno = FIRST_IWMMXT_GR_REGNUM;
29969 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29970 fixed_regs[regno] = 0;
29971 /* The XScale ABI has wR0 - wR9 as scratch registers,
29972 the rest as call-preserved registers. */
29973 for (regno = FIRST_IWMMXT_REGNUM;
29974 regno <= LAST_IWMMXT_REGNUM; ++ regno)
29975 {
29976 fixed_regs[regno] = 0;
29977 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
29978 }
29979 }
29980
29981 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
29982 {
29983 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29984 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29985 }
29986 else if (TARGET_APCS_STACK)
29987 {
29988 fixed_regs[10] = 1;
29989 call_used_regs[10] = 1;
29990 }
29991 /* -mcaller-super-interworking reserves r11 for calls to
29992 _interwork_r11_call_via_rN(). Making the register global
29993 is an easy way of ensuring that it remains valid for all
29994 calls. */
29995 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
29996 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
29997 {
29998 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29999 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30000 if (TARGET_CALLER_INTERWORKING)
30001 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30002 }
30003 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30004 }
30005
30006 static reg_class_t
30007 arm_preferred_rename_class (reg_class_t rclass)
30008 {
30009 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30010 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
30011 so that code size can be reduced. */
30012 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30013 return LO_REGS;
30014 else
30015 return NO_REGS;
30016 }
30017
30018 /* Compute the attribute "length" of insn "*push_multi".
30019 So this function MUST be kept in sync with that insn pattern. */
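/* For example, on Thumb-2 a "push {r4, r5, lr}" can use the 16-bit encoding
   (length 2), whereas "push {r4, r8}" needs the 32-bit encoding (length 4)
   because r8 is a high register other than lr.  */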
30020 int
30021 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30022 {
30023 int i, regno, hi_reg;
30024 int num_saves = XVECLEN (parallel_op, 0);
30025
30026 /* ARM mode. */
30027 if (TARGET_ARM)
30028 return 4;
30029 /* Thumb1 mode. */
30030 if (TARGET_THUMB1)
30031 return 2;
30032
30033 /* Thumb2 mode. */
30034 regno = REGNO (first_op);
30035 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30036 for (i = 1; i < num_saves && !hi_reg; i++)
30037 {
30038 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30039 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30040 }
30041
30042 if (!hi_reg)
30043 return 2;
30044 return 4;
30045 }
30046
30047 /* Compute the number of instructions emitted by output_move_double. */
30048 int
30049 arm_count_output_move_double_insns (rtx *operands)
30050 {
30051 int count;
30052 rtx ops[2];
30053 /* output_move_double may modify the operands array, so call it
30054 here on a copy of the array. */
30055 ops[0] = operands[0];
30056 ops[1] = operands[1];
30057 output_move_double (ops, false, &count);
30058 return count;
30059 }
30060
30061 int
30062 vfp3_const_double_for_fract_bits (rtx operand)
30063 {
30064 REAL_VALUE_TYPE r0;
30065
30066 if (!CONST_DOUBLE_P (operand))
30067 return 0;
30068
30069 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30070 if (exact_real_inverse (DFmode, &r0))
30071 {
30072 if (exact_real_truncate (DFmode, &r0))
30073 {
30074 HOST_WIDE_INT value = real_to_integer (&r0);
30075 value = value & 0xffffffff;
30076 if ((value != 0) && ( (value & (value - 1)) == 0))
30077 return int_log2 (value);
30078 }
30079 }
30080 return 0;
30081 }
30082
30083 int
30084 vfp3_const_double_for_bits (rtx operand)
30085 {
30086 REAL_VALUE_TYPE r0;
30087
30088 if (!CONST_DOUBLE_P (operand))
30089 return 0;
30090
30091 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
30092 if (exact_real_truncate (DFmode, &r0))
30093 {
30094 HOST_WIDE_INT value = real_to_integer (&r0);
30095 value = value & 0xffffffff;
30096 if ((value != 0) && ( (value & (value - 1)) == 0))
30097 return int_log2 (value);
30098 }
30099
30100 return 0;
30101 }
30102 \f
30103 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30104
30105 static void
30106 arm_pre_atomic_barrier (enum memmodel model)
30107 {
30108 if (need_atomic_barrier_p (model, true))
30109 emit_insn (gen_memory_barrier ());
30110 }
30111
30112 static void
30113 arm_post_atomic_barrier (enum memmodel model)
30114 {
30115 if (need_atomic_barrier_p (model, false))
30116 emit_insn (gen_memory_barrier ());
30117 }
30118
30119 /* Emit the load-exclusive and store-exclusive instructions.
30120 Use acquire and release versions if necessary. */
30121
30122 static void
30123 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
30124 {
30125 rtx (*gen) (rtx, rtx);
30126
30127 if (acq)
30128 {
30129 switch (mode)
30130 {
30131 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30132 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30133 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30134 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30135 default:
30136 gcc_unreachable ();
30137 }
30138 }
30139 else
30140 {
30141 switch (mode)
30142 {
30143 case QImode: gen = gen_arm_load_exclusiveqi; break;
30144 case HImode: gen = gen_arm_load_exclusivehi; break;
30145 case SImode: gen = gen_arm_load_exclusivesi; break;
30146 case DImode: gen = gen_arm_load_exclusivedi; break;
30147 default:
30148 gcc_unreachable ();
30149 }
30150 }
30151
30152 emit_insn (gen (rval, mem));
30153 }
30154
30155 static void
30156 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
30157 rtx mem, bool rel)
30158 {
30159 rtx (*gen) (rtx, rtx, rtx);
30160
30161 if (rel)
30162 {
30163 switch (mode)
30164 {
30165 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30166 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30167 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30168 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30169 default:
30170 gcc_unreachable ();
30171 }
30172 }
30173 else
30174 {
30175 switch (mode)
30176 {
30177 case QImode: gen = gen_arm_store_exclusiveqi; break;
30178 case HImode: gen = gen_arm_store_exclusivehi; break;
30179 case SImode: gen = gen_arm_store_exclusivesi; break;
30180 case DImode: gen = gen_arm_store_exclusivedi; break;
30181 default:
30182 gcc_unreachable ();
30183 }
30184 }
30185
30186 emit_insn (gen (bval, rval, mem));
30187 }
30188
30189 /* Emit the jump instruction INSN and mark it as very unlikely to be taken. */
30190
30191 static void
30192 emit_unlikely_jump (rtx insn)
30193 {
30194 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
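  /* REG_BR_PROB_BASE is 10000, so a probability of 99 marks the branch as
     taken just under 1% of the time.  */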
30195
30196 insn = emit_jump_insn (insn);
30197 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30198 }
30199
30200 /* Expand a compare and swap pattern. */
30201
30202 void
30203 arm_expand_compare_and_swap (rtx operands[])
30204 {
30205 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30206 enum machine_mode mode;
30207 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30208
30209 bval = operands[0];
30210 rval = operands[1];
30211 mem = operands[2];
30212 oldval = operands[3];
30213 newval = operands[4];
30214 is_weak = operands[5];
30215 mod_s = operands[6];
30216 mod_f = operands[7];
30217 mode = GET_MODE (mem);
30218
30219 /* Normally the succ memory model must be stronger than fail, but in the
30220 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30221 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30222
30223 if (TARGET_HAVE_LDACQ
30224 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30225 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30226 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30227
30228 switch (mode)
30229 {
30230 case QImode:
30231 case HImode:
30232 /* For narrow modes, we're going to perform the comparison in SImode,
30233 so do the zero-extension now. */
30234 rval = gen_reg_rtx (SImode);
30235 oldval = convert_modes (SImode, mode, oldval, true);
30236 /* FALLTHRU */
30237
30238 case SImode:
30239 /* Force the value into a register if needed. We waited until after
30240 the zero-extension above to do this properly. */
30241 if (!arm_add_operand (oldval, SImode))
30242 oldval = force_reg (SImode, oldval);
30243 break;
30244
30245 case DImode:
30246 if (!cmpdi_operand (oldval, mode))
30247 oldval = force_reg (mode, oldval);
30248 break;
30249
30250 default:
30251 gcc_unreachable ();
30252 }
30253
30254 switch (mode)
30255 {
30256 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30257 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30258 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30259 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30260 default:
30261 gcc_unreachable ();
30262 }
30263
30264 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30265
30266 if (mode == QImode || mode == HImode)
30267 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30268
30269 /* In all cases, we arrange for success to be signaled by Z set.
30270 This arrangement allows for the boolean result to be used directly
30271 in a subsequent branch, post optimization. */
30272 x = gen_rtx_REG (CCmode, CC_REGNUM);
30273 x = gen_rtx_EQ (SImode, x, const0_rtx);
30274 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30275 }
30276
30277 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30278 another memory store between the load-exclusive and store-exclusive can
30279 reset the monitor from Exclusive to Open state. This means we must wait
30280 until after reload to split the pattern, lest we get a register spill in
30281 the middle of the atomic sequence. */
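/* Roughly, for SImode the strong variant splits into a loop of the form:
     1: ldrex   rval, [mem]
        cmp     rval, oldval
        bne     2f
        strex   scratch, newval, [mem]
        cmp     scratch, #0
        bne     1b
     2:
   with barriers or acquire/release variants added as the memory model
   requires.  */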
30282
30283 void
30284 arm_split_compare_and_swap (rtx operands[])
30285 {
30286 rtx rval, mem, oldval, newval, scratch;
30287 enum machine_mode mode;
30288 enum memmodel mod_s, mod_f;
30289 bool is_weak;
30290 rtx label1, label2, x, cond;
30291
30292 rval = operands[0];
30293 mem = operands[1];
30294 oldval = operands[2];
30295 newval = operands[3];
30296 is_weak = (operands[4] != const0_rtx);
30297 mod_s = (enum memmodel) INTVAL (operands[5]);
30298 mod_f = (enum memmodel) INTVAL (operands[6]);
30299 scratch = operands[7];
30300 mode = GET_MODE (mem);
30301
30302 bool use_acquire = TARGET_HAVE_LDACQ
30303 && !(mod_s == MEMMODEL_RELAXED
30304 || mod_s == MEMMODEL_CONSUME
30305 || mod_s == MEMMODEL_RELEASE);
30306
30307 bool use_release = TARGET_HAVE_LDACQ
30308 && !(mod_s == MEMMODEL_RELAXED
30309 || mod_s == MEMMODEL_CONSUME
30310 || mod_s == MEMMODEL_ACQUIRE);
30311
30312 /* Checks whether a barrier is needed and emits one accordingly. */
30313 if (!(use_acquire || use_release))
30314 arm_pre_atomic_barrier (mod_s);
30315
30316 label1 = NULL_RTX;
30317 if (!is_weak)
30318 {
30319 label1 = gen_label_rtx ();
30320 emit_label (label1);
30321 }
30322 label2 = gen_label_rtx ();
30323
30324 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30325
30326 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30327 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30328 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30329 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30330 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30331
30332 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30333
30334 /* Weak or strong, we want EQ to be true for success, so that we
30335 match the flags that we got from the compare above. */
30336 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30337 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30338 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30339
30340 if (!is_weak)
30341 {
30342 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30343 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30344 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30345 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30346 }
30347
30348 if (mod_f != MEMMODEL_RELAXED)
30349 emit_label (label2);
30350
30351 /* Checks whether a barrier is needed and emits one accordingly. */
30352 if (!(use_acquire || use_release))
30353 arm_post_atomic_barrier (mod_s);
30354
30355 if (mod_f == MEMMODEL_RELAXED)
30356 emit_label (label2);
30357 }
30358
30359 void
30360 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30361 rtx value, rtx model_rtx, rtx cond)
30362 {
30363 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30364 enum machine_mode mode = GET_MODE (mem);
30365 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
30366 rtx label, x;
30367
30368 bool use_acquire = TARGET_HAVE_LDACQ
30369 && !(model == MEMMODEL_RELAXED
30370 || model == MEMMODEL_CONSUME
30371 || model == MEMMODEL_RELEASE);
30372
30373 bool use_release = TARGET_HAVE_LDACQ
30374 && !(model == MEMMODEL_RELAXED
30375 || model == MEMMODEL_CONSUME
30376 || model == MEMMODEL_ACQUIRE);
30377
30378 /* Checks whether a barrier is needed and emits one accordingly. */
30379 if (!(use_acquire || use_release))
30380 arm_pre_atomic_barrier (model);
30381
30382 label = gen_label_rtx ();
30383 emit_label (label);
30384
30385 if (new_out)
30386 new_out = gen_lowpart (wmode, new_out);
30387 if (old_out)
30388 old_out = gen_lowpart (wmode, old_out);
30389 else
30390 old_out = new_out;
30391 value = simplify_gen_subreg (wmode, value, mode, 0);
30392
30393 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30394
30395 switch (code)
30396 {
30397 case SET:
30398 new_out = value;
30399 break;
30400
30401 case NOT:
30402 x = gen_rtx_AND (wmode, old_out, value);
30403 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30404 x = gen_rtx_NOT (wmode, new_out);
30405 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30406 break;
30407
30408 case MINUS:
30409 if (CONST_INT_P (value))
30410 {
30411 value = GEN_INT (-INTVAL (value));
30412 code = PLUS;
30413 }
30414 /* FALLTHRU */
30415
30416 case PLUS:
30417 if (mode == DImode)
30418 {
30419 /* DImode plus/minus need to clobber flags. */
30420 /* The adddi3 and subdi3 patterns are incorrectly written so that
30421 they require matching operands, even when we could easily support
30422 three operands. Thankfully, this can be fixed up post-splitting,
30423 as the individual add+adc patterns do accept three operands and
30424 post-reload cprop can make these moves go away. */
30425 emit_move_insn (new_out, old_out);
30426 if (code == PLUS)
30427 x = gen_adddi3 (new_out, new_out, value);
30428 else
30429 x = gen_subdi3 (new_out, new_out, value);
30430 emit_insn (x);
30431 break;
30432 }
30433 /* FALLTHRU */
30434
30435 default:
30436 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30437 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30438 break;
30439 }
30440
30441 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30442 use_release);
30443
30444 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30445 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30446
30447 /* Checks whether a barrier is needed and emits one accordingly. */
30448 if (!(use_acquire || use_release))
30449 arm_post_atomic_barrier (model);
30450 }
30451 \f
30452 #define MAX_VECT_LEN 16
30453
30454 struct expand_vec_perm_d
30455 {
30456 rtx target, op0, op1;
30457 unsigned char perm[MAX_VECT_LEN];
30458 enum machine_mode vmode;
30459 unsigned char nelt;
30460 bool one_vector_p;
30461 bool testing_p;
30462 };
30463
30464 /* Generate a variable permutation. */
30465
30466 static void
30467 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30468 {
30469 enum machine_mode vmode = GET_MODE (target);
30470 bool one_vector_p = rtx_equal_p (op0, op1);
30471
30472 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30473 gcc_checking_assert (GET_MODE (op0) == vmode);
30474 gcc_checking_assert (GET_MODE (op1) == vmode);
30475 gcc_checking_assert (GET_MODE (sel) == vmode);
30476 gcc_checking_assert (TARGET_NEON);
30477
30478 if (one_vector_p)
30479 {
30480 if (vmode == V8QImode)
30481 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30482 else
30483 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30484 }
30485 else
30486 {
30487 rtx pair;
30488
30489 if (vmode == V8QImode)
30490 {
30491 pair = gen_reg_rtx (V16QImode);
30492 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30493 pair = gen_lowpart (TImode, pair);
30494 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30495 }
30496 else
30497 {
30498 pair = gen_reg_rtx (OImode);
30499 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30500 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30501 }
30502 }
30503 }
30504
30505 void
30506 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30507 {
30508 enum machine_mode vmode = GET_MODE (target);
30509 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30510 bool one_vector_p = rtx_equal_p (op0, op1);
30511 rtx rmask[MAX_VECT_LEN], mask;
30512
30513 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30514 numbering of elements for big-endian, we must reverse the order. */
30515 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30516
30517 /* The VTBL instruction does not use a modulo index, so we must take care
30518 of that ourselves. */
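  /* For example, with a single V8QImode input the mask below is nelt - 1 = 7,
     so a selector element of 9 is reduced to 9 & 7 = 1 before VTBL sees it.  */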
30519 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30520 for (i = 0; i < nelt; ++i)
30521 rmask[i] = mask;
30522 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30523 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30524
30525 arm_expand_vec_perm_1 (target, op0, op1, sel);
30526 }
30527
30528 /* Generate or test for an insn that supports a constant permutation. */
30529
30530 /* Recognize patterns for the VUZP insns. */
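/* For example, with two V4SImode operands the selector {0, 2, 4, 6} picks the
   even-numbered elements of the concatenated input and is matched below with
   odd == 0.  */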
30531
30532 static bool
30533 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30534 {
30535 unsigned int i, odd, mask, nelt = d->nelt;
30536 rtx out0, out1, in0, in1, x;
30537 rtx (*gen)(rtx, rtx, rtx, rtx);
30538
30539 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30540 return false;
30541
30542 /* Note that these are little-endian tests. Adjust for big-endian later. */
30543 if (d->perm[0] == 0)
30544 odd = 0;
30545 else if (d->perm[0] == 1)
30546 odd = 1;
30547 else
30548 return false;
30549 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30550
30551 for (i = 0; i < nelt; i++)
30552 {
30553 unsigned elt = (i * 2 + odd) & mask;
30554 if (d->perm[i] != elt)
30555 return false;
30556 }
30557
30558 /* Success! */
30559 if (d->testing_p)
30560 return true;
30561
30562 switch (d->vmode)
30563 {
30564 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30565 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30566 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30567 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30568 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30569 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30570 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30571 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30572 default:
30573 gcc_unreachable ();
30574 }
30575
30576 in0 = d->op0;
30577 in1 = d->op1;
30578 if (BYTES_BIG_ENDIAN)
30579 {
30580 x = in0, in0 = in1, in1 = x;
30581 odd = !odd;
30582 }
30583
30584 out0 = d->target;
30585 out1 = gen_reg_rtx (d->vmode);
30586 if (odd)
30587 x = out0, out0 = out1, out1 = x;
30588
30589 emit_insn (gen (out0, in0, in1, out1));
30590 return true;
30591 }
30592
30593 /* Recognize patterns for the VZIP insns. */
30594
30595 static bool
30596 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30597 {
30598 unsigned int i, high, mask, nelt = d->nelt;
30599 rtx out0, out1, in0, in1, x;
30600 rtx (*gen)(rtx, rtx, rtx, rtx);
30601
30602 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30603 return false;
30604
30605 /* Note that these are little-endian tests. Adjust for big-endian later. */
30606 high = nelt / 2;
30607 if (d->perm[0] == high)
30608 ;
30609 else if (d->perm[0] == 0)
30610 high = 0;
30611 else
30612 return false;
30613 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30614
30615 for (i = 0; i < nelt / 2; i++)
30616 {
30617 unsigned elt = (i + high) & mask;
30618 if (d->perm[i * 2] != elt)
30619 return false;
30620 elt = (elt + nelt) & mask;
30621 if (d->perm[i * 2 + 1] != elt)
30622 return false;
30623 }
30624
30625 /* Success! */
30626 if (d->testing_p)
30627 return true;
30628
30629 switch (d->vmode)
30630 {
30631 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30632 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30633 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30634 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30635 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30636 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30637 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30638 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30639 default:
30640 gcc_unreachable ();
30641 }
30642
30643 in0 = d->op0;
30644 in1 = d->op1;
30645 if (BYTES_BIG_ENDIAN)
30646 {
30647 x = in0, in0 = in1, in1 = x;
30648 high = !high;
30649 }
30650
30651 out0 = d->target;
30652 out1 = gen_reg_rtx (d->vmode);
30653 if (high)
30654 x = out0, out0 = out1, out1 = x;
30655
30656 emit_insn (gen (out0, in0, in1, out1));
30657 return true;
30658 }
30659
30660 /* Recognize patterns for the VREV insns. */
30661
30662 static bool
30663 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30664 {
30665 unsigned int i, j, diff, nelt = d->nelt;
30666 rtx (*gen)(rtx, rtx, rtx);
30667
30668 if (!d->one_vector_p)
30669 return false;
30670
30671 diff = d->perm[0];
30672 switch (diff)
30673 {
30674 case 7:
30675 switch (d->vmode)
30676 {
30677 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30678 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30679 default:
30680 return false;
30681 }
30682 break;
30683 case 3:
30684 switch (d->vmode)
30685 {
30686 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30687 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30688 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30689 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30690 default:
30691 return false;
30692 }
30693 break;
30694 case 1:
30695 switch (d->vmode)
30696 {
30697 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30698 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30699 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30700 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30701 case V4SImode: gen = gen_neon_vrev64v4si; break;
30702 case V2SImode: gen = gen_neon_vrev64v2si; break;
30703 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30704 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30705 default:
30706 return false;
30707 }
30708 break;
30709 default:
30710 return false;
30711 }
30712
30713 for (i = 0; i < nelt ; i += diff + 1)
30714 for (j = 0; j <= diff; j += 1)
30715 {
30716 /* This is guaranteed to be true, as the value of diff
30717 is 7, 3 or 1 and we should have enough elements in the
30718 queue to generate this. Getting a vector mask with a
30719 value of diff other than these implies that
30720 something is wrong by the time we get here. */
30721 gcc_assert (i + j < nelt);
30722 if (d->perm[i + j] != i + diff - j)
30723 return false;
30724 }
30725
30726 /* Success! */
30727 if (d->testing_p)
30728 return true;
30729
30730 /* ??? The third operand is an artifact of the builtin infrastructure
30731 and is ignored by the actual instruction. */
30732 emit_insn (gen (d->target, d->op0, const0_rtx));
30733 return true;
30734 }
30735
30736 /* Recognize patterns for the VTRN insns. */
30737
30738 static bool
30739 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30740 {
30741 unsigned int i, odd, mask, nelt = d->nelt;
30742 rtx out0, out1, in0, in1, x;
30743 rtx (*gen)(rtx, rtx, rtx, rtx);
30744
30745 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30746 return false;
30747
30748 /* Note that these are little-endian tests. Adjust for big-endian later. */
30749 if (d->perm[0] == 0)
30750 odd = 0;
30751 else if (d->perm[0] == 1)
30752 odd = 1;
30753 else
30754 return false;
30755 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30756
30757 for (i = 0; i < nelt; i += 2)
30758 {
30759 if (d->perm[i] != i + odd)
30760 return false;
30761 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30762 return false;
30763 }
30764
30765 /* Success! */
30766 if (d->testing_p)
30767 return true;
30768
30769 switch (d->vmode)
30770 {
30771 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30772 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30773 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30774 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30775 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30776 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30777 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30778 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30779 default:
30780 gcc_unreachable ();
30781 }
30782
30783 in0 = d->op0;
30784 in1 = d->op1;
30785 if (BYTES_BIG_ENDIAN)
30786 {
30787 x = in0, in0 = in1, in1 = x;
30788 odd = !odd;
30789 }
30790
30791 out0 = d->target;
30792 out1 = gen_reg_rtx (d->vmode);
30793 if (odd)
30794 x = out0, out0 = out1, out1 = x;
30795
30796 emit_insn (gen (out0, in0, in1, out1));
30797 return true;
30798 }
30799
30800 /* Recognize patterns for the VEXT insns. */
30801
30802 static bool
30803 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30804 {
30805 unsigned int i, nelt = d->nelt;
30806 rtx (*gen) (rtx, rtx, rtx, rtx);
30807 rtx offset;
30808
30809 unsigned int location;
30810
30811 unsigned int next = d->perm[0] + 1;
30812
30813 /* TODO: Handle GCC's numbering of elements for big-endian. */
30814 if (BYTES_BIG_ENDIAN)
30815 return false;
30816
30817 /* Check if the extracted indexes are increasing by one. */
30818 for (i = 1; i < nelt; next++, i++)
30819 {
30820 /* If we hit the most significant element of the 2nd vector in
30821 the previous iteration, no need to test further. */
30822 if (next == 2 * nelt)
30823 return false;
30824
30825 /* If we are operating on only one vector: it could be a
30826 rotation. If there are only two elements of size < 64, let
30827 arm_evpc_neon_vrev catch it. */
30828 if (d->one_vector_p && (next == nelt))
30829 {
30830 if ((nelt == 2) && (d->vmode != V2DImode))
30831 return false;
30832 else
30833 next = 0;
30834 }
30835
30836 if (d->perm[i] != next)
30837 return false;
30838 }
30839
30840 location = d->perm[0];
30841
30842 switch (d->vmode)
30843 {
30844 case V16QImode: gen = gen_neon_vextv16qi; break;
30845 case V8QImode: gen = gen_neon_vextv8qi; break;
30846 case V4HImode: gen = gen_neon_vextv4hi; break;
30847 case V8HImode: gen = gen_neon_vextv8hi; break;
30848 case V2SImode: gen = gen_neon_vextv2si; break;
30849 case V4SImode: gen = gen_neon_vextv4si; break;
30850 case V2SFmode: gen = gen_neon_vextv2sf; break;
30851 case V4SFmode: gen = gen_neon_vextv4sf; break;
30852 case V2DImode: gen = gen_neon_vextv2di; break;
30853 default:
30854 return false;
30855 }
30856
30857 /* Success! */
30858 if (d->testing_p)
30859 return true;
30860
30861 offset = GEN_INT (location);
30862 emit_insn (gen (d->target, d->op0, d->op1, offset));
30863 return true;
30864 }
30865
30866 /* The NEON VTBL instruction is a fully variable permutation that's even
30867 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30868 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30869 can do slightly better by expanding this as a constant where we don't
30870 have to apply a mask. */
30871
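/* As an illustration: a selector that matches none of the dedicated
   patterns above, say {0, 3, 1, 7, 5, 2, 6, 4} for V8QImode, falls
   through to this routine; the constant selector is materialised in a
   register and arm_expand_vec_perm_1 emits the VTBL-based table lookup
   that performs the permutation.  */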
30872 static bool
30873 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30874 {
30875 rtx rperm[MAX_VECT_LEN], sel;
30876 enum machine_mode vmode = d->vmode;
30877 unsigned int i, nelt = d->nelt;
30878
30879 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30880 numbering of elements for big-endian, we must reverse the order. */
30881 if (BYTES_BIG_ENDIAN)
30882 return false;
30883
30884 if (d->testing_p)
30885 return true;
30886
30887 /* Generic code will try constant permutation twice: once with the
30888 original mode and again with the elements lowered to QImode.
30889 So wait and don't do the selector expansion ourselves. */
30890 if (vmode != V8QImode && vmode != V16QImode)
30891 return false;
30892
30893 for (i = 0; i < nelt; ++i)
30894 rperm[i] = GEN_INT (d->perm[i]);
30895 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30896 sel = force_reg (vmode, sel);
30897
30898 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30899 return true;
30900 }
30901
30902 static bool
30903 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30904 {
30905 /* Check if the input mask matches vext before reordering the
30906 operands. */
30907 if (TARGET_NEON)
30908 if (arm_evpc_neon_vext (d))
30909 return true;
30910
30911 /* The pattern matching functions above are written to look for a small
30912 number to begin the sequence (0, 1, N/2). If we begin with an index
30913 from the second operand, we can swap the operands. */
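  /* For example, with nelt == 4 the selector {5, 6, 7, 0} starts in the
     second operand; adding nelt modulo 2 * nelt rewrites it as {1, 2, 3, 4},
     and swapping op0 and op1 keeps the overall result unchanged.  */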
30914 if (d->perm[0] >= d->nelt)
30915 {
30916 unsigned i, nelt = d->nelt;
30917 rtx x;
30918
30919 for (i = 0; i < nelt; ++i)
30920 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
30921
30922 x = d->op0;
30923 d->op0 = d->op1;
30924 d->op1 = x;
30925 }
30926
30927 if (TARGET_NEON)
30928 {
30929 if (arm_evpc_neon_vuzp (d))
30930 return true;
30931 if (arm_evpc_neon_vzip (d))
30932 return true;
30933 if (arm_evpc_neon_vrev (d))
30934 return true;
30935 if (arm_evpc_neon_vtrn (d))
30936 return true;
30937 return arm_evpc_neon_vtbl (d);
30938 }
30939 return false;
30940 }
30941
30942 /* Expand a vec_perm_const pattern. */
30943
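/* In the loop below, WHICH accumulates a two-bit mask: bit 0 is set if any
   selector element indexes the first input (element < nelt) and bit 1 if
   any indexes the second (element >= nelt).  The switch that follows folds
   the single-input cases so that the matchers only ever see a canonical
   one- or two-operand permutation.  */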
30944 bool
30945 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
30946 {
30947 struct expand_vec_perm_d d;
30948 int i, nelt, which;
30949
30950 d.target = target;
30951 d.op0 = op0;
30952 d.op1 = op1;
30953
30954 d.vmode = GET_MODE (target);
30955 gcc_assert (VECTOR_MODE_P (d.vmode));
30956 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30957 d.testing_p = false;
30958
30959 for (i = which = 0; i < nelt; ++i)
30960 {
30961 rtx e = XVECEXP (sel, 0, i);
30962 int ei = INTVAL (e) & (2 * nelt - 1);
30963 which |= (ei < nelt ? 1 : 2);
30964 d.perm[i] = ei;
30965 }
30966
30967 switch (which)
30968 {
30969 default:
30970 gcc_unreachable();
30971
30972 case 3:
30973 d.one_vector_p = false;
30974 if (!rtx_equal_p (op0, op1))
30975 break;
30976
30977 /* The elements of PERM do not suggest that only the first operand
30978 is used, but both operands are identical. Allow easier matching
30979 of the permutation by folding the permutation into the single
30980 input vector. */
30981 /* FALLTHRU */
30982 case 2:
30983 for (i = 0; i < nelt; ++i)
30984 d.perm[i] &= nelt - 1;
30985 d.op0 = op1;
30986 d.one_vector_p = true;
30987 break;
30988
30989 case 1:
30990 d.op1 = op0;
30991 d.one_vector_p = true;
30992 break;
30993 }
30994
30995 return arm_expand_vec_perm_const_1 (&d);
30996 }
30997
30998 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
30999
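/* The vectorizer uses this hook to ask, before committing to a strategy,
   whether a constant permutation can be implemented at all.  We answer by
   running the same matchers as the expander, but with testing_p set and
   placeholder registers, inside a sequence that is immediately discarded,
   so no RTL is actually emitted.  */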
31000 static bool
31001 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
31002 const unsigned char *sel)
31003 {
31004 struct expand_vec_perm_d d;
31005 unsigned int i, nelt, which;
31006 bool ret;
31007
31008 d.vmode = vmode;
31009 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31010 d.testing_p = true;
31011 memcpy (d.perm, sel, nelt);
31012
31013 /* Categorize the set of elements in the selector. */
31014 for (i = which = 0; i < nelt; ++i)
31015 {
31016 unsigned char e = d.perm[i];
31017 gcc_assert (e < 2 * nelt);
31018 which |= (e < nelt ? 1 : 2);
31019 }
31020
31021 /* If all elements are from the second vector, fold them into the first. */
31022 if (which == 2)
31023 for (i = 0; i < nelt; ++i)
31024 d.perm[i] -= nelt;
31025
31026 /* Check whether the mask can be applied to the vector type. */
31027 d.one_vector_p = (which != 3);
31028
31029 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31030 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31031 if (!d.one_vector_p)
31032 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31033
31034 start_sequence ();
31035 ret = arm_expand_vec_perm_const_1 (&d);
31036 end_sequence ();
31037
31038 return ret;
31039 }
31040
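/* Return true if the auto-increment addressing form CODE (pre/post
   increment or decrement) can be used with MODE on this target.  */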
31041 bool
31042 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
31043 {
31044 /* On soft-float targets, all auto-increment forms are OK when we have
31045 LDRD or when the mode is no wider than a word. */
31046 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31047 return true;
31048
31049 switch (code)
31050 {
31051 /* Post-increment is supported for all modes; pre-decrement is
31052 supported for all modes except vector modes. */
31053 case ARM_POST_INC:
31054 case ARM_PRE_DEC:
31055 if (VECTOR_MODE_P (mode))
31056 {
31057 if (code != ARM_PRE_DEC)
31058 return true;
31059 else
31060 return false;
31061 }
31062
31063 return true;
31064
31065 case ARM_POST_DEC:
31066 case ARM_PRE_INC:
31067 /* Without LDRD, when the mode is wider than a word there is no
31068 point in auto-incrementing because ldm and stm do not
31069 have these forms. */
31070 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31071 return false;
31072
31073 /* Vector and floating point modes do not support
31074 these auto increment forms. */
31075 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31076 return false;
31077
31078 return true;
31079
31080 default:
31081 return false;
31082
31083 }
31084
31085 return false;
31086 }
31087
31088 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
31089 on ARM, since we know that shifts by negative amounts are no-ops.
31090 Additionally, the default expansion code is not available or suitable
31091 for post-reload insn splits (this can occur when the register allocator
31092 chooses not to do a shift in NEON).
31093
31094 This function is used in both initial expand and post-reload splits, and
31095 handles all kinds of 64-bit shifts.
31096
31097 Input requirements:
31098 - It is safe for the input and output to be the same register, but
31099 early-clobber rules apply for the shift amount and scratch registers.
31100 - Shift by register requires both scratch registers. In all other cases
31101 the scratch registers may be NULL.
31102 - Ashiftrt by a register also clobbers the CC register. */
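/* As a concrete example of the constant case handled below, an ASHIFT of a
   DImode value by 40 needs no scratch registers and expands to

	out_high = in_low << 8;	 out_low = 0;

   while an ASHIFTRT by 40 expands to

	out_low = in_high >> 8;	 out_high = in_high >> 31;  */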
31103 void
31104 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31105 rtx amount, rtx scratch1, rtx scratch2)
31106 {
31107 rtx out_high = gen_highpart (SImode, out);
31108 rtx out_low = gen_lowpart (SImode, out);
31109 rtx in_high = gen_highpart (SImode, in);
31110 rtx in_low = gen_lowpart (SImode, in);
31111
31112 /* Terminology:
31113 in = the register pair containing the input value.
31114 out = the destination register pair.
31115 up = the high- or low-part of each pair.
31116 down = the opposite part to "up".
31117 In a shift, we can consider bits to shift from "up"-stream to
31118 "down"-stream, so in a left-shift "up" is the low-part and "down"
31119 is the high-part of each register pair. */
31120
31121 rtx out_up = code == ASHIFT ? out_low : out_high;
31122 rtx out_down = code == ASHIFT ? out_high : out_low;
31123 rtx in_up = code == ASHIFT ? in_low : in_high;
31124 rtx in_down = code == ASHIFT ? in_high : in_low;
31125
31126 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31127 gcc_assert (out
31128 && (REG_P (out) || GET_CODE (out) == SUBREG)
31129 && GET_MODE (out) == DImode);
31130 gcc_assert (in
31131 && (REG_P (in) || GET_CODE (in) == SUBREG)
31132 && GET_MODE (in) == DImode);
31133 gcc_assert (amount
31134 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
31135 && GET_MODE (amount) == SImode)
31136 || CONST_INT_P (amount)));
31137 gcc_assert (scratch1 == NULL
31138 || (GET_CODE (scratch1) == SCRATCH)
31139 || (GET_MODE (scratch1) == SImode
31140 && REG_P (scratch1)));
31141 gcc_assert (scratch2 == NULL
31142 || (GET_CODE (scratch2) == SCRATCH)
31143 || (GET_MODE (scratch2) == SImode
31144 && REG_P (scratch2)));
31145 gcc_assert (!REG_P (out) || !REG_P (amount)
31146 || !HARD_REGISTER_P (out)
31147 || (REGNO (out) != REGNO (amount)
31148 && REGNO (out) + 1 != REGNO (amount)));
31149
31150 /* Macros to make following code more readable. */
31151 #define SUB_32(DEST,SRC) \
31152 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31153 #define RSB_32(DEST,SRC) \
31154 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31155 #define SUB_S_32(DEST,SRC) \
31156 gen_addsi3_compare0 ((DEST), (SRC), \
31157 GEN_INT (-32))
31158 #define SET(DEST,SRC) \
31159 gen_rtx_SET (SImode, (DEST), (SRC))
31160 #define SHIFT(CODE,SRC,AMOUNT) \
31161 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31162 #define LSHIFT(CODE,SRC,AMOUNT) \
31163 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31164 SImode, (SRC), (AMOUNT))
31165 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31166 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31167 SImode, (SRC), (AMOUNT))
31168 #define ORR(A,B) \
31169 gen_rtx_IOR (SImode, (A), (B))
31170 #define BRANCH(COND,LABEL) \
31171 gen_arm_cond_branch ((LABEL), \
31172 gen_rtx_ ## COND (CCmode, cc_reg, \
31173 const0_rtx), \
31174 cc_reg)
31175
31176 /* Shifts by register and shifts by constant are handled separately. */
31177 if (CONST_INT_P (amount))
31178 {
31179 /* We have a shift-by-constant. */
31180
31181 /* First, handle out-of-range shift amounts.
31182 In both cases we try to match the result an ARM instruction in a
31183 shift-by-register would give. This helps reduce execution
31184 differences between optimization levels, but it won't stop other
31185 parts of the compiler doing different things. This is "undefined
31186 behaviour", in any case. */
31187 if (INTVAL (amount) <= 0)
31188 emit_insn (gen_movdi (out, in));
31189 else if (INTVAL (amount) >= 64)
31190 {
31191 if (code == ASHIFTRT)
31192 {
31193 rtx const31_rtx = GEN_INT (31);
31194 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31195 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31196 }
31197 else
31198 emit_insn (gen_movdi (out, const0_rtx));
31199 }
31200
31201 /* Now handle valid shifts. */
31202 else if (INTVAL (amount) < 32)
31203 {
31204 /* Shifts by a constant less than 32. */
31205 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31206
31207 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31208 emit_insn (SET (out_down,
31209 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31210 out_down)));
31211 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31212 }
31213 else
31214 {
31215 /* Shifts by a constant greater than 31. */
31216 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31217
31218 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31219 if (code == ASHIFTRT)
31220 emit_insn (gen_ashrsi3 (out_up, in_up,
31221 GEN_INT (31)));
31222 else
31223 emit_insn (SET (out_up, const0_rtx));
31224 }
31225 }
31226 else
31227 {
31228 /* We have a shift-by-register. */
31229 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31230
31231 /* This alternative requires the scratch registers. */
31232 gcc_assert (scratch1 && REG_P (scratch1));
31233 gcc_assert (scratch2 && REG_P (scratch2));
31234
31235 /* We will need the values "amount-32" and "32-amount" later.
31236 Swapping them around now allows the later code to be more general. */
31237 switch (code)
31238 {
31239 case ASHIFT:
31240 emit_insn (SUB_32 (scratch1, amount));
31241 emit_insn (RSB_32 (scratch2, amount));
31242 break;
31243 case ASHIFTRT:
31244 emit_insn (RSB_32 (scratch1, amount));
31245 /* Also set CC = amount > 32. */
31246 emit_insn (SUB_S_32 (scratch2, amount));
31247 break;
31248 case LSHIFTRT:
31249 emit_insn (RSB_32 (scratch1, amount));
31250 emit_insn (SUB_32 (scratch2, amount));
31251 break;
31252 default:
31253 gcc_unreachable ();
31254 }
31255
31256 /* Emit code like this:
31257
31258 arithmetic-left:
31259 out_down = in_down << amount;
31260 out_down = (in_up << (amount - 32)) | out_down;
31261 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31262 out_up = in_up << amount;
31263
31264 arithmetic-right:
31265 out_down = in_down >> amount;
31266 out_down = (in_up << (32 - amount)) | out_down;
31267 if (amount >= 32)
31268 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31269 out_up = in_up >> amount;
31270
31271 logical-right:
31272 out_down = in_down >> amount;
31273 out_down = (in_up << (32 - amount)) | out_down;
31274 if (amount >= 32)
31275 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31276 out_up = in_up >> amount;
31277
31278 The ARM and Thumb2 variants are the same but implemented slightly
31279 differently. If this were only called during expand we could just
31280 use the Thumb2 case and let combine do the right thing, but this
31281 can also be called from post-reload splitters. */
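/* In ARM state, ORR accepts a register-controlled shift as its second
   operand, so each combining step below is a single instruction.  In
   Thumb-2, shifted-register operands only take an immediate shift amount,
   so the register-controlled shift must go through a scratch register
   first and then be ORred in separately.  */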
31282
31283 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31284
31285 if (!TARGET_THUMB2)
31286 {
31287 /* Emit code for ARM mode. */
31288 emit_insn (SET (out_down,
31289 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31290 if (code == ASHIFTRT)
31291 {
31292 rtx done_label = gen_label_rtx ();
31293 emit_jump_insn (BRANCH (LT, done_label));
31294 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31295 out_down)));
31296 emit_label (done_label);
31297 }
31298 else
31299 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31300 out_down)));
31301 }
31302 else
31303 {
31304 /* Emit code for Thumb2 mode.
31305 Thumb2 can't do shift and or in one insn. */
31306 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31307 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31308
31309 if (code == ASHIFTRT)
31310 {
31311 rtx done_label = gen_label_rtx ();
31312 emit_jump_insn (BRANCH (LT, done_label));
31313 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31314 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31315 emit_label (done_label);
31316 }
31317 else
31318 {
31319 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31320 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31321 }
31322 }
31323
31324 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31325 }
31326
31327 #undef SUB_32
31328 #undef RSB_32
31329 #undef SUB_S_32
31330 #undef SET
31331 #undef SHIFT
31332 #undef LSHIFT
31333 #undef REV_LSHIFT
31334 #undef ORR
31335 #undef BRANCH
31336 }
31337
31338
31339 /* Return true if *COMPARISON is a valid comparison operation, and
31340 force its operands into a form that is valid for the target. */
31341 bool
31342 arm_validize_comparison (rtx *comparison, rtx *op1, rtx *op2)
31343 {
31344 enum rtx_code code = GET_CODE (*comparison);
31345 int code_int;
31346 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31347 ? GET_MODE (*op2) : GET_MODE (*op1);
31348
31349 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31350
31351 if (code == UNEQ || code == LTGT)
31352 return false;
31353
31354 code_int = (int)code;
31355 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31356 PUT_CODE (*comparison, (enum rtx_code)code_int);
31357
31358 switch (mode)
31359 {
31360 case SImode:
31361 if (!arm_add_operand (*op1, mode))
31362 *op1 = force_reg (mode, *op1);
31363 if (!arm_add_operand (*op2, mode))
31364 *op2 = force_reg (mode, *op2);
31365 return true;
31366
31367 case DImode:
31368 if (!cmpdi_operand (*op1, mode))
31369 *op1 = force_reg (mode, *op1);
31370 if (!cmpdi_operand (*op2, mode))
31371 *op2 = force_reg (mode, *op2);
31372 return true;
31373
31374 case SFmode:
31375 case DFmode:
31376 if (!arm_float_compare_operand (*op1, mode))
31377 *op1 = force_reg (mode, *op1);
31378 if (!arm_float_compare_operand (*op2, mode))
31379 *op2 = force_reg (mode, *op2);
31380 return true;
31381 default:
31382 break;
31383 }
31384
31385 return false;
31386
31387 }
31388
31389 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
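/* AddressSanitizer forms shadow addresses as (address >> 3) + offset, so
   returning 1 << 29 places the shadow region at 0x20000000 in the 32-bit
   ARM address space.  */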
31390
31391 static unsigned HOST_WIDE_INT
31392 arm_asan_shadow_offset (void)
31393 {
31394 return (unsigned HOST_WIDE_INT) 1 << 29;
31395 }
31396
31397
31398 /* This is a temporary fix for PR60655. Ideally we need
31399 to handle most of these cases in the generic part but
31400 currently we reject minus (..) (sym_ref). We try to
31401 ameliorate the case with minus (sym_ref1) (sym_ref2)
31402 where they are in the same section. */
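/* For example, a debug expression (minus (symbol_ref a) (symbol_ref b))
   is acceptable when A and B are variables placed in the same section,
   because the difference is then a link-time constant; if they end up in
   different sections the value is not known at assembly time, so we
   report the constant as not OK for debug output.  */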
31403
31404 static bool
31405 arm_const_not_ok_for_debug_p (rtx p)
31406 {
31407 tree decl_op0 = NULL;
31408 tree decl_op1 = NULL;
31409
31410 if (GET_CODE (p) == MINUS)
31411 {
31412 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
31413 {
31414 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
31415 if (decl_op1
31416 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
31417 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
31418 {
31419 if ((TREE_CODE (decl_op1) == VAR_DECL
31420 || TREE_CODE (decl_op1) == CONST_DECL)
31421 && (TREE_CODE (decl_op0) == VAR_DECL
31422 || TREE_CODE (decl_op0) == CONST_DECL))
31423 return (get_variable_section (decl_op1, false)
31424 != get_variable_section (decl_op0, false));
31425
31426 if (TREE_CODE (decl_op1) == LABEL_DECL
31427 && TREE_CODE (decl_op0) == LABEL_DECL)
31428 return (DECL_CONTEXT (decl_op1)
31429 != DECL_CONTEXT (decl_op0));
31430 }
31431
31432 return true;
31433 }
31434 }
31435
31436 return false;
31437 }
31438
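/* Implement the TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  Build the three
   statement trees the middle end wraps around an atomic floating-point
   compound assignment: *HOLD saves the FPSCR and masks out the exception
   enable and flag bits, *CLEAR re-clears the flags if the operation has
   to be retried, and *UPDATE restores the saved FPSCR and re-raises, via
   __atomic_feraiseexcept, any exceptions the operation itself produced.  */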
31439 static void
31440 arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
31441 {
31442 const unsigned ARM_FE_INVALID = 1;
31443 const unsigned ARM_FE_DIVBYZERO = 2;
31444 const unsigned ARM_FE_OVERFLOW = 4;
31445 const unsigned ARM_FE_UNDERFLOW = 8;
31446 const unsigned ARM_FE_INEXACT = 16;
31447 const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID
31448 | ARM_FE_DIVBYZERO
31449 | ARM_FE_OVERFLOW
31450 | ARM_FE_UNDERFLOW
31451 | ARM_FE_INEXACT);
31452 const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8;
31453 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
31454 tree new_fenv_var, reload_fenv, restore_fnenv;
31455 tree update_call, atomic_feraiseexcept, hold_fnclex;
31456
31457 if (!TARGET_VFP)
31458 return;
31459
31460 /* Generate the equivalent of:
31461 unsigned int fenv_var;
31462 fenv_var = __builtin_arm_get_fpscr ();
31463
31464 unsigned int masked_fenv;
31465 masked_fenv = fenv_var & mask;
31466
31467 __builtin_arm_set_fpscr (masked_fenv); */
31468
31469 fenv_var = create_tmp_var (unsigned_type_node, NULL);
31470 get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR];
31471 set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR];
31472 mask = build_int_cst (unsigned_type_node,
31473 ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT)
31474 | ARM_FE_ALL_EXCEPT));
31475 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
31476 fenv_var, build_call_expr (get_fpscr, 0));
31477 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
31478 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
31479 *hold = build2 (COMPOUND_EXPR, void_type_node,
31480 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
31481 hold_fnclex);
31482
31483 /* Store the value of masked_fenv to clear the exceptions:
31484 __builtin_arm_set_fpscr (masked_fenv); */
31485
31486 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
31487
31488 /* Generate the equivalent of:
31489 unsigned int new_fenv_var;
31490 new_fenv_var = __builtin_arm_get_fpscr ();
31491
31492 __builtin_arm_set_fpscr (fenv_var);
31493
31494 __atomic_feraiseexcept (new_fenv_var); */
31495
31496 new_fenv_var = create_tmp_var (unsigned_type_node, NULL);
31497 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
31498 build_call_expr (get_fpscr, 0));
31499 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
31500 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
31501 update_call = build_call_expr (atomic_feraiseexcept, 1,
31502 fold_convert (integer_type_node, new_fenv_var));
31503 *update = build2 (COMPOUND_EXPR, void_type_node,
31504 build2 (COMPOUND_EXPR, void_type_node,
31505 reload_fenv, restore_fnenv), update_call);
31506 }
31507
31508 #include "gt-arm.h"