1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2013 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "debug.h"
55 #include "langhooks.h"
56 #include "df.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62
63 /* Forward definitions of types. */
64 typedef struct minipool_node Mnode;
65 typedef struct minipool_fixup Mfix;
66
67 void (*arm_lang_output_object_attributes_hook)(void);
68
69 struct four_ints
70 {
71 int i[4];
72 };
73
74 /* Forward function declarations. */
75 static bool arm_lra_p (void);
76 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
77 static int arm_compute_static_chain_stack_bytes (void);
78 static arm_stack_offsets *arm_get_frame_offsets (void);
79 static void arm_add_gc_roots (void);
80 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
81 HOST_WIDE_INT, rtx, rtx, int, int);
82 static unsigned bit_count (unsigned long);
83 static int arm_address_register_rtx_p (rtx, int);
84 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
85 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
86 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
87 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
88 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
89 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
90 inline static int thumb1_index_register_rtx_p (rtx, int);
91 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
92 static int thumb_far_jump_used_p (void);
93 static bool thumb_force_lr_save (void);
94 static unsigned arm_size_return_regs (void);
95 static bool arm_assemble_integer (rtx, unsigned int, int);
96 static void arm_print_operand (FILE *, rtx, int);
97 static void arm_print_operand_address (FILE *, rtx);
98 static bool arm_print_operand_punct_valid_p (unsigned char code);
99 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
100 static arm_cc get_arm_condition_code (rtx);
101 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
102 static const char *output_multi_immediate (rtx *, const char *, const char *,
103 int, HOST_WIDE_INT);
104 static const char *shift_op (rtx, HOST_WIDE_INT *);
105 static struct machine_function *arm_init_machine_status (void);
106 static void thumb_exit (FILE *, int);
107 static HOST_WIDE_INT get_jump_table_size (rtx);
108 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
109 static Mnode *add_minipool_forward_ref (Mfix *);
110 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
111 static Mnode *add_minipool_backward_ref (Mfix *);
112 static void assign_minipool_offsets (Mfix *);
113 static void arm_print_value (FILE *, rtx);
114 static void dump_minipool (rtx);
115 static int arm_barrier_cost (rtx);
116 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
117 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
118 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
119 rtx);
120 static void arm_reorg (void);
121 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
122 static unsigned long arm_compute_save_reg0_reg12_mask (void);
123 static unsigned long arm_compute_save_reg_mask (void);
124 static unsigned long arm_isr_value (tree);
125 static unsigned long arm_compute_func_type (void);
126 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
127 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
128 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
129 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
130 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
131 #endif
132 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
133 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
134 static int arm_comp_type_attributes (const_tree, const_tree);
135 static void arm_set_default_type_attributes (tree);
136 static int arm_adjust_cost (rtx, rtx, rtx, int);
137 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
138 static int optimal_immediate_sequence (enum rtx_code code,
139 unsigned HOST_WIDE_INT val,
140 struct four_ints *return_sequence);
141 static int optimal_immediate_sequence_1 (enum rtx_code code,
142 unsigned HOST_WIDE_INT val,
143 struct four_ints *return_sequence,
144 int i);
145 static int arm_get_strip_length (int);
146 static bool arm_function_ok_for_sibcall (tree, tree);
147 static enum machine_mode arm_promote_function_mode (const_tree,
148 enum machine_mode, int *,
149 const_tree, int);
150 static bool arm_return_in_memory (const_tree, const_tree);
151 static rtx arm_function_value (const_tree, const_tree, bool);
152 static rtx arm_libcall_value_1 (enum machine_mode);
153 static rtx arm_libcall_value (enum machine_mode, const_rtx);
154 static bool arm_function_value_regno_p (const unsigned int);
155 static void arm_internal_label (FILE *, const char *, unsigned long);
156 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
157 tree);
158 static bool arm_have_conditional_execution (void);
159 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
160 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
161 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
162 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
163 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
165 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
166 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
167 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
168 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
169 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
170 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
171 static void arm_init_builtins (void);
172 static void arm_init_iwmmxt_builtins (void);
173 static rtx safe_vector_operand (rtx, enum machine_mode);
174 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
175 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
176 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
177 static tree arm_builtin_decl (unsigned, bool);
178 static void emit_constant_insn (rtx cond, rtx pattern);
179 static rtx emit_set_insn (rtx, rtx);
180 static rtx emit_multi_reg_push (unsigned long);
181 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
182 tree, bool);
183 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
184 const_tree, bool);
185 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
186 const_tree, bool);
187 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
188 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
189 const_tree);
190 static rtx aapcs_libcall_value (enum machine_mode);
191 static int aapcs_select_return_coproc (const_tree, const_tree);
192
193 #ifdef OBJECT_FORMAT_ELF
194 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
195 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
196 #endif
197 #ifndef ARM_PE
198 static void arm_encode_section_info (tree, rtx, int);
199 #endif
200
201 static void arm_file_end (void);
202 static void arm_file_start (void);
203
204 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
205 tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 enum machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
214 #if ARM_UNWIND_INFO
215 static void arm_unwind_emit (FILE *, rtx);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 static void arm_asm_init_sections (void);
219 #endif
220 static rtx arm_dwarf_register_span (rtx);
221
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
237 static bool arm_cannot_copy_insn_p (rtx);
238 static bool arm_tls_symbol_p (rtx x);
239 static int arm_issue_rate (void);
240 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
241 static bool arm_output_addr_const_extra (FILE *, rtx);
242 static bool arm_allocate_stack_slots_for_args (void);
243 static bool arm_warn_func_return (tree);
244 static const char *arm_invalid_parameter_type (const_tree t);
245 static const char *arm_invalid_return_type (const_tree t);
246 static tree arm_promoted_type (const_tree t);
247 static tree arm_convert_to_type (tree type, tree expr);
248 static bool arm_scalar_mode_supported_p (enum machine_mode);
249 static bool arm_frame_pointer_required (void);
250 static bool arm_can_eliminate (const int, const int);
251 static void arm_asm_trampoline_template (FILE *);
252 static void arm_trampoline_init (rtx, tree, rtx);
253 static rtx arm_trampoline_adjust_address (rtx);
254 static rtx arm_pic_static_addr (rtx orig, rtx reg);
255 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
256 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
257 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
258 static bool arm_array_mode_supported_p (enum machine_mode,
259 unsigned HOST_WIDE_INT);
260 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
261 static bool arm_class_likely_spilled_p (reg_class_t);
262 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
263 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
264 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
265 const_tree type,
266 int misalignment,
267 bool is_packed);
268 static void arm_conditional_register_usage (void);
269 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
270 static unsigned int arm_autovectorize_vector_sizes (void);
271 static int arm_default_branch_cost (bool, bool);
272 static int arm_cortex_a5_branch_cost (bool, bool);
273 static int arm_cortex_m_branch_cost (bool, bool);
274
275 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
276 const unsigned char *sel);
277
278 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
279 tree vectype,
280 int misalign ATTRIBUTE_UNUSED);
281 static unsigned arm_add_stmt_cost (void *data, int count,
282 enum vect_cost_for_stmt kind,
283 struct _stmt_vec_info *stmt_info,
284 int misalign,
285 enum vect_cost_model_location where);
286
287 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
288 bool op0_preserve_value);
289 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
290 \f
291 /* Table of machine attributes. */
292 static const struct attribute_spec arm_attribute_table[] =
293 {
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
295 affects_type_identity } */
296 /* Function calls made to this symbol must be done indirectly, because
297 it may lie outside of the 26 bit addressing range of a normal function
298 call. */
299 { "long_call", 0, 0, false, true, true, NULL, false },
300 /* Whereas these functions are always known to reside within the 26 bit
301 addressing range. */
302 { "short_call", 0, 0, false, true, true, NULL, false },
303 /* Specify the procedure call conventions for a function. */
304 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
305 false },
306 /* Interrupt Service Routines have special prologue and epilogue requirements. */
307 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
308 false },
309 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
310 false },
311 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
312 false },
313 #ifdef ARM_PE
314 /* ARM/PE has three new attributes:
315 interfacearm - ?
316 dllexport - for exporting a function/variable that will live in a dll
317 dllimport - for importing a function/variable from a dll
318
319 Microsoft allows multiple declspecs in one __declspec, separating
320 them with spaces. We do NOT support this. Instead, use __declspec
321 multiple times.
322 */
323 { "dllimport", 0, 0, true, false, false, NULL, false },
324 { "dllexport", 0, 0, true, false, false, NULL, false },
325 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
326 false },
327 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
328 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
329 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
330 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
331 false },
332 #endif
333 { NULL, 0, 0, false, false, false, NULL, false }
334 };
335 \f
336 /* Initialize the GCC target structure. */
337 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
338 #undef TARGET_MERGE_DECL_ATTRIBUTES
339 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
340 #endif
341
342 #undef TARGET_LEGITIMIZE_ADDRESS
343 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
344
345 #undef TARGET_LRA_P
346 #define TARGET_LRA_P arm_lra_p
347
348 #undef TARGET_ATTRIBUTE_TABLE
349 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
350
351 #undef TARGET_ASM_FILE_START
352 #define TARGET_ASM_FILE_START arm_file_start
353 #undef TARGET_ASM_FILE_END
354 #define TARGET_ASM_FILE_END arm_file_end
355
356 #undef TARGET_ASM_ALIGNED_SI_OP
357 #define TARGET_ASM_ALIGNED_SI_OP NULL
358 #undef TARGET_ASM_INTEGER
359 #define TARGET_ASM_INTEGER arm_assemble_integer
360
361 #undef TARGET_PRINT_OPERAND
362 #define TARGET_PRINT_OPERAND arm_print_operand
363 #undef TARGET_PRINT_OPERAND_ADDRESS
364 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
365 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
366 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
367
368 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
369 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
370
371 #undef TARGET_ASM_FUNCTION_PROLOGUE
372 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
373
374 #undef TARGET_ASM_FUNCTION_EPILOGUE
375 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
376
377 #undef TARGET_OPTION_OVERRIDE
378 #define TARGET_OPTION_OVERRIDE arm_option_override
379
380 #undef TARGET_COMP_TYPE_ATTRIBUTES
381 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
382
383 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
384 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
385
386 #undef TARGET_SCHED_ADJUST_COST
387 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
388
389 #undef TARGET_SCHED_REORDER
390 #define TARGET_SCHED_REORDER arm_sched_reorder
391
392 #undef TARGET_REGISTER_MOVE_COST
393 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
394
395 #undef TARGET_MEMORY_MOVE_COST
396 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
397
398 #undef TARGET_ENCODE_SECTION_INFO
399 #ifdef ARM_PE
400 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
401 #else
402 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
403 #endif
404
405 #undef TARGET_STRIP_NAME_ENCODING
406 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
407
408 #undef TARGET_ASM_INTERNAL_LABEL
409 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
410
411 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
412 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
413
414 #undef TARGET_FUNCTION_VALUE
415 #define TARGET_FUNCTION_VALUE arm_function_value
416
417 #undef TARGET_LIBCALL_VALUE
418 #define TARGET_LIBCALL_VALUE arm_libcall_value
419
420 #undef TARGET_FUNCTION_VALUE_REGNO_P
421 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
422
423 #undef TARGET_ASM_OUTPUT_MI_THUNK
424 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
425 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
426 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
427
428 #undef TARGET_RTX_COSTS
429 #define TARGET_RTX_COSTS arm_rtx_costs
430 #undef TARGET_ADDRESS_COST
431 #define TARGET_ADDRESS_COST arm_address_cost
432
433 #undef TARGET_SHIFT_TRUNCATION_MASK
434 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
435 #undef TARGET_VECTOR_MODE_SUPPORTED_P
436 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
437 #undef TARGET_ARRAY_MODE_SUPPORTED_P
438 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
439 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
440 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
441 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
442 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
443 arm_autovectorize_vector_sizes
444
445 #undef TARGET_MACHINE_DEPENDENT_REORG
446 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
447
448 #undef TARGET_INIT_BUILTINS
449 #define TARGET_INIT_BUILTINS arm_init_builtins
450 #undef TARGET_EXPAND_BUILTIN
451 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
452 #undef TARGET_BUILTIN_DECL
453 #define TARGET_BUILTIN_DECL arm_builtin_decl
454
455 #undef TARGET_INIT_LIBFUNCS
456 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
457
458 #undef TARGET_PROMOTE_FUNCTION_MODE
459 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
460 #undef TARGET_PROMOTE_PROTOTYPES
461 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
462 #undef TARGET_PASS_BY_REFERENCE
463 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
466 #undef TARGET_FUNCTION_ARG
467 #define TARGET_FUNCTION_ARG arm_function_arg
468 #undef TARGET_FUNCTION_ARG_ADVANCE
469 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
470 #undef TARGET_FUNCTION_ARG_BOUNDARY
471 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
472
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
475
476 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
477 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
478
479 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
480 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
481 #undef TARGET_TRAMPOLINE_INIT
482 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
483 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
484 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
485
486 #undef TARGET_WARN_FUNC_RETURN
487 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
488
489 #undef TARGET_DEFAULT_SHORT_ENUMS
490 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
491
492 #undef TARGET_ALIGN_ANON_BITFIELD
493 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
494
495 #undef TARGET_NARROW_VOLATILE_BITFIELD
496 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
497
498 #undef TARGET_CXX_GUARD_TYPE
499 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
500
501 #undef TARGET_CXX_GUARD_MASK_BIT
502 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
503
504 #undef TARGET_CXX_GET_COOKIE_SIZE
505 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
506
507 #undef TARGET_CXX_COOKIE_HAS_SIZE
508 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
509
510 #undef TARGET_CXX_CDTOR_RETURNS_THIS
511 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
512
513 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
514 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
515
516 #undef TARGET_CXX_USE_AEABI_ATEXIT
517 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
518
519 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
520 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
521 arm_cxx_determine_class_data_visibility
522
523 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
524 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
525
526 #undef TARGET_RETURN_IN_MSB
527 #define TARGET_RETURN_IN_MSB arm_return_in_msb
528
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
531
532 #undef TARGET_MUST_PASS_IN_STACK
533 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
534
535 #if ARM_UNWIND_INFO
536 #undef TARGET_ASM_UNWIND_EMIT
537 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
538
539 /* EABI unwinding tables use a different format for the typeinfo tables. */
540 #undef TARGET_ASM_TTYPE
541 #define TARGET_ASM_TTYPE arm_output_ttype
542
543 #undef TARGET_ARM_EABI_UNWINDER
544 #define TARGET_ARM_EABI_UNWINDER true
545
546 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
547 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
548
549 #undef TARGET_ASM_INIT_SECTIONS
550 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
551 #endif /* ARM_UNWIND_INFO */
552
553 #undef TARGET_DWARF_REGISTER_SPAN
554 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
555
556 #undef TARGET_CANNOT_COPY_INSN_P
557 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
558
559 #ifdef HAVE_AS_TLS
560 #undef TARGET_HAVE_TLS
561 #define TARGET_HAVE_TLS true
562 #endif
563
564 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
565 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
566
567 #undef TARGET_LEGITIMATE_CONSTANT_P
568 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
569
570 #undef TARGET_CANNOT_FORCE_CONST_MEM
571 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
572
573 #undef TARGET_MAX_ANCHOR_OFFSET
574 #define TARGET_MAX_ANCHOR_OFFSET 4095
575
576 /* The minimum is set such that the total size of the block
577 for a particular anchor is -4088 + 1 + 4095 bytes, which is
578 divisible by eight, ensuring natural spacing of anchors. */
579 #undef TARGET_MIN_ANCHOR_OFFSET
580 #define TARGET_MIN_ANCHOR_OFFSET -4088
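/* That is, anchored offsets run from -4088 to +4095 inclusive, a block of
   4088 + 4095 + 1 = 8184 bytes, i.e. 8 * 1023.  */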
581
582 #undef TARGET_SCHED_ISSUE_RATE
583 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
584
585 #undef TARGET_MANGLE_TYPE
586 #define TARGET_MANGLE_TYPE arm_mangle_type
587
588 #undef TARGET_BUILD_BUILTIN_VA_LIST
589 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
590 #undef TARGET_EXPAND_BUILTIN_VA_START
591 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
592 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
593 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
594
595 #ifdef HAVE_AS_TLS
596 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
597 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
598 #endif
599
600 #undef TARGET_LEGITIMATE_ADDRESS_P
601 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
602
603 #undef TARGET_PREFERRED_RELOAD_CLASS
604 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
605
606 #undef TARGET_INVALID_PARAMETER_TYPE
607 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
608
609 #undef TARGET_INVALID_RETURN_TYPE
610 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
611
612 #undef TARGET_PROMOTED_TYPE
613 #define TARGET_PROMOTED_TYPE arm_promoted_type
614
615 #undef TARGET_CONVERT_TO_TYPE
616 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
617
618 #undef TARGET_SCALAR_MODE_SUPPORTED_P
619 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
620
621 #undef TARGET_FRAME_POINTER_REQUIRED
622 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
623
624 #undef TARGET_CAN_ELIMINATE
625 #define TARGET_CAN_ELIMINATE arm_can_eliminate
626
627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
628 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
629
630 #undef TARGET_CLASS_LIKELY_SPILLED_P
631 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
632
633 #undef TARGET_VECTORIZE_BUILTINS
634 #define TARGET_VECTORIZE_BUILTINS
635
636 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
637 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
638 arm_builtin_vectorized_function
639
640 #undef TARGET_VECTOR_ALIGNMENT
641 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
642
643 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
644 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
645 arm_vector_alignment_reachable
646
647 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
648 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
649 arm_builtin_support_vector_misalignment
650
651 #undef TARGET_PREFERRED_RENAME_CLASS
652 #define TARGET_PREFERRED_RENAME_CLASS \
653 arm_preferred_rename_class
654
655 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
656 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
657 arm_vectorize_vec_perm_const_ok
658
659 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
660 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
661 arm_builtin_vectorization_cost
662 #undef TARGET_VECTORIZE_ADD_STMT_COST
663 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
664
665 #undef TARGET_CANONICALIZE_COMPARISON
666 #define TARGET_CANONICALIZE_COMPARISON \
667 arm_canonicalize_comparison
668
669 #undef TARGET_ASAN_SHADOW_OFFSET
670 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
671
672 #undef MAX_INSN_PER_IT_BLOCK
673 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
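/* arm_restrict_it corresponds to -mrestrict-it: ARMv8 deprecates IT blocks
   that cover more than a single instruction, so only one insn is allowed per
   IT block in that mode; otherwise up to four.  */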
674
675 #undef TARGET_CAN_USE_DOLOOP_P
676 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
677
678 struct gcc_target targetm = TARGET_INITIALIZER;
679 \f
680 /* Obstack for minipool constant handling. */
681 static struct obstack minipool_obstack;
682 static char * minipool_startobj;
683
684 /* The maximum number of insns skipped which
685 will be conditionalised if possible. */
686 static int max_insns_skipped = 5;
687
688 extern FILE * asm_out_file;
689
690 /* True if we are currently building a constant table. */
691 int making_const_table;
692
693 /* The processor for which instructions should be scheduled. */
694 enum processor_type arm_tune = arm_none;
695
696 /* The current tuning set. */
697 const struct tune_params *current_tune;
698
699 /* Which floating point hardware to schedule for. */
700 int arm_fpu_attr;
701
702 /* Which floating point hardware to use. */
703 const struct arm_fpu_desc *arm_fpu_desc;
704
705 /* Used for Thumb call_via trampolines. */
706 rtx thumb_call_via_label[14];
707 static int thumb_call_reg_needed;
708
709 /* Bit values used to identify processor capabilities. */
710 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
711 #define FL_ARCH3M (1 << 1) /* Extended multiply */
712 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
713 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
714 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
715 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
716 #define FL_THUMB (1 << 6) /* Thumb aware */
717 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
718 #define FL_STRONG (1 << 8) /* StrongARM */
719 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
720 #define FL_XSCALE (1 << 10) /* XScale */
721 /* spare (1 << 11) */
722 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
723 media instructions. */
724 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
725 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
726 Note: ARM6 & 7 derivatives only. */
727 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
728 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
729 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
730 profile. */
731 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
732 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
733 #define FL_NEON (1 << 20) /* Neon instructions. */
734 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
735 architecture. */
736 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
737 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
738 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
739 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
740
741 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
742 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
743
744 /* Flags that only affect tuning, not available instructions. */
745 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
746 | FL_CO_PROC)
747
748 #define FL_FOR_ARCH2 FL_NOTM
749 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
750 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
751 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
752 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
753 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
754 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
755 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
756 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
757 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
758 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
759 #define FL_FOR_ARCH6J FL_FOR_ARCH6
760 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
761 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
762 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
763 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
764 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
765 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
766 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
767 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
768 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
769 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
770 #define FL_FOR_ARCH8A (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
771 | FL_ARM_DIV | FL_NOTM)
772
773 /* The bits in this mask specify which
774 instructions we are allowed to generate. */
775 static unsigned long insn_flags = 0;
776
777 /* The bits in this mask specify which instruction scheduling options should
778 be used. */
779 static unsigned long tune_flags = 0;
780
781 /* The highest ARM architecture version supported by the
782 target. */
783 enum base_architecture arm_base_arch = BASE_ARCH_0;
784
785 /* The following are used in the arm.md file as equivalents to bits
786 in the above two flag variables. */
787
788 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
789 int arm_arch3m = 0;
790
791 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
792 int arm_arch4 = 0;
793
794 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
795 int arm_arch4t = 0;
796
797 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
798 int arm_arch5 = 0;
799
800 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
801 int arm_arch5e = 0;
802
803 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
804 int arm_arch6 = 0;
805
806 /* Nonzero if this chip supports the ARM 6K extensions. */
807 int arm_arch6k = 0;
808
809 /* Nonzero if instructions present in ARMv6-M can be used. */
810 int arm_arch6m = 0;
811
812 /* Nonzero if this chip supports the ARM 7 extensions. */
813 int arm_arch7 = 0;
814
815 /* Nonzero if instructions not present in the 'M' profile can be used. */
816 int arm_arch_notm = 0;
817
818 /* Nonzero if instructions present in ARMv7E-M can be used. */
819 int arm_arch7em = 0;
820
821 /* Nonzero if instructions present in ARMv8 can be used. */
822 int arm_arch8 = 0;
823
824 /* Nonzero if this chip can benefit from load scheduling. */
825 int arm_ld_sched = 0;
826
827 /* Nonzero if this chip is a StrongARM. */
828 int arm_tune_strongarm = 0;
829
830 /* Nonzero if this chip supports Intel Wireless MMX technology. */
831 int arm_arch_iwmmxt = 0;
832
833 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
834 int arm_arch_iwmmxt2 = 0;
835
836 /* Nonzero if this chip is an XScale. */
837 int arm_arch_xscale = 0;
838
839 /* Nonzero if tuning for XScale */
840 int arm_tune_xscale = 0;
841
842 /* Nonzero if we want to tune for stores that access the write-buffer.
843 This typically means an ARM6 or ARM7 with MMU or MPU. */
844 int arm_tune_wbuf = 0;
845
846 /* Nonzero if tuning for Cortex-A9. */
847 int arm_tune_cortex_a9 = 0;
848
849 /* Nonzero if generating Thumb instructions. */
850 int thumb_code = 0;
851
852 /* Nonzero if generating Thumb-1 instructions. */
853 int thumb1_code = 0;
854
855 /* Nonzero if we should define __THUMB_INTERWORK__ in the
856 preprocessor.
857 XXX This is a bit of a hack, it's intended to help work around
858 problems in GLD which doesn't understand that armv5t code is
859 interworking clean. */
860 int arm_cpp_interwork = 0;
861
862 /* Nonzero if chip supports Thumb 2. */
863 int arm_arch_thumb2;
864
865 /* Nonzero if chip supports integer division instruction. */
866 int arm_arch_arm_hwdiv;
867 int arm_arch_thumb_hwdiv;
868
869 /* Nonzero if we should use Neon to handle 64-bit operations rather
870 than core registers. */
871 int prefer_neon_for_64bits = 0;
872
873 /* Nonzero if we shouldn't use literal pools. */
874 bool arm_disable_literal_pool = false;
875
876 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
877 we must report the mode of the memory reference from
878 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
879 enum machine_mode output_memory_reference_mode;
880
881 /* The register number to be used for the PIC offset register. */
882 unsigned arm_pic_register = INVALID_REGNUM;
883
884 /* Set to 1 after arm_reorg has started. Reset at the start of
885 the next function. */
886 static int after_arm_reorg = 0;
887
888 enum arm_pcs arm_pcs_default;
889
890 /* For an explanation of these variables, see final_prescan_insn below. */
891 int arm_ccfsm_state;
892 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
893 enum arm_cond_code arm_current_cc;
894
895 rtx arm_target_insn;
896 int arm_target_label;
897 /* The number of conditionally executed insns, including the current insn. */
898 int arm_condexec_count = 0;
899 /* A bitmask specifying the patterns for the IT block.
900 Zero means do not output an IT block before this insn. */
901 int arm_condexec_mask = 0;
902 /* The number of bits used in arm_condexec_mask. */
903 int arm_condexec_masklen = 0;
904
905 /* Nonzero if chip supports the ARMv8 CRC instructions. */
906 int arm_arch_crc = 0;
907
908 /* The condition codes of the ARM, and the inverse function. */
909 static const char * const arm_condition_codes[] =
910 {
911 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
912 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
913 };
914
915 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
916 int arm_regs_in_sequence[] =
917 {
918 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
919 };
920
921 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
922 #define streq(string1, string2) (strcmp (string1, string2) == 0)
923
924 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
925 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
926 | (1 << PIC_OFFSET_TABLE_REGNUM)))
927 \f
928 /* Initialization code. */
929
930 struct processors
931 {
932 const char *const name;
933 enum processor_type core;
934 const char *arch;
935 enum base_architecture base_arch;
936 const unsigned long flags;
937 const struct tune_params *const tune;
938 };
939
940
941 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
942 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
943 prefetch_slots, \
944 l1_size, \
945 l1_line_size
946
947 /* arm generic vectorizer costs. */
948 static const
949 struct cpu_vec_costs arm_default_vec_cost = {
950 1, /* scalar_stmt_cost. */
951 1, /* scalar_load_cost. */
952 1, /* scalar_store_cost. */
953 1, /* vec_stmt_cost. */
954 1, /* vec_to_scalar_cost. */
955 1, /* scalar_to_vec_cost. */
956 1, /* vec_align_load_cost. */
957 1, /* vec_unalign_load_cost. */
958 1, /* vec_unalign_store_cost. */
959 1, /* vec_store_cost. */
960 3, /* cond_taken_branch_cost. */
961 1, /* cond_not_taken_branch_cost. */
962 };
963
964 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
965 #include "aarch-cost-tables.h"
966
967
968
969 const struct cpu_cost_table cortexa9_extra_costs =
970 {
971 /* ALU */
972 {
973 0, /* Arith. */
974 0, /* Logical. */
975 0, /* Shift. */
976 COSTS_N_INSNS (1), /* Shift_reg. */
977 COSTS_N_INSNS (1), /* Arith_shift. */
978 COSTS_N_INSNS (2), /* Arith_shift_reg. */
979 0, /* Log_shift. */
980 COSTS_N_INSNS (1), /* Log_shift_reg. */
981 COSTS_N_INSNS (1), /* Extend. */
982 COSTS_N_INSNS (2), /* Extend_arith. */
983 COSTS_N_INSNS (1), /* Bfi. */
984 COSTS_N_INSNS (1), /* Bfx. */
985 0, /* Clz. */
986 0, /* non_exec. */
987 true /* non_exec_costs_exec. */
988 },
989 {
990 /* MULT SImode */
991 {
992 COSTS_N_INSNS (3), /* Simple. */
993 COSTS_N_INSNS (3), /* Flag_setting. */
994 COSTS_N_INSNS (2), /* Extend. */
995 COSTS_N_INSNS (3), /* Add. */
996 COSTS_N_INSNS (2), /* Extend_add. */
997 COSTS_N_INSNS (30) /* Idiv. No HW div on Cortex A9. */
998 },
999 /* MULT DImode */
1000 {
1001 0, /* Simple (N/A). */
1002 0, /* Flag_setting (N/A). */
1003 COSTS_N_INSNS (4), /* Extend. */
1004 0, /* Add (N/A). */
1005 COSTS_N_INSNS (4), /* Extend_add. */
1006 0 /* Idiv (N/A). */
1007 }
1008 },
1009 /* LD/ST */
1010 {
1011 COSTS_N_INSNS (2), /* Load. */
1012 COSTS_N_INSNS (2), /* Load_sign_extend. */
1013 COSTS_N_INSNS (2), /* Ldrd. */
1014 COSTS_N_INSNS (2), /* Ldm_1st. */
1015 1, /* Ldm_regs_per_insn_1st. */
1016 2, /* Ldm_regs_per_insn_subsequent. */
1017 COSTS_N_INSNS (5), /* Loadf. */
1018 COSTS_N_INSNS (5), /* Loadd. */
1019 COSTS_N_INSNS (1), /* Load_unaligned. */
1020 COSTS_N_INSNS (2), /* Store. */
1021 COSTS_N_INSNS (2), /* Strd. */
1022 COSTS_N_INSNS (2), /* Stm_1st. */
1023 1, /* Stm_regs_per_insn_1st. */
1024 2, /* Stm_regs_per_insn_subsequent. */
1025 COSTS_N_INSNS (1), /* Storef. */
1026 COSTS_N_INSNS (1), /* Stored. */
1027 COSTS_N_INSNS (1) /* Store_unaligned. */
1028 },
1029 {
1030 /* FP SFmode */
1031 {
1032 COSTS_N_INSNS (14), /* Div. */
1033 COSTS_N_INSNS (4), /* Mult. */
1034 COSTS_N_INSNS (7), /* Mult_addsub. */
1035 COSTS_N_INSNS (30), /* Fma. */
1036 COSTS_N_INSNS (3), /* Addsub. */
1037 COSTS_N_INSNS (1), /* Fpconst. */
1038 COSTS_N_INSNS (1), /* Neg. */
1039 COSTS_N_INSNS (3), /* Compare. */
1040 COSTS_N_INSNS (3), /* Widen. */
1041 COSTS_N_INSNS (3), /* Narrow. */
1042 COSTS_N_INSNS (3), /* Toint. */
1043 COSTS_N_INSNS (3), /* Fromint. */
1044 COSTS_N_INSNS (3) /* Roundint. */
1045 },
1046 /* FP DFmode */
1047 {
1048 COSTS_N_INSNS (24), /* Div. */
1049 COSTS_N_INSNS (5), /* Mult. */
1050 COSTS_N_INSNS (8), /* Mult_addsub. */
1051 COSTS_N_INSNS (30), /* Fma. */
1052 COSTS_N_INSNS (3), /* Addsub. */
1053 COSTS_N_INSNS (1), /* Fpconst. */
1054 COSTS_N_INSNS (1), /* Neg. */
1055 COSTS_N_INSNS (3), /* Compare. */
1056 COSTS_N_INSNS (3), /* Widen. */
1057 COSTS_N_INSNS (3), /* Narrow. */
1058 COSTS_N_INSNS (3), /* Toint. */
1059 COSTS_N_INSNS (3), /* Fromint. */
1060 COSTS_N_INSNS (3) /* Roundint. */
1061 }
1062 },
1063 /* Vector */
1064 {
1065 COSTS_N_INSNS (1) /* Alu. */
1066 }
1067 };
1068
1069
1070 const struct cpu_cost_table cortexa7_extra_costs =
1071 {
1072 /* ALU */
1073 {
1074 0, /* Arith. */
1075 0, /* Logical. */
1076 COSTS_N_INSNS (1), /* Shift. */
1077 COSTS_N_INSNS (1), /* Shift_reg. */
1078 COSTS_N_INSNS (1), /* Arith_shift. */
1079 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1080 COSTS_N_INSNS (1), /* Log_shift. */
1081 COSTS_N_INSNS (1), /* Log_shift_reg. */
1082 COSTS_N_INSNS (1), /* Extend. */
1083 COSTS_N_INSNS (1), /* Extend_arith. */
1084 COSTS_N_INSNS (1), /* Bfi. */
1085 COSTS_N_INSNS (1), /* Bfx. */
1086 COSTS_N_INSNS (1), /* Clz. */
1087 0, /* non_exec. */
1088 true /* non_exec_costs_exec. */
1089 },
1090
1091 {
1092 /* MULT SImode */
1093 {
1094 0, /* Simple. */
1095 COSTS_N_INSNS (1), /* Flag_setting. */
1096 COSTS_N_INSNS (1), /* Extend. */
1097 COSTS_N_INSNS (1), /* Add. */
1098 COSTS_N_INSNS (1), /* Extend_add. */
1099 COSTS_N_INSNS (7) /* Idiv. */
1100 },
1101 /* MULT DImode */
1102 {
1103 0, /* Simple (N/A). */
1104 0, /* Flag_setting (N/A). */
1105 COSTS_N_INSNS (1), /* Extend. */
1106 0, /* Add. */
1107 COSTS_N_INSNS (2), /* Extend_add. */
1108 0 /* Idiv (N/A). */
1109 }
1110 },
1111 /* LD/ST */
1112 {
1113 COSTS_N_INSNS (1), /* Load. */
1114 COSTS_N_INSNS (1), /* Load_sign_extend. */
1115 COSTS_N_INSNS (3), /* Ldrd. */
1116 COSTS_N_INSNS (1), /* Ldm_1st. */
1117 1, /* Ldm_regs_per_insn_1st. */
1118 2, /* Ldm_regs_per_insn_subsequent. */
1119 COSTS_N_INSNS (2), /* Loadf. */
1120 COSTS_N_INSNS (2), /* Loadd. */
1121 COSTS_N_INSNS (1), /* Load_unaligned. */
1122 COSTS_N_INSNS (1), /* Store. */
1123 COSTS_N_INSNS (3), /* Strd. */
1124 COSTS_N_INSNS (1), /* Stm_1st. */
1125 1, /* Stm_regs_per_insn_1st. */
1126 2, /* Stm_regs_per_insn_subsequent. */
1127 COSTS_N_INSNS (2), /* Storef. */
1128 COSTS_N_INSNS (2), /* Stored. */
1129 COSTS_N_INSNS (1) /* Store_unaligned. */
1130 },
1131 {
1132 /* FP SFmode */
1133 {
1134 COSTS_N_INSNS (15), /* Div. */
1135 COSTS_N_INSNS (3), /* Mult. */
1136 COSTS_N_INSNS (7), /* Mult_addsub. */
1137 COSTS_N_INSNS (7), /* Fma. */
1138 COSTS_N_INSNS (3), /* Addsub. */
1139 COSTS_N_INSNS (3), /* Fpconst. */
1140 COSTS_N_INSNS (3), /* Neg. */
1141 COSTS_N_INSNS (3), /* Compare. */
1142 COSTS_N_INSNS (3), /* Widen. */
1143 COSTS_N_INSNS (3), /* Narrow. */
1144 COSTS_N_INSNS (3), /* Toint. */
1145 COSTS_N_INSNS (3), /* Fromint. */
1146 COSTS_N_INSNS (3) /* Roundint. */
1147 },
1148 /* FP DFmode */
1149 {
1150 COSTS_N_INSNS (30), /* Div. */
1151 COSTS_N_INSNS (6), /* Mult. */
1152 COSTS_N_INSNS (10), /* Mult_addsub. */
1153 COSTS_N_INSNS (7), /* Fma. */
1154 COSTS_N_INSNS (3), /* Addsub. */
1155 COSTS_N_INSNS (3), /* Fpconst. */
1156 COSTS_N_INSNS (3), /* Neg. */
1157 COSTS_N_INSNS (3), /* Compare. */
1158 COSTS_N_INSNS (3), /* Widen. */
1159 COSTS_N_INSNS (3), /* Narrow. */
1160 COSTS_N_INSNS (3), /* Toint. */
1161 COSTS_N_INSNS (3), /* Fromint. */
1162 COSTS_N_INSNS (3) /* Roundint. */
1163 }
1164 },
1165 /* Vector */
1166 {
1167 COSTS_N_INSNS (1) /* Alu. */
1168 }
1169 };
1170
1171 const struct cpu_cost_table cortexa12_extra_costs =
1172 {
1173 /* ALU */
1174 {
1175 0, /* Arith. */
1176 0, /* Logical. */
1177 0, /* Shift. */
1178 COSTS_N_INSNS (1), /* Shift_reg. */
1179 COSTS_N_INSNS (1), /* Arith_shift. */
1180 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1181 COSTS_N_INSNS (1), /* Log_shift. */
1182 COSTS_N_INSNS (1), /* Log_shift_reg. */
1183 0, /* Extend. */
1184 COSTS_N_INSNS (1), /* Extend_arith. */
1185 0, /* Bfi. */
1186 COSTS_N_INSNS (1), /* Bfx. */
1187 COSTS_N_INSNS (1), /* Clz. */
1188 0, /* non_exec. */
1189 true /* non_exec_costs_exec. */
1190 },
1191 /* MULT SImode */
1192 {
1193 {
1194 COSTS_N_INSNS (2), /* Simple. */
1195 COSTS_N_INSNS (3), /* Flag_setting. */
1196 COSTS_N_INSNS (2), /* Extend. */
1197 COSTS_N_INSNS (3), /* Add. */
1198 COSTS_N_INSNS (2), /* Extend_add. */
1199 COSTS_N_INSNS (18) /* Idiv. */
1200 },
1201 /* MULT DImode */
1202 {
1203 0, /* Simple (N/A). */
1204 0, /* Flag_setting (N/A). */
1205 COSTS_N_INSNS (3), /* Extend. */
1206 0, /* Add (N/A). */
1207 COSTS_N_INSNS (3), /* Extend_add. */
1208 0 /* Idiv (N/A). */
1209 }
1210 },
1211 /* LD/ST */
1212 {
1213 COSTS_N_INSNS (3), /* Load. */
1214 COSTS_N_INSNS (3), /* Load_sign_extend. */
1215 COSTS_N_INSNS (3), /* Ldrd. */
1216 COSTS_N_INSNS (3), /* Ldm_1st. */
1217 1, /* Ldm_regs_per_insn_1st. */
1218 2, /* Ldm_regs_per_insn_subsequent. */
1219 COSTS_N_INSNS (3), /* Loadf. */
1220 COSTS_N_INSNS (3), /* Loadd. */
1221 0, /* Load_unaligned. */
1222 0, /* Store. */
1223 0, /* Strd. */
1224 0, /* Stm_1st. */
1225 1, /* Stm_regs_per_insn_1st. */
1226 2, /* Stm_regs_per_insn_subsequent. */
1227 COSTS_N_INSNS (2), /* Storef. */
1228 COSTS_N_INSNS (2), /* Stored. */
1229 0 /* Store_unaligned. */
1230 },
1231 {
1232 /* FP SFmode */
1233 {
1234 COSTS_N_INSNS (17), /* Div. */
1235 COSTS_N_INSNS (4), /* Mult. */
1236 COSTS_N_INSNS (8), /* Mult_addsub. */
1237 COSTS_N_INSNS (8), /* Fma. */
1238 COSTS_N_INSNS (4), /* Addsub. */
1239 COSTS_N_INSNS (2), /* Fpconst. */
1240 COSTS_N_INSNS (2), /* Neg. */
1241 COSTS_N_INSNS (2), /* Compare. */
1242 COSTS_N_INSNS (4), /* Widen. */
1243 COSTS_N_INSNS (4), /* Narrow. */
1244 COSTS_N_INSNS (4), /* Toint. */
1245 COSTS_N_INSNS (4), /* Fromint. */
1246 COSTS_N_INSNS (4) /* Roundint. */
1247 },
1248 /* FP DFmode */
1249 {
1250 COSTS_N_INSNS (31), /* Div. */
1251 COSTS_N_INSNS (4), /* Mult. */
1252 COSTS_N_INSNS (8), /* Mult_addsub. */
1253 COSTS_N_INSNS (8), /* Fma. */
1254 COSTS_N_INSNS (4), /* Addsub. */
1255 COSTS_N_INSNS (2), /* Fpconst. */
1256 COSTS_N_INSNS (2), /* Neg. */
1257 COSTS_N_INSNS (2), /* Compare. */
1258 COSTS_N_INSNS (4), /* Widen. */
1259 COSTS_N_INSNS (4), /* Narrow. */
1260 COSTS_N_INSNS (4), /* Toint. */
1261 COSTS_N_INSNS (4), /* Fromint. */
1262 COSTS_N_INSNS (4) /* Roundint. */
1263 }
1264 },
1265 /* Vector */
1266 {
1267 COSTS_N_INSNS (1) /* Alu. */
1268 }
1269 };
1270
1271 const struct cpu_cost_table cortexa15_extra_costs =
1272 {
1273 /* ALU */
1274 {
1275 0, /* Arith. */
1276 0, /* Logical. */
1277 0, /* Shift. */
1278 0, /* Shift_reg. */
1279 COSTS_N_INSNS (1), /* Arith_shift. */
1280 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1281 COSTS_N_INSNS (1), /* Log_shift. */
1282 COSTS_N_INSNS (1), /* Log_shift_reg. */
1283 0, /* Extend. */
1284 COSTS_N_INSNS (1), /* Extend_arith. */
1285 COSTS_N_INSNS (1), /* Bfi. */
1286 0, /* Bfx. */
1287 0, /* Clz. */
1288 0, /* non_exec. */
1289 true /* non_exec_costs_exec. */
1290 },
1291 /* MULT SImode */
1292 {
1293 {
1294 COSTS_N_INSNS (2), /* Simple. */
1295 COSTS_N_INSNS (3), /* Flag_setting. */
1296 COSTS_N_INSNS (2), /* Extend. */
1297 COSTS_N_INSNS (2), /* Add. */
1298 COSTS_N_INSNS (2), /* Extend_add. */
1299 COSTS_N_INSNS (18) /* Idiv. */
1300 },
1301 /* MULT DImode */
1302 {
1303 0, /* Simple (N/A). */
1304 0, /* Flag_setting (N/A). */
1305 COSTS_N_INSNS (3), /* Extend. */
1306 0, /* Add (N/A). */
1307 COSTS_N_INSNS (3), /* Extend_add. */
1308 0 /* Idiv (N/A). */
1309 }
1310 },
1311 /* LD/ST */
1312 {
1313 COSTS_N_INSNS (3), /* Load. */
1314 COSTS_N_INSNS (3), /* Load_sign_extend. */
1315 COSTS_N_INSNS (3), /* Ldrd. */
1316 COSTS_N_INSNS (4), /* Ldm_1st. */
1317 1, /* Ldm_regs_per_insn_1st. */
1318 2, /* Ldm_regs_per_insn_subsequent. */
1319 COSTS_N_INSNS (4), /* Loadf. */
1320 COSTS_N_INSNS (4), /* Loadd. */
1321 0, /* Load_unaligned. */
1322 0, /* Store. */
1323 0, /* Strd. */
1324 COSTS_N_INSNS (1), /* Stm_1st. */
1325 1, /* Stm_regs_per_insn_1st. */
1326 2, /* Stm_regs_per_insn_subsequent. */
1327 0, /* Storef. */
1328 0, /* Stored. */
1329 0 /* Store_unaligned. */
1330 },
1331 {
1332 /* FP SFmode */
1333 {
1334 COSTS_N_INSNS (17), /* Div. */
1335 COSTS_N_INSNS (4), /* Mult. */
1336 COSTS_N_INSNS (8), /* Mult_addsub. */
1337 COSTS_N_INSNS (8), /* Fma. */
1338 COSTS_N_INSNS (4), /* Addsub. */
1339 COSTS_N_INSNS (2), /* Fpconst. */
1340 COSTS_N_INSNS (2), /* Neg. */
1341 COSTS_N_INSNS (5), /* Compare. */
1342 COSTS_N_INSNS (4), /* Widen. */
1343 COSTS_N_INSNS (4), /* Narrow. */
1344 COSTS_N_INSNS (4), /* Toint. */
1345 COSTS_N_INSNS (4), /* Fromint. */
1346 COSTS_N_INSNS (4) /* Roundint. */
1347 },
1348 /* FP DFmode */
1349 {
1350 COSTS_N_INSNS (31), /* Div. */
1351 COSTS_N_INSNS (4), /* Mult. */
1352 COSTS_N_INSNS (8), /* Mult_addsub. */
1353 COSTS_N_INSNS (8), /* Fma. */
1354 COSTS_N_INSNS (4), /* Addsub. */
1355 COSTS_N_INSNS (2), /* Fpconst. */
1356 COSTS_N_INSNS (2), /* Neg. */
1357 COSTS_N_INSNS (2), /* Compare. */
1358 COSTS_N_INSNS (4), /* Widen. */
1359 COSTS_N_INSNS (4), /* Narrow. */
1360 COSTS_N_INSNS (4), /* Toint. */
1361 COSTS_N_INSNS (4), /* Fromint. */
1362 COSTS_N_INSNS (4) /* Roundint. */
1363 }
1364 },
1365 /* Vector */
1366 {
1367 COSTS_N_INSNS (1) /* Alu. */
1368 }
1369 };
1370
1371 const struct cpu_cost_table v7m_extra_costs =
1372 {
1373 /* ALU */
1374 {
1375 0, /* Arith. */
1376 0, /* Logical. */
1377 0, /* Shift. */
1378 0, /* Shift_reg. */
1379 0, /* Arith_shift. */
1380 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1381 0, /* Log_shift. */
1382 COSTS_N_INSNS (1), /* Log_shift_reg. */
1383 0, /* Extend. */
1384 COSTS_N_INSNS (1), /* Extend_arith. */
1385 0, /* Bfi. */
1386 0, /* Bfx. */
1387 0, /* Clz. */
1388 COSTS_N_INSNS (1), /* non_exec. */
1389 false /* non_exec_costs_exec. */
1390 },
1391 {
1392 /* MULT SImode */
1393 {
1394 COSTS_N_INSNS (1), /* Simple. */
1395 COSTS_N_INSNS (1), /* Flag_setting. */
1396 COSTS_N_INSNS (2), /* Extend. */
1397 COSTS_N_INSNS (1), /* Add. */
1398 COSTS_N_INSNS (3), /* Extend_add. */
1399 COSTS_N_INSNS (8) /* Idiv. */
1400 },
1401 /* MULT DImode */
1402 {
1403 0, /* Simple (N/A). */
1404 0, /* Flag_setting (N/A). */
1405 COSTS_N_INSNS (2), /* Extend. */
1406 0, /* Add (N/A). */
1407 COSTS_N_INSNS (3), /* Extend_add. */
1408 0 /* Idiv (N/A). */
1409 }
1410 },
1411 /* LD/ST */
1412 {
1413 COSTS_N_INSNS (2), /* Load. */
1414 0, /* Load_sign_extend. */
1415 COSTS_N_INSNS (3), /* Ldrd. */
1416 COSTS_N_INSNS (2), /* Ldm_1st. */
1417 1, /* Ldm_regs_per_insn_1st. */
1418 1, /* Ldm_regs_per_insn_subsequent. */
1419 COSTS_N_INSNS (2), /* Loadf. */
1420 COSTS_N_INSNS (3), /* Loadd. */
1421 COSTS_N_INSNS (1), /* Load_unaligned. */
1422 COSTS_N_INSNS (2), /* Store. */
1423 COSTS_N_INSNS (3), /* Strd. */
1424 COSTS_N_INSNS (2), /* Stm_1st. */
1425 1, /* Stm_regs_per_insn_1st. */
1426 1, /* Stm_regs_per_insn_subsequent. */
1427 COSTS_N_INSNS (2), /* Storef. */
1428 COSTS_N_INSNS (3), /* Stored. */
1429 COSTS_N_INSNS (1) /* Store_unaligned. */
1430 },
1431 {
1432 /* FP SFmode */
1433 {
1434 COSTS_N_INSNS (7), /* Div. */
1435 COSTS_N_INSNS (2), /* Mult. */
1436 COSTS_N_INSNS (5), /* Mult_addsub. */
1437 COSTS_N_INSNS (3), /* Fma. */
1438 COSTS_N_INSNS (1), /* Addsub. */
1439 0, /* Fpconst. */
1440 0, /* Neg. */
1441 0, /* Compare. */
1442 0, /* Widen. */
1443 0, /* Narrow. */
1444 0, /* Toint. */
1445 0, /* Fromint. */
1446 0 /* Roundint. */
1447 },
1448 /* FP DFmode */
1449 {
1450 COSTS_N_INSNS (15), /* Div. */
1451 COSTS_N_INSNS (5), /* Mult. */
1452 COSTS_N_INSNS (7), /* Mult_addsub. */
1453 COSTS_N_INSNS (7), /* Fma. */
1454 COSTS_N_INSNS (3), /* Addsub. */
1455 0, /* Fpconst. */
1456 0, /* Neg. */
1457 0, /* Compare. */
1458 0, /* Widen. */
1459 0, /* Narrow. */
1460 0, /* Toint. */
1461 0, /* Fromint. */
1462 0 /* Roundint. */
1463 }
1464 },
1465 /* Vector */
1466 {
1467 COSTS_N_INSNS (1) /* Alu. */
1468 }
1469 };
1470
1471 const struct tune_params arm_slowmul_tune =
1472 {
1473 arm_slowmul_rtx_costs,
1474 NULL,
1475 NULL, /* Sched adj cost. */
1476 3, /* Constant limit. */
1477 5, /* Max cond insns. */
1478 ARM_PREFETCH_NOT_BENEFICIAL,
1479 true, /* Prefer constant pool. */
1480 arm_default_branch_cost,
1481 false, /* Prefer LDRD/STRD. */
1482 {true, true}, /* Prefer non short circuit. */
1483 &arm_default_vec_cost, /* Vectorizer costs. */
1484 false /* Prefer Neon for 64-bit bitops. */
1485 };
1486
1487 const struct tune_params arm_fastmul_tune =
1488 {
1489 arm_fastmul_rtx_costs,
1490 NULL,
1491 NULL, /* Sched adj cost. */
1492 1, /* Constant limit. */
1493 5, /* Max cond insns. */
1494 ARM_PREFETCH_NOT_BENEFICIAL,
1495 true, /* Prefer constant pool. */
1496 arm_default_branch_cost,
1497 false, /* Prefer LDRD/STRD. */
1498 {true, true}, /* Prefer non short circuit. */
1499 &arm_default_vec_cost, /* Vectorizer costs. */
1500 false /* Prefer Neon for 64-bit bitops. */
1501 };
1502
1503 /* StrongARM has early execution of branches, so a sequence that is worth
1504 skipping is shorter. Set max_insns_skipped to a lower value. */
1505
1506 const struct tune_params arm_strongarm_tune =
1507 {
1508 arm_fastmul_rtx_costs,
1509 NULL,
1510 NULL, /* Sched adj cost. */
1511 1, /* Constant limit. */
1512 3, /* Max cond insns. */
1513 ARM_PREFETCH_NOT_BENEFICIAL,
1514 true, /* Prefer constant pool. */
1515 arm_default_branch_cost,
1516 false, /* Prefer LDRD/STRD. */
1517 {true, true}, /* Prefer non short circuit. */
1518 &arm_default_vec_cost, /* Vectorizer costs. */
1519 false /* Prefer Neon for 64-bit bitops. */
1520 };
1521
1522 const struct tune_params arm_xscale_tune =
1523 {
1524 arm_xscale_rtx_costs,
1525 NULL,
1526 xscale_sched_adjust_cost,
1527 2, /* Constant limit. */
1528 3, /* Max cond insns. */
1529 ARM_PREFETCH_NOT_BENEFICIAL,
1530 true, /* Prefer constant pool. */
1531 arm_default_branch_cost,
1532 false, /* Prefer LDRD/STRD. */
1533 {true, true}, /* Prefer non short circuit. */
1534 &arm_default_vec_cost, /* Vectorizer costs. */
1535 false /* Prefer Neon for 64-bit bitops. */
1536 };
1537
1538 const struct tune_params arm_9e_tune =
1539 {
1540 arm_9e_rtx_costs,
1541 NULL,
1542 NULL, /* Sched adj cost. */
1543 1, /* Constant limit. */
1544 5, /* Max cond insns. */
1545 ARM_PREFETCH_NOT_BENEFICIAL,
1546 true, /* Prefer constant pool. */
1547 arm_default_branch_cost,
1548 false, /* Prefer LDRD/STRD. */
1549 {true, true}, /* Prefer non short circuit. */
1550 &arm_default_vec_cost, /* Vectorizer costs. */
1551 false /* Prefer Neon for 64-bit bitops. */
1552 };
1553
1554 const struct tune_params arm_v6t2_tune =
1555 {
1556 arm_9e_rtx_costs,
1557 NULL,
1558 NULL, /* Sched adj cost. */
1559 1, /* Constant limit. */
1560 5, /* Max cond insns. */
1561 ARM_PREFETCH_NOT_BENEFICIAL,
1562 false, /* Prefer constant pool. */
1563 arm_default_branch_cost,
1564 false, /* Prefer LDRD/STRD. */
1565 {true, true}, /* Prefer non short circuit. */
1566 &arm_default_vec_cost, /* Vectorizer costs. */
1567 false /* Prefer Neon for 64-bit bitops. */
1568 };
1569
1570 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1571 const struct tune_params arm_cortex_tune =
1572 {
1573 arm_9e_rtx_costs,
1574 &generic_extra_costs,
1575 NULL, /* Sched adj cost. */
1576 1, /* Constant limit. */
1577 5, /* Max cond insns. */
1578 ARM_PREFETCH_NOT_BENEFICIAL,
1579 false, /* Prefer constant pool. */
1580 arm_default_branch_cost,
1581 false, /* Prefer LDRD/STRD. */
1582 {true, true}, /* Prefer non short circuit. */
1583 &arm_default_vec_cost, /* Vectorizer costs. */
1584 false /* Prefer Neon for 64-bit bitops. */
1585 };
1586
1587 const struct tune_params arm_cortex_a7_tune =
1588 {
1589 arm_9e_rtx_costs,
1590 &cortexa7_extra_costs,
1591 NULL,
1592 1, /* Constant limit. */
1593 5, /* Max cond insns. */
1594 ARM_PREFETCH_NOT_BENEFICIAL,
1595 false, /* Prefer constant pool. */
1596 arm_default_branch_cost,
1597 false, /* Prefer LDRD/STRD. */
1598 {true, true}, /* Prefer non short circuit. */
1599 &arm_default_vec_cost, /* Vectorizer costs. */
1600 false /* Prefer Neon for 64-bits bitops. */
1601 };
1602
1603 const struct tune_params arm_cortex_a15_tune =
1604 {
1605 arm_9e_rtx_costs,
1606 &cortexa15_extra_costs,
1607 NULL, /* Sched adj cost. */
1608 1, /* Constant limit. */
1609 2, /* Max cond insns. */
1610 ARM_PREFETCH_NOT_BENEFICIAL,
1611 false, /* Prefer constant pool. */
1612 arm_default_branch_cost,
1613 true, /* Prefer LDRD/STRD. */
1614 {true, true}, /* Prefer non short circuit. */
1615 &arm_default_vec_cost, /* Vectorizer costs. */
1616 false /* Prefer Neon for 64-bits bitops. */
1617 };
1618
1619 const struct tune_params arm_cortex_a53_tune =
1620 {
1621 arm_9e_rtx_costs,
1622 &cortexa53_extra_costs,
1623 NULL, /* Scheduler cost adjustment. */
1624 1, /* Constant limit. */
1625 5, /* Max cond insns. */
1626 ARM_PREFETCH_NOT_BENEFICIAL,
1627 false, /* Prefer constant pool. */
1628 arm_default_branch_cost,
1629 false, /* Prefer LDRD/STRD. */
1630 {true, true}, /* Prefer non short circuit. */
1631 &arm_default_vec_cost, /* Vectorizer costs. */
1632 false /* Prefer Neon for 64-bits bitops. */
1633 };
1634
1635 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1636 less appealing. Set max_insns_skipped to a low value. */
1637
1638 const struct tune_params arm_cortex_a5_tune =
1639 {
1640 arm_9e_rtx_costs,
1641 NULL,
1642 NULL, /* Sched adj cost. */
1643 1, /* Constant limit. */
1644 1, /* Max cond insns. */
1645 ARM_PREFETCH_NOT_BENEFICIAL,
1646 false, /* Prefer constant pool. */
1647 arm_cortex_a5_branch_cost,
1648 false, /* Prefer LDRD/STRD. */
1649 {false, false}, /* Prefer non short circuit. */
1650 &arm_default_vec_cost, /* Vectorizer costs. */
1651 false /* Prefer Neon for 64-bits bitops. */
1652 };
1653
1654 const struct tune_params arm_cortex_a9_tune =
1655 {
1656 arm_9e_rtx_costs,
1657 &cortexa9_extra_costs,
1658 cortex_a9_sched_adjust_cost,
1659 1, /* Constant limit. */
1660 5, /* Max cond insns. */
1661 ARM_PREFETCH_BENEFICIAL(4,32,32),
1662 false, /* Prefer constant pool. */
1663 arm_default_branch_cost,
1664 false, /* Prefer LDRD/STRD. */
1665 {true, true}, /* Prefer non short circuit. */
1666 &arm_default_vec_cost, /* Vectorizer costs. */
1667 false /* Prefer Neon for 64-bits bitops. */
1668 };
1669
1670 const struct tune_params arm_cortex_a12_tune =
1671 {
1672 arm_9e_rtx_costs,
1673 &cortexa12_extra_costs,
1674 NULL,
1675 1, /* Constant limit. */
1676 5, /* Max cond insns. */
1677 ARM_PREFETCH_BENEFICIAL(4,32,32),
1678 false, /* Prefer constant pool. */
1679 arm_default_branch_cost,
1680 true, /* Prefer LDRD/STRD. */
1681 {true, true}, /* Prefer non short circuit. */
1682 &arm_default_vec_cost, /* Vectorizer costs. */
1683 false /* Prefer Neon for 64-bits bitops. */
1684 };
1685
1686 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
1687 single cycle, so materialising a constant with the pair costs two cycles. An
1688 LDR from the constant pool also takes two cycles to execute, but mildly
1689 increases pipelining opportunity (consecutive loads/stores can be pipelined
1690 together, saving one cycle), and may also improve icache utilisation. Hence
1691 we prefer the constant pool for such processors. */
1692
1693 const struct tune_params arm_v7m_tune =
1694 {
1695 arm_9e_rtx_costs,
1696 &v7m_extra_costs,
1697 NULL, /* Sched adj cost. */
1698 1, /* Constant limit. */
1699 5, /* Max cond insns. */
1700 ARM_PREFETCH_NOT_BENEFICIAL,
1701 true, /* Prefer constant pool. */
1702 arm_cortex_m_branch_cost,
1703 false, /* Prefer LDRD/STRD. */
1704 {false, false}, /* Prefer non short circuit. */
1705 &arm_default_vec_cost, /* Vectorizer costs. */
1706 false /* Prefer Neon for 64-bits bitops. */
1707 };
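/* Illustrative sketch (not generated from this file; the constant and label
   are hypothetical): the two ways of materialising a 32-bit constant that
   the armv7m comment above weighs are roughly

       movw  r0, #0x5678          @ 1 cycle
       movt  r0, #0x1234          @ 1 cycle

   versus a literal-pool load

       ldr   r0, .LCPI            @ 2 cycles, but may pipeline with
                                  @ neighbouring loads/stores
       ...
   .LCPI:
       .word 0x12345678

   which is why "Prefer constant pool" is set to true in arm_v7m_tune.  */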
1708
1709 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1710 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1711 const struct tune_params arm_v6m_tune =
1712 {
1713 arm_9e_rtx_costs,
1714 NULL,
1715 NULL, /* Sched adj cost. */
1716 1, /* Constant limit. */
1717 5, /* Max cond insns. */
1718 ARM_PREFETCH_NOT_BENEFICIAL,
1719 false, /* Prefer constant pool. */
1720 arm_default_branch_cost,
1721 false, /* Prefer LDRD/STRD. */
1722 {false, false}, /* Prefer non short circuit. */
1723 &arm_default_vec_cost, /* Vectorizer costs. */
1724 false /* Prefer Neon for 64-bits bitops. */
1725 };
1726
1727 const struct tune_params arm_fa726te_tune =
1728 {
1729 arm_9e_rtx_costs,
1730 NULL,
1731 fa726te_sched_adjust_cost,
1732 1, /* Constant limit. */
1733 5, /* Max cond insns. */
1734 ARM_PREFETCH_NOT_BENEFICIAL,
1735 true, /* Prefer constant pool. */
1736 arm_default_branch_cost,
1737 false, /* Prefer LDRD/STRD. */
1738 {true, true}, /* Prefer non short circuit. */
1739 &arm_default_vec_cost, /* Vectorizer costs. */
1740 false /* Prefer Neon for 64-bits bitops. */
1741 };
1742
1743
1744 /* Not all of these give usefully different compilation alternatives,
1745 but there is no simple way of generalizing them. */
1746 static const struct processors all_cores[] =
1747 {
1748 /* ARM Cores */
1749 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1750 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1751 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1752 #include "arm-cores.def"
1753 #undef ARM_CORE
1754 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1755 };
1756
1757 static const struct processors all_architectures[] =
1758 {
1759 /* ARM Architectures */
1760 /* We don't specify tuning costs here as they will be figured out
1761 from the core. */
1762
1763 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1764 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1765 #include "arm-arches.def"
1766 #undef ARM_ARCH
1767 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1768 };
1769
1770
1771 /* These are populated as commandline arguments are processed, or NULL
1772 if not specified. */
1773 static const struct processors *arm_selected_arch;
1774 static const struct processors *arm_selected_cpu;
1775 static const struct processors *arm_selected_tune;
1776
1777 /* The name of the preprocessor macro to define for this architecture. */
1778
1779 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1780
1781 /* Available values for -mfpu=. */
1782
1783 static const struct arm_fpu_desc all_fpus[] =
1784 {
1785 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1786 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1787 #include "arm-fpus.def"
1788 #undef ARM_FPU
1789 };
1790
1791
1792 /* Supported TLS relocations. */
1793
1794 enum tls_reloc {
1795 TLS_GD32,
1796 TLS_LDM32,
1797 TLS_LDO32,
1798 TLS_IE32,
1799 TLS_LE32,
1800 TLS_DESCSEQ /* GNU scheme */
1801 };
1802
1803 /* The maximum number of insns to be used when loading a constant. */
1804 inline static int
1805 arm_constant_limit (bool size_p)
1806 {
1807 return size_p ? 1 : current_tune->constant_limit;
1808 }
1809
1810 /* Emit an insn that's a simple single-set. Both the operands must be known
1811 to be valid. */
1812 inline static rtx
1813 emit_set_insn (rtx x, rtx y)
1814 {
1815 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1816 }
1817
1818 /* Return the number of bits set in VALUE. */
1819 static unsigned
1820 bit_count (unsigned long value)
1821 {
1822 unsigned long count = 0;
1823
1824 while (value)
1825 {
1826 count++;
1827 value &= value - 1; /* Clear the least-significant set bit. */
1828 }
1829
1830 return count;
1831 }
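/* Worked example (for illustration only): for value = 0x2C (binary 101100)
   the loop above iterates three times:

       0b101100 & 0b101011 = 0b101000
       0b101000 & 0b100111 = 0b100000
       0b100000 & 0b011111 = 0b000000

   Each "value &= value - 1" clears exactly one set bit (Kernighan's trick),
   so the number of iterations equals the population count, here 3.  */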
1832
1833 typedef struct
1834 {
1835 enum machine_mode mode;
1836 const char *name;
1837 } arm_fixed_mode_set;
1838
1839 /* A small helper for setting fixed-point libfuncs. */
1840
1841 static void
1842 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1843 const char *funcname, const char *modename,
1844 int num_suffix)
1845 {
1846 char buffer[50];
1847
1848 if (num_suffix == 0)
1849 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1850 else
1851 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1852
1853 set_optab_libfunc (optable, mode, buffer);
1854 }
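/* Example (illustrative only): a call such as

       arm_set_fixed_optab_libfunc (add_optab, SAmode, "add", "sa", 3);

   registers the libcall name "__gnu_addsa3" for SAmode addition, while a
   num_suffix of 0 would drop the trailing digit and give "__gnu_addsa".
   The loops further down use exactly this scheme for every fixed-point
   mode/operation pair.  */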
1855
1856 static void
1857 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1858 enum machine_mode from, const char *funcname,
1859 const char *toname, const char *fromname)
1860 {
1861 char buffer[50];
1862 const char *maybe_suffix_2 = "";
1863
1864 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1865 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1866 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1867 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1868 maybe_suffix_2 = "2";
1869
1870 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1871 maybe_suffix_2);
1872
1873 set_conv_libfunc (optable, to, from, buffer);
1874 }
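/* Example (illustrative only): a conversion from SQmode ("sq") to DQmode
   ("dq") keeps both the signedness and the fract/accum class, so the "2"
   suffix is appended and the name becomes "__gnu_fractsqdq2"; a conversion
   from SImode ("si") to SQmode involves a non-fixed-point mode, so no
   suffix is added and the name is "__gnu_fractsisq".  */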
1875
1876 /* Set up library functions unique to ARM. */
1877
1878 static void
1879 arm_init_libfuncs (void)
1880 {
1881 /* For Linux, we have access to kernel support for atomic operations. */
1882 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1883 init_sync_libfuncs (2 * UNITS_PER_WORD);
1884
1885 /* There are no special library functions unless we are using the
1886 ARM BPABI. */
1887 if (!TARGET_BPABI)
1888 return;
1889
1890 /* The functions below are described in Section 4 of the "Run-Time
1891 ABI for the ARM architecture", Version 1.0. */
1892
1893 /* Double-precision floating-point arithmetic. Table 2. */
1894 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1895 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1896 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1897 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1898 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1899
1900 /* Double-precision comparisons. Table 3. */
1901 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1902 set_optab_libfunc (ne_optab, DFmode, NULL);
1903 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1904 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1905 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1906 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1907 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1908
1909 /* Single-precision floating-point arithmetic. Table 4. */
1910 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1911 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1912 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1913 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1914 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1915
1916 /* Single-precision comparisons. Table 5. */
1917 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1918 set_optab_libfunc (ne_optab, SFmode, NULL);
1919 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1920 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1921 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1922 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1923 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1924
1925 /* Floating-point to integer conversions. Table 6. */
1926 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1927 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1928 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1929 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1930 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1931 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1932 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1933 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1934
1935 /* Conversions between floating types. Table 7. */
1936 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1937 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1938
1939 /* Integer to floating-point conversions. Table 8. */
1940 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1941 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1942 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1943 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1944 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1945 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1946 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1947 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1948
1949 /* Long long. Table 9. */
1950 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1951 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1952 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1953 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1954 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1955 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1956 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1957 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1958
1959 /* Integer (32/32->32) division. \S 4.3.1. */
1960 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1961 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1962
1963 /* The divmod functions are designed so that they can be used for
1964 plain division, even though they return both the quotient and the
1965 remainder. The quotient is returned in the usual location (i.e.,
1966 r0 for SImode, {r0, r1} for DImode), just as would be expected
1967 for an ordinary division routine. Because the AAPCS calling
1968 conventions specify that all of { r0, r1, r2, r3 } are
1969 caller-saved (call-clobbered) registers, there is no need to tell
1970 the compiler explicitly that those registers are clobbered by these
1971 routines. */
1972 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1973 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
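/* Illustrative example (register assignments follow the Run-Time ABI, not
   anything computed in this file): for

       long long q = a / b;

   the compiler emits a call to __aeabi_ldivmod; the quotient comes back in
   {r0, r1} like any other DImode return value, and the remainder left in
   {r2, r3} is simply ignored, which is safe because those registers are
   call-clobbered anyway.  */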
1974
1975 /* For SImode division the ABI provides div-without-mod routines,
1976 which are faster. */
1977 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1978 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1979
1980 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1981 divmod libcalls instead. */
1982 set_optab_libfunc (smod_optab, DImode, NULL);
1983 set_optab_libfunc (umod_optab, DImode, NULL);
1984 set_optab_libfunc (smod_optab, SImode, NULL);
1985 set_optab_libfunc (umod_optab, SImode, NULL);
1986
1987 /* Half-precision float operations. The compiler handles all operations
1988 with NULL libfuncs by converting to SFmode. */
1989 switch (arm_fp16_format)
1990 {
1991 case ARM_FP16_FORMAT_IEEE:
1992 case ARM_FP16_FORMAT_ALTERNATIVE:
1993
1994 /* Conversions. */
1995 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1996 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1997 ? "__gnu_f2h_ieee"
1998 : "__gnu_f2h_alternative"));
1999 set_conv_libfunc (sext_optab, SFmode, HFmode,
2000 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2001 ? "__gnu_h2f_ieee"
2002 : "__gnu_h2f_alternative"));
2003
2004 /* Arithmetic. */
2005 set_optab_libfunc (add_optab, HFmode, NULL);
2006 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2007 set_optab_libfunc (smul_optab, HFmode, NULL);
2008 set_optab_libfunc (neg_optab, HFmode, NULL);
2009 set_optab_libfunc (sub_optab, HFmode, NULL);
2010
2011 /* Comparisons. */
2012 set_optab_libfunc (eq_optab, HFmode, NULL);
2013 set_optab_libfunc (ne_optab, HFmode, NULL);
2014 set_optab_libfunc (lt_optab, HFmode, NULL);
2015 set_optab_libfunc (le_optab, HFmode, NULL);
2016 set_optab_libfunc (ge_optab, HFmode, NULL);
2017 set_optab_libfunc (gt_optab, HFmode, NULL);
2018 set_optab_libfunc (unord_optab, HFmode, NULL);
2019 break;
2020
2021 default:
2022 break;
2023 }
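/* Illustrative example (assuming -mfp16-format=ieee and no hardware
   half-precision support): for

       __fp16 h;
       float f = h + 1.0f;

   the widening of h goes through __gnu_h2f_ieee, the addition is performed
   in SFmode, and a later narrowing store back into an __fp16 object would
   use __gnu_f2h_ieee -- exactly the two conversion libcalls registered
   above, with the HFmode arithmetic and comparison optabs set to NULL.  */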
2024
2025 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2026 {
2027 const arm_fixed_mode_set fixed_arith_modes[] =
2028 {
2029 { QQmode, "qq" },
2030 { UQQmode, "uqq" },
2031 { HQmode, "hq" },
2032 { UHQmode, "uhq" },
2033 { SQmode, "sq" },
2034 { USQmode, "usq" },
2035 { DQmode, "dq" },
2036 { UDQmode, "udq" },
2037 { TQmode, "tq" },
2038 { UTQmode, "utq" },
2039 { HAmode, "ha" },
2040 { UHAmode, "uha" },
2041 { SAmode, "sa" },
2042 { USAmode, "usa" },
2043 { DAmode, "da" },
2044 { UDAmode, "uda" },
2045 { TAmode, "ta" },
2046 { UTAmode, "uta" }
2047 };
2048 const arm_fixed_mode_set fixed_conv_modes[] =
2049 {
2050 { QQmode, "qq" },
2051 { UQQmode, "uqq" },
2052 { HQmode, "hq" },
2053 { UHQmode, "uhq" },
2054 { SQmode, "sq" },
2055 { USQmode, "usq" },
2056 { DQmode, "dq" },
2057 { UDQmode, "udq" },
2058 { TQmode, "tq" },
2059 { UTQmode, "utq" },
2060 { HAmode, "ha" },
2061 { UHAmode, "uha" },
2062 { SAmode, "sa" },
2063 { USAmode, "usa" },
2064 { DAmode, "da" },
2065 { UDAmode, "uda" },
2066 { TAmode, "ta" },
2067 { UTAmode, "uta" },
2068 { QImode, "qi" },
2069 { HImode, "hi" },
2070 { SImode, "si" },
2071 { DImode, "di" },
2072 { TImode, "ti" },
2073 { SFmode, "sf" },
2074 { DFmode, "df" }
2075 };
2076 unsigned int i, j;
2077
2078 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2079 {
2080 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2081 "add", fixed_arith_modes[i].name, 3);
2082 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2083 "ssadd", fixed_arith_modes[i].name, 3);
2084 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2085 "usadd", fixed_arith_modes[i].name, 3);
2086 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2087 "sub", fixed_arith_modes[i].name, 3);
2088 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2089 "sssub", fixed_arith_modes[i].name, 3);
2090 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2091 "ussub", fixed_arith_modes[i].name, 3);
2092 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2093 "mul", fixed_arith_modes[i].name, 3);
2094 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2095 "ssmul", fixed_arith_modes[i].name, 3);
2096 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2097 "usmul", fixed_arith_modes[i].name, 3);
2098 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2099 "div", fixed_arith_modes[i].name, 3);
2100 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2101 "udiv", fixed_arith_modes[i].name, 3);
2102 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2103 "ssdiv", fixed_arith_modes[i].name, 3);
2104 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2105 "usdiv", fixed_arith_modes[i].name, 3);
2106 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2107 "neg", fixed_arith_modes[i].name, 2);
2108 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2109 "ssneg", fixed_arith_modes[i].name, 2);
2110 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2111 "usneg", fixed_arith_modes[i].name, 2);
2112 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2113 "ashl", fixed_arith_modes[i].name, 3);
2114 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2115 "ashr", fixed_arith_modes[i].name, 3);
2116 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2117 "lshr", fixed_arith_modes[i].name, 3);
2118 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2119 "ssashl", fixed_arith_modes[i].name, 3);
2120 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2121 "usashl", fixed_arith_modes[i].name, 3);
2122 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2123 "cmp", fixed_arith_modes[i].name, 2);
2124 }
2125
2126 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2127 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2128 {
2129 if (i == j
2130 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2131 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2132 continue;
2133
2134 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2135 fixed_conv_modes[j].mode, "fract",
2136 fixed_conv_modes[i].name,
2137 fixed_conv_modes[j].name);
2138 arm_set_fixed_conv_libfunc (satfract_optab,
2139 fixed_conv_modes[i].mode,
2140 fixed_conv_modes[j].mode, "satfract",
2141 fixed_conv_modes[i].name,
2142 fixed_conv_modes[j].name);
2143 arm_set_fixed_conv_libfunc (fractuns_optab,
2144 fixed_conv_modes[i].mode,
2145 fixed_conv_modes[j].mode, "fractuns",
2146 fixed_conv_modes[i].name,
2147 fixed_conv_modes[j].name);
2148 arm_set_fixed_conv_libfunc (satfractuns_optab,
2149 fixed_conv_modes[i].mode,
2150 fixed_conv_modes[j].mode, "satfractuns",
2151 fixed_conv_modes[i].name,
2152 fixed_conv_modes[j].name);
2153 }
2154 }
2155
2156 if (TARGET_AAPCS_BASED)
2157 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2158 }
2159
2160 /* On AAPCS systems, this is the "struct __va_list". */
2161 static GTY(()) tree va_list_type;
2162
2163 /* Return the type to use as __builtin_va_list. */
2164 static tree
2165 arm_build_builtin_va_list (void)
2166 {
2167 tree va_list_name;
2168 tree ap_field;
2169
2170 if (!TARGET_AAPCS_BASED)
2171 return std_build_builtin_va_list ();
2172
2173 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2174 defined as:
2175
2176 struct __va_list
2177 {
2178 void *__ap;
2179 };
2180
2181 The C Library ABI further reinforces this definition in \S
2182 4.1.
2183
2184 We must follow this definition exactly. The structure tag
2185 name is visible in C++ mangled names, and thus forms a part
2186 of the ABI. The field name may be used by people who
2187 #include <stdarg.h>. */
2188 /* Create the type. */
2189 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2190 /* Give it the required name. */
2191 va_list_name = build_decl (BUILTINS_LOCATION,
2192 TYPE_DECL,
2193 get_identifier ("__va_list"),
2194 va_list_type);
2195 DECL_ARTIFICIAL (va_list_name) = 1;
2196 TYPE_NAME (va_list_type) = va_list_name;
2197 TYPE_STUB_DECL (va_list_type) = va_list_name;
2198 /* Create the __ap field. */
2199 ap_field = build_decl (BUILTINS_LOCATION,
2200 FIELD_DECL,
2201 get_identifier ("__ap"),
2202 ptr_type_node);
2203 DECL_ARTIFICIAL (ap_field) = 1;
2204 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2205 TYPE_FIELDS (va_list_type) = ap_field;
2206 /* Compute its layout. */
2207 layout_type (va_list_type);
2208
2209 return va_list_type;
2210 }
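/* Illustrative note (nothing below is emitted by this function): AAPCS code
   effectively sees

       struct __va_list { void *__ap; };
       typedef struct __va_list va_list;

   and because the tag is part of the ABI it shows up in C++ mangled names,
   typically as "St9__va_list" (as if the type lived in namespace std).  */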
2211
2212 /* Return an expression of type "void *" pointing to the next
2213 available argument in a variable-argument list. VALIST is the
2214 user-level va_list object, of type __builtin_va_list. */
2215 static tree
2216 arm_extract_valist_ptr (tree valist)
2217 {
2218 if (TREE_TYPE (valist) == error_mark_node)
2219 return error_mark_node;
2220
2221 /* On an AAPCS target, the pointer is stored within "struct
2222 va_list". */
2223 if (TARGET_AAPCS_BASED)
2224 {
2225 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2226 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2227 valist, ap_field, NULL_TREE);
2228 }
2229
2230 return valist;
2231 }
2232
2233 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2234 static void
2235 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2236 {
2237 valist = arm_extract_valist_ptr (valist);
2238 std_expand_builtin_va_start (valist, nextarg);
2239 }
2240
2241 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2242 static tree
2243 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2244 gimple_seq *post_p)
2245 {
2246 valist = arm_extract_valist_ptr (valist);
2247 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2248 }
2249
2250 /* Fix up any incompatible options that the user has specified. */
2251 static void
2252 arm_option_override (void)
2253 {
2254 if (global_options_set.x_arm_arch_option)
2255 arm_selected_arch = &all_architectures[arm_arch_option];
2256
2257 if (global_options_set.x_arm_cpu_option)
2258 {
2259 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2260 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2261 }
2262
2263 if (global_options_set.x_arm_tune_option)
2264 arm_selected_tune = &all_cores[(int) arm_tune_option];
2265
2266 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2267 SUBTARGET_OVERRIDE_OPTIONS;
2268 #endif
2269
2270 if (arm_selected_arch)
2271 {
2272 if (arm_selected_cpu)
2273 {
2274 /* Check for conflict between mcpu and march. */
2275 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2276 {
2277 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2278 arm_selected_cpu->name, arm_selected_arch->name);
2279 /* -march wins for code generation.
2280 -mcpu wins for default tuning. */
2281 if (!arm_selected_tune)
2282 arm_selected_tune = arm_selected_cpu;
2283
2284 arm_selected_cpu = arm_selected_arch;
2285 }
2286 else
2287 /* -mcpu wins. */
2288 arm_selected_arch = NULL;
2289 }
2290 else
2291 /* Pick a CPU based on the architecture. */
2292 arm_selected_cpu = arm_selected_arch;
2293 }
2294
2295 /* If the user did not specify a processor, choose one for them. */
2296 if (!arm_selected_cpu)
2297 {
2298 const struct processors * sel;
2299 unsigned int sought;
2300
2301 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2302 if (!arm_selected_cpu->name)
2303 {
2304 #ifdef SUBTARGET_CPU_DEFAULT
2305 /* Use the subtarget default CPU if none was specified by
2306 configure. */
2307 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2308 #endif
2309 /* Default to ARM6. */
2310 if (!arm_selected_cpu->name)
2311 arm_selected_cpu = &all_cores[arm6];
2312 }
2313
2314 sel = arm_selected_cpu;
2315 insn_flags = sel->flags;
2316
2317 /* Now check to see if the user has specified some command line
2318 switches that require certain abilities from the CPU. */
2319 sought = 0;
2320
2321 if (TARGET_INTERWORK || TARGET_THUMB)
2322 {
2323 sought |= (FL_THUMB | FL_MODE32);
2324
2325 /* There are no ARM processors that support both APCS-26 and
2326 interworking. Therefore we force FL_MODE26 to be removed
2327 from insn_flags here (if it was set), so that the search
2328 below will always be able to find a compatible processor. */
2329 insn_flags &= ~FL_MODE26;
2330 }
2331
2332 if (sought != 0 && ((sought & insn_flags) != sought))
2333 {
2334 /* Try to locate a CPU type that supports all of the abilities
2335 of the default CPU, plus the extra abilities requested by
2336 the user. */
2337 for (sel = all_cores; sel->name != NULL; sel++)
2338 if ((sel->flags & sought) == (sought | insn_flags))
2339 break;
2340
2341 if (sel->name == NULL)
2342 {
2343 unsigned current_bit_count = 0;
2344 const struct processors * best_fit = NULL;
2345
2346 /* Ideally we would like to issue an error message here
2347 saying that it was not possible to find a CPU compatible
2348 with the default CPU, but which also supports the command
2349 line options specified by the programmer, and so they
2350 ought to use the -mcpu=<name> command line option to
2351 override the default CPU type.
2352
2353 If we cannot find a cpu that has both the
2354 characteristics of the default cpu and the given
2355 command line options we scan the array again looking
2356 for a best match. */
2357 for (sel = all_cores; sel->name != NULL; sel++)
2358 if ((sel->flags & sought) == sought)
2359 {
2360 unsigned count;
2361
2362 count = bit_count (sel->flags & insn_flags);
2363
2364 if (count >= current_bit_count)
2365 {
2366 best_fit = sel;
2367 current_bit_count = count;
2368 }
2369 }
2370
2371 gcc_assert (best_fit);
2372 sel = best_fit;
2373 }
2374
2375 arm_selected_cpu = sel;
2376 }
2377 }
2378
2379 gcc_assert (arm_selected_cpu);
2380 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2381 if (!arm_selected_tune)
2382 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2383
2384 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2385 insn_flags = arm_selected_cpu->flags;
2386 arm_base_arch = arm_selected_cpu->base_arch;
2387
2388 arm_tune = arm_selected_tune->core;
2389 tune_flags = arm_selected_tune->flags;
2390 current_tune = arm_selected_tune->tune;
2391
2392 /* Make sure that the processor choice does not conflict with any of the
2393 other command line choices. */
2394 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2395 error ("target CPU does not support ARM mode");
2396
2397 /* BPABI targets use linker tricks to allow interworking on cores
2398 without thumb support. */
2399 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2400 {
2401 warning (0, "target CPU does not support interworking" );
2402 target_flags &= ~MASK_INTERWORK;
2403 }
2404
2405 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2406 {
2407 warning (0, "target CPU does not support THUMB instructions");
2408 target_flags &= ~MASK_THUMB;
2409 }
2410
2411 if (TARGET_APCS_FRAME && TARGET_THUMB)
2412 {
2413 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2414 target_flags &= ~MASK_APCS_FRAME;
2415 }
2416
2417 /* Callee super interworking implies thumb interworking. Adding
2418 this to the flags here simplifies the logic elsewhere. */
2419 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2420 target_flags |= MASK_INTERWORK;
2421
2422 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2423 from here where no function is being compiled currently. */
2424 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2425 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2426
2427 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2428 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2429
2430 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2431 {
2432 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2433 target_flags |= MASK_APCS_FRAME;
2434 }
2435
2436 if (TARGET_POKE_FUNCTION_NAME)
2437 target_flags |= MASK_APCS_FRAME;
2438
2439 if (TARGET_APCS_REENT && flag_pic)
2440 error ("-fpic and -mapcs-reent are incompatible");
2441
2442 if (TARGET_APCS_REENT)
2443 warning (0, "APCS reentrant code not supported. Ignored");
2444
2445 /* If this target is normally configured to use APCS frames, warn if they
2446 are turned off and debugging is turned on. */
2447 if (TARGET_ARM
2448 && write_symbols != NO_DEBUG
2449 && !TARGET_APCS_FRAME
2450 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2451 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2452
2453 if (TARGET_APCS_FLOAT)
2454 warning (0, "passing floating point arguments in fp regs not yet supported");
2455
2456 if (TARGET_LITTLE_WORDS)
2457 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
2458 "will be removed in a future release");
2459
2460 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2461 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2462 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2463 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2464 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2465 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2466 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2467 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2468 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2469 arm_arch6m = arm_arch6 && !arm_arch_notm;
2470 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2471 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2472 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2473 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2474 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2475
2476 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2477 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2478 thumb_code = TARGET_ARM == 0;
2479 thumb1_code = TARGET_THUMB1 != 0;
2480 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2481 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2482 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2483 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2484 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2485 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2486 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2487 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2488 if (arm_restrict_it == 2)
2489 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2490
2491 if (!TARGET_THUMB2)
2492 arm_restrict_it = 0;
2493
2494 /* If we are not using the default (ARM mode) section anchor offset
2495 ranges, then set the correct ranges now. */
2496 if (TARGET_THUMB1)
2497 {
2498 /* Thumb-1 LDR instructions cannot have negative offsets.
2499 Permissible positive offset ranges are 5-bit (for byte loads),
2500 6-bit (for halfword loads), or 7-bit (for word loads).
2501 Empirical results suggest a 7-bit anchor range gives the best
2502 overall code size. */
2503 targetm.min_anchor_offset = 0;
2504 targetm.max_anchor_offset = 127;
2505 }
2506 else if (TARGET_THUMB2)
2507 {
2508 /* The minimum is set such that the total size of the block
2509 for a particular anchor is 248 + 1 + 4095 bytes, which is
2510 divisible by eight, ensuring natural spacing of anchors. */
2511 targetm.min_anchor_offset = -248;
2512 targetm.max_anchor_offset = 4095;
2513 }
2514
2515 /* V5 code we generate is completely interworking capable, so we turn off
2516 TARGET_INTERWORK here to avoid many tests later on. */
2517
2518 /* XXX However, we must pass the right pre-processor defines to CPP
2519 or GLD can get confused. This is a hack. */
2520 if (TARGET_INTERWORK)
2521 arm_cpp_interwork = 1;
2522
2523 if (arm_arch5)
2524 target_flags &= ~MASK_INTERWORK;
2525
2526 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2527 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2528
2529 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2530 error ("iwmmxt abi requires an iwmmxt capable cpu");
2531
2532 if (!global_options_set.x_arm_fpu_index)
2533 {
2534 const char *target_fpu_name;
2535 bool ok;
2536
2537 #ifdef FPUTYPE_DEFAULT
2538 target_fpu_name = FPUTYPE_DEFAULT;
2539 #else
2540 target_fpu_name = "vfp";
2541 #endif
2542
2543 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2544 CL_TARGET);
2545 gcc_assert (ok);
2546 }
2547
2548 arm_fpu_desc = &all_fpus[arm_fpu_index];
2549
2550 switch (arm_fpu_desc->model)
2551 {
2552 case ARM_FP_MODEL_VFP:
2553 arm_fpu_attr = FPU_VFP;
2554 break;
2555
2556 default:
2557 gcc_unreachable();
2558 }
2559
2560 if (TARGET_AAPCS_BASED)
2561 {
2562 if (TARGET_CALLER_INTERWORKING)
2563 error ("AAPCS does not support -mcaller-super-interworking");
2564 else
2565 if (TARGET_CALLEE_INTERWORKING)
2566 error ("AAPCS does not support -mcallee-super-interworking");
2567 }
2568
2569 /* iWMMXt and NEON are incompatible. */
2570 if (TARGET_IWMMXT && TARGET_NEON)
2571 error ("iWMMXt and NEON are incompatible");
2572
2573 /* iWMMXt unsupported under Thumb mode. */
2574 if (TARGET_THUMB && TARGET_IWMMXT)
2575 error ("iWMMXt unsupported under Thumb mode");
2576
2577 /* __fp16 support currently assumes the core has ldrh. */
2578 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2579 sorry ("__fp16 and no ldrh");
2580
2581 /* If soft-float is specified then don't use FPU. */
2582 if (TARGET_SOFT_FLOAT)
2583 arm_fpu_attr = FPU_NONE;
2584
2585 if (TARGET_AAPCS_BASED)
2586 {
2587 if (arm_abi == ARM_ABI_IWMMXT)
2588 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2589 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2590 && TARGET_HARD_FLOAT
2591 && TARGET_VFP)
2592 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2593 else
2594 arm_pcs_default = ARM_PCS_AAPCS;
2595 }
2596 else
2597 {
2598 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2599 sorry ("-mfloat-abi=hard and VFP");
2600
2601 if (arm_abi == ARM_ABI_APCS)
2602 arm_pcs_default = ARM_PCS_APCS;
2603 else
2604 arm_pcs_default = ARM_PCS_ATPCS;
2605 }
2606
2607 /* For arm2/3 there is no need to do any scheduling if we are doing
2608 software floating-point. */
2609 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2610 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2611
2612 /* Use the cp15 method if it is available. */
2613 if (target_thread_pointer == TP_AUTO)
2614 {
2615 if (arm_arch6k && !TARGET_THUMB1)
2616 target_thread_pointer = TP_CP15;
2617 else
2618 target_thread_pointer = TP_SOFT;
2619 }
2620
2621 if (TARGET_HARD_TP && TARGET_THUMB1)
2622 error ("can not use -mtp=cp15 with 16-bit Thumb");
2623
2624 /* Override the default structure alignment for AAPCS ABI. */
2625 if (!global_options_set.x_arm_structure_size_boundary)
2626 {
2627 if (TARGET_AAPCS_BASED)
2628 arm_structure_size_boundary = 8;
2629 }
2630 else
2631 {
2632 if (arm_structure_size_boundary != 8
2633 && arm_structure_size_boundary != 32
2634 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2635 {
2636 if (ARM_DOUBLEWORD_ALIGN)
2637 warning (0,
2638 "structure size boundary can only be set to 8, 32 or 64");
2639 else
2640 warning (0, "structure size boundary can only be set to 8 or 32");
2641 arm_structure_size_boundary
2642 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2643 }
2644 }
2645
2646 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2647 {
2648 error ("RTP PIC is incompatible with Thumb");
2649 flag_pic = 0;
2650 }
2651
2652 /* If stack checking is disabled, we can use r10 as the PIC register,
2653 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2654 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2655 {
2656 if (TARGET_VXWORKS_RTP)
2657 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2658 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2659 }
2660
2661 if (flag_pic && TARGET_VXWORKS_RTP)
2662 arm_pic_register = 9;
2663
2664 if (arm_pic_register_string != NULL)
2665 {
2666 int pic_register = decode_reg_name (arm_pic_register_string);
2667
2668 if (!flag_pic)
2669 warning (0, "-mpic-register= is useless without -fpic");
2670
2671 /* Prevent the user from choosing an obviously stupid PIC register. */
2672 else if (pic_register < 0 || call_used_regs[pic_register]
2673 || pic_register == HARD_FRAME_POINTER_REGNUM
2674 || pic_register == STACK_POINTER_REGNUM
2675 || pic_register >= PC_REGNUM
2676 || (TARGET_VXWORKS_RTP
2677 && (unsigned int) pic_register != arm_pic_register))
2678 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2679 else
2680 arm_pic_register = pic_register;
2681 }
2682
2683 if (TARGET_VXWORKS_RTP
2684 && !global_options_set.x_arm_pic_data_is_text_relative)
2685 arm_pic_data_is_text_relative = 0;
2686
2687 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2688 if (fix_cm3_ldrd == 2)
2689 {
2690 if (arm_selected_cpu->core == cortexm3)
2691 fix_cm3_ldrd = 1;
2692 else
2693 fix_cm3_ldrd = 0;
2694 }
2695
2696 /* Enable -munaligned-access by default for
2697 - all ARMv6 architecture-based processors
2698 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2699 - ARMv8 architecture-based processors.
2700
2701 Disable -munaligned-access by default for
2702 - all pre-ARMv6 architecture-based processors
2703 - ARMv6-M architecture-based processors. */
2704
2705 if (unaligned_access == 2)
2706 {
2707 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2708 unaligned_access = 1;
2709 else
2710 unaligned_access = 0;
2711 }
2712 else if (unaligned_access == 1
2713 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2714 {
2715 warning (0, "target CPU does not support unaligned accesses");
2716 unaligned_access = 0;
2717 }
2718
2719 if (TARGET_THUMB1 && flag_schedule_insns)
2720 {
2721 /* Don't warn since it's on by default in -O2. */
2722 flag_schedule_insns = 0;
2723 }
2724
2725 if (optimize_size)
2726 {
2727 /* If optimizing for size, bump the number of instructions that we
2728 are prepared to conditionally execute (even on a StrongARM). */
2729 max_insns_skipped = 6;
2730 }
2731 else
2732 max_insns_skipped = current_tune->max_insns_skipped;
2733
2734 /* Hot/Cold partitioning is not currently supported, since we can't
2735 handle literal pool placement in that case. */
2736 if (flag_reorder_blocks_and_partition)
2737 {
2738 inform (input_location,
2739 "-freorder-blocks-and-partition not supported on this architecture");
2740 flag_reorder_blocks_and_partition = 0;
2741 flag_reorder_blocks = 1;
2742 }
2743
2744 if (flag_pic)
2745 /* Hoisting PIC address calculations more aggressively provides a small,
2746 but measurable, size reduction for PIC code. Therefore, we decrease
2747 the bar for unrestricted expression hoisting to the cost of PIC address
2748 calculation, which is 2 instructions. */
2749 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2750 global_options.x_param_values,
2751 global_options_set.x_param_values);
2752
2753 /* ARM EABI defaults to strict volatile bitfields. */
2754 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2755 && abi_version_at_least(2))
2756 flag_strict_volatile_bitfields = 1;
2757
2758 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
2759 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2760 if (flag_prefetch_loop_arrays < 0
2761 && HAVE_prefetch
2762 && optimize >= 3
2763 && current_tune->num_prefetch_slots > 0)
2764 flag_prefetch_loop_arrays = 1;
2765
2766 /* Set up the parameters to be used in the prefetching algorithm. Do not override
2767 the defaults unless we are tuning for a core we have researched values for. */
2768 if (current_tune->num_prefetch_slots > 0)
2769 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2770 current_tune->num_prefetch_slots,
2771 global_options.x_param_values,
2772 global_options_set.x_param_values);
2773 if (current_tune->l1_cache_line_size >= 0)
2774 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2775 current_tune->l1_cache_line_size,
2776 global_options.x_param_values,
2777 global_options_set.x_param_values);
2778 if (current_tune->l1_cache_size >= 0)
2779 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2780 current_tune->l1_cache_size,
2781 global_options.x_param_values,
2782 global_options_set.x_param_values);
2783
2784 /* Use Neon rather than the core registers to perform 64-bit
2785 operations. */
2786 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2787 if (use_neon_for_64bits == 1)
2788 prefer_neon_for_64bits = true;
2789
2790 /* Use the alternative scheduling-pressure algorithm by default. */
2791 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2792 global_options.x_param_values,
2793 global_options_set.x_param_values);
2794
2795 /* Disable shrink-wrap when optimizing function for size, since it tends to
2796 generate additional returns. */
2797 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2798 flag_shrink_wrap = false;
2799 /* TBD: Dwarf info for apcs frame is not handled yet. */
2800 if (TARGET_APCS_FRAME)
2801 flag_shrink_wrap = false;
2802
2803 /* We only support -mslow-flash-data on armv7-m targets. */
2804 if (target_slow_flash_data
2805 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2806 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
2807 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2808
2809 /* Currently, for slow flash data, we just disable literal pools. */
2810 if (target_slow_flash_data)
2811 arm_disable_literal_pool = true;
2812
2813 /* Register global variables with the garbage collector. */
2814 arm_add_gc_roots ();
2815 }
2816
2817 static void
2818 arm_add_gc_roots (void)
2819 {
2820 gcc_obstack_init(&minipool_obstack);
2821 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2822 }
2823 \f
2824 /* A table of known ARM exception types.
2825 For use with the interrupt function attribute. */
2826
2827 typedef struct
2828 {
2829 const char *const arg;
2830 const unsigned long return_value;
2831 }
2832 isr_attribute_arg;
2833
2834 static const isr_attribute_arg isr_attribute_args [] =
2835 {
2836 { "IRQ", ARM_FT_ISR },
2837 { "irq", ARM_FT_ISR },
2838 { "FIQ", ARM_FT_FIQ },
2839 { "fiq", ARM_FT_FIQ },
2840 { "ABORT", ARM_FT_ISR },
2841 { "abort", ARM_FT_ISR },
2842 { "ABORT", ARM_FT_ISR },
2843 { "abort", ARM_FT_ISR },
2844 { "UNDEF", ARM_FT_EXCEPTION },
2845 { "undef", ARM_FT_EXCEPTION },
2846 { "SWI", ARM_FT_EXCEPTION },
2847 { "swi", ARM_FT_EXCEPTION },
2848 { NULL, ARM_FT_NORMAL }
2849 };
2850
2851 /* Returns the (interrupt) function type of the current
2852 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2853
2854 static unsigned long
2855 arm_isr_value (tree argument)
2856 {
2857 const isr_attribute_arg * ptr;
2858 const char * arg;
2859
2860 if (!arm_arch_notm)
2861 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2862
2863 /* No argument - default to IRQ. */
2864 if (argument == NULL_TREE)
2865 return ARM_FT_ISR;
2866
2867 /* Get the value of the argument. */
2868 if (TREE_VALUE (argument) == NULL_TREE
2869 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2870 return ARM_FT_UNKNOWN;
2871
2872 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2873
2874 /* Check it against the list of known arguments. */
2875 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2876 if (streq (arg, ptr->arg))
2877 return ptr->return_value;
2878
2879 /* An unrecognized interrupt type. */
2880 return ARM_FT_UNKNOWN;
2881 }
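/* Usage example (my_irq_handler is a hypothetical name): a handler declared
   as

       void __attribute__ ((interrupt ("IRQ"))) my_irq_handler (void);

   reaches this function with the string "IRQ" and is classified as
   ARM_FT_ISR; an unrecognised string yields ARM_FT_UNKNOWN, and on
   M-profile targets (!arm_arch_notm) the argument is ignored and
   ARM_FT_NORMAL | ARM_FT_STACKALIGN is returned instead.  */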
2882
2883 /* Computes the type of the current function. */
2884
2885 static unsigned long
2886 arm_compute_func_type (void)
2887 {
2888 unsigned long type = ARM_FT_UNKNOWN;
2889 tree a;
2890 tree attr;
2891
2892 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2893
2894 /* Decide if the current function is volatile. Such functions
2895 never return, and many memory cycles can be saved by not storing
2896 register values that will never be needed again. This optimization
2897 was added to speed up context switching in a kernel application. */
2898 if (optimize > 0
2899 && (TREE_NOTHROW (current_function_decl)
2900 || !(flag_unwind_tables
2901 || (flag_exceptions
2902 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2903 && TREE_THIS_VOLATILE (current_function_decl))
2904 type |= ARM_FT_VOLATILE;
2905
2906 if (cfun->static_chain_decl != NULL)
2907 type |= ARM_FT_NESTED;
2908
2909 attr = DECL_ATTRIBUTES (current_function_decl);
2910
2911 a = lookup_attribute ("naked", attr);
2912 if (a != NULL_TREE)
2913 type |= ARM_FT_NAKED;
2914
2915 a = lookup_attribute ("isr", attr);
2916 if (a == NULL_TREE)
2917 a = lookup_attribute ("interrupt", attr);
2918
2919 if (a == NULL_TREE)
2920 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2921 else
2922 type |= arm_isr_value (TREE_VALUE (a));
2923
2924 return type;
2925 }
2926
2927 /* Returns the type of the current function. */
2928
2929 unsigned long
2930 arm_current_func_type (void)
2931 {
2932 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2933 cfun->machine->func_type = arm_compute_func_type ();
2934
2935 return cfun->machine->func_type;
2936 }
2937
2938 bool
2939 arm_allocate_stack_slots_for_args (void)
2940 {
2941 /* Naked functions should not allocate stack slots for arguments. */
2942 return !IS_NAKED (arm_current_func_type ());
2943 }
2944
2945 static bool
2946 arm_warn_func_return (tree decl)
2947 {
2948 /* Naked functions are implemented entirely in assembly, including the
2949 return sequence, so suppress warnings about this. */
2950 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2951 }
2952
2953 \f
2954 /* Output assembler code for a block containing the constant parts
2955 of a trampoline, leaving space for the variable parts.
2956
2957 On the ARM, (if r8 is the static chain regnum, and remembering that
2958 referencing pc adds an offset of 8) the trampoline looks like:
2959 ldr r8, [pc, #0]
2960 ldr pc, [pc]
2961 .word static chain value
2962 .word function's address
2963 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2964
2965 static void
2966 arm_asm_trampoline_template (FILE *f)
2967 {
2968 if (TARGET_ARM)
2969 {
2970 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2971 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2972 }
2973 else if (TARGET_THUMB2)
2974 {
2975 /* The Thumb-2 trampoline is similar to the arm implementation.
2976 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2977 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2978 STATIC_CHAIN_REGNUM, PC_REGNUM);
2979 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2980 }
2981 else
2982 {
2983 ASM_OUTPUT_ALIGN (f, 2);
2984 fprintf (f, "\t.code\t16\n");
2985 fprintf (f, ".Ltrampoline_start:\n");
2986 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2987 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2988 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2989 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2990 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2991 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2992 }
2993 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2994 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2995 }
2996
2997 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2998
2999 static void
3000 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3001 {
3002 rtx fnaddr, mem, a_tramp;
3003
3004 emit_block_move (m_tramp, assemble_trampoline_template (),
3005 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3006
3007 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3008 emit_move_insn (mem, chain_value);
3009
3010 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3011 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3012 emit_move_insn (mem, fnaddr);
3013
3014 a_tramp = XEXP (m_tramp, 0);
3015 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3016 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3017 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3018 }
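/* Illustrative layout for the 32-bit (ARM/Thumb-2) case, offsets in bytes:

       0:  first template instruction   (loads the static chain register)
       4:  second template instruction  (loads pc with the target address)
       8:  static chain value           (written by the first move above)
       12: address of the target function

   The trailing __clear_cache libcall covers the whole TRAMPOLINE_SIZE block
   so that the freshly written words are visible to instruction fetch.  */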
3019
3020 /* Thumb trampolines should be entered in thumb mode, so set
3021 the bottom bit of the address. */
3022
3023 static rtx
3024 arm_trampoline_adjust_address (rtx addr)
3025 {
3026 if (TARGET_THUMB)
3027 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3028 NULL, 0, OPTAB_LIB_WIDEN);
3029 return addr;
3030 }
3031 \f
3032 /* Return 1 if it is possible to return using a single instruction.
3033 If SIBLING is non-null, this is a test for a return before a sibling
3034 call. SIBLING is the call insn, so we can examine its register usage. */
3035
3036 int
3037 use_return_insn (int iscond, rtx sibling)
3038 {
3039 int regno;
3040 unsigned int func_type;
3041 unsigned long saved_int_regs;
3042 unsigned HOST_WIDE_INT stack_adjust;
3043 arm_stack_offsets *offsets;
3044
3045 /* Never use a return instruction before reload has run. */
3046 if (!reload_completed)
3047 return 0;
3048
3049 func_type = arm_current_func_type ();
3050
3051 /* Naked, volatile and stack alignment functions need special
3052 consideration. */
3053 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3054 return 0;
3055
3056 /* So do interrupt functions that use the frame pointer and Thumb
3057 interrupt functions. */
3058 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3059 return 0;
3060
3061 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3062 && !optimize_function_for_size_p (cfun))
3063 return 0;
3064
3065 offsets = arm_get_frame_offsets ();
3066 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3067
3068 /* As do variadic functions. */
3069 if (crtl->args.pretend_args_size
3070 || cfun->machine->uses_anonymous_args
3071 /* Or if the function calls __builtin_eh_return () */
3072 || crtl->calls_eh_return
3073 /* Or if the function calls alloca */
3074 || cfun->calls_alloca
3075 /* Or if there is a stack adjustment. However, if the stack pointer
3076 is saved on the stack, we can use a pre-incrementing stack load. */
3077 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3078 && stack_adjust == 4)))
3079 return 0;
3080
3081 saved_int_regs = offsets->saved_regs_mask;
3082
3083 /* Unfortunately, the insn
3084
3085 ldmib sp, {..., sp, ...}
3086
3087 triggers a bug on most SA-110 based devices, such that the stack
3088 pointer won't be correctly restored if the instruction takes a
3089 page fault. We work around this problem by popping r3 along with
3090 the other registers, since that is never slower than executing
3091 another instruction.
3092
3093 We test for !arm_arch5 here, because code for any architecture
3094 less than this could potentially be run on one of the buggy
3095 chips. */
3096 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3097 {
3098 /* Validate that r3 is a call-clobbered register (always true in
3099 the default abi) ... */
3100 if (!call_used_regs[3])
3101 return 0;
3102
3103 /* ... that it isn't being used for a return value ... */
3104 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3105 return 0;
3106
3107 /* ... or for a tail-call argument ... */
3108 if (sibling)
3109 {
3110 gcc_assert (CALL_P (sibling));
3111
3112 if (find_regno_fusage (sibling, USE, 3))
3113 return 0;
3114 }
3115
3116 /* ... and that there are no call-saved registers in r0-r2
3117 (always true in the default ABI). */
3118 if (saved_int_regs & 0x7)
3119 return 0;
3120 }
3121
3122 /* Can't be done if interworking with Thumb, and any registers have been
3123 stacked. */
3124 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3125 return 0;
3126
3127 /* On StrongARM, conditional returns are expensive if they aren't
3128 taken and multiple registers have been stacked. */
3129 if (iscond && arm_tune_strongarm)
3130 {
3131 /* Conditional return when just the LR is stored is a simple
3132 conditional-load instruction, that's not expensive. */
3133 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3134 return 0;
3135
3136 if (flag_pic
3137 && arm_pic_register != INVALID_REGNUM
3138 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3139 return 0;
3140 }
3141
3142 /* If there are saved registers but the LR isn't saved, then we need
3143 two instructions for the return. */
3144 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3145 return 0;
3146
3147 /* Can't be done if any of the VFP regs are pushed,
3148 since this also requires an insn. */
3149 if (TARGET_HARD_FLOAT && TARGET_VFP)
3150 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3151 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3152 return 0;
3153
3154 if (TARGET_REALLY_IWMMXT)
3155 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3156 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3157 return 0;
3158
3159 return 1;
3160 }
3161
3162 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3163 shrink-wrapping if possible. This is the case if we need to emit a
3164 prologue, which we can test by looking at the offsets. */
3165 bool
3166 use_simple_return_p (void)
3167 {
3168 arm_stack_offsets *offsets;
3169
3170 offsets = arm_get_frame_offsets ();
3171 return offsets->outgoing_args != 0;
3172 }
3173
3174 /* Return TRUE if int I is a valid immediate ARM constant. */
3175
3176 int
3177 const_ok_for_arm (HOST_WIDE_INT i)
3178 {
3179 int lowbit;
3180
3181 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3182 be all zero, or all one. */
3183 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3184 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3185 != ((~(unsigned HOST_WIDE_INT) 0)
3186 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3187 return FALSE;
3188
3189 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3190
3191 /* Fast return for 0 and small values. We must do this for zero, since
3192 the code below can't handle that one case. */
3193 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3194 return TRUE;
3195
3196 /* Get the number of trailing zeros. */
3197 lowbit = ffs((int) i) - 1;
3198
3199 /* Only even shifts are allowed in ARM mode so round down to the
3200 nearest even number. */
3201 if (TARGET_ARM)
3202 lowbit &= ~1;
3203
3204 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3205 return TRUE;
3206
3207 if (TARGET_ARM)
3208 {
3209 /* Allow rotated constants in ARM mode. */
3210 if (lowbit <= 4
3211 && ((i & ~0xc000003f) == 0
3212 || (i & ~0xf000000f) == 0
3213 || (i & ~0xfc000003) == 0))
3214 return TRUE;
3215 }
3216 else
3217 {
3218 HOST_WIDE_INT v;
3219
3220 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3221 v = i & 0xff;
3222 v |= v << 16;
3223 if (i == v || i == (v | (v << 8)))
3224 return TRUE;
3225
3226 /* Allow repeated pattern 0xXY00XY00. */
3227 v = i & 0xff00;
3228 v |= v << 16;
3229 if (i == v)
3230 return TRUE;
3231 }
3232
3233 return FALSE;
3234 }
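/* Worked examples (illustrative values only):

       0x000000FF - valid everywhere: fits in the low 8 bits.
       0x00AB0000 - valid: an 8-bit value (0xAB) shifted left by an even
                    amount (16).
       0x0000AB01 - invalid here: the set bits span more than 8 positions
                    and no even rotation of an 8-bit value covers them.
       0x00FF00FF - rejected for ARM, but accepted in the else-branch above
                    via the replicated 0x00XY00XY Thumb-2 pattern.  */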
3235
3236 /* Return true if I is a valid constant for the operation CODE. */
3237 int
3238 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3239 {
3240 if (const_ok_for_arm (i))
3241 return 1;
3242
3243 switch (code)
3244 {
3245 case SET:
3246 /* See if we can use movw. */
3247 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3248 return 1;
3249 else
3250 /* Otherwise, try mvn. */
3251 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3252
3253 case PLUS:
3254 /* See if we can use addw or subw. */
3255 if (TARGET_THUMB2
3256 && ((i & 0xfffff000) == 0
3257 || ((-i) & 0xfffff000) == 0))
3258 return 1;
3259 /* else fall through. */
3260
3261 case COMPARE:
3262 case EQ:
3263 case NE:
3264 case GT:
3265 case LE:
3266 case LT:
3267 case GE:
3268 case GEU:
3269 case LTU:
3270 case GTU:
3271 case LEU:
3272 case UNORDERED:
3273 case ORDERED:
3274 case UNEQ:
3275 case UNGE:
3276 case UNLT:
3277 case UNGT:
3278 case UNLE:
3279 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3280
3281 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3282 case XOR:
3283 return 0;
3284
3285 case IOR:
3286 if (TARGET_THUMB2)
3287 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3288 return 0;
3289
3290 case AND:
3291 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3292
3293 default:
3294 gcc_unreachable ();
3295 }
3296 }
3297
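/* Illustration only (kept out of the build) of how the fall-backs above map
   onto alternative instructions; the C statements are hand-written examples
   and the assembly in the comments shows what the transformation makes
   possible, not captured compiler output.  */
#if 0
void
const_ok_for_op_examples (unsigned int *p)
{
  /* AND: 0xffffff00 is not a valid immediate, but ~0xffffff00 == 0xff is,
     so the AND case accepts it and a single BIC can be used:
	bic	r0, r0, #255  */
  *p &= 0xffffff00;

  /* PLUS: a subtraction of 4080 reaches the PLUS case as an addition of
     -4080; -4080 is not encodable but 4080 (0xff << 4) is, so a plain SUB
     does the job:
	sub	r0, r0, #4080  */
  *p -= 4080;
}
#endif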
3298 /* Return true if I is a valid di mode constant for the operation CODE. */
3299 int
3300 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3301 {
3302 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3303 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3304 rtx hi = GEN_INT (hi_val);
3305 rtx lo = GEN_INT (lo_val);
3306
3307 if (TARGET_THUMB1)
3308 return 0;
3309
3310 switch (code)
3311 {
3312 case AND:
3313 case IOR:
3314 case XOR:
3315 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3316 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3317 case PLUS:
3318 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3319
3320 default:
3321 return 0;
3322 }
3323 }
3324
3325 /* Emit a sequence of insns to handle a large constant.
3326 CODE is the code of the operation required, it can be any of SET, PLUS,
3327 IOR, AND, XOR, MINUS;
3328 MODE is the mode in which the operation is being performed;
3329 VAL is the integer to operate on;
3330 SOURCE is the other operand (a register, or a null-pointer for SET);
3331 SUBTARGETS means it is safe to create scratch registers if that will
3332 either produce a simpler sequence, or we will want to cse the values.
3333 Return value is the number of insns emitted. */
3334
3335 /* ??? Tweak this for thumb2. */
3336 int
3337 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3338 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3339 {
3340 rtx cond;
3341
3342 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3343 cond = COND_EXEC_TEST (PATTERN (insn));
3344 else
3345 cond = NULL_RTX;
3346
3347 if (subtargets || code == SET
3348 || (REG_P (target) && REG_P (source)
3349 && REGNO (target) != REGNO (source)))
3350 {
3351 /* After arm_reorg has been called, we can't fix up expensive
3352 constants by pushing them into memory so we must synthesize
3353 them in-line, regardless of the cost. This is only likely to
3354 be more costly on chips that have load delay slots and we are
3355 compiling without running the scheduler (so no splitting
3356 occurred before the final instruction emission).
3357
3358 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3359 */
3360 if (!after_arm_reorg
3361 && !cond
3362 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3363 1, 0)
3364 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3365 + (code != SET))))
3366 {
3367 if (code == SET)
3368 {
3369 /* Currently SET is the only monadic value for CODE; all
3370 the rest are dyadic. */
3371 if (TARGET_USE_MOVT)
3372 arm_emit_movpair (target, GEN_INT (val));
3373 else
3374 emit_set_insn (target, GEN_INT (val));
3375
3376 return 1;
3377 }
3378 else
3379 {
3380 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3381
3382 if (TARGET_USE_MOVT)
3383 arm_emit_movpair (temp, GEN_INT (val));
3384 else
3385 emit_set_insn (temp, GEN_INT (val));
3386
3387 /* For MINUS, SOURCE is subtracted from the constant (VAL - SOURCE);
3388 (SOURCE - VAL) is always passed as (SOURCE + -VAL). */
3389 if (code == MINUS)
3390 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3391 else
3392 emit_set_insn (target,
3393 gen_rtx_fmt_ee (code, mode, source, temp));
3394 return 2;
3395 }
3396 }
3397 }
3398
3399 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3400 1);
3401 }
3402
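/* A minimal sketch of the MOVW/MOVT split taken above when TARGET_USE_MOVT
   holds: any 32-bit SET can be done in two instructions by loading the two
   16-bit halves.  Illustration only, kept out of the build; the names are
   invented here.  */
#if 0
#include <stdint.h>

struct movpair_sketch { uint16_t movw_imm; uint16_t movt_imm; };

static struct movpair_sketch
split_for_movpair (uint32_t val)
{
  struct movpair_sketch p;
  p.movw_imm = val & 0xffff;	/* MOVW writes the low half and zeroes the top.  */
  p.movt_imm = val >> 16;	/* MOVT then writes the high half.  */
  return p;
}

/* For val == 0x12345678 this corresponds to
     movw  rN, #0x5678
     movt  rN, #0x1234  */
#endif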
3403 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3404 ARM/THUMB2 immediates and add up to VAL.
3405 The function return value gives the number of insns required. */
3406 static int
3407 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3408 struct four_ints *return_sequence)
3409 {
3410 int best_consecutive_zeros = 0;
3411 int i;
3412 int best_start = 0;
3413 int insns1, insns2;
3414 struct four_ints tmp_sequence;
3415
3416 /* If we aren't targeting ARM, the best place to start is always at
3417 the bottom, otherwise look more closely. */
3418 if (TARGET_ARM)
3419 {
3420 for (i = 0; i < 32; i += 2)
3421 {
3422 int consecutive_zeros = 0;
3423
3424 if (!(val & (3 << i)))
3425 {
3426 while ((i < 32) && !(val & (3 << i)))
3427 {
3428 consecutive_zeros += 2;
3429 i += 2;
3430 }
3431 if (consecutive_zeros > best_consecutive_zeros)
3432 {
3433 best_consecutive_zeros = consecutive_zeros;
3434 best_start = i - consecutive_zeros;
3435 }
3436 i -= 2;
3437 }
3438 }
3439 }
3440
3441 /* So long as it won't require any more insns to do so, it's
3442 desirable to emit a small constant (in bits 0...9) in the last
3443 insn. This way there is more chance that it can be combined with
3444 a later addressing insn to form a pre-indexed load or store
3445 operation. Consider:
3446
3447 *((volatile int *)0xe0000100) = 1;
3448 *((volatile int *)0xe0000110) = 2;
3449
3450 We want this to wind up as:
3451
3452 mov rA, #0xe0000000
3453 mov rB, #1
3454 str rB, [rA, #0x100]
3455 mov rB, #2
3456 str rB, [rA, #0x110]
3457
3458 rather than having to synthesize both large constants from scratch.
3459
3460 Therefore, we calculate how many insns would be required to emit
3461 the constant starting from `best_start', and also starting from
3462 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3463 yield a shorter sequence, we may as well use zero. */
3464 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3465 if (best_start != 0
3466 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3467 {
3468 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3469 if (insns2 <= insns1)
3470 {
3471 *return_sequence = tmp_sequence;
3472 insns1 = insns2;
3473 }
3474 }
3475
3476 return insns1;
3477 }
3478
3479 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3480 static int
3481 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3482 struct four_ints *return_sequence, int i)
3483 {
3484 int remainder = val & 0xffffffff;
3485 int insns = 0;
3486
3487 /* Try and find a way of doing the job in either two or three
3488 instructions.
3489
3490 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3491 location. We start at position I. This may be the MSB, or
3492 optimal_immediate_sequence may have positioned it at the largest block
3493 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3494 wrapping around to the top of the word when we drop off the bottom.
3495 In the worst case this code should produce no more than four insns.
3496
3497 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3498 constants, shifted to any arbitrary location. We should always start
3499 at the MSB. */
3500 do
3501 {
3502 int end;
3503 unsigned int b1, b2, b3, b4;
3504 unsigned HOST_WIDE_INT result;
3505 int loc;
3506
3507 gcc_assert (insns < 4);
3508
3509 if (i <= 0)
3510 i += 32;
3511
3512 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3513 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3514 {
3515 loc = i;
3516 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3517 /* We can use addw/subw for the last 12 bits. */
3518 result = remainder;
3519 else
3520 {
3521 /* Use an 8-bit shifted/rotated immediate. */
3522 end = i - 8;
3523 if (end < 0)
3524 end += 32;
3525 result = remainder & ((0x0ff << end)
3526 | ((i < end) ? (0xff >> (32 - end))
3527 : 0));
3528 i -= 8;
3529 }
3530 }
3531 else
3532 {
3533 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3534 arbitrary shifts. */
3535 i -= TARGET_ARM ? 2 : 1;
3536 continue;
3537 }
3538
3539 /* Next, see if we can do a better job with a thumb2 replicated
3540 constant.
3541
3542 We do it this way around to catch the cases like 0x01F001E0 where
3543 two 8-bit immediates would work, but a replicated constant would
3544 make it worse.
3545
3546 TODO: 16-bit constants that don't clear all the bits, but still win.
3547 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3548 if (TARGET_THUMB2)
3549 {
3550 b1 = (remainder & 0xff000000) >> 24;
3551 b2 = (remainder & 0x00ff0000) >> 16;
3552 b3 = (remainder & 0x0000ff00) >> 8;
3553 b4 = remainder & 0xff;
3554
3555 if (loc > 24)
3556 {
3557 /* The 8-bit immediate already found clears b1 (and maybe b2),
3558 but must leave b3 and b4 alone. */
3559
3560 /* First try to find a 32-bit replicated constant that clears
3561 almost everything. We can assume that we can't do it in one,
3562 or else we wouldn't be here. */
3563 unsigned int tmp = b1 & b2 & b3 & b4;
3564 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3565 + (tmp << 24);
3566 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3567 + (tmp == b3) + (tmp == b4);
3568 if (tmp
3569 && (matching_bytes >= 3
3570 || (matching_bytes == 2
3571 && const_ok_for_op (remainder & ~tmp2, code))))
3572 {
3573 /* At least 3 of the bytes match, and the fourth has at
3574 least as many bits set, or two of the bytes match
3575 and it will only require one more insn to finish. */
3576 result = tmp2;
3577 i = tmp != b1 ? 32
3578 : tmp != b2 ? 24
3579 : tmp != b3 ? 16
3580 : 8;
3581 }
3582
3583 /* Second, try to find a 16-bit replicated constant that can
3584 leave three of the bytes clear. If b2 or b4 is already
3585 zero, then we can. If the 8-bit from above would not
3586 clear b2 anyway, then we still win. */
3587 else if (b1 == b3 && (!b2 || !b4
3588 || (remainder & 0x00ff0000 & ~result)))
3589 {
3590 result = remainder & 0xff00ff00;
3591 i = 24;
3592 }
3593 }
3594 else if (loc > 16)
3595 {
3596 /* The 8-bit immediate already found clears b2 (and maybe b3)
3597 and we don't get here unless b1 is already clear, but it will
3598 leave b4 unchanged. */
3599
3600 /* If we can clear b2 and b4 at once, then we win, since the
3601 8-bits couldn't possibly reach that far. */
3602 if (b2 == b4)
3603 {
3604 result = remainder & 0x00ff00ff;
3605 i = 16;
3606 }
3607 }
3608 }
3609
3610 return_sequence->i[insns++] = result;
3611 remainder &= ~result;
3612
3613 if (code == SET || code == MINUS)
3614 code = PLUS;
3615 }
3616 while (remainder);
3617
3618 return insns;
3619 }
3620
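/* The ARM-mode strategy above amounts to repeatedly peeling off an 8-bit
   field aligned on a 2-bit boundary, wrapping past bit 0, until nothing is
   left.  A standalone sketch of that greedy loop follows; it is illustration
   only (kept out of the build), the name is invented here, and it omits the
   Thumb-2 addw and replicated-constant refinements handled above.  */
#if 0
#include <stdint.h>

static int
split_into_arm_immediates_sketch (uint32_t val, int start, uint32_t out[4])
{
  int i = start, n = 0;

  do
    {
      if (i <= 0)
	i += 32;

      if (val & (3u << (i - 2)))
	{
	  /* Take the 8 bits just below position I, wrapping round.  */
	  int end = i - 8;
	  uint32_t mask;

	  if (end < 0)
	    {
	      end += 32;
	      mask = (0xffu << end) | (0xffu >> (32 - end));
	    }
	  else
	    mask = 0xffu << end;

	  out[n++] = val & mask;
	  val &= ~mask;
	  i -= 8;
	}
      else
	i -= 2;		/* Skip a clear, 2-bit aligned pair.  */
    }
  while (val && n < 4);

  return n;
}

/* E.g. splitting 0x0001fffe starting at bit 32 yields the three pieces
   0x0001fc00, 0x000003fc and 0x00000002, each a valid rotated immediate.  */
#endif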
3621 /* Emit an instruction with the indicated PATTERN. If COND is
3622 non-NULL, conditionalize the execution of the instruction on COND
3623 being true. */
3624
3625 static void
3626 emit_constant_insn (rtx cond, rtx pattern)
3627 {
3628 if (cond)
3629 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3630 emit_insn (pattern);
3631 }
3632
3633 /* As above, but extra parameter GENERATE which, if clear, suppresses
3634 RTL generation. */
3635
3636 static int
3637 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3638 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3639 int generate)
3640 {
3641 int can_invert = 0;
3642 int can_negate = 0;
3643 int final_invert = 0;
3644 int i;
3645 int set_sign_bit_copies = 0;
3646 int clear_sign_bit_copies = 0;
3647 int clear_zero_bit_copies = 0;
3648 int set_zero_bit_copies = 0;
3649 int insns = 0, neg_insns, inv_insns;
3650 unsigned HOST_WIDE_INT temp1, temp2;
3651 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3652 struct four_ints *immediates;
3653 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3654
3655 /* Find out which operations are safe for a given CODE. Also do a quick
3656 check for degenerate cases; these can occur when DImode operations
3657 are split. */
3658 switch (code)
3659 {
3660 case SET:
3661 can_invert = 1;
3662 break;
3663
3664 case PLUS:
3665 can_negate = 1;
3666 break;
3667
3668 case IOR:
3669 if (remainder == 0xffffffff)
3670 {
3671 if (generate)
3672 emit_constant_insn (cond,
3673 gen_rtx_SET (VOIDmode, target,
3674 GEN_INT (ARM_SIGN_EXTEND (val))));
3675 return 1;
3676 }
3677
3678 if (remainder == 0)
3679 {
3680 if (reload_completed && rtx_equal_p (target, source))
3681 return 0;
3682
3683 if (generate)
3684 emit_constant_insn (cond,
3685 gen_rtx_SET (VOIDmode, target, source));
3686 return 1;
3687 }
3688 break;
3689
3690 case AND:
3691 if (remainder == 0)
3692 {
3693 if (generate)
3694 emit_constant_insn (cond,
3695 gen_rtx_SET (VOIDmode, target, const0_rtx));
3696 return 1;
3697 }
3698 if (remainder == 0xffffffff)
3699 {
3700 if (reload_completed && rtx_equal_p (target, source))
3701 return 0;
3702 if (generate)
3703 emit_constant_insn (cond,
3704 gen_rtx_SET (VOIDmode, target, source));
3705 return 1;
3706 }
3707 can_invert = 1;
3708 break;
3709
3710 case XOR:
3711 if (remainder == 0)
3712 {
3713 if (reload_completed && rtx_equal_p (target, source))
3714 return 0;
3715 if (generate)
3716 emit_constant_insn (cond,
3717 gen_rtx_SET (VOIDmode, target, source));
3718 return 1;
3719 }
3720
3721 if (remainder == 0xffffffff)
3722 {
3723 if (generate)
3724 emit_constant_insn (cond,
3725 gen_rtx_SET (VOIDmode, target,
3726 gen_rtx_NOT (mode, source)));
3727 return 1;
3728 }
3729 final_invert = 1;
3730 break;
3731
3732 case MINUS:
3733 /* We treat MINUS as (val - source), since (source - val) is always
3734 passed as (source + (-val)). */
3735 if (remainder == 0)
3736 {
3737 if (generate)
3738 emit_constant_insn (cond,
3739 gen_rtx_SET (VOIDmode, target,
3740 gen_rtx_NEG (mode, source)));
3741 return 1;
3742 }
3743 if (const_ok_for_arm (val))
3744 {
3745 if (generate)
3746 emit_constant_insn (cond,
3747 gen_rtx_SET (VOIDmode, target,
3748 gen_rtx_MINUS (mode, GEN_INT (val),
3749 source)));
3750 return 1;
3751 }
3752
3753 break;
3754
3755 default:
3756 gcc_unreachable ();
3757 }
3758
3759 /* If we can do it in one insn get out quickly. */
3760 if (const_ok_for_op (val, code))
3761 {
3762 if (generate)
3763 emit_constant_insn (cond,
3764 gen_rtx_SET (VOIDmode, target,
3765 (source
3766 ? gen_rtx_fmt_ee (code, mode, source,
3767 GEN_INT (val))
3768 : GEN_INT (val))));
3769 return 1;
3770 }
3771
3772 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3773 insn. */
3774 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3775 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3776 {
3777 if (generate)
3778 {
3779 if (mode == SImode && i == 16)
3780 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3781 smaller insn. */
3782 emit_constant_insn (cond,
3783 gen_zero_extendhisi2
3784 (target, gen_lowpart (HImode, source)));
3785 else
3786 /* Extz only supports SImode, but we can coerce the operands
3787 into that mode. */
3788 emit_constant_insn (cond,
3789 gen_extzv_t2 (gen_lowpart (SImode, target),
3790 gen_lowpart (SImode, source),
3791 GEN_INT (i), const0_rtx));
3792 }
3793
3794 return 1;
3795 }
3796
3797 /* Calculate a few attributes that may be useful for specific
3798 optimizations. */
3799 /* Count number of leading zeros. */
3800 for (i = 31; i >= 0; i--)
3801 {
3802 if ((remainder & (1 << i)) == 0)
3803 clear_sign_bit_copies++;
3804 else
3805 break;
3806 }
3807
3808 /* Count number of leading 1's. */
3809 for (i = 31; i >= 0; i--)
3810 {
3811 if ((remainder & (1 << i)) != 0)
3812 set_sign_bit_copies++;
3813 else
3814 break;
3815 }
3816
3817 /* Count number of trailing zero's. */
3818 for (i = 0; i <= 31; i++)
3819 {
3820 if ((remainder & (1 << i)) == 0)
3821 clear_zero_bit_copies++;
3822 else
3823 break;
3824 }
3825
3826 /* Count number of trailing 1's. */
3827 for (i = 0; i <= 31; i++)
3828 {
3829 if ((remainder & (1 << i)) != 0)
3830 set_zero_bit_copies++;
3831 else
3832 break;
3833 }
3834
3835 switch (code)
3836 {
3837 case SET:
3838 /* See if we can do this by sign_extending a constant that is known
3839 to be negative. This is a good way of doing it, since the shift
3840 may well merge into a subsequent insn. */
3841 if (set_sign_bit_copies > 1)
3842 {
3843 if (const_ok_for_arm
3844 (temp1 = ARM_SIGN_EXTEND (remainder
3845 << (set_sign_bit_copies - 1))))
3846 {
3847 if (generate)
3848 {
3849 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3850 emit_constant_insn (cond,
3851 gen_rtx_SET (VOIDmode, new_src,
3852 GEN_INT (temp1)));
3853 emit_constant_insn (cond,
3854 gen_ashrsi3 (target, new_src,
3855 GEN_INT (set_sign_bit_copies - 1)));
3856 }
3857 return 2;
3858 }
3859 /* For an inverted constant, we will need to set the low bits,
3860 these will be shifted out of harm's way. */
3861 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3862 if (const_ok_for_arm (~temp1))
3863 {
3864 if (generate)
3865 {
3866 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3867 emit_constant_insn (cond,
3868 gen_rtx_SET (VOIDmode, new_src,
3869 GEN_INT (temp1)));
3870 emit_constant_insn (cond,
3871 gen_ashrsi3 (target, new_src,
3872 GEN_INT (set_sign_bit_copies - 1)));
3873 }
3874 return 2;
3875 }
3876 }
3877
3878 /* See if we can calculate the value as the difference between two
3879 valid immediates. */
3880 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3881 {
3882 int topshift = clear_sign_bit_copies & ~1;
3883
3884 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3885 & (0xff000000 >> topshift));
3886
3887 /* If temp1 is zero, then that means the 9 most significant
3888 bits of remainder were 1 and we've caused it to overflow.
3889 When topshift is 0 we don't need to do anything since we
3890 can borrow from 'bit 32'. */
3891 if (temp1 == 0 && topshift != 0)
3892 temp1 = 0x80000000 >> (topshift - 1);
3893
3894 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3895
3896 if (const_ok_for_arm (temp2))
3897 {
3898 if (generate)
3899 {
3900 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3901 emit_constant_insn (cond,
3902 gen_rtx_SET (VOIDmode, new_src,
3903 GEN_INT (temp1)));
3904 emit_constant_insn (cond,
3905 gen_addsi3 (target, new_src,
3906 GEN_INT (-temp2)));
3907 }
3908
3909 return 2;
3910 }
3911 }
3912
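/* Worked example (hand-checked, illustration only) of the strategy above
   for val = 0x00fffff8:

     clear_sign_bit_copies = 8, clear_zero_bit_copies = 3   (sum <= 16)
     topshift = 8
     temp1 = (0x00fffff8 + 0x8000) & 0x00ff0000 = 0         (overflowed)
     temp1 = 0x80000000 >> 7 = 0x01000000                   (borrow case)
     temp2 = 0x01000000 - 0x00fffff8 = 8                    (valid immediate)

   giving the equivalent of
     mov  rN, #0x1000000
     sub  rN, rN, #8  */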
3913 /* See if we can generate this by setting the bottom (or the top)
3914 16 bits, and then shifting these into the other half of the
3915 word. We only look for the simplest cases, to do more would cost
3916 too much. Be careful, however, not to generate this when the
3917 alternative would take fewer insns. */
3918 if (val & 0xffff0000)
3919 {
3920 temp1 = remainder & 0xffff0000;
3921 temp2 = remainder & 0x0000ffff;
3922
3923 /* Overlaps outside this range are best done using other methods. */
3924 for (i = 9; i < 24; i++)
3925 {
3926 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3927 && !const_ok_for_arm (temp2))
3928 {
3929 rtx new_src = (subtargets
3930 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3931 : target);
3932 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3933 source, subtargets, generate);
3934 source = new_src;
3935 if (generate)
3936 emit_constant_insn
3937 (cond,
3938 gen_rtx_SET
3939 (VOIDmode, target,
3940 gen_rtx_IOR (mode,
3941 gen_rtx_ASHIFT (mode, source,
3942 GEN_INT (i)),
3943 source)));
3944 return insns + 1;
3945 }
3946 }
3947
3948 /* Don't duplicate cases already considered. */
3949 for (i = 17; i < 24; i++)
3950 {
3951 if (((temp1 | (temp1 >> i)) == remainder)
3952 && !const_ok_for_arm (temp1))
3953 {
3954 rtx new_src = (subtargets
3955 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3956 : target);
3957 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3958 source, subtargets, generate);
3959 source = new_src;
3960 if (generate)
3961 emit_constant_insn
3962 (cond,
3963 gen_rtx_SET (VOIDmode, target,
3964 gen_rtx_IOR
3965 (mode,
3966 gen_rtx_LSHIFTRT (mode, source,
3967 GEN_INT (i)),
3968 source)));
3969 return insns + 1;
3970 }
3971 }
3972 }
3973 break;
3974
3975 case IOR:
3976 case XOR:
3977 /* If we have IOR or XOR, and the constant can be loaded in a
3978 single instruction, and we can find a temporary to put it in,
3979 then this can be done in two instructions instead of 3-4. */
3980 if (subtargets
3981 /* TARGET can't be NULL if SUBTARGETS is 0 */
3982 || (reload_completed && !reg_mentioned_p (target, source)))
3983 {
3984 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3985 {
3986 if (generate)
3987 {
3988 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3989
3990 emit_constant_insn (cond,
3991 gen_rtx_SET (VOIDmode, sub,
3992 GEN_INT (val)));
3993 emit_constant_insn (cond,
3994 gen_rtx_SET (VOIDmode, target,
3995 gen_rtx_fmt_ee (code, mode,
3996 source, sub)));
3997 }
3998 return 2;
3999 }
4000 }
4001
4002 if (code == XOR)
4003 break;
4004
4005 /* Convert
4006 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4007 followed by 0s, e.g. 0xfff00000) to
4008 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4009
4010 This can be done in 2 instructions by using shifts with mov or mvn.
4011 e.g. for
4012 x = x | 0xfff00000;
4013 we generate:
4014 mvn r0, r0, asl #12
4015 mvn r0, r0, lsr #12 */
4016 if (set_sign_bit_copies > 8
4017 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4018 {
4019 if (generate)
4020 {
4021 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4022 rtx shift = GEN_INT (set_sign_bit_copies);
4023
4024 emit_constant_insn
4025 (cond,
4026 gen_rtx_SET (VOIDmode, sub,
4027 gen_rtx_NOT (mode,
4028 gen_rtx_ASHIFT (mode,
4029 source,
4030 shift))));
4031 emit_constant_insn
4032 (cond,
4033 gen_rtx_SET (VOIDmode, target,
4034 gen_rtx_NOT (mode,
4035 gen_rtx_LSHIFTRT (mode, sub,
4036 shift))));
4037 }
4038 return 2;
4039 }
4040
4041 /* Convert
4042 x = y | constant (which has set_zero_bit_copies trailing ones)
4043 to
4044 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4045
4046 For example, r0 = r0 | 0xfff generates:
4047 mvn r0, r0, lsr #12
4048 mvn r0, r0, asl #12
4049
4050 */
4051 if (set_zero_bit_copies > 8
4052 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4053 {
4054 if (generate)
4055 {
4056 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4057 rtx shift = GEN_INT (set_zero_bit_copies);
4058
4059 emit_constant_insn
4060 (cond,
4061 gen_rtx_SET (VOIDmode, sub,
4062 gen_rtx_NOT (mode,
4063 gen_rtx_LSHIFTRT (mode,
4064 source,
4065 shift))));
4066 emit_constant_insn
4067 (cond,
4068 gen_rtx_SET (VOIDmode, target,
4069 gen_rtx_NOT (mode,
4070 gen_rtx_ASHIFT (mode, sub,
4071 shift))));
4072 }
4073 return 2;
4074 }
4075
4076 /* This will never be reached for Thumb2 because orn is a valid
4077 instruction. This is for Thumb1 and the ARM 32 bit cases.
4078
4079 x = y | constant (such that ~constant is a valid constant)
4080 Transform this to
4081 x = ~(~y & ~constant).
4082 */
4083 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4084 {
4085 if (generate)
4086 {
4087 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4088 emit_constant_insn (cond,
4089 gen_rtx_SET (VOIDmode, sub,
4090 gen_rtx_NOT (mode, source)));
4091 source = sub;
4092 if (subtargets)
4093 sub = gen_reg_rtx (mode);
4094 emit_constant_insn (cond,
4095 gen_rtx_SET (VOIDmode, sub,
4096 gen_rtx_AND (mode, source,
4097 GEN_INT (temp1))));
4098 emit_constant_insn (cond,
4099 gen_rtx_SET (VOIDmode, target,
4100 gen_rtx_NOT (mode, sub)));
4101 }
4102 return 3;
4103 }
4104 break;
4105
4106 case AND:
4107 /* See if two shifts will do 2 or more insns' worth of work. */
4108 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4109 {
4110 HOST_WIDE_INT shift_mask = ((0xffffffff
4111 << (32 - clear_sign_bit_copies))
4112 & 0xffffffff);
4113
4114 if ((remainder | shift_mask) != 0xffffffff)
4115 {
4116 if (generate)
4117 {
4118 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4119 insns = arm_gen_constant (AND, mode, cond,
4120 remainder | shift_mask,
4121 new_src, source, subtargets, 1);
4122 source = new_src;
4123 }
4124 else
4125 {
4126 rtx targ = subtargets ? NULL_RTX : target;
4127 insns = arm_gen_constant (AND, mode, cond,
4128 remainder | shift_mask,
4129 targ, source, subtargets, 0);
4130 }
4131 }
4132
4133 if (generate)
4134 {
4135 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4136 rtx shift = GEN_INT (clear_sign_bit_copies);
4137
4138 emit_insn (gen_ashlsi3 (new_src, source, shift));
4139 emit_insn (gen_lshrsi3 (target, new_src, shift));
4140 }
4141
4142 return insns + 2;
4143 }
4144
4145 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4146 {
4147 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4148
4149 if ((remainder | shift_mask) != 0xffffffff)
4150 {
4151 if (generate)
4152 {
4153 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4154
4155 insns = arm_gen_constant (AND, mode, cond,
4156 remainder | shift_mask,
4157 new_src, source, subtargets, 1);
4158 source = new_src;
4159 }
4160 else
4161 {
4162 rtx targ = subtargets ? NULL_RTX : target;
4163
4164 insns = arm_gen_constant (AND, mode, cond,
4165 remainder | shift_mask,
4166 targ, source, subtargets, 0);
4167 }
4168 }
4169
4170 if (generate)
4171 {
4172 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4173 rtx shift = GEN_INT (clear_zero_bit_copies);
4174
4175 emit_insn (gen_lshrsi3 (new_src, source, shift));
4176 emit_insn (gen_ashlsi3 (target, new_src, shift));
4177 }
4178
4179 return insns + 2;
4180 }
4181
4182 break;
4183
4184 default:
4185 break;
4186 }
4187
4188 /* Calculate what the instruction sequences would be if we generated it
4189 normally, negated, or inverted. */
4190 if (code == AND)
4191 /* AND cannot be split into multiple insns, so invert and use BIC. */
4192 insns = 99;
4193 else
4194 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4195
4196 if (can_negate)
4197 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4198 &neg_immediates);
4199 else
4200 neg_insns = 99;
4201
4202 if (can_invert || final_invert)
4203 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4204 &inv_immediates);
4205 else
4206 inv_insns = 99;
4207
4208 immediates = &pos_immediates;
4209
4210 /* Is the negated immediate sequence more efficient? */
4211 if (neg_insns < insns && neg_insns <= inv_insns)
4212 {
4213 insns = neg_insns;
4214 immediates = &neg_immediates;
4215 }
4216 else
4217 can_negate = 0;
4218
4219 /* Is the inverted immediate sequence more efficient?
4220 We must allow for an extra NOT instruction for XOR operations, although
4221 there is some chance that the final 'mvn' will get optimized later. */
4222 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4223 {
4224 insns = inv_insns;
4225 immediates = &inv_immediates;
4226 }
4227 else
4228 {
4229 can_invert = 0;
4230 final_invert = 0;
4231 }
4232
4233 /* Now output the chosen sequence as instructions. */
4234 if (generate)
4235 {
4236 for (i = 0; i < insns; i++)
4237 {
4238 rtx new_src, temp1_rtx;
4239
4240 temp1 = immediates->i[i];
4241
4242 if (code == SET || code == MINUS)
4243 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4244 else if ((final_invert || i < (insns - 1)) && subtargets)
4245 new_src = gen_reg_rtx (mode);
4246 else
4247 new_src = target;
4248
4249 if (can_invert)
4250 temp1 = ~temp1;
4251 else if (can_negate)
4252 temp1 = -temp1;
4253
4254 temp1 = trunc_int_for_mode (temp1, mode);
4255 temp1_rtx = GEN_INT (temp1);
4256
4257 if (code == SET)
4258 ;
4259 else if (code == MINUS)
4260 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4261 else
4262 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4263
4264 emit_constant_insn (cond,
4265 gen_rtx_SET (VOIDmode, new_src,
4266 temp1_rtx));
4267 source = new_src;
4268
4269 if (code == SET)
4270 {
4271 can_negate = can_invert;
4272 can_invert = 0;
4273 code = PLUS;
4274 }
4275 else if (code == MINUS)
4276 code = PLUS;
4277 }
4278 }
4279
4280 if (final_invert)
4281 {
4282 if (generate)
4283 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4284 gen_rtx_NOT (mode, source)));
4285 insns++;
4286 }
4287
4288 return insns;
4289 }
4290
4291 /* Canonicalize a comparison so that we are more likely to recognize it.
4292 This can be done for a few constant compares, where we can make the
4293 immediate value easier to load. */
4294
4295 static void
4296 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4297 bool op0_preserve_value)
4298 {
4299 enum machine_mode mode;
4300 unsigned HOST_WIDE_INT i, maxval;
4301
4302 mode = GET_MODE (*op0);
4303 if (mode == VOIDmode)
4304 mode = GET_MODE (*op1);
4305
4306 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4307
4308 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4309 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4310 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4311 for GTU/LEU in Thumb mode. */
4312 if (mode == DImode)
4313 {
4314 rtx tem;
4315
4316 if (*code == GT || *code == LE
4317 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4318 {
4319 /* Missing comparison. First try to use an available
4320 comparison. */
4321 if (CONST_INT_P (*op1))
4322 {
4323 i = INTVAL (*op1);
4324 switch (*code)
4325 {
4326 case GT:
4327 case LE:
4328 if (i != maxval
4329 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4330 {
4331 *op1 = GEN_INT (i + 1);
4332 *code = *code == GT ? GE : LT;
4333 return;
4334 }
4335 break;
4336 case GTU:
4337 case LEU:
4338 if (i != ~((unsigned HOST_WIDE_INT) 0)
4339 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4340 {
4341 *op1 = GEN_INT (i + 1);
4342 *code = *code == GTU ? GEU : LTU;
4343 return;
4344 }
4345 break;
4346 default:
4347 gcc_unreachable ();
4348 }
4349 }
4350
4351 /* If that did not work, reverse the condition. */
4352 if (!op0_preserve_value)
4353 {
4354 tem = *op0;
4355 *op0 = *op1;
4356 *op1 = tem;
4357 *code = (int)swap_condition ((enum rtx_code)*code);
4358 }
4359 }
4360 return;
4361 }
4362
4363 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4364 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4365 to facilitate possible combining with a cmp into 'ands'. */
4366 if (mode == SImode
4367 && GET_CODE (*op0) == ZERO_EXTEND
4368 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4369 && GET_MODE (XEXP (*op0, 0)) == QImode
4370 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4371 && subreg_lowpart_p (XEXP (*op0, 0))
4372 && *op1 == const0_rtx)
4373 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4374 GEN_INT (255));
4375
4376 /* Comparisons smaller than DImode. Only adjust comparisons against
4377 an out-of-range constant. */
4378 if (!CONST_INT_P (*op1)
4379 || const_ok_for_arm (INTVAL (*op1))
4380 || const_ok_for_arm (- INTVAL (*op1)))
4381 return;
4382
4383 i = INTVAL (*op1);
4384
4385 switch (*code)
4386 {
4387 case EQ:
4388 case NE:
4389 return;
4390
4391 case GT:
4392 case LE:
4393 if (i != maxval
4394 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4395 {
4396 *op1 = GEN_INT (i + 1);
4397 *code = *code == GT ? GE : LT;
4398 return;
4399 }
4400 break;
4401
4402 case GE:
4403 case LT:
4404 if (i != ~maxval
4405 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4406 {
4407 *op1 = GEN_INT (i - 1);
4408 *code = *code == GE ? GT : LE;
4409 return;
4410 }
4411 break;
4412
4413 case GTU:
4414 case LEU:
4415 if (i != ~((unsigned HOST_WIDE_INT) 0)
4416 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4417 {
4418 *op1 = GEN_INT (i + 1);
4419 *code = *code == GTU ? GEU : LTU;
4420 return;
4421 }
4422 break;
4423
4424 case GEU:
4425 case LTU:
4426 if (i != 0
4427 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4428 {
4429 *op1 = GEN_INT (i - 1);
4430 *code = *code == GEU ? GTU : LEU;
4431 return;
4432 }
4433 break;
4434
4435 default:
4436 gcc_unreachable ();
4437 }
4438 }
4439
4440
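/* Worked example (hand-checked, illustration only) of the GT/LE adjustment
   above.  4095 (0xfff) is not a valid ARM immediate and neither is -4095,
   but 4096 is, so a signed "x > 4095" is rewritten as "x >= 4096" and can
   be emitted as

     cmp  r0, #4096
     bge  .Ltaken

   instead of first synthesizing 4095 in a scratch register.  */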
4441 /* Define how to find the value returned by a function. */
4442
4443 static rtx
4444 arm_function_value(const_tree type, const_tree func,
4445 bool outgoing ATTRIBUTE_UNUSED)
4446 {
4447 enum machine_mode mode;
4448 int unsignedp ATTRIBUTE_UNUSED;
4449 rtx r ATTRIBUTE_UNUSED;
4450
4451 mode = TYPE_MODE (type);
4452
4453 if (TARGET_AAPCS_BASED)
4454 return aapcs_allocate_return_reg (mode, type, func);
4455
4456 /* Promote integer types. */
4457 if (INTEGRAL_TYPE_P (type))
4458 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4459
4460 /* Promote small structs returned in a register to full-word size
4461 for big-endian AAPCS. */
4462 if (arm_return_in_msb (type))
4463 {
4464 HOST_WIDE_INT size = int_size_in_bytes (type);
4465 if (size % UNITS_PER_WORD != 0)
4466 {
4467 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4468 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4469 }
4470 }
4471
4472 return arm_libcall_value_1 (mode);
4473 }
4474
4475 /* libcall hashtable helpers. */
4476
4477 struct libcall_hasher : typed_noop_remove <rtx_def>
4478 {
4479 typedef rtx_def value_type;
4480 typedef rtx_def compare_type;
4481 static inline hashval_t hash (const value_type *);
4482 static inline bool equal (const value_type *, const compare_type *);
4483 static inline void remove (value_type *);
4484 };
4485
4486 inline bool
4487 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4488 {
4489 return rtx_equal_p (p1, p2);
4490 }
4491
4492 inline hashval_t
4493 libcall_hasher::hash (const value_type *p1)
4494 {
4495 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4496 }
4497
4498 typedef hash_table <libcall_hasher> libcall_table_type;
4499
4500 static void
4501 add_libcall (libcall_table_type htab, rtx libcall)
4502 {
4503 *htab.find_slot (libcall, INSERT) = libcall;
4504 }
4505
4506 static bool
4507 arm_libcall_uses_aapcs_base (const_rtx libcall)
4508 {
4509 static bool init_done = false;
4510 static libcall_table_type libcall_htab;
4511
4512 if (!init_done)
4513 {
4514 init_done = true;
4515
4516 libcall_htab.create (31);
4517 add_libcall (libcall_htab,
4518 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4519 add_libcall (libcall_htab,
4520 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4521 add_libcall (libcall_htab,
4522 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4523 add_libcall (libcall_htab,
4524 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4525
4526 add_libcall (libcall_htab,
4527 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4528 add_libcall (libcall_htab,
4529 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4530 add_libcall (libcall_htab,
4531 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4532 add_libcall (libcall_htab,
4533 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4534
4535 add_libcall (libcall_htab,
4536 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4537 add_libcall (libcall_htab,
4538 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4539 add_libcall (libcall_htab,
4540 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4541 add_libcall (libcall_htab,
4542 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4543 add_libcall (libcall_htab,
4544 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4545 add_libcall (libcall_htab,
4546 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4547 add_libcall (libcall_htab,
4548 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4549 add_libcall (libcall_htab,
4550 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4551
4552 /* Values from double-precision helper functions are returned in core
4553 registers if the selected core only supports single-precision
4554 arithmetic, even if we are using the hard-float ABI. The same is
4555 true for single-precision helpers, but we will never be using the
4556 hard-float ABI on a CPU which doesn't support single-precision
4557 operations in hardware. */
4558 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4559 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4560 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4561 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4562 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4563 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4564 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4565 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4566 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4567 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4568 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4569 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4570 SFmode));
4571 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4572 DFmode));
4573 }
4574
4575 return libcall && libcall_htab.find (libcall) != NULL;
4576 }
4577
4578 static rtx
4579 arm_libcall_value_1 (enum machine_mode mode)
4580 {
4581 if (TARGET_AAPCS_BASED)
4582 return aapcs_libcall_value (mode);
4583 else if (TARGET_IWMMXT_ABI
4584 && arm_vector_mode_supported_p (mode))
4585 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4586 else
4587 return gen_rtx_REG (mode, ARG_REGISTER (1));
4588 }
4589
4590 /* Define how to find the value returned by a library function
4591 assuming the value has mode MODE. */
4592
4593 static rtx
4594 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4595 {
4596 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4597 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4598 {
4599 /* The following libcalls return their result in integer registers,
4600 even though they return a floating point value. */
4601 if (arm_libcall_uses_aapcs_base (libcall))
4602 return gen_rtx_REG (mode, ARG_REGISTER(1));
4603
4604 }
4605
4606 return arm_libcall_value_1 (mode);
4607 }
4608
4609 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4610
4611 static bool
4612 arm_function_value_regno_p (const unsigned int regno)
4613 {
4614 if (regno == ARG_REGISTER (1)
4615 || (TARGET_32BIT
4616 && TARGET_AAPCS_BASED
4617 && TARGET_VFP
4618 && TARGET_HARD_FLOAT
4619 && regno == FIRST_VFP_REGNUM)
4620 || (TARGET_IWMMXT_ABI
4621 && regno == FIRST_IWMMXT_REGNUM))
4622 return true;
4623
4624 return false;
4625 }
4626
4627 /* Determine the amount of memory needed to store the possible return
4628 registers of an untyped call. */
4629 int
4630 arm_apply_result_size (void)
4631 {
4632 int size = 16;
4633
4634 if (TARGET_32BIT)
4635 {
4636 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4637 size += 32;
4638 if (TARGET_IWMMXT_ABI)
4639 size += 8;
4640 }
4641
4642 return size;
4643 }
4644
4645 /* Decide whether TYPE should be returned in memory (true)
4646 or in a register (false). FNTYPE is the type of the function making
4647 the call. */
4648 static bool
4649 arm_return_in_memory (const_tree type, const_tree fntype)
4650 {
4651 HOST_WIDE_INT size;
4652
4653 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4654
4655 if (TARGET_AAPCS_BASED)
4656 {
4657 /* Simple, non-aggregate types (i.e. not including vectors and
4658 complex) are always returned in a register (or registers).
4659 We don't care about which register here, so we can short-cut
4660 some of the detail. */
4661 if (!AGGREGATE_TYPE_P (type)
4662 && TREE_CODE (type) != VECTOR_TYPE
4663 && TREE_CODE (type) != COMPLEX_TYPE)
4664 return false;
4665
4666 /* Any return value that is no larger than one word can be
4667 returned in r0. */
4668 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4669 return false;
4670
4671 /* Check any available co-processors to see if they accept the
4672 type as a register candidate (VFP, for example, can return
4673 some aggregates in consecutive registers). These aren't
4674 available if the call is variadic. */
4675 if (aapcs_select_return_coproc (type, fntype) >= 0)
4676 return false;
4677
4678 /* Vector values should be returned using ARM registers, not
4679 memory (unless they're over 16 bytes, which will break since
4680 we only have four call-clobbered registers to play with). */
4681 if (TREE_CODE (type) == VECTOR_TYPE)
4682 return (size < 0 || size > (4 * UNITS_PER_WORD));
4683
4684 /* The rest go in memory. */
4685 return true;
4686 }
4687
4688 if (TREE_CODE (type) == VECTOR_TYPE)
4689 return (size < 0 || size > (4 * UNITS_PER_WORD));
4690
4691 if (!AGGREGATE_TYPE_P (type) &&
4692 (TREE_CODE (type) != VECTOR_TYPE))
4693 /* All simple types are returned in registers. */
4694 return false;
4695
4696 if (arm_abi != ARM_ABI_APCS)
4697 {
4698 /* ATPCS and later return aggregate types in memory only if they are
4699 larger than a word (or are variable size). */
4700 return (size < 0 || size > UNITS_PER_WORD);
4701 }
4702
4703 /* For the arm-wince targets we choose to be compatible with Microsoft's
4704 ARM and Thumb compilers, which always return aggregates in memory. */
4705 #ifndef ARM_WINCE
4706 /* All structures/unions bigger than one word are returned in memory.
4707 Also catch the case where int_size_in_bytes returns -1. In this case
4708 the aggregate is either huge or of variable size, and in either case
4709 we will want to return it via memory and not in a register. */
4710 if (size < 0 || size > UNITS_PER_WORD)
4711 return true;
4712
4713 if (TREE_CODE (type) == RECORD_TYPE)
4714 {
4715 tree field;
4716
4717 /* For a struct the APCS says that we only return in a register
4718 if the type is 'integer like' and every addressable element
4719 has an offset of zero. For practical purposes this means
4720 that the structure can have at most one non-bit-field element
4721 and that this element must be the first one in the structure. */
4722
4723 /* Find the first field, ignoring non FIELD_DECL things which will
4724 have been created by C++. */
4725 for (field = TYPE_FIELDS (type);
4726 field && TREE_CODE (field) != FIELD_DECL;
4727 field = DECL_CHAIN (field))
4728 continue;
4729
4730 if (field == NULL)
4731 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4732
4733 /* Check that the first field is valid for returning in a register. */
4734
4735 /* ... Floats are not allowed */
4736 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4737 return true;
4738
4739 /* ... Aggregates that are not themselves valid for returning in
4740 a register are not allowed. */
4741 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4742 return true;
4743
4744 /* Now check the remaining fields, if any. Only bitfields are allowed,
4745 since they are not addressable. */
4746 for (field = DECL_CHAIN (field);
4747 field;
4748 field = DECL_CHAIN (field))
4749 {
4750 if (TREE_CODE (field) != FIELD_DECL)
4751 continue;
4752
4753 if (!DECL_BIT_FIELD_TYPE (field))
4754 return true;
4755 }
4756
4757 return false;
4758 }
4759
4760 if (TREE_CODE (type) == UNION_TYPE)
4761 {
4762 tree field;
4763
4764 /* Unions can be returned in registers if every element is
4765 integral, or can be returned in an integer register. */
4766 for (field = TYPE_FIELDS (type);
4767 field;
4768 field = DECL_CHAIN (field))
4769 {
4770 if (TREE_CODE (field) != FIELD_DECL)
4771 continue;
4772
4773 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4774 return true;
4775
4776 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4777 return true;
4778 }
4779
4780 return false;
4781 }
4782 #endif /* not ARM_WINCE */
4783
4784 /* Return all other types in memory. */
4785 return true;
4786 }
4787
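/* Illustrative examples (kept out of the build) of the APCS decisions made
   above for word-sized aggregates; under AAPCS all four fit in one word and
   so are never returned in memory.  */
#if 0
struct in_reg_1 { int i; };			/* Integer-like: register.  */
struct in_reg_2 { int i : 16; int j : 16; };	/* Only bit-fields follow the
						   first field: register.  */
struct in_mem_1 { float f; };			/* Contains a float: memory.  */
struct in_mem_2 { short s; short t; };		/* Second addressable field:
						   memory.  */
#endif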
4788 const struct pcs_attribute_arg
4789 {
4790 const char *arg;
4791 enum arm_pcs value;
4792 } pcs_attribute_args[] =
4793 {
4794 {"aapcs", ARM_PCS_AAPCS},
4795 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4796 #if 0
4797 /* We could recognize these, but changes would be needed elsewhere
4798 * to implement them. */
4799 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4800 {"atpcs", ARM_PCS_ATPCS},
4801 {"apcs", ARM_PCS_APCS},
4802 #endif
4803 {NULL, ARM_PCS_UNKNOWN}
4804 };
4805
4806 static enum arm_pcs
4807 arm_pcs_from_attribute (tree attr)
4808 {
4809 const struct pcs_attribute_arg *ptr;
4810 const char *arg;
4811
4812 /* Get the value of the argument. */
4813 if (TREE_VALUE (attr) == NULL_TREE
4814 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4815 return ARM_PCS_UNKNOWN;
4816
4817 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4818
4819 /* Check it against the list of known arguments. */
4820 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4821 if (streq (arg, ptr->arg))
4822 return ptr->value;
4823
4824 /* An unrecognized PCS variant. */
4825 return ARM_PCS_UNKNOWN;
4826 }
4827
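/* Example (illustration only) of the source-level "pcs" function attribute
   whose arguments are listed in the table above:

     double base_fn (double) __attribute__ ((pcs ("aapcs")));
     double vfp_fn (double) __attribute__ ((pcs ("aapcs-vfp")));

   base_fn passes and returns its double in core registers, vfp_fn in VFP
   registers, overriding the translation unit's default for those calls.  */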
4828 /* Get the PCS variant to use for this call. TYPE is the function's type
4829 specification, DECL is the specific declaration. DECL may be null if
4830 the call could be indirect or if this is a library call. */
4831 static enum arm_pcs
4832 arm_get_pcs_model (const_tree type, const_tree decl)
4833 {
4834 bool user_convention = false;
4835 enum arm_pcs user_pcs = arm_pcs_default;
4836 tree attr;
4837
4838 gcc_assert (type);
4839
4840 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4841 if (attr)
4842 {
4843 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4844 user_convention = true;
4845 }
4846
4847 if (TARGET_AAPCS_BASED)
4848 {
4849 /* Detect varargs functions. These always use the base rules
4850 (no argument is ever a candidate for a co-processor
4851 register). */
4852 bool base_rules = stdarg_p (type);
4853
4854 if (user_convention)
4855 {
4856 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4857 sorry ("non-AAPCS derived PCS variant");
4858 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4859 error ("variadic functions must use the base AAPCS variant");
4860 }
4861
4862 if (base_rules)
4863 return ARM_PCS_AAPCS;
4864 else if (user_convention)
4865 return user_pcs;
4866 else if (decl && flag_unit_at_a_time)
4867 {
4868 /* Local functions never leak outside this compilation unit,
4869 so we are free to use whatever conventions are
4870 appropriate. */
4871 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4872 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4873 if (i && i->local)
4874 return ARM_PCS_AAPCS_LOCAL;
4875 }
4876 }
4877 else if (user_convention && user_pcs != arm_pcs_default)
4878 sorry ("PCS variant");
4879
4880 /* For everything else we use the target's default. */
4881 return arm_pcs_default;
4882 }
4883
4884
4885 static void
4886 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4887 const_tree fntype ATTRIBUTE_UNUSED,
4888 rtx libcall ATTRIBUTE_UNUSED,
4889 const_tree fndecl ATTRIBUTE_UNUSED)
4890 {
4891 /* Record the unallocated VFP registers. */
4892 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4893 pcum->aapcs_vfp_reg_alloc = 0;
4894 }
4895
4896 /* Walk down the type tree of TYPE counting consecutive base elements.
4897 If *MODEP is VOIDmode, then set it to the first valid floating point
4898 type. If a non-floating point type is found, or if a floating point
4899 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4900 otherwise return the count in the sub-tree. */
4901 static int
4902 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4903 {
4904 enum machine_mode mode;
4905 HOST_WIDE_INT size;
4906
4907 switch (TREE_CODE (type))
4908 {
4909 case REAL_TYPE:
4910 mode = TYPE_MODE (type);
4911 if (mode != DFmode && mode != SFmode)
4912 return -1;
4913
4914 if (*modep == VOIDmode)
4915 *modep = mode;
4916
4917 if (*modep == mode)
4918 return 1;
4919
4920 break;
4921
4922 case COMPLEX_TYPE:
4923 mode = TYPE_MODE (TREE_TYPE (type));
4924 if (mode != DFmode && mode != SFmode)
4925 return -1;
4926
4927 if (*modep == VOIDmode)
4928 *modep = mode;
4929
4930 if (*modep == mode)
4931 return 2;
4932
4933 break;
4934
4935 case VECTOR_TYPE:
4936 /* Use V2SImode and V4SImode as representatives of all 64-bit
4937 and 128-bit vector types, whether or not those modes are
4938 supported with the present options. */
4939 size = int_size_in_bytes (type);
4940 switch (size)
4941 {
4942 case 8:
4943 mode = V2SImode;
4944 break;
4945 case 16:
4946 mode = V4SImode;
4947 break;
4948 default:
4949 return -1;
4950 }
4951
4952 if (*modep == VOIDmode)
4953 *modep = mode;
4954
4955 /* Vector modes are considered to be opaque: two vectors are
4956 equivalent for the purposes of being homogeneous aggregates
4957 if they are the same size. */
4958 if (*modep == mode)
4959 return 1;
4960
4961 break;
4962
4963 case ARRAY_TYPE:
4964 {
4965 int count;
4966 tree index = TYPE_DOMAIN (type);
4967
4968 /* Can't handle incomplete types. */
4969 if (!COMPLETE_TYPE_P (type))
4970 return -1;
4971
4972 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4973 if (count == -1
4974 || !index
4975 || !TYPE_MAX_VALUE (index)
4976 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
4977 || !TYPE_MIN_VALUE (index)
4978 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
4979 || count < 0)
4980 return -1;
4981
4982 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
4983 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
4984
4985 /* There must be no padding. */
4986 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
4987 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
4988 != count * GET_MODE_BITSIZE (*modep)))
4989 return -1;
4990
4991 return count;
4992 }
4993
4994 case RECORD_TYPE:
4995 {
4996 int count = 0;
4997 int sub_count;
4998 tree field;
4999
5000 /* Can't handle incomplete types. */
5001 if (!COMPLETE_TYPE_P (type))
5002 return -1;
5003
5004 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5005 {
5006 if (TREE_CODE (field) != FIELD_DECL)
5007 continue;
5008
5009 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5010 if (sub_count < 0)
5011 return -1;
5012 count += sub_count;
5013 }
5014
5015 /* There must be no padding. */
5016 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5017 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5018 != count * GET_MODE_BITSIZE (*modep)))
5019 return -1;
5020
5021 return count;
5022 }
5023
5024 case UNION_TYPE:
5025 case QUAL_UNION_TYPE:
5026 {
5027 /* These aren't very interesting except in a degenerate case. */
5028 int count = 0;
5029 int sub_count;
5030 tree field;
5031
5032 /* Can't handle incomplete types. */
5033 if (!COMPLETE_TYPE_P (type))
5034 return -1;
5035
5036 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5037 {
5038 if (TREE_CODE (field) != FIELD_DECL)
5039 continue;
5040
5041 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5042 if (sub_count < 0)
5043 return -1;
5044 count = count > sub_count ? count : sub_count;
5045 }
5046
5047 /* There must be no padding. */
5048 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5049 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5050 != count * GET_MODE_BITSIZE (*modep)))
5051 return -1;
5052
5053 return count;
5054 }
5055
5056 default:
5057 break;
5058 }
5059
5060 return -1;
5061 }
5062
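/* Illustrative examples (kept out of the build) of what the walk above
   treats as a homogeneous floating-point candidate.  */
#if 0
struct hfa_3f { float x, y, z; };	/* 3 x SFmode: candidate.  */
struct hfa_4d { double d[4]; };		/* 4 x DFmode: candidate.  */
struct not_hfa { float x; double y; };	/* Mixed base modes: rejected here.  */
struct too_long { float x[5]; };	/* Five elements: rejected by the
					   callers, which require 1 to 4.  */
#endif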
5063 /* Return true if PCS_VARIANT should use VFP registers. */
5064 static bool
5065 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5066 {
5067 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5068 {
5069 static bool seen_thumb1_vfp = false;
5070
5071 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5072 {
5073 sorry ("Thumb-1 hard-float VFP ABI");
5074 /* sorry() is not immediately fatal, so only display this once. */
5075 seen_thumb1_vfp = true;
5076 }
5077
5078 return true;
5079 }
5080
5081 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5082 return false;
5083
5084 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5085 (TARGET_VFP_DOUBLE || !is_double));
5086 }
5087
5088 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5089 suitable for passing or returning in VFP registers for the PCS
5090 variant selected. If it is, then *BASE_MODE is updated to contain
5091 a machine mode describing each element of the argument's type and
5092 *COUNT to hold the number of such elements. */
5093 static bool
5094 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5095 enum machine_mode mode, const_tree type,
5096 enum machine_mode *base_mode, int *count)
5097 {
5098 enum machine_mode new_mode = VOIDmode;
5099
5100 /* If we have the type information, prefer that to working things
5101 out from the mode. */
5102 if (type)
5103 {
5104 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5105
5106 if (ag_count > 0 && ag_count <= 4)
5107 *count = ag_count;
5108 else
5109 return false;
5110 }
5111 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5112 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5113 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5114 {
5115 *count = 1;
5116 new_mode = mode;
5117 }
5118 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5119 {
5120 *count = 2;
5121 new_mode = (mode == DCmode ? DFmode : SFmode);
5122 }
5123 else
5124 return false;
5125
5126
5127 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5128 return false;
5129
5130 *base_mode = new_mode;
5131 return true;
5132 }
5133
5134 static bool
5135 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5136 enum machine_mode mode, const_tree type)
5137 {
5138 int count ATTRIBUTE_UNUSED;
5139 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5140
5141 if (!use_vfp_abi (pcs_variant, false))
5142 return false;
5143 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5144 &ag_mode, &count);
5145 }
5146
5147 static bool
5148 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5149 const_tree type)
5150 {
5151 if (!use_vfp_abi (pcum->pcs_variant, false))
5152 return false;
5153
5154 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5155 &pcum->aapcs_vfp_rmode,
5156 &pcum->aapcs_vfp_rcount);
5157 }
5158
5159 static bool
5160 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5161 const_tree type ATTRIBUTE_UNUSED)
5162 {
5163 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5164 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5165 int regno;
5166
5167 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5168 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5169 {
5170 pcum->aapcs_vfp_reg_alloc = mask << regno;
5171 if (mode == BLKmode
5172 || (mode == TImode && ! TARGET_NEON)
5173 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5174 {
5175 int i;
5176 int rcount = pcum->aapcs_vfp_rcount;
5177 int rshift = shift;
5178 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5179 rtx par;
5180 if (!TARGET_NEON)
5181 {
5182 /* Avoid using unsupported vector modes. */
5183 if (rmode == V2SImode)
5184 rmode = DImode;
5185 else if (rmode == V4SImode)
5186 {
5187 rmode = DImode;
5188 rcount *= 2;
5189 rshift /= 2;
5190 }
5191 }
5192 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5193 for (i = 0; i < rcount; i++)
5194 {
5195 rtx tmp = gen_rtx_REG (rmode,
5196 FIRST_VFP_REGNUM + regno + i * rshift);
5197 tmp = gen_rtx_EXPR_LIST
5198 (VOIDmode, tmp,
5199 GEN_INT (i * GET_MODE_SIZE (rmode)));
5200 XVECEXP (par, 0, i) = tmp;
5201 }
5202
5203 pcum->aapcs_reg = par;
5204 }
5205 else
5206 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5207 return true;
5208 }
5209 return false;
5210 }
5211
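/* A standalone sketch of the register scan above, assuming the sixteen
   single-precision argument registers s0-s15.  FREE_MASK has bit n set when
   s<n> is unallocated, SHIFT is the number of S registers per element and
   COUNT the element count.  Illustration only, kept out of the build; the
   name is invented here.  */
#if 0
static int
first_free_vfp_block_sketch (unsigned int free_mask, int shift, int count)
{
  unsigned int mask = (1u << (shift * count)) - 1;
  int regno;

  for (regno = 0; regno + shift * count <= 16; regno += shift)
    if (((free_mask >> regno) & mask) == mask)
      return regno;		/* First S register of a free block.  */

  return -1;			/* No block free: the argument goes on the stack.  */
}

/* E.g. a struct of three floats needs shift == 1, count == 3; two doubles
   need shift == 2, count == 2 and the scan steps in pairs of S registers.  */
#endif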
5212 static rtx
5213 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5214 enum machine_mode mode,
5215 const_tree type ATTRIBUTE_UNUSED)
5216 {
5217 if (!use_vfp_abi (pcs_variant, false))
5218 return NULL;
5219
5220 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5221 {
5222 int count;
5223 enum machine_mode ag_mode;
5224 int i;
5225 rtx par;
5226 int shift;
5227
5228 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5229 &ag_mode, &count);
5230
5231 if (!TARGET_NEON)
5232 {
5233 if (ag_mode == V2SImode)
5234 ag_mode = DImode;
5235 else if (ag_mode == V4SImode)
5236 {
5237 ag_mode = DImode;
5238 count *= 2;
5239 }
5240 }
5241 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5242 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5243 for (i = 0; i < count; i++)
5244 {
5245 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5246 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5247 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5248 XVECEXP (par, 0, i) = tmp;
5249 }
5250
5251 return par;
5252 }
5253
5254 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5255 }
5256
5257 static void
5258 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5259 enum machine_mode mode ATTRIBUTE_UNUSED,
5260 const_tree type ATTRIBUTE_UNUSED)
5261 {
5262 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5263 pcum->aapcs_vfp_reg_alloc = 0;
5264 return;
5265 }
5266
5267 #define AAPCS_CP(X) \
5268 { \
5269 aapcs_ ## X ## _cum_init, \
5270 aapcs_ ## X ## _is_call_candidate, \
5271 aapcs_ ## X ## _allocate, \
5272 aapcs_ ## X ## _is_return_candidate, \
5273 aapcs_ ## X ## _allocate_return_reg, \
5274 aapcs_ ## X ## _advance \
5275 }
5276
5277 /* Table of co-processors that can be used to pass arguments in
5278 registers. Ideally no argument should be a candidate for more than
5279 one co-processor table entry, but the table is processed in order
5280 and stops after the first match. If that entry then fails to put
5281 the argument into a co-processor register, the argument will go on
5282 the stack. */
5283 static struct
5284 {
5285 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5286 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5287
5288 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5289 BLKmode) is a candidate for this co-processor's registers; this
5290 function should ignore any position-dependent state in
5291 CUMULATIVE_ARGS and only use call-type dependent information. */
5292 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5293
5294 /* Return true if the argument does get a co-processor register; it
5295 should set aapcs_reg to an RTX of the allocated register, as
5296 required for a return from FUNCTION_ARG. */
5297 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5298
5299 /* Return true if a result of mode MODE (or type TYPE if MODE is
5300 BLKmode) can be returned in this co-processor's registers. */
5301 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5302
5303 /* Allocate and return an RTX element to hold the return type of a
5304 call; this routine must not fail and will only be called if
5305 is_return_candidate returned true with the same parameters. */
5306 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5307
5308 /* Finish processing this argument and prepare to start processing
5309 the next one. */
5310 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5311 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5312 {
5313 AAPCS_CP(vfp)
5314 };
5315
5316 #undef AAPCS_CP
5317
5318 static int
5319 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5320 const_tree type)
5321 {
5322 int i;
5323
5324 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5325 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5326 return i;
5327
5328 return -1;
5329 }
5330
5331 static int
5332 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5333 {
5334 /* We aren't passed a decl, so we can't check that a call is local.
5335 However, it isn't clear that that would be a win anyway, since it
5336 might limit some tail-calling opportunities. */
5337 enum arm_pcs pcs_variant;
5338
5339 if (fntype)
5340 {
5341 const_tree fndecl = NULL_TREE;
5342
5343 if (TREE_CODE (fntype) == FUNCTION_DECL)
5344 {
5345 fndecl = fntype;
5346 fntype = TREE_TYPE (fntype);
5347 }
5348
5349 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5350 }
5351 else
5352 pcs_variant = arm_pcs_default;
5353
5354 if (pcs_variant != ARM_PCS_AAPCS)
5355 {
5356 int i;
5357
5358 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5359 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5360 TYPE_MODE (type),
5361 type))
5362 return i;
5363 }
5364 return -1;
5365 }
5366
5367 static rtx
5368 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5369 const_tree fntype)
5370 {
5371 /* We aren't passed a decl, so we can't check that a call is local.
5372 However, it isn't clear that that would be a win anyway, since it
5373 might limit some tail-calling opportunities. */
5374 enum arm_pcs pcs_variant;
5375 int unsignedp ATTRIBUTE_UNUSED;
5376
5377 if (fntype)
5378 {
5379 const_tree fndecl = NULL_TREE;
5380
5381 if (TREE_CODE (fntype) == FUNCTION_DECL)
5382 {
5383 fndecl = fntype;
5384 fntype = TREE_TYPE (fntype);
5385 }
5386
5387 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5388 }
5389 else
5390 pcs_variant = arm_pcs_default;
5391
5392 /* Promote integer types. */
5393 if (type && INTEGRAL_TYPE_P (type))
5394 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5395
5396 if (pcs_variant != ARM_PCS_AAPCS)
5397 {
5398 int i;
5399
5400 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5401 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5402 type))
5403 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5404 mode, type);
5405 }
5406
5407 /* Promote small structs returned in a register to full-word size
5408 for big-endian AAPCS. */
5409 if (type && arm_return_in_msb (type))
5410 {
5411 HOST_WIDE_INT size = int_size_in_bytes (type);
5412 if (size % UNITS_PER_WORD != 0)
5413 {
5414 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5415 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5416 }
5417 }
5418
5419 return gen_rtx_REG (mode, R0_REGNUM);
5420 }
5421
5422 static rtx
5423 aapcs_libcall_value (enum machine_mode mode)
5424 {
5425 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5426 && GET_MODE_SIZE (mode) <= 4)
5427 mode = SImode;
5428
5429 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5430 }
5431
5432 /* Lay out a function argument using the AAPCS rules. The rule
5433 numbers referred to here are those in the AAPCS. */
5434 static void
5435 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5436 const_tree type, bool named)
5437 {
5438 int nregs, nregs2;
5439 int ncrn;
5440
5441 /* We only need to do this once per argument. */
5442 if (pcum->aapcs_arg_processed)
5443 return;
5444
5445 pcum->aapcs_arg_processed = true;
5446
5447 /* Special case: if named is false then we are handling an incoming
5448 anonymous argument which is on the stack. */
5449 if (!named)
5450 return;
5451
5452 /* Is this a potential co-processor register candidate? */
5453 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5454 {
5455 int slot = aapcs_select_call_coproc (pcum, mode, type);
5456 pcum->aapcs_cprc_slot = slot;
5457
5458 /* We don't have to apply any of the rules from part B of the
5459 preparation phase, these are handled elsewhere in the
5460 compiler. */
5461
5462 if (slot >= 0)
5463 {
5464 /* A co-processor register candidate goes either in its own
5465 class of registers or on the stack. */
5466 if (!pcum->aapcs_cprc_failed[slot])
5467 {
5468 /* C1.cp - Try to allocate the argument to co-processor
5469 registers. */
5470 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5471 return;
5472
5473 /* C2.cp - Put the argument on the stack and note that we
5474 can't assign any more candidates in this slot. We also
5475 need to note that we have allocated stack space, so that
5476 we won't later try to split a non-cprc candidate between
5477 core registers and the stack. */
5478 pcum->aapcs_cprc_failed[slot] = true;
5479 pcum->can_split = false;
5480 }
5481
5482 /* We didn't get a register, so this argument goes on the
5483 stack. */
5484 gcc_assert (pcum->can_split == false);
5485 return;
5486 }
5487 }
5488
5489 /* C3 - For double-word aligned arguments, round the NCRN up to the
5490 next even number. */
5491 ncrn = pcum->aapcs_ncrn;
5492 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5493 ncrn++;
5494
5495 nregs = ARM_NUM_REGS2 (mode, type);
5496
5497 /* Sigh, this test should really assert that nregs > 0, but a GCC
5498 extension allows empty structs and then gives them zero size; it
5499 then allows such a structure to be passed by value. For some of
5500 the code below we have to pretend that such an argument has
5501 non-zero size so that we 'locate' it correctly either in
5502 registers or on the stack. */
5503 gcc_assert (nregs >= 0);
5504
5505 nregs2 = nregs ? nregs : 1;
5506
5507 /* C4 - Argument fits entirely in core registers. */
5508 if (ncrn + nregs2 <= NUM_ARG_REGS)
5509 {
5510 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5511 pcum->aapcs_next_ncrn = ncrn + nregs;
5512 return;
5513 }
5514
5515 /* C5 - Some core registers left and there are no arguments already
5516 on the stack: split this argument between the remaining core
5517 registers and the stack. */
5518 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5519 {
5520 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5521 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5522 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5523 return;
5524 }
5525
5526 /* C6 - NCRN is set to 4. */
5527 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5528
5529 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5530 return;
5531 }
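
/* Worked example of the rules above (a sketch, assuming the base AAPCS
   with no co-processor candidate): for  f (int a, double b, int c)  the
   int A takes r0 (NCRN 0 -> 1); B needs doubleword alignment, so C3
   rounds the NCRN up to 2 and B takes r2-r3 (NCRN -> 4); C then fails C4
   and C5 because no core registers remain, so by C6-C8 it goes on the
   stack.  */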
5532
5533 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5534 for a call to a function whose data type is FNTYPE.
5535 For a library call, FNTYPE is NULL. */
5536 void
5537 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5538 rtx libname,
5539 tree fndecl ATTRIBUTE_UNUSED)
5540 {
5541 /* Work out the procedure-call standard (PCS variant) in use. */
5542 if (fntype)
5543 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5544 else
5545 pcum->pcs_variant = arm_pcs_default;
5546
5547 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5548 {
5549 if (arm_libcall_uses_aapcs_base (libname))
5550 pcum->pcs_variant = ARM_PCS_AAPCS;
5551
5552 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5553 pcum->aapcs_reg = NULL_RTX;
5554 pcum->aapcs_partial = 0;
5555 pcum->aapcs_arg_processed = false;
5556 pcum->aapcs_cprc_slot = -1;
5557 pcum->can_split = true;
5558
5559 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5560 {
5561 int i;
5562
5563 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5564 {
5565 pcum->aapcs_cprc_failed[i] = false;
5566 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5567 }
5568 }
5569 return;
5570 }
5571
5572 /* Legacy ABIs */
5573
5574 /* On the ARM, the offset starts at 0. */
5575 pcum->nregs = 0;
5576 pcum->iwmmxt_nregs = 0;
5577 pcum->can_split = true;
5578
5579 /* Varargs vectors are treated the same as long long.
5580 named_count avoids having to change the way arm handles 'named'. */
5581 pcum->named_count = 0;
5582 pcum->nargs = 0;
5583
5584 if (TARGET_REALLY_IWMMXT && fntype)
5585 {
5586 tree fn_arg;
5587
5588 for (fn_arg = TYPE_ARG_TYPES (fntype);
5589 fn_arg;
5590 fn_arg = TREE_CHAIN (fn_arg))
5591 pcum->named_count += 1;
5592
5593 if (! pcum->named_count)
5594 pcum->named_count = INT_MAX;
5595 }
5596 }
5597
5598 /* Return true if we use LRA instead of the reload pass. */
5599 static bool
5600 arm_lra_p (void)
5601 {
5602 return arm_lra_flag;
5603 }
5604
5605 /* Return true if mode/type need doubleword alignment. */
5606 static bool
5607 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5608 {
5609 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5610 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5611 }
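
/* For example: DImode and DFmode values (64-bit alignment) exceed
   PARM_BOUNDARY and so need doubleword alignment, whereas SImode and
   smaller do not; a type carrying __attribute__ ((aligned (8))) also
   qualifies, via the TYPE_ALIGN test.  */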
5612
5613
5614 /* Determine where to put an argument to a function.
5615 Value is zero to push the argument on the stack,
5616 or a hard register in which to store the argument.
5617
5618 MODE is the argument's machine mode.
5619 TYPE is the data type of the argument (as a tree).
5620 This is null for libcalls where that information may
5621 not be available.
5622 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5623 the preceding args and about the function being called.
5624 NAMED is nonzero if this argument is a named parameter
5625 (otherwise it is an extra parameter matching an ellipsis).
5626
5627 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5628 other arguments are passed on the stack. If (NAMED == 0) (which happens
5629 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5630 defined), say it is passed in the stack (function_prologue will
5631 indeed make it pass in the stack if necessary). */
5632
5633 static rtx
5634 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5635 const_tree type, bool named)
5636 {
5637 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5638 int nregs;
5639
5640 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5641 a call insn (op3 of a call_value insn). */
5642 if (mode == VOIDmode)
5643 return const0_rtx;
5644
5645 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5646 {
5647 aapcs_layout_arg (pcum, mode, type, named);
5648 return pcum->aapcs_reg;
5649 }
5650
5651 /* Varargs vectors are treated the same as long long.
5652 named_count avoids having to change the way arm handles 'named'. */
5653 if (TARGET_IWMMXT_ABI
5654 && arm_vector_mode_supported_p (mode)
5655 && pcum->named_count > pcum->nargs + 1)
5656 {
5657 if (pcum->iwmmxt_nregs <= 9)
5658 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5659 else
5660 {
5661 pcum->can_split = false;
5662 return NULL_RTX;
5663 }
5664 }
5665
5666 /* Put doubleword aligned quantities in even register pairs. */
5667 if (pcum->nregs & 1
5668 && ARM_DOUBLEWORD_ALIGN
5669 && arm_needs_doubleword_align (mode, type))
5670 pcum->nregs++;
5671
5672 /* Only allow splitting an arg between regs and memory if all preceding
5673 args were allocated to regs. For args passed by reference we only count
5674 the reference pointer. */
5675 if (pcum->can_split)
5676 nregs = 1;
5677 else
5678 nregs = ARM_NUM_REGS2 (mode, type);
5679
5680 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5681 return NULL_RTX;
5682
5683 return gen_rtx_REG (mode, pcum->nregs);
5684 }
5685
5686 static unsigned int
5687 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5688 {
5689 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5690 ? DOUBLEWORD_ALIGNMENT
5691 : PARM_BOUNDARY);
5692 }
5693
5694 static int
5695 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5696 tree type, bool named)
5697 {
5698 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5699 int nregs = pcum->nregs;
5700
5701 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5702 {
5703 aapcs_layout_arg (pcum, mode, type, named);
5704 return pcum->aapcs_partial;
5705 }
5706
5707 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5708 return 0;
5709
5710 if (NUM_ARG_REGS > nregs
5711 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5712 && pcum->can_split)
5713 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5714
5715 return 0;
5716 }
5717
5718 /* Update the data in PCUM to advance over an argument
5719 of mode MODE and data type TYPE.
5720 (TYPE is null for libcalls where that information may not be available.) */
5721
5722 static void
5723 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5724 const_tree type, bool named)
5725 {
5726 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5727
5728 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5729 {
5730 aapcs_layout_arg (pcum, mode, type, named);
5731
5732 if (pcum->aapcs_cprc_slot >= 0)
5733 {
5734 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5735 type);
5736 pcum->aapcs_cprc_slot = -1;
5737 }
5738
5739 /* Generic stuff. */
5740 pcum->aapcs_arg_processed = false;
5741 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5742 pcum->aapcs_reg = NULL_RTX;
5743 pcum->aapcs_partial = 0;
5744 }
5745 else
5746 {
5747 pcum->nargs += 1;
5748 if (arm_vector_mode_supported_p (mode)
5749 && pcum->named_count > pcum->nargs
5750 && TARGET_IWMMXT_ABI)
5751 pcum->iwmmxt_nregs += 1;
5752 else
5753 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5754 }
5755 }
5756
5757 /* Variable sized types are passed by reference. This is a GCC
5758 extension to the ARM ABI. */
5759
5760 static bool
5761 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5762 enum machine_mode mode ATTRIBUTE_UNUSED,
5763 const_tree type, bool named ATTRIBUTE_UNUSED)
5764 {
5765 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5766 }
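
/* For example: an argument whose type contains a variable-length array
   has a non-constant TYPE_SIZE and is therefore passed by reference (the
   caller passes a pointer to the object) rather than by value; fixed-size
   aggregates are unaffected.  */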
5767 \f
5768 /* Encode the current state of the #pragma [no_]long_calls. */
5769 typedef enum
5770 {
5771 OFF, /* No #pragma [no_]long_calls is in effect. */
5772 LONG, /* #pragma long_calls is in effect. */
5773 SHORT /* #pragma no_long_calls is in effect. */
5774 } arm_pragma_enum;
5775
5776 static arm_pragma_enum arm_pragma_long_calls = OFF;
5777
5778 void
5779 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5780 {
5781 arm_pragma_long_calls = LONG;
5782 }
5783
5784 void
5785 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5786 {
5787 arm_pragma_long_calls = SHORT;
5788 }
5789
5790 void
5791 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5792 {
5793 arm_pragma_long_calls = OFF;
5794 }
5795 \f
5796 /* Handle an attribute requiring a FUNCTION_DECL;
5797 arguments as in struct attribute_spec.handler. */
5798 static tree
5799 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5800 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5801 {
5802 if (TREE_CODE (*node) != FUNCTION_DECL)
5803 {
5804 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5805 name);
5806 *no_add_attrs = true;
5807 }
5808
5809 return NULL_TREE;
5810 }
5811
5812 /* Handle an "interrupt" or "isr" attribute;
5813 arguments as in struct attribute_spec.handler. */
5814 static tree
5815 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5816 bool *no_add_attrs)
5817 {
5818 if (DECL_P (*node))
5819 {
5820 if (TREE_CODE (*node) != FUNCTION_DECL)
5821 {
5822 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5823 name);
5824 *no_add_attrs = true;
5825 }
5826 /* FIXME: the argument if any is checked for type attributes;
5827 should it be checked for decl ones? */
5828 }
5829 else
5830 {
5831 if (TREE_CODE (*node) == FUNCTION_TYPE
5832 || TREE_CODE (*node) == METHOD_TYPE)
5833 {
5834 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5835 {
5836 warning (OPT_Wattributes, "%qE attribute ignored",
5837 name);
5838 *no_add_attrs = true;
5839 }
5840 }
5841 else if (TREE_CODE (*node) == POINTER_TYPE
5842 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5843 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5844 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5845 {
5846 *node = build_variant_type_copy (*node);
5847 TREE_TYPE (*node) = build_type_attribute_variant
5848 (TREE_TYPE (*node),
5849 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5850 *no_add_attrs = true;
5851 }
5852 else
5853 {
5854 /* Possibly pass this attribute on from the type to a decl. */
5855 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5856 | (int) ATTR_FLAG_FUNCTION_NEXT
5857 | (int) ATTR_FLAG_ARRAY_NEXT))
5858 {
5859 *no_add_attrs = true;
5860 return tree_cons (name, args, NULL_TREE);
5861 }
5862 else
5863 {
5864 warning (OPT_Wattributes, "%qE attribute ignored",
5865 name);
5866 }
5867 }
5868 }
5869
5870 return NULL_TREE;
5871 }
5872
5873 /* Handle a "pcs" attribute; arguments as in struct
5874 attribute_spec.handler. */
5875 static tree
5876 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5877 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5878 {
5879 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5880 {
5881 warning (OPT_Wattributes, "%qE attribute ignored", name);
5882 *no_add_attrs = true;
5883 }
5884 return NULL_TREE;
5885 }
5886
5887 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5888 /* Handle the "notshared" attribute. This attribute is another way of
5889 requesting hidden visibility. ARM's compiler supports
5890 "__declspec(notshared)"; we support the same thing via an
5891 attribute. */
5892
5893 static tree
5894 arm_handle_notshared_attribute (tree *node,
5895 tree name ATTRIBUTE_UNUSED,
5896 tree args ATTRIBUTE_UNUSED,
5897 int flags ATTRIBUTE_UNUSED,
5898 bool *no_add_attrs)
5899 {
5900 tree decl = TYPE_NAME (*node);
5901
5902 if (decl)
5903 {
5904 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5905 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5906 *no_add_attrs = false;
5907 }
5908 return NULL_TREE;
5909 }
5910 #endif
5911
5912 /* Return 0 if the attributes for two types are incompatible, 1 if they
5913 are compatible, and 2 if they are nearly compatible (which causes a
5914 warning to be generated). */
5915 static int
5916 arm_comp_type_attributes (const_tree type1, const_tree type2)
5917 {
5918 int l1, l2, s1, s2;
5919
5920 /* Check for mismatch of non-default calling convention. */
5921 if (TREE_CODE (type1) != FUNCTION_TYPE)
5922 return 1;
5923
5924 /* Check for mismatched call attributes. */
5925 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5926 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5927 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5928 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5929
5930 /* Only bother to check if an attribute is defined. */
5931 if (l1 | l2 | s1 | s2)
5932 {
5933 /* If one type has an attribute, the other must have the same attribute. */
5934 if ((l1 != l2) || (s1 != s2))
5935 return 0;
5936
5937 /* Disallow mixed attributes. */
5938 if ((l1 & s2) || (l2 & s1))
5939 return 0;
5940 }
5941
5942 /* Check for mismatched ISR attribute. */
5943 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5944 if (! l1)
5945 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5946 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5947 if (! l2)
5948 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5949 if (l1 != l2)
5950 return 0;
5951
5952 return 1;
5953 }
5954
5955 /* Assign default attributes to a newly defined type. This is used to
5956 set short_call/long_call attributes for function types of
5957 functions defined inside corresponding #pragma scopes. */
5958 static void
5959 arm_set_default_type_attributes (tree type)
5960 {
5961 /* Add __attribute__ ((long_call)) to all functions when inside
5962 #pragma long_calls, or __attribute__ ((short_call)) when inside
5963 #pragma no_long_calls. */
5964 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5965 {
5966 tree type_attr_list, attr_name;
5967 type_attr_list = TYPE_ATTRIBUTES (type);
5968
5969 if (arm_pragma_long_calls == LONG)
5970 attr_name = get_identifier ("long_call");
5971 else if (arm_pragma_long_calls == SHORT)
5972 attr_name = get_identifier ("short_call");
5973 else
5974 return;
5975
5976 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5977 TYPE_ATTRIBUTES (type) = type_attr_list;
5978 }
5979 }
5980 \f
5981 /* Return true if DECL is known to be linked into section SECTION. */
5982
5983 static bool
5984 arm_function_in_section_p (tree decl, section *section)
5985 {
5986 /* We can only be certain about functions defined in the same
5987 compilation unit. */
5988 if (!TREE_STATIC (decl))
5989 return false;
5990
5991 /* Make sure that SYMBOL always binds to the definition in this
5992 compilation unit. */
5993 if (!targetm.binds_local_p (decl))
5994 return false;
5995
5996 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5997 if (!DECL_SECTION_NAME (decl))
5998 {
5999 /* Make sure that we will not create a unique section for DECL. */
6000 if (flag_function_sections || DECL_ONE_ONLY (decl))
6001 return false;
6002 }
6003
6004 return function_section (decl) == section;
6005 }
6006
6007 /* Return nonzero if a 32-bit "long_call" should be generated for
6008 a call from the current function to DECL. We generate a long_call
6009 if the function:
6010
6011 a. has an __attribute__((long_call))
6012 or b. is within the scope of a #pragma long_calls
6013 or c. the -mlong-calls command line switch has been specified
6014
6015 However we do not generate a long call if the function:
6016
6017 d. has an __attribute__ ((short_call))
6018 or e. is inside the scope of a #pragma no_long_calls
6019 or f. is defined in the same section as the current function. */
6020
6021 bool
6022 arm_is_long_call_p (tree decl)
6023 {
6024 tree attrs;
6025
6026 if (!decl)
6027 return TARGET_LONG_CALLS;
6028
6029 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6030 if (lookup_attribute ("short_call", attrs))
6031 return false;
6032
6033 /* For "f", be conservative, and only cater for cases in which the
6034 whole of the current function is placed in the same section. */
6035 if (!flag_reorder_blocks_and_partition
6036 && TREE_CODE (decl) == FUNCTION_DECL
6037 && arm_function_in_section_p (decl, current_function_section ()))
6038 return false;
6039
6040 if (lookup_attribute ("long_call", attrs))
6041 return true;
6042
6043 return TARGET_LONG_CALLS;
6044 }
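
/* Usage sketch (illustrative): a declaration such as
     void far_func (void) __attribute__ ((long_call));
   satisfies case "a" above, while building with -mlong-calls makes
   TARGET_LONG_CALLS the default answer for any call that has neither a
   short_call attribute nor a same-section definition.  */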
6045
6046 /* Return nonzero if it is ok to make a tail-call to DECL. */
6047 static bool
6048 arm_function_ok_for_sibcall (tree decl, tree exp)
6049 {
6050 unsigned long func_type;
6051
6052 if (cfun->machine->sibcall_blocked)
6053 return false;
6054
6055 /* Never tailcall something if we are generating code for Thumb-1. */
6056 if (TARGET_THUMB1)
6057 return false;
6058
6059 /* The PIC register is live on entry to VxWorks PLT entries, so we
6060 must make the call before restoring the PIC register. */
6061 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6062 return false;
6063
6064 /* Cannot tail-call to long calls, since these are out of range of
6065 a branch instruction. */
6066 if (decl && arm_is_long_call_p (decl))
6067 return false;
6068
6069 /* If we are interworking and the function is not declared static
6070 then we can't tail-call it unless we know that it exists in this
6071 compilation unit (since it might be a Thumb routine). */
6072 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6073 && !TREE_ASM_WRITTEN (decl))
6074 return false;
6075
6076 func_type = arm_current_func_type ();
6077 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6078 if (IS_INTERRUPT (func_type))
6079 return false;
6080
6081 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6082 {
6083 /* Check that the return value locations are the same. For
6084 example that we aren't returning a value from the sibling in
6085 a VFP register but then need to transfer it to a core
6086 register. */
6087 rtx a, b;
6088
6089 a = arm_function_value (TREE_TYPE (exp), decl, false);
6090 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6091 cfun->decl, false);
6092 if (!rtx_equal_p (a, b))
6093 return false;
6094 }
6095
6096 /* Never tailcall if function may be called with a misaligned SP. */
6097 if (IS_STACKALIGN (func_type))
6098 return false;
6099
6100 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6101 references should become a NOP. Don't convert such calls into
6102 sibling calls. */
6103 if (TARGET_AAPCS_BASED
6104 && arm_abi == ARM_ABI_AAPCS
6105 && decl
6106 && DECL_WEAK (decl))
6107 return false;
6108
6109 /* Everything else is ok. */
6110 return true;
6111 }
6112
6113 \f
6114 /* Addressing mode support functions. */
6115
6116 /* Return nonzero if X is a legitimate immediate operand when compiling
6117 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6118 int
6119 legitimate_pic_operand_p (rtx x)
6120 {
6121 if (GET_CODE (x) == SYMBOL_REF
6122 || (GET_CODE (x) == CONST
6123 && GET_CODE (XEXP (x, 0)) == PLUS
6124 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6125 return 0;
6126
6127 return 1;
6128 }
6129
6130 /* Record that the current function needs a PIC register. Initialize
6131 cfun->machine->pic_reg if we have not already done so. */
6132
6133 static void
6134 require_pic_register (void)
6135 {
6136 /* A lot of the logic here is made obscure by the fact that this
6137 routine gets called as part of the rtx cost estimation process.
6138 We don't want those calls to affect any assumptions about the real
6139 function; and further, we can't call entry_of_function() until we
6140 start the real expansion process. */
6141 if (!crtl->uses_pic_offset_table)
6142 {
6143 gcc_assert (can_create_pseudo_p ());
6144 if (arm_pic_register != INVALID_REGNUM
6145 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6146 {
6147 if (!cfun->machine->pic_reg)
6148 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6149
6150 /* Play games to avoid marking the function as needing pic
6151 if we are being called as part of the cost-estimation
6152 process. */
6153 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6154 crtl->uses_pic_offset_table = 1;
6155 }
6156 else
6157 {
6158 rtx seq, insn;
6159
6160 if (!cfun->machine->pic_reg)
6161 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6162
6163 /* Play games to avoid marking the function as needing pic
6164 if we are being called as part of the cost-estimation
6165 process. */
6166 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6167 {
6168 crtl->uses_pic_offset_table = 1;
6169 start_sequence ();
6170
6171 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6172 && arm_pic_register > LAST_LO_REGNUM)
6173 emit_move_insn (cfun->machine->pic_reg,
6174 gen_rtx_REG (Pmode, arm_pic_register));
6175 else
6176 arm_load_pic_register (0UL);
6177
6178 seq = get_insns ();
6179 end_sequence ();
6180
6181 for (insn = seq; insn; insn = NEXT_INSN (insn))
6182 if (INSN_P (insn))
6183 INSN_LOCATION (insn) = prologue_location;
6184
6185 /* We can be called during expansion of PHI nodes, where
6186 we can't yet emit instructions directly in the final
6187 insn stream. Queue the insns on the entry edge, they will
6188 be committed after everything else is expanded. */
6189 insert_insn_on_edge (seq,
6190 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6191 }
6192 }
6193 }
6194 }
6195
6196 rtx
6197 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6198 {
6199 if (GET_CODE (orig) == SYMBOL_REF
6200 || GET_CODE (orig) == LABEL_REF)
6201 {
6202 rtx insn;
6203
6204 if (reg == 0)
6205 {
6206 gcc_assert (can_create_pseudo_p ());
6207 reg = gen_reg_rtx (Pmode);
6208 }
6209
6210 /* VxWorks does not impose a fixed gap between segments; the run-time
6211 gap can be different from the object-file gap. We therefore can't
6212 use GOTOFF unless we are absolutely sure that the symbol is in the
6213 same segment as the GOT. Unfortunately, the flexibility of linker
6214 scripts means that we can't be sure of that in general, so assume
6215 that GOTOFF is never valid on VxWorks. */
6216 if ((GET_CODE (orig) == LABEL_REF
6217 || (GET_CODE (orig) == SYMBOL_REF &&
6218 SYMBOL_REF_LOCAL_P (orig)))
6219 && NEED_GOT_RELOC
6220 && arm_pic_data_is_text_relative)
6221 insn = arm_pic_static_addr (orig, reg);
6222 else
6223 {
6224 rtx pat;
6225 rtx mem;
6226
6227 /* If this function doesn't have a pic register, create one now. */
6228 require_pic_register ();
6229
6230 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6231
6232 /* Make the MEM as close to a constant as possible. */
6233 mem = SET_SRC (pat);
6234 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6235 MEM_READONLY_P (mem) = 1;
6236 MEM_NOTRAP_P (mem) = 1;
6237
6238 insn = emit_insn (pat);
6239 }
6240
6241 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6242 by loop. */
6243 set_unique_reg_note (insn, REG_EQUAL, orig);
6244
6245 return reg;
6246 }
6247 else if (GET_CODE (orig) == CONST)
6248 {
6249 rtx base, offset;
6250
6251 if (GET_CODE (XEXP (orig, 0)) == PLUS
6252 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6253 return orig;
6254
6255 /* Handle the case where we have: const (UNSPEC_TLS). */
6256 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6257 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6258 return orig;
6259
6260 /* Handle the case where we have:
6261 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6262 CONST_INT. */
6263 if (GET_CODE (XEXP (orig, 0)) == PLUS
6264 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6265 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6266 {
6267 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6268 return orig;
6269 }
6270
6271 if (reg == 0)
6272 {
6273 gcc_assert (can_create_pseudo_p ());
6274 reg = gen_reg_rtx (Pmode);
6275 }
6276
6277 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6278
6279 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6280 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6281 base == reg ? 0 : reg);
6282
6283 if (CONST_INT_P (offset))
6284 {
6285 /* The base register doesn't really matter, we only want to
6286 test the index for the appropriate mode. */
6287 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6288 {
6289 gcc_assert (can_create_pseudo_p ());
6290 offset = force_reg (Pmode, offset);
6291 }
6292
6293 if (CONST_INT_P (offset))
6294 return plus_constant (Pmode, base, INTVAL (offset));
6295 }
6296
6297 if (GET_MODE_SIZE (mode) > 4
6298 && (GET_MODE_CLASS (mode) == MODE_INT
6299 || TARGET_SOFT_FLOAT))
6300 {
6301 emit_insn (gen_addsi3 (reg, base, offset));
6302 return reg;
6303 }
6304
6305 return gen_rtx_PLUS (Pmode, base, offset);
6306 }
6307
6308 return orig;
6309 }
6310
6311
6312 /* Find a spare register to use during the prolog of a function. */
6313
6314 static int
6315 thumb_find_work_register (unsigned long pushed_regs_mask)
6316 {
6317 int reg;
6318
6319 /* Check the argument registers first as these are call-used. The
6320 register allocation order means that sometimes r3 might be used
6321 but earlier argument registers might not, so check them all. */
6322 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6323 if (!df_regs_ever_live_p (reg))
6324 return reg;
6325
6326 /* Before going on to check the call-saved registers we can try a couple
6327 more ways of deducing that r3 is available. The first is when we are
6328 pushing anonymous arguments onto the stack and we have fewer than 4
6329 registers' worth of fixed arguments (*). In this case r3 will be part of
6330 the variable argument list and so we can be sure that it will be
6331 pushed right at the start of the function. Hence it will be available
6332 for the rest of the prologue.
6333 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6334 if (cfun->machine->uses_anonymous_args
6335 && crtl->args.pretend_args_size > 0)
6336 return LAST_ARG_REGNUM;
6337
6338 /* The other case is when we have fixed arguments but fewer than 4 registers'
6339 worth. In this case r3 might be used in the body of the function, but
6340 it is not being used to convey an argument into the function. In theory
6341 we could just check crtl->args.size to see how many bytes are
6342 being passed in argument registers, but it seems that it is unreliable.
6343 Sometimes it will have the value 0 when in fact arguments are being
6344 passed. (See testcase execute/20021111-1.c for an example). So we also
6345 check the args_info.nregs field as well. The problem with this field is
6346 that it makes no allowances for arguments that are passed to the
6347 function but which are not used. Hence we could miss an opportunity
6348 when a function has an unused argument in r3. But it is better to be
6349 safe than to be sorry. */
6350 if (! cfun->machine->uses_anonymous_args
6351 && crtl->args.size >= 0
6352 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6353 && (TARGET_AAPCS_BASED
6354 ? crtl->args.info.aapcs_ncrn < 4
6355 : crtl->args.info.nregs < 4))
6356 return LAST_ARG_REGNUM;
6357
6358 /* Otherwise look for a call-saved register that is going to be pushed. */
6359 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6360 if (pushed_regs_mask & (1 << reg))
6361 return reg;
6362
6363 if (TARGET_THUMB2)
6364 {
6365 /* Thumb-2 can use high regs. */
6366 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6367 if (pushed_regs_mask & (1 << reg))
6368 return reg;
6369 }
6370 /* Something went wrong - thumb_compute_save_reg_mask()
6371 should have arranged for a suitable register to be pushed. */
6372 gcc_unreachable ();
6373 }
6374
6375 static GTY(()) int pic_labelno;
6376
6377 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6378 low register. */
6379
6380 void
6381 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6382 {
6383 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6384
6385 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6386 return;
6387
6388 gcc_assert (flag_pic);
6389
6390 pic_reg = cfun->machine->pic_reg;
6391 if (TARGET_VXWORKS_RTP)
6392 {
6393 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6394 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6395 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6396
6397 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6398
6399 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6400 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6401 }
6402 else
6403 {
6404 /* We use an UNSPEC rather than a LABEL_REF because this label
6405 never appears in the code stream. */
6406
6407 labelno = GEN_INT (pic_labelno++);
6408 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6409 l1 = gen_rtx_CONST (VOIDmode, l1);
6410
6411 /* On the ARM the PC register contains 'dot + 8' at the time of the
6412 addition, on the Thumb it is 'dot + 4'. */
6413 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6414 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6415 UNSPEC_GOTSYM_OFF);
6416 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6417
6418 if (TARGET_32BIT)
6419 {
6420 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6421 }
6422 else /* TARGET_THUMB1 */
6423 {
6424 if (arm_pic_register != INVALID_REGNUM
6425 && REGNO (pic_reg) > LAST_LO_REGNUM)
6426 {
6427 /* We will have pushed the pic register, so we should always be
6428 able to find a work register. */
6429 pic_tmp = gen_rtx_REG (SImode,
6430 thumb_find_work_register (saved_regs));
6431 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6432 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6433 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6434 }
6435 else if (arm_pic_register != INVALID_REGNUM
6436 && arm_pic_register > LAST_LO_REGNUM
6437 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6438 {
6439 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6440 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6441 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6442 }
6443 else
6444 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6445 }
6446 }
6447
6448 /* Need to emit this whether or not we obey regdecls,
6449 since setjmp/longjmp can cause life info to screw up. */
6450 emit_use (pic_reg);
6451 }
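
/* Roughly, the non-VxWorks path above emits a sequence of the form
       ldr     rPIC, .LCn      @ .LCn: .word _GLOBAL_OFFSET_TABLE_-(.LPICn+8)
     .LPICn:
       add     rPIC, pc, rPIC
   (a sketch; the exact patterns depend on the target variant), which is
   why the offset is computed from 'dot + 8' on ARM and 'dot + 4' on
   Thumb, matching how far the PC reads ahead.  */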
6452
6453 /* Generate code to load the address of a static var when flag_pic is set. */
6454 static rtx
6455 arm_pic_static_addr (rtx orig, rtx reg)
6456 {
6457 rtx l1, labelno, offset_rtx, insn;
6458
6459 gcc_assert (flag_pic);
6460
6461 /* We use an UNSPEC rather than a LABEL_REF because this label
6462 never appears in the code stream. */
6463 labelno = GEN_INT (pic_labelno++);
6464 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6465 l1 = gen_rtx_CONST (VOIDmode, l1);
6466
6467 /* On the ARM the PC register contains 'dot + 8' at the time of the
6468 addition, on the Thumb it is 'dot + 4'. */
6469 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6470 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6471 UNSPEC_SYMBOL_OFFSET);
6472 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6473
6474 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6475 return insn;
6476 }
6477
6478 /* Return nonzero if X is valid as an ARM state addressing register. */
6479 static int
6480 arm_address_register_rtx_p (rtx x, int strict_p)
6481 {
6482 int regno;
6483
6484 if (!REG_P (x))
6485 return 0;
6486
6487 regno = REGNO (x);
6488
6489 if (strict_p)
6490 return ARM_REGNO_OK_FOR_BASE_P (regno);
6491
6492 return (regno <= LAST_ARM_REGNUM
6493 || regno >= FIRST_PSEUDO_REGISTER
6494 || regno == FRAME_POINTER_REGNUM
6495 || regno == ARG_POINTER_REGNUM);
6496 }
6497
6498 /* Return TRUE if this rtx is the difference of a symbol and a label,
6499 and will reduce to a PC-relative relocation in the object file.
6500 Expressions like this can be left alone when generating PIC, rather
6501 than forced through the GOT. */
6502 static int
6503 pcrel_constant_p (rtx x)
6504 {
6505 if (GET_CODE (x) == MINUS)
6506 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6507
6508 return FALSE;
6509 }
6510
6511 /* Return true if X will surely end up in an index register after next
6512 splitting pass. */
6513 static bool
6514 will_be_in_index_register (const_rtx x)
6515 {
6516 /* arm.md: calculate_pic_address will split this into a register. */
6517 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6518 }
6519
6520 /* Return nonzero if X is a valid ARM state address operand. */
6521 int
6522 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6523 int strict_p)
6524 {
6525 bool use_ldrd;
6526 enum rtx_code code = GET_CODE (x);
6527
6528 if (arm_address_register_rtx_p (x, strict_p))
6529 return 1;
6530
6531 use_ldrd = (TARGET_LDRD
6532 && (mode == DImode
6533 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6534
6535 if (code == POST_INC || code == PRE_DEC
6536 || ((code == PRE_INC || code == POST_DEC)
6537 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6538 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6539
6540 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6541 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6542 && GET_CODE (XEXP (x, 1)) == PLUS
6543 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6544 {
6545 rtx addend = XEXP (XEXP (x, 1), 1);
6546
6547 /* Don't allow ldrd post-increment by register because it's hard
6548 to fix up invalid register choices. */
6549 if (use_ldrd
6550 && GET_CODE (x) == POST_MODIFY
6551 && REG_P (addend))
6552 return 0;
6553
6554 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6555 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6556 }
6557
6558 /* After reload constants split into minipools will have addresses
6559 from a LABEL_REF. */
6560 else if (reload_completed
6561 && (code == LABEL_REF
6562 || (code == CONST
6563 && GET_CODE (XEXP (x, 0)) == PLUS
6564 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6565 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6566 return 1;
6567
6568 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6569 return 0;
6570
6571 else if (code == PLUS)
6572 {
6573 rtx xop0 = XEXP (x, 0);
6574 rtx xop1 = XEXP (x, 1);
6575
6576 return ((arm_address_register_rtx_p (xop0, strict_p)
6577 && ((CONST_INT_P (xop1)
6578 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6579 || (!strict_p && will_be_in_index_register (xop1))))
6580 || (arm_address_register_rtx_p (xop1, strict_p)
6581 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6582 }
6583
6584 #if 0
6585 /* Reload currently can't handle MINUS, so disable this for now */
6586 else if (GET_CODE (x) == MINUS)
6587 {
6588 rtx xop0 = XEXP (x, 0);
6589 rtx xop1 = XEXP (x, 1);
6590
6591 return (arm_address_register_rtx_p (xop0, strict_p)
6592 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6593 }
6594 #endif
6595
6596 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6597 && code == SYMBOL_REF
6598 && CONSTANT_POOL_ADDRESS_P (x)
6599 && ! (flag_pic
6600 && symbol_mentioned_p (get_pool_constant (x))
6601 && ! pcrel_constant_p (get_pool_constant (x))))
6602 return 1;
6603
6604 return 0;
6605 }
6606
6607 /* Return nonzero if X is a valid Thumb-2 address operand. */
6608 static int
6609 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6610 {
6611 bool use_ldrd;
6612 enum rtx_code code = GET_CODE (x);
6613
6614 if (arm_address_register_rtx_p (x, strict_p))
6615 return 1;
6616
6617 use_ldrd = (TARGET_LDRD
6618 && (mode == DImode
6619 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6620
6621 if (code == POST_INC || code == PRE_DEC
6622 || ((code == PRE_INC || code == POST_DEC)
6623 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6624 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6625
6626 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6627 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6628 && GET_CODE (XEXP (x, 1)) == PLUS
6629 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6630 {
6631 /* Thumb-2 only has autoincrement by constant. */
6632 rtx addend = XEXP (XEXP (x, 1), 1);
6633 HOST_WIDE_INT offset;
6634
6635 if (!CONST_INT_P (addend))
6636 return 0;
6637
6638 offset = INTVAL (addend);
6639 if (GET_MODE_SIZE (mode) <= 4)
6640 return (offset > -256 && offset < 256);
6641
6642 return (use_ldrd && offset > -1024 && offset < 1024
6643 && (offset & 3) == 0);
6644 }
6645
6646 /* After reload constants split into minipools will have addresses
6647 from a LABEL_REF. */
6648 else if (reload_completed
6649 && (code == LABEL_REF
6650 || (code == CONST
6651 && GET_CODE (XEXP (x, 0)) == PLUS
6652 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6653 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6654 return 1;
6655
6656 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6657 return 0;
6658
6659 else if (code == PLUS)
6660 {
6661 rtx xop0 = XEXP (x, 0);
6662 rtx xop1 = XEXP (x, 1);
6663
6664 return ((arm_address_register_rtx_p (xop0, strict_p)
6665 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6666 || (!strict_p && will_be_in_index_register (xop1))))
6667 || (arm_address_register_rtx_p (xop1, strict_p)
6668 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6669 }
6670
6671 /* Normally we can assign constant values to target registers without
6672 the help of a constant pool. But there are cases where we have to use
6673 the constant pool, for example:
6674 1) assigning a label to a register;
6675 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6676
6677 A constant pool access of the form:
6678 (set (reg r0) (mem (symbol_ref (".LC0"))))
6679 will cause the use of the literal pool (later, in arm_reorg).
6680 So here we mark such a form as invalid; the compiler will then
6681 adjust it into:
6682 (set (reg r0) (symbol_ref (".LC0")))
6683 (set (reg r0) (mem (reg r0))).
6684 No extra register is required, and (mem (reg r0)) won't cause the use
6685 of literal pools. */
6686 else if (arm_disable_literal_pool && code == SYMBOL_REF
6687 && CONSTANT_POOL_ADDRESS_P (x))
6688 return 0;
6689
6690 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6691 && code == SYMBOL_REF
6692 && CONSTANT_POOL_ADDRESS_P (x)
6693 && ! (flag_pic
6694 && symbol_mentioned_p (get_pool_constant (x))
6695 && ! pcrel_constant_p (get_pool_constant (x))))
6696 return 1;
6697
6698 return 0;
6699 }
6700
6701 /* Return nonzero if INDEX is valid for an address index operand in
6702 ARM state. */
6703 static int
6704 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6705 int strict_p)
6706 {
6707 HOST_WIDE_INT range;
6708 enum rtx_code code = GET_CODE (index);
6709
6710 /* Standard coprocessor addressing modes. */
6711 if (TARGET_HARD_FLOAT
6712 && TARGET_VFP
6713 && (mode == SFmode || mode == DFmode))
6714 return (code == CONST_INT && INTVAL (index) < 1024
6715 && INTVAL (index) > -1024
6716 && (INTVAL (index) & 3) == 0);
6717
6718 /* For quad modes, we restrict the constant offset to be slightly less
6719 than what the instruction format permits. We do this because for
6720 quad mode moves, we will actually decompose them into two separate
6721 double-mode reads or writes. INDEX must therefore be a valid
6722 (double-mode) offset and so should INDEX+8. */
6723 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6724 return (code == CONST_INT
6725 && INTVAL (index) < 1016
6726 && INTVAL (index) > -1024
6727 && (INTVAL (index) & 3) == 0);
6728
6729 /* We have no such constraint on double mode offsets, so we permit the
6730 full range of the instruction format. */
6731 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6732 return (code == CONST_INT
6733 && INTVAL (index) < 1024
6734 && INTVAL (index) > -1024
6735 && (INTVAL (index) & 3) == 0);
6736
6737 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6738 return (code == CONST_INT
6739 && INTVAL (index) < 1024
6740 && INTVAL (index) > -1024
6741 && (INTVAL (index) & 3) == 0);
6742
6743 if (arm_address_register_rtx_p (index, strict_p)
6744 && (GET_MODE_SIZE (mode) <= 4))
6745 return 1;
6746
6747 if (mode == DImode || mode == DFmode)
6748 {
6749 if (code == CONST_INT)
6750 {
6751 HOST_WIDE_INT val = INTVAL (index);
6752
6753 if (TARGET_LDRD)
6754 return val > -256 && val < 256;
6755 else
6756 return val > -4096 && val < 4092;
6757 }
6758
6759 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6760 }
6761
6762 if (GET_MODE_SIZE (mode) <= 4
6763 && ! (arm_arch4
6764 && (mode == HImode
6765 || mode == HFmode
6766 || (mode == QImode && outer == SIGN_EXTEND))))
6767 {
6768 if (code == MULT)
6769 {
6770 rtx xiop0 = XEXP (index, 0);
6771 rtx xiop1 = XEXP (index, 1);
6772
6773 return ((arm_address_register_rtx_p (xiop0, strict_p)
6774 && power_of_two_operand (xiop1, SImode))
6775 || (arm_address_register_rtx_p (xiop1, strict_p)
6776 && power_of_two_operand (xiop0, SImode)));
6777 }
6778 else if (code == LSHIFTRT || code == ASHIFTRT
6779 || code == ASHIFT || code == ROTATERT)
6780 {
6781 rtx op = XEXP (index, 1);
6782
6783 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6784 && CONST_INT_P (op)
6785 && INTVAL (op) > 0
6786 && INTVAL (op) <= 31);
6787 }
6788 }
6789
6790 /* For ARM v4 we may be doing a sign-extend operation during the
6791 load. */
6792 if (arm_arch4)
6793 {
6794 if (mode == HImode
6795 || mode == HFmode
6796 || (outer == SIGN_EXTEND && mode == QImode))
6797 range = 256;
6798 else
6799 range = 4096;
6800 }
6801 else
6802 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6803
6804 return (code == CONST_INT
6805 && INTVAL (index) < range
6806 && INTVAL (index) > -range);
6807 }
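
/* Numerically, the checks above mean (for example) that a VFP SFmode or
   DFmode access accepts word-aligned offsets in -1020..+1020, while an
   ARMv4 halfword (HImode) load is limited to -255..+255 and a plain
   SImode load to -4095..+4095.  */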
6808
6809 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
6810 index operand, i.e. 1, 2, 4 or 8. */
6811 static bool
6812 thumb2_index_mul_operand (rtx op)
6813 {
6814 HOST_WIDE_INT val;
6815
6816 if (!CONST_INT_P (op))
6817 return false;
6818
6819 val = INTVAL (op);
6820 return (val == 1 || val == 2 || val == 4 || val == 8);
6821 }
6822
6823 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6824 static int
6825 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6826 {
6827 enum rtx_code code = GET_CODE (index);
6828
6829 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6830 /* Standard coprocessor addressing modes. */
6831 if (TARGET_HARD_FLOAT
6832 && TARGET_VFP
6833 && (mode == SFmode || mode == DFmode))
6834 return (code == CONST_INT && INTVAL (index) < 1024
6835 /* Thumb-2 allows only a > -256 index range for its core register
6836 load/stores. Since we allow SF/DF in core registers, we have
6837 to use the intersection between -256~4096 (core) and -1024~1024
6838 (coprocessor). */
6839 && INTVAL (index) > -256
6840 && (INTVAL (index) & 3) == 0);
6841
6842 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6843 {
6844 /* For DImode assume values will usually live in core regs
6845 and only allow LDRD addressing modes. */
6846 if (!TARGET_LDRD || mode != DImode)
6847 return (code == CONST_INT
6848 && INTVAL (index) < 1024
6849 && INTVAL (index) > -1024
6850 && (INTVAL (index) & 3) == 0);
6851 }
6852
6853 /* For quad modes, we restrict the constant offset to be slightly less
6854 than what the instruction format permits. We do this because for
6855 quad mode moves, we will actually decompose them into two separate
6856 double-mode reads or writes. INDEX must therefore be a valid
6857 (double-mode) offset and so should INDEX+8. */
6858 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6859 return (code == CONST_INT
6860 && INTVAL (index) < 1016
6861 && INTVAL (index) > -1024
6862 && (INTVAL (index) & 3) == 0);
6863
6864 /* We have no such constraint on double mode offsets, so we permit the
6865 full range of the instruction format. */
6866 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6867 return (code == CONST_INT
6868 && INTVAL (index) < 1024
6869 && INTVAL (index) > -1024
6870 && (INTVAL (index) & 3) == 0);
6871
6872 if (arm_address_register_rtx_p (index, strict_p)
6873 && (GET_MODE_SIZE (mode) <= 4))
6874 return 1;
6875
6876 if (mode == DImode || mode == DFmode)
6877 {
6878 if (code == CONST_INT)
6879 {
6880 HOST_WIDE_INT val = INTVAL (index);
6881 /* ??? Can we assume ldrd for thumb2? */
6882 /* Thumb-2 ldrd only has reg+const addressing modes. */
6883 /* ldrd supports offsets of +-1020.
6884 However the ldr fallback does not. */
6885 return val > -256 && val < 256 && (val & 3) == 0;
6886 }
6887 else
6888 return 0;
6889 }
6890
6891 if (code == MULT)
6892 {
6893 rtx xiop0 = XEXP (index, 0);
6894 rtx xiop1 = XEXP (index, 1);
6895
6896 return ((arm_address_register_rtx_p (xiop0, strict_p)
6897 && thumb2_index_mul_operand (xiop1))
6898 || (arm_address_register_rtx_p (xiop1, strict_p)
6899 && thumb2_index_mul_operand (xiop0)));
6900 }
6901 else if (code == ASHIFT)
6902 {
6903 rtx op = XEXP (index, 1);
6904
6905 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6906 && CONST_INT_P (op)
6907 && INTVAL (op) > 0
6908 && INTVAL (op) <= 3);
6909 }
6910
6911 return (code == CONST_INT
6912 && INTVAL (index) < 4096
6913 && INTVAL (index) > -256);
6914 }
6915
6916 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6917 static int
6918 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6919 {
6920 int regno;
6921
6922 if (!REG_P (x))
6923 return 0;
6924
6925 regno = REGNO (x);
6926
6927 if (strict_p)
6928 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6929
6930 return (regno <= LAST_LO_REGNUM
6931 || regno > LAST_VIRTUAL_REGISTER
6932 || regno == FRAME_POINTER_REGNUM
6933 || (GET_MODE_SIZE (mode) >= 4
6934 && (regno == STACK_POINTER_REGNUM
6935 || regno >= FIRST_PSEUDO_REGISTER
6936 || x == hard_frame_pointer_rtx
6937 || x == arg_pointer_rtx)));
6938 }
6939
6940 /* Return nonzero if x is a legitimate index register. This is the case
6941 for any base register that can access a QImode object. */
6942 inline static int
6943 thumb1_index_register_rtx_p (rtx x, int strict_p)
6944 {
6945 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6946 }
6947
6948 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6949
6950 The AP may be eliminated to either the SP or the FP, so we use the
6951 least common denominator, e.g. SImode, and offsets from 0 to 64.
6952
6953 ??? Verify whether the above is the right approach.
6954
6955 ??? Also, the FP may be eliminated to the SP, so perhaps that
6956 needs special handling also.
6957
6958 ??? Look at how the mips16 port solves this problem. It probably uses
6959 better ways to solve some of these problems.
6960
6961 Although it is not incorrect, we don't accept QImode and HImode
6962 addresses based on the frame pointer or arg pointer until the
6963 reload pass starts. This is so that eliminating such addresses
6964 into stack based ones won't produce impossible code. */
6965 int
6966 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6967 {
6968 /* ??? Not clear if this is right. Experiment. */
6969 if (GET_MODE_SIZE (mode) < 4
6970 && !(reload_in_progress || reload_completed)
6971 && (reg_mentioned_p (frame_pointer_rtx, x)
6972 || reg_mentioned_p (arg_pointer_rtx, x)
6973 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6974 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6975 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6976 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6977 return 0;
6978
6979 /* Accept any base register. SP only in SImode or larger. */
6980 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6981 return 1;
6982
6983 /* This is PC relative data before arm_reorg runs. */
6984 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6985 && GET_CODE (x) == SYMBOL_REF
6986 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6987 return 1;
6988
6989 /* This is PC relative data after arm_reorg runs. */
6990 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6991 && reload_completed
6992 && (GET_CODE (x) == LABEL_REF
6993 || (GET_CODE (x) == CONST
6994 && GET_CODE (XEXP (x, 0)) == PLUS
6995 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6996 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6997 return 1;
6998
6999 /* Post-inc indexing only supported for SImode and larger. */
7000 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7001 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7002 return 1;
7003
7004 else if (GET_CODE (x) == PLUS)
7005 {
7006 /* REG+REG address can be any two index registers. */
7007 /* We disallow FRAME+REG addressing since we know that FRAME
7008 will be replaced with STACK, and SP relative addressing only
7009 permits SP+OFFSET. */
7010 if (GET_MODE_SIZE (mode) <= 4
7011 && XEXP (x, 0) != frame_pointer_rtx
7012 && XEXP (x, 1) != frame_pointer_rtx
7013 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7014 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7015 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7016 return 1;
7017
7018 /* REG+const has 5-7 bit offset for non-SP registers. */
7019 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7020 || XEXP (x, 0) == arg_pointer_rtx)
7021 && CONST_INT_P (XEXP (x, 1))
7022 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7023 return 1;
7024
7025 /* REG+const has 10-bit offset for SP, but only SImode and
7026 larger are supported. */
7027 /* ??? Should probably check for DI/DFmode overflow here
7028 just like GO_IF_LEGITIMATE_OFFSET does. */
7029 else if (REG_P (XEXP (x, 0))
7030 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7031 && GET_MODE_SIZE (mode) >= 4
7032 && CONST_INT_P (XEXP (x, 1))
7033 && INTVAL (XEXP (x, 1)) >= 0
7034 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7035 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7036 return 1;
7037
7038 else if (REG_P (XEXP (x, 0))
7039 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7040 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7041 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7042 && REGNO (XEXP (x, 0))
7043 <= LAST_VIRTUAL_POINTER_REGISTER))
7044 && GET_MODE_SIZE (mode) >= 4
7045 && CONST_INT_P (XEXP (x, 1))
7046 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7047 return 1;
7048 }
7049
7050 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7051 && GET_MODE_SIZE (mode) == 4
7052 && GET_CODE (x) == SYMBOL_REF
7053 && CONSTANT_POOL_ADDRESS_P (x)
7054 && ! (flag_pic
7055 && symbol_mentioned_p (get_pool_constant (x))
7056 && ! pcrel_constant_p (get_pool_constant (x))))
7057 return 1;
7058
7059 return 0;
7060 }
7061
7062 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7063 instruction of mode MODE. */
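/* As an illustration of the ranges accepted below (they correspond to
   the 5-bit scaled offset fields of the Thumb-1 load/store
   instructions):
     QImode: 0 .. 31
     HImode: 0 .. 62, in steps of 2
     SImode: 0 .. 124, in steps of 4
   Larger modes use the last rule with the mode size folded in, so
   DImode allows 0 .. 120 in steps of 4.  */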
7064 int
7065 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7066 {
7067 switch (GET_MODE_SIZE (mode))
7068 {
7069 case 1:
7070 return val >= 0 && val < 32;
7071
7072 case 2:
7073 return val >= 0 && val < 64 && (val & 1) == 0;
7074
7075 default:
7076 return (val >= 0
7077 && (val + GET_MODE_SIZE (mode)) <= 128
7078 && (val & 3) == 0);
7079 }
7080 }
7081
7082 bool
7083 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7084 {
7085 if (TARGET_ARM)
7086 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7087 else if (TARGET_THUMB2)
7088 return thumb2_legitimate_address_p (mode, x, strict_p);
7089 else /* if (TARGET_THUMB1) */
7090 return thumb1_legitimate_address_p (mode, x, strict_p);
7091 }
7092
7093 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7094
7095 Given an rtx X being reloaded into a reg required to be
7096 in class CLASS, return the class of reg to actually use.
7097 In general this is just CLASS, but for the Thumb core registers and
7098 immediate constants we prefer a LO_REGS class or a subset. */
7099
7100 static reg_class_t
7101 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7102 {
7103 if (TARGET_32BIT)
7104 return rclass;
7105 else
7106 {
7107 if (rclass == GENERAL_REGS)
7108 return LO_REGS;
7109 else
7110 return rclass;
7111 }
7112 }
7113
7114 /* Build the SYMBOL_REF for __tls_get_addr. */
7115
7116 static GTY(()) rtx tls_get_addr_libfunc;
7117
7118 static rtx
7119 get_tls_get_addr (void)
7120 {
7121 if (!tls_get_addr_libfunc)
7122 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7123 return tls_get_addr_libfunc;
7124 }
7125
7126 rtx
7127 arm_load_tp (rtx target)
7128 {
7129 if (!target)
7130 target = gen_reg_rtx (SImode);
7131
7132 if (TARGET_HARD_TP)
7133 {
7134 /* Can return in any reg. */
7135 emit_insn (gen_load_tp_hard (target));
7136 }
7137 else
7138 {
7139 /* Always returned in r0. Immediately copy the result into a pseudo,
7140 otherwise other uses of r0 (e.g. setting up function arguments) may
7141 clobber the value. */
7142
7143 rtx tmp;
7144
7145 emit_insn (gen_load_tp_soft ());
7146
7147 tmp = gen_rtx_REG (SImode, 0);
7148 emit_move_insn (target, tmp);
7149 }
7150 return target;
7151 }
7152
7153 static rtx
7154 load_tls_operand (rtx x, rtx reg)
7155 {
7156 rtx tmp;
7157
7158 if (reg == NULL_RTX)
7159 reg = gen_reg_rtx (SImode);
7160
7161 tmp = gen_rtx_CONST (SImode, x);
7162
7163 emit_move_insn (reg, tmp);
7164
7165 return reg;
7166 }
7167
7168 static rtx
7169 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7170 {
7171 rtx insns, label, labelno, sum;
7172
7173 gcc_assert (reloc != TLS_DESCSEQ);
7174 start_sequence ();
7175
7176 labelno = GEN_INT (pic_labelno++);
7177 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7178 label = gen_rtx_CONST (VOIDmode, label);
7179
7180 sum = gen_rtx_UNSPEC (Pmode,
7181 gen_rtvec (4, x, GEN_INT (reloc), label,
7182 GEN_INT (TARGET_ARM ? 8 : 4)),
7183 UNSPEC_TLS);
7184 reg = load_tls_operand (sum, reg);
7185
7186 if (TARGET_ARM)
7187 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7188 else
7189 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7190
7191 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7192 LCT_PURE, /* LCT_CONST? */
7193 Pmode, 1, reg, Pmode);
7194
7195 insns = get_insns ();
7196 end_sequence ();
7197
7198 return insns;
7199 }
7200
7201 static rtx
7202 arm_tls_descseq_addr (rtx x, rtx reg)
7203 {
7204 rtx labelno = GEN_INT (pic_labelno++);
7205 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7206 rtx sum = gen_rtx_UNSPEC (Pmode,
7207 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7208 gen_rtx_CONST (VOIDmode, label),
7209 GEN_INT (!TARGET_ARM)),
7210 UNSPEC_TLS);
7211 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7212
7213 emit_insn (gen_tlscall (x, labelno));
7214 if (!reg)
7215 reg = gen_reg_rtx (SImode);
7216 else
7217 gcc_assert (REGNO (reg) != 0);
7218
7219 emit_move_insn (reg, reg0);
7220
7221 return reg;
7222 }
7223
7224 rtx
7225 legitimize_tls_address (rtx x, rtx reg)
7226 {
7227 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7228 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7229
7230 switch (model)
7231 {
7232 case TLS_MODEL_GLOBAL_DYNAMIC:
7233 if (TARGET_GNU2_TLS)
7234 {
7235 reg = arm_tls_descseq_addr (x, reg);
7236
7237 tp = arm_load_tp (NULL_RTX);
7238
7239 dest = gen_rtx_PLUS (Pmode, tp, reg);
7240 }
7241 else
7242 {
7243 /* Original scheme */
7244 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7245 dest = gen_reg_rtx (Pmode);
7246 emit_libcall_block (insns, dest, ret, x);
7247 }
7248 return dest;
7249
7250 case TLS_MODEL_LOCAL_DYNAMIC:
7251 if (TARGET_GNU2_TLS)
7252 {
7253 reg = arm_tls_descseq_addr (x, reg);
7254
7255 tp = arm_load_tp (NULL_RTX);
7256
7257 dest = gen_rtx_PLUS (Pmode, tp, reg);
7258 }
7259 else
7260 {
7261 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7262
7263 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7264 share the LDM result with other LD model accesses. */
7265 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7266 UNSPEC_TLS);
7267 dest = gen_reg_rtx (Pmode);
7268 emit_libcall_block (insns, dest, ret, eqv);
7269
7270 /* Load the addend. */
7271 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7272 GEN_INT (TLS_LDO32)),
7273 UNSPEC_TLS);
7274 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7275 dest = gen_rtx_PLUS (Pmode, dest, addend);
7276 }
7277 return dest;
7278
7279 case TLS_MODEL_INITIAL_EXEC:
7280 labelno = GEN_INT (pic_labelno++);
7281 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7282 label = gen_rtx_CONST (VOIDmode, label);
7283 sum = gen_rtx_UNSPEC (Pmode,
7284 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7285 GEN_INT (TARGET_ARM ? 8 : 4)),
7286 UNSPEC_TLS);
7287 reg = load_tls_operand (sum, reg);
7288
7289 if (TARGET_ARM)
7290 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7291 else if (TARGET_THUMB2)
7292 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7293 else
7294 {
7295 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7296 emit_move_insn (reg, gen_const_mem (SImode, reg));
7297 }
7298
7299 tp = arm_load_tp (NULL_RTX);
7300
7301 return gen_rtx_PLUS (Pmode, tp, reg);
7302
7303 case TLS_MODEL_LOCAL_EXEC:
7304 tp = arm_load_tp (NULL_RTX);
7305
7306 reg = gen_rtx_UNSPEC (Pmode,
7307 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7308 UNSPEC_TLS);
7309 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7310
7311 return gen_rtx_PLUS (Pmode, tp, reg);
7312
7313 default:
7314 abort ();
7315 }
7316 }
7317
7318 /* Try machine-dependent ways of modifying an illegitimate address
7319 to be legitimate. If we find one, return the new, valid address. */
7320 rtx
7321 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7322 {
7323 if (!TARGET_ARM)
7324 {
7325 /* TODO: legitimize_address for Thumb2. */
7326 if (TARGET_THUMB2)
7327 return x;
7328 return thumb_legitimize_address (x, orig_x, mode);
7329 }
7330
7331 if (arm_tls_symbol_p (x))
7332 return legitimize_tls_address (x, NULL_RTX);
7333
7334 if (GET_CODE (x) == PLUS)
7335 {
7336 rtx xop0 = XEXP (x, 0);
7337 rtx xop1 = XEXP (x, 1);
7338
7339 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7340 xop0 = force_reg (SImode, xop0);
7341
7342 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7343 && !symbol_mentioned_p (xop1))
7344 xop1 = force_reg (SImode, xop1);
7345
7346 if (ARM_BASE_REGISTER_RTX_P (xop0)
7347 && CONST_INT_P (xop1))
7348 {
7349 HOST_WIDE_INT n, low_n;
7350 rtx base_reg, val;
7351 n = INTVAL (xop1);
7352
7353 /* VFP addressing modes actually allow greater offsets, but for
7354 now we just stick with the lowest common denominator. */
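/* For example, a DImode access at offset 0x123b is split below into
   low_n = 0xb and n = 0x1230; since low_n > 4 this is rebalanced to
   n = 0x1240 and low_n = -5, so the final address is
   (base + 0x1240) - 5.  */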
7355 if (mode == DImode
7356 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7357 {
7358 low_n = n & 0x0f;
7359 n &= ~0x0f;
7360 if (low_n > 4)
7361 {
7362 n += 16;
7363 low_n -= 16;
7364 }
7365 }
7366 else
7367 {
7368 low_n = ((mode) == TImode ? 0
7369 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7370 n -= low_n;
7371 }
7372
7373 base_reg = gen_reg_rtx (SImode);
7374 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7375 emit_move_insn (base_reg, val);
7376 x = plus_constant (Pmode, base_reg, low_n);
7377 }
7378 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7379 x = gen_rtx_PLUS (SImode, xop0, xop1);
7380 }
7381
7382 /* XXX We don't allow MINUS any more -- see comment in
7383 arm_legitimate_address_outer_p (). */
7384 else if (GET_CODE (x) == MINUS)
7385 {
7386 rtx xop0 = XEXP (x, 0);
7387 rtx xop1 = XEXP (x, 1);
7388
7389 if (CONSTANT_P (xop0))
7390 xop0 = force_reg (SImode, xop0);
7391
7392 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7393 xop1 = force_reg (SImode, xop1);
7394
7395 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7396 x = gen_rtx_MINUS (SImode, xop0, xop1);
7397 }
7398
7399 /* Make sure to take full advantage of the pre-indexed addressing mode
7400 with absolute addresses which often allows for the base register to
7401 be factorized for multiple adjacent memory references, and it might
7402 even allow for the minipool to be avoided entirely. */
7403 else if (CONST_INT_P (x) && optimize > 0)
7404 {
7405 unsigned int bits;
7406 HOST_WIDE_INT mask, base, index;
7407 rtx base_reg;
7408
7409 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7410 use an 8-bit index. So let's use a 12-bit index for SImode only and
7411 hope that arm_gen_constant will enable ldrb to use more bits. */
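/* For instance, x = 0x12345 with mode == SImode uses bits = 12, giving
   base = 0x12000 and index = 0x345; the base has only two bits set, so
   the positive-index form is kept and the address becomes
   (reg loaded with 0x12000) + 0x345.  */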
7412 bits = (mode == SImode) ? 12 : 8;
7413 mask = (1 << bits) - 1;
7414 base = INTVAL (x) & ~mask;
7415 index = INTVAL (x) & mask;
7416 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7417 {
7418 /* It'll most probably be more efficient to generate the base
7419 with more bits set and use a negative index instead. */
7420 base |= mask;
7421 index -= mask;
7422 }
7423 base_reg = force_reg (SImode, GEN_INT (base));
7424 x = plus_constant (Pmode, base_reg, index);
7425 }
7426
7427 if (flag_pic)
7428 {
7429 /* We need to find and carefully transform any SYMBOL and LABEL
7430 references; so go back to the original address expression. */
7431 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7432
7433 if (new_x != orig_x)
7434 x = new_x;
7435 }
7436
7437 return x;
7438 }
7439
7440
7441 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7442 to be legitimate. If we find one, return the new, valid address. */
7443 rtx
7444 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7445 {
7446 if (arm_tls_symbol_p (x))
7447 return legitimize_tls_address (x, NULL_RTX);
7448
7449 if (GET_CODE (x) == PLUS
7450 && CONST_INT_P (XEXP (x, 1))
7451 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7452 || INTVAL (XEXP (x, 1)) < 0))
7453 {
7454 rtx xop0 = XEXP (x, 0);
7455 rtx xop1 = XEXP (x, 1);
7456 HOST_WIDE_INT offset = INTVAL (xop1);
7457
7458 /* Try and fold the offset into a biasing of the base register and
7459 then offsetting that. Don't do this when optimizing for space
7460 since it can cause too many CSEs. */
7461 if (optimize_size && offset >= 0
7462 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7463 {
7464 HOST_WIDE_INT delta;
7465
7466 if (offset >= 256)
7467 delta = offset - (256 - GET_MODE_SIZE (mode));
7468 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7469 delta = 31 * GET_MODE_SIZE (mode);
7470 else
7471 delta = offset & (~31 * GET_MODE_SIZE (mode));
7472
7473 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7474 NULL_RTX);
7475 x = plus_constant (Pmode, xop0, delta);
7476 }
7477 else if (offset < 0 && offset > -256)
7478 /* Small negative offsets are best done with a subtract before the
7479 dereference; forcing these into a register normally takes two
7480 instructions. */
7481 x = force_operand (x, NULL_RTX);
7482 else
7483 {
7484 /* For the remaining cases, force the constant into a register. */
7485 xop1 = force_reg (SImode, xop1);
7486 x = gen_rtx_PLUS (SImode, xop0, xop1);
7487 }
7488 }
7489 else if (GET_CODE (x) == PLUS
7490 && s_register_operand (XEXP (x, 1), SImode)
7491 && !s_register_operand (XEXP (x, 0), SImode))
7492 {
7493 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7494
7495 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7496 }
7497
7498 if (flag_pic)
7499 {
7500 /* We need to find and carefully transform any SYMBOL and LABEL
7501 references; so go back to the original address expression. */
7502 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7503
7504 if (new_x != orig_x)
7505 x = new_x;
7506 }
7507
7508 return x;
7509 }
7510
7511 bool
7512 arm_legitimize_reload_address (rtx *p,
7513 enum machine_mode mode,
7514 int opnum, int type,
7515 int ind_levels ATTRIBUTE_UNUSED)
7516 {
7517 /* We must recognize output that we have already generated ourselves. */
7518 if (GET_CODE (*p) == PLUS
7519 && GET_CODE (XEXP (*p, 0)) == PLUS
7520 && REG_P (XEXP (XEXP (*p, 0), 0))
7521 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7522 && CONST_INT_P (XEXP (*p, 1)))
7523 {
7524 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7525 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7526 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7527 return true;
7528 }
7529
7530 if (GET_CODE (*p) == PLUS
7531 && REG_P (XEXP (*p, 0))
7532 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7533 /* If the base register is equivalent to a constant, let the generic
7534 code handle it. Otherwise we will run into problems if a future
7535 reload pass decides to rematerialize the constant. */
7536 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7537 && CONST_INT_P (XEXP (*p, 1)))
7538 {
7539 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7540 HOST_WIDE_INT low, high;
7541
7542 /* Detect coprocessor load/stores. */
7543 bool coproc_p = ((TARGET_HARD_FLOAT
7544 && TARGET_VFP
7545 && (mode == SFmode || mode == DFmode))
7546 || (TARGET_REALLY_IWMMXT
7547 && VALID_IWMMXT_REG_MODE (mode))
7548 || (TARGET_NEON
7549 && (VALID_NEON_DREG_MODE (mode)
7550 || VALID_NEON_QREG_MODE (mode))));
7551
7552 /* For some access types, bail out when the low two bits of the offset are nonzero (unaligned). */
7553 if ((val & 0x3) != 0
7554 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7555 && (coproc_p
7556 /* For DI, and DF under soft-float: */
7557 || ((mode == DImode || mode == DFmode)
7558 /* Without ldrd, we use stm/ldm, which does not
7559 fare well with unaligned bits. */
7560 && (! TARGET_LDRD
7561 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7562 || TARGET_THUMB2))))
7563 return false;
7564
7565 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7566 where the (reg+high) part gets turned into a reload add insn,
7567 we try to decompose the index into high/low values that can often
7568 also lead to better reload CSE.
7569 For example:
7570 ldr r0, [r2, #4100] // Offset too large
7571 ldr r1, [r2, #4104] // Offset too large
7572
7573 is best reloaded as:
7574 add t1, r2, #4096
7575 ldr r0, [t1, #4]
7576 add t2, r2, #4096
7577 ldr r1, [t2, #8]
7578
7579 which post-reload CSE can simplify in most cases to eliminate the
7580 second add instruction:
7581 add t1, r2, #4096
7582 ldr r0, [t1, #4]
7583 ldr r1, [t1, #8]
7584
7585 The idea here is that we want to split out the bits of the constant
7586 as a mask, rather than as subtracting the maximum offset that the
7587 respective type of load/store used can handle.
7588
7589 A negative low offset can still be used even if
7590 the overall offset is positive; sometimes this may lead to an immediate
7591 that can be constructed with fewer instructions.
7592 For example:
7593 ldr r0, [r2, #0x3FFFFC]
7594
7595 This is best reloaded as:
7596 add t1, r2, #0x400000
7597 ldr r0, [t1, #-4]
7598
7599 The trick for spotting this for a load insn with N bits of offset
7600 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7601 negative offset that is going to make bit N and all the bits below
7602 it become zero in the remainder part.
7603
7604 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7605 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7606 used in most cases of ARM load/store instructions. */
7607
7608 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7609 (((VAL) & ((1 << (N)) - 1)) \
7610 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7611 : 0)
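/* For example, SIGN_MAG_LOW_ADDR_BITS (0x3ffffc, 12) yields -4, so the
   high part becomes 0x400000 as in the example above; a value with
   bit N clear, e.g. SIGN_MAG_LOW_ADDR_BITS (0x123, 12), simply returns
   its low N bits (0x123).  */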
7612
7613 if (coproc_p)
7614 {
7615 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7616
7617 /* NEON quad-word load/stores are made of two double-word accesses,
7618 so the valid index range is reduced by 8. Treat as 9-bit range if
7619 we go over it. */
7620 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7621 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7622 }
7623 else if (GET_MODE_SIZE (mode) == 8)
7624 {
7625 if (TARGET_LDRD)
7626 low = (TARGET_THUMB2
7627 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7628 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7629 else
7630 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7631 to access doublewords. The supported load/store offsets are
7632 -8, -4, and 4, which we try to produce here. */
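/* The expression below sign-extends the low nibble of VAL: an offset
   ending in 0x4 gives low = 4, one ending in 0xc gives low = -4, and
   one ending in 0x8 gives low = -8.  */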
7633 low = ((val & 0xf) ^ 0x8) - 0x8;
7634 }
7635 else if (GET_MODE_SIZE (mode) < 8)
7636 {
7637 /* NEON element load/stores do not have an offset. */
7638 if (TARGET_NEON_FP16 && mode == HFmode)
7639 return false;
7640
7641 if (TARGET_THUMB2)
7642 {
7643 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7644 Try the wider 12-bit range first, and re-try if the result
7645 is out of range. */
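/* For instance, val = 0x1e04 gives -508 with the 12-bit split, which
   falls outside (-256,4096), so the 8-bit split is used instead and
   yields low = 4, leaving 0x1e00 for the high part.  */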
7646 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7647 if (low < -255)
7648 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7649 }
7650 else
7651 {
7652 if (mode == HImode || mode == HFmode)
7653 {
7654 if (arm_arch4)
7655 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7656 else
7657 {
7658 /* The storehi/movhi_bytes fallbacks can use only
7659 [-4094,+4094] of the full ldrb/strb index range. */
7660 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7661 if (low == 4095 || low == -4095)
7662 return false;
7663 }
7664 }
7665 else
7666 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7667 }
7668 }
7669 else
7670 return false;
7671
7672 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7673 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7674 - (unsigned HOST_WIDE_INT) 0x80000000);
7675 /* Check for overflow or zero */
7676 if (low == 0 || high == 0 || (high + low != val))
7677 return false;
7678
7679 /* Reload the high part into a base reg; leave the low part
7680 in the mem.
7681 Note that replacing this gen_rtx_PLUS with plus_constant is
7682 wrong in this case because we rely on the
7683 (plus (plus reg c1) c2) structure being preserved so that
7684 XEXP (*p, 0) in push_reload below uses the correct term. */
7685 *p = gen_rtx_PLUS (GET_MODE (*p),
7686 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7687 GEN_INT (high)),
7688 GEN_INT (low));
7689 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7690 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7691 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7692 return true;
7693 }
7694
7695 return false;
7696 }
7697
7698 rtx
7699 thumb_legitimize_reload_address (rtx *x_p,
7700 enum machine_mode mode,
7701 int opnum, int type,
7702 int ind_levels ATTRIBUTE_UNUSED)
7703 {
7704 rtx x = *x_p;
7705
7706 if (GET_CODE (x) == PLUS
7707 && GET_MODE_SIZE (mode) < 4
7708 && REG_P (XEXP (x, 0))
7709 && XEXP (x, 0) == stack_pointer_rtx
7710 && CONST_INT_P (XEXP (x, 1))
7711 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7712 {
7713 rtx orig_x = x;
7714
7715 x = copy_rtx (x);
7716 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7717 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7718 return x;
7719 }
7720
7721 /* If both registers are hi-regs, then it's better to reload the
7722 entire expression rather than each register individually. That
7723 only requires one reload register rather than two. */
7724 if (GET_CODE (x) == PLUS
7725 && REG_P (XEXP (x, 0))
7726 && REG_P (XEXP (x, 1))
7727 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7728 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7729 {
7730 rtx orig_x = x;
7731
7732 x = copy_rtx (x);
7733 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7734 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7735 return x;
7736 }
7737
7738 return NULL;
7739 }
7740
7741 /* Test for various thread-local symbols. */
7742
7743 /* Return TRUE if X is a thread-local symbol. */
7744
7745 static bool
7746 arm_tls_symbol_p (rtx x)
7747 {
7748 if (! TARGET_HAVE_TLS)
7749 return false;
7750
7751 if (GET_CODE (x) != SYMBOL_REF)
7752 return false;
7753
7754 return SYMBOL_REF_TLS_MODEL (x) != 0;
7755 }
7756
7757 /* Helper for arm_tls_referenced_p. */
7758
7759 static int
7760 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7761 {
7762 if (GET_CODE (*x) == SYMBOL_REF)
7763 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7764
7765 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7766 TLS offsets, not real symbol references. */
7767 if (GET_CODE (*x) == UNSPEC
7768 && XINT (*x, 1) == UNSPEC_TLS)
7769 return -1;
7770
7771 return 0;
7772 }
7773
7774 /* Return TRUE if X contains any TLS symbol references. */
7775
7776 bool
7777 arm_tls_referenced_p (rtx x)
7778 {
7779 if (! TARGET_HAVE_TLS)
7780 return false;
7781
7782 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7783 }
7784
7785 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7786
7787 On the ARM, allow any integer (invalid ones are removed later by insn
7788 patterns), nice doubles and symbol_refs which refer to the function's
7789 constant pool XXX.
7790
7791 When generating pic allow anything. */
7792
7793 static bool
7794 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7795 {
7796 /* At present, we have no support for Neon structure constants, so forbid
7797 them here. It might be possible to handle simple cases like 0 and -1
7798 in future. */
7799 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7800 return false;
7801
7802 return flag_pic || !label_mentioned_p (x);
7803 }
7804
7805 static bool
7806 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7807 {
7808 return (CONST_INT_P (x)
7809 || CONST_DOUBLE_P (x)
7810 || CONSTANT_ADDRESS_P (x)
7811 || flag_pic);
7812 }
7813
7814 static bool
7815 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7816 {
7817 return (!arm_cannot_force_const_mem (mode, x)
7818 && (TARGET_32BIT
7819 ? arm_legitimate_constant_p_1 (mode, x)
7820 : thumb_legitimate_constant_p (mode, x)));
7821 }
7822
7823 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7824
7825 static bool
7826 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7827 {
7828 rtx base, offset;
7829
7830 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7831 {
7832 split_const (x, &base, &offset);
7833 if (GET_CODE (base) == SYMBOL_REF
7834 && !offset_within_block_p (base, INTVAL (offset)))
7835 return true;
7836 }
7837 return arm_tls_referenced_p (x);
7838 }
7839 \f
7840 #define REG_OR_SUBREG_REG(X) \
7841 (REG_P (X) \
7842 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7843
7844 #define REG_OR_SUBREG_RTX(X) \
7845 (REG_P (X) ? (X) : SUBREG_REG (X))
7846
7847 static inline int
7848 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7849 {
7850 enum machine_mode mode = GET_MODE (x);
7851 int total, words;
7852
7853 switch (code)
7854 {
7855 case ASHIFT:
7856 case ASHIFTRT:
7857 case LSHIFTRT:
7858 case ROTATERT:
7859 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7860
7861 case PLUS:
7862 case MINUS:
7863 case COMPARE:
7864 case NEG:
7865 case NOT:
7866 return COSTS_N_INSNS (1);
7867
7868 case MULT:
7869 if (CONST_INT_P (XEXP (x, 1)))
7870 {
7871 int cycles = 0;
7872 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7873
7874 while (i)
7875 {
7876 i >>= 2;
7877 cycles++;
7878 }
7879 return COSTS_N_INSNS (2) + cycles;
7880 }
7881 return COSTS_N_INSNS (1) + 16;
7882
7883 case SET:
7884 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7885 the mode. */
7886 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7887 return (COSTS_N_INSNS (words)
7888 + 4 * ((MEM_P (SET_SRC (x)))
7889 + MEM_P (SET_DEST (x))));
7890
7891 case CONST_INT:
7892 if (outer == SET)
7893 {
7894 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7895 return 0;
7896 if (thumb_shiftable_const (INTVAL (x)))
7897 return COSTS_N_INSNS (2);
7898 return COSTS_N_INSNS (3);
7899 }
7900 else if ((outer == PLUS || outer == COMPARE)
7901 && INTVAL (x) < 256 && INTVAL (x) > -256)
7902 return 0;
7903 else if ((outer == IOR || outer == XOR || outer == AND)
7904 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7905 return COSTS_N_INSNS (1);
7906 else if (outer == AND)
7907 {
7908 int i;
7909 /* This duplicates the tests in the andsi3 expander. */
7910 for (i = 9; i <= 31; i++)
7911 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7912 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7913 return COSTS_N_INSNS (2);
7914 }
7915 else if (outer == ASHIFT || outer == ASHIFTRT
7916 || outer == LSHIFTRT)
7917 return 0;
7918 return COSTS_N_INSNS (2);
7919
7920 case CONST:
7921 case CONST_DOUBLE:
7922 case LABEL_REF:
7923 case SYMBOL_REF:
7924 return COSTS_N_INSNS (3);
7925
7926 case UDIV:
7927 case UMOD:
7928 case DIV:
7929 case MOD:
7930 return 100;
7931
7932 case TRUNCATE:
7933 return 99;
7934
7935 case AND:
7936 case XOR:
7937 case IOR:
7938 /* XXX guess. */
7939 return 8;
7940
7941 case MEM:
7942 /* XXX another guess. */
7943 /* Memory costs quite a lot for the first word, but subsequent words
7944 load at the equivalent of a single insn each. */
7945 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7946 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7947 ? 4 : 0));
7948
7949 case IF_THEN_ELSE:
7950 /* XXX a guess. */
7951 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7952 return 14;
7953 return 2;
7954
7955 case SIGN_EXTEND:
7956 case ZERO_EXTEND:
7957 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7958 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7959
7960 if (mode == SImode)
7961 return total;
7962
7963 if (arm_arch6)
7964 return total + COSTS_N_INSNS (1);
7965
7966 /* Assume a two-shift sequence. Increase the cost slightly so
7967 we prefer actual shifts over an extend operation. */
7968 return total + 1 + COSTS_N_INSNS (2);
7969
7970 default:
7971 return 99;
7972 }
7973 }
7974
7975 static inline bool
7976 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7977 {
7978 enum machine_mode mode = GET_MODE (x);
7979 enum rtx_code subcode;
7980 rtx operand;
7981 enum rtx_code code = GET_CODE (x);
7982 *total = 0;
7983
7984 switch (code)
7985 {
7986 case MEM:
7987 /* Memory costs quite a lot for the first word, but subsequent words
7988 load at the equivalent of a single insn each. */
7989 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7990 return true;
7991
7992 case DIV:
7993 case MOD:
7994 case UDIV:
7995 case UMOD:
7996 if (TARGET_HARD_FLOAT && mode == SFmode)
7997 *total = COSTS_N_INSNS (2);
7998 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7999 *total = COSTS_N_INSNS (4);
8000 else
8001 *total = COSTS_N_INSNS (20);
8002 return false;
8003
8004 case ROTATE:
8005 if (REG_P (XEXP (x, 1)))
8006 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8007 else if (!CONST_INT_P (XEXP (x, 1)))
8008 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8009
8010 /* Fall through */
8011 case ROTATERT:
8012 if (mode != SImode)
8013 {
8014 *total += COSTS_N_INSNS (4);
8015 return true;
8016 }
8017
8018 /* Fall through */
8019 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8020 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8021 if (mode == DImode)
8022 {
8023 *total += COSTS_N_INSNS (3);
8024 return true;
8025 }
8026
8027 *total += COSTS_N_INSNS (1);
8028 /* Increase the cost of complex shifts because they aren't any faster,
8029 and reduce dual issue opportunities. */
8030 if (arm_tune_cortex_a9
8031 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8032 ++*total;
8033
8034 return true;
8035
8036 case MINUS:
8037 if (mode == DImode)
8038 {
8039 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8040 if (CONST_INT_P (XEXP (x, 0))
8041 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8042 {
8043 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8044 return true;
8045 }
8046
8047 if (CONST_INT_P (XEXP (x, 1))
8048 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8049 {
8050 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8051 return true;
8052 }
8053
8054 return false;
8055 }
8056
8057 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8058 {
8059 if (TARGET_HARD_FLOAT
8060 && (mode == SFmode
8061 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8062 {
8063 *total = COSTS_N_INSNS (1);
8064 if (CONST_DOUBLE_P (XEXP (x, 0))
8065 && arm_const_double_rtx (XEXP (x, 0)))
8066 {
8067 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8068 return true;
8069 }
8070
8071 if (CONST_DOUBLE_P (XEXP (x, 1))
8072 && arm_const_double_rtx (XEXP (x, 1)))
8073 {
8074 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8075 return true;
8076 }
8077
8078 return false;
8079 }
8080 *total = COSTS_N_INSNS (20);
8081 return false;
8082 }
8083
8084 *total = COSTS_N_INSNS (1);
8085 if (CONST_INT_P (XEXP (x, 0))
8086 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8087 {
8088 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8089 return true;
8090 }
8091
8092 subcode = GET_CODE (XEXP (x, 1));
8093 if (subcode == ASHIFT || subcode == ASHIFTRT
8094 || subcode == LSHIFTRT
8095 || subcode == ROTATE || subcode == ROTATERT)
8096 {
8097 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8098 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8099 return true;
8100 }
8101
8102 /* A shift as a part of RSB costs no more than RSB itself. */
8103 if (GET_CODE (XEXP (x, 0)) == MULT
8104 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8105 {
8106 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8107 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8108 return true;
8109 }
8110
8111 if (subcode == MULT
8112 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8113 {
8114 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8115 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8116 return true;
8117 }
8118
8119 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8120 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8121 {
8122 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8123 if (REG_P (XEXP (XEXP (x, 1), 0))
8124 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8125 *total += COSTS_N_INSNS (1);
8126
8127 return true;
8128 }
8129
8130 /* Fall through */
8131
8132 case PLUS:
8133 if (code == PLUS && arm_arch6 && mode == SImode
8134 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8135 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8136 {
8137 *total = COSTS_N_INSNS (1);
8138 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8139 0, speed);
8140 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8141 return true;
8142 }
8143
8144 /* MLA: All arguments must be registers. We filter out
8145 multiplication by a power of two, so that we fall through to
8146 the code below. */
8147 if (GET_CODE (XEXP (x, 0)) == MULT
8148 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8149 {
8150 /* The cost comes from the cost of the multiply. */
8151 return false;
8152 }
8153
8154 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8155 {
8156 if (TARGET_HARD_FLOAT
8157 && (mode == SFmode
8158 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8159 {
8160 *total = COSTS_N_INSNS (1);
8161 if (CONST_DOUBLE_P (XEXP (x, 1))
8162 && arm_const_double_rtx (XEXP (x, 1)))
8163 {
8164 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8165 return true;
8166 }
8167
8168 return false;
8169 }
8170
8171 *total = COSTS_N_INSNS (20);
8172 return false;
8173 }
8174
8175 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8176 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8177 {
8178 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8179 if (REG_P (XEXP (XEXP (x, 0), 0))
8180 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8181 *total += COSTS_N_INSNS (1);
8182 return true;
8183 }
8184
8185 /* Fall through */
8186
8187 case AND: case XOR: case IOR:
8188
8189 /* Normally the frame registers will be split into reg+const during
8190 reload, so it is a bad idea to combine them with other instructions,
8191 since then they might not be moved outside of loops. As a compromise
8192 we allow integration with ops that have a constant as their second
8193 operand. */
8194 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8195 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8196 && !CONST_INT_P (XEXP (x, 1)))
8197 *total = COSTS_N_INSNS (1);
8198
8199 if (mode == DImode)
8200 {
8201 *total += COSTS_N_INSNS (2);
8202 if (CONST_INT_P (XEXP (x, 1))
8203 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8204 {
8205 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8206 return true;
8207 }
8208
8209 return false;
8210 }
8211
8212 *total += COSTS_N_INSNS (1);
8213 if (CONST_INT_P (XEXP (x, 1))
8214 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8215 {
8216 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8217 return true;
8218 }
8219 subcode = GET_CODE (XEXP (x, 0));
8220 if (subcode == ASHIFT || subcode == ASHIFTRT
8221 || subcode == LSHIFTRT
8222 || subcode == ROTATE || subcode == ROTATERT)
8223 {
8224 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8225 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8226 return true;
8227 }
8228
8229 if (subcode == MULT
8230 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8231 {
8232 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8233 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8234 return true;
8235 }
8236
8237 if (subcode == UMIN || subcode == UMAX
8238 || subcode == SMIN || subcode == SMAX)
8239 {
8240 *total = COSTS_N_INSNS (3);
8241 return true;
8242 }
8243
8244 return false;
8245
8246 case MULT:
8247 /* This should have been handled by the CPU specific routines. */
8248 gcc_unreachable ();
8249
8250 case TRUNCATE:
8251 if (arm_arch3m && mode == SImode
8252 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8253 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8254 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8255 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8256 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8257 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8258 {
8259 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8260 return true;
8261 }
8262 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8263 return false;
8264
8265 case NEG:
8266 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8267 {
8268 if (TARGET_HARD_FLOAT
8269 && (mode == SFmode
8270 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8271 {
8272 *total = COSTS_N_INSNS (1);
8273 return false;
8274 }
8275 *total = COSTS_N_INSNS (2);
8276 return false;
8277 }
8278
8279 /* Fall through */
8280 case NOT:
8281 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8282 if (mode == SImode && code == NOT)
8283 {
8284 subcode = GET_CODE (XEXP (x, 0));
8285 if (subcode == ASHIFT || subcode == ASHIFTRT
8286 || subcode == LSHIFTRT
8287 || subcode == ROTATE || subcode == ROTATERT
8288 || (subcode == MULT
8289 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8290 {
8291 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8292 /* Register shifts cost an extra cycle. */
8293 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8294 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8295 subcode, 1, speed);
8296 return true;
8297 }
8298 }
8299
8300 return false;
8301
8302 case IF_THEN_ELSE:
8303 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8304 {
8305 *total = COSTS_N_INSNS (4);
8306 return true;
8307 }
8308
8309 operand = XEXP (x, 0);
8310
8311 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8312 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8313 && REG_P (XEXP (operand, 0))
8314 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8315 *total += COSTS_N_INSNS (1);
8316 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8317 + rtx_cost (XEXP (x, 2), code, 2, speed));
8318 return true;
8319
8320 case NE:
8321 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8322 {
8323 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8324 return true;
8325 }
8326 goto scc_insn;
8327
8328 case GE:
8329 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8330 && mode == SImode && XEXP (x, 1) == const0_rtx)
8331 {
8332 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8333 return true;
8334 }
8335 goto scc_insn;
8336
8337 case LT:
8338 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8339 && mode == SImode && XEXP (x, 1) == const0_rtx)
8340 {
8341 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8342 return true;
8343 }
8344 goto scc_insn;
8345
8346 case EQ:
8347 case GT:
8348 case LE:
8349 case GEU:
8350 case LTU:
8351 case GTU:
8352 case LEU:
8353 case UNORDERED:
8354 case ORDERED:
8355 case UNEQ:
8356 case UNGE:
8357 case UNLT:
8358 case UNGT:
8359 case UNLE:
8360 scc_insn:
8361 /* SCC insns. If the comparison has already been
8362 performed, they cost 2 instructions. Otherwise they need
8363 an additional comparison before them. */
8364 *total = COSTS_N_INSNS (2);
8365 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8366 {
8367 return true;
8368 }
8369
8370 /* Fall through */
8371 case COMPARE:
8372 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8373 {
8374 *total = 0;
8375 return true;
8376 }
8377
8378 *total += COSTS_N_INSNS (1);
8379 if (CONST_INT_P (XEXP (x, 1))
8380 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8381 {
8382 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8383 return true;
8384 }
8385
8386 subcode = GET_CODE (XEXP (x, 0));
8387 if (subcode == ASHIFT || subcode == ASHIFTRT
8388 || subcode == LSHIFTRT
8389 || subcode == ROTATE || subcode == ROTATERT)
8390 {
8391 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8392 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8393 return true;
8394 }
8395
8396 if (subcode == MULT
8397 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8398 {
8399 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8400 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8401 return true;
8402 }
8403
8404 return false;
8405
8406 case UMIN:
8407 case UMAX:
8408 case SMIN:
8409 case SMAX:
8410 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8411 if (!CONST_INT_P (XEXP (x, 1))
8412 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8413 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8414 return true;
8415
8416 case ABS:
8417 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8418 {
8419 if (TARGET_HARD_FLOAT
8420 && (mode == SFmode
8421 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8422 {
8423 *total = COSTS_N_INSNS (1);
8424 return false;
8425 }
8426 *total = COSTS_N_INSNS (20);
8427 return false;
8428 }
8429 *total = COSTS_N_INSNS (1);
8430 if (mode == DImode)
8431 *total += COSTS_N_INSNS (3);
8432 return false;
8433
8434 case SIGN_EXTEND:
8435 case ZERO_EXTEND:
8436 *total = 0;
8437 if (GET_MODE_CLASS (mode) == MODE_INT)
8438 {
8439 rtx op = XEXP (x, 0);
8440 enum machine_mode opmode = GET_MODE (op);
8441
8442 if (mode == DImode)
8443 *total += COSTS_N_INSNS (1);
8444
8445 if (opmode != SImode)
8446 {
8447 if (MEM_P (op))
8448 {
8449 /* If !arm_arch4, we use one of the extendhisi2_mem
8450 or movhi_bytes patterns for HImode. For a QImode
8451 sign extension, we first zero-extend from memory
8452 and then perform a shift sequence. */
8453 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8454 *total += COSTS_N_INSNS (2);
8455 }
8456 else if (arm_arch6)
8457 *total += COSTS_N_INSNS (1);
8458
8459 /* We don't have the necessary insn, so we need to perform some
8460 other operation. */
8461 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8462 /* An and with constant 255. */
8463 *total += COSTS_N_INSNS (1);
8464 else
8465 /* A shift sequence. Increase costs slightly to avoid
8466 combining two shifts into an extend operation. */
8467 *total += COSTS_N_INSNS (2) + 1;
8468 }
8469
8470 return false;
8471 }
8472
8473 switch (GET_MODE (XEXP (x, 0)))
8474 {
8475 case V8QImode:
8476 case V4HImode:
8477 case V2SImode:
8478 case V4QImode:
8479 case V2HImode:
8480 *total = COSTS_N_INSNS (1);
8481 return false;
8482
8483 default:
8484 gcc_unreachable ();
8485 }
8486 gcc_unreachable ();
8487
8488 case ZERO_EXTRACT:
8489 case SIGN_EXTRACT:
8490 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8491 return true;
8492
8493 case CONST_INT:
8494 if (const_ok_for_arm (INTVAL (x))
8495 || const_ok_for_arm (~INTVAL (x)))
8496 *total = COSTS_N_INSNS (1);
8497 else
8498 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8499 INTVAL (x), NULL_RTX,
8500 NULL_RTX, 0, 0));
8501 return true;
8502
8503 case CONST:
8504 case LABEL_REF:
8505 case SYMBOL_REF:
8506 *total = COSTS_N_INSNS (3);
8507 return true;
8508
8509 case HIGH:
8510 *total = COSTS_N_INSNS (1);
8511 return true;
8512
8513 case LO_SUM:
8514 *total = COSTS_N_INSNS (1);
8515 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8516 return true;
8517
8518 case CONST_DOUBLE:
8519 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8520 && (mode == SFmode || !TARGET_VFP_SINGLE))
8521 *total = COSTS_N_INSNS (1);
8522 else
8523 *total = COSTS_N_INSNS (4);
8524 return true;
8525
8526 case SET:
8527 /* The vec_extract patterns accept memory operands that require an
8528 address reload. Account for the cost of that reload to give the
8529 auto-inc-dec pass an incentive to try to replace them. */
8530 if (TARGET_NEON && MEM_P (SET_DEST (x))
8531 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8532 {
8533 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8534 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8535 *total += COSTS_N_INSNS (1);
8536 return true;
8537 }
8538 /* Likewise for the vec_set patterns. */
8539 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8540 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8541 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8542 {
8543 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8544 *total = rtx_cost (mem, code, 0, speed);
8545 if (!neon_vector_mem_operand (mem, 2, true))
8546 *total += COSTS_N_INSNS (1);
8547 return true;
8548 }
8549 return false;
8550
8551 case UNSPEC:
8552 /* We cost this as high as our memory costs to allow this to
8553 be hoisted from loops. */
8554 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8555 {
8556 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8557 }
8558 return true;
8559
8560 case CONST_VECTOR:
8561 if (TARGET_NEON
8562 && TARGET_HARD_FLOAT
8563 && outer == SET
8564 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8565 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8566 *total = COSTS_N_INSNS (1);
8567 else
8568 *total = COSTS_N_INSNS (4);
8569 return true;
8570
8571 default:
8572 *total = COSTS_N_INSNS (4);
8573 return false;
8574 }
8575 }
8576
8577 /* Estimates the size cost of thumb1 instructions.
8578 For now most of the code is copied from thumb1_rtx_costs. We need more
8579 fine-grained tuning when we have more related test cases. */
8580 static inline int
8581 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8582 {
8583 enum machine_mode mode = GET_MODE (x);
8584 int words;
8585
8586 switch (code)
8587 {
8588 case ASHIFT:
8589 case ASHIFTRT:
8590 case LSHIFTRT:
8591 case ROTATERT:
8592 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8593
8594 case PLUS:
8595 case MINUS:
8596 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8597 defined by RTL expansion, especially for the expansion of
8598 multiplication. */
8599 if ((GET_CODE (XEXP (x, 0)) == MULT
8600 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8601 || (GET_CODE (XEXP (x, 1)) == MULT
8602 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8603 return COSTS_N_INSNS (2);
8604 /* Deliberately fall through for normal RTX. */
8605 case COMPARE:
8606 case NEG:
8607 case NOT:
8608 return COSTS_N_INSNS (1);
8609
8610 case MULT:
8611 if (CONST_INT_P (XEXP (x, 1)))
8612 {
8613 /* The Thumb1 mul instruction can't operate on a constant. We must load it
8614 into a register first. */
8615 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8616 return COSTS_N_INSNS (1) + const_size;
8617 }
8618 return COSTS_N_INSNS (1);
8619
8620 case SET:
8621 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8622 the mode. */
8623 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8624 return (COSTS_N_INSNS (words)
8625 + 4 * ((MEM_P (SET_SRC (x)))
8626 + MEM_P (SET_DEST (x))));
8627
8628 case CONST_INT:
8629 if (outer == SET)
8630 {
8631 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8632 return COSTS_N_INSNS (1);
8633 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8634 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8635 return COSTS_N_INSNS (2);
8636 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8637 if (thumb_shiftable_const (INTVAL (x)))
8638 return COSTS_N_INSNS (2);
8639 return COSTS_N_INSNS (3);
8640 }
8641 else if ((outer == PLUS || outer == COMPARE)
8642 && INTVAL (x) < 256 && INTVAL (x) > -256)
8643 return 0;
8644 else if ((outer == IOR || outer == XOR || outer == AND)
8645 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8646 return COSTS_N_INSNS (1);
8647 else if (outer == AND)
8648 {
8649 int i;
8650 /* This duplicates the tests in the andsi3 expander. */
8651 for (i = 9; i <= 31; i++)
8652 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8653 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8654 return COSTS_N_INSNS (2);
8655 }
8656 else if (outer == ASHIFT || outer == ASHIFTRT
8657 || outer == LSHIFTRT)
8658 return 0;
8659 return COSTS_N_INSNS (2);
8660
8661 case CONST:
8662 case CONST_DOUBLE:
8663 case LABEL_REF:
8664 case SYMBOL_REF:
8665 return COSTS_N_INSNS (3);
8666
8667 case UDIV:
8668 case UMOD:
8669 case DIV:
8670 case MOD:
8671 return 100;
8672
8673 case TRUNCATE:
8674 return 99;
8675
8676 case AND:
8677 case XOR:
8678 case IOR:
8679 /* XXX guess. */
8680 return 8;
8681
8682 case MEM:
8683 /* XXX another guess. */
8684 /* Memory costs quite a lot for the first word, but subsequent words
8685 load at the equivalent of a single insn each. */
8686 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8687 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8688 ? 4 : 0));
8689
8690 case IF_THEN_ELSE:
8691 /* XXX a guess. */
8692 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8693 return 14;
8694 return 2;
8695
8696 case ZERO_EXTEND:
8697 /* XXX still guessing. */
8698 switch (GET_MODE (XEXP (x, 0)))
8699 {
8700 case QImode:
8701 return (1 + (mode == DImode ? 4 : 0)
8702 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8703
8704 case HImode:
8705 return (4 + (mode == DImode ? 4 : 0)
8706 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8707
8708 case SImode:
8709 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8710
8711 default:
8712 return 99;
8713 }
8714
8715 default:
8716 return 99;
8717 }
8718 }
8719
8720 /* RTX costs when optimizing for size. */
8721 static bool
8722 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8723 int *total)
8724 {
8725 enum machine_mode mode = GET_MODE (x);
8726 if (TARGET_THUMB1)
8727 {
8728 *total = thumb1_size_rtx_costs (x, code, outer_code);
8729 return true;
8730 }
8731
8732 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8733 switch (code)
8734 {
8735 case MEM:
8736 /* A memory access costs 1 insn if the mode is small or the address is
8737 a single register; otherwise it costs one insn per word. */
8738 if (REG_P (XEXP (x, 0)))
8739 *total = COSTS_N_INSNS (1);
8740 else if (flag_pic
8741 && GET_CODE (XEXP (x, 0)) == PLUS
8742 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8743 /* This will be split into two instructions.
8744 See arm.md:calculate_pic_address. */
8745 *total = COSTS_N_INSNS (2);
8746 else
8747 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8748 return true;
8749
8750 case DIV:
8751 case MOD:
8752 case UDIV:
8753 case UMOD:
8754 /* Needs a libcall, so it costs about this. */
8755 *total = COSTS_N_INSNS (2);
8756 return false;
8757
8758 case ROTATE:
8759 if (mode == SImode && REG_P (XEXP (x, 1)))
8760 {
8761 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8762 return true;
8763 }
8764 /* Fall through */
8765 case ROTATERT:
8766 case ASHIFT:
8767 case LSHIFTRT:
8768 case ASHIFTRT:
8769 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8770 {
8771 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8772 return true;
8773 }
8774 else if (mode == SImode)
8775 {
8776 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8777 /* Slightly disparage register shifts, but not by much. */
8778 if (!CONST_INT_P (XEXP (x, 1)))
8779 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8780 return true;
8781 }
8782
8783 /* Needs a libcall. */
8784 *total = COSTS_N_INSNS (2);
8785 return false;
8786
8787 case MINUS:
8788 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8789 && (mode == SFmode || !TARGET_VFP_SINGLE))
8790 {
8791 *total = COSTS_N_INSNS (1);
8792 return false;
8793 }
8794
8795 if (mode == SImode)
8796 {
8797 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8798 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8799
8800 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8801 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8802 || subcode1 == ROTATE || subcode1 == ROTATERT
8803 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8804 || subcode1 == ASHIFTRT)
8805 {
8806 /* It's just the cost of the two operands. */
8807 *total = 0;
8808 return false;
8809 }
8810
8811 *total = COSTS_N_INSNS (1);
8812 return false;
8813 }
8814
8815 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8816 return false;
8817
8818 case PLUS:
8819 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8820 && (mode == SFmode || !TARGET_VFP_SINGLE))
8821 {
8822 *total = COSTS_N_INSNS (1);
8823 return false;
8824 }
8825
8826 /* A shift as a part of ADD costs nothing. */
8827 if (GET_CODE (XEXP (x, 0)) == MULT
8828 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8829 {
8830 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8831 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8832 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8833 return true;
8834 }
8835
8836 /* Fall through */
8837 case AND: case XOR: case IOR:
8838 if (mode == SImode)
8839 {
8840 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8841
8842 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8843 || subcode == LSHIFTRT || subcode == ASHIFTRT
8844 || (code == AND && subcode == NOT))
8845 {
8846 /* It's just the cost of the two operands. */
8847 *total = 0;
8848 return false;
8849 }
8850 }
8851
8852 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8853 return false;
8854
8855 case MULT:
8856 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8857 return false;
8858
8859 case NEG:
8860 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8861 && (mode == SFmode || !TARGET_VFP_SINGLE))
8862 {
8863 *total = COSTS_N_INSNS (1);
8864 return false;
8865 }
8866
8867 /* Fall through */
8868 case NOT:
8869 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8870
8871 return false;
8872
8873 case IF_THEN_ELSE:
8874 *total = 0;
8875 return false;
8876
8877 case COMPARE:
8878 if (cc_register (XEXP (x, 0), VOIDmode))
8879 * total = 0;
8880 else
8881 *total = COSTS_N_INSNS (1);
8882 return false;
8883
8884 case ABS:
8885 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8886 && (mode == SFmode || !TARGET_VFP_SINGLE))
8887 *total = COSTS_N_INSNS (1);
8888 else
8889 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8890 return false;
8891
8892 case SIGN_EXTEND:
8893 case ZERO_EXTEND:
8894 return arm_rtx_costs_1 (x, outer_code, total, 0);
8895
8896 case CONST_INT:
8897 if (const_ok_for_arm (INTVAL (x)))
8898 /* A multiplication by a constant requires another instruction
8899 to load the constant to a register. */
8900 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8901 ? 1 : 0);
8902 else if (const_ok_for_arm (~INTVAL (x)))
8903 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8904 else if (const_ok_for_arm (-INTVAL (x)))
8905 {
8906 if (outer_code == COMPARE || outer_code == PLUS
8907 || outer_code == MINUS)
8908 *total = 0;
8909 else
8910 *total = COSTS_N_INSNS (1);
8911 }
8912 else
8913 *total = COSTS_N_INSNS (2);
8914 return true;
8915
8916 case CONST:
8917 case LABEL_REF:
8918 case SYMBOL_REF:
8919 *total = COSTS_N_INSNS (2);
8920 return true;
8921
8922 case CONST_DOUBLE:
8923 *total = COSTS_N_INSNS (4);
8924 return true;
8925
8926 case CONST_VECTOR:
8927 if (TARGET_NEON
8928 && TARGET_HARD_FLOAT
8929 && outer_code == SET
8930 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8931 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8932 *total = COSTS_N_INSNS (1);
8933 else
8934 *total = COSTS_N_INSNS (4);
8935 return true;
8936
8937 case HIGH:
8938 case LO_SUM:
8939 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8940 cost of these slightly. */
8941 *total = COSTS_N_INSNS (1) + 1;
8942 return true;
8943
8944 case SET:
8945 return false;
8946
8947 default:
8948 if (mode != VOIDmode)
8949 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8950 else
8951 *total = COSTS_N_INSNS (4); /* Who knows? */
8952 return false;
8953 }
8954 }
8955
8956 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8957 operand, then return the operand that is being shifted. If the shift
8958 is not by a constant, then set SHIFT_REG to point to the operand.
8959 Return NULL if OP is not a shifter operand. */
8960 static rtx
8961 shifter_op_p (rtx op, rtx *shift_reg)
8962 {
8963 enum rtx_code code = GET_CODE (op);
8964
8965 if (code == MULT && CONST_INT_P (XEXP (op, 1))
8966 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
8967 return XEXP (op, 0);
8968 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
8969 return XEXP (op, 0);
8970 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
8971 || code == ASHIFTRT)
8972 {
8973 if (!CONST_INT_P (XEXP (op, 1)))
8974 *shift_reg = XEXP (op, 1);
8975 return XEXP (op, 0);
8976 }
8977
8978 return NULL;
8979 }
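/* Illustrative example: for (plus:SI (mult:SI (reg:SI r1) (const_int 4))
   (reg:SI r2)), calling shifter_op_p on the MULT returns r1 and leaves
   *SHIFT_REG untouched, since the shift amount (log2 of 4) is constant;
   for (ashift:SI (reg:SI r1) (reg:SI r3)) it returns r1 and sets
   *SHIFT_REG to r3.  (Register names here are placeholders.)  */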
8980
8981 static bool
8982 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
8983 {
8984 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
8985 gcc_assert (GET_CODE (x) == UNSPEC);
8986
8987 switch (XINT (x, 1))
8988 {
8989 case UNSPEC_UNALIGNED_LOAD:
8990 /* We can only do unaligned loads into the integer unit, and we can't
8991 use LDM or LDRD. */
8992 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
8993 if (speed_p)
8994 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
8995 + extra_cost->ldst.load_unaligned);
8996
8997 #ifdef NOT_YET
8998 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
8999 ADDR_SPACE_GENERIC, speed_p);
9000 #endif
9001 return true;
9002
9003 case UNSPEC_UNALIGNED_STORE:
9004 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9005 if (speed_p)
9006 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9007 + extra_cost->ldst.store_unaligned);
9008
9009 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9010 #ifdef NOT_YET
9011 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9012 ADDR_SPACE_GENERIC, speed_p);
9013 #endif
9014 return true;
9015
9016 case UNSPEC_VRINTZ:
9017 case UNSPEC_VRINTP:
9018 case UNSPEC_VRINTM:
9019 case UNSPEC_VRINTR:
9020 case UNSPEC_VRINTX:
9021 case UNSPEC_VRINTA:
9022 *cost = COSTS_N_INSNS (1);
9023 if (speed_p)
9024 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9025
9026 return true;
9027 default:
9028 *cost = COSTS_N_INSNS (2);
9029 break;
9030 }
9031 return false;
9032 }
9033
9034 /* Cost of a libcall. We assume one insn per argument, an amount for the
9035 call (one insn for -Os) and then one for processing the result. */
9036 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
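/* Worked example: with SPEED_P set, LIBCALL_COST (2) expands to
   COSTS_N_INSNS (2 + 18) = COSTS_N_INSNS (20); when optimizing for size
   it is COSTS_N_INSNS (2 + 2) = COSTS_N_INSNS (4).  */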
9037
9038 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9039 do \
9040 { \
9041 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9042 if (shift_op != NULL \
9043 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9044 { \
9045 if (shift_reg) \
9046 { \
9047 if (speed_p) \
9048 *cost += extra_cost->alu.arith_shift_reg; \
9049 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9050 } \
9051 else if (speed_p) \
9052 *cost += extra_cost->alu.arith_shift; \
9053 \
9054 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9055 + rtx_cost (XEXP (x, 1 - IDX), \
9056 OP, 1, speed_p)); \
9057 return true; \
9058 } \
9059 } \
9060 while (0);
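/* Note: the macro body ends in "while (0);" so that its uses below
   (e.g. HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)) are written without a
   trailing semicolon.  Illustrative effect: for a narrow-mode
   (plus (ashift (reg) (const_int 2)) (reg)) it charges one arith-shift
   operation plus the costs of the shifted operand and of the other
   operand, and finishes the cost calculation early.  */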
9061
9062 /* RTX costs. Make an estimate of the cost of executing the operation
9063 X, which is contained within an operation with code OUTER_CODE.
9064 SPEED_P indicates whether the cost desired is the performance cost,
9065 or the size cost. The estimate is stored in COST and the return
9066 value is TRUE if the cost calculation is final, or FALSE if the
9067 caller should recurse through the operands of X to add additional
9068 costs.
9069
9070 We currently make no attempt to model the size savings of Thumb-2
9071 16-bit instructions. At the normal points in compilation where
9072 this code is called we have no measure of whether the condition
9073 flags are live or not, and thus no realistic way to determine what
9074 the size will eventually be. */
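/* For instance, the cases below that return FALSE (such as the plain
   register-to-register SET) let the midend recurse and add the operand
   costs itself, while cases that already add operand costs explicitly
   via rtx_cost return TRUE.  */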
9075 static bool
9076 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9077 const struct cpu_cost_table *extra_cost,
9078 int *cost, bool speed_p)
9079 {
9080 enum machine_mode mode = GET_MODE (x);
9081
9082 if (TARGET_THUMB1)
9083 {
9084 if (speed_p)
9085 *cost = thumb1_rtx_costs (x, code, outer_code);
9086 else
9087 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9088 return true;
9089 }
9090
9091 switch (code)
9092 {
9093 case SET:
9094 *cost = 0;
9095 if (REG_P (SET_SRC (x))
9096 && REG_P (SET_DEST (x)))
9097 {
9098 /* Assume that most copies can be done with a single insn,
9099 unless we don't have HW FP, in which case everything
9100 larger than word mode will require two insns. */
9101 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9102 && GET_MODE_SIZE (mode) > 4)
9103 || mode == DImode)
9104 ? 2 : 1);
9105 /* Conditional register moves can be encoded
9106 in 16 bits in Thumb mode. */
9107 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9108 *cost >>= 1;
9109 }
9110
9111 if (CONST_INT_P (SET_SRC (x)))
9112 {
9113 /* Handle CONST_INT here, since the value doesn't have a mode
9114 and we would otherwise be unable to work out the true cost. */
9115 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9116 mode = GET_MODE (SET_DEST (x));
9117 outer_code = SET;
9118 /* Slightly lower the cost of setting a core reg to a constant.
9119 This helps break up chains and allows for better scheduling. */
9120 if (REG_P (SET_DEST (x))
9121 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9122 *cost -= 1;
9123 x = SET_SRC (x);
9124 /* Immediate moves with an immediate in the range [0, 255] can be
9125 encoded in 16 bits in Thumb mode. */
9126 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9127 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9128 *cost >>= 1;
9129 goto const_int_cost;
9130 }
9131
9132 return false;
9133
9134 case MEM:
9135 /* A memory access costs 1 insn if the mode is small, or the address is
9136 a single register, otherwise it costs one insn per word. */
9137 if (REG_P (XEXP (x, 0)))
9138 *cost = COSTS_N_INSNS (1);
9139 else if (flag_pic
9140 && GET_CODE (XEXP (x, 0)) == PLUS
9141 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9142 /* This will be split into two instructions.
9143 See arm.md:calculate_pic_address. */
9144 *cost = COSTS_N_INSNS (2);
9145 else
9146 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9147
9148 /* For speed optimizations, add the costs of the address and
9149 accessing memory. */
9150 if (speed_p)
9151 #ifdef NOT_YET
9152 *cost += (extra_cost->ldst.load
9153 + arm_address_cost (XEXP (x, 0), mode,
9154 ADDR_SPACE_GENERIC, speed_p));
9155 #else
9156 *cost += extra_cost->ldst.load;
9157 #endif
9158 return true;
9159
9160 case PARALLEL:
9161 {
9162 /* Calculations of LDM costs are complex. We assume an initial cost
9163 (ldm_1st) which covers loading up to
9164 ldm_regs_per_insn_1st registers; then each additional
9165 ldm_regs_per_insn_subsequent registers cost one more insn. The
9166 formula for N regs is thus:
9167
9168 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9169 + ldm_regs_per_insn_subsequent - 1)
9170 / ldm_regs_per_insn_subsequent).
9171
9172 Additional costs may also be added for addressing. A similar
9173 formula is used for STM. */
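/* Worked example (with made-up tuning numbers): if
   ldm_regs_per_insn_1st is 3, ldm_regs_per_insn_subsequent is 2 and
   N is 6, the additional cost beyond ldm_1st is
   COSTS_N_INSNS ((3 + 2 - 1) / 2) = COSTS_N_INSNS (2).  */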
9174
9175 bool is_ldm = load_multiple_operation (x, SImode);
9176 bool is_stm = store_multiple_operation (x, SImode);
9177
9178 *cost = COSTS_N_INSNS (1);
9179
9180 if (is_ldm || is_stm)
9181 {
9182 if (speed_p)
9183 {
9184 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9185 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9186 ? extra_cost->ldst.ldm_regs_per_insn_1st
9187 : extra_cost->ldst.stm_regs_per_insn_1st;
9188 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9189 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9190 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9191
9192 *cost += regs_per_insn_1st
9193 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9194 + regs_per_insn_sub - 1)
9195 / regs_per_insn_sub);
9196 return true;
9197 }
9198
9199 }
9200 return false;
9201 }
9202 case DIV:
9203 case UDIV:
9204 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9205 && (mode == SFmode || !TARGET_VFP_SINGLE))
9206 *cost = COSTS_N_INSNS (speed_p
9207 ? extra_cost->fp[mode != SFmode].div : 1);
9208 else if (mode == SImode && TARGET_IDIV)
9209 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9210 else
9211 *cost = LIBCALL_COST (2);
9212 return false; /* All arguments must be in registers. */
9213
9214 case MOD:
9215 case UMOD:
9216 *cost = LIBCALL_COST (2);
9217 return false; /* All arguments must be in registers. */
9218
9219 case ROTATE:
9220 if (mode == SImode && REG_P (XEXP (x, 1)))
9221 {
9222 *cost = (COSTS_N_INSNS (2)
9223 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9224 if (speed_p)
9225 *cost += extra_cost->alu.shift_reg;
9226 return true;
9227 }
9228 /* Fall through */
9229 case ROTATERT:
9230 case ASHIFT:
9231 case LSHIFTRT:
9232 case ASHIFTRT:
9233 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9234 {
9235 *cost = (COSTS_N_INSNS (3)
9236 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9237 if (speed_p)
9238 *cost += 2 * extra_cost->alu.shift;
9239 return true;
9240 }
9241 else if (mode == SImode)
9242 {
9243 *cost = (COSTS_N_INSNS (1)
9244 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9245 /* Slightly disparage register shifts at -Os, but not by much. */
9246 if (!CONST_INT_P (XEXP (x, 1)))
9247 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9248 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9249 return true;
9250 }
9251 else if (GET_MODE_CLASS (mode) == MODE_INT
9252 && GET_MODE_SIZE (mode) < 4)
9253 {
9254 if (code == ASHIFT)
9255 {
9256 *cost = (COSTS_N_INSNS (1)
9257 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9258 /* Slightly disparage register shifts at -Os, but not by
9259 much. */
9260 if (!CONST_INT_P (XEXP (x, 1)))
9261 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9262 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9263 }
9264 else if (code == LSHIFTRT || code == ASHIFTRT)
9265 {
9266 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9267 {
9268 /* Can use SBFX/UBFX. */
9269 *cost = COSTS_N_INSNS (1);
9270 if (speed_p)
9271 *cost += extra_cost->alu.bfx;
9272 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9273 }
9274 else
9275 {
9276 *cost = COSTS_N_INSNS (2);
9277 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9278 if (speed_p)
9279 {
9280 if (CONST_INT_P (XEXP (x, 1)))
9281 *cost += 2 * extra_cost->alu.shift;
9282 else
9283 *cost += (extra_cost->alu.shift
9284 + extra_cost->alu.shift_reg);
9285 }
9286 else
9287 /* Slightly disparage register shifts. */
9288 *cost += !CONST_INT_P (XEXP (x, 1));
9289 }
9290 }
9291 else /* Rotates. */
9292 {
9293 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9294 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9295 if (speed_p)
9296 {
9297 if (CONST_INT_P (XEXP (x, 1)))
9298 *cost += (2 * extra_cost->alu.shift
9299 + extra_cost->alu.log_shift);
9300 else
9301 *cost += (extra_cost->alu.shift
9302 + extra_cost->alu.shift_reg
9303 + extra_cost->alu.log_shift_reg);
9304 }
9305 }
9306 return true;
9307 }
9308
9309 *cost = LIBCALL_COST (2);
9310 return false;
9311
9312 case MINUS:
9313 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9314 && (mode == SFmode || !TARGET_VFP_SINGLE))
9315 {
9316 *cost = COSTS_N_INSNS (1);
9317 if (GET_CODE (XEXP (x, 0)) == MULT
9318 || GET_CODE (XEXP (x, 1)) == MULT)
9319 {
9320 rtx mul_op0, mul_op1, sub_op;
9321
9322 if (speed_p)
9323 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9324
9325 if (GET_CODE (XEXP (x, 0)) == MULT)
9326 {
9327 mul_op0 = XEXP (XEXP (x, 0), 0);
9328 mul_op1 = XEXP (XEXP (x, 0), 1);
9329 sub_op = XEXP (x, 1);
9330 }
9331 else
9332 {
9333 mul_op0 = XEXP (XEXP (x, 1), 0);
9334 mul_op1 = XEXP (XEXP (x, 1), 1);
9335 sub_op = XEXP (x, 0);
9336 }
9337
9338 /* The first operand of the multiply may be optionally
9339 negated. */
9340 if (GET_CODE (mul_op0) == NEG)
9341 mul_op0 = XEXP (mul_op0, 0);
9342
9343 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9344 + rtx_cost (mul_op1, code, 0, speed_p)
9345 + rtx_cost (sub_op, code, 0, speed_p));
9346
9347 return true;
9348 }
9349
9350 if (speed_p)
9351 *cost += extra_cost->fp[mode != SFmode].addsub;
9352 return false;
9353 }
9354
9355 if (mode == SImode)
9356 {
9357 rtx shift_by_reg = NULL;
9358 rtx shift_op;
9359 rtx non_shift_op;
9360
9361 *cost = COSTS_N_INSNS (1);
9362
9363 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9364 if (shift_op == NULL)
9365 {
9366 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9367 non_shift_op = XEXP (x, 0);
9368 }
9369 else
9370 non_shift_op = XEXP (x, 1);
9371
9372 if (shift_op != NULL)
9373 {
9374 if (shift_by_reg != NULL)
9375 {
9376 if (speed_p)
9377 *cost += extra_cost->alu.arith_shift_reg;
9378 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9379 }
9380 else if (speed_p)
9381 *cost += extra_cost->alu.arith_shift;
9382
9383 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9384 + rtx_cost (non_shift_op, code, 0, speed_p));
9385 return true;
9386 }
9387
9388 if (arm_arch_thumb2
9389 && GET_CODE (XEXP (x, 1)) == MULT)
9390 {
9391 /* MLS. */
9392 if (speed_p)
9393 *cost += extra_cost->mult[0].add;
9394 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9395 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9396 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9397 return true;
9398 }
9399
9400 if (CONST_INT_P (XEXP (x, 0)))
9401 {
9402 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9403 INTVAL (XEXP (x, 0)), NULL_RTX,
9404 NULL_RTX, 1, 0);
9405 *cost = COSTS_N_INSNS (insns);
9406 if (speed_p)
9407 *cost += insns * extra_cost->alu.arith;
9408 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9409 return true;
9410 }
9411
9412 return false;
9413 }
9414
9415 if (GET_MODE_CLASS (mode) == MODE_INT
9416 && GET_MODE_SIZE (mode) < 4)
9417 {
9418 rtx shift_op, shift_reg;
9419 shift_reg = NULL;
9420
9421 /* We check both sides of the MINUS for shifter operands since,
9422 unlike PLUS, it's not commutative. */
9423
9424 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9425 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9426
9427 /* Slightly disparage, as we might need to widen the result. */
9428 *cost = 1 + COSTS_N_INSNS (1);
9429 if (speed_p)
9430 *cost += extra_cost->alu.arith;
9431
9432 if (CONST_INT_P (XEXP (x, 0)))
9433 {
9434 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9435 return true;
9436 }
9437
9438 return false;
9439 }
9440
9441 if (mode == DImode)
9442 {
9443 *cost = COSTS_N_INSNS (2);
9444
9445 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9446 {
9447 rtx op1 = XEXP (x, 1);
9448
9449 if (speed_p)
9450 *cost += 2 * extra_cost->alu.arith;
9451
9452 if (GET_CODE (op1) == ZERO_EXTEND)
9453 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9454 else
9455 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9456 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9457 0, speed_p);
9458 return true;
9459 }
9460 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9461 {
9462 if (speed_p)
9463 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9464 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9465 0, speed_p)
9466 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9467 return true;
9468 }
9469 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9470 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9471 {
9472 if (speed_p)
9473 *cost += (extra_cost->alu.arith
9474 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9475 ? extra_cost->alu.arith
9476 : extra_cost->alu.arith_shift));
9477 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9478 + rtx_cost (XEXP (XEXP (x, 1), 0),
9479 GET_CODE (XEXP (x, 1)), 0, speed_p));
9480 return true;
9481 }
9482
9483 if (speed_p)
9484 *cost += 2 * extra_cost->alu.arith;
9485 return false;
9486 }
9487
9488 /* Vector mode? */
9489
9490 *cost = LIBCALL_COST (2);
9491 return false;
9492
9493 case PLUS:
9494 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9495 && (mode == SFmode || !TARGET_VFP_SINGLE))
9496 {
9497 *cost = COSTS_N_INSNS (1);
9498 if (GET_CODE (XEXP (x, 0)) == MULT)
9499 {
9500 rtx mul_op0, mul_op1, add_op;
9501
9502 if (speed_p)
9503 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9504
9505 mul_op0 = XEXP (XEXP (x, 0), 0);
9506 mul_op1 = XEXP (XEXP (x, 0), 1);
9507 add_op = XEXP (x, 1);
9508
9509 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9510 + rtx_cost (mul_op1, code, 0, speed_p)
9511 + rtx_cost (add_op, code, 0, speed_p));
9512
9513 return true;
9514 }
9515
9516 if (speed_p)
9517 *cost += extra_cost->fp[mode != SFmode].addsub;
9518 return false;
9519 }
9520 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9521 {
9522 *cost = LIBCALL_COST (2);
9523 return false;
9524 }
9525
9526 /* Narrow modes can be synthesized in SImode, but the range
9527 of useful sub-operations is limited. Check for shift operations
9528 on one of the operands. Only left shifts can be used in the
9529 narrow modes. */
9530 if (GET_MODE_CLASS (mode) == MODE_INT
9531 && GET_MODE_SIZE (mode) < 4)
9532 {
9533 rtx shift_op, shift_reg;
9534 shift_reg = NULL;
9535
9536 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9537
9538 if (CONST_INT_P (XEXP (x, 1)))
9539 {
9540 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9541 INTVAL (XEXP (x, 1)), NULL_RTX,
9542 NULL_RTX, 1, 0);
9543 *cost = COSTS_N_INSNS (insns);
9544 if (speed_p)
9545 *cost += insns * extra_cost->alu.arith;
9546 /* Slightly penalize a narrow operation as the result may
9547 need widening. */
9548 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9549 return true;
9550 }
9551
9552 /* Slightly penalize a narrow operation as the result may
9553 need widening. */
9554 *cost = 1 + COSTS_N_INSNS (1);
9555 if (speed_p)
9556 *cost += extra_cost->alu.arith;
9557
9558 return false;
9559 }
9560
9561 if (mode == SImode)
9562 {
9563 rtx shift_op, shift_reg;
9564
9565 *cost = COSTS_N_INSNS (1);
9566 if (TARGET_INT_SIMD
9567 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9568 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9569 {
9570 /* UXTA[BH] or SXTA[BH]. */
9571 if (speed_p)
9572 *cost += extra_cost->alu.extnd_arith;
9573 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9574 speed_p)
9575 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9576 return true;
9577 }
9578
9579 shift_reg = NULL;
9580 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9581 if (shift_op != NULL)
9582 {
9583 if (shift_reg)
9584 {
9585 if (speed_p)
9586 *cost += extra_cost->alu.arith_shift_reg;
9587 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9588 }
9589 else if (speed_p)
9590 *cost += extra_cost->alu.arith_shift;
9591
9592 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9593 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9594 return true;
9595 }
9596 if (GET_CODE (XEXP (x, 0)) == MULT)
9597 {
9598 rtx mul_op = XEXP (x, 0);
9599
9600 *cost = COSTS_N_INSNS (1);
9601
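/* The condition below looks for a multiply whose operands are
   (sign-extended) low or high 16-bit halves of registers, e.g.
   (mult (sign_extend:SI (reg:HI)) (ashiftrt:SI (reg:SI) (const_int 16))),
   which can be multiplied and accumulated with a single SMLA[BT][BT].  */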
9602 if (TARGET_DSP_MULTIPLY
9603 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9604 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9605 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9606 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9607 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9608 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9609 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9610 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9611 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9612 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9613 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9614 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9615 == 16))))))
9616 {
9617 /* SMLA[BT][BT]. */
9618 if (speed_p)
9619 *cost += extra_cost->mult[0].extend_add;
9620 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9621 SIGN_EXTEND, 0, speed_p)
9622 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9623 SIGN_EXTEND, 0, speed_p)
9624 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9625 return true;
9626 }
9627
9628 if (speed_p)
9629 *cost += extra_cost->mult[0].add;
9630 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9631 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9632 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9633 return true;
9634 }
9635 if (CONST_INT_P (XEXP (x, 1)))
9636 {
9637 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9638 INTVAL (XEXP (x, 1)), NULL_RTX,
9639 NULL_RTX, 1, 0);
9640 *cost = COSTS_N_INSNS (insns);
9641 if (speed_p)
9642 *cost += insns * extra_cost->alu.arith;
9643 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9644 return true;
9645 }
9646 return false;
9647 }
9648
9649 if (mode == DImode)
9650 {
9651 if (arm_arch3m
9652 && GET_CODE (XEXP (x, 0)) == MULT
9653 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9654 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9655 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9656 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9657 {
9658 *cost = COSTS_N_INSNS (1);
9659 if (speed_p)
9660 *cost += extra_cost->mult[1].extend_add;
9661 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9662 ZERO_EXTEND, 0, speed_p)
9663 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9664 ZERO_EXTEND, 0, speed_p)
9665 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9666 return true;
9667 }
9668
9669 *cost = COSTS_N_INSNS (2);
9670
9671 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9672 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9673 {
9674 if (speed_p)
9675 *cost += (extra_cost->alu.arith
9676 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9677 ? extra_cost->alu.arith
9678 : extra_cost->alu.arith_shift));
9679
9680 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9681 speed_p)
9682 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9683 return true;
9684 }
9685
9686 if (speed_p)
9687 *cost += 2 * extra_cost->alu.arith;
9688 return false;
9689 }
9690
9691 /* Vector mode? */
9692 *cost = LIBCALL_COST (2);
9693 return false;
9694
9695 case AND: case XOR: case IOR:
9696 if (mode == SImode)
9697 {
9698 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9699 rtx op0 = XEXP (x, 0);
9700 rtx shift_op, shift_reg;
9701
9702 *cost = COSTS_N_INSNS (1);
9703
9704 if (subcode == NOT
9705 && (code == AND
9706 || (code == IOR && TARGET_THUMB2)))
9707 op0 = XEXP (op0, 0);
9708
9709 shift_reg = NULL;
9710 shift_op = shifter_op_p (op0, &shift_reg);
9711 if (shift_op != NULL)
9712 {
9713 if (shift_reg)
9714 {
9715 if (speed_p)
9716 *cost += extra_cost->alu.log_shift_reg;
9717 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9718 }
9719 else if (speed_p)
9720 *cost += extra_cost->alu.log_shift;
9721
9722 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9723 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9724 return true;
9725 }
9726
9727 if (CONST_INT_P (XEXP (x, 1)))
9728 {
9729 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9730 INTVAL (XEXP (x, 1)), NULL_RTX,
9731 NULL_RTX, 1, 0);
9732
9733 *cost = COSTS_N_INSNS (insns);
9734 if (speed_p)
9735 *cost += insns * extra_cost->alu.logical;
9736 *cost += rtx_cost (op0, code, 0, speed_p);
9737 return true;
9738 }
9739
9740 if (speed_p)
9741 *cost += extra_cost->alu.logical;
9742 *cost += (rtx_cost (op0, code, 0, speed_p)
9743 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9744 return true;
9745 }
9746
9747 if (mode == DImode)
9748 {
9749 rtx op0 = XEXP (x, 0);
9750 enum rtx_code subcode = GET_CODE (op0);
9751
9752 *cost = COSTS_N_INSNS (2);
9753
9754 if (subcode == NOT
9755 && (code == AND
9756 || (code == IOR && TARGET_THUMB2)))
9757 op0 = XEXP (op0, 0);
9758
9759 if (GET_CODE (op0) == ZERO_EXTEND)
9760 {
9761 if (speed_p)
9762 *cost += 2 * extra_cost->alu.logical;
9763
9764 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9765 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9766 return true;
9767 }
9768 else if (GET_CODE (op0) == SIGN_EXTEND)
9769 {
9770 if (speed_p)
9771 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9772
9773 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9774 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9775 return true;
9776 }
9777
9778 if (speed_p)
9779 *cost += 2 * extra_cost->alu.logical;
9780
9781 return true;
9782 }
9783 /* Vector mode? */
9784
9785 *cost = LIBCALL_COST (2);
9786 return false;
9787
9788 case MULT:
9789 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9790 && (mode == SFmode || !TARGET_VFP_SINGLE))
9791 {
9792 rtx op0 = XEXP (x, 0);
9793
9794 *cost = COSTS_N_INSNS (1);
9795
9796 if (GET_CODE (op0) == NEG)
9797 op0 = XEXP (op0, 0);
9798
9799 if (speed_p)
9800 *cost += extra_cost->fp[mode != SFmode].mult;
9801
9802 *cost += (rtx_cost (op0, MULT, 0, speed_p)
9803 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
9804 return true;
9805 }
9806 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9807 {
9808 *cost = LIBCALL_COST (2);
9809 return false;
9810 }
9811
9812 if (mode == SImode)
9813 {
9814 *cost = COSTS_N_INSNS (1);
9815 if (TARGET_DSP_MULTIPLY
9816 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9817 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9818 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9819 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9820 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9821 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9822 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9823 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9824 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9825 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9826 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9827 && (INTVAL (XEXP (XEXP (x, 1), 1))
9828 == 16))))))
9829 {
9830 /* SMUL[TB][TB]. */
9831 if (speed_p)
9832 *cost += extra_cost->mult[0].extend;
9833 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
9834 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
9835 return true;
9836 }
9837 if (speed_p)
9838 *cost += extra_cost->mult[0].simple;
9839 return false;
9840 }
9841
9842 if (mode == DImode)
9843 {
9844 if (arm_arch3m
9845 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9846 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9847 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9848 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9849 {
9850 *cost = COSTS_N_INSNS (1);
9851 if (speed_p)
9852 *cost += extra_cost->mult[1].extend;
9853 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
9854 ZERO_EXTEND, 0, speed_p)
9855 + rtx_cost (XEXP (XEXP (x, 1), 0),
9856 ZERO_EXTEND, 0, speed_p));
9857 return true;
9858 }
9859
9860 *cost = LIBCALL_COST (2);
9861 return false;
9862 }
9863
9864 /* Vector mode? */
9865 *cost = LIBCALL_COST (2);
9866 return false;
9867
9868 case NEG:
9869 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9870 && (mode == SFmode || !TARGET_VFP_SINGLE))
9871 {
9872 *cost = COSTS_N_INSNS (1);
9873 if (speed_p)
9874 *cost += extra_cost->fp[mode != SFmode].neg;
9875
9876 return false;
9877 }
9878 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9879 {
9880 *cost = LIBCALL_COST (1);
9881 return false;
9882 }
9883
9884 if (mode == SImode)
9885 {
9886 if (GET_CODE (XEXP (x, 0)) == ABS)
9887 {
9888 *cost = COSTS_N_INSNS (2);
9889 /* Assume the non-flag-changing variant. */
9890 if (speed_p)
9891 *cost += (extra_cost->alu.log_shift
9892 + extra_cost->alu.arith_shift);
9893 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
9894 return true;
9895 }
9896
9897 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
9898 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
9899 {
9900 *cost = COSTS_N_INSNS (2);
9901 /* No extra cost for MOV imm and MVN imm. */
9902 /* If the comparison op is using the flags, there's no further
9903 cost, otherwise we need to add the cost of the comparison. */
9904 if (!(REG_P (XEXP (XEXP (x, 0), 0))
9905 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
9906 && XEXP (XEXP (x, 0), 1) == const0_rtx))
9907 {
9908 *cost += (COSTS_N_INSNS (1)
9909 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
9910 speed_p)
9911 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
9912 speed_p));
9913 if (speed_p)
9914 *cost += extra_cost->alu.arith;
9915 }
9916 return true;
9917 }
9918 *cost = COSTS_N_INSNS (1);
9919 if (speed_p)
9920 *cost += extra_cost->alu.arith;
9921 return false;
9922 }
9923
9924 if (GET_MODE_CLASS (mode) == MODE_INT
9925 && GET_MODE_SIZE (mode) < 4)
9926 {
9927 /* Slightly disparage, as we might need an extend operation. */
9928 *cost = 1 + COSTS_N_INSNS (1);
9929 if (speed_p)
9930 *cost += extra_cost->alu.arith;
9931 return false;
9932 }
9933
9934 if (mode == DImode)
9935 {
9936 *cost = COSTS_N_INSNS (2);
9937 if (speed_p)
9938 *cost += 2 * extra_cost->alu.arith;
9939 return false;
9940 }
9941
9942 /* Vector mode? */
9943 *cost = LIBCALL_COST (1);
9944 return false;
9945
9946 case NOT:
9947 if (mode == SImode)
9948 {
9949 rtx shift_op;
9950 rtx shift_reg = NULL;
9951
9952 *cost = COSTS_N_INSNS (1);
9953 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9954
9955 if (shift_op)
9956 {
9957 if (shift_reg != NULL)
9958 {
9959 if (speed_p)
9960 *cost += extra_cost->alu.log_shift_reg;
9961 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9962 }
9963 else if (speed_p)
9964 *cost += extra_cost->alu.log_shift;
9965 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
9966 return true;
9967 }
9968
9969 if (speed_p)
9970 *cost += extra_cost->alu.logical;
9971 return false;
9972 }
9973 if (mode == DImode)
9974 {
9975 *cost = COSTS_N_INSNS (2);
9976 return false;
9977 }
9978
9979 /* Vector mode? */
9980
9981 *cost += LIBCALL_COST (1);
9982 return false;
9983
9984 case IF_THEN_ELSE:
9985 {
9986 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9987 {
9988 *cost = COSTS_N_INSNS (4);
9989 return true;
9990 }
9991 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
9992 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
9993
9994 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
9995 /* Assume that if one arm of the if_then_else is a register,
9996 that it will be tied with the result and eliminate the
9997 conditional insn. */
9998 if (REG_P (XEXP (x, 1)))
9999 *cost += op2cost;
10000 else if (REG_P (XEXP (x, 2)))
10001 *cost += op1cost;
10002 else
10003 {
10004 if (speed_p)
10005 {
10006 if (extra_cost->alu.non_exec_costs_exec)
10007 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10008 else
10009 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10010 }
10011 else
10012 *cost += op1cost + op2cost;
10013 }
10014 }
10015 return true;
10016
10017 case COMPARE:
10018 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10019 *cost = 0;
10020 else
10021 {
10022 enum machine_mode op0mode;
10023 /* We'll mostly assume that the cost of a compare is the cost of the
10024 LHS. However, there are some notable exceptions. */
10025
10026 /* Floating point compares are never done as side-effects. */
10027 op0mode = GET_MODE (XEXP (x, 0));
10028 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10029 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10030 {
10031 *cost = COSTS_N_INSNS (1);
10032 if (speed_p)
10033 *cost += extra_cost->fp[op0mode != SFmode].compare;
10034
10035 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10036 {
10037 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10038 return true;
10039 }
10040
10041 return false;
10042 }
10043 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10044 {
10045 *cost = LIBCALL_COST (2);
10046 return false;
10047 }
10048
10049 /* DImode compares normally take two insns. */
10050 if (op0mode == DImode)
10051 {
10052 *cost = COSTS_N_INSNS (2);
10053 if (speed_p)
10054 *cost += 2 * extra_cost->alu.arith;
10055 return false;
10056 }
10057
10058 if (op0mode == SImode)
10059 {
10060 rtx shift_op;
10061 rtx shift_reg;
10062
10063 if (XEXP (x, 1) == const0_rtx
10064 && !(REG_P (XEXP (x, 0))
10065 || (GET_CODE (XEXP (x, 0)) == SUBREG
10066 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10067 {
10068 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10069
10070 /* Multiply operations that set the flags are often
10071 significantly more expensive. */
10072 if (speed_p
10073 && GET_CODE (XEXP (x, 0)) == MULT
10074 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10075 *cost += extra_cost->mult[0].flag_setting;
10076
10077 if (speed_p
10078 && GET_CODE (XEXP (x, 0)) == PLUS
10079 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10080 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10081 0), 1), mode))
10082 *cost += extra_cost->mult[0].flag_setting;
10083 return true;
10084 }
10085
10086 shift_reg = NULL;
10087 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10088 if (shift_op != NULL)
10089 {
10090 *cost = COSTS_N_INSNS (1);
10091 if (shift_reg != NULL)
10092 {
10093 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10094 if (speed_p)
10095 *cost += extra_cost->alu.arith_shift_reg;
10096 }
10097 else if (speed_p)
10098 *cost += extra_cost->alu.arith_shift;
10099 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10100 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10101 return true;
10102 }
10103
10104 *cost = COSTS_N_INSNS (1);
10105 if (speed_p)
10106 *cost += extra_cost->alu.arith;
10107 if (CONST_INT_P (XEXP (x, 1))
10108 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10109 {
10110 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10111 return true;
10112 }
10113 return false;
10114 }
10115
10116 /* Vector mode? */
10117
10118 *cost = LIBCALL_COST (2);
10119 return false;
10120 }
10121 return true;
10122
10123 case EQ:
10124 case NE:
10125 case LT:
10126 case LE:
10127 case GT:
10128 case GE:
10129 case LTU:
10130 case LEU:
10131 case GEU:
10132 case GTU:
10133 case ORDERED:
10134 case UNORDERED:
10135 case UNEQ:
10136 case UNLE:
10137 case UNLT:
10138 case UNGE:
10139 case UNGT:
10140 case LTGT:
10141 if (outer_code == SET)
10142 {
10143 /* Is it a store-flag operation? */
10144 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10145 && XEXP (x, 1) == const0_rtx)
10146 {
10147 /* Thumb also needs an IT insn. */
10148 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10149 return true;
10150 }
10151 if (XEXP (x, 1) == const0_rtx)
10152 {
10153 switch (code)
10154 {
10155 case LT:
10156 /* LSR Rd, Rn, #31. */
10157 *cost = COSTS_N_INSNS (1);
10158 if (speed_p)
10159 *cost += extra_cost->alu.shift;
10160 break;
10161
10162 case EQ:
10163 /* RSBS T1, Rn, #0
10164 ADC Rd, Rn, T1. */
10165
10166 case NE:
10167 /* SUBS T1, Rn, #1
10168 SBC Rd, Rn, T1. */
10169 *cost = COSTS_N_INSNS (2);
10170 break;
10171
10172 case LE:
10173 /* RSBS T1, Rn, Rn, LSR #31
10174 ADC Rd, Rn, T1. */
10175 *cost = COSTS_N_INSNS (2);
10176 if (speed_p)
10177 *cost += extra_cost->alu.arith_shift;
10178 break;
10179
10180 case GT:
10181 /* RSB Rd, Rn, Rn, ASR #1
10182 LSR Rd, Rd, #31. */
10183 *cost = COSTS_N_INSNS (2);
10184 if (speed_p)
10185 *cost += (extra_cost->alu.arith_shift
10186 + extra_cost->alu.shift);
10187 break;
10188
10189 case GE:
10190 /* ASR Rd, Rn, #31
10191 ADD Rd, Rn, #1. */
10192 *cost = COSTS_N_INSNS (2);
10193 if (speed_p)
10194 *cost += extra_cost->alu.shift;
10195 break;
10196
10197 default:
10198 /* Remaining cases are either meaningless or would take
10199 three insns anyway. */
10200 *cost = COSTS_N_INSNS (3);
10201 break;
10202 }
10203 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10204 return true;
10205 }
10206 else
10207 {
10208 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10209 if (CONST_INT_P (XEXP (x, 1))
10210 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10211 {
10212 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10213 return true;
10214 }
10215
10216 return false;
10217 }
10218 }
10219 /* Not directly inside a set. If it involves the condition code
10220 register it must be the condition for a branch, cond_exec or
10221 I_T_E operation. Since the comparison is performed elsewhere
10222 this is just the control part which has no additional
10223 cost. */
10224 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10225 && XEXP (x, 1) == const0_rtx)
10226 {
10227 *cost = 0;
10228 return true;
10229 }
10230 return false;
10231
10232 case ABS:
10233 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10234 && (mode == SFmode || !TARGET_VFP_SINGLE))
10235 {
10236 *cost = COSTS_N_INSNS (1);
10237 if (speed_p)
10238 *cost += extra_cost->fp[mode != SFmode].neg;
10239
10240 return false;
10241 }
10242 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10243 {
10244 *cost = LIBCALL_COST (1);
10245 return false;
10246 }
10247
10248 if (mode == SImode)
10249 {
10250 *cost = COSTS_N_INSNS (1);
10251 if (speed_p)
10252 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10253 return false;
10254 }
10255 /* Vector mode? */
10256 *cost = LIBCALL_COST (1);
10257 return false;
10258
10259 case SIGN_EXTEND:
10260 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10261 && MEM_P (XEXP (x, 0)))
10262 {
10263 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10264
10265 if (mode == DImode)
10266 *cost += COSTS_N_INSNS (1);
10267
10268 if (!speed_p)
10269 return true;
10270
10271 if (GET_MODE (XEXP (x, 0)) == SImode)
10272 *cost += extra_cost->ldst.load;
10273 else
10274 *cost += extra_cost->ldst.load_sign_extend;
10275
10276 if (mode == DImode)
10277 *cost += extra_cost->alu.shift;
10278
10279 return true;
10280 }
10281
10282 /* Widening from less than 32-bits requires an extend operation. */
10283 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10284 {
10285 /* We have SXTB/SXTH. */
10286 *cost = COSTS_N_INSNS (1);
10287 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10288 if (speed_p)
10289 *cost += extra_cost->alu.extnd;
10290 }
10291 else if (GET_MODE (XEXP (x, 0)) != SImode)
10292 {
10293 /* Needs two shifts. */
10294 *cost = COSTS_N_INSNS (2);
10295 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10296 if (speed_p)
10297 *cost += 2 * extra_cost->alu.shift;
10298 }
10299
10300 /* Widening beyond 32-bits requires one more insn. */
10301 if (mode == DImode)
10302 {
10303 *cost += COSTS_N_INSNS (1);
10304 if (speed_p)
10305 *cost += extra_cost->alu.shift;
10306 }
10307
10308 return true;
10309
10310 case ZERO_EXTEND:
10311 if ((arm_arch4
10312 || GET_MODE (XEXP (x, 0)) == SImode
10313 || GET_MODE (XEXP (x, 0)) == QImode)
10314 && MEM_P (XEXP (x, 0)))
10315 {
10316 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10317
10318 if (mode == DImode)
10319 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10320
10321 return true;
10322 }
10323
10324 /* Widening from less than 32-bits requires an extend operation. */
10325 if (GET_MODE (XEXP (x, 0)) == QImode)
10326 {
10327 /* UXTB can be a shorter instruction in Thumb2, but it might
10328 be slower than the AND Rd, Rn, #255 alternative. When
10329 optimizing for speed it should never be slower to use
10330 AND, and we don't really model 16-bit vs 32-bit insns
10331 here. */
10332 *cost = COSTS_N_INSNS (1);
10333 if (speed_p)
10334 *cost += extra_cost->alu.logical;
10335 }
10336 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10337 {
10338 /* We have UXTB/UXTH. */
10339 *cost = COSTS_N_INSNS (1);
10340 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10341 if (speed_p)
10342 *cost += extra_cost->alu.extnd;
10343 }
10344 else if (GET_MODE (XEXP (x, 0)) != SImode)
10345 {
10346 /* Needs two shifts. It's marginally preferable to use
10347 shifts rather than two BIC instructions as the second
10348 shift may merge with a subsequent insn as a shifter
10349 op. */
10350 *cost = COSTS_N_INSNS (2);
10351 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10352 if (speed_p)
10353 *cost += 2 * extra_cost->alu.shift;
10354 }
10355 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10356 *cost = COSTS_N_INSNS (1);
10357
10358 /* Widening beyond 32-bits requires one more insn. */
10359 if (mode == DImode)
10360 {
10361 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10362 }
10363
10364 return true;
10365
10366 case CONST_INT:
10367 *cost = 0;
10368 /* CONST_INT has no mode, so we cannot tell for sure how many
10369 insns are really going to be needed. The best we can do is
10370 look at the value passed. If it fits in SImode, then assume
10371 that's the mode it will be used for. Otherwise assume it
10372 will be used in DImode. */
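/* For example, 5 fits in SImode and is costed as a single SImode
   constant, whereas an illustrative value such as 0x1ffffffff does not,
   so both 32-bit halves are costed below as if for DImode.  */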
10373 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10374 mode = SImode;
10375 else
10376 mode = DImode;
10377
10378 /* Avoid blowing up in arm_gen_constant (). */
10379 if (!(outer_code == PLUS
10380 || outer_code == AND
10381 || outer_code == IOR
10382 || outer_code == XOR
10383 || outer_code == MINUS))
10384 outer_code = SET;
10385
10386 const_int_cost:
10387 if (mode == SImode)
10388 {
10389 *cost += 0;
10390 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10391 INTVAL (x), NULL, NULL,
10392 0, 0));
10393 /* Extra costs? */
10394 }
10395 else
10396 {
10397 *cost += COSTS_N_INSNS (arm_gen_constant
10398 (outer_code, SImode, NULL,
10399 trunc_int_for_mode (INTVAL (x), SImode),
10400 NULL, NULL, 0, 0)
10401 + arm_gen_constant (outer_code, SImode, NULL,
10402 INTVAL (x) >> 32, NULL,
10403 NULL, 0, 0));
10404 /* Extra costs? */
10405 }
10406
10407 return true;
10408
10409 case CONST:
10410 case LABEL_REF:
10411 case SYMBOL_REF:
10412 if (speed_p)
10413 {
10414 if (arm_arch_thumb2 && !flag_pic)
10415 *cost = COSTS_N_INSNS (2);
10416 else
10417 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10418 }
10419 else
10420 *cost = COSTS_N_INSNS (2);
10421
10422 if (flag_pic)
10423 {
10424 *cost += COSTS_N_INSNS (1);
10425 if (speed_p)
10426 *cost += extra_cost->alu.arith;
10427 }
10428
10429 return true;
10430
10431 case CONST_FIXED:
10432 *cost = COSTS_N_INSNS (4);
10433 /* Fixme. */
10434 return true;
10435
10436 case CONST_DOUBLE:
10437 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10438 && (mode == SFmode || !TARGET_VFP_SINGLE))
10439 {
10440 if (vfp3_const_double_rtx (x))
10441 {
10442 *cost = COSTS_N_INSNS (1);
10443 if (speed_p)
10444 *cost += extra_cost->fp[mode == DFmode].fpconst;
10445 return true;
10446 }
10447
10448 if (speed_p)
10449 {
10450 *cost = COSTS_N_INSNS (1);
10451 if (mode == DFmode)
10452 *cost += extra_cost->ldst.loadd;
10453 else
10454 *cost += extra_cost->ldst.loadf;
10455 }
10456 else
10457 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10458
10459 return true;
10460 }
10461 *cost = COSTS_N_INSNS (4);
10462 return true;
10463
10464 case CONST_VECTOR:
10465 /* Fixme. */
10466 if (TARGET_NEON
10467 && TARGET_HARD_FLOAT
10468 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10469 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10470 *cost = COSTS_N_INSNS (1);
10471 else
10472 *cost = COSTS_N_INSNS (4);
10473 return true;
10474
10475 case HIGH:
10476 case LO_SUM:
10477 *cost = COSTS_N_INSNS (1);
10478 /* When optimizing for size, we prefer constant pool entries to
10479 MOVW/MOVT pairs, so bump the cost of these slightly. */
10480 if (!speed_p)
10481 *cost += 1;
10482 return true;
10483
10484 case CLZ:
10485 *cost = COSTS_N_INSNS (1);
10486 if (speed_p)
10487 *cost += extra_cost->alu.clz;
10488 return false;
10489
10490 case SMIN:
10491 if (XEXP (x, 1) == const0_rtx)
10492 {
10493 *cost = COSTS_N_INSNS (1);
10494 if (speed_p)
10495 *cost += extra_cost->alu.log_shift;
10496 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10497 return true;
10498 }
10499 /* Fall through. */
10500 case SMAX:
10501 case UMIN:
10502 case UMAX:
10503 *cost = COSTS_N_INSNS (2);
10504 return false;
10505
10506 case TRUNCATE:
10507 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10508 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10509 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10510 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10511 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10512 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10513 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10514 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10515 == ZERO_EXTEND))))
10516 {
10517 *cost = COSTS_N_INSNS (1);
10518 if (speed_p)
10519 *cost += extra_cost->mult[1].extend;
10520 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10521 speed_p)
10522 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10523 0, speed_p));
10524 return true;
10525 }
10526 *cost = LIBCALL_COST (1);
10527 return false;
10528
10529 case UNSPEC:
10530 return arm_unspec_cost (x, outer_code, speed_p, cost);
10531
10532 case PC:
10533 /* Reading the PC is like reading any other register. Writing it
10534 is more expensive, but we take that into account elsewhere. */
10535 *cost = 0;
10536 return true;
10537
10538 case ZERO_EXTRACT:
10539 /* TODO: Simple zero_extract of bottom bits using AND. */
10540 /* Fall through. */
10541 case SIGN_EXTRACT:
10542 if (arm_arch6
10543 && mode == SImode
10544 && CONST_INT_P (XEXP (x, 1))
10545 && CONST_INT_P (XEXP (x, 2)))
10546 {
10547 *cost = COSTS_N_INSNS (1);
10548 if (speed_p)
10549 *cost += extra_cost->alu.bfx;
10550 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10551 return true;
10552 }
10553 /* Without UBFX/SBFX, need to resort to shift operations. */
10554 *cost = COSTS_N_INSNS (2);
10555 if (speed_p)
10556 *cost += 2 * extra_cost->alu.shift;
10557 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10558 return true;
10559
10560 case FLOAT_EXTEND:
10561 if (TARGET_HARD_FLOAT)
10562 {
10563 *cost = COSTS_N_INSNS (1);
10564 if (speed_p)
10565 *cost += extra_cost->fp[mode == DFmode].widen;
10566 if (!TARGET_FPU_ARMV8
10567 && GET_MODE (XEXP (x, 0)) == HFmode)
10568 {
10569 /* Pre v8, widening HF->DF is a two-step process, first
10570 widening to SFmode. */
10571 *cost += COSTS_N_INSNS (1);
10572 if (speed_p)
10573 *cost += extra_cost->fp[0].widen;
10574 }
10575 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10576 return true;
10577 }
10578
10579 *cost = LIBCALL_COST (1);
10580 return false;
10581
10582 case FLOAT_TRUNCATE:
10583 if (TARGET_HARD_FLOAT)
10584 {
10585 *cost = COSTS_N_INSNS (1);
10586 if (speed_p)
10587 *cost += extra_cost->fp[mode == DFmode].narrow;
10588 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10589 return true;
10590 /* Vector modes? */
10591 }
10592 *cost = LIBCALL_COST (1);
10593 return false;
10594
10595 case FIX:
10596 case UNSIGNED_FIX:
10597 if (TARGET_HARD_FLOAT)
10598 {
10599 if (GET_MODE_CLASS (mode) == MODE_INT)
10600 {
10601 *cost = COSTS_N_INSNS (1);
10602 if (speed_p)
10603 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10604 /* Strip off the 'cost' of rounding towards zero. */
10605 if (GET_CODE (XEXP (x, 0)) == FIX)
10606 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10607 else
10608 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10609 /* ??? Increase the cost to deal with transferring from
10610 FP -> CORE registers? */
10611 return true;
10612 }
10613 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10614 && TARGET_FPU_ARMV8)
10615 {
10616 *cost = COSTS_N_INSNS (1);
10617 if (speed_p)
10618 *cost += extra_cost->fp[mode == DFmode].roundint;
10619 return false;
10620 }
10621 /* Vector costs? */
10622 }
10623 *cost = LIBCALL_COST (1);
10624 return false;
10625
10626 case FLOAT:
10627 case UNSIGNED_FLOAT:
10628 if (TARGET_HARD_FLOAT)
10629 {
10630 /* ??? Increase the cost to deal with transferring from CORE
10631 -> FP registers? */
10632 *cost = COSTS_N_INSNS (1);
10633 if (speed_p)
10634 *cost += extra_cost->fp[mode == DFmode].fromint;
10635 return false;
10636 }
10637 *cost = LIBCALL_COST (1);
10638 return false;
10639
10640 case CALL:
10641 *cost = COSTS_N_INSNS (1);
10642 return true;
10643
10644 case ASM_OPERANDS:
10645 /* Just a guess. Cost one insn per input. */
10646 *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x));
10647 return true;
10648
10649 default:
10650 if (mode != VOIDmode)
10651 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10652 else
10653 *cost = COSTS_N_INSNS (4); /* Who knows? */
10654 return false;
10655 }
10656 }
10657
10658 #undef HANDLE_NARROW_SHIFT_ARITH
10659
10660 /* Top-level RTX costs hook; dispatches to the size, legacy per-core speed,
10660 or table-driven cost implementations as appropriate. */
10661 static bool
10662 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10663 int *total, bool speed)
10664 {
10665 bool result;
10666
10667 if (TARGET_OLD_RTX_COSTS
10668 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10669 {
10670 /* Old way. (Deprecated.) */
10671 if (!speed)
10672 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10673 (enum rtx_code) outer_code, total);
10674 else
10675 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10676 (enum rtx_code) outer_code, total,
10677 speed);
10678 }
10679 else
10680 {
10681 /* New way. */
10682 if (current_tune->insn_extra_cost)
10683 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10684 (enum rtx_code) outer_code,
10685 current_tune->insn_extra_cost,
10686 total, speed);
10687 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10688 && current_tune->insn_extra_cost == NULL */
10689 else
10690 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10691 (enum rtx_code) outer_code,
10692 &generic_extra_costs, total, speed);
10693 }
10694
10695 if (dump_file && (dump_flags & TDF_DETAILS))
10696 {
10697 print_rtl_single (dump_file, x);
10698 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10699 *total, result ? "final" : "partial");
10700 }
10701 return result;
10702 }
10703
10704 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10705 supported on any "slowmul" cores, so it can be ignored. */
10706
10707 static bool
10708 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10709 int *total, bool speed)
10710 {
10711 enum machine_mode mode = GET_MODE (x);
10712
10713 if (TARGET_THUMB)
10714 {
10715 *total = thumb1_rtx_costs (x, code, outer_code);
10716 return true;
10717 }
10718
10719 switch (code)
10720 {
10721 case MULT:
10722 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10723 || mode == DImode)
10724 {
10725 *total = COSTS_N_INSNS (20);
10726 return false;
10727 }
10728
10729 if (CONST_INT_P (XEXP (x, 1)))
10730 {
10731 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10732 & (unsigned HOST_WIDE_INT) 0xffffffff);
10733 int cost, const_ok = const_ok_for_arm (i);
10734 int j, booth_unit_size;
10735
10736 /* Tune as appropriate. */
10737 cost = const_ok ? 4 : 8;
10738 booth_unit_size = 2;
10739 for (j = 0; i && j < 32; j += booth_unit_size)
10740 {
10741 i >>= booth_unit_size;
10742 cost++;
10743 }
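/* Worked example: for a multiply by 0x55 (const_ok, so the base cost is
   4) the loop above shifts the constant right two bits at a time and
   needs four iterations before it reaches zero, giving
   COSTS_N_INSNS (8) overall.  */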
10744
10745 *total = COSTS_N_INSNS (cost);
10746 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
10747 return true;
10748 }
10749
10750 *total = COSTS_N_INSNS (20);
10751 return false;
10752
10753 default:
10754 return arm_rtx_costs_1 (x, outer_code, total, speed);
10755 }
10756 }
10757
10758
10759 /* RTX cost for cores with a fast multiply unit (M variants). */
10760
10761 static bool
10762 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10763 int *total, bool speed)
10764 {
10765 enum machine_mode mode = GET_MODE (x);
10766
10767 if (TARGET_THUMB1)
10768 {
10769 *total = thumb1_rtx_costs (x, code, outer_code);
10770 return true;
10771 }
10772
10773 /* ??? should thumb2 use different costs? */
10774 switch (code)
10775 {
10776 case MULT:
10777 /* There is no point basing this on the tuning, since it is always the
10778 fast variant if it exists at all. */
10779 if (mode == DImode
10780 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10781 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10782 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10783 {
10784 *total = COSTS_N_INSNS (2);
10785 return false;
10786 }
10787
10788
10789 if (mode == DImode)
10790 {
10791 *total = COSTS_N_INSNS (5);
10792 return false;
10793 }
10794
10795 if (CONST_INT_P (XEXP (x, 1)))
10796 {
10797 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10798 & (unsigned HOST_WIDE_INT) 0xffffffff);
10799 int cost, const_ok = const_ok_for_arm (i);
10800 int j, booth_unit_size;
10801
10802 /* Tune as appropriate. */
10803 cost = const_ok ? 4 : 8;
10804 booth_unit_size = 8;
10805 for (j = 0; i && j < 32; j += booth_unit_size)
10806 {
10807 i >>= booth_unit_size;
10808 cost++;
10809 }
10810
10811 *total = COSTS_N_INSNS (cost);
10812 return false;
10813 }
10814
10815 if (mode == SImode)
10816 {
10817 *total = COSTS_N_INSNS (4);
10818 return false;
10819 }
10820
10821 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10822 {
10823 if (TARGET_HARD_FLOAT
10824 && (mode == SFmode
10825 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10826 {
10827 *total = COSTS_N_INSNS (1);
10828 return false;
10829 }
10830 }
10831
10832 /* Requires a lib call */
10833 *total = COSTS_N_INSNS (20);
10834 return false;
10835
10836 default:
10837 return arm_rtx_costs_1 (x, outer_code, total, speed);
10838 }
10839 }
10840
10841
10842 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
10843 so it can be ignored. */
10844
10845 static bool
10846 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10847 int *total, bool speed)
10848 {
10849 enum machine_mode mode = GET_MODE (x);
10850
10851 if (TARGET_THUMB)
10852 {
10853 *total = thumb1_rtx_costs (x, code, outer_code);
10854 return true;
10855 }
10856
10857 switch (code)
10858 {
10859 case COMPARE:
10860 if (GET_CODE (XEXP (x, 0)) != MULT)
10861 return arm_rtx_costs_1 (x, outer_code, total, speed);
10862
10863 /* A COMPARE of a MULT is slow on XScale; the muls instruction
10864 will stall until the multiplication is complete. */
10865 *total = COSTS_N_INSNS (3);
10866 return false;
10867
10868 case MULT:
10869 /* There is no point basing this on the tuning, since it is always the
10870 fast variant if it exists at all. */
10871 if (mode == DImode
10872 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10873 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10874 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10875 {
10876 *total = COSTS_N_INSNS (2);
10877 return false;
10878 }
10879
10880
10881 if (mode == DImode)
10882 {
10883 *total = COSTS_N_INSNS (5);
10884 return false;
10885 }
10886
10887 if (CONST_INT_P (XEXP (x, 1)))
10888 {
10889 /* If operand 1 is a constant we can more accurately
10890 calculate the cost of the multiply. The multiplier can
10891 retire 15 bits on the first cycle and a further 12 on the
10892 second. We do, of course, have to load the constant into
10893 a register first. */
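/* Worked example: an illustrative constant of 0x12345 has bits set at or
   above bit 15 but none at or above bit 27, so the code below charges
   one extra cycle on top of the general overhead, i.e. a cost of 2.  */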
10894 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
10895 /* There's a general overhead of one cycle. */
10896 int cost = 1;
10897 unsigned HOST_WIDE_INT masked_const;
10898
10899 if (i & 0x80000000)
10900 i = ~i;
10901
10902 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
10903
10904 masked_const = i & 0xffff8000;
10905 if (masked_const != 0)
10906 {
10907 cost++;
10908 masked_const = i & 0xf8000000;
10909 if (masked_const != 0)
10910 cost++;
10911 }
10912 *total = COSTS_N_INSNS (cost);
10913 return false;
10914 }
10915
10916 if (mode == SImode)
10917 {
10918 *total = COSTS_N_INSNS (3);
10919 return false;
10920 }
10921
10922 /* Requires a lib call */
10923 *total = COSTS_N_INSNS (20);
10924 return false;
10925
10926 default:
10927 return arm_rtx_costs_1 (x, outer_code, total, speed);
10928 }
10929 }
10930
10931
10932 /* RTX costs for 9e (and later) cores. */
10933
10934 static bool
10935 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10936 int *total, bool speed)
10937 {
10938 enum machine_mode mode = GET_MODE (x);
10939
10940 if (TARGET_THUMB1)
10941 {
10942 switch (code)
10943 {
10944 case MULT:
10945 *total = COSTS_N_INSNS (3);
10946 return true;
10947
10948 default:
10949 *total = thumb1_rtx_costs (x, code, outer_code);
10950 return true;
10951 }
10952 }
10953
10954 switch (code)
10955 {
10956 case MULT:
10957 /* There is no point basing this on the tuning, since it is always the
10958 fast variant if it exists at all. */
10959 if (mode == DImode
10960 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10961 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10962 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10963 {
10964 *total = COSTS_N_INSNS (2);
10965 return false;
10966 }
10967
10968
10969 if (mode == DImode)
10970 {
10971 *total = COSTS_N_INSNS (5);
10972 return false;
10973 }
10974
10975 if (mode == SImode)
10976 {
10977 *total = COSTS_N_INSNS (2);
10978 return false;
10979 }
10980
10981 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10982 {
10983 if (TARGET_HARD_FLOAT
10984 && (mode == SFmode
10985 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10986 {
10987 *total = COSTS_N_INSNS (1);
10988 return false;
10989 }
10990 }
10991
10992 *total = COSTS_N_INSNS (20);
10993 return false;
10994
10995 default:
10996 return arm_rtx_costs_1 (x, outer_code, total, speed);
10997 }
10998 }
10999 /* All address computations that can be done are free, but rtx cost returns
11000 the same for practically all of them. So we weight the different types
11001 of address here in the order (most pref first):
11002 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11003 static inline int
11004 arm_arm_address_cost (rtx x)
11005 {
11006 enum rtx_code c = GET_CODE (x);
11007
11008 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11009 return 0;
11010 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11011 return 10;
11012
11013 if (c == PLUS)
11014 {
11015 if (CONST_INT_P (XEXP (x, 1)))
11016 return 2;
11017
11018 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11019 return 3;
11020
11021 return 4;
11022 }
11023
11024 return 6;
11025 }
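/* For example: a post-increment address costs 0, reg+constant costs 2,
   reg plus a shifted index costs 3, reg+reg costs 4, a plain register 6,
   and a SYMBOL_REF or LABEL_REF 10, so auto-increment forms are the most
   strongly preferred.  */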
11026
11027 static inline int
11028 arm_thumb_address_cost (rtx x)
11029 {
11030 enum rtx_code c = GET_CODE (x);
11031
11032 if (c == REG)
11033 return 1;
11034 if (c == PLUS
11035 && REG_P (XEXP (x, 0))
11036 && CONST_INT_P (XEXP (x, 1)))
11037 return 1;
11038
11039 return 2;
11040 }
11041
11042 static int
11043 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11044 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11045 {
11046 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11047 }
11048
11049 /* Adjust cost hook for XScale. */
11050 static bool
11051 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11052 {
11053 /* Some true dependencies can have a higher cost depending
11054 on precisely how certain input operands are used. */
11055 if (REG_NOTE_KIND(link) == 0
11056 && recog_memoized (insn) >= 0
11057 && recog_memoized (dep) >= 0)
11058 {
11059 int shift_opnum = get_attr_shift (insn);
11060 enum attr_type attr_type = get_attr_type (dep);
11061
11062 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11063 operand for INSN. If we have a shifted input operand and the
11064 instruction we depend on is another ALU instruction, then we may
11065 have to account for an additional stall. */
11066 if (shift_opnum != 0
11067 && (attr_type == TYPE_ALU_SHIFT_IMM
11068 || attr_type == TYPE_ALUS_SHIFT_IMM
11069 || attr_type == TYPE_LOGIC_SHIFT_IMM
11070 || attr_type == TYPE_LOGICS_SHIFT_IMM
11071 || attr_type == TYPE_ALU_SHIFT_REG
11072 || attr_type == TYPE_ALUS_SHIFT_REG
11073 || attr_type == TYPE_LOGIC_SHIFT_REG
11074 || attr_type == TYPE_LOGICS_SHIFT_REG
11075 || attr_type == TYPE_MOV_SHIFT
11076 || attr_type == TYPE_MVN_SHIFT
11077 || attr_type == TYPE_MOV_SHIFT_REG
11078 || attr_type == TYPE_MVN_SHIFT_REG))
11079 {
11080 rtx shifted_operand;
11081 int opno;
11082
11083 /* Get the shifted operand. */
11084 extract_insn (insn);
11085 shifted_operand = recog_data.operand[shift_opnum];
11086
11087 /* Iterate over all the operands in DEP. If we write an operand
11088 that overlaps with SHIFTED_OPERAND, then we have to increase the
11089 cost of this dependency. */
11090 extract_insn (dep);
11091 preprocess_constraints ();
11092 for (opno = 0; opno < recog_data.n_operands; opno++)
11093 {
11094 /* We can ignore strict inputs. */
11095 if (recog_data.operand_type[opno] == OP_IN)
11096 continue;
11097
11098 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11099 shifted_operand))
11100 {
11101 *cost = 2;
11102 return false;
11103 }
11104 }
11105 }
11106 }
11107 return true;
11108 }
11109
11110 /* Adjust cost hook for Cortex A9. */
11111 static bool
11112 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11113 {
11114 switch (REG_NOTE_KIND (link))
11115 {
11116 case REG_DEP_ANTI:
11117 *cost = 0;
11118 return false;
11119
11120 case REG_DEP_TRUE:
11121 case REG_DEP_OUTPUT:
11122 if (recog_memoized (insn) >= 0
11123 && recog_memoized (dep) >= 0)
11124 {
11125 if (GET_CODE (PATTERN (insn)) == SET)
11126 {
11127 if (GET_MODE_CLASS
11128 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11129 || GET_MODE_CLASS
11130 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11131 {
11132 enum attr_type attr_type_insn = get_attr_type (insn);
11133 enum attr_type attr_type_dep = get_attr_type (dep);
11134
11135 /* By default all dependencies of the form
11136 s0 = s0 <op> s1
11137 s0 = s0 <op> s2
11138 have an extra latency of 1 cycle because
11139 of the input and output dependency in this
11140 case. However this gets modeled as a true
11141 dependency and hence all these checks. */
11142 if (REG_P (SET_DEST (PATTERN (insn)))
11143 && REG_P (SET_DEST (PATTERN (dep)))
11144 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11145 SET_DEST (PATTERN (dep))))
11146 {
11147 /* FMACS is a special case where the dependent
11148 instruction can be issued 3 cycles before
11149 the normal latency in case of an output
11150 dependency. */
11151 if ((attr_type_insn == TYPE_FMACS
11152 || attr_type_insn == TYPE_FMACD)
11153 && (attr_type_dep == TYPE_FMACS
11154 || attr_type_dep == TYPE_FMACD))
11155 {
11156 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11157 *cost = insn_default_latency (dep) - 3;
11158 else
11159 *cost = insn_default_latency (dep);
11160 return false;
11161 }
11162 else
11163 {
11164 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11165 *cost = insn_default_latency (dep) + 1;
11166 else
11167 *cost = insn_default_latency (dep);
11168 }
11169 return false;
11170 }
11171 }
11172 }
11173 }
11174 break;
11175
11176 default:
11177 gcc_unreachable ();
11178 }
11179
11180 return true;
11181 }
11182
11183 /* Adjust cost hook for FA726TE. */
11184 static bool
11185 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11186 {
11187 /* For FA726TE, a true dependency on CPSR (i.e. a condition-setting insn
11188 followed by a predicated one) has a penalty of 3. */
11189 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11190 && recog_memoized (insn) >= 0
11191 && recog_memoized (dep) >= 0
11192 && get_attr_conds (dep) == CONDS_SET)
11193 {
11194 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11195 if (get_attr_conds (insn) == CONDS_USE
11196 && get_attr_type (insn) != TYPE_BRANCH)
11197 {
11198 *cost = 3;
11199 return false;
11200 }
11201
11202 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11203 || get_attr_conds (insn) == CONDS_USE)
11204 {
11205 *cost = 0;
11206 return false;
11207 }
11208 }
11209
11210 return true;
11211 }
11212
11213 /* Implement TARGET_REGISTER_MOVE_COST.
11214
11215 Moves between VFP_REGS and GENERAL_REGS are a single insn, but such a
11216 move is typically more expensive than a single memory access. We set
11217 the cost to less than that of two memory accesses so that floating-point
11218 to integer conversion does not go through memory. */
11219
11220 int
11221 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11222 reg_class_t from, reg_class_t to)
11223 {
11224 if (TARGET_32BIT)
11225 {
11226 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11227 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11228 return 15;
11229 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11230 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11231 return 4;
11232 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11233 return 20;
11234 else
11235 return 2;
11236 }
11237 else
11238 {
11239 if (from == HI_REGS || to == HI_REGS)
11240 return 4;
11241 else
11242 return 2;
11243 }
11244 }
11245
11246 /* Implement TARGET_MEMORY_MOVE_COST. */
11247
11248 int
11249 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11250 bool in ATTRIBUTE_UNUSED)
11251 {
11252 if (TARGET_32BIT)
11253 return 10;
11254 else
11255 {
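/* Descriptive note on the Thumb costs below: sub-word accesses cost 8;
   otherwise the cost scales with the access size and is doubled for
   register classes other than LO_REGS. */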
11256 if (GET_MODE_SIZE (mode) < 4)
11257 return 8;
11258 else
11259 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11260 }
11261 }
11262
11263 /* Vectorizer cost model implementation. */
11264
11265 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11266 static int
11267 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11268 tree vectype,
11269 int misalign ATTRIBUTE_UNUSED)
11270 {
11271 unsigned elements;
11272
11273 switch (type_of_cost)
11274 {
11275 case scalar_stmt:
11276 return current_tune->vec_costs->scalar_stmt_cost;
11277
11278 case scalar_load:
11279 return current_tune->vec_costs->scalar_load_cost;
11280
11281 case scalar_store:
11282 return current_tune->vec_costs->scalar_store_cost;
11283
11284 case vector_stmt:
11285 return current_tune->vec_costs->vec_stmt_cost;
11286
11287 case vector_load:
11288 return current_tune->vec_costs->vec_align_load_cost;
11289
11290 case vector_store:
11291 return current_tune->vec_costs->vec_store_cost;
11292
11293 case vec_to_scalar:
11294 return current_tune->vec_costs->vec_to_scalar_cost;
11295
11296 case scalar_to_vec:
11297 return current_tune->vec_costs->scalar_to_vec_cost;
11298
11299 case unaligned_load:
11300 return current_tune->vec_costs->vec_unalign_load_cost;
11301
11302 case unaligned_store:
11303 return current_tune->vec_costs->vec_unalign_store_cost;
11304
11305 case cond_branch_taken:
11306 return current_tune->vec_costs->cond_taken_branch_cost;
11307
11308 case cond_branch_not_taken:
11309 return current_tune->vec_costs->cond_not_taken_branch_cost;
11310
11311 case vec_perm:
11312 case vec_promote_demote:
11313 return current_tune->vec_costs->vec_stmt_cost;
11314
11315 case vec_construct:
11316 elements = TYPE_VECTOR_SUBPARTS (vectype);
11317 return elements / 2 + 1;
11318
11319 default:
11320 gcc_unreachable ();
11321 }
11322 }
11323
11324 /* Implement targetm.vectorize.add_stmt_cost. */
11325
11326 static unsigned
11327 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11328 struct _stmt_vec_info *stmt_info, int misalign,
11329 enum vect_cost_model_location where)
11330 {
11331 unsigned *cost = (unsigned *) data;
11332 unsigned retval = 0;
11333
11334 if (flag_vect_cost_model)
11335 {
11336 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11337 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11338
11339 /* Statements in an inner loop relative to the loop being
11340 vectorized are weighted more heavily. The value here is
11341 arbitrary and could potentially be improved with analysis. */
11342 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11343 count *= 50; /* FIXME. */
11344
11345 retval = (unsigned) (count * stmt_cost);
11346 cost[where] += retval;
11347 }
11348
11349 return retval;
11350 }
11351
11352 /* Return true if and only if this insn can dual-issue only as older. */
11353 static bool
11354 cortexa7_older_only (rtx insn)
11355 {
11356 if (recog_memoized (insn) < 0)
11357 return false;
11358
11359 switch (get_attr_type (insn))
11360 {
11361 case TYPE_ALU_REG:
11362 case TYPE_ALUS_REG:
11363 case TYPE_LOGIC_REG:
11364 case TYPE_LOGICS_REG:
11365 case TYPE_ADC_REG:
11366 case TYPE_ADCS_REG:
11367 case TYPE_ADR:
11368 case TYPE_BFM:
11369 case TYPE_REV:
11370 case TYPE_MVN_REG:
11371 case TYPE_SHIFT_IMM:
11372 case TYPE_SHIFT_REG:
11373 case TYPE_LOAD_BYTE:
11374 case TYPE_LOAD1:
11375 case TYPE_STORE1:
11376 case TYPE_FFARITHS:
11377 case TYPE_FADDS:
11378 case TYPE_FFARITHD:
11379 case TYPE_FADDD:
11380 case TYPE_FMOV:
11381 case TYPE_F_CVT:
11382 case TYPE_FCMPS:
11383 case TYPE_FCMPD:
11384 case TYPE_FCONSTS:
11385 case TYPE_FCONSTD:
11386 case TYPE_FMULS:
11387 case TYPE_FMACS:
11388 case TYPE_FMULD:
11389 case TYPE_FMACD:
11390 case TYPE_FDIVS:
11391 case TYPE_FDIVD:
11392 case TYPE_F_MRC:
11393 case TYPE_F_MRRC:
11394 case TYPE_F_FLAG:
11395 case TYPE_F_LOADS:
11396 case TYPE_F_STORES:
11397 return true;
11398 default:
11399 return false;
11400 }
11401 }
11402
11403 /* Return true if and only if this insn can dual-issue as younger. */
11404 static bool
11405 cortexa7_younger (FILE *file, int verbose, rtx insn)
11406 {
11407 if (recog_memoized (insn) < 0)
11408 {
11409 if (verbose > 5)
11410 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11411 return false;
11412 }
11413
11414 switch (get_attr_type (insn))
11415 {
11416 case TYPE_ALU_IMM:
11417 case TYPE_ALUS_IMM:
11418 case TYPE_LOGIC_IMM:
11419 case TYPE_LOGICS_IMM:
11420 case TYPE_EXTEND:
11421 case TYPE_MVN_IMM:
11422 case TYPE_MOV_IMM:
11423 case TYPE_MOV_REG:
11424 case TYPE_MOV_SHIFT:
11425 case TYPE_MOV_SHIFT_REG:
11426 case TYPE_BRANCH:
11427 case TYPE_CALL:
11428 return true;
11429 default:
11430 return false;
11431 }
11432 }
11433
11434
11435 /* Look for an instruction that can dual issue only as an older
11436 instruction, and move it in front of any instructions that can
11437 dual-issue as younger, while preserving the relative order of all
11438 other instructions in the ready list. This is a heuristic to help
11439 dual-issue in later cycles, by postponing issue of more flexible
11440 instructions. This heuristic may affect dual issue opportunities
11441 in the current cycle. */
11442 static void
11443 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11444 int clock)
11445 {
11446 int i;
11447 int first_older_only = -1, first_younger = -1;
11448
11449 if (verbose > 5)
11450 fprintf (file,
11451 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11452 clock,
11453 *n_readyp);
11454
11455 /* Traverse the ready list from the head (the instruction to issue
11456 first), looking for the first instruction that can issue as
11457 younger and the first instruction that can dual-issue only as
11458 older. */
11459 for (i = *n_readyp - 1; i >= 0; i--)
11460 {
11461 rtx insn = ready[i];
11462 if (cortexa7_older_only (insn))
11463 {
11464 first_older_only = i;
11465 if (verbose > 5)
11466 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11467 break;
11468 }
11469 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11470 first_younger = i;
11471 }
11472
11473 /* Nothing to reorder because either no younger insn was found or an insn
11474 that can dual-issue only as older appears before any insn that
11475 can dual-issue as younger. */
11476 if (first_younger == -1)
11477 {
11478 if (verbose > 5)
11479 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11480 return;
11481 }
11482
11483 /* Nothing to reorder because no older-only insn in the ready list. */
11484 if (first_older_only == -1)
11485 {
11486 if (verbose > 5)
11487 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11488 return;
11489 }
11490
11491 /* Move first_older_only insn before first_younger. */
11492 if (verbose > 5)
11493 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11494 INSN_UID(ready [first_older_only]),
11495 INSN_UID(ready [first_younger]));
11496 rtx first_older_only_insn = ready [first_older_only];
11497 for (i = first_older_only; i < first_younger; i++)
11498 {
11499 ready[i] = ready[i+1];
11500 }
11501
11502 ready[i] = first_older_only_insn;
11503 return;
11504 }
11505
11506 /* Implement TARGET_SCHED_REORDER. */
11507 static int
11508 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11509 int clock)
11510 {
11511 switch (arm_tune)
11512 {
11513 case cortexa7:
11514 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11515 break;
11516 default:
11517 /* Do nothing for other cores. */
11518 break;
11519 }
11520
11521 return arm_issue_rate ();
11522 }
11523
11524 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11525 It corrects the value of COST based on the relationship between
11526 INSN and DEP through the dependence LINK. It returns the new
11527 value. There is a per-core adjust_cost hook to adjust scheduler costs
11528 and the per-core hook can choose to completely override the generic
11529 adjust_cost function. Only put bits of code into arm_adjust_cost that
11530 are common across all cores. */
11531 static int
11532 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11533 {
11534 rtx i_pat, d_pat;
11535
11536 /* When generating Thumb-1 code, we want to place flag-setting operations
11537 close to a conditional branch which depends on them, so that we can
11538 omit the comparison. */
11539 if (TARGET_THUMB1
11540 && REG_NOTE_KIND (link) == 0
11541 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11542 && recog_memoized (dep) >= 0
11543 && get_attr_conds (dep) == CONDS_SET)
11544 return 0;
11545
11546 if (current_tune->sched_adjust_cost != NULL)
11547 {
11548 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11549 return cost;
11550 }
11551
11552 /* XXX Is this strictly true? */
11553 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11554 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11555 return 0;
11556
11557 /* Call insns don't incur a stall, even if they follow a load. */
11558 if (REG_NOTE_KIND (link) == 0
11559 && CALL_P (insn))
11560 return 1;
11561
11562 if ((i_pat = single_set (insn)) != NULL
11563 && MEM_P (SET_SRC (i_pat))
11564 && (d_pat = single_set (dep)) != NULL
11565 && MEM_P (SET_DEST (d_pat)))
11566 {
11567 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11568 /* This is a load after a store; there is no conflict if the load reads
11569 from a cached area. Assume that loads from the stack, and from the
11570 constant pool are cached, and that others will miss. This is a
11571 hack. */
11572
11573 if ((GET_CODE (src_mem) == SYMBOL_REF
11574 && CONSTANT_POOL_ADDRESS_P (src_mem))
11575 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11576 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11577 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11578 return 1;
11579 }
11580
11581 return cost;
11582 }
11583
11584 int
11585 arm_max_conditional_execute (void)
11586 {
11587 return max_insns_skipped;
11588 }
11589
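/* Default branch cost. For 32-bit code this is 1 for Thumb-2 when not
   optimizing for speed and 4 otherwise; for Thumb-1 it is 2 when
   optimizing and 0 at -O0. */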
11590 static int
11591 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11592 {
11593 if (TARGET_32BIT)
11594 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11595 else
11596 return (optimize > 0) ? 2 : 0;
11597 }
11598
11599 static int
11600 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11601 {
11602 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11603 }
11604
11605 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11606 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11607 sequences of non-executed instructions in IT blocks probably take the same
11608 amount of time as executed instructions (and the IT instruction itself takes
11609 space in icache). This function was experimentally determined to give good
11610 results on a popular embedded benchmark. */
11611
11612 static int
11613 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11614 {
11615 return (TARGET_32BIT && speed_p) ? 1
11616 : arm_default_branch_cost (speed_p, predictable_p);
11617 }
11618
11619 static bool fp_consts_inited = false;
11620
11621 static REAL_VALUE_TYPE value_fp0;
11622
11623 static void
11624 init_fp_table (void)
11625 {
11626 REAL_VALUE_TYPE r;
11627
11628 r = REAL_VALUE_ATOF ("0", DFmode);
11629 value_fp0 = r;
11630 fp_consts_inited = true;
11631 }
11632
11633 /* Return TRUE if rtx X is a valid immediate FP constant. */
11634 int
11635 arm_const_double_rtx (rtx x)
11636 {
11637 REAL_VALUE_TYPE r;
11638
11639 if (!fp_consts_inited)
11640 init_fp_table ();
11641
11642 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11643 if (REAL_VALUE_MINUS_ZERO (r))
11644 return 0;
11645
11646 if (REAL_VALUES_EQUAL (r, value_fp0))
11647 return 1;
11648
11649 return 0;
11650 }
11651
11652 /* VFPv3 has a fairly wide range of representable immediates, formed from
11653 "quarter-precision" floating-point values. These can be evaluated using this
11654 formula (with ^ for exponentiation):
11655
11656 -1^s * n * 2^-r
11657
11658 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11659 16 <= n <= 31 and 0 <= r <= 7.
11660
11661 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11662
11663 - A (most-significant) is the sign bit.
11664 - BCD are the exponent (encoded as r XOR 3).
11665 - EFGH are the mantissa (encoded as n - 16).
11666 */
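/* For example, 1.0 = +16 * 2^-4, i.e. s = 0, n = 16, r = 4. That encodes
   as A = 0, BCD = (4 XOR 3) = 0b111, EFGH = (16 - 16) = 0b0000, giving the
   8-bit index 0x70, which is what vfp3_const_double_index below returns
   for the CONST_DOUBLE 1.0. */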
11667
11668 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11669 fconst[sd] instruction, or -1 if X isn't suitable. */
11670 static int
11671 vfp3_const_double_index (rtx x)
11672 {
11673 REAL_VALUE_TYPE r, m;
11674 int sign, exponent;
11675 unsigned HOST_WIDE_INT mantissa, mant_hi;
11676 unsigned HOST_WIDE_INT mask;
11677 HOST_WIDE_INT m1, m2;
11678 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11679
11680 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11681 return -1;
11682
11683 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11684
11685 /* We can't represent these things, so detect them first. */
11686 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11687 return -1;
11688
11689 /* Extract sign, exponent and mantissa. */
11690 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11691 r = real_value_abs (&r);
11692 exponent = REAL_EXP (&r);
11693 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11694 highest (sign) bit, with a fixed binary point at bit point_pos.
11695 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11696 bits for the mantissa, this may fail (low bits would be lost). */
11697 real_ldexp (&m, &r, point_pos - exponent);
11698 REAL_VALUE_TO_INT (&m1, &m2, m);
11699 mantissa = m1;
11700 mant_hi = m2;
11701
11702 /* If there are bits set in the low part of the mantissa, we can't
11703 represent this value. */
11704 if (mantissa != 0)
11705 return -1;
11706
11707 /* Now make it so that mantissa contains the most-significant bits, and move
11708 the point_pos to indicate that the least-significant bits have been
11709 discarded. */
11710 point_pos -= HOST_BITS_PER_WIDE_INT;
11711 mantissa = mant_hi;
11712
11713 /* We can permit four significant bits of mantissa only, plus a high bit
11714 which is always 1. */
11715 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11716 if ((mantissa & mask) != 0)
11717 return -1;
11718
11719 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11720 mantissa >>= point_pos - 5;
11721
11722 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11723 floating-point immediate zero with Neon using an integer-zero load, but
11724 that case is handled elsewhere.) */
11725 if (mantissa == 0)
11726 return -1;
11727
11728 gcc_assert (mantissa >= 16 && mantissa <= 31);
11729
11730 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11731 normalized significands are in the range [1, 2). (Our mantissa is shifted
11732 left 4 places at this point relative to normalized IEEE754 values). GCC
11733 internally uses [0.5, 1) (see real.c), so the exponent returned from
11734 REAL_EXP must be altered. */
11735 exponent = 5 - exponent;
11736
11737 if (exponent < 0 || exponent > 7)
11738 return -1;
11739
11740 /* Sign, mantissa and exponent are now in the correct form to plug into the
11741 formula described in the comment above. */
11742 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11743 }
11744
11745 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11746 int
11747 vfp3_const_double_rtx (rtx x)
11748 {
11749 if (!TARGET_VFP3)
11750 return 0;
11751
11752 return vfp3_const_double_index (x) != -1;
11753 }
11754
11755 /* Recognize immediates which can be used in various Neon instructions. Legal
11756 immediates are described by the following table (for VMVN variants, the
11757 bitwise inverse of the constant shown is recognized. In either case, VMOV
11758 is output and the correct instruction to use for a given constant is chosen
11759 by the assembler). The constant shown is replicated across all elements of
11760 the destination vector.
11761
11762 insn elems variant constant (binary)
11763 ---- ----- ------- -----------------
11764 vmov i32 0 00000000 00000000 00000000 abcdefgh
11765 vmov i32 1 00000000 00000000 abcdefgh 00000000
11766 vmov i32 2 00000000 abcdefgh 00000000 00000000
11767 vmov i32 3 abcdefgh 00000000 00000000 00000000
11768 vmov i16 4 00000000 abcdefgh
11769 vmov i16 5 abcdefgh 00000000
11770 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11771 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11772 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11773 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11774 vmvn i16 10 00000000 abcdefgh
11775 vmvn i16 11 abcdefgh 00000000
11776 vmov i32 12 00000000 00000000 abcdefgh 11111111
11777 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11778 vmov i32 14 00000000 abcdefgh 11111111 11111111
11779 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11780 vmov i8 16 abcdefgh
11781 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11782 eeeeeeee ffffffff gggggggg hhhhhhhh
11783 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11784 vmov f32 19 00000000 00000000 00000000 00000000
11785
11786 For case 18, B = !b. Representable values are exactly those accepted by
11787 vfp3_const_double_index, but are output as floating-point numbers rather
11788 than indices.
11789
11790 For case 19, we will change it to vmov.i32 when assembling.
11791
11792 Variants 0-5 (inclusive) may also be used as immediates for the second
11793 operand of VORR/VBIC instructions.
11794
11795 The INVERSE argument causes the bitwise inverse of the given operand to be
11796 recognized instead (used for recognizing legal immediates for the VAND/VORN
11797 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11798 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11799 output, rather than the real insns vbic/vorr).
11800
11801 INVERSE makes no difference to the recognition of float vectors.
11802
11803 The return value is the variant of immediate as shown in the above table, or
11804 -1 if the given value doesn't match any of the listed patterns.
11805 */
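/* For example, a V4SImode constant with every element equal to 0x000000ff
   produces the byte pattern ff 00 00 00 repeated four times, which matches
   variant 0 above with abcdefgh = 0xff and an element width of 32; it can
   therefore be loaded with a single vmov.i32 of the immediate 0xff. */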
11806 static int
11807 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
11808 rtx *modconst, int *elementwidth)
11809 {
11810 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11811 matches = 1; \
11812 for (i = 0; i < idx; i += (STRIDE)) \
11813 if (!(TEST)) \
11814 matches = 0; \
11815 if (matches) \
11816 { \
11817 immtype = (CLASS); \
11818 elsize = (ELSIZE); \
11819 break; \
11820 }
11821
11822 unsigned int i, elsize = 0, idx = 0, n_elts;
11823 unsigned int innersize;
11824 unsigned char bytes[16];
11825 int immtype = -1, matches;
11826 unsigned int invmask = inverse ? 0xff : 0;
11827 bool vector = GET_CODE (op) == CONST_VECTOR;
11828
11829 if (vector)
11830 {
11831 n_elts = CONST_VECTOR_NUNITS (op);
11832 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
11833 }
11834 else
11835 {
11836 n_elts = 1;
11837 if (mode == VOIDmode)
11838 mode = DImode;
11839 innersize = GET_MODE_SIZE (mode);
11840 }
11841
11842 /* Vectors of float constants. */
11843 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11844 {
11845 rtx el0 = CONST_VECTOR_ELT (op, 0);
11846 REAL_VALUE_TYPE r0;
11847
11848 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11849 return -1;
11850
11851 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
11852
11853 for (i = 1; i < n_elts; i++)
11854 {
11855 rtx elt = CONST_VECTOR_ELT (op, i);
11856 REAL_VALUE_TYPE re;
11857
11858 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
11859
11860 if (!REAL_VALUES_EQUAL (r0, re))
11861 return -1;
11862 }
11863
11864 if (modconst)
11865 *modconst = CONST_VECTOR_ELT (op, 0);
11866
11867 if (elementwidth)
11868 *elementwidth = 0;
11869
11870 if (el0 == CONST0_RTX (GET_MODE (el0)))
11871 return 19;
11872 else
11873 return 18;
11874 }
11875
11876 /* Splat vector constant out into a byte vector. */
11877 for (i = 0; i < n_elts; i++)
11878 {
11879 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11880 unsigned HOST_WIDE_INT elpart;
11881 unsigned int part, parts;
11882
11883 if (CONST_INT_P (el))
11884 {
11885 elpart = INTVAL (el);
11886 parts = 1;
11887 }
11888 else if (CONST_DOUBLE_P (el))
11889 {
11890 elpart = CONST_DOUBLE_LOW (el);
11891 parts = 2;
11892 }
11893 else
11894 gcc_unreachable ();
11895
11896 for (part = 0; part < parts; part++)
11897 {
11898 unsigned int byte;
11899 for (byte = 0; byte < innersize; byte++)
11900 {
11901 bytes[idx++] = (elpart & 0xff) ^ invmask;
11902 elpart >>= BITS_PER_UNIT;
11903 }
11904 if (CONST_DOUBLE_P (el))
11905 elpart = CONST_DOUBLE_HIGH (el);
11906 }
11907 }
11908
11909 /* Sanity check. */
11910 gcc_assert (idx == GET_MODE_SIZE (mode));
11911
11912 do
11913 {
11914 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11915 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11916
11917 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11918 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11919
11920 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11921 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11922
11923 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11924 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11925
11926 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11927
11928 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11929
11930 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11931 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11932
11933 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11934 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11935
11936 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11937 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11938
11939 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11940 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11941
11942 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11943
11944 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11945
11946 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11947 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11948
11949 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11950 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11951
11952 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11953 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11954
11955 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11956 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11957
11958 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11959
11960 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11961 && bytes[i] == bytes[(i + 8) % idx]);
11962 }
11963 while (0);
11964
11965 if (immtype == -1)
11966 return -1;
11967
11968 if (elementwidth)
11969 *elementwidth = elsize;
11970
11971 if (modconst)
11972 {
11973 unsigned HOST_WIDE_INT imm = 0;
11974
11975 /* Un-invert bytes of recognized vector, if necessary. */
11976 if (invmask != 0)
11977 for (i = 0; i < idx; i++)
11978 bytes[i] ^= invmask;
11979
11980 if (immtype == 17)
11981 {
11982 /* FIXME: Broken on 32-bit H_W_I hosts. */
11983 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11984
11985 for (i = 0; i < 8; i++)
11986 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11987 << (i * BITS_PER_UNIT);
11988
11989 *modconst = GEN_INT (imm);
11990 }
11991 else
11992 {
11993 unsigned HOST_WIDE_INT imm = 0;
11994
11995 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11996 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11997
11998 *modconst = GEN_INT (imm);
11999 }
12000 }
12001
12002 return immtype;
12003 #undef CHECK
12004 }
12005
12006 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12007 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12008 float elements), and a modified constant (whatever should be output for a
12009 VMOV) in *MODCONST. */
12010
12011 int
12012 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12013 rtx *modconst, int *elementwidth)
12014 {
12015 rtx tmpconst;
12016 int tmpwidth;
12017 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12018
12019 if (retval == -1)
12020 return 0;
12021
12022 if (modconst)
12023 *modconst = tmpconst;
12024
12025 if (elementwidth)
12026 *elementwidth = tmpwidth;
12027
12028 return 1;
12029 }
12030
12031 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12032 the immediate is valid, write a constant suitable for using as an operand
12033 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12034 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12035
12036 int
12037 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12038 rtx *modconst, int *elementwidth)
12039 {
12040 rtx tmpconst;
12041 int tmpwidth;
12042 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12043
12044 if (retval < 0 || retval > 5)
12045 return 0;
12046
12047 if (modconst)
12048 *modconst = tmpconst;
12049
12050 if (elementwidth)
12051 *elementwidth = tmpwidth;
12052
12053 return 1;
12054 }
12055
12056 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12057 the immediate is valid, write a constant suitable for using as an operand
12058 to VSHR/VSHL to *MODCONST and the corresponding element width to
12059 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right
12060 shift, because the two have different limitations.
12061
12062 int
12063 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12064 rtx *modconst, int *elementwidth,
12065 bool isleftshift)
12066 {
12067 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12068 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12069 unsigned HOST_WIDE_INT last_elt = 0;
12070 unsigned HOST_WIDE_INT maxshift;
12071
12072 /* All elements of the vector constant must hold the same shift amount. */
12073 for (i = 0; i < n_elts; i++)
12074 {
12075 rtx el = CONST_VECTOR_ELT (op, i);
12076 unsigned HOST_WIDE_INT elpart;
12077
12078 if (CONST_INT_P (el))
12079 elpart = INTVAL (el);
12080 else if (CONST_DOUBLE_P (el))
12081 return 0;
12082 else
12083 gcc_unreachable ();
12084
12085 if (i != 0 && elpart != last_elt)
12086 return 0;
12087
12088 last_elt = elpart;
12089 }
12090
12091 /* Shift less than element size. */
12092 maxshift = innersize * 8;
12093
12094 if (isleftshift)
12095 {
12096 /* Left shift immediate value can be from 0 to <size>-1. */
12097 if (last_elt >= maxshift)
12098 return 0;
12099 }
12100 else
12101 {
12102 /* Right shift immediate value can be from 1 to <size>. */
12103 if (last_elt == 0 || last_elt > maxshift)
12104 return 0;
12105 }
12106
12107 if (elementwidth)
12108 *elementwidth = innersize * 8;
12109
12110 if (modconst)
12111 *modconst = CONST_VECTOR_ELT (op, 0);
12112
12113 return 1;
12114 }
12115
12116 /* Return a string suitable for output of Neon immediate logic operation
12117 MNEM. */
12118
12119 char *
12120 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12121 int inverse, int quad)
12122 {
12123 int width, is_valid;
12124 static char templ[40];
12125
12126 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12127
12128 gcc_assert (is_valid != 0);
12129
12130 if (quad)
12131 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12132 else
12133 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12134
12135 return templ;
12136 }
12137
12138 /* Return a string suitable for output of Neon immediate shift operation
12139 (VSHR or VSHL) MNEM. */
12140
12141 char *
12142 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12143 enum machine_mode mode, int quad,
12144 bool isleftshift)
12145 {
12146 int width, is_valid;
12147 static char templ[40];
12148
12149 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12150 gcc_assert (is_valid != 0);
12151
12152 if (quad)
12153 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12154 else
12155 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12156
12157 return templ;
12158 }
12159
12160 /* Output a sequence of pairwise operations to implement a reduction.
12161 NOTE: We do "too much work" here, because pairwise operations work on two
12162 registers-worth of operands in one go. Unfortunately we can't exploit those
12163 extra calculations to do the full operation in fewer steps, I don't think.
12164 Although all vector elements of the result but the first are ignored, we
12165 actually calculate the same result in each of the elements. An alternative
12166 such as initially loading a vector with zero to use as each of the second
12167 operands would use up an additional register and take an extra instruction,
12168 for no particular gain. */
12169
12170 void
12171 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12172 rtx (*reduc) (rtx, rtx, rtx))
12173 {
12174 enum machine_mode inner = GET_MODE_INNER (mode);
12175 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12176 rtx tmpsum = op1;
12177
12178 for (i = parts / 2; i >= 1; i /= 2)
12179 {
12180 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12181 emit_insn (reduc (dest, tmpsum, tmpsum));
12182 tmpsum = dest;
12183 }
12184 }
12185
12186 /* If VALS is a vector constant that can be loaded into a register
12187 using VDUP, generate instructions to do so and return an RTX to
12188 assign to the register. Otherwise return NULL_RTX. */
12189
12190 static rtx
12191 neon_vdup_constant (rtx vals)
12192 {
12193 enum machine_mode mode = GET_MODE (vals);
12194 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12195 int n_elts = GET_MODE_NUNITS (mode);
12196 bool all_same = true;
12197 rtx x;
12198 int i;
12199
12200 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12201 return NULL_RTX;
12202
12203 for (i = 0; i < n_elts; ++i)
12204 {
12205 x = XVECEXP (vals, 0, i);
12206 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12207 all_same = false;
12208 }
12209
12210 if (!all_same)
12211 /* The elements are not all the same. We could handle repeating
12212 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12213 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12214 vdup.i16). */
12215 return NULL_RTX;
12216
12217 /* We can load this constant by using VDUP and a constant in a
12218 single ARM register. This will be cheaper than a vector
12219 load. */
12220
12221 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12222 return gen_rtx_VEC_DUPLICATE (mode, x);
12223 }
12224
12225 /* Generate code to load VALS, which is a PARALLEL containing only
12226 constants (for vec_init) or CONST_VECTOR, efficiently into a
12227 register. Returns an RTX to copy into the register, or NULL_RTX
12228 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12229
12230 rtx
12231 neon_make_constant (rtx vals)
12232 {
12233 enum machine_mode mode = GET_MODE (vals);
12234 rtx target;
12235 rtx const_vec = NULL_RTX;
12236 int n_elts = GET_MODE_NUNITS (mode);
12237 int n_const = 0;
12238 int i;
12239
12240 if (GET_CODE (vals) == CONST_VECTOR)
12241 const_vec = vals;
12242 else if (GET_CODE (vals) == PARALLEL)
12243 {
12244 /* A CONST_VECTOR must contain only CONST_INTs and
12245 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12246 Only store valid constants in a CONST_VECTOR. */
12247 for (i = 0; i < n_elts; ++i)
12248 {
12249 rtx x = XVECEXP (vals, 0, i);
12250 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12251 n_const++;
12252 }
12253 if (n_const == n_elts)
12254 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12255 }
12256 else
12257 gcc_unreachable ();
12258
12259 if (const_vec != NULL
12260 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12261 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12262 return const_vec;
12263 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12264 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12265 pipeline cycle; creating the constant takes one or two ARM
12266 pipeline cycles. */
12267 return target;
12268 else if (const_vec != NULL_RTX)
12269 /* Load from constant pool. On Cortex-A8 this takes two cycles
12270 (for either double or quad vectors). We can not take advantage
12271 of single-cycle VLD1 because we need a PC-relative addressing
12272 mode. */
12273 return const_vec;
12274 else
12275 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12276 We can not construct an initializer. */
12277 return NULL_RTX;
12278 }
12279
12280 /* Initialize vector TARGET to VALS. */
12281
12282 void
12283 neon_expand_vector_init (rtx target, rtx vals)
12284 {
12285 enum machine_mode mode = GET_MODE (target);
12286 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12287 int n_elts = GET_MODE_NUNITS (mode);
12288 int n_var = 0, one_var = -1;
12289 bool all_same = true;
12290 rtx x, mem;
12291 int i;
12292
12293 for (i = 0; i < n_elts; ++i)
12294 {
12295 x = XVECEXP (vals, 0, i);
12296 if (!CONSTANT_P (x))
12297 ++n_var, one_var = i;
12298
12299 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12300 all_same = false;
12301 }
12302
12303 if (n_var == 0)
12304 {
12305 rtx constant = neon_make_constant (vals);
12306 if (constant != NULL_RTX)
12307 {
12308 emit_move_insn (target, constant);
12309 return;
12310 }
12311 }
12312
12313 /* Splat a single non-constant element if we can. */
12314 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12315 {
12316 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12317 emit_insn (gen_rtx_SET (VOIDmode, target,
12318 gen_rtx_VEC_DUPLICATE (mode, x)));
12319 return;
12320 }
12321
12322 /* One field is non-constant. Load constant then overwrite varying
12323 field. This is more efficient than using the stack. */
12324 if (n_var == 1)
12325 {
12326 rtx copy = copy_rtx (vals);
12327 rtx index = GEN_INT (one_var);
12328
12329 /* Load constant part of vector, substitute neighboring value for
12330 varying element. */
12331 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12332 neon_expand_vector_init (target, copy);
12333
12334 /* Insert variable. */
12335 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12336 switch (mode)
12337 {
12338 case V8QImode:
12339 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12340 break;
12341 case V16QImode:
12342 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12343 break;
12344 case V4HImode:
12345 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12346 break;
12347 case V8HImode:
12348 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12349 break;
12350 case V2SImode:
12351 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12352 break;
12353 case V4SImode:
12354 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12355 break;
12356 case V2SFmode:
12357 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12358 break;
12359 case V4SFmode:
12360 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12361 break;
12362 case V2DImode:
12363 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12364 break;
12365 default:
12366 gcc_unreachable ();
12367 }
12368 return;
12369 }
12370
12371 /* Construct the vector in memory one field at a time
12372 and load the whole vector. */
12373 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12374 for (i = 0; i < n_elts; i++)
12375 emit_move_insn (adjust_address_nv (mem, inner_mode,
12376 i * GET_MODE_SIZE (inner_mode)),
12377 XVECEXP (vals, 0, i));
12378 emit_move_insn (target, mem);
12379 }
12380
12381 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12382 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12383 reported source locations are bogus. */
12384
12385 static void
12386 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12387 const char *err)
12388 {
12389 HOST_WIDE_INT lane;
12390
12391 gcc_assert (CONST_INT_P (operand));
12392
12393 lane = INTVAL (operand);
12394
12395 if (lane < low || lane >= high)
12396 error (err);
12397 }
12398
12399 /* Bounds-check lanes. */
12400
12401 void
12402 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12403 {
12404 bounds_check (operand, low, high, "lane out of range");
12405 }
12406
12407 /* Bounds-check constants. */
12408
12409 void
12410 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12411 {
12412 bounds_check (operand, low, high, "constant out of range");
12413 }
12414
12415 HOST_WIDE_INT
12416 neon_element_bits (enum machine_mode mode)
12417 {
12418 if (mode == DImode)
12419 return GET_MODE_BITSIZE (mode);
12420 else
12421 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12422 }
12423
12424 \f
12425 /* Predicates for `match_operand' and `match_operator'. */
12426
12427 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12428 WB is true if full writeback address modes are allowed and is false
12429 if limited writeback address modes (POST_INC and PRE_DEC) are
12430 allowed. */
12431
12432 int
12433 arm_coproc_mem_operand (rtx op, bool wb)
12434 {
12435 rtx ind;
12436
12437 /* Reject eliminable registers. */
12438 if (! (reload_in_progress || reload_completed)
12439 && ( reg_mentioned_p (frame_pointer_rtx, op)
12440 || reg_mentioned_p (arg_pointer_rtx, op)
12441 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12442 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12443 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12444 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12445 return FALSE;
12446
12447 /* Constants are converted into offsets from labels. */
12448 if (!MEM_P (op))
12449 return FALSE;
12450
12451 ind = XEXP (op, 0);
12452
12453 if (reload_completed
12454 && (GET_CODE (ind) == LABEL_REF
12455 || (GET_CODE (ind) == CONST
12456 && GET_CODE (XEXP (ind, 0)) == PLUS
12457 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12458 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12459 return TRUE;
12460
12461 /* Match: (mem (reg)). */
12462 if (REG_P (ind))
12463 return arm_address_register_rtx_p (ind, 0);
12464
12465 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12466 acceptable in any case (subject to verification by
12467 arm_address_register_rtx_p). We need WB to be true to accept
12468 PRE_INC and POST_DEC. */
12469 if (GET_CODE (ind) == POST_INC
12470 || GET_CODE (ind) == PRE_DEC
12471 || (wb
12472 && (GET_CODE (ind) == PRE_INC
12473 || GET_CODE (ind) == POST_DEC)))
12474 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12475
12476 if (wb
12477 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12478 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12479 && GET_CODE (XEXP (ind, 1)) == PLUS
12480 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12481 ind = XEXP (ind, 1);
12482
12483 /* Match:
12484 (plus (reg)
12485 (const)). */
12486 if (GET_CODE (ind) == PLUS
12487 && REG_P (XEXP (ind, 0))
12488 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12489 && CONST_INT_P (XEXP (ind, 1))
12490 && INTVAL (XEXP (ind, 1)) > -1024
12491 && INTVAL (XEXP (ind, 1)) < 1024
12492 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12493 return TRUE;
12494
12495 return FALSE;
12496 }
12497
12498 /* Return TRUE if OP is a memory operand which we can load or store a vector
12499 to/from. TYPE is one of the following values:
12500 0 - Vector load/store (vldr)
12501 1 - Core registers (ldm)
12502 2 - Element/structure loads (vld1)
12503 */
12504 int
12505 neon_vector_mem_operand (rtx op, int type, bool strict)
12506 {
12507 rtx ind;
12508
12509 /* Reject eliminable registers. */
12510 if (! (reload_in_progress || reload_completed)
12511 && ( reg_mentioned_p (frame_pointer_rtx, op)
12512 || reg_mentioned_p (arg_pointer_rtx, op)
12513 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12514 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12515 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12516 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12517 return !strict;
12518
12519 /* Constants are converted into offsets from labels. */
12520 if (!MEM_P (op))
12521 return FALSE;
12522
12523 ind = XEXP (op, 0);
12524
12525 if (reload_completed
12526 && (GET_CODE (ind) == LABEL_REF
12527 || (GET_CODE (ind) == CONST
12528 && GET_CODE (XEXP (ind, 0)) == PLUS
12529 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12530 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12531 return TRUE;
12532
12533 /* Match: (mem (reg)). */
12534 if (REG_P (ind))
12535 return arm_address_register_rtx_p (ind, 0);
12536
12537 /* Allow post-increment (and, for vldr/vstr, pre-decrement) with Neon registers. */
12538 if ((type != 1 && GET_CODE (ind) == POST_INC)
12539 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12540 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12541
12542 /* FIXME: vld1 allows register post-modify. */
12543
12544 /* Match:
12545 (plus (reg)
12546 (const)). */
12547 if (type == 0
12548 && GET_CODE (ind) == PLUS
12549 && REG_P (XEXP (ind, 0))
12550 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12551 && CONST_INT_P (XEXP (ind, 1))
12552 && INTVAL (XEXP (ind, 1)) > -1024
12553 /* For quad modes, we restrict the constant offset to be slightly less
12554 than what the instruction format permits. We have no such constraint
12555 on double mode offsets. (This must match arm_legitimate_index_p.) */
12556 && (INTVAL (XEXP (ind, 1))
12557 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12558 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12559 return TRUE;
12560
12561 return FALSE;
12562 }
12563
12564 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12565 type. */
12566 int
12567 neon_struct_mem_operand (rtx op)
12568 {
12569 rtx ind;
12570
12571 /* Reject eliminable registers. */
12572 if (! (reload_in_progress || reload_completed)
12573 && ( reg_mentioned_p (frame_pointer_rtx, op)
12574 || reg_mentioned_p (arg_pointer_rtx, op)
12575 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12576 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12577 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12578 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12579 return FALSE;
12580
12581 /* Constants are converted into offsets from labels. */
12582 if (!MEM_P (op))
12583 return FALSE;
12584
12585 ind = XEXP (op, 0);
12586
12587 if (reload_completed
12588 && (GET_CODE (ind) == LABEL_REF
12589 || (GET_CODE (ind) == CONST
12590 && GET_CODE (XEXP (ind, 0)) == PLUS
12591 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12592 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12593 return TRUE;
12594
12595 /* Match: (mem (reg)). */
12596 if (REG_P (ind))
12597 return arm_address_register_rtx_p (ind, 0);
12598
12599 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12600 if (GET_CODE (ind) == POST_INC
12601 || GET_CODE (ind) == PRE_DEC)
12602 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12603
12604 return FALSE;
12605 }
12606
12607 /* Return true if X is a register that will be eliminated later on. */
12608 int
12609 arm_eliminable_register (rtx x)
12610 {
12611 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12612 || REGNO (x) == ARG_POINTER_REGNUM
12613 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12614 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12615 }
12616
12617 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12618 coprocessor registers. Otherwise return NO_REGS. */
12619
12620 enum reg_class
12621 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12622 {
12623 if (mode == HFmode)
12624 {
12625 if (!TARGET_NEON_FP16)
12626 return GENERAL_REGS;
12627 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12628 return NO_REGS;
12629 return GENERAL_REGS;
12630 }
12631
12632 /* The neon move patterns handle all legitimate vector and struct
12633 addresses. */
12634 if (TARGET_NEON
12635 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12636 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12637 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12638 || VALID_NEON_STRUCT_MODE (mode)))
12639 return NO_REGS;
12640
12641 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12642 return NO_REGS;
12643
12644 return GENERAL_REGS;
12645 }
12646
12647 /* Values which must be returned in the most-significant end of the return
12648 register. */
12649
12650 static bool
12651 arm_return_in_msb (const_tree valtype)
12652 {
12653 return (TARGET_AAPCS_BASED
12654 && BYTES_BIG_ENDIAN
12655 && (AGGREGATE_TYPE_P (valtype)
12656 || TREE_CODE (valtype) == COMPLEX_TYPE
12657 || FIXED_POINT_TYPE_P (valtype)));
12658 }
12659
12660 /* Return TRUE if X references a SYMBOL_REF. */
12661 int
12662 symbol_mentioned_p (rtx x)
12663 {
12664 const char * fmt;
12665 int i;
12666
12667 if (GET_CODE (x) == SYMBOL_REF)
12668 return 1;
12669
12670 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12671 are constant offsets, not symbols. */
12672 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12673 return 0;
12674
12675 fmt = GET_RTX_FORMAT (GET_CODE (x));
12676
12677 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12678 {
12679 if (fmt[i] == 'E')
12680 {
12681 int j;
12682
12683 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12684 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12685 return 1;
12686 }
12687 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12688 return 1;
12689 }
12690
12691 return 0;
12692 }
12693
12694 /* Return TRUE if X references a LABEL_REF. */
12695 int
12696 label_mentioned_p (rtx x)
12697 {
12698 const char * fmt;
12699 int i;
12700
12701 if (GET_CODE (x) == LABEL_REF)
12702 return 1;
12703
12704 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12705 instruction, but they are constant offsets, not symbols. */
12706 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12707 return 0;
12708
12709 fmt = GET_RTX_FORMAT (GET_CODE (x));
12710 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12711 {
12712 if (fmt[i] == 'E')
12713 {
12714 int j;
12715
12716 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12717 if (label_mentioned_p (XVECEXP (x, i, j)))
12718 return 1;
12719 }
12720 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12721 return 1;
12722 }
12723
12724 return 0;
12725 }
12726
12727 int
12728 tls_mentioned_p (rtx x)
12729 {
12730 switch (GET_CODE (x))
12731 {
12732 case CONST:
12733 return tls_mentioned_p (XEXP (x, 0));
12734
12735 case UNSPEC:
12736 if (XINT (x, 1) == UNSPEC_TLS)
12737 return 1;
12738
12739 default:
12740 return 0;
12741 }
12742 }
12743
12744 /* Must not copy any rtx that uses a pc-relative address. */
12745
12746 static int
12747 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
12748 {
12749 if (GET_CODE (*x) == UNSPEC
12750 && (XINT (*x, 1) == UNSPEC_PIC_BASE
12751 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
12752 return 1;
12753 return 0;
12754 }
12755
12756 static bool
12757 arm_cannot_copy_insn_p (rtx insn)
12758 {
12759 /* The tls call insn cannot be copied, as it is paired with a data
12760 word. */
12761 if (recog_memoized (insn) == CODE_FOR_tlscall)
12762 return true;
12763
12764 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
12765 }
12766
12767 enum rtx_code
12768 minmax_code (rtx x)
12769 {
12770 enum rtx_code code = GET_CODE (x);
12771
12772 switch (code)
12773 {
12774 case SMAX:
12775 return GE;
12776 case SMIN:
12777 return LE;
12778 case UMIN:
12779 return LEU;
12780 case UMAX:
12781 return GEU;
12782 default:
12783 gcc_unreachable ();
12784 }
12785 }
12786
12787 /* Match pair of min/max operators that can be implemented via usat/ssat. */
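/* For example, bounds [0, 255] give an unsigned saturation with *MASK = 8
   (usat #8), and bounds [-128, 127] give a signed saturation with *MASK = 8
   (ssat #8). */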
12788
12789 bool
12790 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12791 int *mask, bool *signed_sat)
12792 {
12793 /* The high bound must be a power of two minus one. */
12794 int log = exact_log2 (INTVAL (hi_bound) + 1);
12795 if (log == -1)
12796 return false;
12797
12798 /* The low bound is either zero (for usat) or one less than the
12799 negation of the high bound (for ssat). */
12800 if (INTVAL (lo_bound) == 0)
12801 {
12802 if (mask)
12803 *mask = log;
12804 if (signed_sat)
12805 *signed_sat = false;
12806
12807 return true;
12808 }
12809
12810 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12811 {
12812 if (mask)
12813 *mask = log + 1;
12814 if (signed_sat)
12815 *signed_sat = true;
12816
12817 return true;
12818 }
12819
12820 return false;
12821 }
12822
12823 /* Return 1 if memory locations are adjacent. */
12824 int
12825 adjacent_mem_locations (rtx a, rtx b)
12826 {
12827 /* We don't guarantee to preserve the order of these memory refs. */
12828 if (volatile_refs_p (a) || volatile_refs_p (b))
12829 return 0;
12830
12831 if ((REG_P (XEXP (a, 0))
12832 || (GET_CODE (XEXP (a, 0)) == PLUS
12833 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12834 && (REG_P (XEXP (b, 0))
12835 || (GET_CODE (XEXP (b, 0)) == PLUS
12836 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12837 {
12838 HOST_WIDE_INT val0 = 0, val1 = 0;
12839 rtx reg0, reg1;
12840 int val_diff;
12841
12842 if (GET_CODE (XEXP (a, 0)) == PLUS)
12843 {
12844 reg0 = XEXP (XEXP (a, 0), 0);
12845 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12846 }
12847 else
12848 reg0 = XEXP (a, 0);
12849
12850 if (GET_CODE (XEXP (b, 0)) == PLUS)
12851 {
12852 reg1 = XEXP (XEXP (b, 0), 0);
12853 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12854 }
12855 else
12856 reg1 = XEXP (b, 0);
12857
12858 /* Don't accept any offset that will require multiple
12859 instructions to handle, since this would cause the
12860 arith_adjacentmem pattern to output an overlong sequence. */
12861 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12862 return 0;
12863
12864 /* Don't allow an eliminable register: register elimination can make
12865 the offset too large. */
12866 if (arm_eliminable_register (reg0))
12867 return 0;
12868
12869 val_diff = val1 - val0;
12870
12871 if (arm_ld_sched)
12872 {
12873 /* If the target has load delay slots, then there's no benefit
12874 to using an ldm instruction unless the offset is zero and
12875 we are optimizing for size. */
12876 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12877 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12878 && (val_diff == 4 || val_diff == -4));
12879 }
12880
12881 return ((REGNO (reg0) == REGNO (reg1))
12882 && (val_diff == 4 || val_diff == -4));
12883 }
12884
12885 return 0;
12886 }
12887
12888 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12889 for load operations, false for store operations. CONSECUTIVE is true
12890 if the register numbers in the operation must be consecutive in the register
12891 bank. RETURN_PC is true if the value is to be loaded into the PC.
12892 The pattern we are trying to match for load is:
12893 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12894 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12895 :
12896 :
12897 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12898 ]
12899 where
12900 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12901 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12902 3. If consecutive is TRUE, then for kth register being loaded,
12903 REGNO (R_dk) = REGNO (R_d0) + k.
12904 The pattern for store is similar. */
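/* For instance (illustrative register numbers), "ldmia r0!, {r4, r5, r6}"
   is represented as
     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 12)))
                (set (reg:SI r4) (mem:SI (reg:SI r0)))
                (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))
                (set (reg:SI r6) (mem:SI (plus:SI (reg:SI r0) (const_int 8))))])
   where the leading SET is the write-back of the base register.  */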
12905 bool
12906 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
12907 bool consecutive, bool return_pc)
12908 {
12909 HOST_WIDE_INT count = XVECLEN (op, 0);
12910 rtx reg, mem, addr;
12911 unsigned regno;
12912 unsigned first_regno;
12913 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12914 rtx elt;
12915 bool addr_reg_in_reglist = false;
12916 bool update = false;
12917 int reg_increment;
12918 int offset_adj;
12919 int regs_per_val;
12920
12921 /* If not in SImode, then registers must be consecutive
12922 (e.g., VLDM instructions for DFmode). */
12923 gcc_assert ((mode == SImode) || consecutive);
12924 /* Setting return_pc for stores is illegal. */
12925 gcc_assert (!return_pc || load);
12926
12927 /* Set up the increments and the regs per val based on the mode. */
12928 reg_increment = GET_MODE_SIZE (mode);
12929 regs_per_val = reg_increment / 4;
12930 offset_adj = return_pc ? 1 : 0;
12931
12932 if (count <= 1
12933 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12934 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12935 return false;
12936
12937 /* Check if this is a write-back. */
12938 elt = XVECEXP (op, 0, offset_adj);
12939 if (GET_CODE (SET_SRC (elt)) == PLUS)
12940 {
12941 i++;
12942 base = 1;
12943 update = true;
12944
12945 /* The offset adjustment must be the number of registers being
12946 popped times the size of a single register. */
12947 if (!REG_P (SET_DEST (elt))
12948 || !REG_P (XEXP (SET_SRC (elt), 0))
12949 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12950 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12951 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12952 ((count - 1 - offset_adj) * reg_increment))
12953 return false;
12954 }
12955
12956 i = i + offset_adj;
12957 base = base + offset_adj;
12958 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12959 success depends on the type: VLDM can do just one reg;
12960 LDM must do at least two.
12961 if ((count <= i) && (mode == SImode))
12962 return false;
12963
12964 elt = XVECEXP (op, 0, i - 1);
12965 if (GET_CODE (elt) != SET)
12966 return false;
12967
12968 if (load)
12969 {
12970 reg = SET_DEST (elt);
12971 mem = SET_SRC (elt);
12972 }
12973 else
12974 {
12975 reg = SET_SRC (elt);
12976 mem = SET_DEST (elt);
12977 }
12978
12979 if (!REG_P (reg) || !MEM_P (mem))
12980 return false;
12981
12982 regno = REGNO (reg);
12983 first_regno = regno;
12984 addr = XEXP (mem, 0);
12985 if (GET_CODE (addr) == PLUS)
12986 {
12987 if (!CONST_INT_P (XEXP (addr, 1)))
12988 return false;
12989
12990 offset = INTVAL (XEXP (addr, 1));
12991 addr = XEXP (addr, 0);
12992 }
12993
12994 if (!REG_P (addr))
12995 return false;
12996
12997 /* Don't allow SP to be loaded unless it is also the base register. It
12998 guarantees that SP is reset correctly when an LDM instruction
12999 is interrupted. Otherwise, we might end up with a corrupt stack. */
13000 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13001 return false;
13002
13003 for (; i < count; i++)
13004 {
13005 elt = XVECEXP (op, 0, i);
13006 if (GET_CODE (elt) != SET)
13007 return false;
13008
13009 if (load)
13010 {
13011 reg = SET_DEST (elt);
13012 mem = SET_SRC (elt);
13013 }
13014 else
13015 {
13016 reg = SET_SRC (elt);
13017 mem = SET_DEST (elt);
13018 }
13019
13020 if (!REG_P (reg)
13021 || GET_MODE (reg) != mode
13022 || REGNO (reg) <= regno
13023 || (consecutive
13024 && (REGNO (reg) !=
13025 (unsigned int) (first_regno + regs_per_val * (i - base))))
13026 /* Don't allow SP to be loaded unless it is also the base register. It
13027 guarantees that SP is reset correctly when an LDM instruction
13028 is interrupted. Otherwise, we might end up with a corrupt stack. */
13029 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13030 || !MEM_P (mem)
13031 || GET_MODE (mem) != mode
13032 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13033 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13034 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13035 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13036 offset + (i - base) * reg_increment))
13037 && (!REG_P (XEXP (mem, 0))
13038 || offset + (i - base) * reg_increment != 0)))
13039 return false;
13040
13041 regno = REGNO (reg);
13042 if (regno == REGNO (addr))
13043 addr_reg_in_reglist = true;
13044 }
13045
13046 if (load)
13047 {
13048 if (update && addr_reg_in_reglist)
13049 return false;
13050
13051 /* For Thumb-1, the address register is always modified, either by write-back
13052 or by an explicit load. If the pattern does not describe an update,
13053 then the address register must be in the list of loaded registers. */
13054 if (TARGET_THUMB1)
13055 return update || addr_reg_in_reglist;
13056 }
13057
13058 return true;
13059 }
13060
13061 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13062 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13063 instruction. ADD_OFFSET is nonzero if the base address register needs
13064 to be modified with an add instruction before we can use it. */
13065
13066 static bool
13067 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13068 int nops, HOST_WIDE_INT add_offset)
13069 {
13070 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13071 if the offset isn't small enough. The reason 2 ldrs are faster
13072 is because these ARMs are able to do more than one cache access
13073 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13074 whilst the ARM8 has a double bandwidth cache. This means that
13075 these cores can do both an instruction fetch and a data fetch in
13076 a single cycle, so the trick of calculating the address into a
13077 scratch register (one of the result regs) and then doing a load
13078 multiple actually becomes slower (and no smaller in code size).
13079 That is the transformation
13080
13081 ldr rd1, [rbase + offset]
13082 ldr rd2, [rbase + offset + 4]
13083
13084 to
13085
13086 add rd1, rbase, offset
13087 ldmia rd1, {rd1, rd2}
13088
13089 produces worse code -- '3 cycles + any stalls on rd2' instead of
13090 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13091 access per cycle, the first sequence could never complete in less
13092 than 6 cycles, whereas the ldm sequence would only take 5 and
13093 would make better use of sequential accesses if not hitting the
13094 cache.
13095
13096 We cheat here and test 'arm_ld_sched' which we currently know to
13097 only be true for the ARM8, ARM9 and StrongARM. If this ever
13098 changes, then the test below needs to be reworked. */
13099 if (nops == 2 && arm_ld_sched && add_offset != 0)
13100 return false;
13101
13102 /* XScale has load-store double instructions, but they have stricter
13103 alignment requirements than load-store multiple, so we cannot
13104 use them.
13105
13106 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13107 the pipeline until completion.
13108
13109 NREGS CYCLES
13110 1 3
13111 2 4
13112 3 5
13113 4 6
13114
13115 An ldr instruction takes 1-3 cycles, but does not block the
13116 pipeline.
13117
13118 NREGS CYCLES
13119 1 1-3
13120 2 2-6
13121 3 3-9
13122 4 4-12
13123
13124 Best case ldr will always win. However, the more ldr instructions
13125 we issue, the less likely we are to be able to schedule them well.
13126 Using ldr instructions also increases code size.
13127
13128 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13129 for counts of 3 or 4 regs. */
13130 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13131 return false;
13132 return true;
13133 }
13134
13135 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13136 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13137 an array ORDER which describes the sequence to use when accessing the
13138 offsets that produces an ascending order. In this sequence, each
13139 offset must be larger by exactly 4 than the previous one. ORDER[0]
13140 must have been filled in with the lowest offset by the caller.
13141 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13142 we use to verify that ORDER produces an ascending order of registers.
13143 Return true if it was possible to construct such an order, false if
13144 not. */
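/* For example, with UNSORTED_OFFSETS = {8, 0, 4, 12} the caller sets
   ORDER[0] = 1 (the index of offset 0); the loop then fills in
   ORDER = {1, 2, 0, 3}.  If any step finds no offset exactly 4 larger
   than the previous one, or finds more than one, the function fails.  */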
13145
13146 static bool
13147 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13148 int *unsorted_regs)
13149 {
13150 int i;
13151 for (i = 1; i < nops; i++)
13152 {
13153 int j;
13154
13155 order[i] = order[i - 1];
13156 for (j = 0; j < nops; j++)
13157 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13158 {
13159 /* We must find exactly one offset that is higher than the
13160 previous one by 4. */
13161 if (order[i] != order[i - 1])
13162 return false;
13163 order[i] = j;
13164 }
13165 if (order[i] == order[i - 1])
13166 return false;
13167 /* The register numbers must be ascending. */
13168 if (unsorted_regs != NULL
13169 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13170 return false;
13171 }
13172 return true;
13173 }
13174
13175 /* Used to determine in a peephole whether a sequence of load
13176 instructions can be changed into a load-multiple instruction.
13177 NOPS is the number of separate load instructions we are examining. The
13178 first NOPS entries in OPERANDS are the destination registers, the
13179 next NOPS entries are memory operands. If this function is
13180 successful, *BASE is set to the common base register of the memory
13181 accesses; *LOAD_OFFSET is set to the first memory location's offset
13182 from that base register.
13183 REGS is an array filled in with the destination register numbers.
13184 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13185 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13186 the sequence of registers in REGS matches the loads from ascending memory
13187 locations, and the function verifies that the register numbers are
13188 themselves ascending. If CHECK_REGS is false, the register numbers
13189 are stored in the order they are found in the operands. */
13190 static int
13191 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13192 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13193 {
13194 int unsorted_regs[MAX_LDM_STM_OPS];
13195 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13196 int order[MAX_LDM_STM_OPS];
13197 rtx base_reg_rtx = NULL;
13198 int base_reg = -1;
13199 int i, ldm_case;
13200
13201 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13202 easily extended if required. */
13203 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13204
13205 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13206
13207 /* Loop over the operands and check that the memory references are
13208 suitable (i.e. immediate offsets from the same base register). At
13209 the same time, extract the target register, and the memory
13210 offsets. */
13211 for (i = 0; i < nops; i++)
13212 {
13213 rtx reg;
13214 rtx offset;
13215
13216 /* Convert a subreg of a mem into the mem itself. */
13217 if (GET_CODE (operands[nops + i]) == SUBREG)
13218 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13219
13220 gcc_assert (MEM_P (operands[nops + i]));
13221
13222 /* Don't reorder volatile memory references; it doesn't seem worth
13223 looking for the case where the order is ok anyway. */
13224 if (MEM_VOLATILE_P (operands[nops + i]))
13225 return 0;
13226
13227 offset = const0_rtx;
13228
13229 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13230 || (GET_CODE (reg) == SUBREG
13231 && REG_P (reg = SUBREG_REG (reg))))
13232 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13233 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13234 || (GET_CODE (reg) == SUBREG
13235 && REG_P (reg = SUBREG_REG (reg))))
13236 && (CONST_INT_P (offset
13237 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13238 {
13239 if (i == 0)
13240 {
13241 base_reg = REGNO (reg);
13242 base_reg_rtx = reg;
13243 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13244 return 0;
13245 }
13246 else if (base_reg != (int) REGNO (reg))
13247 /* Not addressed from the same base register. */
13248 return 0;
13249
13250 unsorted_regs[i] = (REG_P (operands[i])
13251 ? REGNO (operands[i])
13252 : REGNO (SUBREG_REG (operands[i])));
13253
13254 /* If it isn't an integer register, or if it overwrites the
13255 base register but isn't the last insn in the list, then
13256 we can't do this. */
13257 if (unsorted_regs[i] < 0
13258 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13259 || unsorted_regs[i] > 14
13260 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13261 return 0;
13262
13263 /* Don't allow SP to be loaded unless it is also the base
13264 register. It guarantees that SP is reset correctly when
13265 an LDM instruction is interrupted. Otherwise, we might
13266 end up with a corrupt stack. */
13267 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13268 return 0;
13269
13270 unsorted_offsets[i] = INTVAL (offset);
13271 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13272 order[0] = i;
13273 }
13274 else
13275 /* Not a suitable memory address. */
13276 return 0;
13277 }
13278
13279 /* All the useful information has now been extracted from the
13280 operands into unsorted_regs and unsorted_offsets; additionally,
13281 order[0] has been set to the lowest offset in the list. Sort
13282 the offsets into order, verifying that they are adjacent, and
13283 check that the register numbers are ascending. */
13284 if (!compute_offset_order (nops, unsorted_offsets, order,
13285 check_regs ? unsorted_regs : NULL))
13286 return 0;
13287
13288 if (saved_order)
13289 memcpy (saved_order, order, sizeof order);
13290
13291 if (base)
13292 {
13293 *base = base_reg;
13294
13295 for (i = 0; i < nops; i++)
13296 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13297
13298 *load_offset = unsorted_offsets[order[0]];
13299 }
13300
13301 if (TARGET_THUMB1
13302 && !peep2_reg_dead_p (nops, base_reg_rtx))
13303 return 0;
13304
13305 if (unsorted_offsets[order[0]] == 0)
13306 ldm_case = 1; /* ldmia */
13307 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13308 ldm_case = 2; /* ldmib */
13309 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13310 ldm_case = 3; /* ldmda */
13311 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13312 ldm_case = 4; /* ldmdb */
13313 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13314 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13315 ldm_case = 5;
13316 else
13317 return 0;
13318
13319 if (!multiple_operation_profitable_p (false, nops,
13320 ldm_case == 5
13321 ? unsorted_offsets[order[0]] : 0))
13322 return 0;
13323
13324 return ldm_case;
13325 }
13326
13327 /* Used to determine in a peephole whether a sequence of store instructions can
13328 be changed into a store-multiple instruction.
13329 NOPS is the number of separate store instructions we are examining.
13330 NOPS_TOTAL is the total number of instructions recognized by the peephole
13331 pattern.
13332 The first NOPS entries in OPERANDS are the source registers, the next
13333 NOPS entries are memory operands. If this function is successful, *BASE is
13334 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13335 to the first memory location's offset from that base register. REGS is an
13336 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13337 likewise filled with the corresponding rtx's.
13338 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13339 numbers to an ascending order of stores.
13340 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13341 from ascending memory locations, and the function verifies that the register
13342 numbers are themselves ascending. If CHECK_REGS is false, the register
13343 numbers are stored in the order they are found in the operands. */
13344 static int
13345 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13346 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13347 HOST_WIDE_INT *load_offset, bool check_regs)
13348 {
13349 int unsorted_regs[MAX_LDM_STM_OPS];
13350 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13351 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13352 int order[MAX_LDM_STM_OPS];
13353 int base_reg = -1;
13354 rtx base_reg_rtx = NULL;
13355 int i, stm_case;
13356
13357 /* Write-back of the base register is currently only supported for Thumb-1. */
13358 int base_writeback = TARGET_THUMB1;
13359
13360 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13361 easily extended if required. */
13362 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13363
13364 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13365
13366 /* Loop over the operands and check that the memory references are
13367 suitable (i.e. immediate offsets from the same base register). At
13368 the same time, extract the target register, and the memory
13369 offsets. */
13370 for (i = 0; i < nops; i++)
13371 {
13372 rtx reg;
13373 rtx offset;
13374
13375 /* Convert a subreg of a mem into the mem itself. */
13376 if (GET_CODE (operands[nops + i]) == SUBREG)
13377 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13378
13379 gcc_assert (MEM_P (operands[nops + i]));
13380
13381 /* Don't reorder volatile memory references; it doesn't seem worth
13382 looking for the case where the order is ok anyway. */
13383 if (MEM_VOLATILE_P (operands[nops + i]))
13384 return 0;
13385
13386 offset = const0_rtx;
13387
13388 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13389 || (GET_CODE (reg) == SUBREG
13390 && REG_P (reg = SUBREG_REG (reg))))
13391 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13392 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13393 || (GET_CODE (reg) == SUBREG
13394 && REG_P (reg = SUBREG_REG (reg))))
13395 && (CONST_INT_P (offset
13396 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13397 {
13398 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13399 ? operands[i] : SUBREG_REG (operands[i]));
13400 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13401
13402 if (i == 0)
13403 {
13404 base_reg = REGNO (reg);
13405 base_reg_rtx = reg;
13406 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13407 return 0;
13408 }
13409 else if (base_reg != (int) REGNO (reg))
13410 /* Not addressed from the same base register. */
13411 return 0;
13412
13413 /* If it isn't an integer register, then we can't do this. */
13414 if (unsorted_regs[i] < 0
13415 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13416 /* The effects are unpredictable if the base register is
13417 both updated and stored. */
13418 || (base_writeback && unsorted_regs[i] == base_reg)
13419 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13420 || unsorted_regs[i] > 14)
13421 return 0;
13422
13423 unsorted_offsets[i] = INTVAL (offset);
13424 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13425 order[0] = i;
13426 }
13427 else
13428 /* Not a suitable memory address. */
13429 return 0;
13430 }
13431
13432 /* All the useful information has now been extracted from the
13433 operands into unsorted_regs and unsorted_offsets; additionally,
13434 order[0] has been set to the lowest offset in the list. Sort
13435 the offsets into order, verifying that they are adjacent, and
13436 check that the register numbers are ascending. */
13437 if (!compute_offset_order (nops, unsorted_offsets, order,
13438 check_regs ? unsorted_regs : NULL))
13439 return 0;
13440
13441 if (saved_order)
13442 memcpy (saved_order, order, sizeof order);
13443
13444 if (base)
13445 {
13446 *base = base_reg;
13447
13448 for (i = 0; i < nops; i++)
13449 {
13450 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13451 if (reg_rtxs)
13452 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13453 }
13454
13455 *load_offset = unsorted_offsets[order[0]];
13456 }
13457
13458 if (TARGET_THUMB1
13459 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13460 return 0;
13461
13462 if (unsorted_offsets[order[0]] == 0)
13463 stm_case = 1; /* stmia */
13464 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13465 stm_case = 2; /* stmib */
13466 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13467 stm_case = 3; /* stmda */
13468 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13469 stm_case = 4; /* stmdb */
13470 else
13471 return 0;
13472
13473 if (!multiple_operation_profitable_p (false, nops, 0))
13474 return 0;
13475
13476 return stm_case;
13477 }
13478 \f
13479 /* Routines for use in generating RTL. */
13480
13481 /* Generate a load-multiple instruction. COUNT is the number of loads in
13482 the instruction; REGS and MEMS are arrays containing the operands.
13483 BASEREG is the base register to be used in addressing the memory operands.
13484 WBACK_OFFSET is nonzero if the instruction should update the base
13485 register. */
13486
13487 static rtx
13488 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13489 HOST_WIDE_INT wback_offset)
13490 {
13491 int i = 0, j;
13492 rtx result;
13493
13494 if (!multiple_operation_profitable_p (false, count, 0))
13495 {
13496 rtx seq;
13497
13498 start_sequence ();
13499
13500 for (i = 0; i < count; i++)
13501 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13502
13503 if (wback_offset != 0)
13504 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13505
13506 seq = get_insns ();
13507 end_sequence ();
13508
13509 return seq;
13510 }
13511
13512 result = gen_rtx_PARALLEL (VOIDmode,
13513 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13514 if (wback_offset != 0)
13515 {
13516 XVECEXP (result, 0, 0)
13517 = gen_rtx_SET (VOIDmode, basereg,
13518 plus_constant (Pmode, basereg, wback_offset));
13519 i = 1;
13520 count++;
13521 }
13522
13523 for (j = 0; i < count; i++, j++)
13524 XVECEXP (result, 0, i)
13525 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13526
13527 return result;
13528 }
13529
13530 /* Generate a store-multiple instruction. COUNT is the number of stores in
13531 the instruction; REGS and MEMS are arrays containing the operands.
13532 BASEREG is the base register to be used in addressing the memory operands.
13533 WBACK_OFFSET is nonzero if the instruction should update the base
13534 register. */
13535
13536 static rtx
13537 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13538 HOST_WIDE_INT wback_offset)
13539 {
13540 int i = 0, j;
13541 rtx result;
13542
13543 if (GET_CODE (basereg) == PLUS)
13544 basereg = XEXP (basereg, 0);
13545
13546 if (!multiple_operation_profitable_p (false, count, 0))
13547 {
13548 rtx seq;
13549
13550 start_sequence ();
13551
13552 for (i = 0; i < count; i++)
13553 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13554
13555 if (wback_offset != 0)
13556 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13557
13558 seq = get_insns ();
13559 end_sequence ();
13560
13561 return seq;
13562 }
13563
13564 result = gen_rtx_PARALLEL (VOIDmode,
13565 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13566 if (wback_offset != 0)
13567 {
13568 XVECEXP (result, 0, 0)
13569 = gen_rtx_SET (VOIDmode, basereg,
13570 plus_constant (Pmode, basereg, wback_offset));
13571 i = 1;
13572 count++;
13573 }
13574
13575 for (j = 0; i < count; i++, j++)
13576 XVECEXP (result, 0, i)
13577 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13578
13579 return result;
13580 }
13581
13582 /* Generate either a load-multiple or a store-multiple instruction. This
13583 function can be used in situations where we can start with a single MEM
13584 rtx and adjust its address upwards.
13585 COUNT is the number of operations in the instruction, not counting a
13586 possible update of the base register. REGS is an array containing the
13587 register operands.
13588 BASEREG is the base register to be used in addressing the memory operands,
13589 which are constructed from BASEMEM.
13590 WRITE_BACK specifies whether the generated instruction should include an
13591 update of the base register.
13592 OFFSETP is used to pass an offset to and from this function; this offset
13593 is not used when constructing the address (instead BASEMEM should have an
13594 appropriate offset in its address); it is used only for setting
13595 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13596
13597 static rtx
13598 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13599 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13600 {
13601 rtx mems[MAX_LDM_STM_OPS];
13602 HOST_WIDE_INT offset = *offsetp;
13603 int i;
13604
13605 gcc_assert (count <= MAX_LDM_STM_OPS);
13606
13607 if (GET_CODE (basereg) == PLUS)
13608 basereg = XEXP (basereg, 0);
13609
13610 for (i = 0; i < count; i++)
13611 {
13612 rtx addr = plus_constant (Pmode, basereg, i * 4);
13613 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13614 offset += 4;
13615 }
13616
13617 if (write_back)
13618 *offsetp = offset;
13619
13620 if (is_load)
13621 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13622 write_back ? 4 * count : 0);
13623 else
13624 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13625 write_back ? 4 * count : 0);
13626 }
13627
13628 rtx
13629 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13630 rtx basemem, HOST_WIDE_INT *offsetp)
13631 {
13632 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13633 offsetp);
13634 }
13635
13636 rtx
13637 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13638 rtx basemem, HOST_WIDE_INT *offsetp)
13639 {
13640 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13641 offsetp);
13642 }
13643
13644 /* Called from a peephole2 expander to turn a sequence of loads into an
13645 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13646 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13647 is true if we can reorder the registers because they are subsequently used
13648 commutatively.
13649 Returns true iff we could generate a new instruction. */
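/* As an illustration, a peephole match of
     ldr r1, [r0]
     ldr r2, [r0, #4]
   can be rewritten (where profitable for the target) as
     ldmia r0, {r1, r2}
   or, for Thumb-1, as the write-back form, since there the base register
   must be dead after the sequence.  */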
13650
13651 bool
13652 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13653 {
13654 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13655 rtx mems[MAX_LDM_STM_OPS];
13656 int i, j, base_reg;
13657 rtx base_reg_rtx;
13658 HOST_WIDE_INT offset;
13659 int write_back = FALSE;
13660 int ldm_case;
13661 rtx addr;
13662
13663 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13664 &base_reg, &offset, !sort_regs);
13665
13666 if (ldm_case == 0)
13667 return false;
13668
13669 if (sort_regs)
13670 for (i = 0; i < nops - 1; i++)
13671 for (j = i + 1; j < nops; j++)
13672 if (regs[i] > regs[j])
13673 {
13674 int t = regs[i];
13675 regs[i] = regs[j];
13676 regs[j] = t;
13677 }
13678 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13679
13680 if (TARGET_THUMB1)
13681 {
13682 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13683 gcc_assert (ldm_case == 1 || ldm_case == 5);
13684 write_back = TRUE;
13685 }
13686
13687 if (ldm_case == 5)
13688 {
13689 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13690 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13691 offset = 0;
13692 if (!TARGET_THUMB1)
13693 {
13694 base_reg = regs[0];
13695 base_reg_rtx = newbase;
13696 }
13697 }
13698
13699 for (i = 0; i < nops; i++)
13700 {
13701 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13702 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13703 SImode, addr, 0);
13704 }
13705 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13706 write_back ? offset + i * 4 : 0));
13707 return true;
13708 }
13709
13710 /* Called from a peephole2 expander to turn a sequence of stores into an
13711 STM instruction. OPERANDS are the operands found by the peephole matcher;
13712 NOPS indicates how many separate stores we are trying to combine.
13713 Returns true iff we could generate a new instruction. */
13714
13715 bool
13716 gen_stm_seq (rtx *operands, int nops)
13717 {
13718 int i;
13719 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13720 rtx mems[MAX_LDM_STM_OPS];
13721 int base_reg;
13722 rtx base_reg_rtx;
13723 HOST_WIDE_INT offset;
13724 int write_back = FALSE;
13725 int stm_case;
13726 rtx addr;
13727 bool base_reg_dies;
13728
13729 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13730 mem_order, &base_reg, &offset, true);
13731
13732 if (stm_case == 0)
13733 return false;
13734
13735 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13736
13737 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13738 if (TARGET_THUMB1)
13739 {
13740 gcc_assert (base_reg_dies);
13741 write_back = TRUE;
13742 }
13743
13744 if (stm_case == 5)
13745 {
13746 gcc_assert (base_reg_dies);
13747 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13748 offset = 0;
13749 }
13750
13751 addr = plus_constant (Pmode, base_reg_rtx, offset);
13752
13753 for (i = 0; i < nops; i++)
13754 {
13755 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13756 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13757 SImode, addr, 0);
13758 }
13759 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13760 write_back ? offset + i * 4 : 0));
13761 return true;
13762 }
13763
13764 /* Called from a peephole2 expander to turn a sequence of stores that are
13765 preceded by constant loads into an STM instruction. OPERANDS are the
13766 operands found by the peephole matcher; NOPS indicates how many
13767 separate stores we are trying to combine; there are 2 * NOPS
13768 instructions in the peephole.
13769 Returns true iff we could generate a new instruction. */
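/* Roughly, a sequence such as (illustrative registers)
     mov r3, #1;  str r3, [r0]
     mov r3, #2;  str r3, [r0, #4]
   can become
     mov r2, #1;  mov r3, #2;  stmia r0, {r2, r3}
   provided a scratch register is free and any register whose final value
   changes is dead after the peephole window.  */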
13770
13771 bool
13772 gen_const_stm_seq (rtx *operands, int nops)
13773 {
13774 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13775 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13776 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13777 rtx mems[MAX_LDM_STM_OPS];
13778 int base_reg;
13779 rtx base_reg_rtx;
13780 HOST_WIDE_INT offset;
13781 int write_back = FALSE;
13782 int stm_case;
13783 rtx addr;
13784 bool base_reg_dies;
13785 int i, j;
13786 HARD_REG_SET allocated;
13787
13788 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13789 mem_order, &base_reg, &offset, false);
13790
13791 if (stm_case == 0)
13792 return false;
13793
13794 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13795
13796 /* If the same register is used more than once, try to find a free
13797 register. */
13798 CLEAR_HARD_REG_SET (allocated);
13799 for (i = 0; i < nops; i++)
13800 {
13801 for (j = i + 1; j < nops; j++)
13802 if (regs[i] == regs[j])
13803 {
13804 rtx t = peep2_find_free_register (0, nops * 2,
13805 TARGET_THUMB1 ? "l" : "r",
13806 SImode, &allocated);
13807 if (t == NULL_RTX)
13808 return false;
13809 reg_rtxs[i] = t;
13810 regs[i] = REGNO (t);
13811 }
13812 }
13813
13814 /* Compute an ordering that maps the register numbers to an ascending
13815 sequence. */
13816 reg_order[0] = 0;
13817 for (i = 0; i < nops; i++)
13818 if (regs[i] < regs[reg_order[0]])
13819 reg_order[0] = i;
13820
13821 for (i = 1; i < nops; i++)
13822 {
13823 int this_order = reg_order[i - 1];
13824 for (j = 0; j < nops; j++)
13825 if (regs[j] > regs[reg_order[i - 1]]
13826 && (this_order == reg_order[i - 1]
13827 || regs[j] < regs[this_order]))
13828 this_order = j;
13829 reg_order[i] = this_order;
13830 }
13831
13832 /* Ensure that registers that must be live after the instruction end
13833 up with the correct value. */
13834 for (i = 0; i < nops; i++)
13835 {
13836 int this_order = reg_order[i];
13837 if ((this_order != mem_order[i]
13838 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13839 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13840 return false;
13841 }
13842
13843 /* Load the constants. */
13844 for (i = 0; i < nops; i++)
13845 {
13846 rtx op = operands[2 * nops + mem_order[i]];
13847 sorted_regs[i] = regs[reg_order[i]];
13848 emit_move_insn (reg_rtxs[reg_order[i]], op);
13849 }
13850
13851 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13852
13853 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13854 if (TARGET_THUMB1)
13855 {
13856 gcc_assert (base_reg_dies);
13857 write_back = TRUE;
13858 }
13859
13860 if (stm_case == 5)
13861 {
13862 gcc_assert (base_reg_dies);
13863 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13864 offset = 0;
13865 }
13866
13867 addr = plus_constant (Pmode, base_reg_rtx, offset);
13868
13869 for (i = 0; i < nops; i++)
13870 {
13871 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13872 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13873 SImode, addr, 0);
13874 }
13875 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13876 write_back ? offset + i * 4 : 0));
13877 return true;
13878 }
13879
13880 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13881 unaligned copies on processors which support unaligned semantics for those
13882 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13883 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13884 An interleave factor of 1 (the minimum) will perform no interleaving.
13885 Load/store multiple are used for aligned addresses where possible. */
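/* For example, a 9-byte copy with INTERLEAVE_FACTOR 1 and neither side
   word-aligned is emitted as two unaligned word load/store pairs followed
   by a single byte copy; with factor 2 and an aligned source, the loads
   instead use one two-register ldm per 8-byte chunk.  */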
13886
13887 static void
13888 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13889 HOST_WIDE_INT length,
13890 unsigned int interleave_factor)
13891 {
13892 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13893 int *regnos = XALLOCAVEC (int, interleave_factor);
13894 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13895 HOST_WIDE_INT i, j;
13896 HOST_WIDE_INT remaining = length, words;
13897 rtx halfword_tmp = NULL, byte_tmp = NULL;
13898 rtx dst, src;
13899 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13900 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13901 HOST_WIDE_INT srcoffset, dstoffset;
13902 HOST_WIDE_INT src_autoinc, dst_autoinc;
13903 rtx mem, addr;
13904
13905 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13906
13907 /* Use hard registers if we have aligned source or destination so we can use
13908 load/store multiple with contiguous registers. */
13909 if (dst_aligned || src_aligned)
13910 for (i = 0; i < interleave_factor; i++)
13911 regs[i] = gen_rtx_REG (SImode, i);
13912 else
13913 for (i = 0; i < interleave_factor; i++)
13914 regs[i] = gen_reg_rtx (SImode);
13915
13916 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13917 src = copy_addr_to_reg (XEXP (srcbase, 0));
13918
13919 srcoffset = dstoffset = 0;
13920
13921 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13922 For copying the last bytes we want to subtract this offset again. */
13923 src_autoinc = dst_autoinc = 0;
13924
13925 for (i = 0; i < interleave_factor; i++)
13926 regnos[i] = i;
13927
13928 /* Copy BLOCK_SIZE_BYTES chunks. */
13929
13930 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13931 {
13932 /* Load words. */
13933 if (src_aligned && interleave_factor > 1)
13934 {
13935 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13936 TRUE, srcbase, &srcoffset));
13937 src_autoinc += UNITS_PER_WORD * interleave_factor;
13938 }
13939 else
13940 {
13941 for (j = 0; j < interleave_factor; j++)
13942 {
13943 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13944 - src_autoinc));
13945 mem = adjust_automodify_address (srcbase, SImode, addr,
13946 srcoffset + j * UNITS_PER_WORD);
13947 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13948 }
13949 srcoffset += block_size_bytes;
13950 }
13951
13952 /* Store words. */
13953 if (dst_aligned && interleave_factor > 1)
13954 {
13955 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13956 TRUE, dstbase, &dstoffset));
13957 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13958 }
13959 else
13960 {
13961 for (j = 0; j < interleave_factor; j++)
13962 {
13963 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13964 - dst_autoinc));
13965 mem = adjust_automodify_address (dstbase, SImode, addr,
13966 dstoffset + j * UNITS_PER_WORD);
13967 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13968 }
13969 dstoffset += block_size_bytes;
13970 }
13971
13972 remaining -= block_size_bytes;
13973 }
13974
13975 /* Copy any whole words left (note these aren't interleaved with any
13976 subsequent halfword/byte load/stores in the interests of simplicity). */
13977
13978 words = remaining / UNITS_PER_WORD;
13979
13980 gcc_assert (words < interleave_factor);
13981
13982 if (src_aligned && words > 1)
13983 {
13984 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13985 &srcoffset));
13986 src_autoinc += UNITS_PER_WORD * words;
13987 }
13988 else
13989 {
13990 for (j = 0; j < words; j++)
13991 {
13992 addr = plus_constant (Pmode, src,
13993 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13994 mem = adjust_automodify_address (srcbase, SImode, addr,
13995 srcoffset + j * UNITS_PER_WORD);
13996 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13997 }
13998 srcoffset += words * UNITS_PER_WORD;
13999 }
14000
14001 if (dst_aligned && words > 1)
14002 {
14003 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14004 &dstoffset));
14005 dst_autoinc += words * UNITS_PER_WORD;
14006 }
14007 else
14008 {
14009 for (j = 0; j < words; j++)
14010 {
14011 addr = plus_constant (Pmode, dst,
14012 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14013 mem = adjust_automodify_address (dstbase, SImode, addr,
14014 dstoffset + j * UNITS_PER_WORD);
14015 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14016 }
14017 dstoffset += words * UNITS_PER_WORD;
14018 }
14019
14020 remaining -= words * UNITS_PER_WORD;
14021
14022 gcc_assert (remaining < 4);
14023
14024 /* Copy a halfword if necessary. */
14025
14026 if (remaining >= 2)
14027 {
14028 halfword_tmp = gen_reg_rtx (SImode);
14029
14030 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14031 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14032 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14033
14034 /* Either write out immediately, or delay until we've loaded the last
14035 byte, depending on interleave factor. */
14036 if (interleave_factor == 1)
14037 {
14038 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14039 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14040 emit_insn (gen_unaligned_storehi (mem,
14041 gen_lowpart (HImode, halfword_tmp)));
14042 halfword_tmp = NULL;
14043 dstoffset += 2;
14044 }
14045
14046 remaining -= 2;
14047 srcoffset += 2;
14048 }
14049
14050 gcc_assert (remaining < 2);
14051
14052 /* Copy last byte. */
14053
14054 if ((remaining & 1) != 0)
14055 {
14056 byte_tmp = gen_reg_rtx (SImode);
14057
14058 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14059 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14060 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14061
14062 if (interleave_factor == 1)
14063 {
14064 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14065 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14066 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14067 byte_tmp = NULL;
14068 dstoffset++;
14069 }
14070
14071 remaining--;
14072 srcoffset++;
14073 }
14074
14075 /* Store last halfword if we haven't done so already. */
14076
14077 if (halfword_tmp)
14078 {
14079 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14080 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14081 emit_insn (gen_unaligned_storehi (mem,
14082 gen_lowpart (HImode, halfword_tmp)));
14083 dstoffset += 2;
14084 }
14085
14086 /* Likewise for last byte. */
14087
14088 if (byte_tmp)
14089 {
14090 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14091 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14092 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14093 dstoffset++;
14094 }
14095
14096 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14097 }
14098
14099 /* From mips_adjust_block_mem:
14100
14101 Helper function for doing a loop-based block operation on memory
14102 reference MEM. Each iteration of the loop will operate on LENGTH
14103 bytes of MEM.
14104
14105 Create a new base register for use within the loop and point it to
14106 the start of MEM. Create a new memory reference that uses this
14107 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14108
14109 static void
14110 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14111 rtx *loop_mem)
14112 {
14113 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14114
14115 /* Although the new mem does not refer to a known location,
14116 it does keep up to LENGTH bytes of alignment. */
14117 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14118 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14119 }
14120
14121 /* From mips_block_move_loop:
14122
14123 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14124 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14125 the memory regions do not overlap. */
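/* For example, LENGTH = 50 with BYTES_PER_ITER = 16 gives LEFTOVER = 2;
   the loop below copies 48 bytes in three iterations, and the final two
   bytes are handled by a straight copy after the loop.  */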
14126
14127 static void
14128 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14129 unsigned int interleave_factor,
14130 HOST_WIDE_INT bytes_per_iter)
14131 {
14132 rtx label, src_reg, dest_reg, final_src, test;
14133 HOST_WIDE_INT leftover;
14134
14135 leftover = length % bytes_per_iter;
14136 length -= leftover;
14137
14138 /* Create registers and memory references for use within the loop. */
14139 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14140 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14141
14142 /* Calculate the value that SRC_REG should have after the last iteration of
14143 the loop. */
14144 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14145 0, 0, OPTAB_WIDEN);
14146
14147 /* Emit the start of the loop. */
14148 label = gen_label_rtx ();
14149 emit_label (label);
14150
14151 /* Emit the loop body. */
14152 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14153 interleave_factor);
14154
14155 /* Move on to the next block. */
14156 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14157 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14158
14159 /* Emit the loop condition. */
14160 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14161 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14162
14163 /* Mop up any left-over bytes. */
14164 if (leftover)
14165 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14166 }
14167
14168 /* Emit a block move when either the source or destination is unaligned (not
14169 aligned to a four-byte boundary). This may need further tuning depending on
14170 core type, optimize_size setting, etc. */
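/* In outline: when optimizing for size, copies longer than 12 bytes use a
   loop moving 4 bytes per iteration (8 if either side is word-aligned);
   otherwise, copies longer than 32 bytes use a loop moving 16 bytes per
   iteration with an interleave factor of 4.  */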
14171
14172 static int
14173 arm_movmemqi_unaligned (rtx *operands)
14174 {
14175 HOST_WIDE_INT length = INTVAL (operands[2]);
14176
14177 if (optimize_size)
14178 {
14179 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14180 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14181 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14182 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14183 or dst_aligned though: allow more interleaving in those cases since the
14184 resulting code can be smaller. */
14185 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14186 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14187
14188 if (length > 12)
14189 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14190 interleave_factor, bytes_per_iter);
14191 else
14192 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14193 interleave_factor);
14194 }
14195 else
14196 {
14197 /* Note that the loop created by arm_block_move_unaligned_loop may be
14198 subject to loop unrolling, which makes tuning this condition a little
14199 redundant. */
14200 if (length > 32)
14201 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14202 else
14203 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14204 }
14205
14206 return 1;
14207 }
14208
14209 int
14210 arm_gen_movmemqi (rtx *operands)
14211 {
14212 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14213 HOST_WIDE_INT srcoffset, dstoffset;
14214 int i;
14215 rtx src, dst, srcbase, dstbase;
14216 rtx part_bytes_reg = NULL;
14217 rtx mem;
14218
14219 if (!CONST_INT_P (operands[2])
14220 || !CONST_INT_P (operands[3])
14221 || INTVAL (operands[2]) > 64)
14222 return 0;
14223
14224 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14225 return arm_movmemqi_unaligned (operands);
14226
14227 if (INTVAL (operands[3]) & 3)
14228 return 0;
14229
14230 dstbase = operands[0];
14231 srcbase = operands[1];
14232
14233 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14234 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14235
14236 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14237 out_words_to_go = INTVAL (operands[2]) / 4;
14238 last_bytes = INTVAL (operands[2]) & 3;
14239 dstoffset = srcoffset = 0;
14240
14241 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14242 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14243
14244 for (i = 0; in_words_to_go >= 2; i+=4)
14245 {
14246 if (in_words_to_go > 4)
14247 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14248 TRUE, srcbase, &srcoffset));
14249 else
14250 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14251 src, FALSE, srcbase,
14252 &srcoffset));
14253
14254 if (out_words_to_go)
14255 {
14256 if (out_words_to_go > 4)
14257 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14258 TRUE, dstbase, &dstoffset));
14259 else if (out_words_to_go != 1)
14260 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14261 out_words_to_go, dst,
14262 (last_bytes == 0
14263 ? FALSE : TRUE),
14264 dstbase, &dstoffset));
14265 else
14266 {
14267 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14268 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14269 if (last_bytes != 0)
14270 {
14271 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14272 dstoffset += 4;
14273 }
14274 }
14275 }
14276
14277 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14278 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14279 }
14280
14281 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14282 if (out_words_to_go)
14283 {
14284 rtx sreg;
14285
14286 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14287 sreg = copy_to_reg (mem);
14288
14289 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14290 emit_move_insn (mem, sreg);
14291 in_words_to_go--;
14292
14293 gcc_assert (!in_words_to_go); /* Sanity check */
14294 }
14295
14296 if (in_words_to_go)
14297 {
14298 gcc_assert (in_words_to_go > 0);
14299
14300 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14301 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14302 }
14303
14304 gcc_assert (!last_bytes || part_bytes_reg);
14305
14306 if (BYTES_BIG_ENDIAN && last_bytes)
14307 {
14308 rtx tmp = gen_reg_rtx (SImode);
14309
14310 /* The bytes we want are in the top end of the word. */
14311 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14312 GEN_INT (8 * (4 - last_bytes))));
14313 part_bytes_reg = tmp;
14314
14315 while (last_bytes)
14316 {
14317 mem = adjust_automodify_address (dstbase, QImode,
14318 plus_constant (Pmode, dst,
14319 last_bytes - 1),
14320 dstoffset + last_bytes - 1);
14321 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14322
14323 if (--last_bytes)
14324 {
14325 tmp = gen_reg_rtx (SImode);
14326 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14327 part_bytes_reg = tmp;
14328 }
14329 }
14330
14331 }
14332 else
14333 {
14334 if (last_bytes > 1)
14335 {
14336 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14337 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14338 last_bytes -= 2;
14339 if (last_bytes)
14340 {
14341 rtx tmp = gen_reg_rtx (SImode);
14342 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14343 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14344 part_bytes_reg = tmp;
14345 dstoffset += 2;
14346 }
14347 }
14348
14349 if (last_bytes)
14350 {
14351 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14352 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14353 }
14354 }
14355
14356 return 1;
14357 }
14358
14359 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14360 by mode size. */
14361 inline static rtx
14362 next_consecutive_mem (rtx mem)
14363 {
14364 enum machine_mode mode = GET_MODE (mem);
14365 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14366 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14367
14368 return adjust_automodify_address (mem, mode, addr, offset);
14369 }
14370
14371 /* Copy using LDRD/STRD instructions whenever possible.
14372 Returns true upon success. */
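/* For instance, a 12-byte copy with both sides word-aligned is emitted as
   one DImode move (typically ldrd/strd) followed by one SImode move
   (ldr/str); any trailing halfword or byte is copied separately below.  */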
14373 bool
14374 gen_movmem_ldrd_strd (rtx *operands)
14375 {
14376 unsigned HOST_WIDE_INT len;
14377 HOST_WIDE_INT align;
14378 rtx src, dst, base;
14379 rtx reg0;
14380 bool src_aligned, dst_aligned;
14381 bool src_volatile, dst_volatile;
14382
14383 gcc_assert (CONST_INT_P (operands[2]));
14384 gcc_assert (CONST_INT_P (operands[3]));
14385
14386 len = UINTVAL (operands[2]);
14387 if (len > 64)
14388 return false;
14389
14390 /* Maximum alignment we can assume for both src and dst buffers. */
14391 align = INTVAL (operands[3]);
14392
14393 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14394 return false;
14395
14396 /* Place src and dst addresses in registers
14397 and update the corresponding mem rtx. */
14398 dst = operands[0];
14399 dst_volatile = MEM_VOLATILE_P (dst);
14400 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14401 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14402 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14403
14404 src = operands[1];
14405 src_volatile = MEM_VOLATILE_P (src);
14406 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14407 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14408 src = adjust_automodify_address (src, VOIDmode, base, 0);
14409
14410 if (!unaligned_access && !(src_aligned && dst_aligned))
14411 return false;
14412
14413 if (src_volatile || dst_volatile)
14414 return false;
14415
14416 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14417 if (!(dst_aligned || src_aligned))
14418 return arm_gen_movmemqi (operands);
14419
14420 src = adjust_address (src, DImode, 0);
14421 dst = adjust_address (dst, DImode, 0);
14422 while (len >= 8)
14423 {
14424 len -= 8;
14425 reg0 = gen_reg_rtx (DImode);
14426 if (src_aligned)
14427 emit_move_insn (reg0, src);
14428 else
14429 emit_insn (gen_unaligned_loaddi (reg0, src));
14430
14431 if (dst_aligned)
14432 emit_move_insn (dst, reg0);
14433 else
14434 emit_insn (gen_unaligned_storedi (dst, reg0));
14435
14436 src = next_consecutive_mem (src);
14437 dst = next_consecutive_mem (dst);
14438 }
14439
14440 gcc_assert (len < 8);
14441 if (len >= 4)
14442 {
14443 /* More than a word but less than a double-word to copy. Copy a word. */
14444 reg0 = gen_reg_rtx (SImode);
14445 src = adjust_address (src, SImode, 0);
14446 dst = adjust_address (dst, SImode, 0);
14447 if (src_aligned)
14448 emit_move_insn (reg0, src);
14449 else
14450 emit_insn (gen_unaligned_loadsi (reg0, src));
14451
14452 if (dst_aligned)
14453 emit_move_insn (dst, reg0);
14454 else
14455 emit_insn (gen_unaligned_storesi (dst, reg0));
14456
14457 src = next_consecutive_mem (src);
14458 dst = next_consecutive_mem (dst);
14459 len -= 4;
14460 }
14461
14462 if (len == 0)
14463 return true;
14464
14465 /* Copy the remaining bytes. */
14466 if (len >= 2)
14467 {
14468 dst = adjust_address (dst, HImode, 0);
14469 src = adjust_address (src, HImode, 0);
14470 reg0 = gen_reg_rtx (SImode);
14471 if (src_aligned)
14472 emit_insn (gen_zero_extendhisi2 (reg0, src));
14473 else
14474 emit_insn (gen_unaligned_loadhiu (reg0, src));
14475
14476 if (dst_aligned)
14477 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14478 else
14479 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14480
14481 src = next_consecutive_mem (src);
14482 dst = next_consecutive_mem (dst);
14483 if (len == 2)
14484 return true;
14485 }
14486
14487 dst = adjust_address (dst, QImode, 0);
14488 src = adjust_address (src, QImode, 0);
14489 reg0 = gen_reg_rtx (QImode);
14490 emit_move_insn (reg0, src);
14491 emit_move_insn (dst, reg0);
14492 return true;
14493 }
14494
14495 /* Select a dominance comparison mode if possible for a test of the general
14496 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14497 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14498 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14499 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14500 In all cases OP will be either EQ or NE, but we don't need to know which
14501 here. If we are unable to support a dominance comparison, we return
14502 CCmode. This will then fail to match for the RTL expressions that
14503 generate this call. */
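/* For example, EQ dominates LE (any flag state satisfying EQ also satisfies
   LE), so a pair of simple comparisons using EQ and LE combined with
   DOM_CC_X_OR_Y maps to CC_DLEmode in the switch below.  */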
14504 enum machine_mode
14505 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14506 {
14507 enum rtx_code cond1, cond2;
14508 int swapped = 0;
14509
14510 /* Currently we will probably get the wrong result if the individual
14511 comparisons are not simple. This also ensures that it is safe to
14512 reverse a comparison if necessary. */
14513 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14514 != CCmode)
14515 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14516 != CCmode))
14517 return CCmode;
14518
14519 /* The if_then_else variant of this tests the second condition if the
14520 first passes, but is true if the first fails. Reverse the first
14521 condition to get a true "inclusive-or" expression. */
14522 if (cond_or == DOM_CC_NX_OR_Y)
14523 cond1 = reverse_condition (cond1);
14524
14525 /* If the comparisons are not equal, and one doesn't dominate the other,
14526 then we can't do this. */
14527 if (cond1 != cond2
14528 && !comparison_dominates_p (cond1, cond2)
14529 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14530 return CCmode;
14531
14532 if (swapped)
14533 {
14534 enum rtx_code temp = cond1;
14535 cond1 = cond2;
14536 cond2 = temp;
14537 }
14538
14539 switch (cond1)
14540 {
14541 case EQ:
14542 if (cond_or == DOM_CC_X_AND_Y)
14543 return CC_DEQmode;
14544
14545 switch (cond2)
14546 {
14547 case EQ: return CC_DEQmode;
14548 case LE: return CC_DLEmode;
14549 case LEU: return CC_DLEUmode;
14550 case GE: return CC_DGEmode;
14551 case GEU: return CC_DGEUmode;
14552 default: gcc_unreachable ();
14553 }
14554
14555 case LT:
14556 if (cond_or == DOM_CC_X_AND_Y)
14557 return CC_DLTmode;
14558
14559 switch (cond2)
14560 {
14561 case LT:
14562 return CC_DLTmode;
14563 case LE:
14564 return CC_DLEmode;
14565 case NE:
14566 return CC_DNEmode;
14567 default:
14568 gcc_unreachable ();
14569 }
14570
14571 case GT:
14572 if (cond_or == DOM_CC_X_AND_Y)
14573 return CC_DGTmode;
14574
14575 switch (cond2)
14576 {
14577 case GT:
14578 return CC_DGTmode;
14579 case GE:
14580 return CC_DGEmode;
14581 case NE:
14582 return CC_DNEmode;
14583 default:
14584 gcc_unreachable ();
14585 }
14586
14587 case LTU:
14588 if (cond_or == DOM_CC_X_AND_Y)
14589 return CC_DLTUmode;
14590
14591 switch (cond2)
14592 {
14593 case LTU:
14594 return CC_DLTUmode;
14595 case LEU:
14596 return CC_DLEUmode;
14597 case NE:
14598 return CC_DNEmode;
14599 default:
14600 gcc_unreachable ();
14601 }
14602
14603 case GTU:
14604 if (cond_or == DOM_CC_X_AND_Y)
14605 return CC_DGTUmode;
14606
14607 switch (cond2)
14608 {
14609 case GTU:
14610 return CC_DGTUmode;
14611 case GEU:
14612 return CC_DGEUmode;
14613 case NE:
14614 return CC_DNEmode;
14615 default:
14616 gcc_unreachable ();
14617 }
14618
14619 /* The remaining cases only occur when both comparisons are the
14620 same. */
14621 case NE:
14622 gcc_assert (cond1 == cond2);
14623 return CC_DNEmode;
14624
14625 case LE:
14626 gcc_assert (cond1 == cond2);
14627 return CC_DLEmode;
14628
14629 case GE:
14630 gcc_assert (cond1 == cond2);
14631 return CC_DGEmode;
14632
14633 case LEU:
14634 gcc_assert (cond1 == cond2);
14635 return CC_DLEUmode;
14636
14637 case GEU:
14638 gcc_assert (cond1 == cond2);
14639 return CC_DGEUmode;
14640
14641 default:
14642 gcc_unreachable ();
14643 }
14644 }
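
/* For example (assuming both sub-comparisons are simple enough for
   arm_select_cc_mode to accept them): for (x == y) || (a >= b), i.e. EQ
   and GE combined with DOM_CC_X_OR_Y, the two conditions are ordered,
   swapping if necessary, so that EQ (which implies GE) comes first, and
   CC_DGEmode is returned; with DOM_CC_X_AND_Y and a first condition of
   EQ the function returns CC_DEQmode regardless of the second
   condition.  */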
14645
14646 enum machine_mode
14647 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14648 {
14649 /* All floating point compares return CCFP if it is an equality
14650 comparison, and CCFPE otherwise. */
14651 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14652 {
14653 switch (op)
14654 {
14655 case EQ:
14656 case NE:
14657 case UNORDERED:
14658 case ORDERED:
14659 case UNLT:
14660 case UNLE:
14661 case UNGT:
14662 case UNGE:
14663 case UNEQ:
14664 case LTGT:
14665 return CCFPmode;
14666
14667 case LT:
14668 case LE:
14669 case GT:
14670 case GE:
14671 return CCFPEmode;
14672
14673 default:
14674 gcc_unreachable ();
14675 }
14676 }
14677
14678 /* A compare with a shifted operand. Because of canonicalization, the
14679 comparison will have to be swapped when we emit the assembler. */
14680 if (GET_MODE (y) == SImode
14681 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14682 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14683 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14684 || GET_CODE (x) == ROTATERT))
14685 return CC_SWPmode;
14686
14687 /* This operation is performed swapped, but since we only rely on the Z
14688 flag we don't need an additional mode. */
14689 if (GET_MODE (y) == SImode
14690 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14691 && GET_CODE (x) == NEG
14692 && (op == EQ || op == NE))
14693 return CC_Zmode;
14694
14695 /* This is a special case that is used by combine to allow a
14696 comparison of a shifted byte load to be split into a zero-extend
14697 followed by a comparison of the shifted integer (only valid for
14698 equalities and unsigned inequalities). */
14699 if (GET_MODE (x) == SImode
14700 && GET_CODE (x) == ASHIFT
14701 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14702 && GET_CODE (XEXP (x, 0)) == SUBREG
14703 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14704 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14705 && (op == EQ || op == NE
14706 || op == GEU || op == GTU || op == LTU || op == LEU)
14707 && CONST_INT_P (y))
14708 return CC_Zmode;
14709
14710 /* A construct for a conditional compare, if the false arm contains
14711 0, then both conditions must be true, otherwise either condition
14712 must be true. Not all conditions are possible, so CCmode is
14713 returned if it can't be done. */
14714 if (GET_CODE (x) == IF_THEN_ELSE
14715 && (XEXP (x, 2) == const0_rtx
14716 || XEXP (x, 2) == const1_rtx)
14717 && COMPARISON_P (XEXP (x, 0))
14718 && COMPARISON_P (XEXP (x, 1)))
14719 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14720 INTVAL (XEXP (x, 2)));
14721
14722 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14723 if (GET_CODE (x) == AND
14724 && (op == EQ || op == NE)
14725 && COMPARISON_P (XEXP (x, 0))
14726 && COMPARISON_P (XEXP (x, 1)))
14727 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14728 DOM_CC_X_AND_Y);
14729
14730 if (GET_CODE (x) == IOR
14731 && (op == EQ || op == NE)
14732 && COMPARISON_P (XEXP (x, 0))
14733 && COMPARISON_P (XEXP (x, 1)))
14734 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14735 DOM_CC_X_OR_Y);
14736
14737 /* An operation (on Thumb) where we want to test for a single bit.
14738 This is done by shifting that bit up into the top bit of a
14739 scratch register; we can then branch on the sign bit. */
14740 if (TARGET_THUMB1
14741 && GET_MODE (x) == SImode
14742 && (op == EQ || op == NE)
14743 && GET_CODE (x) == ZERO_EXTRACT
14744 && XEXP (x, 1) == const1_rtx)
14745 return CC_Nmode;
14746
14747 /* An operation that sets the condition codes as a side-effect, the
14748 V flag is not set correctly, so we can only use comparisons where
14749 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14750 instead.) */
14751 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14752 if (GET_MODE (x) == SImode
14753 && y == const0_rtx
14754 && (op == EQ || op == NE || op == LT || op == GE)
14755 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14756 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14757 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14758 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14759 || GET_CODE (x) == LSHIFTRT
14760 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14761 || GET_CODE (x) == ROTATERT
14762 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14763 return CC_NOOVmode;
14764
14765 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14766 return CC_Zmode;
14767
14768 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14769 && GET_CODE (x) == PLUS
14770 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14771 return CC_Cmode;
14772
14773 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14774 {
14775 switch (op)
14776 {
14777 case EQ:
14778 case NE:
14779 /* A DImode comparison against zero can be implemented by
14780 or'ing the two halves together. */
14781 if (y == const0_rtx)
14782 return CC_Zmode;
14783
14784 /* We can do an equality test in three Thumb instructions. */
14785 if (!TARGET_32BIT)
14786 return CC_Zmode;
14787
14788 /* FALLTHROUGH */
14789
14790 case LTU:
14791 case LEU:
14792 case GTU:
14793 case GEU:
14794 /* DImode unsigned comparisons can be implemented by cmp +
14795 cmpeq without a scratch register. Not worth doing in
14796 Thumb-2. */
14797 if (TARGET_32BIT)
14798 return CC_CZmode;
14799
14800 /* FALLTHROUGH */
14801
14802 case LT:
14803 case LE:
14804 case GT:
14805 case GE:
14806 /* DImode signed and unsigned comparisons can be implemented
14807 by cmp + sbcs with a scratch register, but that does not
14808 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14809 gcc_assert (op != EQ && op != NE);
14810 return CC_NCVmode;
14811
14812 default:
14813 gcc_unreachable ();
14814 }
14815 }
14816
14817 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14818 return GET_MODE (x);
14819
14820 return CCmode;
14821 }
14822
14823 /* X and Y are two things to compare using CODE. Emit the compare insn and
14824 return the rtx for register 0 in the proper mode. FP means this is a
14825 floating point compare: I don't think that it is needed on the arm. */
14826 rtx
14827 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14828 {
14829 enum machine_mode mode;
14830 rtx cc_reg;
14831 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14832
14833 /* We might have X as a constant, Y as a register because of the predicates
14834 used for cmpdi. If so, force X to a register here. */
14835 if (dimode_comparison && !REG_P (x))
14836 x = force_reg (DImode, x);
14837
14838 mode = SELECT_CC_MODE (code, x, y);
14839 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14840
14841 if (dimode_comparison
14842 && mode != CC_CZmode)
14843 {
14844 rtx clobber, set;
14845
14846 /* To compare two non-zero values for equality, XOR them and
14847 then compare against zero. Not used for ARM mode; there
14848 CC_CZmode is cheaper. */
14849 if (mode == CC_Zmode && y != const0_rtx)
14850 {
14851 gcc_assert (!reload_completed);
14852 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14853 y = const0_rtx;
14854 }
14855
14856 /* A scratch register is required. */
14857 if (reload_completed)
14858 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14859 else
14860 scratch = gen_rtx_SCRATCH (SImode);
14861
14862 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14863 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
14864 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14865 }
14866 else
14867 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14868
14869 return cc_reg;
14870 }
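
/* As a rough illustration of the CC_Zmode path above: on Thumb-1 a
   DImode equality test x == y (with y not constant zero) is rewritten
   before reload as

     t  = x ^ y                  (expand_binop, xordi)
     CC = compare (t, 0)         (CC_Zmode, only the Z flag matters)

   and the comparison of T against zero can then typically be done by
   OR-ing its two word halves into the scratch register, so no 64-bit
   subtract is needed.  The exact instruction sequence depends on the
   corresponding patterns in the machine description.  */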
14871
14872 /* Generate a sequence of insns that will generate the correct return
14873 address mask depending on the physical architecture that the program
14874 is running on. */
14875 rtx
14876 arm_gen_return_addr_mask (void)
14877 {
14878 rtx reg = gen_reg_rtx (Pmode);
14879
14880 emit_insn (gen_return_addr_mask (reg));
14881 return reg;
14882 }
14883
14884 void
14885 arm_reload_in_hi (rtx *operands)
14886 {
14887 rtx ref = operands[1];
14888 rtx base, scratch;
14889 HOST_WIDE_INT offset = 0;
14890
14891 if (GET_CODE (ref) == SUBREG)
14892 {
14893 offset = SUBREG_BYTE (ref);
14894 ref = SUBREG_REG (ref);
14895 }
14896
14897 if (REG_P (ref))
14898 {
14899 /* We have a pseudo which has been spilt onto the stack; there
14900 are two cases here: the first where there is a simple
14901 stack-slot replacement and a second where the stack-slot is
14902 out of range, or is used as a subreg. */
14903 if (reg_equiv_mem (REGNO (ref)))
14904 {
14905 ref = reg_equiv_mem (REGNO (ref));
14906 base = find_replacement (&XEXP (ref, 0));
14907 }
14908 else
14909 /* The slot is out of range, or was dressed up in a SUBREG. */
14910 base = reg_equiv_address (REGNO (ref));
14911 }
14912 else
14913 base = find_replacement (&XEXP (ref, 0));
14914
14915 /* Handle the case where the address is too complex to be offset by 1. */
14916 if (GET_CODE (base) == MINUS
14917 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14918 {
14919 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14920
14921 emit_set_insn (base_plus, base);
14922 base = base_plus;
14923 }
14924 else if (GET_CODE (base) == PLUS)
14925 {
14926 /* The addend must be CONST_INT, or we would have dealt with it above. */
14927 HOST_WIDE_INT hi, lo;
14928
14929 offset += INTVAL (XEXP (base, 1));
14930 base = XEXP (base, 0);
14931
14932 /* Rework the address into a legal sequence of insns. */
14933 /* Valid range for lo is -4095 -> 4095 */
14934 lo = (offset >= 0
14935 ? (offset & 0xfff)
14936 : -((-offset) & 0xfff));
14937
14938 /* Corner case, if lo is the max offset then we would be out of range
14939 once we have added the additional 1 below, so bump the msb into the
14940 pre-loading insn(s). */
14941 if (lo == 4095)
14942 lo &= 0x7ff;
14943
14944 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14945 ^ (HOST_WIDE_INT) 0x80000000)
14946 - (HOST_WIDE_INT) 0x80000000);
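  /* Worked example (values chosen arbitrarily): for offset = 4099,
     lo = 4099 & 0xfff = 3 and hi = 4096, so the address is rebuilt as
     (base + 4096) + 3.  For offset = 4095 the corner case above applies:
     lo becomes 0x7ff = 2047 and hi = 2048, keeping lo + 1 in range.
     For offset = -4100, lo = -(4100 & 0xfff) = -4 and hi = -4096.
     In each case hi + lo == offset, which the assertion below checks.  */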
14947
14948 gcc_assert (hi + lo == offset);
14949
14950 if (hi != 0)
14951 {
14952 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14953
14954 /* Get the base address; addsi3 knows how to handle constants
14955 that require more than one insn. */
14956 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14957 base = base_plus;
14958 offset = lo;
14959 }
14960 }
14961
14962 /* Operands[2] may overlap operands[0] (though it won't overlap
14963 operands[1]), that's why we asked for a DImode reg -- so we can
14964 use the bit that does not overlap. */
14965 if (REGNO (operands[2]) == REGNO (operands[0]))
14966 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14967 else
14968 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14969
14970 emit_insn (gen_zero_extendqisi2 (scratch,
14971 gen_rtx_MEM (QImode,
14972 plus_constant (Pmode, base,
14973 offset))));
14974 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14975 gen_rtx_MEM (QImode,
14976 plus_constant (Pmode, base,
14977 offset + 1))));
14978 if (!BYTES_BIG_ENDIAN)
14979 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14980 gen_rtx_IOR (SImode,
14981 gen_rtx_ASHIFT
14982 (SImode,
14983 gen_rtx_SUBREG (SImode, operands[0], 0),
14984 GEN_INT (8)),
14985 scratch));
14986 else
14987 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14988 gen_rtx_IOR (SImode,
14989 gen_rtx_ASHIFT (SImode, scratch,
14990 GEN_INT (8)),
14991 gen_rtx_SUBREG (SImode, operands[0], 0)));
14992 }
14993
14994 /* Handle storing a half-word to memory during reload by synthesizing as two
14995 byte stores. Take care not to clobber the input values until after we
14996 have moved them somewhere safe. This code assumes that if the DImode
14997 scratch in operands[2] overlaps either the input value or output address
14998 in some way, then that value must die in this insn (we absolutely need
14999 two scratch registers for some corner cases). */
15000 void
15001 arm_reload_out_hi (rtx *operands)
15002 {
15003 rtx ref = operands[0];
15004 rtx outval = operands[1];
15005 rtx base, scratch;
15006 HOST_WIDE_INT offset = 0;
15007
15008 if (GET_CODE (ref) == SUBREG)
15009 {
15010 offset = SUBREG_BYTE (ref);
15011 ref = SUBREG_REG (ref);
15012 }
15013
15014 if (REG_P (ref))
15015 {
15016 /* We have a pseudo which has been spilt onto the stack; there
15017 are two cases here: the first where there is a simple
15018 stack-slot replacement and a second where the stack-slot is
15019 out of range, or is used as a subreg. */
15020 if (reg_equiv_mem (REGNO (ref)))
15021 {
15022 ref = reg_equiv_mem (REGNO (ref));
15023 base = find_replacement (&XEXP (ref, 0));
15024 }
15025 else
15026 /* The slot is out of range, or was dressed up in a SUBREG. */
15027 base = reg_equiv_address (REGNO (ref));
15028 }
15029 else
15030 base = find_replacement (&XEXP (ref, 0));
15031
15032 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15033
15034 /* Handle the case where the address is too complex to be offset by 1. */
15035 if (GET_CODE (base) == MINUS
15036 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15037 {
15038 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15039
15040 /* Be careful not to destroy OUTVAL. */
15041 if (reg_overlap_mentioned_p (base_plus, outval))
15042 {
15043 /* Updating base_plus might destroy outval, see if we can
15044 swap the scratch and base_plus. */
15045 if (!reg_overlap_mentioned_p (scratch, outval))
15046 {
15047 rtx tmp = scratch;
15048 scratch = base_plus;
15049 base_plus = tmp;
15050 }
15051 else
15052 {
15053 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15054
15055 /* Be conservative and copy OUTVAL into the scratch now,
15056 this should only be necessary if outval is a subreg
15057 of something larger than a word. */
15058 /* XXX Might this clobber base? I can't see how it can,
15059 since scratch is known to overlap with OUTVAL, and
15060 must be wider than a word. */
15061 emit_insn (gen_movhi (scratch_hi, outval));
15062 outval = scratch_hi;
15063 }
15064 }
15065
15066 emit_set_insn (base_plus, base);
15067 base = base_plus;
15068 }
15069 else if (GET_CODE (base) == PLUS)
15070 {
15071 /* The addend must be CONST_INT, or we would have dealt with it above. */
15072 HOST_WIDE_INT hi, lo;
15073
15074 offset += INTVAL (XEXP (base, 1));
15075 base = XEXP (base, 0);
15076
15077 /* Rework the address into a legal sequence of insns. */
15078 /* Valid range for lo is -4095 -> 4095 */
15079 lo = (offset >= 0
15080 ? (offset & 0xfff)
15081 : -((-offset) & 0xfff));
15082
15083 /* Corner case, if lo is the max offset then we would be out of range
15084 once we have added the additional 1 below, so bump the msb into the
15085 pre-loading insn(s). */
15086 if (lo == 4095)
15087 lo &= 0x7ff;
15088
15089 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15090 ^ (HOST_WIDE_INT) 0x80000000)
15091 - (HOST_WIDE_INT) 0x80000000);
15092
15093 gcc_assert (hi + lo == offset);
15094
15095 if (hi != 0)
15096 {
15097 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15098
15099 /* Be careful not to destroy OUTVAL. */
15100 if (reg_overlap_mentioned_p (base_plus, outval))
15101 {
15102 /* Updating base_plus might destroy outval, see if we
15103 can swap the scratch and base_plus. */
15104 if (!reg_overlap_mentioned_p (scratch, outval))
15105 {
15106 rtx tmp = scratch;
15107 scratch = base_plus;
15108 base_plus = tmp;
15109 }
15110 else
15111 {
15112 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15113
15114 /* Be conservative and copy outval into scratch now,
15115 this should only be necessary if outval is a
15116 subreg of something larger than a word. */
15117 /* XXX Might this clobber base? I can't see how it
15118 can, since scratch is known to overlap with
15119 outval. */
15120 emit_insn (gen_movhi (scratch_hi, outval));
15121 outval = scratch_hi;
15122 }
15123 }
15124
15125 /* Get the base address; addsi3 knows how to handle constants
15126 that require more than one insn. */
15127 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15128 base = base_plus;
15129 offset = lo;
15130 }
15131 }
15132
15133 if (BYTES_BIG_ENDIAN)
15134 {
15135 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15136 plus_constant (Pmode, base,
15137 offset + 1)),
15138 gen_lowpart (QImode, outval)));
15139 emit_insn (gen_lshrsi3 (scratch,
15140 gen_rtx_SUBREG (SImode, outval, 0),
15141 GEN_INT (8)));
15142 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15143 offset)),
15144 gen_lowpart (QImode, scratch)));
15145 }
15146 else
15147 {
15148 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15149 offset)),
15150 gen_lowpart (QImode, outval)));
15151 emit_insn (gen_lshrsi3 (scratch,
15152 gen_rtx_SUBREG (SImode, outval, 0),
15153 GEN_INT (8)));
15154 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15155 plus_constant (Pmode, base,
15156 offset + 1)),
15157 gen_lowpart (QImode, scratch)));
15158 }
15159 }
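
/* For example, on a little-endian target the code above typically emits
   a sequence along the lines of

     strb  Rlow,     [base, #offset]       @ low byte of OUTVAL
     lsr   Rscratch, Rout, #8
     strb  Rscratch, [base, #offset + 1]   @ high byte

   with the two byte addresses swapped on big-endian targets (register
   names are illustrative only).  */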
15160
15161 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15162 (padded to the size of a word) should be passed in a register. */
15163
15164 static bool
15165 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15166 {
15167 if (TARGET_AAPCS_BASED)
15168 return must_pass_in_stack_var_size (mode, type);
15169 else
15170 return must_pass_in_stack_var_size_or_pad (mode, type);
15171 }
15172
15173
15174 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15175 Return true if an argument passed on the stack should be padded upwards,
15176 i.e. if the least-significant byte has useful data.
15177 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15178 aggregate types are placed in the lowest memory address. */
15179
15180 bool
15181 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15182 {
15183 if (!TARGET_AAPCS_BASED)
15184 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15185
15186 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15187 return false;
15188
15189 return true;
15190 }
15191
15192
15193 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15194 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15195 register has useful data, and return the opposite if the most
15196 significant byte does. */
15197
15198 bool
15199 arm_pad_reg_upward (enum machine_mode mode,
15200 tree type, int first ATTRIBUTE_UNUSED)
15201 {
15202 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15203 {
15204 /* For AAPCS, small aggregates, small fixed-point types,
15205 and small complex types are always padded upwards. */
15206 if (type)
15207 {
15208 if ((AGGREGATE_TYPE_P (type)
15209 || TREE_CODE (type) == COMPLEX_TYPE
15210 || FIXED_POINT_TYPE_P (type))
15211 && int_size_in_bytes (type) <= 4)
15212 return true;
15213 }
15214 else
15215 {
15216 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15217 && GET_MODE_SIZE (mode) <= 4)
15218 return true;
15219 }
15220 }
15221
15222 /* Otherwise, use default padding. */
15223 return !BYTES_BIG_ENDIAN;
15224 }
15225
15226 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15227 assuming that the address in the base register is word aligned. */
15228 bool
15229 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15230 {
15231 HOST_WIDE_INT max_offset;
15232
15233 /* Offset must be a multiple of 4 in Thumb mode. */
15234 if (TARGET_THUMB2 && ((offset & 3) != 0))
15235 return false;
15236
15237 if (TARGET_THUMB2)
15238 max_offset = 1020;
15239 else if (TARGET_ARM)
15240 max_offset = 255;
15241 else
15242 return false;
15243
15244 return ((offset <= max_offset) && (offset >= -max_offset));
15245 }
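
/* For instance, in Thumb-2 offsets 0, 4 and 1020 are accepted, while
   1021 (not a multiple of 4) and 1024 (out of range) are not; in ARM
   state the limit is +/-255, so an offset of -256 is rejected.  */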
15246
15247 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15248 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15249 Assumes that the address in the base register RN is word aligned. Pattern
15250 guarantees that both memory accesses use the same base register,
15251 the offsets are constants within the range, and the gap between the offsets is 4.
15252 If reload is complete then check that the registers are legal. WBACK indicates whether
15253 address is updated. LOAD indicates whether memory access is load or store. */
15254 bool
15255 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15256 bool wback, bool load)
15257 {
15258 unsigned int t, t2, n;
15259
15260 if (!reload_completed)
15261 return true;
15262
15263 if (!offset_ok_for_ldrd_strd (offset))
15264 return false;
15265
15266 t = REGNO (rt);
15267 t2 = REGNO (rt2);
15268 n = REGNO (rn);
15269
15270 if ((TARGET_THUMB2)
15271 && ((wback && (n == t || n == t2))
15272 || (t == SP_REGNUM)
15273 || (t == PC_REGNUM)
15274 || (t2 == SP_REGNUM)
15275 || (t2 == PC_REGNUM)
15276 || (!load && (n == PC_REGNUM))
15277 || (load && (t == t2))
15278 /* Triggers Cortex-M3 LDRD errata. */
15279 || (!wback && load && fix_cm3_ldrd && (n == t))))
15280 return false;
15281
15282 if ((TARGET_ARM)
15283 && ((wback && (n == t || n == t2))
15284 || (t2 == PC_REGNUM)
15285 || (t % 2 != 0) /* First destination register is not even. */
15286 || (t2 != t + 1)
15287 /* PC can be used as base register (for offset addressing only),
15288 but it is deprecated. */
15289 || (n == PC_REGNUM)))
15290 return false;
15291
15292 return true;
15293 }
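
/* For example, in ARM state after reload the pair { r4, r5 } with a
   word-aligned base such as r2 and an offset of 8 can be emitted as
   "ldrd r4, r5, [r2, #8]", whereas { r5, r6 } is rejected because the
   first transfer register must be even (and the pair consecutive).  */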
15294
15295 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15296 operand MEM's address contains an immediate offset from the base
15297 register and has no side effects, in which case it sets BASE and
15298 OFFSET accordingly. */
15299 static bool
15300 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15301 {
15302 rtx addr;
15303
15304 gcc_assert (base != NULL && offset != NULL);
15305
15306 /* TODO: Handle more general memory operand patterns, such as
15307 PRE_DEC and PRE_INC. */
15308
15309 if (side_effects_p (mem))
15310 return false;
15311
15312 /* Can't deal with subregs. */
15313 if (GET_CODE (mem) == SUBREG)
15314 return false;
15315
15316 gcc_assert (MEM_P (mem));
15317
15318 *offset = const0_rtx;
15319
15320 addr = XEXP (mem, 0);
15321
15322 /* If addr isn't valid for DImode, then we can't handle it. */
15323 if (!arm_legitimate_address_p (DImode, addr,
15324 reload_in_progress || reload_completed))
15325 return false;
15326
15327 if (REG_P (addr))
15328 {
15329 *base = addr;
15330 return true;
15331 }
15332 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15333 {
15334 *base = XEXP (addr, 0);
15335 *offset = XEXP (addr, 1);
15336 return (REG_P (*base) && CONST_INT_P (*offset));
15337 }
15338
15339 return false;
15340 }
15341
15342 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15343
15344 /* Called from a peephole2 to replace two word-size accesses with a
15345 single LDRD/STRD instruction. Returns true iff we can generate a
15346 new instruction sequence. That is, both accesses use the same base
15347 register and the gap between constant offsets is 4. This function
15348 may reorder its operands to match ldrd/strd RTL templates.
15349 OPERANDS are the operands found by the peephole matcher;
15350 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15351 corresponding memory operands. LOAD indicates whether the access
15352 is load or store. CONST_STORE indicates a store of constant
15353 integer values held in OPERANDS[4,5] and assumes that the pattern
15354 is four insns long, for the purpose of checking dead registers.
15355 COMMUTE indicates that register operands may be reordered. */
15356 bool
15357 gen_operands_ldrd_strd (rtx *operands, bool load,
15358 bool const_store, bool commute)
15359 {
15360 int nops = 2;
15361 HOST_WIDE_INT offsets[2], offset;
15362 rtx base = NULL_RTX;
15363 rtx cur_base, cur_offset, tmp;
15364 int i, gap;
15365 HARD_REG_SET regset;
15366
15367 gcc_assert (!const_store || !load);
15368 /* Check that the memory references are immediate offsets from the
15369 same base register. Extract the base register, the destination
15370 registers, and the corresponding memory offsets. */
15371 for (i = 0; i < nops; i++)
15372 {
15373 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15374 return false;
15375
15376 if (i == 0)
15377 base = cur_base;
15378 else if (REGNO (base) != REGNO (cur_base))
15379 return false;
15380
15381 offsets[i] = INTVAL (cur_offset);
15382 if (GET_CODE (operands[i]) == SUBREG)
15383 {
15384 tmp = SUBREG_REG (operands[i]);
15385 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15386 operands[i] = tmp;
15387 }
15388 }
15389
15390 /* Make sure there is no dependency between the individual loads. */
15391 if (load && REGNO (operands[0]) == REGNO (base))
15392 return false; /* RAW */
15393
15394 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15395 return false; /* WAW */
15396
15397 /* If the same input register is used in both stores
15398 when storing different constants, try to find a free register.
15399 For example, the code
15400 mov r0, 0
15401 str r0, [r2]
15402 mov r0, 1
15403 str r0, [r2, #4]
15404 can be transformed into
15405 mov r1, 0
15406 strd r1, r0, [r2]
15407 in Thumb mode assuming that r1 is free. */
15408 if (const_store
15409 && REGNO (operands[0]) == REGNO (operands[1])
15410 && INTVAL (operands[4]) != INTVAL (operands[5]))
15411 {
15412 if (TARGET_THUMB2)
15413 {
15414 CLEAR_HARD_REG_SET (regset);
15415 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15416 if (tmp == NULL_RTX)
15417 return false;
15418
15419 /* Use the new register in the first load to ensure that
15420 if the original input register is not dead after peephole,
15421 then it will have the correct constant value. */
15422 operands[0] = tmp;
15423 }
15424 else if (TARGET_ARM)
15425 {
15426 /* ??? The ARM-mode handling below is disabled by this early return,
   so the code that follows is currently unreachable.  */
return false;
15427 int regno = REGNO (operands[0]);
15428 if (!peep2_reg_dead_p (4, operands[0]))
15429 {
15430 /* When the input register is even and is not dead after the
15431 pattern, it has to hold the second constant but we cannot
15432 form a legal STRD in ARM mode with this register as the second
15433 register. */
15434 if (regno % 2 == 0)
15435 return false;
15436
15437 /* Is regno-1 free? */
15438 SET_HARD_REG_SET (regset);
15439 CLEAR_HARD_REG_BIT (regset, regno - 1);
15440 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15441 if (tmp == NULL_RTX)
15442 return false;
15443
15444 operands[0] = tmp;
15445 }
15446 else
15447 {
15448 /* Find a DImode register. */
15449 CLEAR_HARD_REG_SET (regset);
15450 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15451 if (tmp != NULL_RTX)
15452 {
15453 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15454 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15455 }
15456 else
15457 {
15458 /* Can we use the input register to form a DI register? */
15459 SET_HARD_REG_SET (regset);
15460 CLEAR_HARD_REG_BIT (regset,
15461 regno % 2 == 0 ? regno + 1 : regno - 1);
15462 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15463 if (tmp == NULL_RTX)
15464 return false;
15465 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15466 }
15467 }
15468
15469 gcc_assert (operands[0] != NULL_RTX);
15470 gcc_assert (operands[1] != NULL_RTX);
15471 gcc_assert (REGNO (operands[0]) % 2 == 0);
15472 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15473 }
15474 }
15475
15476 /* Make sure the instructions are ordered with lower memory access first. */
15477 if (offsets[0] > offsets[1])
15478 {
15479 gap = offsets[0] - offsets[1];
15480 offset = offsets[1];
15481
15482 /* Swap the instructions such that lower memory is accessed first. */
15483 SWAP_RTX (operands[0], operands[1]);
15484 SWAP_RTX (operands[2], operands[3]);
15485 if (const_store)
15486 SWAP_RTX (operands[4], operands[5]);
15487 }
15488 else
15489 {
15490 gap = offsets[1] - offsets[0];
15491 offset = offsets[0];
15492 }
15493
15494 /* Make sure accesses are to consecutive memory locations. */
15495 if (gap != 4)
15496 return false;
15497
15498 /* Make sure we generate legal instructions. */
15499 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15500 false, load))
15501 return true;
15502
15503 /* In Thumb state the register operands are almost unconstrained, so
15504 there is little hope of fixing a failure by renaming registers. */
15505 if (TARGET_THUMB2)
15506 return false;
15507
15508 if (load && commute)
15509 {
15510 /* Try reordering registers. */
15511 SWAP_RTX (operands[0], operands[1]);
15512 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15513 false, load))
15514 return true;
15515 }
15516
15517 if (const_store)
15518 {
15519 /* If input registers are dead after this pattern, they can be
15520 reordered or replaced by other registers that are free in the
15521 current pattern. */
15522 if (!peep2_reg_dead_p (4, operands[0])
15523 || !peep2_reg_dead_p (4, operands[1]))
15524 return false;
15525
15526 /* Try to reorder the input registers. */
15527 /* For example, the code
15528 mov r0, 0
15529 mov r1, 1
15530 str r1, [r2]
15531 str r0, [r2, #4]
15532 can be transformed into
15533 mov r1, 0
15534 mov r0, 1
15535 strd r0, r1, [r2]
15536 */
15537 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15538 false, false))
15539 {
15540 SWAP_RTX (operands[0], operands[1]);
15541 return true;
15542 }
15543
15544 /* Try to find a free DI register. */
15545 CLEAR_HARD_REG_SET (regset);
15546 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15547 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15548 while (true)
15549 {
15550 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15551 if (tmp == NULL_RTX)
15552 return false;
15553
15554 /* DREG must be an even-numbered register in DImode.
15555 Split it into SI registers. */
15556 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15557 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15558 gcc_assert (operands[0] != NULL_RTX);
15559 gcc_assert (operands[1] != NULL_RTX);
15560 gcc_assert (REGNO (operands[0]) % 2 == 0);
15561 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15562
15563 return (operands_ok_ldrd_strd (operands[0], operands[1],
15564 base, offset,
15565 false, load));
15566 }
15567 }
15568
15569 return false;
15570 }
15571 #undef SWAP_RTX
15572
15573
15574
15575 \f
15576 /* Print a symbolic form of X to the debug file, F. */
15577 static void
15578 arm_print_value (FILE *f, rtx x)
15579 {
15580 switch (GET_CODE (x))
15581 {
15582 case CONST_INT:
15583 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15584 return;
15585
15586 case CONST_DOUBLE:
15587 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15588 return;
15589
15590 case CONST_VECTOR:
15591 {
15592 int i;
15593
15594 fprintf (f, "<");
15595 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15596 {
15597 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15598 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15599 fputc (',', f);
15600 }
15601 fprintf (f, ">");
15602 }
15603 return;
15604
15605 case CONST_STRING:
15606 fprintf (f, "\"%s\"", XSTR (x, 0));
15607 return;
15608
15609 case SYMBOL_REF:
15610 fprintf (f, "`%s'", XSTR (x, 0));
15611 return;
15612
15613 case LABEL_REF:
15614 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15615 return;
15616
15617 case CONST:
15618 arm_print_value (f, XEXP (x, 0));
15619 return;
15620
15621 case PLUS:
15622 arm_print_value (f, XEXP (x, 0));
15623 fprintf (f, "+");
15624 arm_print_value (f, XEXP (x, 1));
15625 return;
15626
15627 case PC:
15628 fprintf (f, "pc");
15629 return;
15630
15631 default:
15632 fprintf (f, "????");
15633 return;
15634 }
15635 }
15636 \f
15637 /* Routines for manipulation of the constant pool. */
15638
15639 /* Arm instructions cannot load a large constant directly into a
15640 register; they have to come from a pc relative load. The constant
15641 must therefore be placed in the addressable range of the pc
15642 relative load. Depending on the precise pc relative load
15643 instruction the range is somewhere between 256 bytes and 4k. This
15644 means that we often have to dump a constant inside a function, and
15645 generate code to branch around it.
15646
15647 It is important to minimize this, since the branches will slow
15648 things down and make the code larger.
15649
15650 Normally we can hide the table after an existing unconditional
15651 branch so that there is no interruption of the flow, but in the
15652 worst case the code looks like this:
15653
15654 ldr rn, L1
15655 ...
15656 b L2
15657 align
15658 L1: .long value
15659 L2:
15660 ...
15661
15662 ldr rn, L3
15663 ...
15664 b L4
15665 align
15666 L3: .long value
15667 L4:
15668 ...
15669
15670 We fix this by performing a scan after scheduling, which notices
15671 which instructions need to have their operands fetched from the
15672 constant table and builds the table.
15673
15674 The algorithm starts by building a table of all the constants that
15675 need fixing up and all the natural barriers in the function (places
15676 where a constant table can be dropped without breaking the flow).
15677 For each fixup we note how far the pc-relative replacement will be
15678 able to reach and the offset of the instruction into the function.
15679
15680 Having built the table we then group the fixes together to form
15681 tables that are as large as possible (subject to addressing
15682 constraints) and emit each table of constants after the last
15683 barrier that is within range of all the instructions in the group.
15684 If a group does not contain a barrier, then we forcibly create one
15685 by inserting a jump instruction into the flow. Once the table has
15686 been inserted, the insns are then modified to reference the
15687 relevant entry in the pool.
15688
15689 Possible enhancements to the algorithm (not implemented) are:
15690
15691 1) For some processors and object formats, there may be benefit in
15692 aligning the pools to the start of cache lines; this alignment
15693 would need to be taken into account when calculating addressability
15694 of a pool. */
15695
15696 /* These typedefs are located at the start of this file, so that
15697 they can be used in the prototypes there. This comment is to
15698 remind readers of that fact so that the following structures
15699 can be understood more easily.
15700
15701 typedef struct minipool_node Mnode;
15702 typedef struct minipool_fixup Mfix; */
15703
15704 struct minipool_node
15705 {
15706 /* Doubly linked chain of entries. */
15707 Mnode * next;
15708 Mnode * prev;
15709 /* The maximum offset into the code at which this entry can be placed. While
15710 pushing fixes for forward references, all entries are sorted in order
15711 of increasing max_address. */
15712 HOST_WIDE_INT max_address;
15713 /* Similarly for an entry inserted for a backwards ref. */
15714 HOST_WIDE_INT min_address;
15715 /* The number of fixes referencing this entry. This can become zero
15716 if we "unpush" an entry. In this case we ignore the entry when we
15717 come to emit the code. */
15718 int refcount;
15719 /* The offset from the start of the minipool. */
15720 HOST_WIDE_INT offset;
15721 /* The value in the table. */
15722 rtx value;
15723 /* The mode of value. */
15724 enum machine_mode mode;
15725 /* The size of the value. With iWMMXt enabled
15726 sizes > 4 also imply an alignment of 8-bytes. */
15727 int fix_size;
15728 };
15729
15730 struct minipool_fixup
15731 {
15732 Mfix * next;
15733 rtx insn;
15734 HOST_WIDE_INT address;
15735 rtx * loc;
15736 enum machine_mode mode;
15737 int fix_size;
15738 rtx value;
15739 Mnode * minipool;
15740 HOST_WIDE_INT forwards;
15741 HOST_WIDE_INT backwards;
15742 };
15743
15744 /* Fixes less than a word need padding out to a word boundary. */
15745 #define MINIPOOL_FIX_SIZE(mode) \
15746 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
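
/* E.g. a QImode or HImode constant still occupies 4 bytes in the pool,
   while DImode and DFmode constants keep their natural 8 bytes.  */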
15747
15748 static Mnode * minipool_vector_head;
15749 static Mnode * minipool_vector_tail;
15750 static rtx minipool_vector_label;
15751 static int minipool_pad;
15752
15753 /* The linked list of all minipool fixes required for this function. */
15754 Mfix * minipool_fix_head;
15755 Mfix * minipool_fix_tail;
15756 /* The fix entry for the current minipool, once it has been placed. */
15757 Mfix * minipool_barrier;
15758
15759 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15760 #define JUMP_TABLES_IN_TEXT_SECTION 0
15761 #endif
15762
15763 static HOST_WIDE_INT
15764 get_jump_table_size (rtx insn)
15765 {
15766 /* ADDR_VECs only take room if read-only data goes into the text
15767 section. */
15768 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15769 {
15770 rtx body = PATTERN (insn);
15771 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15772 HOST_WIDE_INT size;
15773 HOST_WIDE_INT modesize;
15774
15775 modesize = GET_MODE_SIZE (GET_MODE (body));
15776 size = modesize * XVECLEN (body, elt);
15777 switch (modesize)
15778 {
15779 case 1:
15780 /* Round up size of TBB table to a halfword boundary. */
15781 size = (size + 1) & ~(HOST_WIDE_INT)1;
15782 break;
15783 case 2:
15784 /* No padding necessary for TBH. */
15785 break;
15786 case 4:
15787 /* Add two bytes for alignment on Thumb. */
15788 if (TARGET_THUMB)
15789 size += 2;
15790 break;
15791 default:
15792 gcc_unreachable ();
15793 }
15794 return size;
15795 }
15796
15797 return 0;
15798 }
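
/* For example, a Thumb-2 TBB dispatch table (QImode entries) with five
   cases takes 5 bytes, rounded up to 6 so the following code stays
   half-word aligned; a 32-bit ADDR_VEC with five entries on Thumb is
   counted as 20 bytes plus 2 bytes of alignment padding.  */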
15799
15800 /* Return the maximum amount of padding that will be inserted before
15801 label LABEL. */
15802
15803 static HOST_WIDE_INT
15804 get_label_padding (rtx label)
15805 {
15806 HOST_WIDE_INT align, min_insn_size;
15807
15808 align = 1 << label_to_alignment (label);
15809 min_insn_size = TARGET_THUMB ? 2 : 4;
15810 return align > min_insn_size ? align - min_insn_size : 0;
15811 }
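
/* E.g. a label aligned to an 8-byte boundary can be preceded by up to
   6 bytes of padding in Thumb state (minimum insn size 2) and by up to
   4 bytes in ARM state.  */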
15812
15813 /* Move a minipool fix MP from its current location to before MAX_MP.
15814 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15815 constraints may need updating. */
15816 static Mnode *
15817 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15818 HOST_WIDE_INT max_address)
15819 {
15820 /* The code below assumes these are different. */
15821 gcc_assert (mp != max_mp);
15822
15823 if (max_mp == NULL)
15824 {
15825 if (max_address < mp->max_address)
15826 mp->max_address = max_address;
15827 }
15828 else
15829 {
15830 if (max_address > max_mp->max_address - mp->fix_size)
15831 mp->max_address = max_mp->max_address - mp->fix_size;
15832 else
15833 mp->max_address = max_address;
15834
15835 /* Unlink MP from its current position. Since max_mp is non-null,
15836 mp->prev must be non-null. */
15837 mp->prev->next = mp->next;
15838 if (mp->next != NULL)
15839 mp->next->prev = mp->prev;
15840 else
15841 minipool_vector_tail = mp->prev;
15842
15843 /* Re-insert it before MAX_MP. */
15844 mp->next = max_mp;
15845 mp->prev = max_mp->prev;
15846 max_mp->prev = mp;
15847
15848 if (mp->prev != NULL)
15849 mp->prev->next = mp;
15850 else
15851 minipool_vector_head = mp;
15852 }
15853
15854 /* Save the new entry. */
15855 max_mp = mp;
15856
15857 /* Scan over the preceding entries and adjust their addresses as
15858 required. */
15859 while (mp->prev != NULL
15860 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15861 {
15862 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15863 mp = mp->prev;
15864 }
15865
15866 return max_mp;
15867 }
15868
15869 /* Add a constant to the minipool for a forward reference. Returns the
15870 node added or NULL if the constant will not fit in this pool. */
15871 static Mnode *
15872 add_minipool_forward_ref (Mfix *fix)
15873 {
15874 /* If set, max_mp is the first pool_entry that has a lower
15875 constraint than the one we are trying to add. */
15876 Mnode * max_mp = NULL;
15877 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15878 Mnode * mp;
15879
15880 /* If the minipool starts before the end of FIX->INSN then this FIX
15881 can not be placed into the current pool. Furthermore, adding the
15882 new constant pool entry may cause the pool to start FIX_SIZE bytes
15883 earlier. */
15884 if (minipool_vector_head
15885 && (fix->address + get_attr_length (fix->insn)
15886 >= minipool_vector_head->max_address - fix->fix_size))
15887 return NULL;
15888
15889 /* Scan the pool to see if a constant with the same value has
15890 already been added. While we are doing this, also note the
15891 location where we must insert the constant if it doesn't already
15892 exist. */
15893 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15894 {
15895 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15896 && fix->mode == mp->mode
15897 && (!LABEL_P (fix->value)
15898 || (CODE_LABEL_NUMBER (fix->value)
15899 == CODE_LABEL_NUMBER (mp->value)))
15900 && rtx_equal_p (fix->value, mp->value))
15901 {
15902 /* More than one fix references this entry. */
15903 mp->refcount++;
15904 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15905 }
15906
15907 /* Note the insertion point if necessary. */
15908 if (max_mp == NULL
15909 && mp->max_address > max_address)
15910 max_mp = mp;
15911
15912 /* If we are inserting an 8-byte aligned quantity and
15913 we have not already found an insertion point, then
15914 make sure that all such 8-byte aligned quantities are
15915 placed at the start of the pool. */
15916 if (ARM_DOUBLEWORD_ALIGN
15917 && max_mp == NULL
15918 && fix->fix_size >= 8
15919 && mp->fix_size < 8)
15920 {
15921 max_mp = mp;
15922 max_address = mp->max_address;
15923 }
15924 }
15925
15926 /* The value is not currently in the minipool, so we need to create
15927 a new entry for it. If MAX_MP is NULL, the entry will be put on
15928 the end of the list since the placement is less constrained than
15929 any existing entry. Otherwise, we insert the new fix before
15930 MAX_MP and, if necessary, adjust the constraints on the other
15931 entries. */
15932 mp = XNEW (Mnode);
15933 mp->fix_size = fix->fix_size;
15934 mp->mode = fix->mode;
15935 mp->value = fix->value;
15936 mp->refcount = 1;
15937 /* Not yet required for a backwards ref. */
15938 mp->min_address = -65536;
15939
15940 if (max_mp == NULL)
15941 {
15942 mp->max_address = max_address;
15943 mp->next = NULL;
15944 mp->prev = minipool_vector_tail;
15945
15946 if (mp->prev == NULL)
15947 {
15948 minipool_vector_head = mp;
15949 minipool_vector_label = gen_label_rtx ();
15950 }
15951 else
15952 mp->prev->next = mp;
15953
15954 minipool_vector_tail = mp;
15955 }
15956 else
15957 {
15958 if (max_address > max_mp->max_address - mp->fix_size)
15959 mp->max_address = max_mp->max_address - mp->fix_size;
15960 else
15961 mp->max_address = max_address;
15962
15963 mp->next = max_mp;
15964 mp->prev = max_mp->prev;
15965 max_mp->prev = mp;
15966 if (mp->prev != NULL)
15967 mp->prev->next = mp;
15968 else
15969 minipool_vector_head = mp;
15970 }
15971
15972 /* Save the new entry. */
15973 max_mp = mp;
15974
15975 /* Scan over the preceding entries and adjust their addresses as
15976 required. */
15977 while (mp->prev != NULL
15978 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15979 {
15980 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15981 mp = mp->prev;
15982 }
15983
15984 return max_mp;
15985 }
15986
15987 static Mnode *
15988 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15989 HOST_WIDE_INT min_address)
15990 {
15991 HOST_WIDE_INT offset;
15992
15993 /* The code below assumes these are different. */
15994 gcc_assert (mp != min_mp);
15995
15996 if (min_mp == NULL)
15997 {
15998 if (min_address > mp->min_address)
15999 mp->min_address = min_address;
16000 }
16001 else
16002 {
16003 /* We will adjust this below if it is too loose. */
16004 mp->min_address = min_address;
16005
16006 /* Unlink MP from its current position. Since min_mp is non-null,
16007 mp->next must be non-null. */
16008 mp->next->prev = mp->prev;
16009 if (mp->prev != NULL)
16010 mp->prev->next = mp->next;
16011 else
16012 minipool_vector_head = mp->next;
16013
16014 /* Reinsert it after MIN_MP. */
16015 mp->prev = min_mp;
16016 mp->next = min_mp->next;
16017 min_mp->next = mp;
16018 if (mp->next != NULL)
16019 mp->next->prev = mp;
16020 else
16021 minipool_vector_tail = mp;
16022 }
16023
16024 min_mp = mp;
16025
16026 offset = 0;
16027 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16028 {
16029 mp->offset = offset;
16030 if (mp->refcount > 0)
16031 offset += mp->fix_size;
16032
16033 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16034 mp->next->min_address = mp->min_address + mp->fix_size;
16035 }
16036
16037 return min_mp;
16038 }
16039
16040 /* Add a constant to the minipool for a backward reference. Returns the
16041 node added or NULL if the constant will not fit in this pool.
16042
16043 Note that the code for insertion for a backwards reference can be
16044 somewhat confusing because the calculated offsets for each fix do
16045 not take into account the size of the pool (which is still under
16046 construction). */
16047 static Mnode *
16048 add_minipool_backward_ref (Mfix *fix)
16049 {
16050 /* If set, min_mp is the last pool_entry that has a lower constraint
16051 than the one we are trying to add. */
16052 Mnode *min_mp = NULL;
16053 /* This can be negative, since it is only a constraint. */
16054 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16055 Mnode *mp;
16056
16057 /* If we can't reach the current pool from this insn, or if we can't
16058 insert this entry at the end of the pool without pushing other
16059 fixes out of range, then we don't try. This ensures that we
16060 can't fail later on. */
16061 if (min_address >= minipool_barrier->address
16062 || (minipool_vector_tail->min_address + fix->fix_size
16063 >= minipool_barrier->address))
16064 return NULL;
16065
16066 /* Scan the pool to see if a constant with the same value has
16067 already been added. While we are doing this, also note the
16068 location where we must insert the constant if it doesn't already
16069 exist. */
16070 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16071 {
16072 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16073 && fix->mode == mp->mode
16074 && (!LABEL_P (fix->value)
16075 || (CODE_LABEL_NUMBER (fix->value)
16076 == CODE_LABEL_NUMBER (mp->value)))
16077 && rtx_equal_p (fix->value, mp->value)
16078 /* Check that there is enough slack to move this entry to the
16079 end of the table (this is conservative). */
16080 && (mp->max_address
16081 > (minipool_barrier->address
16082 + minipool_vector_tail->offset
16083 + minipool_vector_tail->fix_size)))
16084 {
16085 mp->refcount++;
16086 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16087 }
16088
16089 if (min_mp != NULL)
16090 mp->min_address += fix->fix_size;
16091 else
16092 {
16093 /* Note the insertion point if necessary. */
16094 if (mp->min_address < min_address)
16095 {
16096 /* For now, we do not allow the insertion of 8-byte alignment
16097 requiring nodes anywhere but at the start of the pool. */
16098 if (ARM_DOUBLEWORD_ALIGN
16099 && fix->fix_size >= 8 && mp->fix_size < 8)
16100 return NULL;
16101 else
16102 min_mp = mp;
16103 }
16104 else if (mp->max_address
16105 < minipool_barrier->address + mp->offset + fix->fix_size)
16106 {
16107 /* Inserting before this entry would push the fix beyond
16108 its maximum address (which can happen if we have
16109 re-located a forwards fix); force the new fix to come
16110 after it. */
16111 if (ARM_DOUBLEWORD_ALIGN
16112 && fix->fix_size >= 8 && mp->fix_size < 8)
16113 return NULL;
16114 else
16115 {
16116 min_mp = mp;
16117 min_address = mp->min_address + fix->fix_size;
16118 }
16119 }
16120 /* Do not insert a non-8-byte aligned quantity before 8-byte
16121 aligned quantities. */
16122 else if (ARM_DOUBLEWORD_ALIGN
16123 && fix->fix_size < 8
16124 && mp->fix_size >= 8)
16125 {
16126 min_mp = mp;
16127 min_address = mp->min_address + fix->fix_size;
16128 }
16129 }
16130 }
16131
16132 /* We need to create a new entry. */
16133 mp = XNEW (Mnode);
16134 mp->fix_size = fix->fix_size;
16135 mp->mode = fix->mode;
16136 mp->value = fix->value;
16137 mp->refcount = 1;
16138 mp->max_address = minipool_barrier->address + 65536;
16139
16140 mp->min_address = min_address;
16141
16142 if (min_mp == NULL)
16143 {
16144 mp->prev = NULL;
16145 mp->next = minipool_vector_head;
16146
16147 if (mp->next == NULL)
16148 {
16149 minipool_vector_tail = mp;
16150 minipool_vector_label = gen_label_rtx ();
16151 }
16152 else
16153 mp->next->prev = mp;
16154
16155 minipool_vector_head = mp;
16156 }
16157 else
16158 {
16159 mp->next = min_mp->next;
16160 mp->prev = min_mp;
16161 min_mp->next = mp;
16162
16163 if (mp->next != NULL)
16164 mp->next->prev = mp;
16165 else
16166 minipool_vector_tail = mp;
16167 }
16168
16169 /* Save the new entry. */
16170 min_mp = mp;
16171
16172 if (mp->prev)
16173 mp = mp->prev;
16174 else
16175 mp->offset = 0;
16176
16177 /* Scan over the following entries and adjust their offsets. */
16178 while (mp->next != NULL)
16179 {
16180 if (mp->next->min_address < mp->min_address + mp->fix_size)
16181 mp->next->min_address = mp->min_address + mp->fix_size;
16182
16183 if (mp->refcount)
16184 mp->next->offset = mp->offset + mp->fix_size;
16185 else
16186 mp->next->offset = mp->offset;
16187
16188 mp = mp->next;
16189 }
16190
16191 return min_mp;
16192 }
16193
16194 static void
16195 assign_minipool_offsets (Mfix *barrier)
16196 {
16197 HOST_WIDE_INT offset = 0;
16198 Mnode *mp;
16199
16200 minipool_barrier = barrier;
16201
16202 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16203 {
16204 mp->offset = offset;
16205
16206 if (mp->refcount > 0)
16207 offset += mp->fix_size;
16208 }
16209 }
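
/* For instance, three live entries of sizes 4, 4 and 8 receive offsets
   0, 4 and 8; an entry whose refcount has dropped to zero still gets an
   offset but does not advance the running total for its successors.  */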
16210
16211 /* Output the literal table */
16212 static void
16213 dump_minipool (rtx scan)
16214 {
16215 Mnode * mp;
16216 Mnode * nmp;
16217 int align64 = 0;
16218
16219 if (ARM_DOUBLEWORD_ALIGN)
16220 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16221 if (mp->refcount > 0 && mp->fix_size >= 8)
16222 {
16223 align64 = 1;
16224 break;
16225 }
16226
16227 if (dump_file)
16228 fprintf (dump_file,
16229 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16230 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16231
16232 scan = emit_label_after (gen_label_rtx (), scan);
16233 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16234 scan = emit_label_after (minipool_vector_label, scan);
16235
16236 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16237 {
16238 if (mp->refcount > 0)
16239 {
16240 if (dump_file)
16241 {
16242 fprintf (dump_file,
16243 ";; Offset %u, min %ld, max %ld ",
16244 (unsigned) mp->offset, (unsigned long) mp->min_address,
16245 (unsigned long) mp->max_address);
16246 arm_print_value (dump_file, mp->value);
16247 fputc ('\n', dump_file);
16248 }
16249
16250 switch (mp->fix_size)
16251 {
16252 #ifdef HAVE_consttable_1
16253 case 1:
16254 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16255 break;
16256
16257 #endif
16258 #ifdef HAVE_consttable_2
16259 case 2:
16260 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16261 break;
16262
16263 #endif
16264 #ifdef HAVE_consttable_4
16265 case 4:
16266 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16267 break;
16268
16269 #endif
16270 #ifdef HAVE_consttable_8
16271 case 8:
16272 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16273 break;
16274
16275 #endif
16276 #ifdef HAVE_consttable_16
16277 case 16:
16278 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16279 break;
16280
16281 #endif
16282 default:
16283 gcc_unreachable ();
16284 }
16285 }
16286
16287 nmp = mp->next;
16288 free (mp);
16289 }
16290
16291 minipool_vector_head = minipool_vector_tail = NULL;
16292 scan = emit_insn_after (gen_consttable_end (), scan);
16293 scan = emit_barrier_after (scan);
16294 }
16295
16296 /* Return the cost of forcibly inserting a barrier after INSN. */
16297 static int
16298 arm_barrier_cost (rtx insn)
16299 {
16300 /* Basing the location of the pool on the loop depth is preferable,
16301 but at the moment, the basic block information seems to be
16302 corrupt by this stage of the compilation. */
16303 int base_cost = 50;
16304 rtx next = next_nonnote_insn (insn);
16305
16306 if (next != NULL && LABEL_P (next))
16307 base_cost -= 20;
16308
16309 switch (GET_CODE (insn))
16310 {
16311 case CODE_LABEL:
16312 /* It will always be better to place the table before the label, rather
16313 than after it. */
16314 return 50;
16315
16316 case INSN:
16317 case CALL_INSN:
16318 return base_cost;
16319
16320 case JUMP_INSN:
16321 return base_cost - 10;
16322
16323 default:
16324 return base_cost + 10;
16325 }
16326 }
16327
16328 /* Find the best place in the insn stream in the range
16329 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16330 Create the barrier by inserting a jump and add a new fix entry for
16331 it. */
16332 static Mfix *
16333 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16334 {
16335 HOST_WIDE_INT count = 0;
16336 rtx barrier;
16337 rtx from = fix->insn;
16338 /* The instruction after which we will insert the jump. */
16339 rtx selected = NULL;
16340 int selected_cost;
16341 /* The address at which the jump instruction will be placed. */
16342 HOST_WIDE_INT selected_address;
16343 Mfix * new_fix;
16344 HOST_WIDE_INT max_count = max_address - fix->address;
16345 rtx label = gen_label_rtx ();
16346
16347 selected_cost = arm_barrier_cost (from);
16348 selected_address = fix->address;
16349
16350 while (from && count < max_count)
16351 {
16352 rtx tmp;
16353 int new_cost;
16354
16355 /* This code shouldn't have been called if there was a natural barrier
16356 within range. */
16357 gcc_assert (!BARRIER_P (from));
16358
16359 /* Count the length of this insn. This must stay in sync with the
16360 code that pushes minipool fixes. */
16361 if (LABEL_P (from))
16362 count += get_label_padding (from);
16363 else
16364 count += get_attr_length (from);
16365
16366 /* If there is a jump table, add its length. */
16367 if (tablejump_p (from, NULL, &tmp))
16368 {
16369 count += get_jump_table_size (tmp);
16370
16371 /* Jump tables aren't in a basic block, so base the cost on
16372 the dispatch insn. If we select this location, we will
16373 still put the pool after the table. */
16374 new_cost = arm_barrier_cost (from);
16375
16376 if (count < max_count
16377 && (!selected || new_cost <= selected_cost))
16378 {
16379 selected = tmp;
16380 selected_cost = new_cost;
16381 selected_address = fix->address + count;
16382 }
16383
16384 /* Continue after the dispatch table. */
16385 from = NEXT_INSN (tmp);
16386 continue;
16387 }
16388
16389 new_cost = arm_barrier_cost (from);
16390
16391 if (count < max_count
16392 && (!selected || new_cost <= selected_cost))
16393 {
16394 selected = from;
16395 selected_cost = new_cost;
16396 selected_address = fix->address + count;
16397 }
16398
16399 from = NEXT_INSN (from);
16400 }
16401
16402 /* Make sure that we found a place to insert the jump. */
16403 gcc_assert (selected);
16404
16405 /* Make sure we do not split a call and its corresponding
16406 CALL_ARG_LOCATION note. */
16407 if (CALL_P (selected))
16408 {
16409 rtx next = NEXT_INSN (selected);
16410 if (next && NOTE_P (next)
16411 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16412 selected = next;
16413 }
16414
16415 /* Create a new JUMP_INSN that branches around a barrier. */
16416 from = emit_jump_insn_after (gen_jump (label), selected);
16417 JUMP_LABEL (from) = label;
16418 barrier = emit_barrier_after (from);
16419 emit_label_after (label, barrier);
16420
16421 /* Create a minipool barrier entry for the new barrier. */
16422 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16423 new_fix->insn = barrier;
16424 new_fix->address = selected_address;
16425 new_fix->next = fix->next;
16426 fix->next = new_fix;
16427
16428 return new_fix;
16429 }
16430
16431 /* Record that there is a natural barrier in the insn stream at
16432 ADDRESS. */
16433 static void
16434 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16435 {
16436 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16437
16438 fix->insn = insn;
16439 fix->address = address;
16440
16441 fix->next = NULL;
16442 if (minipool_fix_head != NULL)
16443 minipool_fix_tail->next = fix;
16444 else
16445 minipool_fix_head = fix;
16446
16447 minipool_fix_tail = fix;
16448 }
16449
16450 /* Record INSN, which will need fixing up to load a value from the
16451 minipool. ADDRESS is the offset of the insn since the start of the
16452 function; LOC is a pointer to the part of the insn which requires
16453 fixing; VALUE is the constant that must be loaded, which is of type
16454 MODE. */
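/* The pool_range / neg_pool_range insn attributes give the maximum forward
   and backward distance (in bytes) between the insn and its pool entry;
   they are recorded below as fix->forwards / fix->backwards and drive the
   pool placement decisions later in this pass.  */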
16455 static void
16456 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16457 enum machine_mode mode, rtx value)
16458 {
16459 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16460
16461 fix->insn = insn;
16462 fix->address = address;
16463 fix->loc = loc;
16464 fix->mode = mode;
16465 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16466 fix->value = value;
16467 fix->forwards = get_attr_pool_range (insn);
16468 fix->backwards = get_attr_neg_pool_range (insn);
16469 fix->minipool = NULL;
16470
16471 /* If an insn doesn't have a range defined for it, then it isn't
16472 expecting to be reworked by this code. Better to stop now than
16473 to generate duff assembly code. */
16474 gcc_assert (fix->forwards || fix->backwards);
16475
16476 /* If an entry requires 8-byte alignment then assume all constant pools
16477 require 4 bytes of padding. Trying to do this later on a per-pool
16478 basis is awkward because existing pool entries have to be modified. */
16479 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16480 minipool_pad = 4;
16481
16482 if (dump_file)
16483 {
16484 fprintf (dump_file,
16485 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16486 GET_MODE_NAME (mode),
16487 INSN_UID (insn), (unsigned long) address,
16488 -1 * (long)fix->backwards, (long)fix->forwards);
16489 arm_print_value (dump_file, fix->value);
16490 fprintf (dump_file, "\n");
16491 }
16492
16493 /* Add it to the chain of fixes. */
16494 fix->next = NULL;
16495
16496 if (minipool_fix_head != NULL)
16497 minipool_fix_tail->next = fix;
16498 else
16499 minipool_fix_head = fix;
16500
16501 minipool_fix_tail = fix;
16502 }
16503
16504 /* Return the maximum allowed cost of synthesizing a 64-bit constant
16505 inline, expressed as a number of insns; return 99 if we always want to
16506 synthesize the value. */
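/* The intent is that callers compare arm_const_double_inline_cost against
   this limit: a 64-bit constant whose two halves cost more insns than this
   to build is expected to be loaded from a literal pool instead, unless
   literal pools are disabled.  */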
16507 int
16508 arm_max_const_double_inline_cost ()
16509 {
16510 /* Let the value get synthesized to avoid the use of literal pools. */
16511 if (arm_disable_literal_pool)
16512 return 99;
16513
16514 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16515 }
16516
16517 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16518 Returns the number of insns needed, or 99 if we don't know how to
16519 do it. */
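/* For example, the constant 0x0000000100000001 splits into two SImode
   halves of 1; each half is a valid ARM immediate, so the total cost is
   2 insns.  */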
16520 int
16521 arm_const_double_inline_cost (rtx val)
16522 {
16523 rtx lowpart, highpart;
16524 enum machine_mode mode;
16525
16526 mode = GET_MODE (val);
16527
16528 if (mode == VOIDmode)
16529 mode = DImode;
16530
16531 gcc_assert (GET_MODE_SIZE (mode) == 8);
16532
16533 lowpart = gen_lowpart (SImode, val);
16534 highpart = gen_highpart_mode (SImode, mode, val);
16535
16536 gcc_assert (CONST_INT_P (lowpart));
16537 gcc_assert (CONST_INT_P (highpart));
16538
16539 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16540 NULL_RTX, NULL_RTX, 0, 0)
16541 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16542 NULL_RTX, NULL_RTX, 0, 0));
16543 }
16544
16545 /* Return true if it is worthwhile to split a 64-bit constant into two
16546 32-bit operations. This is the case if optimizing for size, or
16547 if we have load delay slots, or if one 32-bit part can be done with
16548 a single data operation. */
16549 bool
16550 arm_const_double_by_parts (rtx val)
16551 {
16552 enum machine_mode mode = GET_MODE (val);
16553 rtx part;
16554
16555 if (optimize_size || arm_ld_sched)
16556 return true;
16557
16558 if (mode == VOIDmode)
16559 mode = DImode;
16560
16561 part = gen_highpart_mode (SImode, mode, val);
16562
16563 gcc_assert (CONST_INT_P (part));
16564
16565 if (const_ok_for_arm (INTVAL (part))
16566 || const_ok_for_arm (~INTVAL (part)))
16567 return true;
16568
16569 part = gen_lowpart (SImode, val);
16570
16571 gcc_assert (CONST_INT_P (part));
16572
16573 if (const_ok_for_arm (INTVAL (part))
16574 || const_ok_for_arm (~INTVAL (part)))
16575 return true;
16576
16577 return false;
16578 }
16579
16580 /* Return true if it is possible to inline both the high and low parts
16581 of a 64-bit constant into 32-bit data processing instructions. */
16582 bool
16583 arm_const_double_by_immediates (rtx val)
16584 {
16585 enum machine_mode mode = GET_MODE (val);
16586 rtx part;
16587
16588 if (mode == VOIDmode)
16589 mode = DImode;
16590
16591 part = gen_highpart_mode (SImode, mode, val);
16592
16593 gcc_assert (CONST_INT_P (part));
16594
16595 if (!const_ok_for_arm (INTVAL (part)))
16596 return false;
16597
16598 part = gen_lowpart (SImode, val);
16599
16600 gcc_assert (CONST_INT_P (part));
16601
16602 if (!const_ok_for_arm (INTVAL (part)))
16603 return false;
16604
16605 return true;
16606 }
16607
16608 /* Scan INSN and note any of its operands that need fixing.
16609 If DO_PUSHES is false we do not actually push any of the fixups
16610 needed. */
16611 static void
16612 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16613 {
16614 int opno;
16615
16616 extract_insn (insn);
16617
16618 if (!constrain_operands (1))
16619 fatal_insn_not_found (insn);
16620
16621 if (recog_data.n_alternatives == 0)
16622 return;
16623
16624 /* Fill in recog_op_alt with information about the constraints of
16625 this insn. */
16626 preprocess_constraints ();
16627
16628 for (opno = 0; opno < recog_data.n_operands; opno++)
16629 {
16630 /* Things we need to fix can only occur in inputs. */
16631 if (recog_data.operand_type[opno] != OP_IN)
16632 continue;
16633
16634 /* If this alternative is a memory reference, then any mention
16635 of constants in this alternative is really to fool reload
16636 into allowing us to accept one there. We need to fix them up
16637 now so that we output the right code. */
16638 if (recog_op_alt[opno][which_alternative].memory_ok)
16639 {
16640 rtx op = recog_data.operand[opno];
16641
16642 if (CONSTANT_P (op))
16643 {
16644 if (do_pushes)
16645 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16646 recog_data.operand_mode[opno], op);
16647 }
16648 else if (MEM_P (op)
16649 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16650 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16651 {
16652 if (do_pushes)
16653 {
16654 rtx cop = avoid_constant_pool_reference (op);
16655
16656 /* Casting the address of something to a mode narrower
16657 than a word can cause avoid_constant_pool_reference()
16658 to return the pool reference itself. That's no good to
16659 us here. Let's just hope that we can use the
16660 constant pool value directly. */
16661 if (op == cop)
16662 cop = get_pool_constant (XEXP (op, 0));
16663
16664 push_minipool_fix (insn, address,
16665 recog_data.operand_loc[opno],
16666 recog_data.operand_mode[opno], cop);
16667 }
16668
16669 }
16670 }
16671 }
16672
16673 return;
16674 }
16675
16676 /* Rewrite move insn into subtract of 0 if the condition codes will
16677 be useful in the next conditional jump insn. */
16678
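/* For instance (illustrative example only), in a sequence such as
      mov   r1, r2
      ...
      cmp   r1, #0
      bne   .L3
   the move can be rewritten as "subs r1, r2, #0"; the flag-setting
   subtract makes the explicit comparison with zero redundant, so later
   output logic can drop it.  */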
16679 static void
16680 thumb1_reorg (void)
16681 {
16682 basic_block bb;
16683
16684 FOR_EACH_BB_FN (bb, cfun)
16685 {
16686 rtx dest, src;
16687 rtx pat, op0, set = NULL;
16688 rtx prev, insn = BB_END (bb);
16689 bool insn_clobbered = false;
16690
16691 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
16692 insn = PREV_INSN (insn);
16693
16694 /* Find the last cbranchsi4_insn in basic block BB. */
16695 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16696 continue;
16697
16698 /* Get the register with which we are comparing. */
16699 pat = PATTERN (insn);
16700 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16701
16702 /* Find the first flag setting insn before INSN in basic block BB. */
16703 gcc_assert (insn != BB_HEAD (bb));
16704 for (prev = PREV_INSN (insn);
16705 (!insn_clobbered
16706 && prev != BB_HEAD (bb)
16707 && (NOTE_P (prev)
16708 || DEBUG_INSN_P (prev)
16709 || ((set = single_set (prev)) != NULL
16710 && get_attr_conds (prev) == CONDS_NOCOND)));
16711 prev = PREV_INSN (prev))
16712 {
16713 if (reg_set_p (op0, prev))
16714 insn_clobbered = true;
16715 }
16716
16717 /* Skip if op0 is clobbered by an insn other than PREV. */
16718 if (insn_clobbered)
16719 continue;
16720
16721 if (!set)
16722 continue;
16723
16724 dest = SET_DEST (set);
16725 src = SET_SRC (set);
16726 if (!low_register_operand (dest, SImode)
16727 || !low_register_operand (src, SImode))
16728 continue;
16729
16730 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16731 in INSN. Both src and dest of the move insn are checked. */
16732 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16733 {
16734 dest = copy_rtx (dest);
16735 src = copy_rtx (src);
16736 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16737 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16738 INSN_CODE (prev) = -1;
16739 /* Set test register in INSN to dest. */
16740 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
16741 INSN_CODE (insn) = -1;
16742 }
16743 }
16744 }
16745
16746 /* Convert instructions to their cc-clobbering variant if possible, since
16747 that allows us to use smaller encodings. */
16748
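/* For example, when the condition codes are dead, a 32-bit Thumb-2
   "add r0, r0, r1" can be converted into the flag-setting
   "adds r0, r0, r1", which has a 16-bit encoding; the CC clobber added
   below makes the insn match the flag-setting patterns.  */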
16749 static void
16750 thumb2_reorg (void)
16751 {
16752 basic_block bb;
16753 regset_head live;
16754
16755 INIT_REG_SET (&live);
16756
16757 /* We are freeing block_for_insn in the toplev to keep compatibility
16758 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16759 compute_bb_for_insn ();
16760 df_analyze ();
16761
16762 FOR_EACH_BB_FN (bb, cfun)
16763 {
16764 rtx insn;
16765
16766 COPY_REG_SET (&live, DF_LR_OUT (bb));
16767 df_simulate_initialize_backwards (bb, &live);
16768 FOR_BB_INSNS_REVERSE (bb, insn)
16769 {
16770 if (NONJUMP_INSN_P (insn)
16771 && !REGNO_REG_SET_P (&live, CC_REGNUM)
16772 && GET_CODE (PATTERN (insn)) == SET)
16773 {
16774 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
16775 rtx pat = PATTERN (insn);
16776 rtx dst = XEXP (pat, 0);
16777 rtx src = XEXP (pat, 1);
16778 rtx op0 = NULL_RTX, op1 = NULL_RTX;
16779
16780 if (!OBJECT_P (src))
16781 op0 = XEXP (src, 0);
16782
16783 if (BINARY_P (src))
16784 op1 = XEXP (src, 1);
16785
16786 if (low_register_operand (dst, SImode))
16787 {
16788 switch (GET_CODE (src))
16789 {
16790 case PLUS:
16791 /* Adding two registers and storing the result
16792 in the first source is already a 16-bit
16793 operation. */
16794 if (rtx_equal_p (dst, op0)
16795 && register_operand (op1, SImode))
16796 break;
16797
16798 if (low_register_operand (op0, SImode))
16799 {
16800 /* ADDS <Rd>,<Rn>,<Rm> */
16801 if (low_register_operand (op1, SImode))
16802 action = CONV;
16803 /* ADDS <Rdn>,#<imm8> */
16804 /* SUBS <Rdn>,#<imm8> */
16805 else if (rtx_equal_p (dst, op0)
16806 && CONST_INT_P (op1)
16807 && IN_RANGE (INTVAL (op1), -255, 255))
16808 action = CONV;
16809 /* ADDS <Rd>,<Rn>,#<imm3> */
16810 /* SUBS <Rd>,<Rn>,#<imm3> */
16811 else if (CONST_INT_P (op1)
16812 && IN_RANGE (INTVAL (op1), -7, 7))
16813 action = CONV;
16814 }
16815 /* ADCS <Rd>, <Rn> */
16816 else if (GET_CODE (XEXP (src, 0)) == PLUS
16817 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
16818 && low_register_operand (XEXP (XEXP (src, 0), 1),
16819 SImode)
16820 && COMPARISON_P (op1)
16821 && cc_register (XEXP (op1, 0), VOIDmode)
16822 && maybe_get_arm_condition_code (op1) == ARM_CS
16823 && XEXP (op1, 1) == const0_rtx)
16824 action = CONV;
16825 break;
16826
16827 case MINUS:
16828 /* RSBS <Rd>,<Rn>,#0
16829 Not handled here: see NEG below. */
16830 /* SUBS <Rd>,<Rn>,#<imm3>
16831 SUBS <Rdn>,#<imm8>
16832 Not handled here: see PLUS above. */
16833 /* SUBS <Rd>,<Rn>,<Rm> */
16834 if (low_register_operand (op0, SImode)
16835 && low_register_operand (op1, SImode))
16836 action = CONV;
16837 break;
16838
16839 case MULT:
16840 /* MULS <Rdm>,<Rn>,<Rdm>
16841 As an exception to the rule, this is only used
16842 when optimizing for size since MULS is slow on all
16843 known implementations. We do not even want to use
16844 MULS in cold code, if optimizing for speed, so we
16845 test the global flag here. */
16846 if (!optimize_size)
16847 break;
16848 /* else fall through. */
16849 case AND:
16850 case IOR:
16851 case XOR:
16852 /* ANDS <Rdn>,<Rm> */
16853 if (rtx_equal_p (dst, op0)
16854 && low_register_operand (op1, SImode))
16855 action = CONV;
16856 else if (rtx_equal_p (dst, op1)
16857 && low_register_operand (op0, SImode))
16858 action = SWAP_CONV;
16859 break;
16860
16861 case ASHIFTRT:
16862 case ASHIFT:
16863 case LSHIFTRT:
16864 /* ASRS <Rdn>,<Rm> */
16865 /* LSRS <Rdn>,<Rm> */
16866 /* LSLS <Rdn>,<Rm> */
16867 if (rtx_equal_p (dst, op0)
16868 && low_register_operand (op1, SImode))
16869 action = CONV;
16870 /* ASRS <Rd>,<Rm>,#<imm5> */
16871 /* LSRS <Rd>,<Rm>,#<imm5> */
16872 /* LSLS <Rd>,<Rm>,#<imm5> */
16873 else if (low_register_operand (op0, SImode)
16874 && CONST_INT_P (op1)
16875 && IN_RANGE (INTVAL (op1), 0, 31))
16876 action = CONV;
16877 break;
16878
16879 case ROTATERT:
16880 /* RORS <Rdn>,<Rm> */
16881 if (rtx_equal_p (dst, op0)
16882 && low_register_operand (op1, SImode))
16883 action = CONV;
16884 break;
16885
16886 case NOT:
16887 case NEG:
16888 /* MVNS <Rd>,<Rm> */
16889 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16890 if (low_register_operand (op0, SImode))
16891 action = CONV;
16892 break;
16893
16894 case CONST_INT:
16895 /* MOVS <Rd>,#<imm8> */
16896 if (CONST_INT_P (src)
16897 && IN_RANGE (INTVAL (src), 0, 255))
16898 action = CONV;
16899 break;
16900
16901 case REG:
16902 /* MOVS and MOV<c> with registers have different
16903 encodings, so are not relevant here. */
16904 break;
16905
16906 default:
16907 break;
16908 }
16909 }
16910
16911 if (action != SKIP)
16912 {
16913 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
16914 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
16915 rtvec vec;
16916
16917 if (action == SWAP_CONV)
16918 {
16919 src = copy_rtx (src);
16920 XEXP (src, 0) = op1;
16921 XEXP (src, 1) = op0;
16922 pat = gen_rtx_SET (VOIDmode, dst, src);
16923 vec = gen_rtvec (2, pat, clobber);
16924 }
16925 else /* action == CONV */
16926 vec = gen_rtvec (2, pat, clobber);
16927
16928 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
16929 INSN_CODE (insn) = -1;
16930 }
16931 }
16932
16933 if (NONDEBUG_INSN_P (insn))
16934 df_simulate_one_insn_backwards (bb, insn, &live);
16935 }
16936 }
16937
16938 CLEAR_REG_SET (&live);
16939 }
16940
16941 /* GCC puts the pool in the wrong place for ARM, since we can only
16942 load addresses a limited distance around the pc. We do some
16943 special munging to move the constant pool values to the correct
16944 point in the code. */
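/* Broadly, the pass below works in two phases: first every insn is
   scanned, recording constants that need a minipool entry
   (push_minipool_fix) and the natural barriers already present
   (push_minipool_barrier); then the list of fixes is walked, grouping
   fixes into pools, inserting a branch-around barrier where no natural
   one is in range (create_fix_barrier), and finally rewriting each fixed
   insn to load from the emitted pool (dump_minipool).  */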
16945 static void
16946 arm_reorg (void)
16947 {
16948 rtx insn;
16949 HOST_WIDE_INT address = 0;
16950 Mfix * fix;
16951
16952 if (TARGET_THUMB1)
16953 thumb1_reorg ();
16954 else if (TARGET_THUMB2)
16955 thumb2_reorg ();
16956
16957 /* Ensure all insns that must be split have been split at this point.
16958 Otherwise, the pool placement code below may compute incorrect
16959 insn lengths. Note that when optimizing, all insns have already
16960 been split at this point. */
16961 if (!optimize)
16962 split_all_insns_noflow ();
16963
16964 minipool_fix_head = minipool_fix_tail = NULL;
16965
16966 /* The first insn must always be a note, or the code below won't
16967 scan it properly. */
16968 insn = get_insns ();
16969 gcc_assert (NOTE_P (insn));
16970 minipool_pad = 0;
16971
16972 /* Scan all the insns and record the operands that will need fixing. */
16973 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
16974 {
16975 if (BARRIER_P (insn))
16976 push_minipool_barrier (insn, address);
16977 else if (INSN_P (insn))
16978 {
16979 rtx table;
16980
16981 note_invalid_constants (insn, address, true);
16982 address += get_attr_length (insn);
16983
16984 /* If the insn is a vector jump, add the size of the table
16985 and skip the table. */
16986 if (tablejump_p (insn, NULL, &table))
16987 {
16988 address += get_jump_table_size (table);
16989 insn = table;
16990 }
16991 }
16992 else if (LABEL_P (insn))
16993 /* Add the worst-case padding due to alignment. We don't add
16994 the _current_ padding because the minipool insertions
16995 themselves might change it. */
16996 address += get_label_padding (insn);
16997 }
16998
16999 fix = minipool_fix_head;
17000
17001 /* Now scan the fixups and perform the required changes. */
17002 while (fix)
17003 {
17004 Mfix * ftmp;
17005 Mfix * fdel;
17006 Mfix * last_added_fix;
17007 Mfix * last_barrier = NULL;
17008 Mfix * this_fix;
17009
17010 /* Skip any further barriers before the next fix. */
17011 while (fix && BARRIER_P (fix->insn))
17012 fix = fix->next;
17013
17014 /* No more fixes. */
17015 if (fix == NULL)
17016 break;
17017
17018 last_added_fix = NULL;
17019
17020 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17021 {
17022 if (BARRIER_P (ftmp->insn))
17023 {
17024 if (ftmp->address >= minipool_vector_head->max_address)
17025 break;
17026
17027 last_barrier = ftmp;
17028 }
17029 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17030 break;
17031
17032 last_added_fix = ftmp; /* Keep track of the last fix added. */
17033 }
17034
17035 /* If we found a barrier, drop back to that; any fixes that we
17036 could have reached but come after the barrier will now go in
17037 the next mini-pool. */
17038 if (last_barrier != NULL)
17039 {
17040 /* Reduce the refcount for those fixes that won't go into this
17041 pool after all. */
17042 for (fdel = last_barrier->next;
17043 fdel && fdel != ftmp;
17044 fdel = fdel->next)
17045 {
17046 fdel->minipool->refcount--;
17047 fdel->minipool = NULL;
17048 }
17049
17050 ftmp = last_barrier;
17051 }
17052 else
17053 {
17054 /* ftmp is the first fix that we can't fit into this pool and
17055 there are no natural barriers that we could use. Insert a
17056 new barrier in the code somewhere between the previous
17057 fix and this one, and arrange to jump around it. */
17058 HOST_WIDE_INT max_address;
17059
17060 /* The last item on the list of fixes must be a barrier, so
17061 we can never run off the end of the list of fixes without
17062 last_barrier being set. */
17063 gcc_assert (ftmp);
17064
17065 max_address = minipool_vector_head->max_address;
17066 /* Check that there isn't another fix that is in range that
17067 we couldn't fit into this pool because the pool was
17068 already too large: we need to put the pool before such an
17069 instruction. The pool itself may come just after the
17070 fix because create_fix_barrier also allows space for a
17071 jump instruction. */
17072 if (ftmp->address < max_address)
17073 max_address = ftmp->address + 1;
17074
17075 last_barrier = create_fix_barrier (last_added_fix, max_address);
17076 }
17077
17078 assign_minipool_offsets (last_barrier);
17079
17080 while (ftmp)
17081 {
17082 if (!BARRIER_P (ftmp->insn)
17083 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17084 == NULL))
17085 break;
17086
17087 ftmp = ftmp->next;
17088 }
17089
17090 /* Scan over the fixes we have identified for this pool, fixing them
17091 up and adding the constants to the pool itself. */
17092 for (this_fix = fix; this_fix && ftmp != this_fix;
17093 this_fix = this_fix->next)
17094 if (!BARRIER_P (this_fix->insn))
17095 {
17096 rtx addr
17097 = plus_constant (Pmode,
17098 gen_rtx_LABEL_REF (VOIDmode,
17099 minipool_vector_label),
17100 this_fix->minipool->offset);
17101 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17102 }
17103
17104 dump_minipool (last_barrier->insn);
17105 fix = ftmp;
17106 }
17107
17108 /* From now on we must synthesize any constants that we can't handle
17109 directly. This can happen if the RTL gets split during final
17110 instruction generation. */
17111 after_arm_reorg = 1;
17112
17113 /* Free the minipool memory. */
17114 obstack_free (&minipool_obstack, minipool_startobj);
17115 }
17116 \f
17117 /* Routines to output assembly language. */
17118
17119 /* If the rtx is the correct value then return the string of the number.
17120 In this way we can ensure that valid double constants are generated even
17121 when cross compiling. */
17122 const char *
17123 fp_immediate_constant (rtx x)
17124 {
17125 REAL_VALUE_TYPE r;
17126
17127 if (!fp_consts_inited)
17128 init_fp_table ();
17129
17130 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17131
17132 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17133 return "0";
17134 }
17135
17136 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17137 static const char *
17138 fp_const_from_val (REAL_VALUE_TYPE *r)
17139 {
17140 if (!fp_consts_inited)
17141 init_fp_table ();
17142
17143 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17144 return "0";
17145 }
17146
17147 /* OPERANDS[0] is the entire PARALLEL that constitutes the pop,
17148 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
17149 is in the list, and UPDATE is true iff the list contains an explicit
17150 update of the base register. */
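/* COND and REVERSE select the condition under which the pop executes
   (REVERSE inverts it).  For example, a pop of {r4, r5, pc} with the base
   register SP and UPDATE true comes out as "pop {r4, r5, pc}" on
   unified-syntax targets, and as "ldmfd sp!, {r4, r5, pc}" otherwise.
   (Illustrative example.)  */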
17151 void
17152 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17153 bool update)
17154 {
17155 int i;
17156 char pattern[100];
17157 int offset;
17158 const char *conditional;
17159 int num_saves = XVECLEN (operands[0], 0);
17160 unsigned int regno;
17161 unsigned int regno_base = REGNO (operands[1]);
17162
17163 offset = 0;
17164 offset += update ? 1 : 0;
17165 offset += return_pc ? 1 : 0;
17166
17167 /* Is the base register in the list? */
17168 for (i = offset; i < num_saves; i++)
17169 {
17170 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17171 /* If SP is in the list, then the base register must be SP. */
17172 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17173 /* If base register is in the list, there must be no explicit update. */
17174 if (regno == regno_base)
17175 gcc_assert (!update);
17176 }
17177
17178 conditional = reverse ? "%?%D0" : "%?%d0";
17179 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17180 {
17181 /* Output pop (not ldmfd) because it has a shorter encoding. */
17182 gcc_assert (update);
17183 sprintf (pattern, "pop%s\t{", conditional);
17184 }
17185 else
17186 {
17187 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17188 It's just a convention; their semantics are identical. */
17189 if (regno_base == SP_REGNUM)
17190 sprintf (pattern, "ldm%sfd\t", conditional);
17191 else if (TARGET_UNIFIED_ASM)
17192 sprintf (pattern, "ldmia%s\t", conditional);
17193 else
17194 sprintf (pattern, "ldm%sia\t", conditional);
17195
17196 strcat (pattern, reg_names[regno_base]);
17197 if (update)
17198 strcat (pattern, "!, {");
17199 else
17200 strcat (pattern, ", {");
17201 }
17202
17203 /* Output the first destination register. */
17204 strcat (pattern,
17205 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17206
17207 /* Output the rest of the destination registers. */
17208 for (i = offset + 1; i < num_saves; i++)
17209 {
17210 strcat (pattern, ", ");
17211 strcat (pattern,
17212 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17213 }
17214
17215 strcat (pattern, "}");
17216
17217 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17218 strcat (pattern, "^");
17219
17220 output_asm_insn (pattern, &cond);
17221 }
17222
17223
17224 /* Output the assembly for a VFP store multiple of double registers. */
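/* For a push of d8-d10, for instance, this produces something like
      fstmfdd  sp!, {d8, d9, d10}
   (illustrative; the exact operand text comes from the %m0/%P1 codes).  */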
17225
17226 const char *
17227 vfp_output_fstmd (rtx * operands)
17228 {
17229 char pattern[100];
17230 int p;
17231 int base;
17232 int i;
17233
17234 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17235 p = strlen (pattern);
17236
17237 gcc_assert (REG_P (operands[1]));
17238
17239 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17240 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17241 {
17242 p += sprintf (&pattern[p], ", d%d", base + i);
17243 }
17244 strcpy (&pattern[p], "}");
17245
17246 output_asm_insn (pattern, operands);
17247 return "";
17248 }
17249
17250
17251 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17252 number of bytes pushed. */
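/* The insn built below is a PARALLEL whose first element is the
   store-multiple (expressed through a PRE_MODIFY of the stack pointer),
   with the remaining elements marking the registers used.  A
   REG_FRAME_RELATED_EXPR note describing the same stores as individual
   SETs is attached so that correct DWARF CFI can be emitted.  */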
17253
17254 static int
17255 vfp_emit_fstmd (int base_reg, int count)
17256 {
17257 rtx par;
17258 rtx dwarf;
17259 rtx tmp, reg;
17260 int i;
17261
17262 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17263 register pairs are stored by a store multiple insn. We avoid this
17264 by pushing an extra pair. */
17265 if (count == 2 && !arm_arch6)
17266 {
17267 if (base_reg == LAST_VFP_REGNUM - 3)
17268 base_reg -= 2;
17269 count++;
17270 }
17271
17272 /* FSTMD may not store more than 16 doubleword registers at once. Split
17273 larger stores into multiple parts (up to a maximum of two, in
17274 practice). */
17275 if (count > 16)
17276 {
17277 int saved;
17278 /* NOTE: base_reg is an internal register number, so each D register
17279 counts as 2. */
17280 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17281 saved += vfp_emit_fstmd (base_reg, 16);
17282 return saved;
17283 }
17284
17285 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17286 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17287
17288 reg = gen_rtx_REG (DFmode, base_reg);
17289 base_reg += 2;
17290
17291 XVECEXP (par, 0, 0)
17292 = gen_rtx_SET (VOIDmode,
17293 gen_frame_mem
17294 (BLKmode,
17295 gen_rtx_PRE_MODIFY (Pmode,
17296 stack_pointer_rtx,
17297 plus_constant
17298 (Pmode, stack_pointer_rtx,
17299 - (count * 8)))
17300 ),
17301 gen_rtx_UNSPEC (BLKmode,
17302 gen_rtvec (1, reg),
17303 UNSPEC_PUSH_MULT));
17304
17305 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17306 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17307 RTX_FRAME_RELATED_P (tmp) = 1;
17308 XVECEXP (dwarf, 0, 0) = tmp;
17309
17310 tmp = gen_rtx_SET (VOIDmode,
17311 gen_frame_mem (DFmode, stack_pointer_rtx),
17312 reg);
17313 RTX_FRAME_RELATED_P (tmp) = 1;
17314 XVECEXP (dwarf, 0, 1) = tmp;
17315
17316 for (i = 1; i < count; i++)
17317 {
17318 reg = gen_rtx_REG (DFmode, base_reg);
17319 base_reg += 2;
17320 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17321
17322 tmp = gen_rtx_SET (VOIDmode,
17323 gen_frame_mem (DFmode,
17324 plus_constant (Pmode,
17325 stack_pointer_rtx,
17326 i * 8)),
17327 reg);
17328 RTX_FRAME_RELATED_P (tmp) = 1;
17329 XVECEXP (dwarf, 0, i + 1) = tmp;
17330 }
17331
17332 par = emit_insn (par);
17333 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17334 RTX_FRAME_RELATED_P (par) = 1;
17335
17336 return count * 8;
17337 }
17338
17339 /* Emit a call instruction with pattern PAT. ADDR is the address of
17340 the call target. */
17341
17342 void
17343 arm_emit_call_insn (rtx pat, rtx addr)
17344 {
17345 rtx insn;
17346
17347 insn = emit_call_insn (pat);
17348
17349 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17350 If the call might use such an entry, add a use of the PIC register
17351 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17352 if (TARGET_VXWORKS_RTP
17353 && flag_pic
17354 && GET_CODE (addr) == SYMBOL_REF
17355 && (SYMBOL_REF_DECL (addr)
17356 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17357 : !SYMBOL_REF_LOCAL_P (addr)))
17358 {
17359 require_pic_register ();
17360 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17361 }
17362 }
17363
17364 /* Output a 'call' insn. */
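/* On pre-ARMv5 cores there is no BLX <reg>, so the call is open-coded:
   the return address is captured with "mov lr, pc" and the jump is then
   made with "bx" (when available) or "mov pc, <reg>".  */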
17365 const char *
17366 output_call (rtx *operands)
17367 {
17368 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17369
17370 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17371 if (REGNO (operands[0]) == LR_REGNUM)
17372 {
17373 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17374 output_asm_insn ("mov%?\t%0, %|lr", operands);
17375 }
17376
17377 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17378
17379 if (TARGET_INTERWORK || arm_arch4t)
17380 output_asm_insn ("bx%?\t%0", operands);
17381 else
17382 output_asm_insn ("mov%?\t%|pc, %0", operands);
17383
17384 return "";
17385 }
17386
17387 /* Output a 'call' insn whose target is a reference in memory. This is
17388 disabled on ARMv5 and later, where we prefer to use blx, because
17389 otherwise there's a significant performance overhead. */
17390 const char *
17391 output_call_mem (rtx *operands)
17392 {
17393 gcc_assert (!arm_arch5);
17394 if (TARGET_INTERWORK)
17395 {
17396 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17397 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17398 output_asm_insn ("bx%?\t%|ip", operands);
17399 }
17400 else if (regno_use_in (LR_REGNUM, operands[0]))
17401 {
17402 /* LR is used in the memory address. We load the address in the
17403 first instruction. It's safe to use IP as the target of the
17404 load since the call will kill it anyway. */
17405 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17406 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17407 if (arm_arch4t)
17408 output_asm_insn ("bx%?\t%|ip", operands);
17409 else
17410 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17411 }
17412 else
17413 {
17414 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17415 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17416 }
17417
17418 return "";
17419 }
17420
17421
17422 /* Output a move of a long double from ARM registers to ARM registers.
17423 OPERANDS[0] is the destination.
17424 OPERANDS[1] is the source. */
17425 const char *
17426 output_mov_long_double_arm_from_arm (rtx *operands)
17427 {
17428 /* We have to be careful here because the two might overlap. */
17429 int dest_start = REGNO (operands[0]);
17430 int src_start = REGNO (operands[1]);
17431 rtx ops[2];
17432 int i;
17433
17434 if (dest_start < src_start)
17435 {
17436 for (i = 0; i < 3; i++)
17437 {
17438 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17439 ops[1] = gen_rtx_REG (SImode, src_start + i);
17440 output_asm_insn ("mov%?\t%0, %1", ops);
17441 }
17442 }
17443 else
17444 {
17445 for (i = 2; i >= 0; i--)
17446 {
17447 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17448 ops[1] = gen_rtx_REG (SImode, src_start + i);
17449 output_asm_insn ("mov%?\t%0, %1", ops);
17450 }
17451 }
17452
17453 return "";
17454 }
17455
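/* Emit a pair of insns that load SRC into DEST 16 bits at a time: the low
   half is set first, then the high half is inserted with a ZERO_EXTRACT
   (or, for non-constant SRC, via HIGH/LO_SUM).  On targets with MOVW/MOVT
   this typically assembles to, e.g. for 0x12345678,
      movw  dest, #0x5678
      movt  dest, #0x1234
   (illustrative).  */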
17456 void
17457 arm_emit_movpair (rtx dest, rtx src)
17458 {
17459 /* If the src is an immediate, simplify it. */
17460 if (CONST_INT_P (src))
17461 {
17462 HOST_WIDE_INT val = INTVAL (src);
17463 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17464 if ((val >> 16) & 0x0000ffff)
17465 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17466 GEN_INT (16)),
17467 GEN_INT ((val >> 16) & 0x0000ffff));
17468 return;
17469 }
17470 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17471 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17472 }
17473
17474 /* Output a move between double words. It must be REG<-MEM
17475 or MEM<-REG. */
17476 const char *
17477 output_move_double (rtx *operands, bool emit, int *count)
17478 {
17479 enum rtx_code code0 = GET_CODE (operands[0]);
17480 enum rtx_code code1 = GET_CODE (operands[1]);
17481 rtx otherops[3];
17482 if (count)
17483 *count = 1;
17484
17485 /* The only case when this might happen is when
17486 you are looking at the length of a DImode instruction
17487 that has an invalid constant in it. */
17488 if (code0 == REG && code1 != MEM)
17489 {
17490 gcc_assert (!emit);
17491 *count = 2;
17492 return "";
17493 }
17494
17495 if (code0 == REG)
17496 {
17497 unsigned int reg0 = REGNO (operands[0]);
17498
17499 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17500
17501 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17502
17503 switch (GET_CODE (XEXP (operands[1], 0)))
17504 {
17505 case REG:
17506
17507 if (emit)
17508 {
17509 if (TARGET_LDRD
17510 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17511 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17512 else
17513 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17514 }
17515 break;
17516
17517 case PRE_INC:
17518 gcc_assert (TARGET_LDRD);
17519 if (emit)
17520 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17521 break;
17522
17523 case PRE_DEC:
17524 if (emit)
17525 {
17526 if (TARGET_LDRD)
17527 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17528 else
17529 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17530 }
17531 break;
17532
17533 case POST_INC:
17534 if (emit)
17535 {
17536 if (TARGET_LDRD)
17537 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17538 else
17539 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17540 }
17541 break;
17542
17543 case POST_DEC:
17544 gcc_assert (TARGET_LDRD);
17545 if (emit)
17546 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17547 break;
17548
17549 case PRE_MODIFY:
17550 case POST_MODIFY:
17551 /* Autoincrement addressing modes should never have overlapping
17552 base and destination registers, and overlapping index registers
17553 are already prohibited, so this doesn't need to worry about
17554 fix_cm3_ldrd. */
17555 otherops[0] = operands[0];
17556 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17557 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17558
17559 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17560 {
17561 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17562 {
17563 /* Registers overlap so split out the increment. */
17564 if (emit)
17565 {
17566 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17567 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17568 }
17569 if (count)
17570 *count = 2;
17571 }
17572 else
17573 {
17574 /* Use a single insn if we can.
17575 FIXME: IWMMXT allows offsets larger than ldrd can
17576 handle, fix these up with a pair of ldr. */
17577 if (TARGET_THUMB2
17578 || !CONST_INT_P (otherops[2])
17579 || (INTVAL (otherops[2]) > -256
17580 && INTVAL (otherops[2]) < 256))
17581 {
17582 if (emit)
17583 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17584 }
17585 else
17586 {
17587 if (emit)
17588 {
17589 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17590 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17591 }
17592 if (count)
17593 *count = 2;
17594
17595 }
17596 }
17597 }
17598 else
17599 {
17600 /* Use a single insn if we can.
17601 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17602 fix these up with a pair of ldr. */
17603 if (TARGET_THUMB2
17604 || !CONST_INT_P (otherops[2])
17605 || (INTVAL (otherops[2]) > -256
17606 && INTVAL (otherops[2]) < 256))
17607 {
17608 if (emit)
17609 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17610 }
17611 else
17612 {
17613 if (emit)
17614 {
17615 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17616 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17617 }
17618 if (count)
17619 *count = 2;
17620 }
17621 }
17622 break;
17623
17624 case LABEL_REF:
17625 case CONST:
17626 /* We might be able to use ldrd %0, %1 here. However the range is
17627 different to ldr/adr, and it is broken on some ARMv7-M
17628 implementations. */
17629 /* Use the second register of the pair to avoid problematic
17630 overlap. */
17631 otherops[1] = operands[1];
17632 if (emit)
17633 output_asm_insn ("adr%?\t%0, %1", otherops);
17634 operands[1] = otherops[0];
17635 if (emit)
17636 {
17637 if (TARGET_LDRD)
17638 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17639 else
17640 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17641 }
17642
17643 if (count)
17644 *count = 2;
17645 break;
17646
17647 /* ??? This needs checking for thumb2. */
17648 default:
17649 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17650 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17651 {
17652 otherops[0] = operands[0];
17653 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17654 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17655
17656 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17657 {
17658 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17659 {
17660 switch ((int) INTVAL (otherops[2]))
17661 {
17662 case -8:
17663 if (emit)
17664 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17665 return "";
17666 case -4:
17667 if (TARGET_THUMB2)
17668 break;
17669 if (emit)
17670 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17671 return "";
17672 case 4:
17673 if (TARGET_THUMB2)
17674 break;
17675 if (emit)
17676 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17677 return "";
17678 }
17679 }
17680 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17681 operands[1] = otherops[0];
17682 if (TARGET_LDRD
17683 && (REG_P (otherops[2])
17684 || TARGET_THUMB2
17685 || (CONST_INT_P (otherops[2])
17686 && INTVAL (otherops[2]) > -256
17687 && INTVAL (otherops[2]) < 256)))
17688 {
17689 if (reg_overlap_mentioned_p (operands[0],
17690 otherops[2]))
17691 {
17692 rtx tmp;
17693 /* Swap base and index registers over to
17694 avoid a conflict. */
17695 tmp = otherops[1];
17696 otherops[1] = otherops[2];
17697 otherops[2] = tmp;
17698 }
17699 /* If both registers conflict, it will usually
17700 have been fixed by a splitter. */
17701 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17702 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17703 {
17704 if (emit)
17705 {
17706 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17707 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17708 }
17709 if (count)
17710 *count = 2;
17711 }
17712 else
17713 {
17714 otherops[0] = operands[0];
17715 if (emit)
17716 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
17717 }
17718 return "";
17719 }
17720
17721 if (CONST_INT_P (otherops[2]))
17722 {
17723 if (emit)
17724 {
17725 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17726 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17727 else
17728 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17729 }
17730 }
17731 else
17732 {
17733 if (emit)
17734 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17735 }
17736 }
17737 else
17738 {
17739 if (emit)
17740 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
17741 }
17742
17743 if (count)
17744 *count = 2;
17745
17746 if (TARGET_LDRD)
17747 return "ldr%(d%)\t%0, [%1]";
17748
17749 return "ldm%(ia%)\t%1, %M0";
17750 }
17751 else
17752 {
17753 otherops[1] = adjust_address (operands[1], SImode, 4);
17754 /* Take care of overlapping base/data reg. */
17755 if (reg_mentioned_p (operands[0], operands[1]))
17756 {
17757 if (emit)
17758 {
17759 output_asm_insn ("ldr%?\t%0, %1", otherops);
17760 output_asm_insn ("ldr%?\t%0, %1", operands);
17761 }
17762 if (count)
17763 *count = 2;
17764
17765 }
17766 else
17767 {
17768 if (emit)
17769 {
17770 output_asm_insn ("ldr%?\t%0, %1", operands);
17771 output_asm_insn ("ldr%?\t%0, %1", otherops);
17772 }
17773 if (count)
17774 *count = 2;
17775 }
17776 }
17777 }
17778 }
17779 else
17780 {
17781 /* Constraints should ensure this. */
17782 gcc_assert (code0 == MEM && code1 == REG);
17783 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
17784 || (TARGET_ARM && TARGET_LDRD));
17785
17786 switch (GET_CODE (XEXP (operands[0], 0)))
17787 {
17788 case REG:
17789 if (emit)
17790 {
17791 if (TARGET_LDRD)
17792 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
17793 else
17794 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
17795 }
17796 break;
17797
17798 case PRE_INC:
17799 gcc_assert (TARGET_LDRD);
17800 if (emit)
17801 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
17802 break;
17803
17804 case PRE_DEC:
17805 if (emit)
17806 {
17807 if (TARGET_LDRD)
17808 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
17809 else
17810 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
17811 }
17812 break;
17813
17814 case POST_INC:
17815 if (emit)
17816 {
17817 if (TARGET_LDRD)
17818 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
17819 else
17820 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
17821 }
17822 break;
17823
17824 case POST_DEC:
17825 gcc_assert (TARGET_LDRD);
17826 if (emit)
17827 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
17828 break;
17829
17830 case PRE_MODIFY:
17831 case POST_MODIFY:
17832 otherops[0] = operands[1];
17833 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
17834 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
17835
17836 /* IWMMXT allows offsets larger than strd can handle;
17837 fix these up with a pair of str. */
17838 if (!TARGET_THUMB2
17839 && CONST_INT_P (otherops[2])
17840 && (INTVAL(otherops[2]) <= -256
17841 || INTVAL(otherops[2]) >= 256))
17842 {
17843 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17844 {
17845 if (emit)
17846 {
17847 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
17848 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17849 }
17850 if (count)
17851 *count = 2;
17852 }
17853 else
17854 {
17855 if (emit)
17856 {
17857 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17858 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
17859 }
17860 if (count)
17861 *count = 2;
17862 }
17863 }
17864 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17865 {
17866 if (emit)
17867 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
17868 }
17869 else
17870 {
17871 if (emit)
17872 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
17873 }
17874 break;
17875
17876 case PLUS:
17877 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
17878 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17879 {
17880 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
17881 {
17882 case -8:
17883 if (emit)
17884 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
17885 return "";
17886
17887 case -4:
17888 if (TARGET_THUMB2)
17889 break;
17890 if (emit)
17891 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
17892 return "";
17893
17894 case 4:
17895 if (TARGET_THUMB2)
17896 break;
17897 if (emit)
17898 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
17899 return "";
17900 }
17901 }
17902 if (TARGET_LDRD
17903 && (REG_P (otherops[2])
17904 || TARGET_THUMB2
17905 || (CONST_INT_P (otherops[2])
17906 && INTVAL (otherops[2]) > -256
17907 && INTVAL (otherops[2]) < 256)))
17908 {
17909 otherops[0] = operands[1];
17910 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
17911 if (emit)
17912 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
17913 return "";
17914 }
17915 /* Fall through */
17916
17917 default:
17918 otherops[0] = adjust_address (operands[0], SImode, 4);
17919 otherops[1] = operands[1];
17920 if (emit)
17921 {
17922 output_asm_insn ("str%?\t%1, %0", operands);
17923 output_asm_insn ("str%?\t%H1, %0", otherops);
17924 }
17925 if (count)
17926 *count = 2;
17927 }
17928 }
17929
17930 return "";
17931 }
17932
17933 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17934 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17935
17936 const char *
17937 output_move_quad (rtx *operands)
17938 {
17939 if (REG_P (operands[0]))
17940 {
17941 /* Load, or reg->reg move. */
17942
17943 if (MEM_P (operands[1]))
17944 {
17945 switch (GET_CODE (XEXP (operands[1], 0)))
17946 {
17947 case REG:
17948 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17949 break;
17950
17951 case LABEL_REF:
17952 case CONST:
17953 output_asm_insn ("adr%?\t%0, %1", operands);
17954 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
17955 break;
17956
17957 default:
17958 gcc_unreachable ();
17959 }
17960 }
17961 else
17962 {
17963 rtx ops[2];
17964 int dest, src, i;
17965
17966 gcc_assert (REG_P (operands[1]));
17967
17968 dest = REGNO (operands[0]);
17969 src = REGNO (operands[1]);
17970
17971 /* This seems pretty dumb, but hopefully GCC won't try to do it
17972 very often. */
17973 if (dest < src)
17974 for (i = 0; i < 4; i++)
17975 {
17976 ops[0] = gen_rtx_REG (SImode, dest + i);
17977 ops[1] = gen_rtx_REG (SImode, src + i);
17978 output_asm_insn ("mov%?\t%0, %1", ops);
17979 }
17980 else
17981 for (i = 3; i >= 0; i--)
17982 {
17983 ops[0] = gen_rtx_REG (SImode, dest + i);
17984 ops[1] = gen_rtx_REG (SImode, src + i);
17985 output_asm_insn ("mov%?\t%0, %1", ops);
17986 }
17987 }
17988 }
17989 else
17990 {
17991 gcc_assert (MEM_P (operands[0]));
17992 gcc_assert (REG_P (operands[1]));
17993 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
17994
17995 switch (GET_CODE (XEXP (operands[0], 0)))
17996 {
17997 case REG:
17998 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
17999 break;
18000
18001 default:
18002 gcc_unreachable ();
18003 }
18004 }
18005
18006 return "";
18007 }
18008
18009 /* Output a VFP load or store instruction. */
18010
18011 const char *
18012 output_move_vfp (rtx *operands)
18013 {
18014 rtx reg, mem, addr, ops[2];
18015 int load = REG_P (operands[0]);
18016 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18017 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18018 const char *templ;
18019 char buff[50];
18020 enum machine_mode mode;
18021
18022 reg = operands[!load];
18023 mem = operands[load];
18024
18025 mode = GET_MODE (reg);
18026
18027 gcc_assert (REG_P (reg));
18028 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18029 gcc_assert (mode == SFmode
18030 || mode == DFmode
18031 || mode == SImode
18032 || mode == DImode
18033 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18034 gcc_assert (MEM_P (mem));
18035
18036 addr = XEXP (mem, 0);
18037
18038 switch (GET_CODE (addr))
18039 {
18040 case PRE_DEC:
18041 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18042 ops[0] = XEXP (addr, 0);
18043 ops[1] = reg;
18044 break;
18045
18046 case POST_INC:
18047 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18048 ops[0] = XEXP (addr, 0);
18049 ops[1] = reg;
18050 break;
18051
18052 default:
18053 templ = "f%s%c%%?\t%%%s0, %%1%s";
18054 ops[0] = reg;
18055 ops[1] = mem;
18056 break;
18057 }
18058
18059 sprintf (buff, templ,
18060 load ? "ld" : "st",
18061 dp ? 'd' : 's',
18062 dp ? "P" : "",
18063 integer_p ? "\t%@ int" : "");
18064 output_asm_insn (buff, ops);
18065
18066 return "";
18067 }
18068
18069 /* Output a Neon double-word or quad-word load or store, or a load
18070 or store for larger structure modes.
18071
18072 WARNING: The ordering of elements is weird in big-endian mode,
18073 because the EABI requires that vectors stored in memory appear
18074 as though they were stored by a VSTM instruction.
18075 GCC RTL defines element ordering based on in-memory order.
18076 This can be different from the architectural ordering of elements
18077 within a NEON register. The intrinsics defined in arm_neon.h use the
18078 NEON register element ordering, not the GCC RTL element ordering.
18079
18080 For example, the in-memory ordering of a big-endian quadword
18081 vector with 16-bit elements when stored from register pair {d0,d1}
18082 will be (lowest address first, d0[N] is NEON register element N):
18083
18084 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18085
18086 When necessary, quadword registers (dN, dN+1) are moved to ARM
18087 registers from rN in the order:
18088
18089 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18090
18091 This ensures that STM/LDM can be used on vectors in ARM registers, and
18092 the same memory layout results as if VSTM/VLDM had been used.
18093
18094 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18095 possible, which allows use of appropriate alignment tags.
18096 Note that the choice of "64" is independent of the actual vector
18097 element size; this size simply ensures that the behavior is
18098 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18099
18100 Due to limitations of those instructions, use of VST1.64/VLD1.64
18101 is not possible if:
18102 - the address contains PRE_DEC, or
18103 - the mode refers to more than 4 double-word registers
18104
18105 In those cases, it would be possible to replace VSTM/VLDM by a
18106 sequence of instructions; this is not currently implemented since
18107 this is not certain to actually improve performance. */
18108
18109 const char *
18110 output_move_neon (rtx *operands)
18111 {
18112 rtx reg, mem, addr, ops[2];
18113 int regno, nregs, load = REG_P (operands[0]);
18114 const char *templ;
18115 char buff[50];
18116 enum machine_mode mode;
18117
18118 reg = operands[!load];
18119 mem = operands[load];
18120
18121 mode = GET_MODE (reg);
18122
18123 gcc_assert (REG_P (reg));
18124 regno = REGNO (reg);
18125 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18126 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18127 || NEON_REGNO_OK_FOR_QUAD (regno));
18128 gcc_assert (VALID_NEON_DREG_MODE (mode)
18129 || VALID_NEON_QREG_MODE (mode)
18130 || VALID_NEON_STRUCT_MODE (mode));
18131 gcc_assert (MEM_P (mem));
18132
18133 addr = XEXP (mem, 0);
18134
18135 /* Strip off const from addresses like (const (plus (...))). */
18136 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18137 addr = XEXP (addr, 0);
18138
18139 switch (GET_CODE (addr))
18140 {
18141 case POST_INC:
18142 /* We have to use vldm / vstm for too-large modes. */
18143 if (nregs > 4)
18144 {
18145 templ = "v%smia%%?\t%%0!, %%h1";
18146 ops[0] = XEXP (addr, 0);
18147 }
18148 else
18149 {
18150 templ = "v%s1.64\t%%h1, %%A0";
18151 ops[0] = mem;
18152 }
18153 ops[1] = reg;
18154 break;
18155
18156 case PRE_DEC:
18157 /* We have to use vldm / vstm in this case, since there is no
18158 pre-decrement form of the vld1 / vst1 instructions. */
18159 templ = "v%smdb%%?\t%%0!, %%h1";
18160 ops[0] = XEXP (addr, 0);
18161 ops[1] = reg;
18162 break;
18163
18164 case POST_MODIFY:
18165 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18166 gcc_unreachable ();
18167
18168 case LABEL_REF:
18169 case PLUS:
18170 {
18171 int i;
18172 int overlap = -1;
18173 for (i = 0; i < nregs; i++)
18174 {
18175 /* We're only using DImode here because it's a convenient size. */
18176 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18177 ops[1] = adjust_address (mem, DImode, 8 * i);
18178 if (reg_overlap_mentioned_p (ops[0], mem))
18179 {
18180 gcc_assert (overlap == -1);
18181 overlap = i;
18182 }
18183 else
18184 {
18185 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18186 output_asm_insn (buff, ops);
18187 }
18188 }
18189 if (overlap != -1)
18190 {
18191 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18192 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18193 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18194 output_asm_insn (buff, ops);
18195 }
18196
18197 return "";
18198 }
18199
18200 default:
18201 /* We have to use vldm / vstm for too-large modes. */
18202 if (nregs > 4)
18203 templ = "v%smia%%?\t%%m0, %%h1";
18204 else
18205 templ = "v%s1.64\t%%h1, %%A0";
18206
18207 ops[0] = mem;
18208 ops[1] = reg;
18209 }
18210
18211 sprintf (buff, templ, load ? "ld" : "st");
18212 output_asm_insn (buff, ops);
18213
18214 return "";
18215 }
18216
18217 /* Compute and return the length of neon_mov<mode>, where <mode> is
18218 one of VSTRUCT modes: EI, OI, CI or XI. */
18219 int
18220 arm_attr_length_move_neon (rtx insn)
18221 {
18222 rtx reg, mem, addr;
18223 int load;
18224 enum machine_mode mode;
18225
18226 extract_insn_cached (insn);
18227
18228 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18229 {
18230 mode = GET_MODE (recog_data.operand[0]);
18231 switch (mode)
18232 {
18233 case EImode:
18234 case OImode:
18235 return 8;
18236 case CImode:
18237 return 12;
18238 case XImode:
18239 return 16;
18240 default:
18241 gcc_unreachable ();
18242 }
18243 }
18244
18245 load = REG_P (recog_data.operand[0]);
18246 reg = recog_data.operand[!load];
18247 mem = recog_data.operand[load];
18248
18249 gcc_assert (MEM_P (mem));
18250
18251 mode = GET_MODE (reg);
18252 addr = XEXP (mem, 0);
18253
18254 /* Strip off const from addresses like (const (plus (...))). */
18255 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18256 addr = XEXP (addr, 0);
18257
18258 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18259 {
18260 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18261 return insns * 4;
18262 }
18263 else
18264 return 4;
18265 }
18266
18267 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18268 return zero. */
18269
18270 int
18271 arm_address_offset_is_imm (rtx insn)
18272 {
18273 rtx mem, addr;
18274
18275 extract_insn_cached (insn);
18276
18277 if (REG_P (recog_data.operand[0]))
18278 return 0;
18279
18280 mem = recog_data.operand[0];
18281
18282 gcc_assert (MEM_P (mem));
18283
18284 addr = XEXP (mem, 0);
18285
18286 if (REG_P (addr)
18287 || (GET_CODE (addr) == PLUS
18288 && REG_P (XEXP (addr, 0))
18289 && CONST_INT_P (XEXP (addr, 1))))
18290 return 1;
18291 else
18292 return 0;
18293 }
18294
18295 /* Output an ADD r, s, #n where n may be too big for one instruction.
18296 If N is zero and the destination is the same as the source, do nothing. */
18297 const char *
18298 output_add_immediate (rtx *operands)
18299 {
18300 HOST_WIDE_INT n = INTVAL (operands[2]);
18301
18302 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18303 {
18304 if (n < 0)
18305 output_multi_immediate (operands,
18306 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18307 -n);
18308 else
18309 output_multi_immediate (operands,
18310 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18311 n);
18312 }
18313
18314 return "";
18315 }
18316
18317 /* Output a multiple immediate operation.
18318 OPERANDS is the vector of operands referred to in the output patterns.
18319 INSTR1 is the output pattern to use for the first constant.
18320 INSTR2 is the output pattern to use for subsequent constants.
18321 IMMED_OP is the index of the constant slot in OPERANDS.
18322 N is the constant value. */
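/* For example (illustrative), N = 0x10004 produces two instructions:
   one using the immediate #4 and one using #65536, since each step
   handles one 8-bit chunk at an even bit position, matching the ARM
   immediate encoding.  */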
18323 static const char *
18324 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18325 int immed_op, HOST_WIDE_INT n)
18326 {
18327 #if HOST_BITS_PER_WIDE_INT > 32
18328 n &= 0xffffffff;
18329 #endif
18330
18331 if (n == 0)
18332 {
18333 /* Quick and easy output. */
18334 operands[immed_op] = const0_rtx;
18335 output_asm_insn (instr1, operands);
18336 }
18337 else
18338 {
18339 int i;
18340 const char * instr = instr1;
18341
18342 /* Note that n is never zero here (which would give no output). */
18343 for (i = 0; i < 32; i += 2)
18344 {
18345 if (n & (3 << i))
18346 {
18347 operands[immed_op] = GEN_INT (n & (255 << i));
18348 output_asm_insn (instr, operands);
18349 instr = instr2;
18350 i += 6;
18351 }
18352 }
18353 }
18354
18355 return "";
18356 }
18357
18358 /* Return the name of a shifter operation. */
18359 static const char *
18360 arm_shift_nmem(enum rtx_code code)
18361 {
18362 switch (code)
18363 {
18364 case ASHIFT:
18365 return ARM_LSL_NAME;
18366
18367 case ASHIFTRT:
18368 return "asr";
18369
18370 case LSHIFTRT:
18371 return "lsr";
18372
18373 case ROTATERT:
18374 return "ror";
18375
18376 default:
18377 abort();
18378 }
18379 }
18380
18381 /* Return the appropriate ARM instruction for the operation code.
18382 The returned result should not be overwritten. OP is the rtx of the
18383 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18384 was shifted. */
18385 const char *
18386 arithmetic_instr (rtx op, int shift_first_arg)
18387 {
18388 switch (GET_CODE (op))
18389 {
18390 case PLUS:
18391 return "add";
18392
18393 case MINUS:
18394 return shift_first_arg ? "rsb" : "sub";
18395
18396 case IOR:
18397 return "orr";
18398
18399 case XOR:
18400 return "eor";
18401
18402 case AND:
18403 return "and";
18404
18405 case ASHIFT:
18406 case ASHIFTRT:
18407 case LSHIFTRT:
18408 case ROTATERT:
18409 return arm_shift_nmem(GET_CODE(op));
18410
18411 default:
18412 gcc_unreachable ();
18413 }
18414 }
18415
18416 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18417 for the operation code. The returned result should not be overwritten.
18418 OP is the rtx of the shift.
18419 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
18420 will be the constant shift amount. */
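/* For instance, a (mult x 8) operand comes out as an LSL by 3 (see the
   MULT case below), and a left shift by 32 or more is emitted as
   "lsr #32" so that the arithmetic result (zero) is still correct.  */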
18421 static const char *
18422 shift_op (rtx op, HOST_WIDE_INT *amountp)
18423 {
18424 const char * mnem;
18425 enum rtx_code code = GET_CODE (op);
18426
18427 switch (code)
18428 {
18429 case ROTATE:
18430 if (!CONST_INT_P (XEXP (op, 1)))
18431 {
18432 output_operand_lossage ("invalid shift operand");
18433 return NULL;
18434 }
18435
18436 code = ROTATERT;
18437 *amountp = 32 - INTVAL (XEXP (op, 1));
18438 mnem = "ror";
18439 break;
18440
18441 case ASHIFT:
18442 case ASHIFTRT:
18443 case LSHIFTRT:
18444 case ROTATERT:
18445 mnem = arm_shift_nmem(code);
18446 if (CONST_INT_P (XEXP (op, 1)))
18447 {
18448 *amountp = INTVAL (XEXP (op, 1));
18449 }
18450 else if (REG_P (XEXP (op, 1)))
18451 {
18452 *amountp = -1;
18453 return mnem;
18454 }
18455 else
18456 {
18457 output_operand_lossage ("invalid shift operand");
18458 return NULL;
18459 }
18460 break;
18461
18462 case MULT:
18463 /* We never have to worry about the amount being other than a
18464 power of 2, since this case can never be reloaded from a reg. */
18465 if (!CONST_INT_P (XEXP (op, 1)))
18466 {
18467 output_operand_lossage ("invalid shift operand");
18468 return NULL;
18469 }
18470
18471 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18472
18473 /* Amount must be a power of two. */
18474 if (*amountp & (*amountp - 1))
18475 {
18476 output_operand_lossage ("invalid shift operand");
18477 return NULL;
18478 }
18479
18480 *amountp = int_log2 (*amountp);
18481 return ARM_LSL_NAME;
18482
18483 default:
18484 output_operand_lossage ("invalid shift operand");
18485 return NULL;
18486 }
18487
18488 /* This is not 100% correct, but follows from the desire to merge
18489 multiplication by a power of 2 with the recognizer for a
18490 shift. >=32 is not a valid shift for "lsl", so we must try and
18491 output a shift that produces the correct arithmetical result.
18492 Using lsr #32 is identical except for the fact that the carry bit
18493 is not set correctly if we set the flags; but we never use the
18494 carry bit from such an operation, so we can ignore that. */
18495 if (code == ROTATERT)
18496 /* Rotate is just modulo 32. */
18497 *amountp &= 31;
18498 else if (*amountp != (*amountp & 31))
18499 {
18500 if (code == ASHIFT)
18501 mnem = "lsr";
18502 *amountp = 32;
18503 }
18504
18505 /* Shifts of 0 are no-ops. */
18506 if (*amountp == 0)
18507 return NULL;
18508
18509 return mnem;
18510 }
18511
18512 /* Return the base-2 logarithm of POWER, which must be a power of two.  */
18513
18514 static HOST_WIDE_INT
18515 int_log2 (HOST_WIDE_INT power)
18516 {
18517 HOST_WIDE_INT shift = 0;
18518
18519 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18520 {
18521 gcc_assert (shift <= 31);
18522 shift++;
18523 }
18524
18525 return shift;
18526 }
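/* Worked example (illustrative only): int_log2 (8) tests bits 0, 1 and 2,
   finds them clear, and stops at shift == 3 because (1 << 3) & 8 is
   non-zero, so it returns 3.  Callers such as shift_op above have already
   checked that POWER really is a power of two; otherwise the result would
   just be the index of the lowest set bit.  */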
18527
18528 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18529 because /bin/as is horribly restrictive. The judgement about
18530 whether or not each character is 'printable' (and can be output as
18531 is) or not (and must be printed with an octal escape) must be made
18532 with reference to the *host* character set -- the situation is
18533 similar to that discussed in the comments above pp_c_char in
18534 c-pretty-print.c. */
18535
18536 #define MAX_ASCII_LEN 51
18537
18538 void
18539 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18540 {
18541 int i;
18542 int len_so_far = 0;
18543
18544 fputs ("\t.ascii\t\"", stream);
18545
18546 for (i = 0; i < len; i++)
18547 {
18548 int c = p[i];
18549
18550 if (len_so_far >= MAX_ASCII_LEN)
18551 {
18552 fputs ("\"\n\t.ascii\t\"", stream);
18553 len_so_far = 0;
18554 }
18555
18556 if (ISPRINT (c))
18557 {
18558 if (c == '\\' || c == '\"')
18559 {
18560 putc ('\\', stream);
18561 len_so_far++;
18562 }
18563 putc (c, stream);
18564 len_so_far++;
18565 }
18566 else
18567 {
18568 fprintf (stream, "\\%03o", c);
18569 len_so_far += 4;
18570 }
18571 }
18572
18573 fputs ("\"\n", stream);
18574 }
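/* Example of the emitted directive (illustrative, with a made-up string):
   for the three bytes 'H', '"', '\n' the function would print

       .ascii  "H\"\012"

   i.e. the quote is backslash-escaped and the non-printable newline is
   written as a three-digit octal escape, with a new .ascii line started
   whenever MAX_ASCII_LEN characters have been output.  */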
18575 \f
18576 /* Compute the register save mask for registers 0 through 12
18577 inclusive. This code is used by arm_compute_save_reg_mask. */
18578
18579 static unsigned long
18580 arm_compute_save_reg0_reg12_mask (void)
18581 {
18582 unsigned long func_type = arm_current_func_type ();
18583 unsigned long save_reg_mask = 0;
18584 unsigned int reg;
18585
18586 if (IS_INTERRUPT (func_type))
18587 {
18588 unsigned int max_reg;
18589 /* Interrupt functions must not corrupt any registers,
18590 even call clobbered ones. If this is a leaf function
18591 we can just examine the registers used by the RTL, but
18592 otherwise we have to assume that whatever function is
18593 called might clobber anything, and so we have to save
18594 all the call-clobbered registers as well. */
18595 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18596 /* FIQ handlers have registers r8 - r12 banked, so
18597 we only need to check r0 - r7.  Normal ISRs only
18598 bank r14 and r15, so we must check up to r12.
18599 r13 is the stack pointer which is always preserved,
18600 so we do not need to consider it here. */
18601 max_reg = 7;
18602 else
18603 max_reg = 12;
18604
18605 for (reg = 0; reg <= max_reg; reg++)
18606 if (df_regs_ever_live_p (reg)
18607 || (! crtl->is_leaf && call_used_regs[reg]))
18608 save_reg_mask |= (1 << reg);
18609
18610 /* Also save the pic base register if necessary. */
18611 if (flag_pic
18612 && !TARGET_SINGLE_PIC_BASE
18613 && arm_pic_register != INVALID_REGNUM
18614 && crtl->uses_pic_offset_table)
18615 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18616 }
18617 else if (IS_VOLATILE(func_type))
18618 {
18619 /* For noreturn functions we historically omitted register saves
18620 altogether. However this really messes up debugging. As a
18621 compromise save just the frame pointers. Combined with the link
18622 register saved elsewhere this should be sufficient to get
18623 a backtrace. */
18624 if (frame_pointer_needed)
18625 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18626 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18627 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18628 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18629 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18630 }
18631 else
18632 {
18633 /* In the normal case we only need to save those registers
18634 which are call saved and which are used by this function. */
18635 for (reg = 0; reg <= 11; reg++)
18636 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18637 save_reg_mask |= (1 << reg);
18638
18639 /* Handle the frame pointer as a special case. */
18640 if (frame_pointer_needed)
18641 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18642
18643 /* If we aren't loading the PIC register,
18644 don't stack it even though it may be live. */
18645 if (flag_pic
18646 && !TARGET_SINGLE_PIC_BASE
18647 && arm_pic_register != INVALID_REGNUM
18648 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18649 || crtl->uses_pic_offset_table))
18650 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18651
18652 /* The prologue will copy SP into R0, so save it. */
18653 if (IS_STACKALIGN (func_type))
18654 save_reg_mask |= 1;
18655 }
18656
18657 /* Save registers so the exception handler can modify them. */
18658 if (crtl->calls_eh_return)
18659 {
18660 unsigned int i;
18661
18662 for (i = 0; ; i++)
18663 {
18664 reg = EH_RETURN_DATA_REGNO (i);
18665 if (reg == INVALID_REGNUM)
18666 break;
18667 save_reg_mask |= 1 << reg;
18668 }
18669 }
18670
18671 return save_reg_mask;
18672 }
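/* Hypothetical example of the mask computed above: a normal (non-interrupt,
   returning) function that uses the call-saved registers r4 and r7 and
   needs a frame pointer would get

       save_reg_mask = (1 << 4) | (1 << 7) | (1 << HARD_FRAME_POINTER_REGNUM)

   i.e. 0x890 when the hard frame pointer is r11.  LR and PC handling is
   added separately by arm_compute_save_reg_mask below.  */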
18673
18674 /* Return true if r3 is live at the start of the function. */
18675
18676 static bool
18677 arm_r3_live_at_start_p (void)
18678 {
18679 /* Just look at cfg info, which is still close enough to correct at this
18680 point. This gives false positives for broken functions that might use
18681 uninitialized data that happens to be allocated in r3, but who cares? */
18682 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
18683 3);
18684 }
18685
18686 /* Compute the number of bytes used to store the static chain register on the
18687 stack, above the stack frame. We need to know this accurately to get the
18688 alignment of the rest of the stack frame correct. */
18689
18690 static int
18691 arm_compute_static_chain_stack_bytes (void)
18692 {
18693 /* See the defining assertion in arm_expand_prologue. */
18694 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18695 && IS_NESTED (arm_current_func_type ())
18696 && arm_r3_live_at_start_p ()
18697 && crtl->args.pretend_args_size == 0)
18698 return 4;
18699
18700 return 0;
18701 }
18702
18703 /* Compute a bit mask of which registers need to be
18704 saved on the stack for the current function.
18705 This is used by arm_get_frame_offsets, which may add extra registers. */
18706
18707 static unsigned long
18708 arm_compute_save_reg_mask (void)
18709 {
18710 unsigned int save_reg_mask = 0;
18711 unsigned long func_type = arm_current_func_type ();
18712 unsigned int reg;
18713
18714 if (IS_NAKED (func_type))
18715 /* This should never really happen. */
18716 return 0;
18717
18718 /* If we are creating a stack frame, then we must save the frame pointer,
18719 IP (which will hold the old stack pointer), LR and the PC. */
18720 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18721 save_reg_mask |=
18722 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18723 | (1 << IP_REGNUM)
18724 | (1 << LR_REGNUM)
18725 | (1 << PC_REGNUM);
18726
18727 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18728
18729 /* Decide if we need to save the link register.
18730 Interrupt routines have their own banked link register,
18731 so they never need to save it.
18732 Otherwise if we do not use the link register we do not need to save
18733 it. If we are pushing other registers onto the stack however, we
18734 can save an instruction in the epilogue by pushing the link register
18735 now and then popping it back into the PC. This incurs extra memory
18736 accesses though, so we only do it when optimizing for size, and only
18737 if we know that we will not need a fancy return sequence. */
18738 if (df_regs_ever_live_p (LR_REGNUM)
18739 || (save_reg_mask
18740 && optimize_size
18741 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
18742 && !crtl->calls_eh_return))
18743 save_reg_mask |= 1 << LR_REGNUM;
18744
18745 if (cfun->machine->lr_save_eliminated)
18746 save_reg_mask &= ~ (1 << LR_REGNUM);
18747
18748 if (TARGET_REALLY_IWMMXT
18749 && ((bit_count (save_reg_mask)
18750 + ARM_NUM_INTS (crtl->args.pretend_args_size +
18751 arm_compute_static_chain_stack_bytes())
18752 ) % 2) != 0)
18753 {
18754 /* The total number of registers that are going to be pushed
18755 onto the stack is odd. We need to ensure that the stack
18756 is 64-bit aligned before we start to save iWMMXt registers,
18757 and also before we start to create locals. (A local variable
18758 might be a double or long long which we will load/store using
18759 an iWMMXt instruction). Therefore we need to push another
18760 ARM register, so that the stack will be 64-bit aligned. We
18761 try to avoid using the arg registers (r0 - r3) as they might be
18762 used to pass values in a tail call. */
18763 for (reg = 4; reg <= 12; reg++)
18764 if ((save_reg_mask & (1 << reg)) == 0)
18765 break;
18766
18767 if (reg <= 12)
18768 save_reg_mask |= (1 << reg);
18769 else
18770 {
18771 cfun->machine->sibcall_blocked = 1;
18772 save_reg_mask |= (1 << 3);
18773 }
18774 }
18775
18776 /* We may need to push an additional register for use when initializing
18777 the PIC base register.  */
18778 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
18779 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
18780 {
18781 reg = thumb_find_work_register (1 << 4);
18782 if (!call_used_regs[reg])
18783 save_reg_mask |= (1 << reg);
18784 }
18785
18786 return save_reg_mask;
18787 }
18788
18789
18790 /* Compute a bit mask of which registers need to be
18791 saved on the stack for the current function. */
18792 static unsigned long
18793 thumb1_compute_save_reg_mask (void)
18794 {
18795 unsigned long mask;
18796 unsigned reg;
18797
18798 mask = 0;
18799 for (reg = 0; reg < 12; reg ++)
18800 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
18801 mask |= 1 << reg;
18802
18803 if (flag_pic
18804 && !TARGET_SINGLE_PIC_BASE
18805 && arm_pic_register != INVALID_REGNUM
18806 && crtl->uses_pic_offset_table)
18807 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18808
18809 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18810 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
18811 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18812
18813 /* LR will also be pushed if any lo regs are pushed. */
18814 if (mask & 0xff || thumb_force_lr_save ())
18815 mask |= (1 << LR_REGNUM);
18816
18817 /* Make sure we have a low work register if we need one.
18818 We will need one if we are going to push a high register,
18819 but we are not currently intending to push a low register. */
18820 if ((mask & 0xff) == 0
18821 && ((mask & 0x0f00) || TARGET_BACKTRACE))
18822 {
18823 /* Use thumb_find_work_register to choose which register
18824 we will use. If the register is live then we will
18825 have to push it. Use LAST_LO_REGNUM as our fallback
18826 choice for the register to select. */
18827 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
18828 /* Make sure the register returned by thumb_find_work_register is
18829 not part of the return value. */
18830 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
18831 reg = LAST_LO_REGNUM;
18832
18833 if (! call_used_regs[reg])
18834 mask |= 1 << reg;
18835 }
18836
18837 /* The 504 below is 8 bytes less than 512 because there are two possible
18838 alignment words. We can't tell here if they will be present or not so we
18839 have to play it safe and assume that they are. */
18840 if ((CALLER_INTERWORKING_SLOT_SIZE +
18841 ROUND_UP_WORD (get_frame_size ()) +
18842 crtl->outgoing_args_size) >= 504)
18843 {
18844 /* This is the same as the code in thumb1_expand_prologue() which
18845 determines which register to use for stack decrement. */
18846 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
18847 if (mask & (1 << reg))
18848 break;
18849
18850 if (reg > LAST_LO_REGNUM)
18851 {
18852 /* Make sure we have a register available for stack decrement. */
18853 mask |= 1 << LAST_LO_REGNUM;
18854 }
18855 }
18856
18857 return mask;
18858 }
18859
18860
18861 /* Return the number of bytes required to save VFP registers. */
18862 static int
18863 arm_get_vfp_saved_size (void)
18864 {
18865 unsigned int regno;
18866 int count;
18867 int saved;
18868
18869 saved = 0;
18870 /* Space for saved VFP registers. */
18871 if (TARGET_HARD_FLOAT && TARGET_VFP)
18872 {
18873 count = 0;
18874 for (regno = FIRST_VFP_REGNUM;
18875 regno < LAST_VFP_REGNUM;
18876 regno += 2)
18877 {
18878 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
18879 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
18880 {
18881 if (count > 0)
18882 {
18883 /* Workaround ARM10 VFPr1 bug. */
18884 if (count == 2 && !arm_arch6)
18885 count++;
18886 saved += count * 8;
18887 }
18888 count = 0;
18889 }
18890 else
18891 count++;
18892 }
18893 if (count > 0)
18894 {
18895 if (count == 2 && !arm_arch6)
18896 count++;
18897 saved += count * 8;
18898 }
18899 }
18900 return saved;
18901 }
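/* Worked example (illustrative): if d8 and d9 are the only call-saved VFP
   registers that are live, COUNT reaches 2, so on a pre-ARMv6 core the
   ARM10 VFPr1 workaround pads the block to 3 registers and the function
   returns 3 * 8 = 24 bytes; on ARMv6 or later it returns 16.  */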
18902
18903
18904 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18905 everything bar the final return instruction.  If SIMPLE_RETURN is true,
18906 then do not output the epilogue, because it has already been emitted in RTL.  */
18907 const char *
18908 output_return_instruction (rtx operand, bool really_return, bool reverse,
18909 bool simple_return)
18910 {
18911 char conditional[10];
18912 char instr[100];
18913 unsigned reg;
18914 unsigned long live_regs_mask;
18915 unsigned long func_type;
18916 arm_stack_offsets *offsets;
18917
18918 func_type = arm_current_func_type ();
18919
18920 if (IS_NAKED (func_type))
18921 return "";
18922
18923 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
18924 {
18925 /* If this function was declared non-returning, and we have
18926 found a tail call, then we have to trust that the called
18927 function won't return. */
18928 if (really_return)
18929 {
18930 rtx ops[2];
18931
18932 /* Otherwise, trap an attempted return by aborting. */
18933 ops[0] = operand;
18934 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
18935 : "abort");
18936 assemble_external_libcall (ops[1]);
18937 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
18938 }
18939
18940 return "";
18941 }
18942
18943 gcc_assert (!cfun->calls_alloca || really_return);
18944
18945 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
18946
18947 cfun->machine->return_used_this_function = 1;
18948
18949 offsets = arm_get_frame_offsets ();
18950 live_regs_mask = offsets->saved_regs_mask;
18951
18952 if (!simple_return && live_regs_mask)
18953 {
18954 const char * return_reg;
18955
18956 /* If we do not have any special requirements for function exit
18957 (e.g. interworking) then we can load the return address
18958 directly into the PC. Otherwise we must load it into LR. */
18959 if (really_return
18960 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
18961 return_reg = reg_names[PC_REGNUM];
18962 else
18963 return_reg = reg_names[LR_REGNUM];
18964
18965 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
18966 {
18967 /* There are three possible reasons for the IP register
18968 being saved. 1) a stack frame was created, in which case
18969 IP contains the old stack pointer, or 2) an ISR routine
18970 corrupted it, or 3) it was saved to align the stack on
18971 iWMMXt. In case 1, restore IP into SP, otherwise just
18972 restore IP. */
18973 if (frame_pointer_needed)
18974 {
18975 live_regs_mask &= ~ (1 << IP_REGNUM);
18976 live_regs_mask |= (1 << SP_REGNUM);
18977 }
18978 else
18979 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
18980 }
18981
18982 /* On some ARM architectures it is faster to use LDR rather than
18983 LDM to load a single register. On other architectures, the
18984 cost is the same. In 26 bit mode, or for exception handlers,
18985 we have to use LDM to load the PC so that the CPSR is also
18986 restored. */
18987 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
18988 if (live_regs_mask == (1U << reg))
18989 break;
18990
18991 if (reg <= LAST_ARM_REGNUM
18992 && (reg != LR_REGNUM
18993 || ! really_return
18994 || ! IS_INTERRUPT (func_type)))
18995 {
18996 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
18997 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
18998 }
18999 else
19000 {
19001 char *p;
19002 int first = 1;
19003
19004 /* Generate the load multiple instruction to restore the
19005 registers. Note we can get here, even if
19006 frame_pointer_needed is true, but only if sp already
19007 points to the base of the saved core registers. */
19008 if (live_regs_mask & (1 << SP_REGNUM))
19009 {
19010 unsigned HOST_WIDE_INT stack_adjust;
19011
19012 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19013 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19014
19015 if (stack_adjust && arm_arch5 && TARGET_ARM)
19016 if (TARGET_UNIFIED_ASM)
19017 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19018 else
19019 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19020 else
19021 {
19022 /* If we can't use ldmib (SA110 bug),
19023 then try to pop r3 instead. */
19024 if (stack_adjust)
19025 live_regs_mask |= 1 << 3;
19026
19027 if (TARGET_UNIFIED_ASM)
19028 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19029 else
19030 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19031 }
19032 }
19033 else
19034 if (TARGET_UNIFIED_ASM)
19035 sprintf (instr, "pop%s\t{", conditional);
19036 else
19037 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19038
19039 p = instr + strlen (instr);
19040
19041 for (reg = 0; reg <= SP_REGNUM; reg++)
19042 if (live_regs_mask & (1 << reg))
19043 {
19044 int l = strlen (reg_names[reg]);
19045
19046 if (first)
19047 first = 0;
19048 else
19049 {
19050 memcpy (p, ", ", 2);
19051 p += 2;
19052 }
19053
19054 memcpy (p, "%|", 2);
19055 memcpy (p + 2, reg_names[reg], l);
19056 p += l + 2;
19057 }
19058
19059 if (live_regs_mask & (1 << LR_REGNUM))
19060 {
19061 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19062 /* If returning from an interrupt, restore the CPSR. */
19063 if (IS_INTERRUPT (func_type))
19064 strcat (p, "^");
19065 }
19066 else
19067 strcpy (p, "}");
19068 }
19069
19070 output_asm_insn (instr, & operand);
19071
19072 /* See if we need to generate an extra instruction to
19073 perform the actual function return. */
19074 if (really_return
19075 && func_type != ARM_FT_INTERWORKED
19076 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19077 {
19078 /* The return has already been handled
19079 by loading the LR into the PC. */
19080 return "";
19081 }
19082 }
19083
19084 if (really_return)
19085 {
19086 switch ((int) ARM_FUNC_TYPE (func_type))
19087 {
19088 case ARM_FT_ISR:
19089 case ARM_FT_FIQ:
19090 /* ??? This is wrong for unified assembly syntax. */
19091 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19092 break;
19093
19094 case ARM_FT_INTERWORKED:
19095 sprintf (instr, "bx%s\t%%|lr", conditional);
19096 break;
19097
19098 case ARM_FT_EXCEPTION:
19099 /* ??? This is wrong for unified assembly syntax. */
19100 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19101 break;
19102
19103 default:
19104 /* Use bx if it's available. */
19105 if (arm_arch5 || arm_arch4t)
19106 sprintf (instr, "bx%s\t%%|lr", conditional);
19107 else
19108 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19109 break;
19110 }
19111
19112 output_asm_insn (instr, & operand);
19113 }
19114
19115 return "";
19116 }
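/* Illustrative output (not generated verbatim here): for a normal function
   that pushed {r4, r5, lr} in its prologue, an unconditional really_return
   call would emit a single instruction along the lines of

       pop     {r4, r5, pc}        @ unified syntax
       ldmfd   sp!, {r4, r5, pc}   @ divided syntax

   and then return immediately, since loading LR's slot into PC already
   performs the function return.  Register choice is hypothetical.  */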
19117
19118 /* Write the function name into the code section, directly preceding
19119 the function prologue.
19120
19121 Code will be output similar to this:
19122 t0
19123 .ascii "arm_poke_function_name", 0
19124 .align
19125 t1
19126 .word 0xff000000 + (t1 - t0)
19127 arm_poke_function_name
19128 mov ip, sp
19129 stmfd sp!, {fp, ip, lr, pc}
19130 sub fp, ip, #4
19131
19132 When performing a stack backtrace, code can inspect the value
19133 of 'pc' stored at 'fp' + 0. If the trace function then looks
19134 at location pc - 12 and the top 8 bits are set, then we know
19135 that there is a function name embedded immediately preceding this
19136 location, whose length is given by ((pc[-3]) & ~0xff000000).
19137
19138 We assume that pc is declared as a pointer to an unsigned long.
19139
19140 It is of no benefit to output the function name if we are assembling
19141 a leaf function. These function types will not contain a stack
19142 backtrace structure, therefore it is not possible to determine the
19143 function name. */
19144 void
19145 arm_poke_function_name (FILE *stream, const char *name)
19146 {
19147 unsigned long alignlength;
19148 unsigned long length;
19149 rtx x;
19150
19151 length = strlen (name) + 1;
19152 alignlength = ROUND_UP_WORD (length);
19153
19154 ASM_OUTPUT_ASCII (stream, name, length);
19155 ASM_OUTPUT_ALIGN (stream, 2);
19156 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19157 assemble_aligned_integer (UNITS_PER_WORD, x);
19158 }
19159
19160 /* Place some comments into the assembler stream
19161 describing the current function. */
19162 static void
19163 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19164 {
19165 unsigned long func_type;
19166
19167 /* ??? Do we want to print some of the below anyway? */
19168 if (TARGET_THUMB1)
19169 return;
19170
19171 /* Sanity check. */
19172 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19173
19174 func_type = arm_current_func_type ();
19175
19176 switch ((int) ARM_FUNC_TYPE (func_type))
19177 {
19178 default:
19179 case ARM_FT_NORMAL:
19180 break;
19181 case ARM_FT_INTERWORKED:
19182 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19183 break;
19184 case ARM_FT_ISR:
19185 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19186 break;
19187 case ARM_FT_FIQ:
19188 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19189 break;
19190 case ARM_FT_EXCEPTION:
19191 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19192 break;
19193 }
19194
19195 if (IS_NAKED (func_type))
19196 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19197
19198 if (IS_VOLATILE (func_type))
19199 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19200
19201 if (IS_NESTED (func_type))
19202 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19203 if (IS_STACKALIGN (func_type))
19204 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19205
19206 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19207 crtl->args.size,
19208 crtl->args.pretend_args_size, frame_size);
19209
19210 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19211 frame_pointer_needed,
19212 cfun->machine->uses_anonymous_args);
19213
19214 if (cfun->machine->lr_save_eliminated)
19215 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19216
19217 if (crtl->calls_eh_return)
19218 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19219
19220 }
19221
19222 static void
19223 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19224 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19225 {
19226 arm_stack_offsets *offsets;
19227
19228 if (TARGET_THUMB1)
19229 {
19230 int regno;
19231
19232 /* Emit any call-via-reg trampolines that are needed for v4t support
19233 of call_reg and call_value_reg type insns. */
19234 for (regno = 0; regno < LR_REGNUM; regno++)
19235 {
19236 rtx label = cfun->machine->call_via[regno];
19237
19238 if (label != NULL)
19239 {
19240 switch_to_section (function_section (current_function_decl));
19241 targetm.asm_out.internal_label (asm_out_file, "L",
19242 CODE_LABEL_NUMBER (label));
19243 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19244 }
19245 }
19246
19247 /* ??? Probably not safe to set this here, since it assumes that a
19248 function will be emitted as assembly immediately after we generate
19249 RTL for it. This does not happen for inline functions. */
19250 cfun->machine->return_used_this_function = 0;
19251 }
19252 else /* TARGET_32BIT */
19253 {
19254 /* We need to take into account any stack-frame rounding. */
19255 offsets = arm_get_frame_offsets ();
19256
19257 gcc_assert (!use_return_insn (FALSE, NULL)
19258 || (cfun->machine->return_used_this_function != 0)
19259 || offsets->saved_regs == offsets->outgoing_args
19260 || frame_pointer_needed);
19261
19262 /* Reset the ARM-specific per-function variables. */
19263 after_arm_reorg = 0;
19264 }
19265 }
19266
19267 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19268 STR and STRD.  If an even number of registers is being pushed, an
19269 STRD pattern is created for each register pair.  If an odd
19270 number of registers is pushed, emit an initial STR followed by
19271 as many STRD instructions as are needed.  This works best when the
19272 stack is initially 64-bit aligned (the normal case), since it
19273 ensures that each STRD is also 64-bit aligned. */
19274 static void
19275 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19276 {
19277 int num_regs = 0;
19278 int i;
19279 int regno;
19280 rtx par = NULL_RTX;
19281 rtx dwarf = NULL_RTX;
19282 rtx tmp;
19283 bool first = true;
19284
19285 num_regs = bit_count (saved_regs_mask);
19286
19287 /* Must be at least one register to save, and can't save SP or PC. */
19288 gcc_assert (num_regs > 0 && num_regs <= 14);
19289 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19290 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19291
19292 /* Create sequence for DWARF info. All the frame-related data for
19293 debugging is held in this wrapper. */
19294 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19295
19296 /* Describe the stack adjustment. */
19297 tmp = gen_rtx_SET (VOIDmode,
19298 stack_pointer_rtx,
19299 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19300 RTX_FRAME_RELATED_P (tmp) = 1;
19301 XVECEXP (dwarf, 0, 0) = tmp;
19302
19303 /* Find the first register. */
19304 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19305 ;
19306
19307 i = 0;
19308
19309 /* If there's an odd number of registers to push, start off by
19310 pushing a single register. This ensures that subsequent strd
19311 operations are dword aligned (assuming that SP was originally
19312 64-bit aligned). */
19313 if ((num_regs & 1) != 0)
19314 {
19315 rtx reg, mem, insn;
19316
19317 reg = gen_rtx_REG (SImode, regno);
19318 if (num_regs == 1)
19319 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19320 stack_pointer_rtx));
19321 else
19322 mem = gen_frame_mem (Pmode,
19323 gen_rtx_PRE_MODIFY
19324 (Pmode, stack_pointer_rtx,
19325 plus_constant (Pmode, stack_pointer_rtx,
19326 -4 * num_regs)));
19327
19328 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19329 RTX_FRAME_RELATED_P (tmp) = 1;
19330 insn = emit_insn (tmp);
19331 RTX_FRAME_RELATED_P (insn) = 1;
19332 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19333 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19334 reg);
19335 RTX_FRAME_RELATED_P (tmp) = 1;
19336 i++;
19337 regno++;
19338 XVECEXP (dwarf, 0, i) = tmp;
19339 first = false;
19340 }
19341
19342 while (i < num_regs)
19343 if (saved_regs_mask & (1 << regno))
19344 {
19345 rtx reg1, reg2, mem1, mem2;
19346 rtx tmp0, tmp1, tmp2;
19347 int regno2;
19348
19349 /* Find the register to pair with this one. */
19350 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19351 regno2++)
19352 ;
19353
19354 reg1 = gen_rtx_REG (SImode, regno);
19355 reg2 = gen_rtx_REG (SImode, regno2);
19356
19357 if (first)
19358 {
19359 rtx insn;
19360
19361 first = false;
19362 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19363 stack_pointer_rtx,
19364 -4 * num_regs));
19365 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19366 stack_pointer_rtx,
19367 -4 * (num_regs - 1)));
19368 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19369 plus_constant (Pmode, stack_pointer_rtx,
19370 -4 * (num_regs)));
19371 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19372 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19373 RTX_FRAME_RELATED_P (tmp0) = 1;
19374 RTX_FRAME_RELATED_P (tmp1) = 1;
19375 RTX_FRAME_RELATED_P (tmp2) = 1;
19376 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19377 XVECEXP (par, 0, 0) = tmp0;
19378 XVECEXP (par, 0, 1) = tmp1;
19379 XVECEXP (par, 0, 2) = tmp2;
19380 insn = emit_insn (par);
19381 RTX_FRAME_RELATED_P (insn) = 1;
19382 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19383 }
19384 else
19385 {
19386 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19387 stack_pointer_rtx,
19388 4 * i));
19389 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19390 stack_pointer_rtx,
19391 4 * (i + 1)));
19392 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19393 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19394 RTX_FRAME_RELATED_P (tmp1) = 1;
19395 RTX_FRAME_RELATED_P (tmp2) = 1;
19396 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19397 XVECEXP (par, 0, 0) = tmp1;
19398 XVECEXP (par, 0, 1) = tmp2;
19399 emit_insn (par);
19400 }
19401
19402 /* Create unwind information. This is an approximation. */
19403 tmp1 = gen_rtx_SET (VOIDmode,
19404 gen_frame_mem (Pmode,
19405 plus_constant (Pmode,
19406 stack_pointer_rtx,
19407 4 * i)),
19408 reg1);
19409 tmp2 = gen_rtx_SET (VOIDmode,
19410 gen_frame_mem (Pmode,
19411 plus_constant (Pmode,
19412 stack_pointer_rtx,
19413 4 * (i + 1))),
19414 reg2);
19415
19416 RTX_FRAME_RELATED_P (tmp1) = 1;
19417 RTX_FRAME_RELATED_P (tmp2) = 1;
19418 XVECEXP (dwarf, 0, i + 1) = tmp1;
19419 XVECEXP (dwarf, 0, i + 2) = tmp2;
19420 i += 2;
19421 regno = regno2 + 1;
19422 }
19423 else
19424 regno++;
19425
19426 return;
19427 }
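/* Illustrative sketch of the code emitted above (hypothetical registers):
   for saved_regs_mask covering {r4, r5, r6} (three registers, so an odd
   count) the sequence is roughly

       str     r4, [sp, #-12]!     @ single store allocates all 12 bytes
       strd    r5, r6, [sp, #4]    @ subsequent pair is doubleword aligned

   while an even count would start directly with a writeback STRD.  */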
19428
19429 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19430 whenever possible, otherwise it emits single-word stores. The first store
19431 also allocates stack space for all saved registers, using writeback with
19432 post-addressing mode. All other stores use offset addressing. If no STRD
19433 can be emitted, this function emits a sequence of single-word stores,
19434 and not an STM as before, because single-word stores provide more
19435 scheduling freedom and can be turned into an STM by peephole optimizations.  */
19436 static void
19437 arm_emit_strd_push (unsigned long saved_regs_mask)
19438 {
19439 int num_regs = 0;
19440 int i, j, dwarf_index = 0;
19441 int offset = 0;
19442 rtx dwarf = NULL_RTX;
19443 rtx insn = NULL_RTX;
19444 rtx tmp, mem;
19445
19446 /* TODO: More efficient code can be emitted by changing the
19447 layout, e.g., first push all pairs that can use STRD to keep the
19448 stack aligned, and then push all other registers. */
19449 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19450 if (saved_regs_mask & (1 << i))
19451 num_regs++;
19452
19453 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19454 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19455 gcc_assert (num_regs > 0);
19456
19457 /* Create sequence for DWARF info. */
19458 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19459
19460 /* For dwarf info, we generate explicit stack update. */
19461 tmp = gen_rtx_SET (VOIDmode,
19462 stack_pointer_rtx,
19463 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19464 RTX_FRAME_RELATED_P (tmp) = 1;
19465 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19466
19467 /* Save registers. */
19468 offset = - 4 * num_regs;
19469 j = 0;
19470 while (j <= LAST_ARM_REGNUM)
19471 if (saved_regs_mask & (1 << j))
19472 {
19473 if ((j % 2 == 0)
19474 && (saved_regs_mask & (1 << (j + 1))))
19475 {
19476 /* The current register and the next register form a register pair
19477 for which STRD can be generated.  */
19478 if (offset < 0)
19479 {
19480 /* Allocate stack space for all saved registers. */
19481 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19482 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19483 mem = gen_frame_mem (DImode, tmp);
19484 offset = 0;
19485 }
19486 else if (offset > 0)
19487 mem = gen_frame_mem (DImode,
19488 plus_constant (Pmode,
19489 stack_pointer_rtx,
19490 offset));
19491 else
19492 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19493
19494 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19495 RTX_FRAME_RELATED_P (tmp) = 1;
19496 tmp = emit_insn (tmp);
19497
19498 /* Record the first store insn. */
19499 if (dwarf_index == 1)
19500 insn = tmp;
19501
19502 /* Generate dwarf info. */
19503 mem = gen_frame_mem (SImode,
19504 plus_constant (Pmode,
19505 stack_pointer_rtx,
19506 offset));
19507 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19508 RTX_FRAME_RELATED_P (tmp) = 1;
19509 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19510
19511 mem = gen_frame_mem (SImode,
19512 plus_constant (Pmode,
19513 stack_pointer_rtx,
19514 offset + 4));
19515 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19516 RTX_FRAME_RELATED_P (tmp) = 1;
19517 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19518
19519 offset += 8;
19520 j += 2;
19521 }
19522 else
19523 {
19524 /* Emit a single word store. */
19525 if (offset < 0)
19526 {
19527 /* Allocate stack space for all saved registers. */
19528 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19529 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19530 mem = gen_frame_mem (SImode, tmp);
19531 offset = 0;
19532 }
19533 else if (offset > 0)
19534 mem = gen_frame_mem (SImode,
19535 plus_constant (Pmode,
19536 stack_pointer_rtx,
19537 offset));
19538 else
19539 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19540
19541 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19542 RTX_FRAME_RELATED_P (tmp) = 1;
19543 tmp = emit_insn (tmp);
19544
19545 /* Record the first store insn. */
19546 if (dwarf_index == 1)
19547 insn = tmp;
19548
19549 /* Generate dwarf info. */
19550 mem = gen_frame_mem (SImode,
19551 plus_constant(Pmode,
19552 stack_pointer_rtx,
19553 offset));
19554 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19555 RTX_FRAME_RELATED_P (tmp) = 1;
19556 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19557
19558 offset += 4;
19559 j += 1;
19560 }
19561 }
19562 else
19563 j++;
19564
19565 /* Attach dwarf info to the first insn we generate. */
19566 gcc_assert (insn != NULL_RTX);
19567 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19568 RTX_FRAME_RELATED_P (insn) = 1;
19569 }
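/* Illustrative sketch (hypothetical register set): for {r4, r5, r7} the
   function would emit

       strd    r4, r5, [sp, #-12]!   @ pair store, allocates all stack space
       str     r7, [sp, #8]          @ leftover register, offset addressing

   because r4/r5 is an even/odd pair while r7 has no partner saved.  */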
19570
19571 /* Generate and emit an insn that we will recognize as a push_multi.
19572 Unfortunately, since this insn does not reflect very well the actual
19573 semantics of the operation, we need to annotate the insn for the benefit
19574 of DWARF2 frame unwind information. */
19575 static rtx
19576 emit_multi_reg_push (unsigned long mask)
19577 {
19578 int num_regs = 0;
19579 int num_dwarf_regs;
19580 int i, j;
19581 rtx par;
19582 rtx dwarf;
19583 int dwarf_par_index;
19584 rtx tmp, reg;
19585
19586 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19587 if (mask & (1 << i))
19588 num_regs++;
19589
19590 gcc_assert (num_regs && num_regs <= 16);
19591
19592 /* We don't record the PC in the dwarf frame information. */
19593 num_dwarf_regs = num_regs;
19594 if (mask & (1 << PC_REGNUM))
19595 num_dwarf_regs--;
19596
19597 /* For the body of the insn we are going to generate an UNSPEC in
19598 parallel with several USEs. This allows the insn to be recognized
19599 by the push_multi pattern in the arm.md file.
19600
19601 The body of the insn looks something like this:
19602
19603 (parallel [
19604 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19605 (const_int:SI <num>)))
19606 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19607 (use (reg:SI XX))
19608 (use (reg:SI YY))
19609 ...
19610 ])
19611
19612 For the frame note however, we try to be more explicit and actually
19613 show each register being stored into the stack frame, plus a (single)
19614 decrement of the stack pointer. We do it this way in order to be
19615 friendly to the stack unwinding code, which only wants to see a single
19616 stack decrement per instruction. The RTL we generate for the note looks
19617 something like this:
19618
19619 (sequence [
19620 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19621 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19622 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19623 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19624 ...
19625 ])
19626
19627 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19628 instead we'd have a parallel expression detailing all
19629 the stores to the various memory addresses so that debug
19630 information is more up-to-date. Remember however while writing
19631 this to take care of the constraints with the push instruction.
19632
19633 Note also that this has to be taken care of for the VFP registers.
19634
19635 For more see PR43399. */
19636
19637 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19638 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19639 dwarf_par_index = 1;
19640
19641 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19642 {
19643 if (mask & (1 << i))
19644 {
19645 reg = gen_rtx_REG (SImode, i);
19646
19647 XVECEXP (par, 0, 0)
19648 = gen_rtx_SET (VOIDmode,
19649 gen_frame_mem
19650 (BLKmode,
19651 gen_rtx_PRE_MODIFY (Pmode,
19652 stack_pointer_rtx,
19653 plus_constant
19654 (Pmode, stack_pointer_rtx,
19655 -4 * num_regs))
19656 ),
19657 gen_rtx_UNSPEC (BLKmode,
19658 gen_rtvec (1, reg),
19659 UNSPEC_PUSH_MULT));
19660
19661 if (i != PC_REGNUM)
19662 {
19663 tmp = gen_rtx_SET (VOIDmode,
19664 gen_frame_mem (SImode, stack_pointer_rtx),
19665 reg);
19666 RTX_FRAME_RELATED_P (tmp) = 1;
19667 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
19668 dwarf_par_index++;
19669 }
19670
19671 break;
19672 }
19673 }
19674
19675 for (j = 1, i++; j < num_regs; i++)
19676 {
19677 if (mask & (1 << i))
19678 {
19679 reg = gen_rtx_REG (SImode, i);
19680
19681 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19682
19683 if (i != PC_REGNUM)
19684 {
19685 tmp
19686 = gen_rtx_SET (VOIDmode,
19687 gen_frame_mem
19688 (SImode,
19689 plus_constant (Pmode, stack_pointer_rtx,
19690 4 * j)),
19691 reg);
19692 RTX_FRAME_RELATED_P (tmp) = 1;
19693 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19694 }
19695
19696 j++;
19697 }
19698 }
19699
19700 par = emit_insn (par);
19701
19702 tmp = gen_rtx_SET (VOIDmode,
19703 stack_pointer_rtx,
19704 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19705 RTX_FRAME_RELATED_P (tmp) = 1;
19706 XVECEXP (dwarf, 0, 0) = tmp;
19707
19708 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19709
19710 return par;
19711 }
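/* Rough example of what this expands to (illustrative): for MASK covering
   {r4, r5, lr} the emitted insn assembles to something like

       push    {r4, r5, lr}

   while the attached REG_FRAME_RELATED_EXPR note describes the same effect
   as an explicit "sp = sp - 12" followed by three individual stores, which
   is the form the DWARF unwinder wants to see.  */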
19712
19713 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19714 SIZE is the offset to be adjusted.
19715 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19716 static void
19717 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19718 {
19719 rtx dwarf;
19720
19721 RTX_FRAME_RELATED_P (insn) = 1;
19722 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
19723 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19724 }
19725
19726 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19727 SAVED_REGS_MASK shows which registers need to be restored.
19728
19729 Unfortunately, since this insn does not reflect very well the actual
19730 semantics of the operation, we need to annotate the insn for the benefit
19731 of DWARF2 frame unwind information. */
19732 static void
19733 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
19734 {
19735 int num_regs = 0;
19736 int i, j;
19737 rtx par;
19738 rtx dwarf = NULL_RTX;
19739 rtx tmp, reg;
19740 bool return_in_pc;
19741 int offset_adj;
19742 int emit_update;
19743
19744 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19745 offset_adj = return_in_pc ? 1 : 0;
19746 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19747 if (saved_regs_mask & (1 << i))
19748 num_regs++;
19749
19750 gcc_assert (num_regs && num_regs <= 16);
19751
19752 /* If SP is in the reglist, then we don't emit an SP update insn.  */
19753 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
19754
19755 /* The parallel needs to hold num_regs SETs
19756 and one SET for the stack update. */
19757 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
19758
19759 if (return_in_pc)
19760 {
19761 tmp = ret_rtx;
19762 XVECEXP (par, 0, 0) = tmp;
19763 }
19764
19765 if (emit_update)
19766 {
19767 /* Increment the stack pointer, based on there being
19768 num_regs 4-byte registers to restore. */
19769 tmp = gen_rtx_SET (VOIDmode,
19770 stack_pointer_rtx,
19771 plus_constant (Pmode,
19772 stack_pointer_rtx,
19773 4 * num_regs));
19774 RTX_FRAME_RELATED_P (tmp) = 1;
19775 XVECEXP (par, 0, offset_adj) = tmp;
19776 }
19777
19778 /* Now restore every reg, which may include PC. */
19779 for (j = 0, i = 0; j < num_regs; i++)
19780 if (saved_regs_mask & (1 << i))
19781 {
19782 reg = gen_rtx_REG (SImode, i);
19783 if ((num_regs == 1) && emit_update && !return_in_pc)
19784 {
19785 /* Emit single load with writeback. */
19786 tmp = gen_frame_mem (SImode,
19787 gen_rtx_POST_INC (Pmode,
19788 stack_pointer_rtx));
19789 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
19790 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19791 return;
19792 }
19793
19794 tmp = gen_rtx_SET (VOIDmode,
19795 reg,
19796 gen_frame_mem
19797 (SImode,
19798 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
19799 RTX_FRAME_RELATED_P (tmp) = 1;
19800 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
19801
19802 /* We need to maintain a sequence for DWARF info too. As dwarf info
19803 should not have PC, skip PC. */
19804 if (i != PC_REGNUM)
19805 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19806
19807 j++;
19808 }
19809
19810 if (return_in_pc)
19811 par = emit_jump_insn (par);
19812 else
19813 par = emit_insn (par);
19814
19815 REG_NOTES (par) = dwarf;
19816 if (!return_in_pc)
19817 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
19818 stack_pointer_rtx, stack_pointer_rtx);
19819 }
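/* Rough example (illustrative): for SAVED_REGS_MASK covering {r4, r5, pc}
   this emits a jump insn that assembles to something like

       pop     {r4, r5, pc}

   with REG_CFA_RESTORE notes for r4 and r5 only, since PC is deliberately
   left out of the DWARF restore information.  */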
19820
19821 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19822 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19823
19824 Unfortunately, since this insn does not reflect very well the actual
19825 semantics of the operation, we need to annotate the insn for the benefit
19826 of DWARF2 frame unwind information. */
19827 static void
19828 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
19829 {
19830 int i, j;
19831 rtx par;
19832 rtx dwarf = NULL_RTX;
19833 rtx tmp, reg;
19834
19835 gcc_assert (num_regs && num_regs <= 32);
19836
19837 /* Workaround ARM10 VFPr1 bug. */
19838 if (num_regs == 2 && !arm_arch6)
19839 {
19840 if (first_reg == 15)
19841 first_reg--;
19842
19843 num_regs++;
19844 }
19845
19846 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19847 there could be up to 32 D-registers to restore.
19848 If there are more than 16 D-registers, make two recursive calls,
19849 each of which emits one pop_multi instruction. */
19850 if (num_regs > 16)
19851 {
19852 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
19853 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
19854 return;
19855 }
19856
19857 /* The parallel needs to hold num_regs SETs
19858 and one SET for the stack update. */
19859 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
19860
19861 /* Increment the stack pointer, based on there being
19862 num_regs 8-byte registers to restore. */
19863 tmp = gen_rtx_SET (VOIDmode,
19864 base_reg,
19865 plus_constant (Pmode, base_reg, 8 * num_regs));
19866 RTX_FRAME_RELATED_P (tmp) = 1;
19867 XVECEXP (par, 0, 0) = tmp;
19868
19869 /* Now show every reg that will be restored, using a SET for each. */
19870 for (j = 0, i=first_reg; j < num_regs; i += 2)
19871 {
19872 reg = gen_rtx_REG (DFmode, i);
19873
19874 tmp = gen_rtx_SET (VOIDmode,
19875 reg,
19876 gen_frame_mem
19877 (DFmode,
19878 plus_constant (Pmode, base_reg, 8 * j)));
19879 RTX_FRAME_RELATED_P (tmp) = 1;
19880 XVECEXP (par, 0, j + 1) = tmp;
19881
19882 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19883
19884 j++;
19885 }
19886
19887 par = emit_insn (par);
19888 REG_NOTES (par) = dwarf;
19889
19890 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
19891 base_reg, base_reg);
19892 }
19893
19894 /* Generate and emit a pattern that will be recognized as an LDRD pattern.  If
19895 an even number of registers is being popped, an LDRD pattern is created for
19896 each register pair.  If an odd number of registers is popped, the last
19897 register is loaded using an LDR pattern.  */
19898 static void
19899 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
19900 {
19901 int num_regs = 0;
19902 int i, j;
19903 rtx par = NULL_RTX;
19904 rtx dwarf = NULL_RTX;
19905 rtx tmp, reg, tmp1;
19906 bool return_in_pc;
19907
19908 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19909 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19910 if (saved_regs_mask & (1 << i))
19911 num_regs++;
19912
19913 gcc_assert (num_regs && num_regs <= 16);
19914
19915 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
19916 to be popped. So, if num_regs is even, now it will become odd,
19917 and we can generate pop with PC. If num_regs is odd, it will be
19918 even now, and ldr with return can be generated for PC. */
19919 if (return_in_pc)
19920 num_regs--;
19921
19922 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19923
19924 /* Var J iterates over all the registers in SAVED_REGS_MASK.  Var I gives
19925 the index of a saved register in the stack frame.  A PARALLEL RTX holding
19926 a register pair is created here so that the LDRD pattern can be matched.
19927 As PC is always the last register to be popped, and we have already
19928 decremented NUM_REGS if PC is to be popped, we don't have to worry
19929 about PC in this loop.  */
19930 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
19931 if (saved_regs_mask & (1 << j))
19932 {
19933 /* Create RTX for memory load. */
19934 reg = gen_rtx_REG (SImode, j);
19935 tmp = gen_rtx_SET (SImode,
19936 reg,
19937 gen_frame_mem (SImode,
19938 plus_constant (Pmode,
19939 stack_pointer_rtx, 4 * i)));
19940 RTX_FRAME_RELATED_P (tmp) = 1;
19941
19942 if (i % 2 == 0)
19943 {
19944 /* When saved-register index (i) is even, the RTX to be emitted is
19945 yet to be created. Hence create it first. The LDRD pattern we
19946 are generating is :
19947 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19948 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19949 where target registers need not be consecutive. */
19950 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19951 dwarf = NULL_RTX;
19952 }
19953
19954 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
19955 added as 0th element and if i is odd, reg_i is added as 1st element
19956 of LDRD pattern shown above. */
19957 XVECEXP (par, 0, (i % 2)) = tmp;
19958 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19959
19960 if ((i % 2) == 1)
19961 {
19962 /* When saved-register index (i) is odd, RTXs for both the registers
19963 to be loaded are generated in above given LDRD pattern, and the
19964 pattern can be emitted now. */
19965 par = emit_insn (par);
19966 REG_NOTES (par) = dwarf;
19967 RTX_FRAME_RELATED_P (par) = 1;
19968 }
19969
19970 i++;
19971 }
19972
19973 /* If the number of registers popped is odd and RETURN_IN_PC is false, or
19974 the number of registers is even and RETURN_IN_PC is true, the last register
19975 is popped using LDR.  It can be PC as well.  Hence, adjust the stack first
19976 and then do an LDR with post-increment.  */
19977
19978 /* Increment the stack pointer, based on there being
19979 num_regs 4-byte registers to restore. */
19980 tmp = gen_rtx_SET (VOIDmode,
19981 stack_pointer_rtx,
19982 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
19983 RTX_FRAME_RELATED_P (tmp) = 1;
19984 tmp = emit_insn (tmp);
19985 if (!return_in_pc)
19986 {
19987 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
19988 stack_pointer_rtx, stack_pointer_rtx);
19989 }
19990
19991 dwarf = NULL_RTX;
19992
19993 if (((num_regs % 2) == 1 && !return_in_pc)
19994 || ((num_regs % 2) == 0 && return_in_pc))
19995 {
19996 /* Scan for the single register to be popped. Skip until the saved
19997 register is found. */
19998 for (; (saved_regs_mask & (1 << j)) == 0; j++);
19999
20000 /* Gen LDR with post increment here. */
20001 tmp1 = gen_rtx_MEM (SImode,
20002 gen_rtx_POST_INC (SImode,
20003 stack_pointer_rtx));
20004 set_mem_alias_set (tmp1, get_frame_alias_set ());
20005
20006 reg = gen_rtx_REG (SImode, j);
20007 tmp = gen_rtx_SET (SImode, reg, tmp1);
20008 RTX_FRAME_RELATED_P (tmp) = 1;
20009 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20010
20011 if (return_in_pc)
20012 {
20013 /* If return_in_pc, j must be PC_REGNUM. */
20014 gcc_assert (j == PC_REGNUM);
20015 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20016 XVECEXP (par, 0, 0) = ret_rtx;
20017 XVECEXP (par, 0, 1) = tmp;
20018 par = emit_jump_insn (par);
20019 }
20020 else
20021 {
20022 par = emit_insn (tmp);
20023 REG_NOTES (par) = dwarf;
20024 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20025 stack_pointer_rtx, stack_pointer_rtx);
20026 }
20027
20028 }
20029 else if ((num_regs % 2) == 1 && return_in_pc)
20030 {
20031 /* There are 2 registers to be popped. So, generate the pattern
20032 pop_multiple_with_stack_update_and_return to pop in PC. */
20033 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20034 }
20035
20036 return;
20037 }
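/* Illustrative sketch (hypothetical registers): popping {r4, r5, r6} with
   this function gives roughly

       ldrd    r4, r5, [sp]        @ register pair via LDRD
       add     sp, sp, #8          @ separate stack update for the pairs
       ldr     r6, [sp], #4        @ odd leftover, LDR with post-increment

   whereas an even register count with a return in PC ends with an LDR of
   PC instead of the plain LDR above.  */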
20038
20039 /* LDRD in ARM mode needs consecutive registers as operands. This function
20040 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20041 offset addressing and then generates one separate stack update.  This provides
20042 more scheduling freedom, compared to writeback on every load. However,
20043 if the function returns using load into PC directly
20044 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20045 before the last load. TODO: Add a peephole optimization to recognize
20046 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20047 peephole optimization to merge the load at stack-offset zero
20048 with the stack update instruction using load with writeback
20049 in post-index addressing mode. */
20050 static void
20051 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20052 {
20053 int j = 0;
20054 int offset = 0;
20055 rtx par = NULL_RTX;
20056 rtx dwarf = NULL_RTX;
20057 rtx tmp, mem;
20058
20059 /* Restore saved registers. */
20060 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20061 j = 0;
20062 while (j <= LAST_ARM_REGNUM)
20063 if (saved_regs_mask & (1 << j))
20064 {
20065 if ((j % 2) == 0
20066 && (saved_regs_mask & (1 << (j + 1)))
20067 && (j + 1) != PC_REGNUM)
20068 {
20069 /* Current register and next register form register pair for which
20070 LDRD can be generated. PC is always the last register popped, and
20071 we handle it separately. */
20072 if (offset > 0)
20073 mem = gen_frame_mem (DImode,
20074 plus_constant (Pmode,
20075 stack_pointer_rtx,
20076 offset));
20077 else
20078 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20079
20080 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20081 tmp = emit_insn (tmp);
20082 RTX_FRAME_RELATED_P (tmp) = 1;
20083
20084 /* Generate dwarf info. */
20085
20086 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20087 gen_rtx_REG (SImode, j),
20088 NULL_RTX);
20089 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20090 gen_rtx_REG (SImode, j + 1),
20091 dwarf);
20092
20093 REG_NOTES (tmp) = dwarf;
20094
20095 offset += 8;
20096 j += 2;
20097 }
20098 else if (j != PC_REGNUM)
20099 {
20100 /* Emit a single word load. */
20101 if (offset > 0)
20102 mem = gen_frame_mem (SImode,
20103 plus_constant (Pmode,
20104 stack_pointer_rtx,
20105 offset));
20106 else
20107 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20108
20109 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20110 tmp = emit_insn (tmp);
20111 RTX_FRAME_RELATED_P (tmp) = 1;
20112
20113 /* Generate dwarf info. */
20114 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20115 gen_rtx_REG (SImode, j),
20116 NULL_RTX);
20117
20118 offset += 4;
20119 j += 1;
20120 }
20121 else /* j == PC_REGNUM */
20122 j++;
20123 }
20124 else
20125 j++;
20126
20127 /* Update the stack. */
20128 if (offset > 0)
20129 {
20130 tmp = gen_rtx_SET (Pmode,
20131 stack_pointer_rtx,
20132 plus_constant (Pmode,
20133 stack_pointer_rtx,
20134 offset));
20135 tmp = emit_insn (tmp);
20136 arm_add_cfa_adjust_cfa_note (tmp, offset,
20137 stack_pointer_rtx, stack_pointer_rtx);
20138 offset = 0;
20139 }
20140
20141 if (saved_regs_mask & (1 << PC_REGNUM))
20142 {
20143 /* Only PC is to be popped. */
20144 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20145 XVECEXP (par, 0, 0) = ret_rtx;
20146 tmp = gen_rtx_SET (SImode,
20147 gen_rtx_REG (SImode, PC_REGNUM),
20148 gen_frame_mem (SImode,
20149 gen_rtx_POST_INC (SImode,
20150 stack_pointer_rtx)));
20151 RTX_FRAME_RELATED_P (tmp) = 1;
20152 XVECEXP (par, 0, 1) = tmp;
20153 par = emit_jump_insn (par);
20154
20155 /* Generate dwarf info. */
20156 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20157 gen_rtx_REG (SImode, PC_REGNUM),
20158 NULL_RTX);
20159 REG_NOTES (par) = dwarf;
20160 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20161 stack_pointer_rtx, stack_pointer_rtx);
20162 }
20163 }
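/* Illustrative sketch (hypothetical registers): for SAVED_REGS_MASK
   covering {r4, r5, r6} in ARM mode this emits roughly

       ldrd    r4, r5, [sp]        @ consecutive pair
       ldr     r6, [sp, #8]        @ unpaired register
       add     sp, sp, #12         @ single stack update at the end

   and if PC were also in the mask it would be popped last with an
   "ldr pc, [sp], #4" that doubles as the function return.  */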
20164
20165 /* Calculate the size of the return value that is passed in registers. */
20166 static unsigned
20167 arm_size_return_regs (void)
20168 {
20169 enum machine_mode mode;
20170
20171 if (crtl->return_rtx != 0)
20172 mode = GET_MODE (crtl->return_rtx);
20173 else
20174 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20175
20176 return GET_MODE_SIZE (mode);
20177 }
20178
20179 /* Return true if the current function needs to save/restore LR. */
20180 static bool
20181 thumb_force_lr_save (void)
20182 {
20183 return !cfun->machine->lr_save_eliminated
20184 && (!leaf_function_p ()
20185 || thumb_far_jump_used_p ()
20186 || df_regs_ever_live_p (LR_REGNUM));
20187 }
20188
20189 /* Return true if CALL is an indirect tail call.  In that case we do not
20190 know whether r3 will be available, because the target address is held
20191 in a register.  */
20192 static bool
20193 is_indirect_tailcall_p (rtx call)
20194 {
20195 rtx pat = PATTERN (call);
20196
20197 /* Indirect tail call. */
20198 pat = XVECEXP (pat, 0, 0);
20199 if (GET_CODE (pat) == SET)
20200 pat = SET_SRC (pat);
20201
20202 pat = XEXP (XEXP (pat, 0), 0);
20203 return REG_P (pat);
20204 }
20205
20206 /* Return true if r3 is used by any of the tail call insns in the
20207 current function. */
20208 static bool
20209 any_sibcall_could_use_r3 (void)
20210 {
20211 edge_iterator ei;
20212 edge e;
20213
20214 if (!crtl->tail_call_emit)
20215 return false;
20216 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20217 if (e->flags & EDGE_SIBCALL)
20218 {
20219 rtx call = BB_END (e->src);
20220 if (!CALL_P (call))
20221 call = prev_nonnote_nondebug_insn (call);
20222 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20223 if (find_regno_fusage (call, USE, 3)
20224 || is_indirect_tailcall_p (call))
20225 return true;
20226 }
20227 return false;
20228 }
20229
20230
20231 /* Compute the distance from register FROM to register TO.
20232 These can be the arg pointer (26), the soft frame pointer (25),
20233 the stack pointer (13) or the hard frame pointer (11).
20234 In thumb mode r7 is used as the soft frame pointer, if needed.
20235 Typical stack layout looks like this:
20236
20237 old stack pointer -> | |
20238 ----
20239 | | \
20240 | | saved arguments for
20241 | | vararg functions
20242 | | /
20243 --
20244 hard FP & arg pointer -> | | \
20245 | | stack
20246 | | frame
20247 | | /
20248 --
20249 | | \
20250 | | call saved
20251 | | registers
20252 soft frame pointer -> | | /
20253 --
20254 | | \
20255 | | local
20256 | | variables
20257 locals base pointer -> | | /
20258 --
20259 | | \
20260 | | outgoing
20261 | | arguments
20262 current stack pointer -> | | /
20263 --
20264
20265 For a given function some or all of these stack components
20266 may not be needed, giving rise to the possibility of
20267 eliminating some of the registers.
20268
20269 The values returned by this function must reflect the behavior
20270 of arm_expand_prologue() and arm_compute_save_reg_mask().
20271
20272 The sign of the number returned reflects the direction of stack
20273 growth, so the values are positive for all eliminations except
20274 from the soft frame pointer to the hard frame pointer.
20275
20276 SFP may point just inside the local variables block to ensure correct
20277 alignment. */
20278
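/* Illustrative example (not from the original source; assumes no pretend
   args, no static chain and no interworking slot): an ARM-state function
   built with a frame pointer that saves {fp, lr} (8 bytes), has 16 bytes
   of locals and no outgoing arguments gets saved_args = 0, frame = 4,
   saved_regs = 8, soft_frame = 8, locals_base = 24 and outgoing_args = 24.
   Eliminating ARG_POINTER_REGNUM to STACK_POINTER_REGNUM then yields
   24 - (0 + 4) = 20, while eliminating FRAME_POINTER_REGNUM to
   ARM_HARD_FRAME_POINTER_REGNUM yields 4 - 8 = -4, matching the sign
   convention described above.  */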
20279
20280 /* Calculate stack offsets. These are used to calculate register elimination
20281 offsets and in prologue/epilogue code. Also calculates which registers
20282 should be saved. */
20283
20284 static arm_stack_offsets *
20285 arm_get_frame_offsets (void)
20286 {
20287 struct arm_stack_offsets *offsets;
20288 unsigned long func_type;
20289 int leaf;
20290 int saved;
20291 int core_saved;
20292 HOST_WIDE_INT frame_size;
20293 int i;
20294
20295 offsets = &cfun->machine->stack_offsets;
20296
20297 /* We need to know if we are a leaf function. Unfortunately, it
20298 is possible to be called after start_sequence has been called,
20299 which causes get_insns to return the insns for the sequence,
20300 not the function, which will cause leaf_function_p to return
20301 the incorrect result.
20302
20303    We do not need to know about leaf functions once reload has completed,
20304    and the frame size cannot be changed after that time, so we can safely
20305    use the cached value.  */
20306
20307 if (reload_completed)
20308 return offsets;
20309
20310 /* Initially this is the size of the local variables.  It will be translated
20311 into an offset once we have determined the size of preceding data. */
20312 frame_size = ROUND_UP_WORD (get_frame_size ());
20313
20314 leaf = leaf_function_p ();
20315
20316 /* Space for variadic functions. */
20317 offsets->saved_args = crtl->args.pretend_args_size;
20318
20319 /* In Thumb mode this is incorrect, but never used. */
20320 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
20321 arm_compute_static_chain_stack_bytes();
20322
20323 if (TARGET_32BIT)
20324 {
20325 unsigned int regno;
20326
20327 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20328 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20329 saved = core_saved;
20330
20331 /* We know that SP will be doubleword aligned on entry, and we must
20332 preserve that condition at any subroutine call. We also require the
20333 soft frame pointer to be doubleword aligned. */
20334
20335 if (TARGET_REALLY_IWMMXT)
20336 {
20337 /* Check for the call-saved iWMMXt registers. */
20338 for (regno = FIRST_IWMMXT_REGNUM;
20339 regno <= LAST_IWMMXT_REGNUM;
20340 regno++)
20341 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20342 saved += 8;
20343 }
20344
20345 func_type = arm_current_func_type ();
20346 /* Space for saved VFP registers. */
20347 if (! IS_VOLATILE (func_type)
20348 && TARGET_HARD_FLOAT && TARGET_VFP)
20349 saved += arm_get_vfp_saved_size ();
20350 }
20351 else /* TARGET_THUMB1 */
20352 {
20353 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20354 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20355 saved = core_saved;
20356 if (TARGET_BACKTRACE)
20357 saved += 16;
20358 }
20359
20360 /* Saved registers include the stack frame. */
20361 offsets->saved_regs = offsets->saved_args + saved +
20362 arm_compute_static_chain_stack_bytes();
20363 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20364 /* A leaf function does not need any stack alignment if it has nothing
20365 on the stack. */
20366 if (leaf && frame_size == 0
20367 /* However, if it calls alloca () then we have a dynamically allocated
20368    block aligned to BIGGEST_ALIGNMENT on the stack, so stack alignment is still needed.  */
20369 && ! cfun->calls_alloca)
20370 {
20371 offsets->outgoing_args = offsets->soft_frame;
20372 offsets->locals_base = offsets->soft_frame;
20373 return offsets;
20374 }
20375
20376 /* Ensure SFP has the correct alignment. */
20377 if (ARM_DOUBLEWORD_ALIGN
20378 && (offsets->soft_frame & 7))
20379 {
20380 offsets->soft_frame += 4;
20381 /* Try to align stack by pushing an extra reg. Don't bother doing this
20382 when there is a stack frame as the alignment will be rolled into
20383 the normal stack adjustment. */
20384 if (frame_size + crtl->outgoing_args_size == 0)
20385 {
20386 int reg = -1;
20387
20388 /* If it is safe to use r3, then do so. This sometimes
20389 generates better code on Thumb-2 by avoiding the need to
20390 use 32-bit push/pop instructions. */
20391 if (! any_sibcall_could_use_r3 ()
20392 && arm_size_return_regs () <= 12
20393 && (offsets->saved_regs_mask & (1 << 3)) == 0
20394 && (TARGET_THUMB2
20395 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20396 {
20397 reg = 3;
20398 }
20399 else
20400 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20401 {
20402 /* Avoid fixed registers; they may be changed at
20403 arbitrary times so it's unsafe to restore them
20404 during the epilogue. */
20405 if (!fixed_regs[i]
20406 && (offsets->saved_regs_mask & (1 << i)) == 0)
20407 {
20408 reg = i;
20409 break;
20410 }
20411 }
20412
20413 if (reg != -1)
20414 {
20415 offsets->saved_regs += 4;
20416 offsets->saved_regs_mask |= (1 << reg);
20417 }
20418 }
20419 }
20420
20421 offsets->locals_base = offsets->soft_frame + frame_size;
20422 offsets->outgoing_args = (offsets->locals_base
20423 + crtl->outgoing_args_size);
20424
20425 if (ARM_DOUBLEWORD_ALIGN)
20426 {
20427 /* Ensure SP remains doubleword aligned. */
20428 if (offsets->outgoing_args & 7)
20429 offsets->outgoing_args += 4;
20430 gcc_assert (!(offsets->outgoing_args & 7));
20431 }
20432
20433 return offsets;
20434 }
20435
20436
20437 /* Calculate the relative offsets for the different stack pointers. Positive
20438 offsets are in the direction of stack growth. */
20439
20440 HOST_WIDE_INT
20441 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20442 {
20443 arm_stack_offsets *offsets;
20444
20445 offsets = arm_get_frame_offsets ();
20446
20447 /* OK, now we have enough information to compute the distances.
20448 There must be an entry in these switch tables for each pair
20449 of registers in ELIMINABLE_REGS, even if some of the entries
20450 seem to be redundant or useless. */
20451 switch (from)
20452 {
20453 case ARG_POINTER_REGNUM:
20454 switch (to)
20455 {
20456 case THUMB_HARD_FRAME_POINTER_REGNUM:
20457 return 0;
20458
20459 case FRAME_POINTER_REGNUM:
20460 /* This is the reverse of the soft frame pointer
20461 to hard frame pointer elimination below. */
20462 return offsets->soft_frame - offsets->saved_args;
20463
20464 case ARM_HARD_FRAME_POINTER_REGNUM:
20465 /* This is only non-zero in the case where the static chain register
20466 is stored above the frame. */
20467 return offsets->frame - offsets->saved_args - 4;
20468
20469 case STACK_POINTER_REGNUM:
20470 /* If nothing has been pushed on the stack at all
20471 then this will return -4. This *is* correct! */
20472 return offsets->outgoing_args - (offsets->saved_args + 4);
20473
20474 default:
20475 gcc_unreachable ();
20476 }
20477 gcc_unreachable ();
20478
20479 case FRAME_POINTER_REGNUM:
20480 switch (to)
20481 {
20482 case THUMB_HARD_FRAME_POINTER_REGNUM:
20483 return 0;
20484
20485 case ARM_HARD_FRAME_POINTER_REGNUM:
20486 /* The hard frame pointer points to the top entry in the
20487 stack frame. The soft frame pointer to the bottom entry
20488 in the stack frame. If there is no stack frame at all,
20489 then they are identical. */
20490
20491 return offsets->frame - offsets->soft_frame;
20492
20493 case STACK_POINTER_REGNUM:
20494 return offsets->outgoing_args - offsets->soft_frame;
20495
20496 default:
20497 gcc_unreachable ();
20498 }
20499 gcc_unreachable ();
20500
20501 default:
20502 /* You cannot eliminate from the stack pointer.
20503 In theory you could eliminate from the hard frame
20504 pointer to the stack pointer, but this will never
20505 happen, since if a stack frame is not needed the
20506 hard frame pointer will never be used. */
20507 gcc_unreachable ();
20508 }
20509 }
20510
20511 /* Given FROM and TO register numbers, say whether this elimination is
20512 allowed. Frame pointer elimination is automatically handled.
20513
20514 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20515 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20516 pointer, we must eliminate FRAME_POINTER_REGNUM into
20517 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20518 ARG_POINTER_REGNUM. */
20519
20520 bool
20521 arm_can_eliminate (const int from, const int to)
20522 {
20523 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20524 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20525 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20526 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20527 true);
20528 }
20529
20530 /* Emit RTL to save coprocessor registers on function entry. Returns the
20531 number of bytes pushed. */
20532
20533 static int
20534 arm_save_coproc_regs(void)
20535 {
20536 int saved_size = 0;
20537 unsigned reg;
20538 unsigned start_reg;
20539 rtx insn;
20540
20541 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20542 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20543 {
20544 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20545 insn = gen_rtx_MEM (V2SImode, insn);
20546 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20547 RTX_FRAME_RELATED_P (insn) = 1;
20548 saved_size += 8;
20549 }
20550
20551 if (TARGET_HARD_FLOAT && TARGET_VFP)
20552 {
20553 start_reg = FIRST_VFP_REGNUM;
20554
20555 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20556 {
20557 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20558 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20559 {
20560 if (start_reg != reg)
20561 saved_size += vfp_emit_fstmd (start_reg,
20562 (reg - start_reg) / 2);
20563 start_reg = reg + 2;
20564 }
20565 }
20566 if (start_reg != reg)
20567 saved_size += vfp_emit_fstmd (start_reg,
20568 (reg - start_reg) / 2);
20569 }
20570 return saved_size;
20571 }
20572
20573
20574 /* Set the Thumb frame pointer from the stack pointer. */
20575
20576 static void
20577 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20578 {
20579 HOST_WIDE_INT amount;
20580 rtx insn, dwarf;
20581
20582 amount = offsets->outgoing_args - offsets->locals_base;
20583 if (amount < 1024)
20584 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20585 stack_pointer_rtx, GEN_INT (amount)));
20586 else
20587 {
20588 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20589 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20590 expects the first two operands to be the same. */
20591 if (TARGET_THUMB2)
20592 {
20593 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20594 stack_pointer_rtx,
20595 hard_frame_pointer_rtx));
20596 }
20597 else
20598 {
20599 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20600 hard_frame_pointer_rtx,
20601 stack_pointer_rtx));
20602 }
20603 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20604 plus_constant (Pmode, stack_pointer_rtx, amount));
20605 RTX_FRAME_RELATED_P (dwarf) = 1;
20606 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20607 }
20608
20609 RTX_FRAME_RELATED_P (insn) = 1;
20610 }
20611
20612 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20613 function. */
20614 void
20615 arm_expand_prologue (void)
20616 {
20617 rtx amount;
20618 rtx insn;
20619 rtx ip_rtx;
20620 unsigned long live_regs_mask;
20621 unsigned long func_type;
20622 int fp_offset = 0;
20623 int saved_pretend_args = 0;
20624 int saved_regs = 0;
20625 unsigned HOST_WIDE_INT args_to_push;
20626 arm_stack_offsets *offsets;
20627
20628 func_type = arm_current_func_type ();
20629
20630 /* Naked functions don't have prologues. */
20631 if (IS_NAKED (func_type))
20632 return;
20633
20634 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20635 args_to_push = crtl->args.pretend_args_size;
20636
20637 /* Compute which registers we will have to save onto the stack.  */
20638 offsets = arm_get_frame_offsets ();
20639 live_regs_mask = offsets->saved_regs_mask;
20640
20641 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20642
20643 if (IS_STACKALIGN (func_type))
20644 {
20645 rtx r0, r1;
20646
20647 /* Handle a word-aligned stack pointer. We generate the following:
20648
20649 mov r0, sp
20650 bic r1, r0, #7
20651 mov sp, r1
20652 <save and restore r0 in normal prologue/epilogue>
20653 mov sp, r0
20654 bx lr
20655
20656 The unwinder doesn't need to know about the stack realignment.
20657 Just tell it we saved SP in r0. */
20658 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20659
20660 r0 = gen_rtx_REG (SImode, 0);
20661 r1 = gen_rtx_REG (SImode, 1);
20662
20663 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20664 RTX_FRAME_RELATED_P (insn) = 1;
20665 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20666
20667 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20668
20669 /* ??? The CFA changes here, which may cause GDB to conclude that it
20670 has entered a different function. That said, the unwind info is
20671 correct, individually, before and after this instruction because
20672 we've described the save of SP, which will override the default
20673 handling of SP as restoring from the CFA. */
20674 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20675 }
20676
20677 /* For APCS frames, if IP register is clobbered
20678 when creating frame, save that register in a special
20679 way. */
20680 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20681 {
20682 if (IS_INTERRUPT (func_type))
20683 {
20684 /* Interrupt functions must not corrupt any registers.
20685 Creating a frame pointer however, corrupts the IP
20686 register, so we must push it first. */
20687 emit_multi_reg_push (1 << IP_REGNUM);
20688
20689 /* Do not set RTX_FRAME_RELATED_P on this insn.
20690 The dwarf stack unwinding code only wants to see one
20691 stack decrement per function, and this is not it. If
20692 this instruction is labeled as being part of the frame
20693 creation sequence then dwarf2out_frame_debug_expr will
20694 die when it encounters the assignment of IP to FP
20695 later on, since the use of SP here establishes SP as
20696 the CFA register and not IP.
20697
20698 Anyway this instruction is not really part of the stack
20699 frame creation although it is part of the prologue. */
20700 }
20701 else if (IS_NESTED (func_type))
20702 {
20703 /* The static chain register is the same as the IP register
20704 used as a scratch register during stack frame creation.
20705    To get around this we need to find somewhere to store IP
20706 whilst the frame is being created. We try the following
20707 places in order:
20708
20709 1. The last argument register r3.
20710 2. A slot on the stack above the frame. (This only
20711 works if the function is not a varargs function).
20712 3. Register r3 again, after pushing the argument registers
20713 onto the stack.
20714
20715 Note - we only need to tell the dwarf2 backend about the SP
20716 adjustment in the second variant; the static chain register
20717 doesn't need to be unwound, as it doesn't contain a value
20718 inherited from the caller. */
20719
20720 if (!arm_r3_live_at_start_p ())
20721 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20722 else if (args_to_push == 0)
20723 {
20724 rtx dwarf;
20725
20726 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20727 saved_regs += 4;
20728
20729 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
20730 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
20731 fp_offset = 4;
20732
20733 /* Just tell the dwarf backend that we adjusted SP. */
20734 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20735 plus_constant (Pmode, stack_pointer_rtx,
20736 -fp_offset));
20737 RTX_FRAME_RELATED_P (insn) = 1;
20738 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20739 }
20740 else
20741 {
20742 /* Store the args on the stack. */
20743 if (cfun->machine->uses_anonymous_args)
20744 insn = emit_multi_reg_push
20745 ((0xf0 >> (args_to_push / 4)) & 0xf);
20746 else
20747 insn = emit_insn
20748 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20749 GEN_INT (- args_to_push)));
20750
20751 RTX_FRAME_RELATED_P (insn) = 1;
20752
20753 saved_pretend_args = 1;
20754 fp_offset = args_to_push;
20755 args_to_push = 0;
20756
20757 /* Now reuse r3 to preserve IP. */
20758 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20759 }
20760 }
20761
20762 insn = emit_set_insn (ip_rtx,
20763 plus_constant (Pmode, stack_pointer_rtx,
20764 fp_offset));
20765 RTX_FRAME_RELATED_P (insn) = 1;
20766 }
20767
20768 if (args_to_push)
20769 {
20770 /* Push the argument registers, or reserve space for them. */
20771 if (cfun->machine->uses_anonymous_args)
20772 insn = emit_multi_reg_push
20773 ((0xf0 >> (args_to_push / 4)) & 0xf);
20774 else
20775 insn = emit_insn
20776 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20777 GEN_INT (- args_to_push)));
20778 RTX_FRAME_RELATED_P (insn) = 1;
20779 }
20780
20781 /* If this is an interrupt service routine, and the link register
20782    is going to be pushed, and we're not generating the extra
20783    push of IP (needed when a frame is needed and the frame layout is APCS),
20784 subtracting four from LR now will mean that the function return
20785 can be done with a single instruction. */
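/* Illustrative note (not from the original source): with LR pre-adjusted
   by -4, the epilogue can return directly with e.g. "ldmfd sp!, {..., pc}^"
   instead of having to restore LR and then execute "subs pc, lr, #4".  */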
20786 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
20787 && (live_regs_mask & (1 << LR_REGNUM)) != 0
20788 && !(frame_pointer_needed && TARGET_APCS_FRAME)
20789 && TARGET_ARM)
20790 {
20791 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
20792
20793 emit_set_insn (lr, plus_constant (SImode, lr, -4));
20794 }
20795
20796 if (live_regs_mask)
20797 {
20798 saved_regs += bit_count (live_regs_mask) * 4;
20799 if (optimize_size && !frame_pointer_needed
20800 && saved_regs == offsets->saved_regs - offsets->saved_args)
20801 {
20802 /* If no coprocessor registers are being pushed and we don't have
20803 to worry about a frame pointer then push extra registers to
20804    create the stack frame.  This is done in a way that does not
20805 alter the frame layout, so is independent of the epilogue. */
20806 int n;
20807 int frame;
20808 n = 0;
20809 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
20810 n++;
20811 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
20812 if (frame && n * 4 >= frame)
20813 {
20814 n = frame / 4;
20815 live_regs_mask |= (1 << n) - 1;
20816 saved_regs += frame;
20817 }
20818 }
20819
20820 if (TARGET_LDRD
20821 && current_tune->prefer_ldrd_strd
20822 && !optimize_function_for_size_p (cfun))
20823 {
20824 if (TARGET_THUMB2)
20825 {
20826 thumb2_emit_strd_push (live_regs_mask);
20827 }
20828 else if (TARGET_ARM
20829 && !TARGET_APCS_FRAME
20830 && !IS_INTERRUPT (func_type))
20831 {
20832 arm_emit_strd_push (live_regs_mask);
20833 }
20834 else
20835 {
20836 insn = emit_multi_reg_push (live_regs_mask);
20837 RTX_FRAME_RELATED_P (insn) = 1;
20838 }
20839 }
20840 else
20841 {
20842 insn = emit_multi_reg_push (live_regs_mask);
20843 RTX_FRAME_RELATED_P (insn) = 1;
20844 }
20845 }
20846
20847 if (! IS_VOLATILE (func_type))
20848 saved_regs += arm_save_coproc_regs ();
20849
20850 if (frame_pointer_needed && TARGET_ARM)
20851 {
20852 /* Create the new frame pointer. */
20853 if (TARGET_APCS_FRAME)
20854 {
20855 insn = GEN_INT (-(4 + args_to_push + fp_offset));
20856 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
20857 RTX_FRAME_RELATED_P (insn) = 1;
20858
20859 if (IS_NESTED (func_type))
20860 {
20861 /* Recover the static chain register. */
20862 if (!arm_r3_live_at_start_p () || saved_pretend_args)
20863 insn = gen_rtx_REG (SImode, 3);
20864 else /* if (crtl->args.pretend_args_size == 0) */
20865 {
20866 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
20867 insn = gen_frame_mem (SImode, insn);
20868 }
20869 emit_set_insn (ip_rtx, insn);
20870 /* Add a USE to stop propagate_one_insn() from barfing. */
20871 emit_insn (gen_force_register_use (ip_rtx));
20872 }
20873 }
20874 else
20875 {
20876 insn = GEN_INT (saved_regs - 4);
20877 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20878 stack_pointer_rtx, insn));
20879 RTX_FRAME_RELATED_P (insn) = 1;
20880 }
20881 }
20882
20883 if (flag_stack_usage_info)
20884 current_function_static_stack_size
20885 = offsets->outgoing_args - offsets->saved_args;
20886
20887 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
20888 {
20889 /* This add can produce multiple insns for a large constant, so we
20890 need to get tricky. */
20891 rtx last = get_last_insn ();
20892
20893 amount = GEN_INT (offsets->saved_args + saved_regs
20894 - offsets->outgoing_args);
20895
20896 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20897 amount));
20898 do
20899 {
20900 last = last ? NEXT_INSN (last) : get_insns ();
20901 RTX_FRAME_RELATED_P (last) = 1;
20902 }
20903 while (last != insn);
20904
20905 /* If the frame pointer is needed, emit a special barrier that
20906 will prevent the scheduler from moving stores to the frame
20907 before the stack adjustment. */
20908 if (frame_pointer_needed)
20909 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
20910 hard_frame_pointer_rtx));
20911 }
20912
20913
20914 if (frame_pointer_needed && TARGET_THUMB2)
20915 thumb_set_frame_pointer (offsets);
20916
20917 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20918 {
20919 unsigned long mask;
20920
20921 mask = live_regs_mask;
20922 mask &= THUMB2_WORK_REGS;
20923 if (!IS_NESTED (func_type))
20924 mask |= (1 << IP_REGNUM);
20925 arm_load_pic_register (mask);
20926 }
20927
20928 /* If we are profiling, make sure no instructions are scheduled before
20929 the call to mcount. Similarly if the user has requested no
20930 scheduling in the prolog. Similarly if we want non-call exceptions
20931 using the EABI unwinder, to prevent faulting instructions from being
20932 swapped with a stack adjustment. */
20933 if (crtl->profile || !TARGET_SCHED_PROLOG
20934 || (arm_except_unwind_info (&global_options) == UI_TARGET
20935 && cfun->can_throw_non_call_exceptions))
20936 emit_insn (gen_blockage ());
20937
20938 /* If the link register is being kept alive, with the return address in it,
20939 then make sure that it does not get reused by the ce2 pass. */
20940 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
20941 cfun->machine->lr_save_eliminated = 1;
20942 }
20943 \f
20944 /* Print condition code to STREAM. Helper function for arm_print_operand. */
20945 static void
20946 arm_print_condition (FILE *stream)
20947 {
20948 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
20949 {
20950 /* Branch conversion is not implemented for Thumb-2. */
20951 if (TARGET_THUMB)
20952 {
20953 output_operand_lossage ("predicated Thumb instruction");
20954 return;
20955 }
20956 if (current_insn_predicate != NULL)
20957 {
20958 output_operand_lossage
20959 ("predicated instruction in conditional sequence");
20960 return;
20961 }
20962
20963 fputs (arm_condition_codes[arm_current_cc], stream);
20964 }
20965 else if (current_insn_predicate)
20966 {
20967 enum arm_cond_code code;
20968
20969 if (TARGET_THUMB1)
20970 {
20971 output_operand_lossage ("predicated Thumb instruction");
20972 return;
20973 }
20974
20975 code = get_arm_condition_code (current_insn_predicate);
20976 fputs (arm_condition_codes[code], stream);
20977 }
20978 }
20979
20980
20981 /* If CODE is 'd', then X is a condition operand and the instruction
20982    should only be executed if the condition is true.
20983    If CODE is 'D', then X is a condition operand and the instruction
20984 should only be executed if the condition is false: however, if the mode
20985 of the comparison is CCFPEmode, then always execute the instruction -- we
20986 do this because in these circumstances !GE does not necessarily imply LT;
20987 in these cases the instruction pattern will take care to make sure that
20988 an instruction containing %d will follow, thereby undoing the effects of
20989 doing this instruction unconditionally.
20990 If CODE is 'N' then X is a floating point operand that must be negated
20991 before output.
20992 If CODE is 'B' then output a bitwise inverted value of X (a const int).
20993 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
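/* Illustrative examples (not from the original source): with code 'B' a
   CONST_INT of 0 is printed as -1 (its bitwise inverse, sign-extended),
   and with code 'M' a DImode value held in r4 is printed as "{r4-r5}".  */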
20994 static void
20995 arm_print_operand (FILE *stream, rtx x, int code)
20996 {
20997 switch (code)
20998 {
20999 case '@':
21000 fputs (ASM_COMMENT_START, stream);
21001 return;
21002
21003 case '_':
21004 fputs (user_label_prefix, stream);
21005 return;
21006
21007 case '|':
21008 fputs (REGISTER_PREFIX, stream);
21009 return;
21010
21011 case '?':
21012 arm_print_condition (stream);
21013 return;
21014
21015 case '(':
21016 /* Nothing in unified syntax, otherwise the current condition code. */
21017 if (!TARGET_UNIFIED_ASM)
21018 arm_print_condition (stream);
21019 break;
21020
21021 case ')':
21022 /* The current condition code in unified syntax, otherwise nothing. */
21023 if (TARGET_UNIFIED_ASM)
21024 arm_print_condition (stream);
21025 break;
21026
21027 case '.':
21028 /* The current condition code for a condition code setting instruction.
21029 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21030 if (TARGET_UNIFIED_ASM)
21031 {
21032 fputc('s', stream);
21033 arm_print_condition (stream);
21034 }
21035 else
21036 {
21037 arm_print_condition (stream);
21038 fputc('s', stream);
21039 }
21040 return;
21041
21042 case '!':
21043 /* If the instruction is conditionally executed then print
21044 the current condition code, otherwise print 's'. */
21045 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21046 if (current_insn_predicate)
21047 arm_print_condition (stream);
21048 else
21049 fputc('s', stream);
21050 break;
21051
21052 /* %# is a "break" sequence. It doesn't output anything, but is used to
21053 separate e.g. operand numbers from following text, if that text consists
21054 of further digits which we don't want to be part of the operand
21055 number. */
21056 case '#':
21057 return;
21058
21059 case 'N':
21060 {
21061 REAL_VALUE_TYPE r;
21062 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21063 r = real_value_negate (&r);
21064 fprintf (stream, "%s", fp_const_from_val (&r));
21065 }
21066 return;
21067
21068 /* An integer or symbol address without a preceding # sign. */
21069 case 'c':
21070 switch (GET_CODE (x))
21071 {
21072 case CONST_INT:
21073 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21074 break;
21075
21076 case SYMBOL_REF:
21077 output_addr_const (stream, x);
21078 break;
21079
21080 case CONST:
21081 if (GET_CODE (XEXP (x, 0)) == PLUS
21082 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21083 {
21084 output_addr_const (stream, x);
21085 break;
21086 }
21087 /* Fall through. */
21088
21089 default:
21090 output_operand_lossage ("Unsupported operand for code '%c'", code);
21091 }
21092 return;
21093
21094 /* An integer that we want to print in HEX. */
21095 case 'x':
21096 switch (GET_CODE (x))
21097 {
21098 case CONST_INT:
21099 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21100 break;
21101
21102 default:
21103 output_operand_lossage ("Unsupported operand for code '%c'", code);
21104 }
21105 return;
21106
21107 case 'B':
21108 if (CONST_INT_P (x))
21109 {
21110 HOST_WIDE_INT val;
21111 val = ARM_SIGN_EXTEND (~INTVAL (x));
21112 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21113 }
21114 else
21115 {
21116 putc ('~', stream);
21117 output_addr_const (stream, x);
21118 }
21119 return;
21120
21121 case 'L':
21122 /* The low 16 bits of an immediate constant. */
21123 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21124 return;
21125
21126 case 'i':
21127 fprintf (stream, "%s", arithmetic_instr (x, 1));
21128 return;
21129
21130 case 'I':
21131 fprintf (stream, "%s", arithmetic_instr (x, 0));
21132 return;
21133
21134 case 'S':
21135 {
21136 HOST_WIDE_INT val;
21137 const char *shift;
21138
21139 shift = shift_op (x, &val);
21140
21141 if (shift)
21142 {
21143 fprintf (stream, ", %s ", shift);
21144 if (val == -1)
21145 arm_print_operand (stream, XEXP (x, 1), 0);
21146 else
21147 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21148 }
21149 }
21150 return;
21151
21152 /* An explanation of the 'Q', 'R' and 'H' register operands:
21153
21154 In a pair of registers containing a DI or DF value the 'Q'
21155 operand returns the register number of the register containing
21156 the least significant part of the value. The 'R' operand returns
21157 the register number of the register containing the most
21158 significant part of the value.
21159
21160 The 'H' operand returns the higher of the two register numbers.
21161 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21162 same as the 'Q' operand, since the most significant part of the
21163    value is held in the lower-numbered register.  The reverse is true
21164 on systems where WORDS_BIG_ENDIAN is false.
21165
21166 The purpose of these operands is to distinguish between cases
21167 where the endian-ness of the values is important (for example
21168 when they are added together), and cases where the endian-ness
21169 is irrelevant, but the order of register operations is important.
21170 For example when loading a value from memory into a register
21171 pair, the endian-ness does not matter. Provided that the value
21172 from the lower memory address is put into the lower numbered
21173 register, and the value from the higher address is put into the
21174 higher numbered register, the load will work regardless of whether
21175 the value being loaded is big-wordian or little-wordian. The
21176 order of the two register loads can matter however, if the address
21177 of the memory location is actually held in one of the registers
21178 being overwritten by the load.
21179
21180 The 'Q' and 'R' constraints are also available for 64-bit
21181 constants. */
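     /* Illustrative example (not from the original source): for a DImode
	value held in {r0, r1} on a little-endian (WORDS_BIG_ENDIAN false)
	target, '%Q' prints r0 (least significant half), '%R' prints r1
	(most significant half) and '%H' prints r1 (the higher-numbered
	register of the pair).  */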
21182 case 'Q':
21183 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21184 {
21185 rtx part = gen_lowpart (SImode, x);
21186 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21187 return;
21188 }
21189
21190 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21191 {
21192 output_operand_lossage ("invalid operand for code '%c'", code);
21193 return;
21194 }
21195
21196 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21197 return;
21198
21199 case 'R':
21200 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21201 {
21202 enum machine_mode mode = GET_MODE (x);
21203 rtx part;
21204
21205 if (mode == VOIDmode)
21206 mode = DImode;
21207 part = gen_highpart_mode (SImode, mode, x);
21208 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21209 return;
21210 }
21211
21212 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21213 {
21214 output_operand_lossage ("invalid operand for code '%c'", code);
21215 return;
21216 }
21217
21218 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21219 return;
21220
21221 case 'H':
21222 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21223 {
21224 output_operand_lossage ("invalid operand for code '%c'", code);
21225 return;
21226 }
21227
21228 asm_fprintf (stream, "%r", REGNO (x) + 1);
21229 return;
21230
21231 case 'J':
21232 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21233 {
21234 output_operand_lossage ("invalid operand for code '%c'", code);
21235 return;
21236 }
21237
21238 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21239 return;
21240
21241 case 'K':
21242 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21243 {
21244 output_operand_lossage ("invalid operand for code '%c'", code);
21245 return;
21246 }
21247
21248 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21249 return;
21250
21251 case 'm':
21252 asm_fprintf (stream, "%r",
21253 REG_P (XEXP (x, 0))
21254 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21255 return;
21256
21257 case 'M':
21258 asm_fprintf (stream, "{%r-%r}",
21259 REGNO (x),
21260 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21261 return;
21262
21263 /* Like 'M', but writing doubleword vector registers, for use by Neon
21264 insns. */
21265 case 'h':
21266 {
21267 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21268 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21269 if (numregs == 1)
21270 asm_fprintf (stream, "{d%d}", regno);
21271 else
21272 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21273 }
21274 return;
21275
21276 case 'd':
21277 /* CONST_TRUE_RTX means always -- that's the default. */
21278 if (x == const_true_rtx)
21279 return;
21280
21281 if (!COMPARISON_P (x))
21282 {
21283 output_operand_lossage ("invalid operand for code '%c'", code);
21284 return;
21285 }
21286
21287 fputs (arm_condition_codes[get_arm_condition_code (x)],
21288 stream);
21289 return;
21290
21291 case 'D':
21292 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21293 want to do that. */
21294 if (x == const_true_rtx)
21295 {
21296 output_operand_lossage ("instruction never executed");
21297 return;
21298 }
21299 if (!COMPARISON_P (x))
21300 {
21301 output_operand_lossage ("invalid operand for code '%c'", code);
21302 return;
21303 }
21304
21305 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21306 (get_arm_condition_code (x))],
21307 stream);
21308 return;
21309
21310 case 's':
21311 case 'V':
21312 case 'W':
21313 case 'X':
21314 case 'Y':
21315 case 'Z':
21316 /* Former Maverick support, removed after GCC-4.7. */
21317 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21318 return;
21319
21320 case 'U':
21321 if (!REG_P (x)
21322 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21323 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21324 /* Bad value for wCG register number. */
21325 {
21326 output_operand_lossage ("invalid operand for code '%c'", code);
21327 return;
21328 }
21329
21330 else
21331 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21332 return;
21333
21334 /* Print an iWMMXt control register name. */
21335 case 'w':
21336 if (!CONST_INT_P (x)
21337 || INTVAL (x) < 0
21338 || INTVAL (x) >= 16)
21339 /* Bad value for wC register number. */
21340 {
21341 output_operand_lossage ("invalid operand for code '%c'", code);
21342 return;
21343 }
21344
21345 else
21346 {
21347 static const char * wc_reg_names [16] =
21348 {
21349 "wCID", "wCon", "wCSSF", "wCASF",
21350 "wC4", "wC5", "wC6", "wC7",
21351 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21352 "wC12", "wC13", "wC14", "wC15"
21353 };
21354
21355 fputs (wc_reg_names [INTVAL (x)], stream);
21356 }
21357 return;
21358
21359 /* Print the high single-precision register of a VFP double-precision
21360 register. */
21361 case 'p':
21362 {
21363 int mode = GET_MODE (x);
21364 int regno;
21365
21366 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21367 {
21368 output_operand_lossage ("invalid operand for code '%c'", code);
21369 return;
21370 }
21371
21372 regno = REGNO (x);
21373 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21374 {
21375 output_operand_lossage ("invalid operand for code '%c'", code);
21376 return;
21377 }
21378
21379 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21380 }
21381 return;
21382
21383 /* Print a VFP/Neon double precision or quad precision register name. */
21384 case 'P':
21385 case 'q':
21386 {
21387 int mode = GET_MODE (x);
21388 int is_quad = (code == 'q');
21389 int regno;
21390
21391 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21392 {
21393 output_operand_lossage ("invalid operand for code '%c'", code);
21394 return;
21395 }
21396
21397 if (!REG_P (x)
21398 || !IS_VFP_REGNUM (REGNO (x)))
21399 {
21400 output_operand_lossage ("invalid operand for code '%c'", code);
21401 return;
21402 }
21403
21404 regno = REGNO (x);
21405 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21406 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21407 {
21408 output_operand_lossage ("invalid operand for code '%c'", code);
21409 return;
21410 }
21411
21412 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21413 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21414 }
21415 return;
21416
21417 /* These two codes print the low/high doubleword register of a Neon quad
21418 register, respectively. For pair-structure types, can also print
21419 low/high quadword registers. */
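    /* Illustrative example (not from the original source): for a 16-byte
       vector held in q1 (the d2/d3 pair), '%e' prints d2 and '%f' prints d3.  */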
21420 case 'e':
21421 case 'f':
21422 {
21423 int mode = GET_MODE (x);
21424 int regno;
21425
21426 if ((GET_MODE_SIZE (mode) != 16
21427 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21428 {
21429 output_operand_lossage ("invalid operand for code '%c'", code);
21430 return;
21431 }
21432
21433 regno = REGNO (x);
21434 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21435 {
21436 output_operand_lossage ("invalid operand for code '%c'", code);
21437 return;
21438 }
21439
21440 if (GET_MODE_SIZE (mode) == 16)
21441 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21442 + (code == 'f' ? 1 : 0));
21443 else
21444 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21445 + (code == 'f' ? 1 : 0));
21446 }
21447 return;
21448
21449 /* Print a VFPv3 floating-point constant, represented as an integer
21450 index. */
21451 case 'G':
21452 {
21453 int index = vfp3_const_double_index (x);
21454 gcc_assert (index != -1);
21455 fprintf (stream, "%d", index);
21456 }
21457 return;
21458
21459 /* Print bits representing opcode features for Neon.
21460
21461 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21462 and polynomials as unsigned.
21463
21464 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21465
21466 Bit 2 is 1 for rounding functions, 0 otherwise. */
21467
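    /* Illustrative example (not from the original source): for bits == 5
       (signed, ordinary integer, rounding) '%T' prints 's', '%F' prints 'i',
       '%t' prints 's' and '%O' prints 'r', as in a rounding signed shift.  */
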
21468 /* Identify the type as 's', 'u', 'p' or 'f'. */
21469 case 'T':
21470 {
21471 HOST_WIDE_INT bits = INTVAL (x);
21472 fputc ("uspf"[bits & 3], stream);
21473 }
21474 return;
21475
21476 /* Likewise, but signed and unsigned integers are both 'i'. */
21477 case 'F':
21478 {
21479 HOST_WIDE_INT bits = INTVAL (x);
21480 fputc ("iipf"[bits & 3], stream);
21481 }
21482 return;
21483
21484 /* As for 'T', but emit 'u' instead of 'p'. */
21485 case 't':
21486 {
21487 HOST_WIDE_INT bits = INTVAL (x);
21488 fputc ("usuf"[bits & 3], stream);
21489 }
21490 return;
21491
21492 /* Bit 2: rounding (vs none). */
21493 case 'O':
21494 {
21495 HOST_WIDE_INT bits = INTVAL (x);
21496 fputs ((bits & 4) != 0 ? "r" : "", stream);
21497 }
21498 return;
21499
21500 /* Memory operand for vld1/vst1 instruction. */
21501 case 'A':
21502 {
21503 rtx addr;
21504         bool postinc = false;
21505 unsigned align, memsize, align_bits;
21506
21507 gcc_assert (MEM_P (x));
21508 addr = XEXP (x, 0);
21509 if (GET_CODE (addr) == POST_INC)
21510 {
21511             postinc = true;
21512 addr = XEXP (addr, 0);
21513 }
21514 asm_fprintf (stream, "[%r", REGNO (addr));
21515
21516 /* We know the alignment of this access, so we can emit a hint in the
21517 instruction (for some alignments) as an aid to the memory subsystem
21518 of the target. */
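	/* Illustrative example (not from the original source): a 16-byte
	   access through r0 that is known to be 16-byte aligned is printed
	   as "[r0:128]", with "!" appended when the address is a POST_INC.  */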
21519 align = MEM_ALIGN (x) >> 3;
21520 memsize = MEM_SIZE (x);
21521
21522 /* Only certain alignment specifiers are supported by the hardware. */
21523 if (memsize == 32 && (align % 32) == 0)
21524 align_bits = 256;
21525 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21526 align_bits = 128;
21527 else if (memsize >= 8 && (align % 8) == 0)
21528 align_bits = 64;
21529 else
21530 align_bits = 0;
21531
21532 if (align_bits != 0)
21533 asm_fprintf (stream, ":%d", align_bits);
21534
21535 asm_fprintf (stream, "]");
21536
21537 if (postinc)
21538 fputs("!", stream);
21539 }
21540 return;
21541
21542 case 'C':
21543 {
21544 rtx addr;
21545
21546 gcc_assert (MEM_P (x));
21547 addr = XEXP (x, 0);
21548 gcc_assert (REG_P (addr));
21549 asm_fprintf (stream, "[%r]", REGNO (addr));
21550 }
21551 return;
21552
21553 /* Translate an S register number into a D register number and element index. */
21554 case 'y':
21555 {
21556 int mode = GET_MODE (x);
21557 int regno;
21558
21559 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21560 {
21561 output_operand_lossage ("invalid operand for code '%c'", code);
21562 return;
21563 }
21564
21565 regno = REGNO (x);
21566 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21567 {
21568 output_operand_lossage ("invalid operand for code '%c'", code);
21569 return;
21570 }
21571
21572 regno = regno - FIRST_VFP_REGNUM;
21573 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21574 }
21575 return;
21576
21577 case 'v':
21578 gcc_assert (CONST_DOUBLE_P (x));
21579 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
21580 return;
21581
21582 /* Register specifier for vld1.16/vst1.16. Translate the S register
21583 number into a D register number and element index. */
21584 case 'z':
21585 {
21586 int mode = GET_MODE (x);
21587 int regno;
21588
21589 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21590 {
21591 output_operand_lossage ("invalid operand for code '%c'", code);
21592 return;
21593 }
21594
21595 regno = REGNO (x);
21596 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21597 {
21598 output_operand_lossage ("invalid operand for code '%c'", code);
21599 return;
21600 }
21601
21602 regno = regno - FIRST_VFP_REGNUM;
21603 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21604 }
21605 return;
21606
21607 default:
21608 if (x == 0)
21609 {
21610 output_operand_lossage ("missing operand");
21611 return;
21612 }
21613
21614 switch (GET_CODE (x))
21615 {
21616 case REG:
21617 asm_fprintf (stream, "%r", REGNO (x));
21618 break;
21619
21620 case MEM:
21621 output_memory_reference_mode = GET_MODE (x);
21622 output_address (XEXP (x, 0));
21623 break;
21624
21625 case CONST_DOUBLE:
21626 if (TARGET_NEON)
21627 {
21628 char fpstr[20];
21629 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21630 sizeof (fpstr), 0, 1);
21631 fprintf (stream, "#%s", fpstr);
21632 }
21633 else
21634 fprintf (stream, "#%s", fp_immediate_constant (x));
21635 break;
21636
21637 default:
21638 gcc_assert (GET_CODE (x) != NEG);
21639 fputc ('#', stream);
21640 if (GET_CODE (x) == HIGH)
21641 {
21642 fputs (":lower16:", stream);
21643 x = XEXP (x, 0);
21644 }
21645
21646 output_addr_const (stream, x);
21647 break;
21648 }
21649 }
21650 }
21651 \f
21652 /* Target hook for printing a memory address. */
21653 static void
21654 arm_print_operand_address (FILE *stream, rtx x)
21655 {
21656 if (TARGET_32BIT)
21657 {
21658 int is_minus = GET_CODE (x) == MINUS;
21659
21660 if (REG_P (x))
21661 asm_fprintf (stream, "[%r]", REGNO (x));
21662 else if (GET_CODE (x) == PLUS || is_minus)
21663 {
21664 rtx base = XEXP (x, 0);
21665 rtx index = XEXP (x, 1);
21666 HOST_WIDE_INT offset = 0;
21667 if (!REG_P (base)
21668 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21669 {
21670 /* Ensure that BASE is a register. */
21671 /* (one of them must be). */
21672              /* Also ensure the SP is not used as an index register.  */
21673 rtx temp = base;
21674 base = index;
21675 index = temp;
21676 }
21677 switch (GET_CODE (index))
21678 {
21679 case CONST_INT:
21680 offset = INTVAL (index);
21681 if (is_minus)
21682 offset = -offset;
21683 asm_fprintf (stream, "[%r, #%wd]",
21684 REGNO (base), offset);
21685 break;
21686
21687 case REG:
21688 asm_fprintf (stream, "[%r, %s%r]",
21689 REGNO (base), is_minus ? "-" : "",
21690 REGNO (index));
21691 break;
21692
21693 case MULT:
21694 case ASHIFTRT:
21695 case LSHIFTRT:
21696 case ASHIFT:
21697 case ROTATERT:
21698 {
21699 asm_fprintf (stream, "[%r, %s%r",
21700 REGNO (base), is_minus ? "-" : "",
21701 REGNO (XEXP (index, 0)));
21702 arm_print_operand (stream, index, 'S');
21703 fputs ("]", stream);
21704 break;
21705 }
21706
21707 default:
21708 gcc_unreachable ();
21709 }
21710 }
21711 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
21712 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
21713 {
21714 extern enum machine_mode output_memory_reference_mode;
21715
21716 gcc_assert (REG_P (XEXP (x, 0)));
21717
21718 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
21719 asm_fprintf (stream, "[%r, #%s%d]!",
21720 REGNO (XEXP (x, 0)),
21721 GET_CODE (x) == PRE_DEC ? "-" : "",
21722 GET_MODE_SIZE (output_memory_reference_mode));
21723 else
21724 asm_fprintf (stream, "[%r], #%s%d",
21725 REGNO (XEXP (x, 0)),
21726 GET_CODE (x) == POST_DEC ? "-" : "",
21727 GET_MODE_SIZE (output_memory_reference_mode));
21728 }
21729 else if (GET_CODE (x) == PRE_MODIFY)
21730 {
21731 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
21732 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21733 asm_fprintf (stream, "#%wd]!",
21734 INTVAL (XEXP (XEXP (x, 1), 1)));
21735 else
21736 asm_fprintf (stream, "%r]!",
21737 REGNO (XEXP (XEXP (x, 1), 1)));
21738 }
21739 else if (GET_CODE (x) == POST_MODIFY)
21740 {
21741 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
21742 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21743 asm_fprintf (stream, "#%wd",
21744 INTVAL (XEXP (XEXP (x, 1), 1)));
21745 else
21746 asm_fprintf (stream, "%r",
21747 REGNO (XEXP (XEXP (x, 1), 1)));
21748 }
21749 else output_addr_const (stream, x);
21750 }
21751 else
21752 {
21753 if (REG_P (x))
21754 asm_fprintf (stream, "[%r]", REGNO (x));
21755 else if (GET_CODE (x) == POST_INC)
21756 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
21757 else if (GET_CODE (x) == PLUS)
21758 {
21759 gcc_assert (REG_P (XEXP (x, 0)));
21760 if (CONST_INT_P (XEXP (x, 1)))
21761 asm_fprintf (stream, "[%r, #%wd]",
21762 REGNO (XEXP (x, 0)),
21763 INTVAL (XEXP (x, 1)));
21764 else
21765 asm_fprintf (stream, "[%r, %r]",
21766 REGNO (XEXP (x, 0)),
21767 REGNO (XEXP (x, 1)));
21768 }
21769 else
21770 output_addr_const (stream, x);
21771 }
21772 }
21773 \f
21774 /* Target hook for indicating whether a punctuation character for
21775 TARGET_PRINT_OPERAND is valid. */
21776 static bool
21777 arm_print_operand_punct_valid_p (unsigned char code)
21778 {
21779 return (code == '@' || code == '|' || code == '.'
21780 || code == '(' || code == ')' || code == '#'
21781 || (TARGET_32BIT && (code == '?'))
21782 || (TARGET_THUMB2 && (code == '!'))
21783 || (TARGET_THUMB && (code == '_')));
21784 }
21785 \f
21786 /* Target hook for assembling integer objects. The ARM version needs to
21787 handle word-sized values specially. */
21788 static bool
21789 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
21790 {
21791 enum machine_mode mode;
21792
21793 if (size == UNITS_PER_WORD && aligned_p)
21794 {
21795 fputs ("\t.word\t", asm_out_file);
21796 output_addr_const (asm_out_file, x);
21797
21798 /* Mark symbols as position independent. We only do this in the
21799 .text segment, not in the .data segment. */
21800 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
21801 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
21802 {
21803 /* See legitimize_pic_address for an explanation of the
21804 TARGET_VXWORKS_RTP check. */
21805 if (!arm_pic_data_is_text_relative
21806 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
21807 fputs ("(GOT)", asm_out_file);
21808 else
21809 fputs ("(GOTOFF)", asm_out_file);
21810 }
21811 fputc ('\n', asm_out_file);
21812 return true;
21813 }
21814
21815 mode = GET_MODE (x);
21816
21817 if (arm_vector_mode_supported_p (mode))
21818 {
21819 int i, units;
21820
21821 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21822
21823 units = CONST_VECTOR_NUNITS (x);
21824 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
21825
21826 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21827 for (i = 0; i < units; i++)
21828 {
21829 rtx elt = CONST_VECTOR_ELT (x, i);
21830 assemble_integer
21831 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
21832 }
21833 else
21834 for (i = 0; i < units; i++)
21835 {
21836 rtx elt = CONST_VECTOR_ELT (x, i);
21837 REAL_VALUE_TYPE rval;
21838
21839 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
21840
21841 assemble_real
21842 (rval, GET_MODE_INNER (mode),
21843 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
21844 }
21845
21846 return true;
21847 }
21848
21849 return default_assemble_integer (x, size, aligned_p);
21850 }
21851
21852 static void
21853 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
21854 {
21855 section *s;
21856
21857 if (!TARGET_AAPCS_BASED)
21858 {
21859 (is_ctor ?
21860 default_named_section_asm_out_constructor
21861 : default_named_section_asm_out_destructor) (symbol, priority);
21862 return;
21863 }
21864
21865 /* Put these in the .init_array section, using a special relocation. */
21866 if (priority != DEFAULT_INIT_PRIORITY)
21867 {
21868 char buf[18];
21869 sprintf (buf, "%s.%.5u",
21870 is_ctor ? ".init_array" : ".fini_array",
21871 priority);
21872 s = get_section (buf, SECTION_WRITE, NULL_TREE);
21873 }
21874 else if (is_ctor)
21875 s = ctors_section;
21876 else
21877 s = dtors_section;
21878
21879 switch_to_section (s);
21880 assemble_align (POINTER_SIZE);
21881 fputs ("\t.word\t", asm_out_file);
21882 output_addr_const (asm_out_file, symbol);
21883 fputs ("(target1)\n", asm_out_file);
21884 }
21885
21886 /* Add a function to the list of static constructors. */
21887
21888 static void
21889 arm_elf_asm_constructor (rtx symbol, int priority)
21890 {
21891 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
21892 }
21893
21894 /* Add a function to the list of static destructors. */
21895
21896 static void
21897 arm_elf_asm_destructor (rtx symbol, int priority)
21898 {
21899 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
21900 }
21901 \f
21902 /* A finite state machine takes care of noticing whether or not instructions
21903 can be conditionally executed, and thus decrease execution time and code
21904 size by deleting branch instructions. The fsm is controlled by
21905 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21906
21907 /* The state of the fsm controlling condition codes are:
21908 0: normal, do nothing special
21909 1: make ASM_OUTPUT_OPCODE not output this instruction
21910 2: make ASM_OUTPUT_OPCODE not output this instruction
21911 3: make instructions conditional
21912 4: make instructions conditional
21913
21914 State transitions (state->state by whom under condition):
21915 0 -> 1 final_prescan_insn if the `target' is a label
21916 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21917 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21918 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21919 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21920 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21921 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21922 (the target insn is arm_target_insn).
21923
21924 If the jump clobbers the conditions then we use states 2 and 4.
21925
21926 A similar thing can be done with conditional return insns.
21927
21928 XXX In case the `target' is an unconditional branch, this conditionalising
21929 of the instructions always reduces code size, but not always execution
21930 time. But then, I want to reduce the code size to somewhere near what
21931 /bin/cc produces. */
21932
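/* Illustrative example (not from the original source): the effect of this
   fsm is to turn a short forward branch such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   into a conditionally executed instruction

	cmp	r0, #0
	addne	r1, r1, #1

   (or, for Thumb-2, into an equivalent IT block; see below).  */
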
21933 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21934 instructions. When a COND_EXEC instruction is seen the subsequent
21935 instructions are scanned so that multiple conditional instructions can be
21936 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21937 specify the length and true/false mask for the IT block. These will be
21938 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
21939
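/* Illustrative example (not from the original source): two conditional
   instructions executed when EQ followed by one executed when NE can be
   combined into a single block emitted under an "itte eq" prefix.  */
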
21940 /* Returns the index of the ARM condition code string in
21941 `arm_condition_codes', or ARM_NV if the comparison is invalid.
21942 COMPARISON should be an rtx like `(eq (...) (...))'. */
21943
21944 enum arm_cond_code
21945 maybe_get_arm_condition_code (rtx comparison)
21946 {
21947 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
21948 enum arm_cond_code code;
21949 enum rtx_code comp_code = GET_CODE (comparison);
21950
21951 if (GET_MODE_CLASS (mode) != MODE_CC)
21952 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
21953 XEXP (comparison, 1));
21954
21955 switch (mode)
21956 {
21957 case CC_DNEmode: code = ARM_NE; goto dominance;
21958 case CC_DEQmode: code = ARM_EQ; goto dominance;
21959 case CC_DGEmode: code = ARM_GE; goto dominance;
21960 case CC_DGTmode: code = ARM_GT; goto dominance;
21961 case CC_DLEmode: code = ARM_LE; goto dominance;
21962 case CC_DLTmode: code = ARM_LT; goto dominance;
21963 case CC_DGEUmode: code = ARM_CS; goto dominance;
21964 case CC_DGTUmode: code = ARM_HI; goto dominance;
21965 case CC_DLEUmode: code = ARM_LS; goto dominance;
21966 case CC_DLTUmode: code = ARM_CC;
21967
21968 dominance:
21969 if (comp_code == EQ)
21970 return ARM_INVERSE_CONDITION_CODE (code);
21971 if (comp_code == NE)
21972 return code;
21973 return ARM_NV;
21974
21975 case CC_NOOVmode:
21976 switch (comp_code)
21977 {
21978 case NE: return ARM_NE;
21979 case EQ: return ARM_EQ;
21980 case GE: return ARM_PL;
21981 case LT: return ARM_MI;
21982 default: return ARM_NV;
21983 }
21984
21985 case CC_Zmode:
21986 switch (comp_code)
21987 {
21988 case NE: return ARM_NE;
21989 case EQ: return ARM_EQ;
21990 default: return ARM_NV;
21991 }
21992
21993 case CC_Nmode:
21994 switch (comp_code)
21995 {
21996 case NE: return ARM_MI;
21997 case EQ: return ARM_PL;
21998 default: return ARM_NV;
21999 }
22000
22001 case CCFPEmode:
22002 case CCFPmode:
22003 /* We can handle all cases except UNEQ and LTGT. */
22004 switch (comp_code)
22005 {
22006 case GE: return ARM_GE;
22007 case GT: return ARM_GT;
22008 case LE: return ARM_LS;
22009 case LT: return ARM_MI;
22010 case NE: return ARM_NE;
22011 case EQ: return ARM_EQ;
22012 case ORDERED: return ARM_VC;
22013 case UNORDERED: return ARM_VS;
22014 case UNLT: return ARM_LT;
22015 case UNLE: return ARM_LE;
22016 case UNGT: return ARM_HI;
22017 case UNGE: return ARM_PL;
22018 /* UNEQ and LTGT do not have a representation. */
22019 case UNEQ: /* Fall through. */
22020 case LTGT: /* Fall through. */
22021 default: return ARM_NV;
22022 }
22023
22024 case CC_SWPmode:
22025 switch (comp_code)
22026 {
22027 case NE: return ARM_NE;
22028 case EQ: return ARM_EQ;
22029 case GE: return ARM_LE;
22030 case GT: return ARM_LT;
22031 case LE: return ARM_GE;
22032 case LT: return ARM_GT;
22033 case GEU: return ARM_LS;
22034 case GTU: return ARM_CC;
22035 case LEU: return ARM_CS;
22036 case LTU: return ARM_HI;
22037 default: return ARM_NV;
22038 }
22039
22040 case CC_Cmode:
22041 switch (comp_code)
22042 {
22043 case LTU: return ARM_CS;
22044 case GEU: return ARM_CC;
22045 default: return ARM_NV;
22046 }
22047
22048 case CC_CZmode:
22049 switch (comp_code)
22050 {
22051 case NE: return ARM_NE;
22052 case EQ: return ARM_EQ;
22053 case GEU: return ARM_CS;
22054 case GTU: return ARM_HI;
22055 case LEU: return ARM_LS;
22056 case LTU: return ARM_CC;
22057 default: return ARM_NV;
22058 }
22059
22060 case CC_NCVmode:
22061 switch (comp_code)
22062 {
22063 case GE: return ARM_GE;
22064 case LT: return ARM_LT;
22065 case GEU: return ARM_CS;
22066 case LTU: return ARM_CC;
22067 default: return ARM_NV;
22068 }
22069
22070 case CCmode:
22071 switch (comp_code)
22072 {
22073 case NE: return ARM_NE;
22074 case EQ: return ARM_EQ;
22075 case GE: return ARM_GE;
22076 case GT: return ARM_GT;
22077 case LE: return ARM_LE;
22078 case LT: return ARM_LT;
22079 case GEU: return ARM_CS;
22080 case GTU: return ARM_HI;
22081 case LEU: return ARM_LS;
22082 case LTU: return ARM_CC;
22083 default: return ARM_NV;
22084 }
22085
22086 default: gcc_unreachable ();
22087 }
22088 }
22089
22090 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22091 static enum arm_cond_code
22092 get_arm_condition_code (rtx comparison)
22093 {
22094 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22095 gcc_assert (code != ARM_NV);
22096 return code;
22097 }
22098
22099 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22100 instructions. */
22101 void
22102 thumb2_final_prescan_insn (rtx insn)
22103 {
22104 rtx first_insn = insn;
22105 rtx body = PATTERN (insn);
22106 rtx predicate;
22107 enum arm_cond_code code;
22108 int n;
22109 int mask;
22110 int max;
22111
22112 /* The maximum number of conditionally executed instructions in a block
22113 is the minimum of two limits: the maximum allowed in an IT block and
22114 the maximum that is beneficial according to the cost model and tuning. */
22115 max = (max_insns_skipped < MAX_INSN_PER_IT_BLOCK) ?
22116 max_insns_skipped : MAX_INSN_PER_IT_BLOCK;
22117
22118 /* Remove the previous insn from the count of insns to be output. */
22119 if (arm_condexec_count)
22120 arm_condexec_count--;
22121
22122 /* Nothing to do if we are already inside a conditional block. */
22123 if (arm_condexec_count)
22124 return;
22125
22126 if (GET_CODE (body) != COND_EXEC)
22127 return;
22128
22129 /* Conditional jumps are implemented directly. */
22130 if (JUMP_P (insn))
22131 return;
22132
22133 predicate = COND_EXEC_TEST (body);
22134 arm_current_cc = get_arm_condition_code (predicate);
22135
22136 n = get_attr_ce_count (insn);
22137 arm_condexec_count = 1;
22138 arm_condexec_mask = (1 << n) - 1;
22139 arm_condexec_masklen = n;
22140 /* See if subsequent instructions can be combined into the same block. */
22141 for (;;)
22142 {
22143 insn = next_nonnote_insn (insn);
22144
22145 /* Jumping into the middle of an IT block is illegal, so a label or
22146 barrier terminates the block. */
22147 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22148 break;
22149
22150 body = PATTERN (insn);
22151 /* USE and CLOBBER aren't really insns, so just skip them. */
22152 if (GET_CODE (body) == USE
22153 || GET_CODE (body) == CLOBBER)
22154 continue;
22155
22156 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22157 if (GET_CODE (body) != COND_EXEC)
22158 break;
22159 /* Stop if adding this insn's conditional instructions would exceed the limit computed above. */
22160 n = get_attr_ce_count (insn);
22161 if (arm_condexec_masklen + n > max)
22162 break;
22163
22164 predicate = COND_EXEC_TEST (body);
22165 code = get_arm_condition_code (predicate);
22166 mask = (1 << n) - 1;
22167 if (arm_current_cc == code)
22168 arm_condexec_mask |= (mask << arm_condexec_masklen);
22169 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22170 break;
22171
22172 arm_condexec_count++;
22173 arm_condexec_masklen += n;
22174
22175 /* A jump must be the last instruction in a conditional block. */
22176 if (JUMP_P (insn))
22177 break;
22178 }
22179 /* Restore recog_data (getting the attributes of other insns can
22180 destroy this array, but final.c assumes that it remains intact
22181 across this call). */
22182 extract_constrain_insn_cached (first_insn);
22183 }
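/* A worked example of the encoding built above (for illustration, assuming
   each insn has a ce_count of 1): if the block starts with an insn
   conditional on EQ and is followed by insns conditional on NE and then EQ,
   the loop leaves arm_current_cc == ARM_EQ, arm_condexec_masklen == 3 and
   arm_condexec_mask == 0b101 -- bit N is set when insn N uses the block's
   condition and clear when it uses the inverse.  Bit 0 is always set.
   thumb2_asm_output_opcode below turns this into "itet eq".  */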
22184
22185 void
22186 arm_final_prescan_insn (rtx insn)
22187 {
22188 /* BODY will hold the body of INSN. */
22189 rtx body = PATTERN (insn);
22190
22191 /* This will be 1 if trying to repeat the trick, and things need to be
22192 reversed if it appears to fail. */
22193 int reverse = 0;
22194
22195 /* If we start with a return insn, we only succeed if we find another one. */
22196 int seeking_return = 0;
22197 enum rtx_code return_code = UNKNOWN;
22198
22199 /* START_INSN will hold the insn from where we start looking. This is the
22200 first insn after the following code_label if REVERSE is true. */
22201 rtx start_insn = insn;
22202
22203 /* If in state 4, check if the target branch is reached, in order to
22204 change back to state 0. */
22205 if (arm_ccfsm_state == 4)
22206 {
22207 if (insn == arm_target_insn)
22208 {
22209 arm_target_insn = NULL;
22210 arm_ccfsm_state = 0;
22211 }
22212 return;
22213 }
22214
22215 /* If in state 3, it is possible to repeat the trick, if this insn is an
22216 unconditional branch to a label, and immediately following this branch
22217 is the previous target label which is only used once, and the label this
22218 branch jumps to is not too far off. */
22219 if (arm_ccfsm_state == 3)
22220 {
22221 if (simplejump_p (insn))
22222 {
22223 start_insn = next_nonnote_insn (start_insn);
22224 if (BARRIER_P (start_insn))
22225 {
22226 /* XXX Isn't this always a barrier? */
22227 start_insn = next_nonnote_insn (start_insn);
22228 }
22229 if (LABEL_P (start_insn)
22230 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22231 && LABEL_NUSES (start_insn) == 1)
22232 reverse = TRUE;
22233 else
22234 return;
22235 }
22236 else if (ANY_RETURN_P (body))
22237 {
22238 start_insn = next_nonnote_insn (start_insn);
22239 if (BARRIER_P (start_insn))
22240 start_insn = next_nonnote_insn (start_insn);
22241 if (LABEL_P (start_insn)
22242 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22243 && LABEL_NUSES (start_insn) == 1)
22244 {
22245 reverse = TRUE;
22246 seeking_return = 1;
22247 return_code = GET_CODE (body);
22248 }
22249 else
22250 return;
22251 }
22252 else
22253 return;
22254 }
22255
22256 gcc_assert (!arm_ccfsm_state || reverse);
22257 if (!JUMP_P (insn))
22258 return;
22259
22260 /* This jump might be paralleled with a clobber of the condition codes;
22261 the jump should always come first.  */
22262 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22263 body = XVECEXP (body, 0, 0);
22264
22265 if (reverse
22266 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22267 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22268 {
22269 int insns_skipped;
22270 int fail = FALSE, succeed = FALSE;
22271 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22272 int then_not_else = TRUE;
22273 rtx this_insn = start_insn, label = 0;
22274
22275 /* Register the insn jumped to. */
22276 if (reverse)
22277 {
22278 if (!seeking_return)
22279 label = XEXP (SET_SRC (body), 0);
22280 }
22281 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22282 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22283 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22284 {
22285 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22286 then_not_else = FALSE;
22287 }
22288 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22289 {
22290 seeking_return = 1;
22291 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22292 }
22293 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22294 {
22295 seeking_return = 1;
22296 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22297 then_not_else = FALSE;
22298 }
22299 else
22300 gcc_unreachable ();
22301
22302 /* See how many insns this branch skips, and what kind of insns. If all
22303 insns are okay, and the label or unconditional branch to the same
22304 label is not too far away, succeed. */
22305 for (insns_skipped = 0;
22306 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22307 {
22308 rtx scanbody;
22309
22310 this_insn = next_nonnote_insn (this_insn);
22311 if (!this_insn)
22312 break;
22313
22314 switch (GET_CODE (this_insn))
22315 {
22316 case CODE_LABEL:
22317 /* Succeed if it is the target label, otherwise fail since
22318 control falls in from somewhere else. */
22319 if (this_insn == label)
22320 {
22321 arm_ccfsm_state = 1;
22322 succeed = TRUE;
22323 }
22324 else
22325 fail = TRUE;
22326 break;
22327
22328 case BARRIER:
22329 /* Succeed if the following insn is the target label.
22330 Otherwise fail.
22331 If return insns are used then the last insn in a function
22332 will be a barrier. */
22333 this_insn = next_nonnote_insn (this_insn);
22334 if (this_insn && this_insn == label)
22335 {
22336 arm_ccfsm_state = 1;
22337 succeed = TRUE;
22338 }
22339 else
22340 fail = TRUE;
22341 break;
22342
22343 case CALL_INSN:
22344 /* The AAPCS says that conditional calls should not be
22345 used since they make interworking inefficient (the
22346 linker can't transform BL<cond> into BLX). That's
22347 only a problem if the machine has BLX. */
22348 if (arm_arch5)
22349 {
22350 fail = TRUE;
22351 break;
22352 }
22353
22354 /* Succeed if the following insn is the target label, or
22355 if the following two insns are a barrier and the
22356 target label. */
22357 this_insn = next_nonnote_insn (this_insn);
22358 if (this_insn && BARRIER_P (this_insn))
22359 this_insn = next_nonnote_insn (this_insn);
22360
22361 if (this_insn && this_insn == label
22362 && insns_skipped < max_insns_skipped)
22363 {
22364 arm_ccfsm_state = 1;
22365 succeed = TRUE;
22366 }
22367 else
22368 fail = TRUE;
22369 break;
22370
22371 case JUMP_INSN:
22372 /* If this is an unconditional branch to the same label, succeed.
22373 If it is to another label, do nothing. If it is conditional,
22374 fail. */
22375 /* XXX Probably, the tests for SET and the PC are
22376 unnecessary. */
22377
22378 scanbody = PATTERN (this_insn);
22379 if (GET_CODE (scanbody) == SET
22380 && GET_CODE (SET_DEST (scanbody)) == PC)
22381 {
22382 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22383 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22384 {
22385 arm_ccfsm_state = 2;
22386 succeed = TRUE;
22387 }
22388 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22389 fail = TRUE;
22390 }
22391 /* Fail if a conditional return is undesirable (e.g. on a
22392 StrongARM), but still allow this if optimizing for size. */
22393 else if (GET_CODE (scanbody) == return_code
22394 && !use_return_insn (TRUE, NULL)
22395 && !optimize_size)
22396 fail = TRUE;
22397 else if (GET_CODE (scanbody) == return_code)
22398 {
22399 arm_ccfsm_state = 2;
22400 succeed = TRUE;
22401 }
22402 else if (GET_CODE (scanbody) == PARALLEL)
22403 {
22404 switch (get_attr_conds (this_insn))
22405 {
22406 case CONDS_NOCOND:
22407 break;
22408 default:
22409 fail = TRUE;
22410 break;
22411 }
22412 }
22413 else
22414 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22415
22416 break;
22417
22418 case INSN:
22419 /* Instructions using or affecting the condition codes make it
22420 fail. */
22421 scanbody = PATTERN (this_insn);
22422 if (!(GET_CODE (scanbody) == SET
22423 || GET_CODE (scanbody) == PARALLEL)
22424 || get_attr_conds (this_insn) != CONDS_NOCOND)
22425 fail = TRUE;
22426 break;
22427
22428 default:
22429 break;
22430 }
22431 }
22432 if (succeed)
22433 {
22434 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22435 arm_target_label = CODE_LABEL_NUMBER (label);
22436 else
22437 {
22438 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22439
22440 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22441 {
22442 this_insn = next_nonnote_insn (this_insn);
22443 gcc_assert (!this_insn
22444 || (!BARRIER_P (this_insn)
22445 && !LABEL_P (this_insn)));
22446 }
22447 if (!this_insn)
22448 {
22449 /* Oh dear!  We ran off the end; give up. */
22450 extract_constrain_insn_cached (insn);
22451 arm_ccfsm_state = 0;
22452 arm_target_insn = NULL;
22453 return;
22454 }
22455 arm_target_insn = this_insn;
22456 }
22457
22458 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22459 what it was. */
22460 if (!reverse)
22461 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22462
22463 if (reverse || then_not_else)
22464 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22465 }
22466
22467 /* Restore recog_data (getting the attributes of other insns can
22468 destroy this array, but final.c assumes that it remains intact
22469 across this call). */
22470 extract_constrain_insn_cached (insn);
22471 }
22472 }
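/* For illustration, the overall effect of the state machine above on the
   emitted code is roughly the following (ARM state, one skipped insn):

	cmp	r0, #0			cmp	r0, #0
	beq	.L1		==>	addne	r1, r1, #1
	add	r1, r1, #1
   .L1:

   i.e. the short forward branch is removed and the skipped instruction is
   executed under the inverse condition.  */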
22473
22474 /* Output IT instructions. */
22475 void
22476 thumb2_asm_output_opcode (FILE * stream)
22477 {
22478 char buff[5];
22479 int n;
22480
22481 if (arm_condexec_mask)
22482 {
22483 for (n = 0; n < arm_condexec_masklen; n++)
22484 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22485 buff[n] = 0;
22486 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22487 arm_condition_codes[arm_current_cc]);
22488 arm_condexec_mask = 0;
22489 }
22490 }
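/* For illustration only -- a minimal, self-contained sketch (not part of
   GCC) of the mask-to-suffix mapping performed above.  It assumes the
   encoding built by thumb2_final_prescan_insn: bit N of MASK is 1 when
   insn N of the block uses the condition COND and 0 when it uses the
   inverse.  Bit 0 is always set, so the mnemonic always starts "it".  */
#if 0
#include <string.h>

static void
example_it_mnemonic (unsigned int mask, int len, const char *cond,
		     char *out /* must have room for at least 16 chars */)
{
  int n;

  strcpy (out, "i");
  for (n = 0; n < len; n++)
    strncat (out, (mask & (1u << n)) ? "t" : "e", 1);
  strcat (out, " ");
  strcat (out, cond);
  /* e.g. mask 0x5 (0b101), len 3, cond "eq" gives "itet eq".  */
}
#endif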
22491
22492 /* Returns true if REGNO is a valid register
22493 for holding a quantity of type MODE. */
22494 int
22495 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22496 {
22497 if (GET_MODE_CLASS (mode) == MODE_CC)
22498 return (regno == CC_REGNUM
22499 || (TARGET_HARD_FLOAT && TARGET_VFP
22500 && regno == VFPCC_REGNUM));
22501
22502 if (TARGET_THUMB1)
22503 /* For the Thumb we only allow values bigger than SImode in
22504 registers 0 - 6, so that there is always a second low
22505 register available to hold the upper part of the value.
22506 We probably ought to ensure that the register is the
22507 start of an even numbered register pair. */
22508 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22509
22510 if (TARGET_HARD_FLOAT && TARGET_VFP
22511 && IS_VFP_REGNUM (regno))
22512 {
22513 if (mode == SFmode || mode == SImode)
22514 return VFP_REGNO_OK_FOR_SINGLE (regno);
22515
22516 if (mode == DFmode)
22517 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22518
22519 /* VFP registers can hold HFmode values, but there is no point in
22520 putting them there unless we have hardware conversion insns. */
22521 if (mode == HFmode)
22522 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22523
22524 if (TARGET_NEON)
22525 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22526 || (VALID_NEON_QREG_MODE (mode)
22527 && NEON_REGNO_OK_FOR_QUAD (regno))
22528 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22529 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22530 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22531 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22532 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22533
22534 return FALSE;
22535 }
22536
22537 if (TARGET_REALLY_IWMMXT)
22538 {
22539 if (IS_IWMMXT_GR_REGNUM (regno))
22540 return mode == SImode;
22541
22542 if (IS_IWMMXT_REGNUM (regno))
22543 return VALID_IWMMXT_REG_MODE (mode);
22544 }
22545
22546 /* We allow almost any value to be stored in the general registers.
22547 Restrict doubleword quantities to even register pairs so that we can
22548 use ldrd. Do not allow very large Neon structure opaque modes in
22549 general registers; they would use too many. */
22550 if (regno <= LAST_ARM_REGNUM)
22551 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
22552 && ARM_NUM_REGS (mode) <= 4;
22553
22554 if (regno == FRAME_POINTER_REGNUM
22555 || regno == ARG_POINTER_REGNUM)
22556 /* We only allow integers in the fake hard registers. */
22557 return GET_MODE_CLASS (mode) == MODE_INT;
22558
22559 return FALSE;
22560 }
22561
22562 /* Implement MODES_TIEABLE_P. */
22563
22564 bool
22565 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22566 {
22567 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22568 return true;
22569
22570 /* We specifically want to allow elements of "structure" modes to
22571 be tieable to the structure. This more general condition allows
22572 other rarer situations too. */
22573 if (TARGET_NEON
22574 && (VALID_NEON_DREG_MODE (mode1)
22575 || VALID_NEON_QREG_MODE (mode1)
22576 || VALID_NEON_STRUCT_MODE (mode1))
22577 && (VALID_NEON_DREG_MODE (mode2)
22578 || VALID_NEON_QREG_MODE (mode2)
22579 || VALID_NEON_STRUCT_MODE (mode2)))
22580 return true;
22581
22582 return false;
22583 }
22584
22585 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22586 not used in arm mode. */
22587
22588 enum reg_class
22589 arm_regno_class (int regno)
22590 {
22591 if (TARGET_THUMB1)
22592 {
22593 if (regno == STACK_POINTER_REGNUM)
22594 return STACK_REG;
22595 if (regno == CC_REGNUM)
22596 return CC_REG;
22597 if (regno < 8)
22598 return LO_REGS;
22599 return HI_REGS;
22600 }
22601
22602 if (TARGET_THUMB2 && regno < 8)
22603 return LO_REGS;
22604
22605 if ( regno <= LAST_ARM_REGNUM
22606 || regno == FRAME_POINTER_REGNUM
22607 || regno == ARG_POINTER_REGNUM)
22608 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22609
22610 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22611 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22612
22613 if (IS_VFP_REGNUM (regno))
22614 {
22615 if (regno <= D7_VFP_REGNUM)
22616 return VFP_D0_D7_REGS;
22617 else if (regno <= LAST_LO_VFP_REGNUM)
22618 return VFP_LO_REGS;
22619 else
22620 return VFP_HI_REGS;
22621 }
22622
22623 if (IS_IWMMXT_REGNUM (regno))
22624 return IWMMXT_REGS;
22625
22626 if (IS_IWMMXT_GR_REGNUM (regno))
22627 return IWMMXT_GR_REGS;
22628
22629 return NO_REGS;
22630 }
22631
22632 /* Handle a special case when computing the offset
22633 of an argument from the frame pointer. */
22634 int
22635 arm_debugger_arg_offset (int value, rtx addr)
22636 {
22637 rtx insn;
22638
22639 /* We are only interested in the case where dbxout_parms() failed to compute the offset. */
22640 if (value != 0)
22641 return 0;
22642
22643 /* We can only cope with the case where the address is held in a register. */
22644 if (!REG_P (addr))
22645 return 0;
22646
22647 /* If we are using the frame pointer to point at the argument, then
22648 an offset of 0 is correct. */
22649 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22650 return 0;
22651
22652 /* If we are using the stack pointer to point at the
22653 argument, then an offset of 0 is correct. */
22654 /* ??? Check this is consistent with thumb2 frame layout. */
22655 if ((TARGET_THUMB || !frame_pointer_needed)
22656 && REGNO (addr) == SP_REGNUM)
22657 return 0;
22658
22659 /* Oh dear. The argument is pointed to by a register rather
22660 than being held in a register, or being stored at a known
22661 offset from the frame pointer. Since GDB only understands
22662 those two kinds of argument we must translate the address
22663 held in the register into an offset from the frame pointer.
22664 We do this by searching through the insns for the function
22665 looking to see where this register gets its value. If the
22666 register is initialized from the frame pointer plus an offset
22667 then we are in luck and we can continue, otherwise we give up.
22668
22669 This code is exercised by producing debugging information
22670 for a function with arguments like this:
22671
22672 double func (double a, double b, int c, double d) {return d;}
22673
22674 Without this code the stab for parameter 'd' will be set to
22675 an offset of 0 from the frame pointer, rather than 8. */
22676
22677 /* The if() statement says:
22678
22679 If the insn is a normal instruction
22680 and if the insn is setting the value in a register
22681 and if the register being set is the register holding the address of the argument
22682 and if the address is computed by an addition
22683 that adds a constant integer
22684 to a register
22685 which is the frame pointer
22686
22687 then... */
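/* In other words (for illustration), the loop below looks for an insn of
   roughly this shape, where Rn is the register holding the argument's
   address:

	(insn ... (set (reg Rn)
		       (plus (reg HARD_FRAME_POINTER_REGNUM)
			     (const_int OFFSET))))

   and, if one is found, OFFSET becomes the argument's offset from the
   frame pointer.  */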
22688
22689 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22690 {
22691 if ( NONJUMP_INSN_P (insn)
22692 && GET_CODE (PATTERN (insn)) == SET
22693 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
22694 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
22695 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
22696 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22697 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
22698 )
22699 {
22700 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
22701
22702 break;
22703 }
22704 }
22705
22706 if (value == 0)
22707 {
22708 debug_rtx (addr);
22709 warning (0, "unable to compute real location of stacked parameter");
22710 value = 8; /* XXX magic hack */
22711 }
22712
22713 return value;
22714 }
22715 \f
22716 typedef enum {
22717 T_V8QI,
22718 T_V4HI,
22719 T_V4HF,
22720 T_V2SI,
22721 T_V2SF,
22722 T_DI,
22723 T_V16QI,
22724 T_V8HI,
22725 T_V4SI,
22726 T_V4SF,
22727 T_V2DI,
22728 T_TI,
22729 T_EI,
22730 T_OI,
22731 T_MAX /* Size of enum. Keep last. */
22732 } neon_builtin_type_mode;
22733
22734 #define TYPE_MODE_BIT(X) (1 << (X))
22735
22736 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
22737 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
22738 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
22739 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
22740 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
22741 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
22742
22743 #define v8qi_UP T_V8QI
22744 #define v4hi_UP T_V4HI
22745 #define v4hf_UP T_V4HF
22746 #define v2si_UP T_V2SI
22747 #define v2sf_UP T_V2SF
22748 #define di_UP T_DI
22749 #define v16qi_UP T_V16QI
22750 #define v8hi_UP T_V8HI
22751 #define v4si_UP T_V4SI
22752 #define v4sf_UP T_V4SF
22753 #define v2di_UP T_V2DI
22754 #define ti_UP T_TI
22755 #define ei_UP T_EI
22756 #define oi_UP T_OI
22757
22758 #define UP(X) X##_UP
22759
22760 typedef enum {
22761 NEON_BINOP,
22762 NEON_TERNOP,
22763 NEON_UNOP,
22764 NEON_GETLANE,
22765 NEON_SETLANE,
22766 NEON_CREATE,
22767 NEON_RINT,
22768 NEON_DUP,
22769 NEON_DUPLANE,
22770 NEON_COMBINE,
22771 NEON_SPLIT,
22772 NEON_LANEMUL,
22773 NEON_LANEMULL,
22774 NEON_LANEMULH,
22775 NEON_LANEMAC,
22776 NEON_SCALARMUL,
22777 NEON_SCALARMULL,
22778 NEON_SCALARMULH,
22779 NEON_SCALARMAC,
22780 NEON_CONVERT,
22781 NEON_FLOAT_WIDEN,
22782 NEON_FLOAT_NARROW,
22783 NEON_FIXCONV,
22784 NEON_SELECT,
22785 NEON_RESULTPAIR,
22786 NEON_REINTERP,
22787 NEON_VTBL,
22788 NEON_VTBX,
22789 NEON_LOAD1,
22790 NEON_LOAD1LANE,
22791 NEON_STORE1,
22792 NEON_STORE1LANE,
22793 NEON_LOADSTRUCT,
22794 NEON_LOADSTRUCTLANE,
22795 NEON_STORESTRUCT,
22796 NEON_STORESTRUCTLANE,
22797 NEON_LOGICBINOP,
22798 NEON_SHIFTINSERT,
22799 NEON_SHIFTIMM,
22800 NEON_SHIFTACC
22801 } neon_itype;
22802
22803 typedef struct {
22804 const char *name;
22805 const neon_itype itype;
22806 const neon_builtin_type_mode mode;
22807 const enum insn_code code;
22808 unsigned int fcode;
22809 } neon_builtin_datum;
22810
22811 #define CF(N,X) CODE_FOR_neon_##N##X
22812
22813 #define VAR1(T, N, A) \
22814 {#N, NEON_##T, UP (A), CF (N, A), 0}
22815 #define VAR2(T, N, A, B) \
22816 VAR1 (T, N, A), \
22817 {#N, NEON_##T, UP (B), CF (N, B), 0}
22818 #define VAR3(T, N, A, B, C) \
22819 VAR2 (T, N, A, B), \
22820 {#N, NEON_##T, UP (C), CF (N, C), 0}
22821 #define VAR4(T, N, A, B, C, D) \
22822 VAR3 (T, N, A, B, C), \
22823 {#N, NEON_##T, UP (D), CF (N, D), 0}
22824 #define VAR5(T, N, A, B, C, D, E) \
22825 VAR4 (T, N, A, B, C, D), \
22826 {#N, NEON_##T, UP (E), CF (N, E), 0}
22827 #define VAR6(T, N, A, B, C, D, E, F) \
22828 VAR5 (T, N, A, B, C, D, E), \
22829 {#N, NEON_##T, UP (F), CF (N, F), 0}
22830 #define VAR7(T, N, A, B, C, D, E, F, G) \
22831 VAR6 (T, N, A, B, C, D, E, F), \
22832 {#N, NEON_##T, UP (G), CF (N, G), 0}
22833 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22834 VAR7 (T, N, A, B, C, D, E, F, G), \
22835 {#N, NEON_##T, UP (H), CF (N, H), 0}
22836 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22837 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22838 {#N, NEON_##T, UP (I), CF (N, I), 0}
22839 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22840 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22841 {#N, NEON_##T, UP (J), CF (N, J), 0}
22842
22843 /* The NEON builtin data can be found in arm_neon_builtins.def.
22844 The mode entries in the following table correspond to the "key" type of the
22845 instruction variant, i.e. equivalent to that which would be specified after
22846 the assembler mnemonic, which usually refers to the last vector operand.
22847 (Signed, unsigned and polynomial types are not differentiated, though; they
22848 are all mapped onto the same mode for a given element size.)  The modes
22849 listed per instruction should be the same as those defined for that
22850 instruction's pattern in neon.md. */
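/* For illustration (the entry name is hypothetical): a line such as
      VAR2 (BINOP, vadd, v8qi, v16qi)
   in arm_neon_builtins.def would expand, via the macros above, to the two
   table entries
      {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
      {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0}
   with the fcode field filled in later by arm_init_neon_builtins.  */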
22851
22852 static neon_builtin_datum neon_builtin_data[] =
22853 {
22854 #include "arm_neon_builtins.def"
22855 };
22856
22857 #undef CF
22858 #undef VAR1
22859 #undef VAR2
22860 #undef VAR3
22861 #undef VAR4
22862 #undef VAR5
22863 #undef VAR6
22864 #undef VAR7
22865 #undef VAR8
22866 #undef VAR9
22867 #undef VAR10
22868
22869 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
22870 #define VAR1(T, N, A) \
22871 CF (N, A)
22872 #define VAR2(T, N, A, B) \
22873 VAR1 (T, N, A), \
22874 CF (N, B)
22875 #define VAR3(T, N, A, B, C) \
22876 VAR2 (T, N, A, B), \
22877 CF (N, C)
22878 #define VAR4(T, N, A, B, C, D) \
22879 VAR3 (T, N, A, B, C), \
22880 CF (N, D)
22881 #define VAR5(T, N, A, B, C, D, E) \
22882 VAR4 (T, N, A, B, C, D), \
22883 CF (N, E)
22884 #define VAR6(T, N, A, B, C, D, E, F) \
22885 VAR5 (T, N, A, B, C, D, E), \
22886 CF (N, F)
22887 #define VAR7(T, N, A, B, C, D, E, F, G) \
22888 VAR6 (T, N, A, B, C, D, E, F), \
22889 CF (N, G)
22890 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22891 VAR7 (T, N, A, B, C, D, E, F, G), \
22892 CF (N, H)
22893 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22894 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22895 CF (N, I)
22896 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22897 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22898 CF (N, J)
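/* With the redefinitions above, the same hypothetical entry
      VAR2 (BINOP, vadd, v8qi, v16qi)
   now expands inside the enum below to the two enumerators
      ARM_BUILTIN_NEON_vaddv8qi, ARM_BUILTIN_NEON_vaddv16qi
   keeping the enum in one-to-one correspondence with neon_builtin_data.  */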
22899 enum arm_builtins
22900 {
22901 ARM_BUILTIN_GETWCGR0,
22902 ARM_BUILTIN_GETWCGR1,
22903 ARM_BUILTIN_GETWCGR2,
22904 ARM_BUILTIN_GETWCGR3,
22905
22906 ARM_BUILTIN_SETWCGR0,
22907 ARM_BUILTIN_SETWCGR1,
22908 ARM_BUILTIN_SETWCGR2,
22909 ARM_BUILTIN_SETWCGR3,
22910
22911 ARM_BUILTIN_WZERO,
22912
22913 ARM_BUILTIN_WAVG2BR,
22914 ARM_BUILTIN_WAVG2HR,
22915 ARM_BUILTIN_WAVG2B,
22916 ARM_BUILTIN_WAVG2H,
22917
22918 ARM_BUILTIN_WACCB,
22919 ARM_BUILTIN_WACCH,
22920 ARM_BUILTIN_WACCW,
22921
22922 ARM_BUILTIN_WMACS,
22923 ARM_BUILTIN_WMACSZ,
22924 ARM_BUILTIN_WMACU,
22925 ARM_BUILTIN_WMACUZ,
22926
22927 ARM_BUILTIN_WSADB,
22928 ARM_BUILTIN_WSADBZ,
22929 ARM_BUILTIN_WSADH,
22930 ARM_BUILTIN_WSADHZ,
22931
22932 ARM_BUILTIN_WALIGNI,
22933 ARM_BUILTIN_WALIGNR0,
22934 ARM_BUILTIN_WALIGNR1,
22935 ARM_BUILTIN_WALIGNR2,
22936 ARM_BUILTIN_WALIGNR3,
22937
22938 ARM_BUILTIN_TMIA,
22939 ARM_BUILTIN_TMIAPH,
22940 ARM_BUILTIN_TMIABB,
22941 ARM_BUILTIN_TMIABT,
22942 ARM_BUILTIN_TMIATB,
22943 ARM_BUILTIN_TMIATT,
22944
22945 ARM_BUILTIN_TMOVMSKB,
22946 ARM_BUILTIN_TMOVMSKH,
22947 ARM_BUILTIN_TMOVMSKW,
22948
22949 ARM_BUILTIN_TBCSTB,
22950 ARM_BUILTIN_TBCSTH,
22951 ARM_BUILTIN_TBCSTW,
22952
22953 ARM_BUILTIN_WMADDS,
22954 ARM_BUILTIN_WMADDU,
22955
22956 ARM_BUILTIN_WPACKHSS,
22957 ARM_BUILTIN_WPACKWSS,
22958 ARM_BUILTIN_WPACKDSS,
22959 ARM_BUILTIN_WPACKHUS,
22960 ARM_BUILTIN_WPACKWUS,
22961 ARM_BUILTIN_WPACKDUS,
22962
22963 ARM_BUILTIN_WADDB,
22964 ARM_BUILTIN_WADDH,
22965 ARM_BUILTIN_WADDW,
22966 ARM_BUILTIN_WADDSSB,
22967 ARM_BUILTIN_WADDSSH,
22968 ARM_BUILTIN_WADDSSW,
22969 ARM_BUILTIN_WADDUSB,
22970 ARM_BUILTIN_WADDUSH,
22971 ARM_BUILTIN_WADDUSW,
22972 ARM_BUILTIN_WSUBB,
22973 ARM_BUILTIN_WSUBH,
22974 ARM_BUILTIN_WSUBW,
22975 ARM_BUILTIN_WSUBSSB,
22976 ARM_BUILTIN_WSUBSSH,
22977 ARM_BUILTIN_WSUBSSW,
22978 ARM_BUILTIN_WSUBUSB,
22979 ARM_BUILTIN_WSUBUSH,
22980 ARM_BUILTIN_WSUBUSW,
22981
22982 ARM_BUILTIN_WAND,
22983 ARM_BUILTIN_WANDN,
22984 ARM_BUILTIN_WOR,
22985 ARM_BUILTIN_WXOR,
22986
22987 ARM_BUILTIN_WCMPEQB,
22988 ARM_BUILTIN_WCMPEQH,
22989 ARM_BUILTIN_WCMPEQW,
22990 ARM_BUILTIN_WCMPGTUB,
22991 ARM_BUILTIN_WCMPGTUH,
22992 ARM_BUILTIN_WCMPGTUW,
22993 ARM_BUILTIN_WCMPGTSB,
22994 ARM_BUILTIN_WCMPGTSH,
22995 ARM_BUILTIN_WCMPGTSW,
22996
22997 ARM_BUILTIN_TEXTRMSB,
22998 ARM_BUILTIN_TEXTRMSH,
22999 ARM_BUILTIN_TEXTRMSW,
23000 ARM_BUILTIN_TEXTRMUB,
23001 ARM_BUILTIN_TEXTRMUH,
23002 ARM_BUILTIN_TEXTRMUW,
23003 ARM_BUILTIN_TINSRB,
23004 ARM_BUILTIN_TINSRH,
23005 ARM_BUILTIN_TINSRW,
23006
23007 ARM_BUILTIN_WMAXSW,
23008 ARM_BUILTIN_WMAXSH,
23009 ARM_BUILTIN_WMAXSB,
23010 ARM_BUILTIN_WMAXUW,
23011 ARM_BUILTIN_WMAXUH,
23012 ARM_BUILTIN_WMAXUB,
23013 ARM_BUILTIN_WMINSW,
23014 ARM_BUILTIN_WMINSH,
23015 ARM_BUILTIN_WMINSB,
23016 ARM_BUILTIN_WMINUW,
23017 ARM_BUILTIN_WMINUH,
23018 ARM_BUILTIN_WMINUB,
23019
23020 ARM_BUILTIN_WMULUM,
23021 ARM_BUILTIN_WMULSM,
23022 ARM_BUILTIN_WMULUL,
23023
23024 ARM_BUILTIN_PSADBH,
23025 ARM_BUILTIN_WSHUFH,
23026
23027 ARM_BUILTIN_WSLLH,
23028 ARM_BUILTIN_WSLLW,
23029 ARM_BUILTIN_WSLLD,
23030 ARM_BUILTIN_WSRAH,
23031 ARM_BUILTIN_WSRAW,
23032 ARM_BUILTIN_WSRAD,
23033 ARM_BUILTIN_WSRLH,
23034 ARM_BUILTIN_WSRLW,
23035 ARM_BUILTIN_WSRLD,
23036 ARM_BUILTIN_WRORH,
23037 ARM_BUILTIN_WRORW,
23038 ARM_BUILTIN_WRORD,
23039 ARM_BUILTIN_WSLLHI,
23040 ARM_BUILTIN_WSLLWI,
23041 ARM_BUILTIN_WSLLDI,
23042 ARM_BUILTIN_WSRAHI,
23043 ARM_BUILTIN_WSRAWI,
23044 ARM_BUILTIN_WSRADI,
23045 ARM_BUILTIN_WSRLHI,
23046 ARM_BUILTIN_WSRLWI,
23047 ARM_BUILTIN_WSRLDI,
23048 ARM_BUILTIN_WRORHI,
23049 ARM_BUILTIN_WRORWI,
23050 ARM_BUILTIN_WRORDI,
23051
23052 ARM_BUILTIN_WUNPCKIHB,
23053 ARM_BUILTIN_WUNPCKIHH,
23054 ARM_BUILTIN_WUNPCKIHW,
23055 ARM_BUILTIN_WUNPCKILB,
23056 ARM_BUILTIN_WUNPCKILH,
23057 ARM_BUILTIN_WUNPCKILW,
23058
23059 ARM_BUILTIN_WUNPCKEHSB,
23060 ARM_BUILTIN_WUNPCKEHSH,
23061 ARM_BUILTIN_WUNPCKEHSW,
23062 ARM_BUILTIN_WUNPCKEHUB,
23063 ARM_BUILTIN_WUNPCKEHUH,
23064 ARM_BUILTIN_WUNPCKEHUW,
23065 ARM_BUILTIN_WUNPCKELSB,
23066 ARM_BUILTIN_WUNPCKELSH,
23067 ARM_BUILTIN_WUNPCKELSW,
23068 ARM_BUILTIN_WUNPCKELUB,
23069 ARM_BUILTIN_WUNPCKELUH,
23070 ARM_BUILTIN_WUNPCKELUW,
23071
23072 ARM_BUILTIN_WABSB,
23073 ARM_BUILTIN_WABSH,
23074 ARM_BUILTIN_WABSW,
23075
23076 ARM_BUILTIN_WADDSUBHX,
23077 ARM_BUILTIN_WSUBADDHX,
23078
23079 ARM_BUILTIN_WABSDIFFB,
23080 ARM_BUILTIN_WABSDIFFH,
23081 ARM_BUILTIN_WABSDIFFW,
23082
23083 ARM_BUILTIN_WADDCH,
23084 ARM_BUILTIN_WADDCW,
23085
23086 ARM_BUILTIN_WAVG4,
23087 ARM_BUILTIN_WAVG4R,
23088
23089 ARM_BUILTIN_WMADDSX,
23090 ARM_BUILTIN_WMADDUX,
23091
23092 ARM_BUILTIN_WMADDSN,
23093 ARM_BUILTIN_WMADDUN,
23094
23095 ARM_BUILTIN_WMULWSM,
23096 ARM_BUILTIN_WMULWUM,
23097
23098 ARM_BUILTIN_WMULWSMR,
23099 ARM_BUILTIN_WMULWUMR,
23100
23101 ARM_BUILTIN_WMULWL,
23102
23103 ARM_BUILTIN_WMULSMR,
23104 ARM_BUILTIN_WMULUMR,
23105
23106 ARM_BUILTIN_WQMULM,
23107 ARM_BUILTIN_WQMULMR,
23108
23109 ARM_BUILTIN_WQMULWM,
23110 ARM_BUILTIN_WQMULWMR,
23111
23112 ARM_BUILTIN_WADDBHUSM,
23113 ARM_BUILTIN_WADDBHUSL,
23114
23115 ARM_BUILTIN_WQMIABB,
23116 ARM_BUILTIN_WQMIABT,
23117 ARM_BUILTIN_WQMIATB,
23118 ARM_BUILTIN_WQMIATT,
23119
23120 ARM_BUILTIN_WQMIABBN,
23121 ARM_BUILTIN_WQMIABTN,
23122 ARM_BUILTIN_WQMIATBN,
23123 ARM_BUILTIN_WQMIATTN,
23124
23125 ARM_BUILTIN_WMIABB,
23126 ARM_BUILTIN_WMIABT,
23127 ARM_BUILTIN_WMIATB,
23128 ARM_BUILTIN_WMIATT,
23129
23130 ARM_BUILTIN_WMIABBN,
23131 ARM_BUILTIN_WMIABTN,
23132 ARM_BUILTIN_WMIATBN,
23133 ARM_BUILTIN_WMIATTN,
23134
23135 ARM_BUILTIN_WMIAWBB,
23136 ARM_BUILTIN_WMIAWBT,
23137 ARM_BUILTIN_WMIAWTB,
23138 ARM_BUILTIN_WMIAWTT,
23139
23140 ARM_BUILTIN_WMIAWBBN,
23141 ARM_BUILTIN_WMIAWBTN,
23142 ARM_BUILTIN_WMIAWTBN,
23143 ARM_BUILTIN_WMIAWTTN,
23144
23145 ARM_BUILTIN_WMERGE,
23146
23147 ARM_BUILTIN_CRC32B,
23148 ARM_BUILTIN_CRC32H,
23149 ARM_BUILTIN_CRC32W,
23150 ARM_BUILTIN_CRC32CB,
23151 ARM_BUILTIN_CRC32CH,
23152 ARM_BUILTIN_CRC32CW,
23153
23154 #undef CRYPTO1
23155 #undef CRYPTO2
23156 #undef CRYPTO3
23157
23158 #define CRYPTO1(L, U, M1, M2) \
23159 ARM_BUILTIN_CRYPTO_##U,
23160 #define CRYPTO2(L, U, M1, M2, M3) \
23161 ARM_BUILTIN_CRYPTO_##U,
23162 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23163 ARM_BUILTIN_CRYPTO_##U,
23164
23165 #include "crypto.def"
23166
23167 #undef CRYPTO1
23168 #undef CRYPTO2
23169 #undef CRYPTO3
23170
23171 #include "arm_neon_builtins.def"
23172
23173 ,ARM_BUILTIN_MAX
23174 };
23175
23176 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
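/* Consequently a NEON function code FCODE in the range
   [ARM_BUILTIN_NEON_BASE, ARM_BUILTIN_MAX) corresponds to the table entry
   neon_builtin_data[FCODE - ARM_BUILTIN_NEON_BASE].  */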
23177
23178 #undef CF
23179 #undef VAR1
23180 #undef VAR2
23181 #undef VAR3
23182 #undef VAR4
23183 #undef VAR5
23184 #undef VAR6
23185 #undef VAR7
23186 #undef VAR8
23187 #undef VAR9
23188 #undef VAR10
23189
23190 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23191
23192 #define NUM_DREG_TYPES 5
23193 #define NUM_QREG_TYPES 6
23194
23195 static void
23196 arm_init_neon_builtins (void)
23197 {
23198 unsigned int i, fcode;
23199 tree decl;
23200
23201 tree neon_intQI_type_node;
23202 tree neon_intHI_type_node;
23203 tree neon_floatHF_type_node;
23204 tree neon_polyQI_type_node;
23205 tree neon_polyHI_type_node;
23206 tree neon_intSI_type_node;
23207 tree neon_intDI_type_node;
23208 tree neon_intUTI_type_node;
23209 tree neon_float_type_node;
23210
23211 tree intQI_pointer_node;
23212 tree intHI_pointer_node;
23213 tree intSI_pointer_node;
23214 tree intDI_pointer_node;
23215 tree float_pointer_node;
23216
23217 tree const_intQI_node;
23218 tree const_intHI_node;
23219 tree const_intSI_node;
23220 tree const_intDI_node;
23221 tree const_float_node;
23222
23223 tree const_intQI_pointer_node;
23224 tree const_intHI_pointer_node;
23225 tree const_intSI_pointer_node;
23226 tree const_intDI_pointer_node;
23227 tree const_float_pointer_node;
23228
23229 tree V8QI_type_node;
23230 tree V4HI_type_node;
23231 tree V4HF_type_node;
23232 tree V2SI_type_node;
23233 tree V2SF_type_node;
23234 tree V16QI_type_node;
23235 tree V8HI_type_node;
23236 tree V4SI_type_node;
23237 tree V4SF_type_node;
23238 tree V2DI_type_node;
23239
23240 tree intUQI_type_node;
23241 tree intUHI_type_node;
23242 tree intUSI_type_node;
23243 tree intUDI_type_node;
23244
23245 tree intEI_type_node;
23246 tree intOI_type_node;
23247 tree intCI_type_node;
23248 tree intXI_type_node;
23249
23250 tree V8QI_pointer_node;
23251 tree V4HI_pointer_node;
23252 tree V2SI_pointer_node;
23253 tree V2SF_pointer_node;
23254 tree V16QI_pointer_node;
23255 tree V8HI_pointer_node;
23256 tree V4SI_pointer_node;
23257 tree V4SF_pointer_node;
23258 tree V2DI_pointer_node;
23259
23260 tree void_ftype_pv8qi_v8qi_v8qi;
23261 tree void_ftype_pv4hi_v4hi_v4hi;
23262 tree void_ftype_pv2si_v2si_v2si;
23263 tree void_ftype_pv2sf_v2sf_v2sf;
23264 tree void_ftype_pdi_di_di;
23265 tree void_ftype_pv16qi_v16qi_v16qi;
23266 tree void_ftype_pv8hi_v8hi_v8hi;
23267 tree void_ftype_pv4si_v4si_v4si;
23268 tree void_ftype_pv4sf_v4sf_v4sf;
23269 tree void_ftype_pv2di_v2di_v2di;
23270
23271 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23272 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23273 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23274
23275 /* Create distinguished type nodes for NEON vector element types,
23276 and pointers to values of such types, so we can detect them later. */
23277 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23278 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23279 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23280 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23281 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23282 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23283 neon_float_type_node = make_node (REAL_TYPE);
23284 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23285 layout_type (neon_float_type_node);
23286 neon_floatHF_type_node = make_node (REAL_TYPE);
23287 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23288 layout_type (neon_floatHF_type_node);
23289
23290 /* Define typedefs which exactly correspond to the modes we are basing vector
23291 types on. If you change these names you'll need to change
23292 the table used by arm_mangle_type too. */
23293 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23294 "__builtin_neon_qi");
23295 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23296 "__builtin_neon_hi");
23297 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23298 "__builtin_neon_hf");
23299 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23300 "__builtin_neon_si");
23301 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23302 "__builtin_neon_sf");
23303 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23304 "__builtin_neon_di");
23305 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23306 "__builtin_neon_poly8");
23307 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23308 "__builtin_neon_poly16");
23309
23310 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23311 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23312 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23313 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23314 float_pointer_node = build_pointer_type (neon_float_type_node);
23315
23316 /* Next create constant-qualified versions of the above types. */
23317 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23318 TYPE_QUAL_CONST);
23319 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23320 TYPE_QUAL_CONST);
23321 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23322 TYPE_QUAL_CONST);
23323 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23324 TYPE_QUAL_CONST);
23325 const_float_node = build_qualified_type (neon_float_type_node,
23326 TYPE_QUAL_CONST);
23327
23328 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23329 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23330 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23331 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23332 const_float_pointer_node = build_pointer_type (const_float_node);
23333
23334 /* Now create vector types based on our NEON element types. */
23335 /* 64-bit vectors. */
23336 V8QI_type_node =
23337 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23338 V4HI_type_node =
23339 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23340 V4HF_type_node =
23341 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23342 V2SI_type_node =
23343 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23344 V2SF_type_node =
23345 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23346 /* 128-bit vectors. */
23347 V16QI_type_node =
23348 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23349 V8HI_type_node =
23350 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23351 V4SI_type_node =
23352 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23353 V4SF_type_node =
23354 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23355 V2DI_type_node =
23356 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23357
23358 /* Unsigned integer types for various mode sizes. */
23359 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23360 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23361 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23362 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23363 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23364
23365
23366 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23367 "__builtin_neon_uqi");
23368 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23369 "__builtin_neon_uhi");
23370 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23371 "__builtin_neon_usi");
23372 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23373 "__builtin_neon_udi");
23374 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23375 "__builtin_neon_poly64");
23376 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23377 "__builtin_neon_poly128");
23378
23379 /* Opaque integer types for structures of vectors. */
23380 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23381 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23382 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23383 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23384
23385 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23386 "__builtin_neon_ti");
23387 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23388 "__builtin_neon_ei");
23389 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23390 "__builtin_neon_oi");
23391 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23392 "__builtin_neon_ci");
23393 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23394 "__builtin_neon_xi");
23395
23396 /* Pointers to vector types. */
23397 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
23398 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
23399 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
23400 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
23401 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
23402 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
23403 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
23404 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
23405 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
23406
23407 /* Operations which return results as pairs. */
23408 void_ftype_pv8qi_v8qi_v8qi =
23409 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
23410 V8QI_type_node, NULL);
23411 void_ftype_pv4hi_v4hi_v4hi =
23412 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
23413 V4HI_type_node, NULL);
23414 void_ftype_pv2si_v2si_v2si =
23415 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
23416 V2SI_type_node, NULL);
23417 void_ftype_pv2sf_v2sf_v2sf =
23418 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
23419 V2SF_type_node, NULL);
23420 void_ftype_pdi_di_di =
23421 build_function_type_list (void_type_node, intDI_pointer_node,
23422 neon_intDI_type_node, neon_intDI_type_node, NULL);
23423 void_ftype_pv16qi_v16qi_v16qi =
23424 build_function_type_list (void_type_node, V16QI_pointer_node,
23425 V16QI_type_node, V16QI_type_node, NULL);
23426 void_ftype_pv8hi_v8hi_v8hi =
23427 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
23428 V8HI_type_node, NULL);
23429 void_ftype_pv4si_v4si_v4si =
23430 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
23431 V4SI_type_node, NULL);
23432 void_ftype_pv4sf_v4sf_v4sf =
23433 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
23434 V4SF_type_node, NULL);
23435 void_ftype_pv2di_v2di_v2di =
23436 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
23437 V2DI_type_node, NULL);
23438
23439 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23440 {
23441 tree V4USI_type_node =
23442 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23443
23444 tree V16UQI_type_node =
23445 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23446
23447 tree v16uqi_ftype_v16uqi
23448 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23449
23450 tree v16uqi_ftype_v16uqi_v16uqi
23451 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23452 V16UQI_type_node, NULL_TREE);
23453
23454 tree v4usi_ftype_v4usi
23455 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23456
23457 tree v4usi_ftype_v4usi_v4usi
23458 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23459 V4USI_type_node, NULL_TREE);
23460
23461 tree v4usi_ftype_v4usi_v4usi_v4usi
23462 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23463 V4USI_type_node, V4USI_type_node, NULL_TREE);
23464
23465 tree uti_ftype_udi_udi
23466 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23467 intUDI_type_node, NULL_TREE);
23468
23469 #undef CRYPTO1
23470 #undef CRYPTO2
23471 #undef CRYPTO3
23472 #undef C
23473 #undef N
23474 #undef CF
23475 #undef FT1
23476 #undef FT2
23477 #undef FT3
23478
23479 #define C(U) \
23480 ARM_BUILTIN_CRYPTO_##U
23481 #define N(L) \
23482 "__builtin_arm_crypto_"#L
23483 #define FT1(R, A) \
23484 R##_ftype_##A
23485 #define FT2(R, A1, A2) \
23486 R##_ftype_##A1##_##A2
23487 #define FT3(R, A1, A2, A3) \
23488 R##_ftype_##A1##_##A2##_##A3
23489 #define CRYPTO1(L, U, R, A) \
23490 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23491 C (U), BUILT_IN_MD, \
23492 NULL, NULL_TREE);
23493 #define CRYPTO2(L, U, R, A1, A2) \
23494 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23495 C (U), BUILT_IN_MD, \
23496 NULL, NULL_TREE);
23497
23498 #define CRYPTO3(L, U, R, A1, A2, A3) \
23499 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23500 C (U), BUILT_IN_MD, \
23501 NULL, NULL_TREE);
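/* For illustration (the entry is hypothetical): a line such as
      CRYPTO2 (foo2, FOO2, v16uqi, v16uqi, v16uqi)
   in crypto.def would expand, via the macros above and the #include below,
   to
      arm_builtin_decls[ARM_BUILTIN_CRYPTO_FOO2]
	= add_builtin_function ("__builtin_arm_crypto_foo2",
				v16uqi_ftype_v16uqi_v16uqi,
				ARM_BUILTIN_CRYPTO_FOO2, BUILT_IN_MD,
				NULL, NULL_TREE);  */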
23502 #include "crypto.def"
23503
23504 #undef CRYPTO1
23505 #undef CRYPTO2
23506 #undef CRYPTO3
23507 #undef C
23508 #undef N
23509 #undef FT1
23510 #undef FT2
23511 #undef FT3
23512 }
23513 dreg_types[0] = V8QI_type_node;
23514 dreg_types[1] = V4HI_type_node;
23515 dreg_types[2] = V2SI_type_node;
23516 dreg_types[3] = V2SF_type_node;
23517 dreg_types[4] = neon_intDI_type_node;
23518
23519 qreg_types[0] = V16QI_type_node;
23520 qreg_types[1] = V8HI_type_node;
23521 qreg_types[2] = V4SI_type_node;
23522 qreg_types[3] = V4SF_type_node;
23523 qreg_types[4] = V2DI_type_node;
23524 qreg_types[5] = neon_intUTI_type_node;
23525
23526 for (i = 0; i < NUM_QREG_TYPES; i++)
23527 {
23528 int j;
23529 for (j = 0; j < NUM_QREG_TYPES; j++)
23530 {
23531 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
23532 reinterp_ftype_dreg[i][j]
23533 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23534
23535 reinterp_ftype_qreg[i][j]
23536 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23537 }
23538 }
23539
23540 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23541 i < ARRAY_SIZE (neon_builtin_data);
23542 i++, fcode++)
23543 {
23544 neon_builtin_datum *d = &neon_builtin_data[i];
23545
23546 const char* const modenames[] = {
23547 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23548 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23549 "ti", "ei", "oi"
23550 };
23551 char namebuf[60];
23552 tree ftype = NULL;
23553 int is_load = 0, is_store = 0;
23554
23555 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23556
23557 d->fcode = fcode;
23558
23559 switch (d->itype)
23560 {
23561 case NEON_LOAD1:
23562 case NEON_LOAD1LANE:
23563 case NEON_LOADSTRUCT:
23564 case NEON_LOADSTRUCTLANE:
23565 is_load = 1;
23566 /* Fall through. */
23567 case NEON_STORE1:
23568 case NEON_STORE1LANE:
23569 case NEON_STORESTRUCT:
23570 case NEON_STORESTRUCTLANE:
23571 if (!is_load)
23572 is_store = 1;
23573 /* Fall through. */
23574 case NEON_UNOP:
23575 case NEON_RINT:
23576 case NEON_BINOP:
23577 case NEON_LOGICBINOP:
23578 case NEON_SHIFTINSERT:
23579 case NEON_TERNOP:
23580 case NEON_GETLANE:
23581 case NEON_SETLANE:
23582 case NEON_CREATE:
23583 case NEON_DUP:
23584 case NEON_DUPLANE:
23585 case NEON_SHIFTIMM:
23586 case NEON_SHIFTACC:
23587 case NEON_COMBINE:
23588 case NEON_SPLIT:
23589 case NEON_CONVERT:
23590 case NEON_FIXCONV:
23591 case NEON_LANEMUL:
23592 case NEON_LANEMULL:
23593 case NEON_LANEMULH:
23594 case NEON_LANEMAC:
23595 case NEON_SCALARMUL:
23596 case NEON_SCALARMULL:
23597 case NEON_SCALARMULH:
23598 case NEON_SCALARMAC:
23599 case NEON_SELECT:
23600 case NEON_VTBL:
23601 case NEON_VTBX:
23602 {
23603 int k;
23604 tree return_type = void_type_node, args = void_list_node;
23605
23606 /* Build a function type directly from the insn_data for
23607 this builtin. The build_function_type() function takes
23608 care of removing duplicates for us. */
23609 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
23610 {
23611 tree eltype;
23612
23613 if (is_load && k == 1)
23614 {
23615 /* Neon load patterns always have the memory
23616 operand in the operand 1 position. */
23617 gcc_assert (insn_data[d->code].operand[k].predicate
23618 == neon_struct_operand);
23619
23620 switch (d->mode)
23621 {
23622 case T_V8QI:
23623 case T_V16QI:
23624 eltype = const_intQI_pointer_node;
23625 break;
23626
23627 case T_V4HI:
23628 case T_V8HI:
23629 eltype = const_intHI_pointer_node;
23630 break;
23631
23632 case T_V2SI:
23633 case T_V4SI:
23634 eltype = const_intSI_pointer_node;
23635 break;
23636
23637 case T_V2SF:
23638 case T_V4SF:
23639 eltype = const_float_pointer_node;
23640 break;
23641
23642 case T_DI:
23643 case T_V2DI:
23644 eltype = const_intDI_pointer_node;
23645 break;
23646
23647 default: gcc_unreachable ();
23648 }
23649 }
23650 else if (is_store && k == 0)
23651 {
23652 /* Similarly, Neon store patterns use operand 0 as
23653 the memory location to store to. */
23654 gcc_assert (insn_data[d->code].operand[k].predicate
23655 == neon_struct_operand);
23656
23657 switch (d->mode)
23658 {
23659 case T_V8QI:
23660 case T_V16QI:
23661 eltype = intQI_pointer_node;
23662 break;
23663
23664 case T_V4HI:
23665 case T_V8HI:
23666 eltype = intHI_pointer_node;
23667 break;
23668
23669 case T_V2SI:
23670 case T_V4SI:
23671 eltype = intSI_pointer_node;
23672 break;
23673
23674 case T_V2SF:
23675 case T_V4SF:
23676 eltype = float_pointer_node;
23677 break;
23678
23679 case T_DI:
23680 case T_V2DI:
23681 eltype = intDI_pointer_node;
23682 break;
23683
23684 default: gcc_unreachable ();
23685 }
23686 }
23687 else
23688 {
23689 switch (insn_data[d->code].operand[k].mode)
23690 {
23691 case VOIDmode: eltype = void_type_node; break;
23692 /* Scalars. */
23693 case QImode: eltype = neon_intQI_type_node; break;
23694 case HImode: eltype = neon_intHI_type_node; break;
23695 case SImode: eltype = neon_intSI_type_node; break;
23696 case SFmode: eltype = neon_float_type_node; break;
23697 case DImode: eltype = neon_intDI_type_node; break;
23698 case TImode: eltype = intTI_type_node; break;
23699 case EImode: eltype = intEI_type_node; break;
23700 case OImode: eltype = intOI_type_node; break;
23701 case CImode: eltype = intCI_type_node; break;
23702 case XImode: eltype = intXI_type_node; break;
23703 /* 64-bit vectors. */
23704 case V8QImode: eltype = V8QI_type_node; break;
23705 case V4HImode: eltype = V4HI_type_node; break;
23706 case V2SImode: eltype = V2SI_type_node; break;
23707 case V2SFmode: eltype = V2SF_type_node; break;
23708 /* 128-bit vectors. */
23709 case V16QImode: eltype = V16QI_type_node; break;
23710 case V8HImode: eltype = V8HI_type_node; break;
23711 case V4SImode: eltype = V4SI_type_node; break;
23712 case V4SFmode: eltype = V4SF_type_node; break;
23713 case V2DImode: eltype = V2DI_type_node; break;
23714 default: gcc_unreachable ();
23715 }
23716 }
23717
23718 if (k == 0 && !is_store)
23719 return_type = eltype;
23720 else
23721 args = tree_cons (NULL_TREE, eltype, args);
23722 }
23723
23724 ftype = build_function_type (return_type, args);
23725 }
23726 break;
23727
23728 case NEON_RESULTPAIR:
23729 {
23730 switch (insn_data[d->code].operand[1].mode)
23731 {
23732 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
23733 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
23734 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
23735 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
23736 case DImode: ftype = void_ftype_pdi_di_di; break;
23737 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
23738 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
23739 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
23740 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
23741 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
23742 default: gcc_unreachable ();
23743 }
23744 }
23745 break;
23746
23747 case NEON_REINTERP:
23748 {
23749 /* We iterate over NUM_DREG_TYPES doubleword types,
23750 then NUM_QREG_TYPES quadword types.
23751 V4HF is not a type used in reinterpret, so we translate
23752 d->mode to the correct index in reinterp_ftype_dreg. */
23753 bool qreg_p
23754 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
23755 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
23756 % NUM_QREG_TYPES;
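/* Worked examples of the index calculation (for illustration):
   T_V2SI has enum value 3 and is a D-reg mode above T_V4HF, so
   rhs = (3 - 1) % 6 = 2, matching dreg_types[2] (V2SI).
   T_V4SI has enum value 8 and is a Q-reg mode, so
   rhs = 8 % 6 = 2, matching qreg_types[2] (V4SI).  */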
23757 switch (insn_data[d->code].operand[0].mode)
23758 {
23759 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
23760 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
23761 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
23762 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
23763 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
23764 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
23765 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
23766 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
23767 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
23768 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
23769 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
23770 default: gcc_unreachable ();
23771 }
23772 }
23773 break;
23774 case NEON_FLOAT_WIDEN:
23775 {
23776 tree eltype = NULL_TREE;
23777 tree return_type = NULL_TREE;
23778
23779 switch (insn_data[d->code].operand[1].mode)
23780 {
23781 case V4HFmode:
23782 eltype = V4HF_type_node;
23783 return_type = V4SF_type_node;
23784 break;
23785 default: gcc_unreachable ();
23786 }
23787 ftype = build_function_type_list (return_type, eltype, NULL);
23788 break;
23789 }
23790 case NEON_FLOAT_NARROW:
23791 {
23792 tree eltype = NULL_TREE;
23793 tree return_type = NULL_TREE;
23794
23795 switch (insn_data[d->code].operand[1].mode)
23796 {
23797 case V4SFmode:
23798 eltype = V4SF_type_node;
23799 return_type = V4HF_type_node;
23800 break;
23801 default: gcc_unreachable ();
23802 }
23803 ftype = build_function_type_list (return_type, eltype, NULL);
23804 break;
23805 }
23806 default:
23807 gcc_unreachable ();
23808 }
23809
23810 gcc_assert (ftype != NULL);
23811
23812 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
23813
23814 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
23815 NULL_TREE);
23816 arm_builtin_decls[fcode] = decl;
23817 }
23818 }
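/* For illustration (using the hypothetical entry from above): the table
   entry {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0} would be
   registered by the loop above as the builtin "__builtin_neon_vaddv8qi",
   with function code ARM_BUILTIN_NEON_BASE plus its index in
   neon_builtin_data.  */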
23819
23820 #undef NUM_DREG_TYPES
23821 #undef NUM_QREG_TYPES
23822
23823 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
23824 do \
23825 { \
23826 if ((MASK) & insn_flags) \
23827 { \
23828 tree bdecl; \
23829 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
23830 BUILT_IN_MD, NULL, NULL_TREE); \
23831 arm_builtin_decls[CODE] = bdecl; \
23832 } \
23833 } \
23834 while (0)
23835
23836 struct builtin_description
23837 {
23838 const unsigned int mask;
23839 const enum insn_code icode;
23840 const char * const name;
23841 const enum arm_builtins code;
23842 const enum rtx_code comparison;
23843 const unsigned int flag;
23844 };
23845
23846 static const struct builtin_description bdesc_2arg[] =
23847 {
23848 #define IWMMXT_BUILTIN(code, string, builtin) \
23849 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
23850 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23851
23852 #define IWMMXT2_BUILTIN(code, string, builtin) \
23853 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
23854 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23855
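/* For illustration: the first entry below,
      IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   expands to
      { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
	ARM_BUILTIN_WADDB, UNKNOWN, 0 },
   i.e. the insn pattern, the user-visible builtin name and the enum value
   are tied together in one row of bdesc_2arg.  */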
23856 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
23857 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
23858 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
23859 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
23860 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
23861 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
23862 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
23863 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
23864 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
23865 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
23866 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
23867 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
23868 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
23869 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
23870 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
23871 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
23872 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
23873 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
23874 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
23875 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
23876 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
23877 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
23878 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
23879 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
23880 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
23881 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
23882 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
23883 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
23884 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
23885 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
23886 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
23887 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
23888 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
23889 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
23890 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
23891 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
23892 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
23893 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
23894 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
23895 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
23896 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
23897 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
23898 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
23899 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
23900 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
23901 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
23902 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
23903 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
23904 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
23905 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
23906 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
23907 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
23908 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
23909 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
23910 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
23911 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
23912 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
23913 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
23914 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
23915 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
23916 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
23917 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
23918 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
23919 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
23920 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
23921 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
23922 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
23923 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
23924 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
23925 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
23926 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
23927 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
23928 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
23929 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
23930 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
23931 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
23932 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
23933 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
23934
23935 #define IWMMXT_BUILTIN2(code, builtin) \
23936 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23937
23938 #define IWMMXT2_BUILTIN2(code, builtin) \
23939 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23940
23941 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
23942 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
23943 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
23944 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
23945 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
23946 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
23947 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
23948 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
23949 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
23950 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
23951
23952 #define CRC32_BUILTIN(L, U) \
23953 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
23954 UNKNOWN, 0},
23955 CRC32_BUILTIN (crc32b, CRC32B)
23956 CRC32_BUILTIN (crc32h, CRC32H)
23957 CRC32_BUILTIN (crc32w, CRC32W)
23958 CRC32_BUILTIN (crc32cb, CRC32CB)
23959 CRC32_BUILTIN (crc32ch, CRC32CH)
23960 CRC32_BUILTIN (crc32cw, CRC32CW)
23961 #undef CRC32_BUILTIN
23962
23963
23964 #define CRYPTO_BUILTIN(L, U) \
23965 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
23966 UNKNOWN, 0},
23967 #undef CRYPTO1
23968 #undef CRYPTO2
23969 #undef CRYPTO3
23970 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
23971 #define CRYPTO1(L, U, R, A)
23972 #define CRYPTO3(L, U, R, A1, A2, A3)
23973 #include "crypto.def"
23974 #undef CRYPTO1
23975 #undef CRYPTO2
23976 #undef CRYPTO3
23977
23978 };
23979
23980 static const struct builtin_description bdesc_1arg[] =
23981 {
23982 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
23983 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
23984 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
23985 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
23986 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
23987 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
23988 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
23989 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
23990 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
23991 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
23992 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
23993 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
23994 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
23995 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
23996 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
23997 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
23998 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
23999 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24000 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24001 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24002 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24003 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24004 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24005 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24006
24007 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24008 #define CRYPTO2(L, U, R, A1, A2)
24009 #define CRYPTO3(L, U, R, A1, A2, A3)
24010 #include "crypto.def"
24011 #undef CRYPTO1
24012 #undef CRYPTO2
24013 #undef CRYPTO3
24014 };
24015
24016 static const struct builtin_description bdesc_3arg[] =
24017 {
24018 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24019 #define CRYPTO1(L, U, R, A)
24020 #define CRYPTO2(L, U, R, A1, A2)
24021 #include "crypto.def"
24022 #undef CRYPTO1
24023 #undef CRYPTO2
24024 #undef CRYPTO3
24025 };
24026 #undef CRYPTO_BUILTIN
24027
24028 /* Set up all the iWMMXt builtins. This is not called if
24029 TARGET_IWMMXT is zero. */
24030
24031 static void
24032 arm_init_iwmmxt_builtins (void)
24033 {
24034 const struct builtin_description * d;
24035 size_t i;
24036
24037 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24038 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24039 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24040
24041 tree v8qi_ftype_v8qi_v8qi_int
24042 = build_function_type_list (V8QI_type_node,
24043 V8QI_type_node, V8QI_type_node,
24044 integer_type_node, NULL_TREE);
24045 tree v4hi_ftype_v4hi_int
24046 = build_function_type_list (V4HI_type_node,
24047 V4HI_type_node, integer_type_node, NULL_TREE);
24048 tree v2si_ftype_v2si_int
24049 = build_function_type_list (V2SI_type_node,
24050 V2SI_type_node, integer_type_node, NULL_TREE);
24051 tree v2si_ftype_di_di
24052 = build_function_type_list (V2SI_type_node,
24053 long_long_integer_type_node,
24054 long_long_integer_type_node,
24055 NULL_TREE);
24056 tree di_ftype_di_int
24057 = build_function_type_list (long_long_integer_type_node,
24058 long_long_integer_type_node,
24059 integer_type_node, NULL_TREE);
24060 tree di_ftype_di_int_int
24061 = build_function_type_list (long_long_integer_type_node,
24062 long_long_integer_type_node,
24063 integer_type_node,
24064 integer_type_node, NULL_TREE);
24065 tree int_ftype_v8qi
24066 = build_function_type_list (integer_type_node,
24067 V8QI_type_node, NULL_TREE);
24068 tree int_ftype_v4hi
24069 = build_function_type_list (integer_type_node,
24070 V4HI_type_node, NULL_TREE);
24071 tree int_ftype_v2si
24072 = build_function_type_list (integer_type_node,
24073 V2SI_type_node, NULL_TREE);
24074 tree int_ftype_v8qi_int
24075 = build_function_type_list (integer_type_node,
24076 V8QI_type_node, integer_type_node, NULL_TREE);
24077 tree int_ftype_v4hi_int
24078 = build_function_type_list (integer_type_node,
24079 V4HI_type_node, integer_type_node, NULL_TREE);
24080 tree int_ftype_v2si_int
24081 = build_function_type_list (integer_type_node,
24082 V2SI_type_node, integer_type_node, NULL_TREE);
24083 tree v8qi_ftype_v8qi_int_int
24084 = build_function_type_list (V8QI_type_node,
24085 V8QI_type_node, integer_type_node,
24086 integer_type_node, NULL_TREE);
24087 tree v4hi_ftype_v4hi_int_int
24088 = build_function_type_list (V4HI_type_node,
24089 V4HI_type_node, integer_type_node,
24090 integer_type_node, NULL_TREE);
24091 tree v2si_ftype_v2si_int_int
24092 = build_function_type_list (V2SI_type_node,
24093 V2SI_type_node, integer_type_node,
24094 integer_type_node, NULL_TREE);
24095 /* Miscellaneous. */
24096 tree v8qi_ftype_v4hi_v4hi
24097 = build_function_type_list (V8QI_type_node,
24098 V4HI_type_node, V4HI_type_node, NULL_TREE);
24099 tree v4hi_ftype_v2si_v2si
24100 = build_function_type_list (V4HI_type_node,
24101 V2SI_type_node, V2SI_type_node, NULL_TREE);
24102 tree v8qi_ftype_v4hi_v8qi
24103 = build_function_type_list (V8QI_type_node,
24104 V4HI_type_node, V8QI_type_node, NULL_TREE);
24105 tree v2si_ftype_v4hi_v4hi
24106 = build_function_type_list (V2SI_type_node,
24107 V4HI_type_node, V4HI_type_node, NULL_TREE);
24108 tree v2si_ftype_v8qi_v8qi
24109 = build_function_type_list (V2SI_type_node,
24110 V8QI_type_node, V8QI_type_node, NULL_TREE);
24111 tree v4hi_ftype_v4hi_di
24112 = build_function_type_list (V4HI_type_node,
24113 V4HI_type_node, long_long_integer_type_node,
24114 NULL_TREE);
24115 tree v2si_ftype_v2si_di
24116 = build_function_type_list (V2SI_type_node,
24117 V2SI_type_node, long_long_integer_type_node,
24118 NULL_TREE);
24119 tree di_ftype_void
24120 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24121 tree int_ftype_void
24122 = build_function_type_list (integer_type_node, NULL_TREE);
24123 tree di_ftype_v8qi
24124 = build_function_type_list (long_long_integer_type_node,
24125 V8QI_type_node, NULL_TREE);
24126 tree di_ftype_v4hi
24127 = build_function_type_list (long_long_integer_type_node,
24128 V4HI_type_node, NULL_TREE);
24129 tree di_ftype_v2si
24130 = build_function_type_list (long_long_integer_type_node,
24131 V2SI_type_node, NULL_TREE);
24132 tree v2si_ftype_v4hi
24133 = build_function_type_list (V2SI_type_node,
24134 V4HI_type_node, NULL_TREE);
24135 tree v4hi_ftype_v8qi
24136 = build_function_type_list (V4HI_type_node,
24137 V8QI_type_node, NULL_TREE);
24138 tree v8qi_ftype_v8qi
24139 = build_function_type_list (V8QI_type_node,
24140 V8QI_type_node, NULL_TREE);
24141 tree v4hi_ftype_v4hi
24142 = build_function_type_list (V4HI_type_node,
24143 V4HI_type_node, NULL_TREE);
24144 tree v2si_ftype_v2si
24145 = build_function_type_list (V2SI_type_node,
24146 V2SI_type_node, NULL_TREE);
24147
24148 tree di_ftype_di_v4hi_v4hi
24149 = build_function_type_list (long_long_unsigned_type_node,
24150 long_long_unsigned_type_node,
24151 V4HI_type_node, V4HI_type_node,
24152 NULL_TREE);
24153
24154 tree di_ftype_v4hi_v4hi
24155 = build_function_type_list (long_long_unsigned_type_node,
24156 V4HI_type_node, V4HI_type_node,
24157 NULL_TREE);
24158
24159 tree v2si_ftype_v2si_v4hi_v4hi
24160 = build_function_type_list (V2SI_type_node,
24161 V2SI_type_node, V4HI_type_node,
24162 V4HI_type_node, NULL_TREE);
24163
24164 tree v2si_ftype_v2si_v8qi_v8qi
24165 = build_function_type_list (V2SI_type_node,
24166 V2SI_type_node, V8QI_type_node,
24167 V8QI_type_node, NULL_TREE);
24168
24169 tree di_ftype_di_v2si_v2si
24170 = build_function_type_list (long_long_unsigned_type_node,
24171 long_long_unsigned_type_node,
24172 V2SI_type_node, V2SI_type_node,
24173 NULL_TREE);
24174
24175 tree di_ftype_di_di_int
24176 = build_function_type_list (long_long_unsigned_type_node,
24177 long_long_unsigned_type_node,
24178 long_long_unsigned_type_node,
24179 integer_type_node, NULL_TREE);
24180
24181 tree void_ftype_int
24182 = build_function_type_list (void_type_node,
24183 integer_type_node, NULL_TREE);
24184
24185 tree v8qi_ftype_char
24186 = build_function_type_list (V8QI_type_node,
24187 signed_char_type_node, NULL_TREE);
24188
24189 tree v4hi_ftype_short
24190 = build_function_type_list (V4HI_type_node,
24191 short_integer_type_node, NULL_TREE);
24192
24193 tree v2si_ftype_int
24194 = build_function_type_list (V2SI_type_node,
24195 integer_type_node, NULL_TREE);
24196
24197 /* Normal vector binops. */
24198 tree v8qi_ftype_v8qi_v8qi
24199 = build_function_type_list (V8QI_type_node,
24200 V8QI_type_node, V8QI_type_node, NULL_TREE);
24201 tree v4hi_ftype_v4hi_v4hi
24202 = build_function_type_list (V4HI_type_node,
24203 V4HI_type_node, V4HI_type_node, NULL_TREE);
24204 tree v2si_ftype_v2si_v2si
24205 = build_function_type_list (V2SI_type_node,
24206 V2SI_type_node, V2SI_type_node, NULL_TREE);
24207 tree di_ftype_di_di
24208 = build_function_type_list (long_long_unsigned_type_node,
24209 long_long_unsigned_type_node,
24210 long_long_unsigned_type_node,
24211 NULL_TREE);
24212
24213 /* Add all builtins that are more or less simple operations on two
24214 operands. */
24215 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24216 {
24217 /* Use one of the operands; the target can have a different mode for
24218 mask-generating compares. */
24219 enum machine_mode mode;
24220 tree type;
24221
24222 if (d->name == 0)
24223 continue;
24224
24225 mode = insn_data[d->icode].operand[1].mode;
24226
24227 switch (mode)
24228 {
24229 case V8QImode:
24230 type = v8qi_ftype_v8qi_v8qi;
24231 break;
24232 case V4HImode:
24233 type = v4hi_ftype_v4hi_v4hi;
24234 break;
24235 case V2SImode:
24236 type = v2si_ftype_v2si_v2si;
24237 break;
24238 case DImode:
24239 type = di_ftype_di_di;
24240 break;
24241
24242 default:
24243 gcc_unreachable ();
24244 }
24245
24246 def_mbuiltin (d->mask, d->name, type, d->code);
24247 }
24248
24249 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
24250 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24251 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24252 ARM_BUILTIN_ ## CODE)
24253
24254 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24255 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24256 ARM_BUILTIN_ ## CODE)
24257
24258 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24259 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24260 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24261 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24262 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24263 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24264 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24265 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24266 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24267
24268 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24269 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24270 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24271 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24272 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24273 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24274
24275 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24276 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24277 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24278 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24279 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24280 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24281
24282 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24283 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24284 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24285 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24286 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24287 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24288
24289 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24290 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24291 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24292 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24293 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24294 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24295
24296 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24297
24298 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24299 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24300 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24301 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24302 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24303 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24304 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24305 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24306 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24307 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24308
24309 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24310 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24311 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24312 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24313 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24314 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24315 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24316 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24317 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24318
24319 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24320 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24321 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24322
24323 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24324 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24325 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24326
24327 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24328 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24329
24330 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24331 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24332 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24333 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24334 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24335 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24336
24337 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24338 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24339 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24340 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24341 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24342 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24343 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24344 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24345 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24346 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24347 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24348 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24349
24350 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24351 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24352 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24353 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24354
24355 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24356 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24357 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24358 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24359 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24360 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24361 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24362
24363 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24364 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24365 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24366
24367 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24368 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24369 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24370 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24371
24372 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24373 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24374 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24375 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24376
24377 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24378 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24379 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24380 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24381
24382 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24383 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24384 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24385 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24386
24387 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24388 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24389 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24390 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24391
24392 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24393 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24394 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24395 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24396
24397 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24398
24399 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24400 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24401 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24402
24403 #undef iwmmx_mbuiltin
24404 #undef iwmmx2_mbuiltin
24405 }
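/* An illustrative sketch, assuming an iWMMXt-capable target (e.g. -mcpu=iwmmxt):
   each builtin registered above becomes callable under the "__builtin_arm_"
   prefix.  "wzero", for instance, was given type di_ftype_void, so a direct
   use could look like

       unsigned long long acc = __builtin_arm_wzero ();

   In practice these are usually reached through the intrinsic wrappers in the
   target's mmintrin.h rather than called directly.  */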
24406
24407 static void
24408 arm_init_fp16_builtins (void)
24409 {
24410 tree fp16_type = make_node (REAL_TYPE);
24411 TYPE_PRECISION (fp16_type) = 16;
24412 layout_type (fp16_type);
24413 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
24414 }
24415
24416 static void
24417 arm_init_crc32_builtins ()
24418 {
24419 tree si_ftype_si_qi
24420 = build_function_type_list (unsigned_intSI_type_node,
24421 unsigned_intSI_type_node,
24422 unsigned_intQI_type_node, NULL_TREE);
24423 tree si_ftype_si_hi
24424 = build_function_type_list (unsigned_intSI_type_node,
24425 unsigned_intSI_type_node,
24426 unsigned_intHI_type_node, NULL_TREE);
24427 tree si_ftype_si_si
24428 = build_function_type_list (unsigned_intSI_type_node,
24429 unsigned_intSI_type_node,
24430 unsigned_intSI_type_node, NULL_TREE);
24431
24432 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24433 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24434 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24435 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24436 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24437 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24438 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24439 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24440 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24441 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24442 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24443 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24444 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24445 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24446 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24447 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24448 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24449 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
24450 }
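/* An illustrative sketch, assuming a CRC-capable target (e.g.
   -march=armv8-a+crc): given the declarations registered above
   (si_ftype_si_qi and friends), user code could update a running CRC one
   byte at a time.  The crc32_step name is made up for the example; only the
   builtin itself comes from the code above.

       #include <stdint.h>

       uint32_t
       crc32_step (uint32_t crc, uint8_t byte)
       {
         return __builtin_arm_crc32b (crc, byte);
       }
*/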
24451
24452 static void
24453 arm_init_builtins (void)
24454 {
24455 if (TARGET_REALLY_IWMMXT)
24456 arm_init_iwmmxt_builtins ();
24457
24458 if (TARGET_NEON)
24459 arm_init_neon_builtins ();
24460
24461 if (arm_fp16_format)
24462 arm_init_fp16_builtins ();
24463
24464 if (TARGET_CRC32)
24465 arm_init_crc32_builtins ();
24466 }
24467
24468 /* Return the ARM builtin for CODE. */
24469
24470 static tree
24471 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24472 {
24473 if (code >= ARM_BUILTIN_MAX)
24474 return error_mark_node;
24475
24476 return arm_builtin_decls[code];
24477 }
24478
24479 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24480
24481 static const char *
24482 arm_invalid_parameter_type (const_tree t)
24483 {
24484 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24485 return N_("function parameters cannot have __fp16 type");
24486 return NULL;
24487 }
24488
24489 /* Implement TARGET_INVALID_RETURN_TYPE. */
24490
24491 static const char *
24492 arm_invalid_return_type (const_tree t)
24493 {
24494 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24495 return N_("functions cannot return __fp16 type");
24496 return NULL;
24497 }
24498
24499 /* Implement TARGET_PROMOTED_TYPE. */
24500
24501 static tree
24502 arm_promoted_type (const_tree t)
24503 {
24504 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24505 return float_type_node;
24506 return NULL_TREE;
24507 }
24508
24509 /* Implement TARGET_CONVERT_TO_TYPE.
24510 Specifically, this hook implements the peculiarity of the ARM
24511 half-precision floating-point C semantics that requires conversions
24512 between __fp16 and double to go through an intermediate conversion to float. */
24513
24514 static tree
24515 arm_convert_to_type (tree type, tree expr)
24516 {
24517 tree fromtype = TREE_TYPE (expr);
24518 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24519 return NULL_TREE;
24520 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24521 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24522 return convert (type, convert (float_type_node, expr));
24523 return NULL_TREE;
24524 }
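/* An illustrative sketch of the effect of the hook above, assuming a target
   where __fp16 is enabled (e.g. -mfp16-format=ieee): a direct __fp16/double
   conversion is rewritten as two steps through float, since double has a
   precision greater than 32 and __fp16 a precision of 16.

       __fp16 h = 1.0;
       double d = h;        handled as   double d = (float) h;
       __fp16 g = d;        handled as   __fp16 g = (float) d;

   Conversions involving float itself (precision 32) do not match the
   condition and fall through to the normal paths.  */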
24525
24526 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24527 This simply adds HFmode as a supported mode; even though we don't
24528 implement arithmetic on this type directly, it's supported by
24529 optabs conversions, much the way the double-word arithmetic is
24530 special-cased in the default hook. */
24531
24532 static bool
24533 arm_scalar_mode_supported_p (enum machine_mode mode)
24534 {
24535 if (mode == HFmode)
24536 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24537 else if (ALL_FIXED_POINT_MODE_P (mode))
24538 return true;
24539 else
24540 return default_scalar_mode_supported_p (mode);
24541 }
24542
24543 /* Errors in the source file can cause expand_expr to return const0_rtx
24544 where we expect a vector. To avoid crashing, use one of the vector
24545 clear instructions. */
24546
24547 static rtx
24548 safe_vector_operand (rtx x, enum machine_mode mode)
24549 {
24550 if (x != const0_rtx)
24551 return x;
24552 x = gen_reg_rtx (mode);
24553
24554 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
24555 : gen_rtx_SUBREG (DImode, x, 0)));
24556 return x;
24557 }
24558
24559 /* Function to expand ternary builtins. */
24560 static rtx
24561 arm_expand_ternop_builtin (enum insn_code icode,
24562 tree exp, rtx target)
24563 {
24564 rtx pat;
24565 tree arg0 = CALL_EXPR_ARG (exp, 0);
24566 tree arg1 = CALL_EXPR_ARG (exp, 1);
24567 tree arg2 = CALL_EXPR_ARG (exp, 2);
24568
24569 rtx op0 = expand_normal (arg0);
24570 rtx op1 = expand_normal (arg1);
24571 rtx op2 = expand_normal (arg2);
24572 rtx op3 = NULL_RTX;
24573
24574 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24575 lane operand depending on endianness. */
24576 bool builtin_sha1cpm_p = false;
24577
24578 if (insn_data[icode].n_operands == 5)
24579 {
24580 gcc_assert (icode == CODE_FOR_crypto_sha1c
24581 || icode == CODE_FOR_crypto_sha1p
24582 || icode == CODE_FOR_crypto_sha1m);
24583 builtin_sha1cpm_p = true;
24584 }
24585 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24586 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24587 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24588 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
24589
24590
24591 if (VECTOR_MODE_P (mode0))
24592 op0 = safe_vector_operand (op0, mode0);
24593 if (VECTOR_MODE_P (mode1))
24594 op1 = safe_vector_operand (op1, mode1);
24595 if (VECTOR_MODE_P (mode2))
24596 op2 = safe_vector_operand (op2, mode2);
24597
24598 if (! target
24599 || GET_MODE (target) != tmode
24600 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24601 target = gen_reg_rtx (tmode);
24602
24603 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24604 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
24605 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
24606
24607 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24608 op0 = copy_to_mode_reg (mode0, op0);
24609 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24610 op1 = copy_to_mode_reg (mode1, op1);
24611 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24612 op2 = copy_to_mode_reg (mode2, op2);
24613 if (builtin_sha1cpm_p)
24614 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24615
24616 if (builtin_sha1cpm_p)
24617 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
24618 else
24619 pat = GEN_FCN (icode) (target, op0, op1, op2);
24620 if (! pat)
24621 return 0;
24622 emit_insn (pat);
24623 return target;
24624 }
24625
24626 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24627
24628 static rtx
24629 arm_expand_binop_builtin (enum insn_code icode,
24630 tree exp, rtx target)
24631 {
24632 rtx pat;
24633 tree arg0 = CALL_EXPR_ARG (exp, 0);
24634 tree arg1 = CALL_EXPR_ARG (exp, 1);
24635 rtx op0 = expand_normal (arg0);
24636 rtx op1 = expand_normal (arg1);
24637 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24638 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24639 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24640
24641 if (VECTOR_MODE_P (mode0))
24642 op0 = safe_vector_operand (op0, mode0);
24643 if (VECTOR_MODE_P (mode1))
24644 op1 = safe_vector_operand (op1, mode1);
24645
24646 if (! target
24647 || GET_MODE (target) != tmode
24648 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24649 target = gen_reg_rtx (tmode);
24650
24651 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24652 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
24653
24654 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24655 op0 = copy_to_mode_reg (mode0, op0);
24656 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24657 op1 = copy_to_mode_reg (mode1, op1);
24658
24659 pat = GEN_FCN (icode) (target, op0, op1);
24660 if (! pat)
24661 return 0;
24662 emit_insn (pat);
24663 return target;
24664 }
24665
24666 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24667
24668 static rtx
24669 arm_expand_unop_builtin (enum insn_code icode,
24670 tree exp, rtx target, int do_load)
24671 {
24672 rtx pat;
24673 tree arg0 = CALL_EXPR_ARG (exp, 0);
24674 rtx op0 = expand_normal (arg0);
24675 rtx op1 = NULL_RTX;
24676 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24677 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24678 bool builtin_sha1h_p = false;
24679
24680 if (insn_data[icode].n_operands == 3)
24681 {
24682 gcc_assert (icode == CODE_FOR_crypto_sha1h);
24683 builtin_sha1h_p = true;
24684 }
24685
24686 if (! target
24687 || GET_MODE (target) != tmode
24688 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24689 target = gen_reg_rtx (tmode);
24690 if (do_load)
24691 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
24692 else
24693 {
24694 if (VECTOR_MODE_P (mode0))
24695 op0 = safe_vector_operand (op0, mode0);
24696
24697 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24698 op0 = copy_to_mode_reg (mode0, op0);
24699 }
24700 if (builtin_sha1h_p)
24701 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24702
24703 if (builtin_sha1h_p)
24704 pat = GEN_FCN (icode) (target, op0, op1);
24705 else
24706 pat = GEN_FCN (icode) (target, op0);
24707 if (! pat)
24708 return 0;
24709 emit_insn (pat);
24710 return target;
24711 }
24712
24713 typedef enum {
24714 NEON_ARG_COPY_TO_REG,
24715 NEON_ARG_CONSTANT,
24716 NEON_ARG_MEMORY,
24717 NEON_ARG_STOP
24718 } builtin_arg;
24719
24720 #define NEON_MAX_BUILTIN_ARGS 5
24721
24722 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
24723 and return an expression for the accessed memory.
24724
24725 The intrinsic function operates on a block of registers that has
24726 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
24727 function references the memory at EXP of type TYPE and in mode
24728 MEM_MODE; this mode may be BLKmode if no more suitable mode is
24729 available. */
24730
24731 static tree
24732 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
24733 enum machine_mode reg_mode,
24734 neon_builtin_type_mode type_mode)
24735 {
24736 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
24737 tree elem_type, upper_bound, array_type;
24738
24739 /* Work out the size of the register block in bytes. */
24740 reg_size = GET_MODE_SIZE (reg_mode);
24741
24742 /* Work out the size of each vector in bytes. */
24743 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
24744 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
24745
24746 /* Work out how many vectors there are. */
24747 gcc_assert (reg_size % vector_size == 0);
24748 nvectors = reg_size / vector_size;
24749
24750 /* Work out the type of each element. */
24751 gcc_assert (POINTER_TYPE_P (type));
24752 elem_type = TREE_TYPE (type);
24753
24754 /* Work out how many elements are being loaded or stored.
24755 MEM_MODE == REG_MODE implies a one-to-one mapping between register
24756 and memory elements; anything else implies a lane load or store. */
24757 if (mem_mode == reg_mode)
24758 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
24759 else
24760 nelems = nvectors;
24761
24762 /* Create a type that describes the full access. */
24763 upper_bound = build_int_cst (size_type_node, nelems - 1);
24764 array_type = build_array_type (elem_type, build_index_type (upper_bound));
24765
24766 /* Dereference EXP using that type. */
24767 return fold_build2 (MEM_REF, array_type, exp,
24768 build_int_cst (build_pointer_type (array_type), 0));
24769 }
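/* A worked example for the routine above (illustrative only): for a plain
   quad-word load such as vld1q_s32, REG_MODE and MEM_MODE are both the
   16-byte vector mode and TYPE is a pointer to int32_t, so reg_size = 16,
   vector_size = 16, nvectors = 1 and nelems = 16 / 4 = 4; the access is
   therefore described as a dereference of EXP with an int32_t[4] array type.
   For a lane or structure access, where MEM_MODE differs from REG_MODE,
   nelems falls back to the number of vectors instead.  */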
24770
24771 /* Expand a Neon builtin. */
24772 static rtx
24773 arm_expand_neon_args (rtx target, int icode, int have_retval,
24774 neon_builtin_type_mode type_mode,
24775 tree exp, int fcode, ...)
24776 {
24777 va_list ap;
24778 rtx pat;
24779 tree arg[NEON_MAX_BUILTIN_ARGS];
24780 rtx op[NEON_MAX_BUILTIN_ARGS];
24781 tree arg_type;
24782 tree formals;
24783 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24784 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
24785 enum machine_mode other_mode;
24786 int argc = 0;
24787 int opno;
24788
24789 if (have_retval
24790 && (!target
24791 || GET_MODE (target) != tmode
24792 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
24793 target = gen_reg_rtx (tmode);
24794
24795 va_start (ap, fcode);
24796
24797 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
24798
24799 for (;;)
24800 {
24801 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
24802
24803 if (thisarg == NEON_ARG_STOP)
24804 break;
24805 else
24806 {
24807 opno = argc + have_retval;
24808 mode[argc] = insn_data[icode].operand[opno].mode;
24809 arg[argc] = CALL_EXPR_ARG (exp, argc);
24810 arg_type = TREE_VALUE (formals);
24811 if (thisarg == NEON_ARG_MEMORY)
24812 {
24813 other_mode = insn_data[icode].operand[1 - opno].mode;
24814 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
24815 mode[argc], other_mode,
24816 type_mode);
24817 }
24818
24819 op[argc] = expand_normal (arg[argc]);
24820
24821 switch (thisarg)
24822 {
24823 case NEON_ARG_COPY_TO_REG:
24824 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
24825 if (!(*insn_data[icode].operand[opno].predicate)
24826 (op[argc], mode[argc]))
24827 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
24828 break;
24829
24830 case NEON_ARG_CONSTANT:
24831 /* FIXME: This error message is somewhat unhelpful. */
24832 if (!(*insn_data[icode].operand[opno].predicate)
24833 (op[argc], mode[argc]))
24834 error ("argument must be a constant");
24835 break;
24836
24837 case NEON_ARG_MEMORY:
24838 gcc_assert (MEM_P (op[argc]));
24839 PUT_MODE (op[argc], mode[argc]);
24840 /* ??? arm_neon.h uses the same built-in functions for signed
24841 and unsigned accesses, casting where necessary. This isn't
24842 alias safe. */
24843 set_mem_alias_set (op[argc], 0);
24844 if (!(*insn_data[icode].operand[opno].predicate)
24845 (op[argc], mode[argc]))
24846 op[argc] = (replace_equiv_address
24847 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
24848 break;
24849
24850 case NEON_ARG_STOP:
24851 gcc_unreachable ();
24852 }
24853
24854 argc++;
24855 formals = TREE_CHAIN (formals);
24856 }
24857 }
24858
24859 va_end (ap);
24860
24861 if (have_retval)
24862 switch (argc)
24863 {
24864 case 1:
24865 pat = GEN_FCN (icode) (target, op[0]);
24866 break;
24867
24868 case 2:
24869 pat = GEN_FCN (icode) (target, op[0], op[1]);
24870 break;
24871
24872 case 3:
24873 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
24874 break;
24875
24876 case 4:
24877 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
24878 break;
24879
24880 case 5:
24881 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
24882 break;
24883
24884 default:
24885 gcc_unreachable ();
24886 }
24887 else
24888 switch (argc)
24889 {
24890 case 1:
24891 pat = GEN_FCN (icode) (op[0]);
24892 break;
24893
24894 case 2:
24895 pat = GEN_FCN (icode) (op[0], op[1]);
24896 break;
24897
24898 case 3:
24899 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
24900 break;
24901
24902 case 4:
24903 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
24904 break;
24905
24906 case 5:
24907 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
24908 break;
24909
24910 default:
24911 gcc_unreachable ();
24912 }
24913
24914 if (!pat)
24915 return 0;
24916
24917 emit_insn (pat);
24918
24919 return target;
24920 }
24921
24922 /* Expand a Neon builtin. These are "special" because they don't have symbolic
24923 constants defined per-instruction or per instruction-variant. Instead, the
24924 required info is looked up in the table neon_builtin_data. */
24925 static rtx
24926 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
24927 {
24928 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
24929 neon_itype itype = d->itype;
24930 enum insn_code icode = d->code;
24931 neon_builtin_type_mode type_mode = d->mode;
24932
24933 switch (itype)
24934 {
24935 case NEON_UNOP:
24936 case NEON_CONVERT:
24937 case NEON_DUPLANE:
24938 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24939 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
24940
24941 case NEON_BINOP:
24942 case NEON_SETLANE:
24943 case NEON_SCALARMUL:
24944 case NEON_SCALARMULL:
24945 case NEON_SCALARMULH:
24946 case NEON_SHIFTINSERT:
24947 case NEON_LOGICBINOP:
24948 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24949 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24950 NEON_ARG_STOP);
24951
24952 case NEON_TERNOP:
24953 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24954 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24955 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24956
24957 case NEON_GETLANE:
24958 case NEON_FIXCONV:
24959 case NEON_SHIFTIMM:
24960 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24961 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
24962 NEON_ARG_STOP);
24963
24964 case NEON_CREATE:
24965 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24966 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24967
24968 case NEON_DUP:
24969 case NEON_RINT:
24970 case NEON_SPLIT:
24971 case NEON_FLOAT_WIDEN:
24972 case NEON_FLOAT_NARROW:
24973 case NEON_REINTERP:
24974 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24975 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24976
24977 case NEON_COMBINE:
24978 case NEON_VTBL:
24979 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24980 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24981
24982 case NEON_RESULTPAIR:
24983 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
24984 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24985 NEON_ARG_STOP);
24986
24987 case NEON_LANEMUL:
24988 case NEON_LANEMULL:
24989 case NEON_LANEMULH:
24990 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24991 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24992 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24993
24994 case NEON_LANEMAC:
24995 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24996 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24997 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
24998
24999 case NEON_SHIFTACC:
25000 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25001 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25002 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25003
25004 case NEON_SCALARMAC:
25005 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25006 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25007 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25008
25009 case NEON_SELECT:
25010 case NEON_VTBX:
25011 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25012 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25013 NEON_ARG_STOP);
25014
25015 case NEON_LOAD1:
25016 case NEON_LOADSTRUCT:
25017 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25018 NEON_ARG_MEMORY, NEON_ARG_STOP);
25019
25020 case NEON_LOAD1LANE:
25021 case NEON_LOADSTRUCTLANE:
25022 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25023 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25024 NEON_ARG_STOP);
25025
25026 case NEON_STORE1:
25027 case NEON_STORESTRUCT:
25028 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25029 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25030
25031 case NEON_STORE1LANE:
25032 case NEON_STORESTRUCTLANE:
25033 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25034 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25035 NEON_ARG_STOP);
25036 }
25037
25038 gcc_unreachable ();
25039 }
25040
25041 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25042 void
25043 neon_reinterpret (rtx dest, rtx src)
25044 {
25045 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25046 }
25047
25048 /* Emit code to place a Neon pair result in memory locations (with equal
25049 registers). */
25050 void
25051 neon_emit_pair_result_insn (enum machine_mode mode,
25052 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
25053 rtx op1, rtx op2)
25054 {
25055 rtx mem = gen_rtx_MEM (mode, destaddr);
25056 rtx tmp1 = gen_reg_rtx (mode);
25057 rtx tmp2 = gen_reg_rtx (mode);
25058
25059 emit_insn (intfn (tmp1, op1, op2, tmp2));
25060
25061 emit_move_insn (mem, tmp1);
25062 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
25063 emit_move_insn (mem, tmp2);
25064 }
25065
25066 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25067 not to early-clobber SRC registers in the process.
25068
25069 We assume that the operands described by SRC and DEST represent a
25070 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25071 number of components into which the copy has been decomposed. */
25072 void
25073 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25074 {
25075 unsigned int i;
25076
25077 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25078 || REGNO (operands[0]) < REGNO (operands[1]))
25079 {
25080 for (i = 0; i < count; i++)
25081 {
25082 operands[2 * i] = dest[i];
25083 operands[2 * i + 1] = src[i];
25084 }
25085 }
25086 else
25087 {
25088 for (i = 0; i < count; i++)
25089 {
25090 operands[2 * i] = dest[count - i - 1];
25091 operands[2 * i + 1] = src[count - i - 1];
25092 }
25093 }
25094 }
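/* A worked example for the routine above (illustrative only): when copying
   the register pair {d1, d2} from {d0, d1}, the destination overlaps the
   source and starts at a higher register number, so the moves are emitted in
   reverse order (d2 from d1 first, then d1 from d0); emitting them in source
   order would clobber d1 before it had been read.  When there is no overlap,
   or the destination starts below the source, the natural order is safe and
   is kept.  */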
25095
25096 /* Split operands into moves from op[1] + op[2] into op[0]. */
25097
25098 void
25099 neon_split_vcombine (rtx operands[3])
25100 {
25101 unsigned int dest = REGNO (operands[0]);
25102 unsigned int src1 = REGNO (operands[1]);
25103 unsigned int src2 = REGNO (operands[2]);
25104 enum machine_mode halfmode = GET_MODE (operands[1]);
25105 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25106 rtx destlo, desthi;
25107
25108 if (src1 == dest && src2 == dest + halfregs)
25109 {
25110 /* No-op move. Can't split to nothing; emit something. */
25111 emit_note (NOTE_INSN_DELETED);
25112 return;
25113 }
25114
25115 /* Preserve register attributes for variable tracking. */
25116 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25117 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25118 GET_MODE_SIZE (halfmode));
25119
25120 /* Special case of reversed high/low parts. Use VSWP. */
25121 if (src2 == dest && src1 == dest + halfregs)
25122 {
25123 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25124 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25125 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25126 return;
25127 }
25128
25129 if (!reg_overlap_mentioned_p (operands[2], destlo))
25130 {
25131 /* Try to avoid unnecessary moves if part of the result
25132 is in the right place already. */
25133 if (src1 != dest)
25134 emit_move_insn (destlo, operands[1]);
25135 if (src2 != dest + halfregs)
25136 emit_move_insn (desthi, operands[2]);
25137 }
25138 else
25139 {
25140 if (src2 != dest + halfregs)
25141 emit_move_insn (desthi, operands[2]);
25142 if (src1 != dest)
25143 emit_move_insn (destlo, operands[1]);
25144 }
25145 }
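/* Illustrative cases for the routine above: splitting q0 = vcombine (d0, d1)
   where q0 already occupies d0/d1 is a no-op, so only a deleted-insn note is
   emitted; q0 = vcombine (d1, d0) has the halves exactly swapped, so a single
   parallel set (a VSWP) is used; any other combination falls through to one
   or two ordinary moves, ordered so that a half already in the right place is
   not clobbered before it is read.  */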
25146
25147 /* Expand an expression EXP that calls a built-in function,
25148 with result going to TARGET if that's convenient
25149 (and in mode MODE if that's convenient).
25150 SUBTARGET may be used as the target for computing one of EXP's operands.
25151 IGNORE is nonzero if the value is to be ignored. */
25152
25153 static rtx
25154 arm_expand_builtin (tree exp,
25155 rtx target,
25156 rtx subtarget ATTRIBUTE_UNUSED,
25157 enum machine_mode mode ATTRIBUTE_UNUSED,
25158 int ignore ATTRIBUTE_UNUSED)
25159 {
25160 const struct builtin_description * d;
25161 enum insn_code icode;
25162 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25163 tree arg0;
25164 tree arg1;
25165 tree arg2;
25166 rtx op0;
25167 rtx op1;
25168 rtx op2;
25169 rtx pat;
25170 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25171 size_t i;
25172 enum machine_mode tmode;
25173 enum machine_mode mode0;
25174 enum machine_mode mode1;
25175 enum machine_mode mode2;
25176 int opint;
25177 int selector;
25178 int mask;
25179 int imm;
25180
25181 if (fcode >= ARM_BUILTIN_NEON_BASE)
25182 return arm_expand_neon_builtin (fcode, exp, target);
25183
25184 switch (fcode)
25185 {
25186 case ARM_BUILTIN_TEXTRMSB:
25187 case ARM_BUILTIN_TEXTRMUB:
25188 case ARM_BUILTIN_TEXTRMSH:
25189 case ARM_BUILTIN_TEXTRMUH:
25190 case ARM_BUILTIN_TEXTRMSW:
25191 case ARM_BUILTIN_TEXTRMUW:
25192 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25193 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25194 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25195 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25196 : CODE_FOR_iwmmxt_textrmw);
25197
25198 arg0 = CALL_EXPR_ARG (exp, 0);
25199 arg1 = CALL_EXPR_ARG (exp, 1);
25200 op0 = expand_normal (arg0);
25201 op1 = expand_normal (arg1);
25202 tmode = insn_data[icode].operand[0].mode;
25203 mode0 = insn_data[icode].operand[1].mode;
25204 mode1 = insn_data[icode].operand[2].mode;
25205
25206 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25207 op0 = copy_to_mode_reg (mode0, op0);
25208 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25209 {
25210 /* @@@ better error message */
25211 error ("selector must be an immediate");
25212 return gen_reg_rtx (tmode);
25213 }
25214
25215 opint = INTVAL (op1);
25216 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25217 {
25218 if (opint > 7 || opint < 0)
25219 error ("the range of selector should be in 0 to 7");
25220 }
25221 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25222 {
25223 if (opint > 3 || opint < 0)
25224 error ("the range of selector should be in 0 to 3");
25225 }
25226 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25227 {
25228 if (opint > 1 || opint < 0)
25229 error ("the range of selector should be in 0 to 1");
25230 }
25231
25232 if (target == 0
25233 || GET_MODE (target) != tmode
25234 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25235 target = gen_reg_rtx (tmode);
25236 pat = GEN_FCN (icode) (target, op0, op1);
25237 if (! pat)
25238 return 0;
25239 emit_insn (pat);
25240 return target;
25241
25242 case ARM_BUILTIN_WALIGNI:
25243 /* If op2 is immediate, call waligni, else call walignr. */
25244 arg0 = CALL_EXPR_ARG (exp, 0);
25245 arg1 = CALL_EXPR_ARG (exp, 1);
25246 arg2 = CALL_EXPR_ARG (exp, 2);
25247 op0 = expand_normal (arg0);
25248 op1 = expand_normal (arg1);
25249 op2 = expand_normal (arg2);
25250 if (CONST_INT_P (op2))
25251 {
25252 icode = CODE_FOR_iwmmxt_waligni;
25253 tmode = insn_data[icode].operand[0].mode;
25254 mode0 = insn_data[icode].operand[1].mode;
25255 mode1 = insn_data[icode].operand[2].mode;
25256 mode2 = insn_data[icode].operand[3].mode;
25257 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25258 op0 = copy_to_mode_reg (mode0, op0);
25259 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25260 op1 = copy_to_mode_reg (mode1, op1);
25261 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25262 selector = INTVAL (op2);
25263 if (selector > 7 || selector < 0)
25264 error ("the range of selector should be in 0 to 7");
25265 }
25266 else
25267 {
25268 icode = CODE_FOR_iwmmxt_walignr;
25269 tmode = insn_data[icode].operand[0].mode;
25270 mode0 = insn_data[icode].operand[1].mode;
25271 mode1 = insn_data[icode].operand[2].mode;
25272 mode2 = insn_data[icode].operand[3].mode;
25273 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25274 op0 = copy_to_mode_reg (mode0, op0);
25275 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25276 op1 = copy_to_mode_reg (mode1, op1);
25277 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25278 op2 = copy_to_mode_reg (mode2, op2);
25279 }
25280 if (target == 0
25281 || GET_MODE (target) != tmode
25282 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25283 target = gen_reg_rtx (tmode);
25284 pat = GEN_FCN (icode) (target, op0, op1, op2);
25285 if (!pat)
25286 return 0;
25287 emit_insn (pat);
25288 return target;
25289
25290 case ARM_BUILTIN_TINSRB:
25291 case ARM_BUILTIN_TINSRH:
25292 case ARM_BUILTIN_TINSRW:
25293 case ARM_BUILTIN_WMERGE:
25294 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25295 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25296 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25297 : CODE_FOR_iwmmxt_tinsrw);
25298 arg0 = CALL_EXPR_ARG (exp, 0);
25299 arg1 = CALL_EXPR_ARG (exp, 1);
25300 arg2 = CALL_EXPR_ARG (exp, 2);
25301 op0 = expand_normal (arg0);
25302 op1 = expand_normal (arg1);
25303 op2 = expand_normal (arg2);
25304 tmode = insn_data[icode].operand[0].mode;
25305 mode0 = insn_data[icode].operand[1].mode;
25306 mode1 = insn_data[icode].operand[2].mode;
25307 mode2 = insn_data[icode].operand[3].mode;
25308
25309 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25310 op0 = copy_to_mode_reg (mode0, op0);
25311 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25312 op1 = copy_to_mode_reg (mode1, op1);
25313 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25314 {
25315 error ("selector must be an immediate");
25316 return const0_rtx;
25317 }
25318 if (icode == CODE_FOR_iwmmxt_wmerge)
25319 {
25320 selector = INTVAL (op2);
25321 if (selector > 7 || selector < 0)
25322 error ("the range of selector should be in 0 to 7");
25323 }
25324 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25325 || (icode == CODE_FOR_iwmmxt_tinsrh)
25326 || (icode == CODE_FOR_iwmmxt_tinsrw))
25327 {
25328 mask = 0x01;
25329 selector = INTVAL (op2);
25330 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25331 error ("the range of selector should be in 0 to 7");
25332 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25333 error ("the range of selector should be in 0 to 3");
25334 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25335 error ("the range of selector should be in 0 to 1");
25336 mask <<= selector;
25337 op2 = GEN_INT (mask);
25338 }
25339 if (target == 0
25340 || GET_MODE (target) != tmode
25341 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25342 target = gen_reg_rtx (tmode);
25343 pat = GEN_FCN (icode) (target, op0, op1, op2);
25344 if (! pat)
25345 return 0;
25346 emit_insn (pat);
25347 return target;
25348
25349 case ARM_BUILTIN_SETWCGR0:
25350 case ARM_BUILTIN_SETWCGR1:
25351 case ARM_BUILTIN_SETWCGR2:
25352 case ARM_BUILTIN_SETWCGR3:
25353 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25354 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25355 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25356 : CODE_FOR_iwmmxt_setwcgr3);
25357 arg0 = CALL_EXPR_ARG (exp, 0);
25358 op0 = expand_normal (arg0);
25359 mode0 = insn_data[icode].operand[0].mode;
25360 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25361 op0 = copy_to_mode_reg (mode0, op0);
25362 pat = GEN_FCN (icode) (op0);
25363 if (!pat)
25364 return 0;
25365 emit_insn (pat);
25366 return 0;
25367
25368 case ARM_BUILTIN_GETWCGR0:
25369 case ARM_BUILTIN_GETWCGR1:
25370 case ARM_BUILTIN_GETWCGR2:
25371 case ARM_BUILTIN_GETWCGR3:
25372 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25373 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25374 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25375 : CODE_FOR_iwmmxt_getwcgr3);
25376 tmode = insn_data[icode].operand[0].mode;
25377 if (target == 0
25378 || GET_MODE (target) != tmode
25379 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25380 target = gen_reg_rtx (tmode);
25381 pat = GEN_FCN (icode) (target);
25382 if (!pat)
25383 return 0;
25384 emit_insn (pat);
25385 return target;
25386
25387 case ARM_BUILTIN_WSHUFH:
25388 icode = CODE_FOR_iwmmxt_wshufh;
25389 arg0 = CALL_EXPR_ARG (exp, 0);
25390 arg1 = CALL_EXPR_ARG (exp, 1);
25391 op0 = expand_normal (arg0);
25392 op1 = expand_normal (arg1);
25393 tmode = insn_data[icode].operand[0].mode;
25394 mode1 = insn_data[icode].operand[1].mode;
25395 mode2 = insn_data[icode].operand[2].mode;
25396
25397 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25398 op0 = copy_to_mode_reg (mode1, op0);
25399 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25400 {
25401 error ("mask must be an immediate");
25402 return const0_rtx;
25403 }
25404 selector = INTVAL (op1);
25405 if (selector < 0 || selector > 255)
25406 error ("the range of mask should be in 0 to 255");
25407 if (target == 0
25408 || GET_MODE (target) != tmode
25409 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25410 target = gen_reg_rtx (tmode);
25411 pat = GEN_FCN (icode) (target, op0, op1);
25412 if (! pat)
25413 return 0;
25414 emit_insn (pat);
25415 return target;
25416
25417 case ARM_BUILTIN_WMADDS:
25418 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25419 case ARM_BUILTIN_WMADDSX:
25420 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25421 case ARM_BUILTIN_WMADDSN:
25422 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25423 case ARM_BUILTIN_WMADDU:
25424 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25425 case ARM_BUILTIN_WMADDUX:
25426 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25427 case ARM_BUILTIN_WMADDUN:
25428 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25429 case ARM_BUILTIN_WSADBZ:
25430 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25431 case ARM_BUILTIN_WSADHZ:
25432 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25433
25434 /* Several three-argument builtins. */
25435 case ARM_BUILTIN_WMACS:
25436 case ARM_BUILTIN_WMACU:
25437 case ARM_BUILTIN_TMIA:
25438 case ARM_BUILTIN_TMIAPH:
25439 case ARM_BUILTIN_TMIATT:
25440 case ARM_BUILTIN_TMIATB:
25441 case ARM_BUILTIN_TMIABT:
25442 case ARM_BUILTIN_TMIABB:
25443 case ARM_BUILTIN_WQMIABB:
25444 case ARM_BUILTIN_WQMIABT:
25445 case ARM_BUILTIN_WQMIATB:
25446 case ARM_BUILTIN_WQMIATT:
25447 case ARM_BUILTIN_WQMIABBN:
25448 case ARM_BUILTIN_WQMIABTN:
25449 case ARM_BUILTIN_WQMIATBN:
25450 case ARM_BUILTIN_WQMIATTN:
25451 case ARM_BUILTIN_WMIABB:
25452 case ARM_BUILTIN_WMIABT:
25453 case ARM_BUILTIN_WMIATB:
25454 case ARM_BUILTIN_WMIATT:
25455 case ARM_BUILTIN_WMIABBN:
25456 case ARM_BUILTIN_WMIABTN:
25457 case ARM_BUILTIN_WMIATBN:
25458 case ARM_BUILTIN_WMIATTN:
25459 case ARM_BUILTIN_WMIAWBB:
25460 case ARM_BUILTIN_WMIAWBT:
25461 case ARM_BUILTIN_WMIAWTB:
25462 case ARM_BUILTIN_WMIAWTT:
25463 case ARM_BUILTIN_WMIAWBBN:
25464 case ARM_BUILTIN_WMIAWBTN:
25465 case ARM_BUILTIN_WMIAWTBN:
25466 case ARM_BUILTIN_WMIAWTTN:
25467 case ARM_BUILTIN_WSADB:
25468 case ARM_BUILTIN_WSADH:
25469 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25470 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25471 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25472 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25473 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25474 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25475 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25476 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25477 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25478 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25479 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25480 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25481 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25482 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25483 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25484 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25485 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25486 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25487 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25488 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25489 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25490 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25491 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25492 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25493 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25494 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25495 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25496 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25497 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25498 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25499 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25500 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25501 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25502 : CODE_FOR_iwmmxt_wsadh);
25503 arg0 = CALL_EXPR_ARG (exp, 0);
25504 arg1 = CALL_EXPR_ARG (exp, 1);
25505 arg2 = CALL_EXPR_ARG (exp, 2);
25506 op0 = expand_normal (arg0);
25507 op1 = expand_normal (arg1);
25508 op2 = expand_normal (arg2);
25509 tmode = insn_data[icode].operand[0].mode;
25510 mode0 = insn_data[icode].operand[1].mode;
25511 mode1 = insn_data[icode].operand[2].mode;
25512 mode2 = insn_data[icode].operand[3].mode;
25513
25514 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25515 op0 = copy_to_mode_reg (mode0, op0);
25516 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25517 op1 = copy_to_mode_reg (mode1, op1);
25518 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25519 op2 = copy_to_mode_reg (mode2, op2);
25520 if (target == 0
25521 || GET_MODE (target) != tmode
25522 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25523 target = gen_reg_rtx (tmode);
25524 pat = GEN_FCN (icode) (target, op0, op1, op2);
25525 if (! pat)
25526 return 0;
25527 emit_insn (pat);
25528 return target;
25529
25530 case ARM_BUILTIN_WZERO:
25531 target = gen_reg_rtx (DImode);
25532 emit_insn (gen_iwmmxt_clrdi (target));
25533 return target;
25534
25535 case ARM_BUILTIN_WSRLHI:
25536 case ARM_BUILTIN_WSRLWI:
25537 case ARM_BUILTIN_WSRLDI:
25538 case ARM_BUILTIN_WSLLHI:
25539 case ARM_BUILTIN_WSLLWI:
25540 case ARM_BUILTIN_WSLLDI:
25541 case ARM_BUILTIN_WSRAHI:
25542 case ARM_BUILTIN_WSRAWI:
25543 case ARM_BUILTIN_WSRADI:
25544 case ARM_BUILTIN_WRORHI:
25545 case ARM_BUILTIN_WRORWI:
25546 case ARM_BUILTIN_WRORDI:
25547 case ARM_BUILTIN_WSRLH:
25548 case ARM_BUILTIN_WSRLW:
25549 case ARM_BUILTIN_WSRLD:
25550 case ARM_BUILTIN_WSLLH:
25551 case ARM_BUILTIN_WSLLW:
25552 case ARM_BUILTIN_WSLLD:
25553 case ARM_BUILTIN_WSRAH:
25554 case ARM_BUILTIN_WSRAW:
25555 case ARM_BUILTIN_WSRAD:
25556 case ARM_BUILTIN_WRORH:
25557 case ARM_BUILTIN_WRORW:
25558 case ARM_BUILTIN_WRORD:
25559 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
25560 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
25561 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
25562 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
25563 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
25564 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
25565 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
25566 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
25567 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
25568 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
25569 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
25570 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
25571 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
25572 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
25573 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
25574 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
25575 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
25576 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
25577 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
25578 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
25579 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
25580 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
25581 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
25582 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
25583 : CODE_FOR_nothing);
25584 arg1 = CALL_EXPR_ARG (exp, 1);
25585 op1 = expand_normal (arg1);
25586 if (GET_MODE (op1) == VOIDmode)
25587 {
25588 imm = INTVAL (op1);
25589 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
25590 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
25591 && (imm < 0 || imm > 32))
25592 {
25593 if (fcode == ARM_BUILTIN_WRORHI)
25594 error ("the count should be in the range 0 to 32; please check the intrinsic _mm_rori_pi16 in code.");
25595 else if (fcode == ARM_BUILTIN_WRORWI)
25596 error ("the count should be in the range 0 to 32; please check the intrinsic _mm_rori_pi32 in code.");
25597 else if (fcode == ARM_BUILTIN_WRORH)
25598 error ("the count should be in the range 0 to 32; please check the intrinsic _mm_ror_pi16 in code.");
25599 else
25600 error ("the count should be in the range 0 to 32; please check the intrinsic _mm_ror_pi32 in code.");
25601 }
25602 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
25603 && (imm < 0 || imm > 64))
25604 {
25605 if (fcode == ARM_BUILTIN_WRORDI)
25606 error ("the count should be in the range 0 to 64; please check the intrinsic _mm_rori_si64 in code.");
25607 else
25608 error ("the count should be in the range 0 to 64; please check the intrinsic _mm_ror_si64 in code.");
25609 }
25610 else if (imm < 0)
25611 {
25612 if (fcode == ARM_BUILTIN_WSRLHI)
25613 error ("the count should be no less than 0; please check the intrinsic _mm_srli_pi16 in code.");
25614 else if (fcode == ARM_BUILTIN_WSRLWI)
25615 error ("the count should be no less than 0; please check the intrinsic _mm_srli_pi32 in code.");
25616 else if (fcode == ARM_BUILTIN_WSRLDI)
25617 error ("the count should be no less than 0; please check the intrinsic _mm_srli_si64 in code.");
25618 else if (fcode == ARM_BUILTIN_WSLLHI)
25619 error ("the count should be no less than 0; please check the intrinsic _mm_slli_pi16 in code.");
25620 else if (fcode == ARM_BUILTIN_WSLLWI)
25621 error ("the count should be no less than 0; please check the intrinsic _mm_slli_pi32 in code.");
25622 else if (fcode == ARM_BUILTIN_WSLLDI)
25623 error ("the count should be no less than 0; please check the intrinsic _mm_slli_si64 in code.");
25624 else if (fcode == ARM_BUILTIN_WSRAHI)
25625 error ("the count should be no less than 0; please check the intrinsic _mm_srai_pi16 in code.");
25626 else if (fcode == ARM_BUILTIN_WSRAWI)
25627 error ("the count should be no less than 0; please check the intrinsic _mm_srai_pi32 in code.");
25628 else if (fcode == ARM_BUILTIN_WSRADI)
25629 error ("the count should be no less than 0; please check the intrinsic _mm_srai_si64 in code.");
25630 else if (fcode == ARM_BUILTIN_WSRLH)
25631 error ("the count should be no less than 0; please check the intrinsic _mm_srl_pi16 in code.");
25632 else if (fcode == ARM_BUILTIN_WSRLW)
25633 error ("the count should be no less than 0; please check the intrinsic _mm_srl_pi32 in code.");
25634 else if (fcode == ARM_BUILTIN_WSRLD)
25635 error ("the count should be no less than 0; please check the intrinsic _mm_srl_si64 in code.");
25636 else if (fcode == ARM_BUILTIN_WSLLH)
25637 error ("the count should be no less than 0; please check the intrinsic _mm_sll_pi16 in code.");
25638 else if (fcode == ARM_BUILTIN_WSLLW)
25639 error ("the count should be no less than 0; please check the intrinsic _mm_sll_pi32 in code.");
25640 else if (fcode == ARM_BUILTIN_WSLLD)
25641 error ("the count should be no less than 0; please check the intrinsic _mm_sll_si64 in code.");
25642 else if (fcode == ARM_BUILTIN_WSRAH)
25643 error ("the count should be no less than 0; please check the intrinsic _mm_sra_pi16 in code.");
25644 else if (fcode == ARM_BUILTIN_WSRAW)
25645 error ("the count should be no less than 0; please check the intrinsic _mm_sra_pi32 in code.");
25646 else
25647 error ("the count should be no less than 0; please check the intrinsic _mm_sra_si64 in code.");
25648 }
25649 }
25650 return arm_expand_binop_builtin (icode, exp, target);
25651
25652 default:
25653 break;
25654 }
25655
25656 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
25657 if (d->code == (const enum arm_builtins) fcode)
25658 return arm_expand_binop_builtin (d->icode, exp, target);
25659
25660 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
25661 if (d->code == (const enum arm_builtins) fcode)
25662 return arm_expand_unop_builtin (d->icode, exp, target, 0);
25663
25664 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
25665 if (d->code == (const enum arm_builtins) fcode)
25666 return arm_expand_ternop_builtin (d->icode, exp, target);
25667
25668 /* @@@ Should really do something sensible here. */
25669 return NULL_RTX;
25670 }
25671 \f
25672 /* Return the number (counting from 0) of
25673 the least significant set bit in MASK. */
25674
25675 inline static int
25676 number_of_first_bit_set (unsigned mask)
25677 {
25678 return ctz_hwi (mask);
25679 }
25680
25681 /* Like emit_multi_reg_push, but allowing for a different set of
25682 registers to be described as saved. MASK is the set of registers
25683 to be saved; REAL_REGS is the set of registers to be described as
25684 saved. If REAL_REGS is 0, only describe the stack adjustment. */
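/* For example (illustrative): when the prologue stashes a high register
   such as r8 in a low register such as r4 and then pushes it, MASK
   contains r4 (what is physically pushed) while REAL_REGS contains r8
   (what the unwind information should describe as saved).  */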
25685
25686 static rtx
25687 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25688 {
25689 unsigned long regno;
25690 rtx par[10], tmp, reg, insn;
25691 int i, j;
25692
25693 /* Build the parallel of the registers actually being stored. */
25694 for (i = 0; mask; ++i, mask &= mask - 1)
25695 {
25696 regno = ctz_hwi (mask);
25697 reg = gen_rtx_REG (SImode, regno);
25698
25699 if (i == 0)
25700 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25701 else
25702 tmp = gen_rtx_USE (VOIDmode, reg);
25703
25704 par[i] = tmp;
25705 }
25706
25707 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25708 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25709 tmp = gen_frame_mem (BLKmode, tmp);
25710 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
25711 par[0] = tmp;
25712
25713 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25714 insn = emit_insn (tmp);
25715
25716 /* Always build the stack adjustment note for unwind info. */
25717 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25718 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
25719 par[0] = tmp;
25720
25721 /* Build the parallel of the registers recorded as saved for unwind. */
25722 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25723 {
25724 regno = ctz_hwi (real_regs);
25725 reg = gen_rtx_REG (SImode, regno);
25726
25727 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25728 tmp = gen_frame_mem (SImode, tmp);
25729 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
25730 RTX_FRAME_RELATED_P (tmp) = 1;
25731 par[j + 1] = tmp;
25732 }
25733
25734 if (j == 0)
25735 tmp = par[0];
25736 else
25737 {
25738 RTX_FRAME_RELATED_P (par[0]) = 1;
25739 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25740 }
25741
25742 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25743
25744 return insn;
25745 }
25746
25747 /* Emit code to pop registers from the stack. F is the
25748 assembly file. MASK is the registers to pop. */
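/* For instance (illustrative), MASK = (1 << 4) | (1 << 5) | (1 << PC_REGNUM)
   normally emits "pop {r4, r5, pc}"; with interworking, backtracing or an
   EH return the PC is not popped directly and thumb_exit is used instead.  */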
25749 static void
25750 thumb_pop (FILE *f, unsigned long mask)
25751 {
25752 int regno;
25753 int lo_mask = mask & 0xFF;
25754 int pushed_words = 0;
25755
25756 gcc_assert (mask);
25757
25758 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25759 {
25760 /* Special case. Do not generate a POP PC statement here; do it in
25761 thumb_exit(). */
25762 thumb_exit (f, -1);
25763 return;
25764 }
25765
25766 fprintf (f, "\tpop\t{");
25767
25768 /* Look at the low registers first. */
25769 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25770 {
25771 if (lo_mask & 1)
25772 {
25773 asm_fprintf (f, "%r", regno);
25774
25775 if ((lo_mask & ~1) != 0)
25776 fprintf (f, ", ");
25777
25778 pushed_words++;
25779 }
25780 }
25781
25782 if (mask & (1 << PC_REGNUM))
25783 {
25784 /* Catch popping the PC. */
25785 if (TARGET_INTERWORK || TARGET_BACKTRACE
25786 || crtl->calls_eh_return)
25787 {
25788 /* The PC is never popped directly; instead
25789 it is popped into r3 and then BX is used. */
25790 fprintf (f, "}\n");
25791
25792 thumb_exit (f, -1);
25793
25794 return;
25795 }
25796 else
25797 {
25798 if (mask & 0xFF)
25799 fprintf (f, ", ");
25800
25801 asm_fprintf (f, "%r", PC_REGNUM);
25802 }
25803 }
25804
25805 fprintf (f, "}\n");
25806 }
25807
25808 /* Generate code to return from a thumb function.
25809 If 'reg_containing_return_addr' is -1, then the return address is
25810 actually on the stack, at the stack pointer. */
25811 static void
25812 thumb_exit (FILE *f, int reg_containing_return_addr)
25813 {
25814 unsigned regs_available_for_popping;
25815 unsigned regs_to_pop;
25816 int pops_needed;
25817 unsigned available;
25818 unsigned required;
25819 int mode;
25820 int size;
25821 int restore_a4 = FALSE;
25822
25823 /* Compute the registers we need to pop. */
25824 regs_to_pop = 0;
25825 pops_needed = 0;
25826
25827 if (reg_containing_return_addr == -1)
25828 {
25829 regs_to_pop |= 1 << LR_REGNUM;
25830 ++pops_needed;
25831 }
25832
25833 if (TARGET_BACKTRACE)
25834 {
25835 /* Restore the (ARM) frame pointer and stack pointer. */
25836 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25837 pops_needed += 2;
25838 }
25839
25840 /* If there is nothing to pop then just emit the BX instruction and
25841 return. */
25842 if (pops_needed == 0)
25843 {
25844 if (crtl->calls_eh_return)
25845 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25846
25847 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25848 return;
25849 }
25850 /* Otherwise if we are not supporting interworking and we have not created
25851 a backtrace structure and the function was not entered in ARM mode then
25852 just pop the return address straight into the PC. */
25853 else if (!TARGET_INTERWORK
25854 && !TARGET_BACKTRACE
25855 && !is_called_in_ARM_mode (current_function_decl)
25856 && !crtl->calls_eh_return)
25857 {
25858 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25859 return;
25860 }
25861
25862 /* Find out how many of the (return) argument registers we can corrupt. */
25863 regs_available_for_popping = 0;
25864
25865 /* If returning via __builtin_eh_return, the bottom three registers
25866 all contain information needed for the return. */
25867 if (crtl->calls_eh_return)
25868 size = 12;
25869 else
25870 {
25871 /* We can deduce the registers used from the function's
25872 return value. This is more reliable than examining
25873 df_regs_ever_live_p () because that will be set if the register is
25874 ever used in the function, not just if the register is used
25875 to hold a return value. */
25876
25877 if (crtl->return_rtx != 0)
25878 mode = GET_MODE (crtl->return_rtx);
25879 else
25880 mode = DECL_MODE (DECL_RESULT (current_function_decl));
25881
25882 size = GET_MODE_SIZE (mode);
25883
25884 if (size == 0)
25885 {
25886 /* In a void function we can use any argument register.
25887 In a function that returns a structure on the stack
25888 we can use the second and third argument registers. */
25889 if (mode == VOIDmode)
25890 regs_available_for_popping =
25891 (1 << ARG_REGISTER (1))
25892 | (1 << ARG_REGISTER (2))
25893 | (1 << ARG_REGISTER (3));
25894 else
25895 regs_available_for_popping =
25896 (1 << ARG_REGISTER (2))
25897 | (1 << ARG_REGISTER (3));
25898 }
25899 else if (size <= 4)
25900 regs_available_for_popping =
25901 (1 << ARG_REGISTER (2))
25902 | (1 << ARG_REGISTER (3));
25903 else if (size <= 8)
25904 regs_available_for_popping =
25905 (1 << ARG_REGISTER (3));
25906 }
25907
25908 /* Match registers to be popped with registers into which we pop them. */
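/* Note: for nonzero X, (X & -X) isolates the least significant set bit, so
   X &= ~(X & -X) clears it.  E.g. X = 0b0110 gives X & -X = 0b0010, leaving
   0b0100.  Each iteration of the loop below therefore pairs off one required
   register with one available register.  */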
25909 for (available = regs_available_for_popping,
25910 required = regs_to_pop;
25911 required != 0 && available != 0;
25912 available &= ~(available & - available),
25913 required &= ~(required & - required))
25914 -- pops_needed;
25915
25916 /* If we have any popping registers left over, remove them. */
25917 if (available > 0)
25918 regs_available_for_popping &= ~available;
25919
25920 /* Otherwise if we need another popping register we can use
25921 the fourth argument register. */
25922 else if (pops_needed)
25923 {
25924 /* If we have not found any free argument registers and
25925 reg a4 contains the return address, we must move it. */
25926 if (regs_available_for_popping == 0
25927 && reg_containing_return_addr == LAST_ARG_REGNUM)
25928 {
25929 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25930 reg_containing_return_addr = LR_REGNUM;
25931 }
25932 else if (size > 12)
25933 {
25934 /* Register a4 is being used to hold part of the return value,
25935 but we have dire need of a free, low register. */
25936 restore_a4 = TRUE;
25937
25938 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
25939 }
25940
25941 if (reg_containing_return_addr != LAST_ARG_REGNUM)
25942 {
25943 /* The fourth argument register is available. */
25944 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
25945
25946 --pops_needed;
25947 }
25948 }
25949
25950 /* Pop as many registers as we can. */
25951 thumb_pop (f, regs_available_for_popping);
25952
25953 /* Process the registers we popped. */
25954 if (reg_containing_return_addr == -1)
25955 {
25956 /* The return address was popped into the lowest numbered register. */
25957 regs_to_pop &= ~(1 << LR_REGNUM);
25958
25959 reg_containing_return_addr =
25960 number_of_first_bit_set (regs_available_for_popping);
25961
25962 /* Remove this register from the mask of available registers, so that
25963 the return address will not be corrupted by further pops. */
25964 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
25965 }
25966
25967 /* If we popped other registers then handle them here. */
25968 if (regs_available_for_popping)
25969 {
25970 int frame_pointer;
25971
25972 /* Work out which register currently contains the frame pointer. */
25973 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
25974
25975 /* Move it into the correct place. */
25976 asm_fprintf (f, "\tmov\t%r, %r\n",
25977 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
25978
25979 /* (Temporarily) remove it from the mask of popped registers. */
25980 regs_available_for_popping &= ~(1 << frame_pointer);
25981 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
25982
25983 if (regs_available_for_popping)
25984 {
25985 int stack_pointer;
25986
25987 /* We popped the stack pointer as well,
25988 find the register that contains it. */
25989 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
25990
25991 /* Move it into the stack register. */
25992 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
25993
25994 /* At this point we have popped all necessary registers, so
25995 do not worry about restoring regs_available_for_popping
25996 to its correct value:
25997
25998 assert (pops_needed == 0)
25999 assert (regs_available_for_popping == (1 << frame_pointer))
26000 assert (regs_to_pop == (1 << STACK_POINTER)) */
26001 }
26002 else
26003 {
26004 /* Since we have just moved the popped value into the frame
26005 pointer, the popping register is available for reuse, and
26006 we know that we still have the stack pointer left to pop. */
26007 regs_available_for_popping |= (1 << frame_pointer);
26008 }
26009 }
26010
26011 /* If we still have registers left on the stack, but we no longer have
26012 any registers into which we can pop them, then we must move the return
26013 address into the link register and make available the register that
26014 contained it. */
26015 if (regs_available_for_popping == 0 && pops_needed > 0)
26016 {
26017 regs_available_for_popping |= 1 << reg_containing_return_addr;
26018
26019 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26020 reg_containing_return_addr);
26021
26022 reg_containing_return_addr = LR_REGNUM;
26023 }
26024
26025 /* If we have registers left on the stack then pop some more.
26026 We know that at most we will want to pop FP and SP. */
26027 if (pops_needed > 0)
26028 {
26029 int popped_into;
26030 int move_to;
26031
26032 thumb_pop (f, regs_available_for_popping);
26033
26034 /* We have popped either FP or SP.
26035 Move whichever one it is into the correct register. */
26036 popped_into = number_of_first_bit_set (regs_available_for_popping);
26037 move_to = number_of_first_bit_set (regs_to_pop);
26038
26039 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26040
26041 regs_to_pop &= ~(1 << move_to);
26042
26043 --pops_needed;
26044 }
26045
26046 /* If we still have not popped everything then we must have only
26047 had one register available to us and we are now popping the SP. */
26048 if (pops_needed > 0)
26049 {
26050 int popped_into;
26051
26052 thumb_pop (f, regs_available_for_popping);
26053
26054 popped_into = number_of_first_bit_set (regs_available_for_popping);
26055
26056 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26057 /*
26058 assert (regs_to_pop == (1 << STACK_POINTER))
26059 assert (pops_needed == 1)
26060 */
26061 }
26062
26063 /* If necessary restore the a4 register. */
26064 if (restore_a4)
26065 {
26066 if (reg_containing_return_addr != LR_REGNUM)
26067 {
26068 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26069 reg_containing_return_addr = LR_REGNUM;
26070 }
26071
26072 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26073 }
26074
26075 if (crtl->calls_eh_return)
26076 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26077
26078 /* Return to caller. */
26079 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26080 }
26081 \f
26082 /* Scan INSN just before assembler is output for it.
26083 For Thumb-1, we track the status of the condition codes; this
26084 information is used in the cbranchsi4_insn pattern. */
26085 void
26086 thumb1_final_prescan_insn (rtx insn)
26087 {
26088 if (flag_print_asm_name)
26089 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26090 INSN_ADDRESSES (INSN_UID (insn)));
26091 /* Don't overwrite the previous setter when we get to a cbranch. */
26092 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26093 {
26094 enum attr_conds conds;
26095
26096 if (cfun->machine->thumb1_cc_insn)
26097 {
26098 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26099 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26100 CC_STATUS_INIT;
26101 }
26102 conds = get_attr_conds (insn);
26103 if (conds == CONDS_SET)
26104 {
26105 rtx set = single_set (insn);
26106 cfun->machine->thumb1_cc_insn = insn;
26107 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26108 cfun->machine->thumb1_cc_op1 = const0_rtx;
26109 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26110 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26111 {
26112 rtx src1 = XEXP (SET_SRC (set), 1);
26113 if (src1 == const0_rtx)
26114 cfun->machine->thumb1_cc_mode = CCmode;
26115 }
26116 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26117 {
26118 /* Record the src register operand instead of dest because
26119 cprop_hardreg pass propagates src. */
26120 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26121 }
26122 }
26123 else if (conds != CONDS_NOCOND)
26124 cfun->machine->thumb1_cc_insn = NULL_RTX;
26125 }
26126
26127 /* Check if an unexpected far jump is used. */
26128 if (cfun->machine->lr_save_eliminated
26129 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26130 internal_error ("Unexpected thumb1 far jump");
26131 }
26132
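/* Return nonzero if the low 32 bits of VAL can be expressed as an 8-bit
   constant shifted left by 0 to 24 bits, i.e. all of the set bits fall
   within one contiguous byte-sized field of the 32-bit value; return zero
   otherwise (including for VAL == 0).  */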
26133 int
26134 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26135 {
26136 unsigned HOST_WIDE_INT mask = 0xff;
26137 int i;
26138
26139 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26140 if (val == 0) /* XXX */
26141 return 0;
26142
26143 for (i = 0; i < 25; i++)
26144 if ((val & (mask << i)) == val)
26145 return 1;
26146
26147 return 0;
26148 }
26149
26150 /* Returns nonzero if the current function contains,
26151 or might contain, a far jump. */
26152 static int
26153 thumb_far_jump_used_p (void)
26154 {
26155 rtx insn;
26156 bool far_jump = false;
26157 unsigned int func_size = 0;
26158
26159 /* This test is only important for leaf functions. */
26160 /* assert (!leaf_function_p ()); */
26161
26162 /* If we have already decided that far jumps may be used,
26163 do not bother checking again, and always return true even if
26164 it turns out that they are not being used. Once we have made
26165 the decision that far jumps are present (and that hence the link
26166 register will be pushed onto the stack) we cannot go back on it. */
26167 if (cfun->machine->far_jump_used)
26168 return 1;
26169
26170 /* If this function is not being called from the prologue/epilogue
26171 generation code then it must be being called from the
26172 INITIAL_ELIMINATION_OFFSET macro. */
26173 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26174 {
26175 /* In this case we know that we are being asked about the elimination
26176 of the arg pointer register. If that register is not being used,
26177 then there are no arguments on the stack, and we do not have to
26178 worry that a far jump might force the prologue to push the link
26179 register, changing the stack offsets. In this case we can just
26180 return false, since the presence of far jumps in the function will
26181 not affect stack offsets.
26182
26183 If the arg pointer is live (or if it was live, but has now been
26184 eliminated and so set to dead) then we do have to test to see if
26185 the function might contain a far jump. This test can lead to some
26186 false positives, since before reload is completed, the length of
26187 branch instructions is not known, so gcc defaults to returning their
26188 longest length, which in turn sets the far jump attribute to true.
26189 
26190 A false positive will not result in bad code being generated, but it
26191 will result in a needless push and pop of the link register. We
26192 hope that this does not occur too often.
26193
26194 If we need doubleword stack alignment this could affect the other
26195 elimination offsets so we can't risk getting it wrong. */
26196 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26197 cfun->machine->arg_pointer_live = 1;
26198 else if (!cfun->machine->arg_pointer_live)
26199 return 0;
26200 }
26201
26202 /* Check to see if the function contains a branch
26203 insn with the far jump attribute set. */
26204 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26205 {
26206 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26207 {
26208 far_jump = true;
26209 }
26210 func_size += get_attr_length (insn);
26211 }
26212
26213 /* The far_jump attribute will always be true for thumb1 before the
26214 shorten_branch pass. So checking the far_jump attribute before
26215 shorten_branch isn't very useful.
26216 
26217 The following heuristic tries to estimate more accurately whether a
26218 far jump will finally be used. The heuristic is very conservative, as
26219 there is no way to roll back a decision not to use a far jump.
26220 
26221 Thumb1 long branch offsets range from -2048 to 2046. In the worst case
26222 each 2-byte insn is associated with a 4-byte constant pool entry. Using
26223 a function size of 2048/3 as the threshold is conservative enough. */
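/* Illustrative arithmetic: a function whose insns alone occupy 700 bytes
   consists of 350 2-byte insns; in the worst case each insn drags in a
   4-byte constant pool entry, for a total span of 700 * 3 = 2100 bytes,
   beyond the -2048..2046 reach of a Thumb1 long branch.  Hence the
   func_size * 3 >= 2048 test below.  */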
26224 if (far_jump)
26225 {
26226 if ((func_size * 3) >= 2048)
26227 {
26228 /* Record the fact that we have decided that
26229 the function does use far jumps. */
26230 cfun->machine->far_jump_used = 1;
26231 return 1;
26232 }
26233 }
26234
26235 return 0;
26236 }
26237
26238 /* Return nonzero if FUNC must be entered in ARM mode. */
26239 int
26240 is_called_in_ARM_mode (tree func)
26241 {
26242 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26243
26244 /* Ignore the problem about functions whose address is taken. */
26245 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26246 return TRUE;
26247
26248 #ifdef ARM_PE
26249 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26250 #else
26251 return FALSE;
26252 #endif
26253 }
26254
26255 /* Given the stack offsets and register mask in OFFSETS, decide how
26256 many additional registers to push instead of subtracting a constant
26257 from SP. For epilogues the principle is the same except we use pop.
26258 FOR_PROLOGUE indicates which we're generating. */
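/* Illustrative example: with a frame of exactly 512 bytes, a Thumb-1
   "sub sp, #imm" cannot encode the adjustment in one instruction (the
   immediate is limited to 508), but pushing one extra register reduces
   the remaining adjustment to 508, which does fit; hence the
   (amount - 508) / 4 computation below.  */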
26259 static int
26260 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26261 {
26262 HOST_WIDE_INT amount;
26263 unsigned long live_regs_mask = offsets->saved_regs_mask;
26264 /* Extract a mask of the ones we can give to the Thumb's push/pop
26265 instruction. */
26266 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26267 /* Then count how many other high registers will need to be pushed. */
26268 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26269 int n_free, reg_base, size;
26270
26271 if (!for_prologue && frame_pointer_needed)
26272 amount = offsets->locals_base - offsets->saved_regs;
26273 else
26274 amount = offsets->outgoing_args - offsets->saved_regs;
26275
26276 /* If the stack frame size is 512 exactly, we can save one load
26277 instruction, which should make this a win even when optimizing
26278 for speed. */
26279 if (!optimize_size && amount != 512)
26280 return 0;
26281
26282 /* Can't do this if there are high registers to push. */
26283 if (high_regs_pushed != 0)
26284 return 0;
26285
26286 /* Shouldn't do it in the prologue if no registers would normally
26287 be pushed at all. In the epilogue, also allow it if we'll have
26288 a pop insn for the PC. */
26289 if (l_mask == 0
26290 && (for_prologue
26291 || TARGET_BACKTRACE
26292 || (live_regs_mask & 1 << LR_REGNUM) == 0
26293 || TARGET_INTERWORK
26294 || crtl->args.pretend_args_size != 0))
26295 return 0;
26296
26297 /* Don't do this if thumb_expand_prologue wants to emit instructions
26298 between the push and the stack frame allocation. */
26299 if (for_prologue
26300 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26301 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26302 return 0;
26303
26304 reg_base = 0;
26305 n_free = 0;
26306 if (!for_prologue)
26307 {
26308 size = arm_size_return_regs ();
26309 reg_base = ARM_NUM_INTS (size);
26310 live_regs_mask >>= reg_base;
26311 }
26312
26313 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26314 && (for_prologue || call_used_regs[reg_base + n_free]))
26315 {
26316 live_regs_mask >>= 1;
26317 n_free++;
26318 }
26319
26320 if (n_free == 0)
26321 return 0;
26322 gcc_assert (amount / 4 * 4 == amount);
26323
26324 if (amount >= 512 && (amount - n_free * 4) < 512)
26325 return (amount - 508) / 4;
26326 if (amount <= n_free * 4)
26327 return amount / 4;
26328 return 0;
26329 }
26330
26331 /* The bits which aren't usefully expanded as rtl. */
26332 const char *
26333 thumb1_unexpanded_epilogue (void)
26334 {
26335 arm_stack_offsets *offsets;
26336 int regno;
26337 unsigned long live_regs_mask = 0;
26338 int high_regs_pushed = 0;
26339 int extra_pop;
26340 int had_to_push_lr;
26341 int size;
26342
26343 if (cfun->machine->return_used_this_function != 0)
26344 return "";
26345
26346 if (IS_NAKED (arm_current_func_type ()))
26347 return "";
26348
26349 offsets = arm_get_frame_offsets ();
26350 live_regs_mask = offsets->saved_regs_mask;
26351 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26352
26353 /* We can deduce the registers used from the function's return value.
26354 This is more reliable than examining df_regs_ever_live_p () because that
26355 will be set if the register is ever used in the function, not just if
26356 the register is used to hold a return value. */
26357 size = arm_size_return_regs ();
26358
26359 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26360 if (extra_pop > 0)
26361 {
26362 unsigned long extra_mask = (1 << extra_pop) - 1;
26363 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26364 }
26365
26366 /* The prologue may have pushed some high registers to use as
26367 work registers. E.g. the testsuite file:
26368 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26369 compiles to produce:
26370 push {r4, r5, r6, r7, lr}
26371 mov r7, r9
26372 mov r6, r8
26373 push {r6, r7}
26374 as part of the prologue. We have to undo that pushing here. */
26375
26376 if (high_regs_pushed)
26377 {
26378 unsigned long mask = live_regs_mask & 0xff;
26379 int next_hi_reg;
26380
26381 /* The available low registers depend on the size of the value we are
26382 returning. */
26383 if (size <= 12)
26384 mask |= 1 << 3;
26385 if (size <= 8)
26386 mask |= 1 << 2;
26387
26388 if (mask == 0)
26389 /* Oh dear! We have no low registers into which we can pop
26390 high registers! */
26391 internal_error
26392 ("no low registers available for popping high registers");
26393
26394 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26395 if (live_regs_mask & (1 << next_hi_reg))
26396 break;
26397
26398 while (high_regs_pushed)
26399 {
26400 /* Find lo register(s) into which the high register(s) can
26401 be popped. */
26402 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26403 {
26404 if (mask & (1 << regno))
26405 high_regs_pushed--;
26406 if (high_regs_pushed == 0)
26407 break;
26408 }
26409
26410 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26411
26412 /* Pop the values into the low register(s). */
26413 thumb_pop (asm_out_file, mask);
26414
26415 /* Move the value(s) into the high registers. */
26416 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26417 {
26418 if (mask & (1 << regno))
26419 {
26420 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26421 regno);
26422
26423 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26424 if (live_regs_mask & (1 << next_hi_reg))
26425 break;
26426 }
26427 }
26428 }
26429 live_regs_mask &= ~0x0f00;
26430 }
26431
26432 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26433 live_regs_mask &= 0xff;
26434
26435 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26436 {
26437 /* Pop the return address into the PC. */
26438 if (had_to_push_lr)
26439 live_regs_mask |= 1 << PC_REGNUM;
26440
26441 /* Either no argument registers were pushed or a backtrace
26442 structure was created which includes an adjusted stack
26443 pointer, so just pop everything. */
26444 if (live_regs_mask)
26445 thumb_pop (asm_out_file, live_regs_mask);
26446
26447 /* We have either just popped the return address into the
26448 PC or it was kept in LR for the entire function.
26449 Note that thumb_pop has already called thumb_exit if the
26450 PC was in the list. */
26451 if (!had_to_push_lr)
26452 thumb_exit (asm_out_file, LR_REGNUM);
26453 }
26454 else
26455 {
26456 /* Pop everything but the return address. */
26457 if (live_regs_mask)
26458 thumb_pop (asm_out_file, live_regs_mask);
26459
26460 if (had_to_push_lr)
26461 {
26462 if (size > 12)
26463 {
26464 /* We have no free low regs, so save one. */
26465 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26466 LAST_ARG_REGNUM);
26467 }
26468
26469 /* Get the return address into a temporary register. */
26470 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26471
26472 if (size > 12)
26473 {
26474 /* Move the return address to lr. */
26475 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26476 LAST_ARG_REGNUM);
26477 /* Restore the low register. */
26478 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26479 IP_REGNUM);
26480 regno = LR_REGNUM;
26481 }
26482 else
26483 regno = LAST_ARG_REGNUM;
26484 }
26485 else
26486 regno = LR_REGNUM;
26487
26488 /* Remove the argument registers that were pushed onto the stack. */
26489 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26490 SP_REGNUM, SP_REGNUM,
26491 crtl->args.pretend_args_size);
26492
26493 thumb_exit (asm_out_file, regno);
26494 }
26495
26496 return "";
26497 }
26498
26499 /* Functions to save and restore machine-specific function data. */
26500 static struct machine_function *
26501 arm_init_machine_status (void)
26502 {
26503 struct machine_function *machine;
26504 machine = ggc_alloc_cleared_machine_function ();
26505
26506 #if ARM_FT_UNKNOWN != 0
26507 machine->func_type = ARM_FT_UNKNOWN;
26508 #endif
26509 return machine;
26510 }
26511
26512 /* Return an RTX indicating where the return address to the
26513 calling function can be found. */
26514 rtx
26515 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26516 {
26517 if (count != 0)
26518 return NULL_RTX;
26519
26520 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26521 }
26522
26523 /* Do anything needed before RTL is emitted for each function. */
26524 void
26525 arm_init_expanders (void)
26526 {
26527 /* Arrange to initialize and mark the machine per-function status. */
26528 init_machine_status = arm_init_machine_status;
26529
26530 /* This is to stop the combine pass optimizing away the alignment
26531 adjustment of va_arg. */
26532 /* ??? It is claimed that this should not be necessary. */
26533 if (cfun)
26534 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26535 }
26536
26537
26538 /* Like arm_compute_initial_elimination_offset. Simpler because there
26539 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
26540 to point at the base of the local variables after static stack
26541 space for a function has been allocated. */
26542
26543 HOST_WIDE_INT
26544 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26545 {
26546 arm_stack_offsets *offsets;
26547
26548 offsets = arm_get_frame_offsets ();
26549
26550 switch (from)
26551 {
26552 case ARG_POINTER_REGNUM:
26553 switch (to)
26554 {
26555 case STACK_POINTER_REGNUM:
26556 return offsets->outgoing_args - offsets->saved_args;
26557
26558 case FRAME_POINTER_REGNUM:
26559 return offsets->soft_frame - offsets->saved_args;
26560
26561 case ARM_HARD_FRAME_POINTER_REGNUM:
26562 return offsets->saved_regs - offsets->saved_args;
26563
26564 case THUMB_HARD_FRAME_POINTER_REGNUM:
26565 return offsets->locals_base - offsets->saved_args;
26566
26567 default:
26568 gcc_unreachable ();
26569 }
26570 break;
26571
26572 case FRAME_POINTER_REGNUM:
26573 switch (to)
26574 {
26575 case STACK_POINTER_REGNUM:
26576 return offsets->outgoing_args - offsets->soft_frame;
26577
26578 case ARM_HARD_FRAME_POINTER_REGNUM:
26579 return offsets->saved_regs - offsets->soft_frame;
26580
26581 case THUMB_HARD_FRAME_POINTER_REGNUM:
26582 return offsets->locals_base - offsets->soft_frame;
26583
26584 default:
26585 gcc_unreachable ();
26586 }
26587 break;
26588
26589 default:
26590 gcc_unreachable ();
26591 }
26592 }
26593
26594 /* Generate the function's prologue. */
26595
26596 void
26597 thumb1_expand_prologue (void)
26598 {
26599 rtx insn;
26600
26601 HOST_WIDE_INT amount;
26602 arm_stack_offsets *offsets;
26603 unsigned long func_type;
26604 int regno;
26605 unsigned long live_regs_mask;
26606 unsigned long l_mask;
26607 unsigned high_regs_pushed = 0;
26608
26609 func_type = arm_current_func_type ();
26610
26611 /* Naked functions don't have prologues. */
26612 if (IS_NAKED (func_type))
26613 return;
26614
26615 if (IS_INTERRUPT (func_type))
26616 {
26617 error ("interrupt Service Routines cannot be coded in Thumb mode");
26618 return;
26619 }
26620
26621 if (is_called_in_ARM_mode (current_function_decl))
26622 emit_insn (gen_prologue_thumb1_interwork ());
26623
26624 offsets = arm_get_frame_offsets ();
26625 live_regs_mask = offsets->saved_regs_mask;
26626
26627 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26628 l_mask = live_regs_mask & 0x40ff;
26629 /* Then count how many other high registers will need to be pushed. */
26630 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26631
26632 if (crtl->args.pretend_args_size)
26633 {
26634 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26635
26636 if (cfun->machine->uses_anonymous_args)
26637 {
26638 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26639 unsigned long mask;
26640
26641 mask = 1ul << (LAST_ARG_REGNUM + 1);
26642 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26643
26644 insn = thumb1_emit_multi_reg_push (mask, 0);
26645 }
26646 else
26647 {
26648 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26649 stack_pointer_rtx, x));
26650 }
26651 RTX_FRAME_RELATED_P (insn) = 1;
26652 }
26653
26654 if (TARGET_BACKTRACE)
26655 {
26656 HOST_WIDE_INT offset = 0;
26657 unsigned work_register;
26658 rtx work_reg, x, arm_hfp_rtx;
26659
26660 /* We have been asked to create a stack backtrace structure.
26661 The code looks like this:
26662
26663 0 .align 2
26664 0 func:
26665 0 sub SP, #16 Reserve space for 4 registers.
26666 2 push {R7} Push low registers.
26667 4 add R7, SP, #20 Get the stack pointer before the push.
26668 6 str R7, [SP, #8] Store the stack pointer
26669 (before reserving the space).
26670 8 mov R7, PC Get hold of the start of this code + 12.
26671 10 str R7, [SP, #16] Store it.
26672 12 mov R7, FP Get hold of the current frame pointer.
26673 14 str R7, [SP, #4] Store it.
26674 16 mov R7, LR Get hold of the current return address.
26675 18 str R7, [SP, #12] Store it.
26676 20 add R7, SP, #16 Point at the start of the
26677 backtrace structure.
26678 22 mov FP, R7 Put this value into the frame pointer. */
26679
26680 work_register = thumb_find_work_register (live_regs_mask);
26681 work_reg = gen_rtx_REG (SImode, work_register);
26682 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26683
26684 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26685 stack_pointer_rtx, GEN_INT (-16)));
26686 RTX_FRAME_RELATED_P (insn) = 1;
26687
26688 if (l_mask)
26689 {
26690 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26691 RTX_FRAME_RELATED_P (insn) = 1;
26692
26693 offset = bit_count (l_mask) * UNITS_PER_WORD;
26694 }
26695
26696 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26697 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26698
26699 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26700 x = gen_frame_mem (SImode, x);
26701 emit_move_insn (x, work_reg);
26702
26703 /* Make sure that the instruction fetching the PC is in the right place
26704 to calculate "start of backtrace creation code + 12". */
26705 /* ??? The stores using the common WORK_REG ought to be enough to
26706 prevent the scheduler from doing anything weird. Failing that
26707 we could always move all of the following into an UNSPEC_VOLATILE. */
26708 if (l_mask)
26709 {
26710 x = gen_rtx_REG (SImode, PC_REGNUM);
26711 emit_move_insn (work_reg, x);
26712
26713 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26714 x = gen_frame_mem (SImode, x);
26715 emit_move_insn (x, work_reg);
26716
26717 emit_move_insn (work_reg, arm_hfp_rtx);
26718
26719 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26720 x = gen_frame_mem (SImode, x);
26721 emit_move_insn (x, work_reg);
26722 }
26723 else
26724 {
26725 emit_move_insn (work_reg, arm_hfp_rtx);
26726
26727 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26728 x = gen_frame_mem (SImode, x);
26729 emit_move_insn (x, work_reg);
26730
26731 x = gen_rtx_REG (SImode, PC_REGNUM);
26732 emit_move_insn (work_reg, x);
26733
26734 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26735 x = gen_frame_mem (SImode, x);
26736 emit_move_insn (x, work_reg);
26737 }
26738
26739 x = gen_rtx_REG (SImode, LR_REGNUM);
26740 emit_move_insn (work_reg, x);
26741
26742 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26743 x = gen_frame_mem (SImode, x);
26744 emit_move_insn (x, work_reg);
26745
26746 x = GEN_INT (offset + 12);
26747 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26748
26749 emit_move_insn (arm_hfp_rtx, work_reg);
26750 }
26751 /* Optimization: If we are not pushing any low registers but we are going
26752 to push some high registers then delay our first push. This will just
26753 be a push of LR and we can combine it with the push of the first high
26754 register. */
26755 else if ((l_mask & 0xff) != 0
26756 || (high_regs_pushed == 0 && l_mask))
26757 {
26758 unsigned long mask = l_mask;
26759 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
26760 insn = thumb1_emit_multi_reg_push (mask, mask);
26761 RTX_FRAME_RELATED_P (insn) = 1;
26762 }
26763
26764 if (high_regs_pushed)
26765 {
26766 unsigned pushable_regs;
26767 unsigned next_hi_reg;
26768 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26769 : crtl->args.info.nregs;
26770 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
26771
26772 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26773 if (live_regs_mask & (1 << next_hi_reg))
26774 break;
26775
26776 /* Here we need to mask out registers used for passing arguments,
26777 even if they could be pushed. This avoids using them to stash the high
26778 registers, since such a stash could clobber arguments that are still live. */
26779 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
26780
26781 if (pushable_regs == 0)
26782 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26783
26784 while (high_regs_pushed > 0)
26785 {
26786 unsigned long real_regs_mask = 0;
26787
26788 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
26789 {
26790 if (pushable_regs & (1 << regno))
26791 {
26792 emit_move_insn (gen_rtx_REG (SImode, regno),
26793 gen_rtx_REG (SImode, next_hi_reg));
26794
26795 high_regs_pushed --;
26796 real_regs_mask |= (1 << next_hi_reg);
26797
26798 if (high_regs_pushed)
26799 {
26800 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26801 next_hi_reg --)
26802 if (live_regs_mask & (1 << next_hi_reg))
26803 break;
26804 }
26805 else
26806 {
26807 pushable_regs &= ~((1 << regno) - 1);
26808 break;
26809 }
26810 }
26811 }
26812
26813 /* If we had to find a work register and we have not yet
26814 saved the LR then add it to the list of regs to push. */
26815 if (l_mask == (1 << LR_REGNUM))
26816 {
26817 pushable_regs |= l_mask;
26818 real_regs_mask |= l_mask;
26819 l_mask = 0;
26820 }
26821
26822 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
26823 RTX_FRAME_RELATED_P (insn) = 1;
26824 }
26825 }
26826
26827 /* Load the pic register before setting the frame pointer,
26828 so we can use r7 as a temporary work register. */
26829 if (flag_pic && arm_pic_register != INVALID_REGNUM)
26830 arm_load_pic_register (live_regs_mask);
26831
26832 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26833 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26834 stack_pointer_rtx);
26835
26836 if (flag_stack_usage_info)
26837 current_function_static_stack_size
26838 = offsets->outgoing_args - offsets->saved_args;
26839
26840 amount = offsets->outgoing_args - offsets->saved_regs;
26841 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
26842 if (amount)
26843 {
26844 if (amount < 512)
26845 {
26846 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26847 GEN_INT (- amount)));
26848 RTX_FRAME_RELATED_P (insn) = 1;
26849 }
26850 else
26851 {
26852 rtx reg, dwarf;
26853
26854 /* The stack decrement is too big for an immediate value in a single
26855 insn. In theory we could issue multiple subtracts, but after
26856 three of them it becomes more space efficient to place the full
26857 value in the constant pool and load into a register. (Also the
26858 ARM debugger really likes to see only one stack decrement per
26859 function). So instead we look for a scratch register into which
26860 we can load the decrement, and then we subtract this from the
26861 stack pointer. Unfortunately on the thumb the only available
26862 scratch registers are the argument registers, and we cannot use
26863 these as they may hold arguments to the function. Instead we
26864 attempt to locate a call preserved register which is used by this
26865 function. If we can find one, then we know that it will have
26866 been pushed at the start of the prologue and so we can corrupt
26867 it now. */
26868 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26869 if (live_regs_mask & (1 << regno))
26870 break;
26871
26872 gcc_assert (regno <= LAST_LO_REGNUM);
26873
26874 reg = gen_rtx_REG (SImode, regno);
26875
26876 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26877
26878 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26879 stack_pointer_rtx, reg));
26880
26881 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26882 plus_constant (Pmode, stack_pointer_rtx,
26883 -amount));
26884 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26885 RTX_FRAME_RELATED_P (insn) = 1;
26886 }
26887 }
26888
26889 if (frame_pointer_needed)
26890 thumb_set_frame_pointer (offsets);
26891
26892 /* If we are profiling, make sure no instructions are scheduled before
26893 the call to mcount. Similarly if the user has requested no
26894 scheduling in the prolog. Similarly if we want non-call exceptions
26895 using the EABI unwinder, to prevent faulting instructions from being
26896 swapped with a stack adjustment. */
26897 if (crtl->profile || !TARGET_SCHED_PROLOG
26898 || (arm_except_unwind_info (&global_options) == UI_TARGET
26899 && cfun->can_throw_non_call_exceptions))
26900 emit_insn (gen_blockage ());
26901
26902 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
26903 if (live_regs_mask & 0xff)
26904 cfun->machine->lr_save_eliminated = 0;
26905 }
26906
26907 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
26908 single POP instruction can be generated. LR should be replaced by PC. All
26909 the checks required are already done by USE_RETURN_INSN (). Hence,
26910 all we really need to check here is whether a single register or
26911 multiple registers are to be returned. */
26912 void
26913 thumb2_expand_return (bool simple_return)
26914 {
26915 int i, num_regs;
26916 unsigned long saved_regs_mask;
26917 arm_stack_offsets *offsets;
26918
26919 offsets = arm_get_frame_offsets ();
26920 saved_regs_mask = offsets->saved_regs_mask;
26921
26922 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
26923 if (saved_regs_mask & (1 << i))
26924 num_regs++;
26925
26926 if (!simple_return && saved_regs_mask)
26927 {
26928 if (num_regs == 1)
26929 {
26930 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
26931 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
26932 rtx addr = gen_rtx_MEM (SImode,
26933 gen_rtx_POST_INC (SImode,
26934 stack_pointer_rtx));
26935 set_mem_alias_set (addr, get_frame_alias_set ());
26936 XVECEXP (par, 0, 0) = ret_rtx;
26937 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
26938 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
26939 emit_jump_insn (par);
26940 }
26941 else
26942 {
26943 saved_regs_mask &= ~ (1 << LR_REGNUM);
26944 saved_regs_mask |= (1 << PC_REGNUM);
26945 arm_emit_multi_reg_pop (saved_regs_mask);
26946 }
26947 }
26948 else
26949 {
26950 emit_jump_insn (simple_return_rtx);
26951 }
26952 }
26953
26954 void
26955 thumb1_expand_epilogue (void)
26956 {
26957 HOST_WIDE_INT amount;
26958 arm_stack_offsets *offsets;
26959 int regno;
26960
26961 /* Naked functions don't have epilogues. */
26962 if (IS_NAKED (arm_current_func_type ()))
26963 return;
26964
26965 offsets = arm_get_frame_offsets ();
26966 amount = offsets->outgoing_args - offsets->saved_regs;
26967
26968 if (frame_pointer_needed)
26969 {
26970 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
26971 amount = offsets->locals_base - offsets->saved_regs;
26972 }
26973 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
26974
26975 gcc_assert (amount >= 0);
26976 if (amount)
26977 {
26978 emit_insn (gen_blockage ());
26979
26980 if (amount < 512)
26981 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26982 GEN_INT (amount)));
26983 else
26984 {
26985 /* r3 is always free in the epilogue. */
26986 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
26987
26988 emit_insn (gen_movsi (reg, GEN_INT (amount)));
26989 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
26990 }
26991 }
26992
26993 /* Emit a USE (stack_pointer_rtx), so that
26994 the stack adjustment will not be deleted. */
26995 emit_insn (gen_force_register_use (stack_pointer_rtx));
26996
26997 if (crtl->profile || !TARGET_SCHED_PROLOG)
26998 emit_insn (gen_blockage ());
26999
27000 /* Emit a clobber for each register that will be restored in the epilogue,
27001 so that flow2 will get register lifetimes correct. */
27002 for (regno = 0; regno < 13; regno++)
27003 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27004 emit_clobber (gen_rtx_REG (SImode, regno));
27005
27006 if (! df_regs_ever_live_p (LR_REGNUM))
27007 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27008 }
27009
27010 /* Epilogue code for APCS frame. */
27011 static void
27012 arm_expand_epilogue_apcs_frame (bool really_return)
27013 {
27014 unsigned long func_type;
27015 unsigned long saved_regs_mask;
27016 int num_regs = 0;
27017 int i;
27018 int floats_from_frame = 0;
27019 arm_stack_offsets *offsets;
27020
27021 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27022 func_type = arm_current_func_type ();
27023
27024 /* Get frame offsets for ARM. */
27025 offsets = arm_get_frame_offsets ();
27026 saved_regs_mask = offsets->saved_regs_mask;
27027
27028 /* Find the offset of the floating-point save area in the frame. */
27029 floats_from_frame = offsets->saved_args - offsets->frame;
27030
27031 /* Compute how many core registers saved and how far away the floats are. */
27032 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27033 if (saved_regs_mask & (1 << i))
27034 {
27035 num_regs++;
27036 floats_from_frame += 4;
27037 }
27038
27039 if (TARGET_HARD_FLOAT && TARGET_VFP)
27040 {
27041 int start_reg;
27042
27043 /* The offset is from IP_REGNUM. */
27044 int saved_size = arm_get_vfp_saved_size ();
27045 if (saved_size > 0)
27046 {
27047 floats_from_frame += saved_size;
27048 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
27049 hard_frame_pointer_rtx,
27050 GEN_INT (-floats_from_frame)));
27051 }
27052
27053 /* Generate VFP register multi-pop. */
27054 start_reg = FIRST_VFP_REGNUM;
27055
27056 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27057 /* Look for a case where a reg does not need restoring. */
27058 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27059 && (!df_regs_ever_live_p (i + 1)
27060 || call_used_regs[i + 1]))
27061 {
27062 if (start_reg != i)
27063 arm_emit_vfp_multi_reg_pop (start_reg,
27064 (i - start_reg) / 2,
27065 gen_rtx_REG (SImode,
27066 IP_REGNUM));
27067 start_reg = i + 2;
27068 }
27069
27070 /* Restore the remaining regs that we have discovered (or possibly
27071 even all of them, if the conditional in the for loop never
27072 fired). */
27073 if (start_reg != i)
27074 arm_emit_vfp_multi_reg_pop (start_reg,
27075 (i - start_reg) / 2,
27076 gen_rtx_REG (SImode, IP_REGNUM));
27077 }
27078
27079 if (TARGET_IWMMXT)
27080 {
27081 /* The frame pointer is guaranteed to be non-double-word aligned, as
27082 it is set to double-word-aligned old_stack_pointer - 4. */
27083 rtx insn;
27084 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27085
27086 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27087 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27088 {
27089 rtx addr = gen_frame_mem (V2SImode,
27090 plus_constant (Pmode, hard_frame_pointer_rtx,
27091 - lrm_count * 4));
27092 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27093 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27094 gen_rtx_REG (V2SImode, i),
27095 NULL_RTX);
27096 lrm_count += 2;
27097 }
27098 }
27099
27100 /* saved_regs_mask should contain IP, which holds the old stack pointer
27101 from when the activation record was created. Since SP and IP are
27102 adjacent registers, we can restore the value directly into SP. */
27103 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27104 saved_regs_mask &= ~(1 << IP_REGNUM);
27105 saved_regs_mask |= (1 << SP_REGNUM);
27106
27107 /* There are two registers left in saved_regs_mask - LR and PC. We
27108 only need to restore LR (the return address), but to
27109 save time we can load it directly into PC, unless we need a
27110 special function exit sequence, or we are not really returning. */
27111 if (really_return
27112 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27113 && !crtl->calls_eh_return)
27114 /* Delete LR from the register mask, so that LR on
27115 the stack is loaded into the PC in the register mask. */
27116 saved_regs_mask &= ~(1 << LR_REGNUM);
27117 else
27118 saved_regs_mask &= ~(1 << PC_REGNUM);
27119
27120 num_regs = bit_count (saved_regs_mask);
27121 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27122 {
27123 emit_insn (gen_blockage ());
27124 /* Unwind the stack to just below the saved registers. */
27125 emit_insn (gen_addsi3 (stack_pointer_rtx,
27126 hard_frame_pointer_rtx,
27127 GEN_INT (- 4 * num_regs)));
27128 }
27129
27130 arm_emit_multi_reg_pop (saved_regs_mask);
27131
27132 if (IS_INTERRUPT (func_type))
27133 {
27134 /* Interrupt handlers will have pushed the
27135 IP onto the stack, so restore it now. */
27136 rtx insn;
27137 rtx addr = gen_rtx_MEM (SImode,
27138 gen_rtx_POST_INC (SImode,
27139 stack_pointer_rtx));
27140 set_mem_alias_set (addr, get_frame_alias_set ());
27141 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27142 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27143 gen_rtx_REG (SImode, IP_REGNUM),
27144 NULL_RTX);
27145 }
27146
27147 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27148 return;
27149
27150 if (crtl->calls_eh_return)
27151 emit_insn (gen_addsi3 (stack_pointer_rtx,
27152 stack_pointer_rtx,
27153 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27154
27155 if (IS_STACKALIGN (func_type))
27156 /* Restore the original stack pointer. Before prologue, the stack was
27157 realigned and the original stack pointer saved in r0. For details,
27158 see comment in arm_expand_prologue. */
27159 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27160
27161 emit_jump_insn (simple_return_rtx);
27162 }
27163
27164 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27165 function is not a sibcall. */
27166 void
27167 arm_expand_epilogue (bool really_return)
27168 {
27169 unsigned long func_type;
27170 unsigned long saved_regs_mask;
27171 int num_regs = 0;
27172 int i;
27173 int amount;
27174 arm_stack_offsets *offsets;
27175
27176 func_type = arm_current_func_type ();
27177
27178 /* Naked functions don't have an epilogue. Hence, just generate a return
27179 pattern and let output_return_instruction take care of any instruction emission. */
27180 if (IS_NAKED (func_type)
27181 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27182 {
27183 if (really_return)
27184 emit_jump_insn (simple_return_rtx);
27185 return;
27186 }
27187
27188 /* If we are throwing an exception, then we really must be doing a
27189 return, so we can't tail-call. */
27190 gcc_assert (!crtl->calls_eh_return || really_return);
27191
27192 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27193 {
27194 arm_expand_epilogue_apcs_frame (really_return);
27195 return;
27196 }
27197
27198 /* Get frame offsets for ARM. */
27199 offsets = arm_get_frame_offsets ();
27200 saved_regs_mask = offsets->saved_regs_mask;
27201 num_regs = bit_count (saved_regs_mask);
27202
27203 if (frame_pointer_needed)
27204 {
27205 rtx insn;
27206 /* Restore stack pointer if necessary. */
27207 if (TARGET_ARM)
27208 {
27209 /* In ARM mode, the frame pointer points to the first saved register.
27210 Restore the stack pointer to the last saved register. */
27211 amount = offsets->frame - offsets->saved_regs;
27212
27213 /* Force out any pending memory operations that reference stacked data
27214 before stack de-allocation occurs. */
27215 emit_insn (gen_blockage ());
27216 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27217 hard_frame_pointer_rtx,
27218 GEN_INT (amount)));
27219 arm_add_cfa_adjust_cfa_note (insn, amount,
27220 stack_pointer_rtx,
27221 hard_frame_pointer_rtx);
27222
27223 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27224 deleted. */
27225 emit_insn (gen_force_register_use (stack_pointer_rtx));
27226 }
27227 else
27228 {
27229 /* In Thumb-2 mode, the frame pointer points to the last saved
27230 register. */
27231 amount = offsets->locals_base - offsets->saved_regs;
27232 if (amount)
27233 {
27234 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27235 hard_frame_pointer_rtx,
27236 GEN_INT (amount)));
27237 arm_add_cfa_adjust_cfa_note (insn, amount,
27238 hard_frame_pointer_rtx,
27239 hard_frame_pointer_rtx);
27240 }
27241
27242 /* Force out any pending memory operations that reference stacked data
27243 before stack de-allocation occurs. */
27244 emit_insn (gen_blockage ());
27245 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27246 hard_frame_pointer_rtx));
27247 arm_add_cfa_adjust_cfa_note (insn, 0,
27248 stack_pointer_rtx,
27249 hard_frame_pointer_rtx);
27250 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27251 deleted. */
27252 emit_insn (gen_force_register_use (stack_pointer_rtx));
27253 }
27254 }
27255 else
27256 {
27257 /* Pop off outgoing args and local frame to adjust stack pointer to
27258 last saved register. */
27259 amount = offsets->outgoing_args - offsets->saved_regs;
27260 if (amount)
27261 {
27262 rtx tmp;
27263 /* Force out any pending memory operations that reference stacked data
27264 before stack de-allocation occurs. */
27265 emit_insn (gen_blockage ());
27266 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27267 stack_pointer_rtx,
27268 GEN_INT (amount)));
27269 arm_add_cfa_adjust_cfa_note (tmp, amount,
27270 stack_pointer_rtx, stack_pointer_rtx);
27271 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27272 not deleted. */
27273 emit_insn (gen_force_register_use (stack_pointer_rtx));
27274 }
27275 }
27276
27277 if (TARGET_HARD_FLOAT && TARGET_VFP)
27278 {
27279 /* Generate VFP register multi-pop. */
27280 int end_reg = LAST_VFP_REGNUM + 1;
27281
27282 /* Scan the registers in reverse order. We need to match
27283 any groupings made in the prologue and generate matching
27284 vldm operations. We must match the groups because,
27285 unlike pop, vldm can only restore consecutive regs. */
27286 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27287 /* Look for a case where a reg does not need restoring. */
27288 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27289 && (!df_regs_ever_live_p (i + 1)
27290 || call_used_regs[i + 1]))
27291 {
27292 /* Restore the regs discovered so far (from reg+2 to
27293 end_reg). */
27294 if (end_reg > i + 2)
27295 arm_emit_vfp_multi_reg_pop (i + 2,
27296 (end_reg - (i + 2)) / 2,
27297 stack_pointer_rtx);
27298 end_reg = i;
27299 }
27300
27301 /* Restore the remaining regs that we have discovered (or possibly
27302 even all of them, if the conditional in the for loop never
27303 fired). */
27304 if (end_reg > i + 2)
27305 arm_emit_vfp_multi_reg_pop (i + 2,
27306 (end_reg - (i + 2)) / 2,
27307 stack_pointer_rtx);
27308 }
27309
27310 if (TARGET_IWMMXT)
27311 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27312 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27313 {
27314 rtx insn;
27315 rtx addr = gen_rtx_MEM (V2SImode,
27316 gen_rtx_POST_INC (SImode,
27317 stack_pointer_rtx));
27318 set_mem_alias_set (addr, get_frame_alias_set ());
27319 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27320 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27321 gen_rtx_REG (V2SImode, i),
27322 NULL_RTX);
27323 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27324 stack_pointer_rtx, stack_pointer_rtx);
27325 }
27326
27327 if (saved_regs_mask)
27328 {
27329 rtx insn;
27330 bool return_in_pc = false;
27331
27332 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27333 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27334 && !IS_STACKALIGN (func_type)
27335 && really_return
27336 && crtl->args.pretend_args_size == 0
27337 && saved_regs_mask & (1 << LR_REGNUM)
27338 && !crtl->calls_eh_return)
27339 {
27340 saved_regs_mask &= ~(1 << LR_REGNUM);
27341 saved_regs_mask |= (1 << PC_REGNUM);
27342 return_in_pc = true;
27343 }
27344
27345 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27346 {
27347 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27348 if (saved_regs_mask & (1 << i))
27349 {
27350 rtx addr = gen_rtx_MEM (SImode,
27351 gen_rtx_POST_INC (SImode,
27352 stack_pointer_rtx));
27353 set_mem_alias_set (addr, get_frame_alias_set ());
27354
27355 if (i == PC_REGNUM)
27356 {
27357 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27358 XVECEXP (insn, 0, 0) = ret_rtx;
27359 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27360 gen_rtx_REG (SImode, i),
27361 addr);
27362 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27363 insn = emit_jump_insn (insn);
27364 }
27365 else
27366 {
27367 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27368 addr));
27369 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27370 gen_rtx_REG (SImode, i),
27371 NULL_RTX);
27372 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27373 stack_pointer_rtx,
27374 stack_pointer_rtx);
27375 }
27376 }
27377 }
27378 else
27379 {
27380 if (TARGET_LDRD
27381 && current_tune->prefer_ldrd_strd
27382 && !optimize_function_for_size_p (cfun))
27383 {
27384 if (TARGET_THUMB2)
27385 thumb2_emit_ldrd_pop (saved_regs_mask);
27386 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27387 arm_emit_ldrd_pop (saved_regs_mask);
27388 else
27389 arm_emit_multi_reg_pop (saved_regs_mask);
27390 }
27391 else
27392 arm_emit_multi_reg_pop (saved_regs_mask);
27393 }
27394
27395 if (return_in_pc)
27396 return;
27397 }
27398
27399 if (crtl->args.pretend_args_size)
27400 {
27401 int i, j;
27402 rtx dwarf = NULL_RTX;
27403 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27404 stack_pointer_rtx,
27405 GEN_INT (crtl->args.pretend_args_size)));
27406
27407 RTX_FRAME_RELATED_P (tmp) = 1;
27408
27409 if (cfun->machine->uses_anonymous_args)
27410 {
27411 /* Restore pretend args. Refer to arm_expand_prologue for how the
27412 pretend args are saved on the stack. */
27413 int num_regs = crtl->args.pretend_args_size / 4;
27414 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27415 for (j = 0, i = 0; j < num_regs; i++)
27416 if (saved_regs_mask & (1 << i))
27417 {
27418 rtx reg = gen_rtx_REG (SImode, i);
27419 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27420 j++;
27421 }
27422 REG_NOTES (tmp) = dwarf;
27423 }
27424 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27425 stack_pointer_rtx, stack_pointer_rtx);
27426 }
27427
27428 if (!really_return)
27429 return;
27430
27431 if (crtl->calls_eh_return)
27432 emit_insn (gen_addsi3 (stack_pointer_rtx,
27433 stack_pointer_rtx,
27434 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27435
27436 if (IS_STACKALIGN (func_type))
27437 /* Restore the original stack pointer. Before prologue, the stack was
27438 realigned and the original stack pointer saved in r0. For details,
27439 see comment in arm_expand_prologue. */
27440 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27441
27442 emit_jump_insn (simple_return_rtx);
27443 }
27444
27445 /* Implementation of insn prologue_thumb1_interwork. This is the first
27446 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27447
27448 const char *
27449 thumb1_output_interwork (void)
27450 {
27451 const char * name;
27452 FILE *f = asm_out_file;
27453
27454 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27455 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27456 == SYMBOL_REF);
27457 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27458
27459 /* Generate code sequence to switch us into Thumb mode. */
27460 /* The .code 32 directive has already been emitted by
27461 ASM_DECLARE_FUNCTION_NAME. */
27462 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27463 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27464
27465 /* Generate a label, so that the debugger will notice the
27466 change in instruction sets. This label is also used by
27467 the assembler to bypass the ARM code when this function
27468 is called from a Thumb encoded function elsewhere in the
27469 same file. Hence the definition of STUB_NAME here must
27470 agree with the definition in gas/config/tc-arm.c. */
27471
27472 #define STUB_NAME ".real_start_of"
27473
27474 fprintf (f, "\t.code\t16\n");
27475 #ifdef ARM_PE
27476 if (arm_dllexport_name_p (name))
27477 name = arm_strip_name_encoding (name);
27478 #endif
27479 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27480 fprintf (f, "\t.thumb_func\n");
27481 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27482
27483 return "";
27484 }
27485
27486 /* Handle the case of a double word load into a low register from
27487 a computed memory address. The computed address may involve a
27488 register which is overwritten by the load. */
27489 const char *
27490 thumb_load_double_from_address (rtx *operands)
27491 {
27492 rtx addr;
27493 rtx base;
27494 rtx offset;
27495 rtx arg1;
27496 rtx arg2;
27497
27498 gcc_assert (REG_P (operands[0]));
27499 gcc_assert (MEM_P (operands[1]));
27500
27501 /* Get the memory address. */
27502 addr = XEXP (operands[1], 0);
27503
27504 /* Work out how the memory address is computed. */
27505 switch (GET_CODE (addr))
27506 {
27507 case REG:
27508 operands[2] = adjust_address (operands[1], SImode, 4);
27509
27510 if (REGNO (operands[0]) == REGNO (addr))
27511 {
27512 output_asm_insn ("ldr\t%H0, %2", operands);
27513 output_asm_insn ("ldr\t%0, %1", operands);
27514 }
27515 else
27516 {
27517 output_asm_insn ("ldr\t%0, %1", operands);
27518 output_asm_insn ("ldr\t%H0, %2", operands);
27519 }
27520 break;
27521
27522 case CONST:
27523 /* Compute <address> + 4 for the high order load. */
27524 operands[2] = adjust_address (operands[1], SImode, 4);
27525
27526 output_asm_insn ("ldr\t%0, %1", operands);
27527 output_asm_insn ("ldr\t%H0, %2", operands);
27528 break;
27529
27530 case PLUS:
27531 arg1 = XEXP (addr, 0);
27532 arg2 = XEXP (addr, 1);
27533
27534 if (CONSTANT_P (arg1))
27535 base = arg2, offset = arg1;
27536 else
27537 base = arg1, offset = arg2;
27538
27539 gcc_assert (REG_P (base));
27540
27541 /* Catch the case of <address> = <reg> + <reg> */
27542 if (REG_P (offset))
27543 {
27544 int reg_offset = REGNO (offset);
27545 int reg_base = REGNO (base);
27546 int reg_dest = REGNO (operands[0]);
27547
27548 /* Add the base and offset registers together into the
27549 higher destination register. */
27550 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27551 reg_dest + 1, reg_base, reg_offset);
27552
27553 /* Load the lower destination register from the address in
27554 the higher destination register. */
27555 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27556 reg_dest, reg_dest + 1);
27557
27558 /* Load the higher destination register from its own address
27559 plus 4. */
27560 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27561 reg_dest + 1, reg_dest + 1);
27562 }
27563 else
27564 {
27565 /* Compute <address> + 4 for the high order load. */
27566 operands[2] = adjust_address (operands[1], SImode, 4);
27567
27568 /* If the computed address is held in the low order register
27569 then load the high order register first, otherwise always
27570 load the low order register first. */
27571 if (REGNO (operands[0]) == REGNO (base))
27572 {
27573 output_asm_insn ("ldr\t%H0, %2", operands);
27574 output_asm_insn ("ldr\t%0, %1", operands);
27575 }
27576 else
27577 {
27578 output_asm_insn ("ldr\t%0, %1", operands);
27579 output_asm_insn ("ldr\t%H0, %2", operands);
27580 }
27581 }
27582 break;
27583
27584 case LABEL_REF:
27585 /* With no registers to worry about we can just load the value
27586 directly. */
27587 operands[2] = adjust_address (operands[1], SImode, 4);
27588
27589 output_asm_insn ("ldr\t%H0, %2", operands);
27590 output_asm_insn ("ldr\t%0, %1", operands);
27591 break;
27592
27593 default:
27594 gcc_unreachable ();
27595 }
27596
27597 return "";
27598 }
27599
27600 const char *
27601 thumb_output_move_mem_multiple (int n, rtx *operands)
27602 {
27603 rtx tmp;
27604
27605 switch (n)
27606 {
27607 case 2:
27608 if (REGNO (operands[4]) > REGNO (operands[5]))
27609 {
27610 tmp = operands[4];
27611 operands[4] = operands[5];
27612 operands[5] = tmp;
27613 }
27614 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27615 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27616 break;
27617
27618 case 3:
27619 if (REGNO (operands[4]) > REGNO (operands[5]))
27620 {
27621 tmp = operands[4];
27622 operands[4] = operands[5];
27623 operands[5] = tmp;
27624 }
27625 if (REGNO (operands[5]) > REGNO (operands[6]))
27626 {
27627 tmp = operands[5];
27628 operands[5] = operands[6];
27629 operands[6] = tmp;
27630 }
27631 if (REGNO (operands[4]) > REGNO (operands[5]))
27632 {
27633 tmp = operands[4];
27634 operands[4] = operands[5];
27635 operands[5] = tmp;
27636 }
27637
27638 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27639 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27640 break;
27641
27642 default:
27643 gcc_unreachable ();
27644 }
27645
27646 return "";
27647 }
27648
27649 /* Output a call-via instruction for thumb state. */
27650 const char *
27651 thumb_call_via_reg (rtx reg)
27652 {
27653 int regno = REGNO (reg);
27654 rtx *labelp;
27655
27656 gcc_assert (regno < LR_REGNUM);
27657
27658 /* If we are in the normal text section we can use a single instance
27659 per compilation unit. If we are doing function sections, then we need
27660 an entry per section, since we can't rely on reachability. */
27661 if (in_section == text_section)
27662 {
27663 thumb_call_reg_needed = 1;
27664
27665 if (thumb_call_via_label[regno] == NULL)
27666 thumb_call_via_label[regno] = gen_label_rtx ();
27667 labelp = thumb_call_via_label + regno;
27668 }
27669 else
27670 {
27671 if (cfun->machine->call_via[regno] == NULL)
27672 cfun->machine->call_via[regno] = gen_label_rtx ();
27673 labelp = cfun->machine->call_via + regno;
27674 }
27675
27676 output_asm_insn ("bl\t%a0", labelp);
27677 return "";
27678 }
27679
27680 /* Routines for generating rtl. */
27681 void
27682 thumb_expand_movmemqi (rtx *operands)
27683 {
27684 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27685 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27686 HOST_WIDE_INT len = INTVAL (operands[2]);
27687 HOST_WIDE_INT offset = 0;
27688
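  /* Copy in the largest chunks first: 12- and 8-byte block moves, then
     word, half-word and byte tails.  For instance, a 27-byte copy
     expands as 12 + 12 + 2 + 1.  */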
27689 while (len >= 12)
27690 {
27691 emit_insn (gen_movmem12b (out, in, out, in));
27692 len -= 12;
27693 }
27694
27695 if (len >= 8)
27696 {
27697 emit_insn (gen_movmem8b (out, in, out, in));
27698 len -= 8;
27699 }
27700
27701 if (len >= 4)
27702 {
27703 rtx reg = gen_reg_rtx (SImode);
27704 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27705 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27706 len -= 4;
27707 offset += 4;
27708 }
27709
27710 if (len >= 2)
27711 {
27712 rtx reg = gen_reg_rtx (HImode);
27713 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27714 plus_constant (Pmode, in,
27715 offset))));
27716 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27717 offset)),
27718 reg));
27719 len -= 2;
27720 offset += 2;
27721 }
27722
27723 if (len)
27724 {
27725 rtx reg = gen_reg_rtx (QImode);
27726 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27727 plus_constant (Pmode, in,
27728 offset))));
27729 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27730 offset)),
27731 reg));
27732 }
27733 }
27734
27735 void
27736 thumb_reload_out_hi (rtx *operands)
27737 {
27738 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27739 }
27740
27741 /* Handle reading a half-word from memory during reload. */
27742 void
27743 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
27744 {
27745 gcc_unreachable ();
27746 }
27747
27748 /* Return the length of a function name prefix
27749 that starts with the character 'c'. */
27750 static int
27751 arm_get_strip_length (int c)
27752 {
27753 switch (c)
27754 {
27755 ARM_NAME_ENCODING_LENGTHS
27756 default: return 0;
27757 }
27758 }
27759
27760 /* Return a pointer to a function's name with any
27761 and all prefix encodings stripped from it. */
27762 const char *
27763 arm_strip_name_encoding (const char *name)
27764 {
27765 int skip;
27766
27767 while ((skip = arm_get_strip_length (* name)))
27768 name += skip;
27769
27770 return name;
27771 }
27772
27773 /* If there is a '*' anywhere in the name's prefix, then
27774 emit the stripped name verbatim, otherwise prepend an
27775 underscore if leading underscores are being used. */
27776 void
27777 arm_asm_output_labelref (FILE *stream, const char *name)
27778 {
27779 int skip;
27780 int verbatim = 0;
27781
27782 while ((skip = arm_get_strip_length (* name)))
27783 {
27784 verbatim |= (*name == '*');
27785 name += skip;
27786 }
27787
27788 if (verbatim)
27789 fputs (name, stream);
27790 else
27791 asm_fprintf (stream, "%U%s", name);
27792 }
27793
27794 /* This function is used to emit an EABI tag and its associated value.
27795 We emit the numerical value of the tag in case the assembler does not
27796 support textual tags (e.g. gas prior to 2.20). If requested we include
27797 the tag name in a comment so that anyone reading the assembler output
27798 will know which tag is being set.
27799
27800 This function is not static because arm-c.c needs it too. */
27801
27802 void
27803 arm_emit_eabi_attribute (const char *name, int num, int val)
27804 {
27805 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
27806 if (flag_verbose_asm || flag_debug_asm)
27807 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
27808 asm_fprintf (asm_out_file, "\n");
27809 }
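
/* As an illustration (assuming -fverbose-asm), a call such as
     arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, 2);
   emits
     .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals
   so assemblers that only understand numeric tags still accept it.  */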
27810
27811 static void
27812 arm_file_start (void)
27813 {
27814 int val;
27815
27816 if (TARGET_UNIFIED_ASM)
27817 asm_fprintf (asm_out_file, "\t.syntax unified\n");
27818
27819 if (TARGET_BPABI)
27820 {
27821 const char *fpu_name;
27822 if (arm_selected_arch)
27823 {
27824 const char* pos = strchr (arm_selected_arch->name, '+');
27825 if (pos)
27826 {
27827 char buf[15];
27828 gcc_assert (strlen (arm_selected_arch->name)
27829 <= sizeof (buf) / sizeof (*pos));
27830 strncpy (buf, arm_selected_arch->name,
27831 (pos - arm_selected_arch->name) * sizeof (*pos));
27832 buf[pos - arm_selected_arch->name] = '\0';
27833 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
27834 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
27835 }
27836 else
27837 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
27838 }
27839 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
27840 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
27841 else
27842 {
27843 const char* truncated_name
27844 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
27845 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
27846 }
27847
27848 if (TARGET_SOFT_FLOAT)
27849 {
27850 fpu_name = "softvfp";
27851 }
27852 else
27853 {
27854 fpu_name = arm_fpu_desc->name;
27855 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
27856 {
27857 if (TARGET_HARD_FLOAT)
27858 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
27859 if (TARGET_HARD_FLOAT_ABI)
27860 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27861 }
27862 }
27863 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
27864
27865 /* Some of these attributes only apply when the corresponding features
27866 are used. However we don't have any easy way of figuring this out.
27867 Conservatively record the setting that would have been used. */
27868
27869 if (flag_rounding_math)
27870 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27871
27872 if (!flag_unsafe_math_optimizations)
27873 {
27874 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27875 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27876 }
27877 if (flag_signaling_nans)
27878 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27879
27880 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27881 flag_finite_math_only ? 1 : 3);
27882
27883 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27884 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27885 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27886 flag_short_enums ? 1 : 2);
27887
27888 /* Tag_ABI_optimization_goals. */
27889 if (optimize_size)
27890 val = 4;
27891 else if (optimize >= 2)
27892 val = 2;
27893 else if (optimize)
27894 val = 1;
27895 else
27896 val = 6;
27897 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
27898
27899 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
27900 unaligned_access);
27901
27902 if (arm_fp16_format)
27903 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
27904 (int) arm_fp16_format);
27905
27906 if (arm_lang_output_object_attributes_hook)
27907 arm_lang_output_object_attributes_hook();
27908 }
27909
27910 default_file_start ();
27911 }
27912
27913 static void
27914 arm_file_end (void)
27915 {
27916 int regno;
27917
27918 if (NEED_INDICATE_EXEC_STACK)
27919 /* Add .note.GNU-stack. */
27920 file_end_indicate_exec_stack ();
27921
27922 if (! thumb_call_reg_needed)
27923 return;
27924
27925 switch_to_section (text_section);
27926 asm_fprintf (asm_out_file, "\t.code 16\n");
27927 ASM_OUTPUT_ALIGN (asm_out_file, 1);
27928
27929 for (regno = 0; regno < LR_REGNUM; regno++)
27930 {
27931 rtx label = thumb_call_via_label[regno];
27932
27933 if (label != 0)
27934 {
27935 targetm.asm_out.internal_label (asm_out_file, "L",
27936 CODE_LABEL_NUMBER (label));
27937 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
27938 }
27939 }
27940 }
27941
27942 #ifndef ARM_PE
27943 /* Symbols in the text segment can be accessed without indirecting via the
27944 constant pool; it may take an extra binary operation, but this is still
27945 faster than indirecting via memory. Don't do this when not optimizing,
27946 since we won't be calculating al of the offsets necessary to do this
27947 simplification. */
27948
27949 static void
27950 arm_encode_section_info (tree decl, rtx rtl, int first)
27951 {
27952 if (optimize > 0 && TREE_CONSTANT (decl))
27953 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
27954
27955 default_encode_section_info (decl, rtl, first);
27956 }
27957 #endif /* !ARM_PE */
27958
27959 static void
27960 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
27961 {
27962 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
27963 && !strcmp (prefix, "L"))
27964 {
27965 arm_ccfsm_state = 0;
27966 arm_target_insn = NULL;
27967 }
27968 default_internal_label (stream, prefix, labelno);
27969 }
27970
27971 /* Output code to add DELTA to the first argument, and then jump
27972 to FUNCTION. Used for C++ multiple inheritance. */
27973 static void
27974 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
27975 HOST_WIDE_INT delta,
27976 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
27977 tree function)
27978 {
27979 static int thunk_label = 0;
27980 char label[256];
27981 char labelpc[256];
27982 int mi_delta = delta;
27983 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
27984 int shift = 0;
27985 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
27986 ? 1 : 0);
27987 if (mi_delta < 0)
27988 mi_delta = - mi_delta;
27989
27990 final_start_function (emit_barrier (), file, 1);
27991
27992 if (TARGET_THUMB1)
27993 {
27994 int labelno = thunk_label++;
27995 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
27996 /* Thunks are entered in ARM mode when available. */
27997 if (TARGET_THUMB1_ONLY)
27998 {
27999 /* push r3 so we can use it as a temporary. */
28000 /* TODO: Omit this save if r3 is not used. */
28001 fputs ("\tpush {r3}\n", file);
28002 fputs ("\tldr\tr3, ", file);
28003 }
28004 else
28005 {
28006 fputs ("\tldr\tr12, ", file);
28007 }
28008 assemble_name (file, label);
28009 fputc ('\n', file);
28010 if (flag_pic)
28011 {
28012 /* If we are generating PIC, the ldr instruction below loads
28013 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28014 the address of the add + 8, so we have:
28015
28016 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28017 = target + 1.
28018
28019 Note that we have "+ 1" because some versions of GNU ld
28020 don't set the low bit of the result for R_ARM_REL32
28021 relocations against thumb function symbols.
28022 On ARMv6M this is +4, not +8. */
28023 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28024 assemble_name (file, labelpc);
28025 fputs (":\n", file);
28026 if (TARGET_THUMB1_ONLY)
28027 {
28028 /* This is 2 insns after the start of the thunk, so we know it
28029 is 4-byte aligned. */
28030 fputs ("\tadd\tr3, pc, r3\n", file);
28031 fputs ("\tmov r12, r3\n", file);
28032 }
28033 else
28034 fputs ("\tadd\tr12, pc, r12\n", file);
28035 }
28036 else if (TARGET_THUMB1_ONLY)
28037 fputs ("\tmov r12, r3\n", file);
28038 }
28039 if (TARGET_THUMB1_ONLY)
28040 {
28041 if (mi_delta > 255)
28042 {
28043 fputs ("\tldr\tr3, ", file);
28044 assemble_name (file, label);
28045 fputs ("+4\n", file);
28046 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28047 mi_op, this_regno, this_regno);
28048 }
28049 else if (mi_delta != 0)
28050 {
28051 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28052 mi_op, this_regno, this_regno,
28053 mi_delta);
28054 }
28055 }
28056 else
28057 {
28058 /* TODO: Use movw/movt for large constants when available. */
28059 while (mi_delta != 0)
28060 {
28061 if ((mi_delta & (3 << shift)) == 0)
28062 shift += 2;
28063 else
28064 {
28065 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28066 mi_op, this_regno, this_regno,
28067 mi_delta & (0xff << shift));
28068 mi_delta &= ~(0xff << shift);
28069 shift += 8;
28070 }
28071 }
28072 }
28073 if (TARGET_THUMB1)
28074 {
28075 if (TARGET_THUMB1_ONLY)
28076 fputs ("\tpop\t{r3}\n", file);
28077
28078 fprintf (file, "\tbx\tr12\n");
28079 ASM_OUTPUT_ALIGN (file, 2);
28080 assemble_name (file, label);
28081 fputs (":\n", file);
28082 if (flag_pic)
28083 {
28084 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28085 rtx tem = XEXP (DECL_RTL (function), 0);
28086 tem = plus_constant (GET_MODE (tem), tem, -7);
28087 tem = gen_rtx_MINUS (GET_MODE (tem),
28088 tem,
28089 gen_rtx_SYMBOL_REF (Pmode,
28090 ggc_strdup (labelpc)));
28091 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28092 }
28093 else
28094 /* Output ".word .LTHUNKn". */
28095 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28096
28097 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28098 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28099 }
28100 else
28101 {
28102 fputs ("\tb\t", file);
28103 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28104 if (NEED_PLT_RELOC)
28105 fputs ("(PLT)", file);
28106 fputc ('\n', file);
28107 }
28108
28109 final_end_function ();
28110 }
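
/* As a rough sketch of the non-Thumb-1 output (hypothetical target C::f,
   DELTA == 4, 'this' in r0), the thunk body reduces to
       add     r0, r0, #4
       b       _ZN1C1fEv(PLT)
   with the (PLT) suffix present only when NEED_PLT_RELOC holds.  */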
28111
28112 int
28113 arm_emit_vector_const (FILE *file, rtx x)
28114 {
28115 int i;
28116 const char * pattern;
28117
28118 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28119
28120 switch (GET_MODE (x))
28121 {
28122 case V2SImode: pattern = "%08x"; break;
28123 case V4HImode: pattern = "%04x"; break;
28124 case V8QImode: pattern = "%02x"; break;
28125 default: gcc_unreachable ();
28126 }
28127
28128 fprintf (file, "0x");
28129 for (i = CONST_VECTOR_NUNITS (x); i--;)
28130 {
28131 rtx element;
28132
28133 element = CONST_VECTOR_ELT (x, i);
28134 fprintf (file, pattern, INTVAL (element));
28135 }
28136
28137 return 1;
28138 }
28139
28140 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28141 HFmode constant pool entries are actually loaded with ldr. */
28142 void
28143 arm_emit_fp16_const (rtx c)
28144 {
28145 REAL_VALUE_TYPE r;
28146 long bits;
28147
28148 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28149 bits = real_to_target (NULL, &r, HFmode);
28150 if (WORDS_BIG_ENDIAN)
28151 assemble_zeros (2);
28152 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28153 if (!WORDS_BIG_ENDIAN)
28154 assemble_zeros (2);
28155 }
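
/* For example, the HFmode constant 1.0 encodes as 0x3c00, so on a
   little-endian target this emits the half-word 0x3c00 followed by two
   bytes of zero padding (the padding comes first when big-endian).  */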
28156
28157 const char *
28158 arm_output_load_gr (rtx *operands)
28159 {
28160 rtx reg;
28161 rtx offset;
28162 rtx wcgr;
28163 rtx sum;
28164
28165 if (!MEM_P (operands [1])
28166 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28167 || !REG_P (reg = XEXP (sum, 0))
28168 || !CONST_INT_P (offset = XEXP (sum, 1))
28169 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28170 return "wldrw%?\t%0, %1";
28171
28172 /* Fix up an out-of-range load of a GR register. */
28173 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28174 wcgr = operands[0];
28175 operands[0] = reg;
28176 output_asm_insn ("ldr%?\t%0, %1", operands);
28177
28178 operands[0] = wcgr;
28179 operands[1] = reg;
28180 output_asm_insn ("tmcr%?\t%0, %1", operands);
28181 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28182
28183 return "";
28184 }
28185
28186 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28187
28188 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28189 named arg and all anonymous args onto the stack.
28190 XXX I know the prologue shouldn't be pushing registers, but it is faster
28191 that way. */
28192
28193 static void
28194 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28195 enum machine_mode mode,
28196 tree type,
28197 int *pretend_size,
28198 int second_time ATTRIBUTE_UNUSED)
28199 {
28200 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28201 int nregs;
28202
28203 cfun->machine->uses_anonymous_args = 1;
28204 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28205 {
28206 nregs = pcum->aapcs_ncrn;
28207 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28208 nregs++;
28209 }
28210 else
28211 nregs = pcum->nregs;
28212
28213 if (nregs < NUM_ARG_REGS)
28214 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28215 }
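
/* For example, for "int f (int a, ...)" under AAPCS only r0 carries a
   named argument, so nregs == 1 and *pretend_size becomes
   (4 - 1) * 4 == 12: the prologue then pushes r1-r3 so the anonymous
   arguments sit contiguously on the stack for va_arg.  */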
28216
28217 /* We can't rely on the caller doing the proper promotion when
28218 using APCS or ATPCS. */
28219
28220 static bool
28221 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28222 {
28223 return !TARGET_AAPCS_BASED;
28224 }
28225
28226 static enum machine_mode
28227 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28228 enum machine_mode mode,
28229 int *punsignedp ATTRIBUTE_UNUSED,
28230 const_tree fntype ATTRIBUTE_UNUSED,
28231 int for_return ATTRIBUTE_UNUSED)
28232 {
28233 if (GET_MODE_CLASS (mode) == MODE_INT
28234 && GET_MODE_SIZE (mode) < 4)
28235 return SImode;
28236
28237 return mode;
28238 }
28239
28240 /* AAPCS based ABIs use short enums by default. */
28241
28242 static bool
28243 arm_default_short_enums (void)
28244 {
28245 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28246 }
28247
28248
28249 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28250
28251 static bool
28252 arm_align_anon_bitfield (void)
28253 {
28254 return TARGET_AAPCS_BASED;
28255 }
28256
28257
28258 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28259
28260 static tree
28261 arm_cxx_guard_type (void)
28262 {
28263 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28264 }
28265
28266
28267 /* The EABI says test the least significant bit of a guard variable. */
28268
28269 static bool
28270 arm_cxx_guard_mask_bit (void)
28271 {
28272 return TARGET_AAPCS_BASED;
28273 }
28274
28275
28276 /* The EABI specifies that all array cookies are 8 bytes long. */
28277
28278 static tree
28279 arm_get_cookie_size (tree type)
28280 {
28281 tree size;
28282
28283 if (!TARGET_AAPCS_BASED)
28284 return default_cxx_get_cookie_size (type);
28285
28286 size = build_int_cst (sizetype, 8);
28287 return size;
28288 }
28289
28290
28291 /* The EABI says that array cookies should also contain the element size. */
28292
28293 static bool
28294 arm_cookie_has_size (void)
28295 {
28296 return TARGET_AAPCS_BASED;
28297 }
28298
28299
28300 /* The EABI says constructors and destructors should return a pointer to
28301 the object constructed/destroyed. */
28302
28303 static bool
28304 arm_cxx_cdtor_returns_this (void)
28305 {
28306 return TARGET_AAPCS_BASED;
28307 }
28308
28309 /* The EABI says that an inline function may never be the key
28310 method. */
28311
28312 static bool
28313 arm_cxx_key_method_may_be_inline (void)
28314 {
28315 return !TARGET_AAPCS_BASED;
28316 }
28317
28318 static void
28319 arm_cxx_determine_class_data_visibility (tree decl)
28320 {
28321 if (!TARGET_AAPCS_BASED
28322 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28323 return;
28324
28325 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28326 is exported. However, on systems without dynamic vague linkage,
28327 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28328 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28329 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28330 else
28331 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28332 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28333 }
28334
28335 static bool
28336 arm_cxx_class_data_always_comdat (void)
28337 {
28338 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28339 vague linkage if the class has no key function. */
28340 return !TARGET_AAPCS_BASED;
28341 }
28342
28343
28344 /* The EABI says __aeabi_atexit should be used to register static
28345 destructors. */
28346
28347 static bool
28348 arm_cxx_use_aeabi_atexit (void)
28349 {
28350 return TARGET_AAPCS_BASED;
28351 }
28352
28353
28354 void
28355 arm_set_return_address (rtx source, rtx scratch)
28356 {
28357 arm_stack_offsets *offsets;
28358 HOST_WIDE_INT delta;
28359 rtx addr;
28360 unsigned long saved_regs;
28361
28362 offsets = arm_get_frame_offsets ();
28363 saved_regs = offsets->saved_regs_mask;
28364
28365 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28366 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28367 else
28368 {
28369 if (frame_pointer_needed)
28370 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28371 else
28372 {
28373 /* LR will be the first saved register. */
28374 delta = offsets->outgoing_args - (offsets->frame + 4);
28375
28376
28377 if (delta >= 4096)
28378 {
28379 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28380 GEN_INT (delta & ~4095)));
28381 addr = scratch;
28382 delta &= 4095;
28383 }
28384 else
28385 addr = stack_pointer_rtx;
28386
28387 addr = plus_constant (Pmode, addr, delta);
28388 }
28389 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28390 }
28391 }
28392
28393
28394 void
28395 thumb_set_return_address (rtx source, rtx scratch)
28396 {
28397 arm_stack_offsets *offsets;
28398 HOST_WIDE_INT delta;
28399 HOST_WIDE_INT limit;
28400 int reg;
28401 rtx addr;
28402 unsigned long mask;
28403
28404 emit_use (source);
28405
28406 offsets = arm_get_frame_offsets ();
28407 mask = offsets->saved_regs_mask;
28408 if (mask & (1 << LR_REGNUM))
28409 {
28410 limit = 1024;
28411 /* Find the saved regs. */
28412 if (frame_pointer_needed)
28413 {
28414 delta = offsets->soft_frame - offsets->saved_args;
28415 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28416 if (TARGET_THUMB1)
28417 limit = 128;
28418 }
28419 else
28420 {
28421 delta = offsets->outgoing_args - offsets->saved_args;
28422 reg = SP_REGNUM;
28423 }
28424 /* Allow for the stack frame. */
28425 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28426 delta -= 16;
28427 /* The link register is always the first saved register. */
28428 delta -= 4;
28429
28430 /* Construct the address. */
28431 addr = gen_rtx_REG (SImode, reg);
28432 if (delta > limit)
28433 {
28434 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28435 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28436 addr = scratch;
28437 }
28438 else
28439 addr = plus_constant (Pmode, addr, delta);
28440
28441 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28442 }
28443 else
28444 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28445 }
28446
28447 /* Implements target hook vector_mode_supported_p. */
28448 bool
28449 arm_vector_mode_supported_p (enum machine_mode mode)
28450 {
28451 /* Neon also supports V2SImode, etc. listed in the clause below. */
28452 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28453 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28454 return true;
28455
28456 if ((TARGET_NEON || TARGET_IWMMXT)
28457 && ((mode == V2SImode)
28458 || (mode == V4HImode)
28459 || (mode == V8QImode)))
28460 return true;
28461
28462 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28463 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28464 || mode == V2HAmode))
28465 return true;
28466
28467 return false;
28468 }
28469
28470 /* Implements target hook array_mode_supported_p. */
28471
28472 static bool
28473 arm_array_mode_supported_p (enum machine_mode mode,
28474 unsigned HOST_WIDE_INT nelems)
28475 {
28476 if (TARGET_NEON
28477 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28478 && (nelems >= 2 && nelems <= 4))
28479 return true;
28480
28481 return false;
28482 }
28483
28484 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28485 registers when autovectorizing for Neon, at least until multiple vector
28486 widths are supported properly by the middle-end. */
28487
28488 static enum machine_mode
28489 arm_preferred_simd_mode (enum machine_mode mode)
28490 {
28491 if (TARGET_NEON)
28492 switch (mode)
28493 {
28494 case SFmode:
28495 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28496 case SImode:
28497 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28498 case HImode:
28499 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28500 case QImode:
28501 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28502 case DImode:
28503 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28504 return V2DImode;
28505 break;
28506
28507 default:;
28508 }
28509
28510 if (TARGET_REALLY_IWMMXT)
28511 switch (mode)
28512 {
28513 case SImode:
28514 return V2SImode;
28515 case HImode:
28516 return V4HImode;
28517 case QImode:
28518 return V8QImode;
28519
28520 default:;
28521 }
28522
28523 return word_mode;
28524 }
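
/* For example, SImode arithmetic is offered V4SImode (a quadword q
   register) when Neon is enabled, unless -mvectorize-with-neon-double is
   given, in which case V2SImode (a doubleword d register) is preferred;
   iWMMXt targets always get the doubleword forms.  */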
28525
28526 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28527
28528 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28529 using r0-r4 for function arguments, r7 for the stack frame and not have
28530 enough left over to do doubleword arithmetic. For Thumb-2 all the
28531 potentially problematic instructions accept high registers so this is not
28532 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28533 that require many low registers. */
28534 static bool
28535 arm_class_likely_spilled_p (reg_class_t rclass)
28536 {
28537 if ((TARGET_THUMB1 && rclass == LO_REGS)
28538 || rclass == CC_REG)
28539 return true;
28540
28541 return false;
28542 }
28543
28544 /* Implements target hook small_register_classes_for_mode_p. */
28545 bool
28546 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
28547 {
28548 return TARGET_THUMB1;
28549 }
28550
28551 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28552 ARM insns and therefore guarantee that the shift count is modulo 256.
28553 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28554 guarantee no particular behavior for out-of-range counts. */
28555
28556 static unsigned HOST_WIDE_INT
28557 arm_shift_truncation_mask (enum machine_mode mode)
28558 {
28559 return mode == SImode ? 255 : 0;
28560 }
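
/* Returning 255 for SImode tells the middle-end that an explicit
   "count & 255" before a shift is redundant and may be dropped, since
   the core already reduces the count modulo 256; returning 0 for other
   modes makes no such promise for DImode shifts.  */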
28561
28562
28563 /* Map internal gcc register numbers to DWARF2 register numbers. */
28564
28565 unsigned int
28566 arm_dbx_register_number (unsigned int regno)
28567 {
28568 if (regno < 16)
28569 return regno;
28570
28571 if (IS_VFP_REGNUM (regno))
28572 {
28573 /* See comment in arm_dwarf_register_span. */
28574 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28575 return 64 + regno - FIRST_VFP_REGNUM;
28576 else
28577 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28578 }
28579
28580 if (IS_IWMMXT_GR_REGNUM (regno))
28581 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28582
28583 if (IS_IWMMXT_REGNUM (regno))
28584 return 112 + regno - FIRST_IWMMXT_REGNUM;
28585
28586 gcc_unreachable ();
28587 }
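
/* For instance, s5 (regno FIRST_VFP_REGNUM + 5) maps to DWARF register
   64 + 5 = 69, while d16 (regno FIRST_VFP_REGNUM + 32, with no single
   precision aliases) maps to 256 + 32 / 2 = 272, inside the EABI's
   D0-D31 range of 256-287.  */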
28588
28589 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28590 GCC models them as 64 32-bit registers, so we need to describe this to
28591 the DWARF generation code. Other registers can use the default. */
28592 static rtx
28593 arm_dwarf_register_span (rtx rtl)
28594 {
28595 enum machine_mode mode;
28596 unsigned regno;
28597 rtx parts[8];
28598 int nregs;
28599 int i;
28600
28601 regno = REGNO (rtl);
28602 if (!IS_VFP_REGNUM (regno))
28603 return NULL_RTX;
28604
28605 /* XXX FIXME: The EABI defines two VFP register ranges:
28606 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28607 256-287: D0-D31
28608 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28609 corresponding D register. Until GDB supports this, we shall use the
28610 legacy encodings. We also use these encodings for D0-D15 for
28611 compatibility with older debuggers. */
28612 mode = GET_MODE (rtl);
28613 if (GET_MODE_SIZE (mode) < 8)
28614 return NULL_RTX;
28615
28616 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28617 {
28618 nregs = GET_MODE_SIZE (mode) / 4;
28619 for (i = 0; i < nregs; i += 2)
28620 if (TARGET_BIG_END)
28621 {
28622 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28623 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28624 }
28625 else
28626 {
28627 parts[i] = gen_rtx_REG (SImode, regno + i);
28628 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28629 }
28630 }
28631 else
28632 {
28633 nregs = GET_MODE_SIZE (mode) / 8;
28634 for (i = 0; i < nregs; i++)
28635 parts[i] = gen_rtx_REG (DImode, regno + i);
28636 }
28637
28638 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
28639 }
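
/* For example, a DFmode value in d5 (which overlaps s10 and s11) is
   described as a PARALLEL of two SImode pieces, s10 then s11 on a
   little-endian target (swapped for big-endian), so the legacy
   single-register numbering above still applies.  */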
28640
28641 #if ARM_UNWIND_INFO
28642 /* Emit unwind directives for a store-multiple instruction or stack pointer
28643 push during alignment.
28644 These should only ever be generated by the function prologue code, so
28645 expect them to have a particular form. */
28646
28647 static void
28648 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
28649 {
28650 int i;
28651 HOST_WIDE_INT offset;
28652 HOST_WIDE_INT nregs;
28653 int reg_size;
28654 unsigned reg;
28655 unsigned lastreg;
28656 rtx e;
28657
28658 e = XVECEXP (p, 0, 0);
28659 if (GET_CODE (e) != SET)
28660 abort ();
28661
28662 /* First insn will adjust the stack pointer. */
28663 if (GET_CODE (e) != SET
28664 || !REG_P (XEXP (e, 0))
28665 || REGNO (XEXP (e, 0)) != SP_REGNUM
28666 || GET_CODE (XEXP (e, 1)) != PLUS)
28667 abort ();
28668
28669 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
28670 nregs = XVECLEN (p, 0) - 1;
28671
28672 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
28673 if (reg < 16)
28674 {
28675 /* The function prologue may also push pc, but does not annotate it, as it is
28676 never restored. We turn this into a stack pointer adjustment. */
28677 if (nregs * 4 == offset - 4)
28678 {
28679 fprintf (asm_out_file, "\t.pad #4\n");
28680 offset -= 4;
28681 }
28682 reg_size = 4;
28683 fprintf (asm_out_file, "\t.save {");
28684 }
28685 else if (IS_VFP_REGNUM (reg))
28686 {
28687 reg_size = 8;
28688 fprintf (asm_out_file, "\t.vsave {");
28689 }
28690 else
28691 /* Unknown register type. */
28692 abort ();
28693
28694 /* If the stack increment doesn't match the size of the saved registers,
28695 something has gone horribly wrong. */
28696 if (offset != nregs * reg_size)
28697 abort ();
28698
28699 offset = 0;
28700 lastreg = 0;
28701 /* The remaining insns will describe the stores. */
28702 for (i = 1; i <= nregs; i++)
28703 {
28704 /* Expect (set (mem <addr>) (reg)).
28705 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28706 e = XVECEXP (p, 0, i);
28707 if (GET_CODE (e) != SET
28708 || !MEM_P (XEXP (e, 0))
28709 || !REG_P (XEXP (e, 1)))
28710 abort ();
28711
28712 reg = REGNO (XEXP (e, 1));
28713 if (reg < lastreg)
28714 abort ();
28715
28716 if (i != 1)
28717 fprintf (asm_out_file, ", ");
28718 /* We can't use %r for vfp because we need to use the
28719 double precision register names. */
28720 if (IS_VFP_REGNUM (reg))
28721 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
28722 else
28723 asm_fprintf (asm_out_file, "%r", reg);
28724
28725 #ifdef ENABLE_CHECKING
28726 /* Check that the addresses are consecutive. */
28727 e = XEXP (XEXP (e, 0), 0);
28728 if (GET_CODE (e) == PLUS)
28729 {
28730 offset += reg_size;
28731 if (!REG_P (XEXP (e, 0))
28732 || REGNO (XEXP (e, 0)) != SP_REGNUM
28733 || !CONST_INT_P (XEXP (e, 1))
28734 || offset != INTVAL (XEXP (e, 1)))
28735 abort ();
28736 }
28737 else if (i != 1
28738 || !REG_P (e)
28739 || REGNO (e) != SP_REGNUM)
28740 abort ();
28741 #endif
28742 }
28743 fprintf (asm_out_file, "}\n");
28744 }
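
/* For a core-register store-multiple such as "push {r4, r5, lr}" this
   emits the matching directive ".save {r4, r5, lr}"; a VFP
   store-multiple instead produces a ".vsave {d8, d9, ...}" entry, and a
   push of pc on top of that becomes an extra ".pad #4".  */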
28745
28746 /* Emit unwind directives for a SET. */
28747
28748 static void
28749 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
28750 {
28751 rtx e0;
28752 rtx e1;
28753 unsigned reg;
28754
28755 e0 = XEXP (p, 0);
28756 e1 = XEXP (p, 1);
28757 switch (GET_CODE (e0))
28758 {
28759 case MEM:
28760 /* Pushing a single register. */
28761 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
28762 || !REG_P (XEXP (XEXP (e0, 0), 0))
28763 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
28764 abort ();
28765
28766 asm_fprintf (asm_out_file, "\t.save ");
28767 if (IS_VFP_REGNUM (REGNO (e1)))
28768 asm_fprintf(asm_out_file, "{d%d}\n",
28769 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
28770 else
28771 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
28772 break;
28773
28774 case REG:
28775 if (REGNO (e0) == SP_REGNUM)
28776 {
28777 /* A stack increment. */
28778 if (GET_CODE (e1) != PLUS
28779 || !REG_P (XEXP (e1, 0))
28780 || REGNO (XEXP (e1, 0)) != SP_REGNUM
28781 || !CONST_INT_P (XEXP (e1, 1)))
28782 abort ();
28783
28784 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
28785 -INTVAL (XEXP (e1, 1)));
28786 }
28787 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
28788 {
28789 HOST_WIDE_INT offset;
28790
28791 if (GET_CODE (e1) == PLUS)
28792 {
28793 if (!REG_P (XEXP (e1, 0))
28794 || !CONST_INT_P (XEXP (e1, 1)))
28795 abort ();
28796 reg = REGNO (XEXP (e1, 0));
28797 offset = INTVAL (XEXP (e1, 1));
28798 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
28799 HARD_FRAME_POINTER_REGNUM, reg,
28800 offset);
28801 }
28802 else if (REG_P (e1))
28803 {
28804 reg = REGNO (e1);
28805 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
28806 HARD_FRAME_POINTER_REGNUM, reg);
28807 }
28808 else
28809 abort ();
28810 }
28811 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
28812 {
28813 /* Move from sp to reg. */
28814 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
28815 }
28816 else if (GET_CODE (e1) == PLUS
28817 && REG_P (XEXP (e1, 0))
28818 && REGNO (XEXP (e1, 0)) == SP_REGNUM
28819 && CONST_INT_P (XEXP (e1, 1)))
28820 {
28821 /* Set reg to offset from sp. */
28822 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
28823 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
28824 }
28825 else
28826 abort ();
28827 break;
28828
28829 default:
28830 abort ();
28831 }
28832 }
28833
28834
28835 /* Emit unwind directives for the given insn. */
28836
28837 static void
28838 arm_unwind_emit (FILE * asm_out_file, rtx insn)
28839 {
28840 rtx note, pat;
28841 bool handled_one = false;
28842
28843 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28844 return;
28845
28846 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28847 && (TREE_NOTHROW (current_function_decl)
28848 || crtl->all_throwers_are_sibcalls))
28849 return;
28850
28851 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
28852 return;
28853
28854 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
28855 {
28856 switch (REG_NOTE_KIND (note))
28857 {
28858 case REG_FRAME_RELATED_EXPR:
28859 pat = XEXP (note, 0);
28860 goto found;
28861
28862 case REG_CFA_REGISTER:
28863 pat = XEXP (note, 0);
28864 if (pat == NULL)
28865 {
28866 pat = PATTERN (insn);
28867 if (GET_CODE (pat) == PARALLEL)
28868 pat = XVECEXP (pat, 0, 0);
28869 }
28870
28871 /* Only emitted for IS_STACKALIGN re-alignment. */
28872 {
28873 rtx dest, src;
28874 unsigned reg;
28875
28876 src = SET_SRC (pat);
28877 dest = SET_DEST (pat);
28878
28879 gcc_assert (src == stack_pointer_rtx);
28880 reg = REGNO (dest);
28881 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28882 reg + 0x90, reg);
28883 }
28884 handled_one = true;
28885 break;
28886
28887 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
28888 so that correct DWARF information is produced for shrink-wrapping. We should
28889 not emit unwind information for it because these notes are used either for
28890 pretend arguments or to adjust sp and restore registers from the
28891 stack. */
28892 case REG_CFA_ADJUST_CFA:
28893 case REG_CFA_RESTORE:
28894 return;
28895
28896 case REG_CFA_DEF_CFA:
28897 case REG_CFA_EXPRESSION:
28898 case REG_CFA_OFFSET:
28899 /* ??? Only handling here what we actually emit. */
28900 gcc_unreachable ();
28901
28902 default:
28903 break;
28904 }
28905 }
28906 if (handled_one)
28907 return;
28908 pat = PATTERN (insn);
28909 found:
28910
28911 switch (GET_CODE (pat))
28912 {
28913 case SET:
28914 arm_unwind_emit_set (asm_out_file, pat);
28915 break;
28916
28917 case SEQUENCE:
28918 /* Store multiple. */
28919 arm_unwind_emit_sequence (asm_out_file, pat);
28920 break;
28921
28922 default:
28923 abort();
28924 }
28925 }
28926
28927
28928 /* Output a reference from a function exception table to the type_info
28929 object X. The EABI specifies that the symbol should be relocated by
28930 an R_ARM_TARGET2 relocation. */
28931
28932 static bool
28933 arm_output_ttype (rtx x)
28934 {
28935 fputs ("\t.word\t", asm_out_file);
28936 output_addr_const (asm_out_file, x);
28937 /* Use special relocations for symbol references. */
28938 if (!CONST_INT_P (x))
28939 fputs ("(TARGET2)", asm_out_file);
28940 fputc ('\n', asm_out_file);
28941
28942 return TRUE;
28943 }
28944
28945 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
28946
28947 static void
28948 arm_asm_emit_except_personality (rtx personality)
28949 {
28950 fputs ("\t.personality\t", asm_out_file);
28951 output_addr_const (asm_out_file, personality);
28952 fputc ('\n', asm_out_file);
28953 }
28954
28955 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
28956
28957 static void
28958 arm_asm_init_sections (void)
28959 {
28960 exception_section = get_unnamed_section (0, output_section_asm_op,
28961 "\t.handlerdata");
28962 }
28963 #endif /* ARM_UNWIND_INFO */
28964
28965 /* Output unwind directives for the start/end of a function. */
28966
28967 void
28968 arm_output_fn_unwind (FILE * f, bool prologue)
28969 {
28970 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28971 return;
28972
28973 if (prologue)
28974 fputs ("\t.fnstart\n", f);
28975 else
28976 {
28977 /* If this function will never be unwound, then mark it as such.
28978 The same condition is used in arm_unwind_emit to suppress
28979 the frame annotations. */
28980 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28981 && (TREE_NOTHROW (current_function_decl)
28982 || crtl->all_throwers_are_sibcalls))
28983 fputs("\t.cantunwind\n", f);
28984
28985 fputs ("\t.fnend\n", f);
28986 }
28987 }
28988
28989 static bool
28990 arm_emit_tls_decoration (FILE *fp, rtx x)
28991 {
28992 enum tls_reloc reloc;
28993 rtx val;
28994
28995 val = XVECEXP (x, 0, 0);
28996 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
28997
28998 output_addr_const (fp, val);
28999
29000 switch (reloc)
29001 {
29002 case TLS_GD32:
29003 fputs ("(tlsgd)", fp);
29004 break;
29005 case TLS_LDM32:
29006 fputs ("(tlsldm)", fp);
29007 break;
29008 case TLS_LDO32:
29009 fputs ("(tlsldo)", fp);
29010 break;
29011 case TLS_IE32:
29012 fputs ("(gottpoff)", fp);
29013 break;
29014 case TLS_LE32:
29015 fputs ("(tpoff)", fp);
29016 break;
29017 case TLS_DESCSEQ:
29018 fputs ("(tlsdesc)", fp);
29019 break;
29020 default:
29021 gcc_unreachable ();
29022 }
29023
29024 switch (reloc)
29025 {
29026 case TLS_GD32:
29027 case TLS_LDM32:
29028 case TLS_IE32:
29029 case TLS_DESCSEQ:
29030 fputs (" + (. - ", fp);
29031 output_addr_const (fp, XVECEXP (x, 0, 2));
29032 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
29033 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29034 output_addr_const (fp, XVECEXP (x, 0, 3));
29035 fputc (')', fp);
29036 break;
29037 default:
29038 break;
29039 }
29040
29041 return TRUE;
29042 }
29043
29044 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29045
29046 static void
29047 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29048 {
29049 gcc_assert (size == 4);
29050 fputs ("\t.word\t", file);
29051 output_addr_const (file, x);
29052 fputs ("(tlsldo)", file);
29053 }
29054
29055 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29056
29057 static bool
29058 arm_output_addr_const_extra (FILE *fp, rtx x)
29059 {
29060 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29061 return arm_emit_tls_decoration (fp, x);
29062 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29063 {
29064 char label[256];
29065 int labelno = INTVAL (XVECEXP (x, 0, 0));
29066
29067 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29068 assemble_name_raw (fp, label);
29069
29070 return TRUE;
29071 }
29072 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29073 {
29074 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29075 if (GOT_PCREL)
29076 fputs ("+.", fp);
29077 fputs ("-(", fp);
29078 output_addr_const (fp, XVECEXP (x, 0, 0));
29079 fputc (')', fp);
29080 return TRUE;
29081 }
29082 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29083 {
29084 output_addr_const (fp, XVECEXP (x, 0, 0));
29085 if (GOT_PCREL)
29086 fputs ("+.", fp);
29087 fputs ("-(", fp);
29088 output_addr_const (fp, XVECEXP (x, 0, 1));
29089 fputc (')', fp);
29090 return TRUE;
29091 }
29092 else if (GET_CODE (x) == CONST_VECTOR)
29093 return arm_emit_vector_const (fp, x);
29094
29095 return FALSE;
29096 }
29097
29098 /* Output assembly for a shift instruction.
29099 SET_FLAGS determines how the instruction modifies the condition codes.
29100 0 - Do not set condition codes.
29101 1 - Set condition codes.
29102 2 - Use smallest instruction. */
29103 const char *
29104 arm_output_shift(rtx * operands, int set_flags)
29105 {
29106 char pattern[100];
29107 static const char flag_chars[3] = {'?', '.', '!'};
29108 const char *shift;
29109 HOST_WIDE_INT val;
29110 char c;
29111
29112 c = flag_chars[set_flags];
29113 if (TARGET_UNIFIED_ASM)
29114 {
29115 shift = shift_op(operands[3], &val);
29116 if (shift)
29117 {
29118 if (val != -1)
29119 operands[2] = GEN_INT(val);
29120 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29121 }
29122 else
29123 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29124 }
29125 else
29126 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29127 output_asm_insn (pattern, operands);
29128 return "";
29129 }
29130
29131 /* Output assembly for a WMMX immediate shift instruction. */
29132 const char *
29133 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29134 {
29135 int shift = INTVAL (operands[2]);
29136 char templ[50];
29137 enum machine_mode opmode = GET_MODE (operands[0]);
29138
29139 gcc_assert (shift >= 0);
29140
29141 /* Handle out-of-range shift values: greater than 63 (for the D qualifier),
29142 31 (for the W qualifier) or 15 (for the H qualifier). */
29143 if (((opmode == V4HImode) && (shift > 15))
29144 || ((opmode == V2SImode) && (shift > 31))
29145 || ((opmode == DImode) && (shift > 63)))
29146 {
29147 if (wror_or_wsra)
29148 {
29149 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29150 output_asm_insn (templ, operands);
29151 if (opmode == DImode)
29152 {
29153 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29154 output_asm_insn (templ, operands);
29155 }
29156 }
29157 else
29158 {
29159 /* The destination register will contain all zeros. */
29160 sprintf (templ, "wzero\t%%0");
29161 output_asm_insn (templ, operands);
29162 }
29163 return "";
29164 }
29165
29166 if ((opmode == DImode) && (shift > 32))
29167 {
29168 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29169 output_asm_insn (templ, operands);
29170 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29171 output_asm_insn (templ, operands);
29172 }
29173 else
29174 {
29175 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29176 output_asm_insn (templ, operands);
29177 }
29178 return "";
29179 }
29180
29181 /* Output assembly for a WMMX tinsr instruction. */
29182 const char *
29183 arm_output_iwmmxt_tinsr (rtx *operands)
29184 {
29185 int mask = INTVAL (operands[3]);
29186 int i;
29187 char templ[50];
29188 int units = mode_nunits[GET_MODE (operands[0])];
29189 gcc_assert ((mask & (mask - 1)) == 0);
29190 for (i = 0; i < units; ++i)
29191 {
29192 if ((mask & 0x01) == 1)
29193 {
29194 break;
29195 }
29196 mask >>= 1;
29197 }
29198 gcc_assert (i < units);
29199 {
29200 switch (GET_MODE (operands[0]))
29201 {
29202 case V8QImode:
29203 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29204 break;
29205 case V4HImode:
29206 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29207 break;
29208 case V2SImode:
29209 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29210 break;
29211 default:
29212 gcc_unreachable ();
29213 break;
29214 }
29215 output_asm_insn (templ, operands);
29216 }
29217 return "";
29218 }
29219
29220 /* Output a Thumb-1 casesi dispatch sequence. */
29221 const char *
29222 thumb1_output_casesi (rtx *operands)
29223 {
29224 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
29225
29226 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29227
29228 switch (GET_MODE(diff_vec))
29229 {
29230 case QImode:
29231 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29232 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29233 case HImode:
29234 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29235 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29236 case SImode:
29237 return "bl\t%___gnu_thumb1_case_si";
29238 default:
29239 gcc_unreachable ();
29240 }
29241 }
29242
29243 /* Output a Thumb-2 casesi instruction. */
29244 const char *
29245 thumb2_output_casesi (rtx *operands)
29246 {
29247 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
29248
29249 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29250
29251 output_asm_insn ("cmp\t%0, %1", operands);
29252 output_asm_insn ("bhi\t%l3", operands);
29253 switch (GET_MODE(diff_vec))
29254 {
29255 case QImode:
29256 return "tbb\t[%|pc, %0]";
29257 case HImode:
29258 return "tbh\t[%|pc, %0, lsl #1]";
29259 case SImode:
29260 if (flag_pic)
29261 {
29262 output_asm_insn ("adr\t%4, %l2", operands);
29263 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29264 output_asm_insn ("add\t%4, %4, %5", operands);
29265 return "bx\t%4";
29266 }
29267 else
29268 {
29269 output_asm_insn ("adr\t%4, %l2", operands);
29270 return "ldr\t%|pc, [%4, %0, lsl #2]";
29271 }
29272 default:
29273 gcc_unreachable ();
29274 }
29275 }
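
/* As a sketch (register numbers and labels are arbitrary), a HImode
   dispatch emitted by the routine above looks roughly like:

	cmp	r0, r1
	bhi	.Ldefault
	tbh	[pc, r0, lsl #1]

   while the PIC SImode variant materialises the table address with ADR,
   loads and adds the offset, and branches with BX.  */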
29276
29277 /* Most ARM cores are single issue, but some newer ones can issue more than
29278 one instruction per cycle. The scheduler descriptions rely on this being correct. */
29279 static int
29280 arm_issue_rate (void)
29281 {
29282 switch (arm_tune)
29283 {
29284 case cortexa15:
29285 return 3;
29286
29287 case cortexr4:
29288 case cortexr4f:
29289 case cortexr5:
29290 case genericv7a:
29291 case cortexa5:
29292 case cortexa7:
29293 case cortexa8:
29294 case cortexa9:
29295 case cortexa12:
29296 case cortexa53:
29297 case fa726te:
29298 case marvell_pj4:
29299 return 2;
29300
29301 default:
29302 return 1;
29303 }
29304 }
29305
29306 /* A table and a function to perform ARM-specific name mangling for
29307 NEON vector types in order to conform to the AAPCS (see "Procedure
29308 Call Standard for the ARM Architecture", Appendix A). To qualify
29309 for emission with the mangled names defined in that document, a
29310 vector type must not only be of the correct mode but also be
29311 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29312 typedef struct
29313 {
29314 enum machine_mode mode;
29315 const char *element_type_name;
29316 const char *aapcs_name;
29317 } arm_mangle_map_entry;
29318
29319 static arm_mangle_map_entry arm_mangle_map[] = {
29320 /* 64-bit containerized types. */
29321 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29322 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29323 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29324 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29325 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29326 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29327 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29328 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29329 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29330 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29331
29332 /* 128-bit containerized types. */
29333 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29334 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29335 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29336 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29337 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29338 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29339 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29340 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29341 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29342 { VOIDmode, NULL, NULL }
29343 };
29344
29345 const char *
29346 arm_mangle_type (const_tree type)
29347 {
29348 arm_mangle_map_entry *pos = arm_mangle_map;
29349
29350 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29351 has to be mangled as if it is in the "std" namespace. */
29352 if (TARGET_AAPCS_BASED
29353 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29354 return "St9__va_list";
29355
29356 /* Half-precision float. */
29357 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29358 return "Dh";
29359
29360 if (TREE_CODE (type) != VECTOR_TYPE)
29361 return NULL;
29362
29363 /* Check the mode of the vector type, and the name of the vector
29364 element type, against the table. */
29365 while (pos->mode != VOIDmode)
29366 {
29367 tree elt_type = TREE_TYPE (type);
29368
29369 if (pos->mode == TYPE_MODE (type)
29370 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29371 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29372 pos->element_type_name))
29373 return pos->aapcs_name;
29374
29375 pos++;
29376 }
29377
29378 /* Use the default mangling for unrecognized (possibly user-defined)
29379 vector types. */
29380 return NULL;
29381 }
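
/* For example, per the table above a V4SImode vector whose element type is
   __builtin_neon_si is mangled as "17__simd128_int32_t"; assuming that
   typedef is used for int32x4_t, a C++ function "void f (int32x4_t)" would
   be expected to mangle to something like _Z1f17__simd128_int32_t.  */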
29382
29383 /* Order of allocation of core registers for Thumb: this allocation is
29384 written over the corresponding initial entries of the array
29385 initialized with REG_ALLOC_ORDER. We allocate all low registers
29386 first. Saving and restoring a low register is usually cheaper than
29387 using a call-clobbered high register. */
29388
29389 static const int thumb_core_reg_alloc_order[] =
29390 {
29391 3, 2, 1, 0, 4, 5, 6, 7,
29392 14, 12, 8, 9, 10, 11
29393 };
29394
29395 /* Adjust register allocation order when compiling for Thumb. */
29396
29397 void
29398 arm_order_regs_for_local_alloc (void)
29399 {
29400 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29401 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29402 if (TARGET_THUMB)
29403 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29404 sizeof (thumb_core_reg_alloc_order));
29405 }
29406
29407 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29408
29409 bool
29410 arm_frame_pointer_required (void)
29411 {
29412 return (cfun->has_nonlocal_label
29413 || SUBTARGET_FRAME_POINTER_REQUIRED
29414 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29415 }
29416
29417 /* Thumb-1 is the only target that cannot support conditional execution,
29418 so return true if the target is not Thumb-1. */
29419 static bool
29420 arm_have_conditional_execution (void)
29421 {
29422 return !TARGET_THUMB1;
29423 }
29424
29425 tree
29426 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29427 {
29428 enum machine_mode in_mode, out_mode;
29429 int in_n, out_n;
29430
29431 if (TREE_CODE (type_out) != VECTOR_TYPE
29432 || TREE_CODE (type_in) != VECTOR_TYPE
29433 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
29434 return NULL_TREE;
29435
29436 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29437 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29438 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29439 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29440
29441 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29442 decl of the vectorized builtin for the appropriate vector mode.
29443 NULL_TREE is returned if no such builtin is available. */
29444 #undef ARM_CHECK_BUILTIN_MODE
29445 #define ARM_CHECK_BUILTIN_MODE(C) \
29446 (out_mode == SFmode && out_n == C \
29447 && in_mode == SFmode && in_n == C)
29448
29449 #undef ARM_FIND_VRINT_VARIANT
29450 #define ARM_FIND_VRINT_VARIANT(N) \
29451 (ARM_CHECK_BUILTIN_MODE (2) \
29452 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29453 : (ARM_CHECK_BUILTIN_MODE (4) \
29454 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29455 : NULL_TREE))
29456
29457 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29458 {
29459 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29460 switch (fn)
29461 {
29462 case BUILT_IN_FLOORF:
29463 return ARM_FIND_VRINT_VARIANT (vrintm);
29464 case BUILT_IN_CEILF:
29465 return ARM_FIND_VRINT_VARIANT (vrintp);
29466 case BUILT_IN_TRUNCF:
29467 return ARM_FIND_VRINT_VARIANT (vrintz);
29468 case BUILT_IN_ROUNDF:
29469 return ARM_FIND_VRINT_VARIANT (vrinta);
29470 default:
29471 return NULL_TREE;
29472 }
29473 }
29474 return NULL_TREE;
29475 }
29476 #undef ARM_CHECK_BUILTIN_MODE
29477 #undef ARM_FIND_VRINT_VARIANT
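
/* For instance, with -mfpu=neon-fp-armv8 and -funsafe-math-optimizations a
   vectorized call to floorf over V4SFmode should be mapped by the hook
   above to the ARM_BUILTIN_NEON_vrintmv4sf builtin (the VRINTM.F32
   instruction); without ARMv8 FP support it simply returns NULL_TREE.  */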
29478
29479 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29480 static HOST_WIDE_INT
29481 arm_vector_alignment (const_tree type)
29482 {
29483 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29484
29485 if (TARGET_AAPCS_BASED)
29486 align = MIN (align, 64);
29487
29488 return align;
29489 }
29490
29491 static unsigned int
29492 arm_autovectorize_vector_sizes (void)
29493 {
29494 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
29495 }
29496
29497 static bool
29498 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29499 {
29500 /* Vectors which aren't in packed structures will not be less aligned than
29501 the natural alignment of their element type, so this is safe. */
29502 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
29503 return !is_packed;
29504
29505 return default_builtin_vector_alignment_reachable (type, is_packed);
29506 }
29507
29508 static bool
29509 arm_builtin_support_vector_misalignment (enum machine_mode mode,
29510 const_tree type, int misalignment,
29511 bool is_packed)
29512 {
29513 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
29514 {
29515 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29516
29517 if (is_packed)
29518 return align == 1;
29519
29520 /* If the misalignment is unknown, we should be able to handle the access
29521 so long as it is not to a member of a packed data structure. */
29522 if (misalignment == -1)
29523 return true;
29524
29525 /* Return true if the misalignment is a multiple of the natural alignment
29526 of the vector's element type. This is probably always going to be
29527 true in practice, since we've already established that this isn't a
29528 packed access. */
29529 return ((misalignment % align) == 0);
29530 }
29531
29532 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29533 is_packed);
29534 }
29535
29536 static void
29537 arm_conditional_register_usage (void)
29538 {
29539 int regno;
29540
29541 if (TARGET_THUMB1 && optimize_size)
29542 {
29543 /* When optimizing for size on Thumb-1, it's better not
29544 to use the HI regs, because of the overhead of
29545 stacking them. */
29546 for (regno = FIRST_HI_REGNUM;
29547 regno <= LAST_HI_REGNUM; ++regno)
29548 fixed_regs[regno] = call_used_regs[regno] = 1;
29549 }
29550
29551 /* The link register can be clobbered by any branch insn,
29552 but we have no way to track that at present, so mark
29553 it as unavailable. */
29554 if (TARGET_THUMB1)
29555 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29556
29557 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
29558 {
29559 /* VFPv3 registers are disabled when earlier VFP
29560 versions are selected due to the definition of
29561 LAST_VFP_REGNUM. */
29562 for (regno = FIRST_VFP_REGNUM;
29563 regno <= LAST_VFP_REGNUM; ++ regno)
29564 {
29565 fixed_regs[regno] = 0;
29566 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29567 || regno >= FIRST_VFP_REGNUM + 32;
29568 }
29569 }
29570
29571 if (TARGET_REALLY_IWMMXT)
29572 {
29573 regno = FIRST_IWMMXT_GR_REGNUM;
29574 /* The 2002/10/09 revision of the XScale ABI has wCG0
29575 and wCG1 as call-preserved registers. The 2002/11/21
29576 revision changed this so that all wCG registers are
29577 scratch registers. */
29578 for (regno = FIRST_IWMMXT_GR_REGNUM;
29579 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29580 fixed_regs[regno] = 0;
29581 /* The XScale ABI has wR0 - wR9 as scratch registers,
29582 the rest as call-preserved registers. */
29583 for (regno = FIRST_IWMMXT_REGNUM;
29584 regno <= LAST_IWMMXT_REGNUM; ++ regno)
29585 {
29586 fixed_regs[regno] = 0;
29587 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
29588 }
29589 }
29590
29591 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
29592 {
29593 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29594 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29595 }
29596 else if (TARGET_APCS_STACK)
29597 {
29598 fixed_regs[10] = 1;
29599 call_used_regs[10] = 1;
29600 }
29601 /* -mcaller-super-interworking reserves r11 for calls to
29602 _interwork_r11_call_via_rN(). Making the register global
29603 is an easy way of ensuring that it remains valid for all
29604 calls. */
29605 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
29606 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
29607 {
29608 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29609 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29610 if (TARGET_CALLER_INTERWORKING)
29611 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29612 }
29613 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29614 }
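
/* Net effect of the VFP block above, matching the AAPCS: d0-d7 (s0-s15)
   and, when present, d16-d31 are treated as call-clobbered, while d8-d15
   (s16-s31) remain call-saved.  */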
29615
29616 static reg_class_t
29617 arm_preferred_rename_class (reg_class_t rclass)
29618 {
29619 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29620 using GENERAL_REGS. During the register rename pass, prefer LO_REGS
29621 so that code size can be reduced. */
29622 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
29623 return LO_REGS;
29624 else
29625 return NO_REGS;
29626 }
29627
29628 /* Compute the attribute "length" of insn "*push_multi".
29629 So this function MUST be kept in sync with that insn pattern. */
29630 int
29631 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
29632 {
29633 int i, regno, hi_reg;
29634 int num_saves = XVECLEN (parallel_op, 0);
29635
29636 /* ARM mode. */
29637 if (TARGET_ARM)
29638 return 4;
29639 /* Thumb1 mode. */
29640 if (TARGET_THUMB1)
29641 return 2;
29642
29643 /* Thumb2 mode. */
29644 regno = REGNO (first_op);
29645 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29646 for (i = 1; i < num_saves && !hi_reg; i++)
29647 {
29648 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
29649 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29650 }
29651
29652 if (!hi_reg)
29653 return 2;
29654 return 4;
29655 }
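
/* For example, in Thumb-2 a "push {r4-r7, lr}" can use the 16-bit encoding
   (length 2), whereas pushing any high register other than LR, e.g.
   "push {r4, r8}", forces the 32-bit encoding (length 4).  */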
29656
29657 /* Compute the number of instructions emitted by output_move_double. */
29658 int
29659 arm_count_output_move_double_insns (rtx *operands)
29660 {
29661 int count;
29662 rtx ops[2];
29663 /* output_move_double may modify the operands array, so call it
29664 here on a copy of the array. */
29665 ops[0] = operands[0];
29666 ops[1] = operands[1];
29667 output_move_double (ops, false, &count);
29668 return count;
29669 }
29670
29671 int
29672 vfp3_const_double_for_fract_bits (rtx operand)
29673 {
29674 REAL_VALUE_TYPE r0;
29675
29676 if (!CONST_DOUBLE_P (operand))
29677 return 0;
29678
29679 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29680 if (exact_real_inverse (DFmode, &r0))
29681 {
29682 if (exact_real_truncate (DFmode, &r0))
29683 {
29684 HOST_WIDE_INT value = real_to_integer (&r0);
29685 value = value & 0xffffffff;
29686 if ((value != 0) && ( (value & (value - 1)) == 0))
29687 return int_log2 (value);
29688 }
29689 }
29690 return 0;
29691 }
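
/* Minimal example: for the constant 0.125 the exact inverse is 8.0, a power
   of two, so this returns 3 (the number of fraction bits); 3.0 has no exact
   power-of-two inverse and yields 0.  The result is intended as the
   fractional-bits operand of the VFP fixed-point VCVT patterns.  */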
29692 \f
29693 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29694
29695 static void
29696 arm_pre_atomic_barrier (enum memmodel model)
29697 {
29698 if (need_atomic_barrier_p (model, true))
29699 emit_insn (gen_memory_barrier ());
29700 }
29701
29702 static void
29703 arm_post_atomic_barrier (enum memmodel model)
29704 {
29705 if (need_atomic_barrier_p (model, false))
29706 emit_insn (gen_memory_barrier ());
29707 }
29708
29709 /* Emit the load-exclusive and store-exclusive instructions.
29710 Use acquire and release versions if necessary. */
29711
29712 static void
29713 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
29714 {
29715 rtx (*gen) (rtx, rtx);
29716
29717 if (acq)
29718 {
29719 switch (mode)
29720 {
29721 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
29722 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
29723 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
29724 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
29725 default:
29726 gcc_unreachable ();
29727 }
29728 }
29729 else
29730 {
29731 switch (mode)
29732 {
29733 case QImode: gen = gen_arm_load_exclusiveqi; break;
29734 case HImode: gen = gen_arm_load_exclusivehi; break;
29735 case SImode: gen = gen_arm_load_exclusivesi; break;
29736 case DImode: gen = gen_arm_load_exclusivedi; break;
29737 default:
29738 gcc_unreachable ();
29739 }
29740 }
29741
29742 emit_insn (gen (rval, mem));
29743 }
29744
29745 static void
29746 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
29747 rtx mem, bool rel)
29748 {
29749 rtx (*gen) (rtx, rtx, rtx);
29750
29751 if (rel)
29752 {
29753 switch (mode)
29754 {
29755 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
29756 case HImode: gen = gen_arm_store_release_exclusivehi; break;
29757 case SImode: gen = gen_arm_store_release_exclusivesi; break;
29758 case DImode: gen = gen_arm_store_release_exclusivedi; break;
29759 default:
29760 gcc_unreachable ();
29761 }
29762 }
29763 else
29764 {
29765 switch (mode)
29766 {
29767 case QImode: gen = gen_arm_store_exclusiveqi; break;
29768 case HImode: gen = gen_arm_store_exclusivehi; break;
29769 case SImode: gen = gen_arm_store_exclusivesi; break;
29770 case DImode: gen = gen_arm_store_exclusivedi; break;
29771 default:
29772 gcc_unreachable ();
29773 }
29774 }
29775
29776 emit_insn (gen (bval, rval, mem));
29777 }
29778
29779 /* Emit INSN as a jump and mark it as unlikely to be taken. */
29780
29781 static void
29782 emit_unlikely_jump (rtx insn)
29783 {
29784 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
29785
29786 insn = emit_jump_insn (insn);
29787 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
29788 }
29789
29790 /* Expand a compare and swap pattern. */
29791
29792 void
29793 arm_expand_compare_and_swap (rtx operands[])
29794 {
29795 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
29796 enum machine_mode mode;
29797 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
29798
29799 bval = operands[0];
29800 rval = operands[1];
29801 mem = operands[2];
29802 oldval = operands[3];
29803 newval = operands[4];
29804 is_weak = operands[5];
29805 mod_s = operands[6];
29806 mod_f = operands[7];
29807 mode = GET_MODE (mem);
29808
29809 /* Normally the succ memory model must be stronger than fail, but in the
29810 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29811 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
29812
29813 if (TARGET_HAVE_LDACQ
29814 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
29815 && INTVAL (mod_s) == MEMMODEL_RELEASE)
29816 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
29817
29818 switch (mode)
29819 {
29820 case QImode:
29821 case HImode:
29822 /* For narrow modes, we're going to perform the comparison in SImode,
29823 so do the zero-extension now. */
29824 rval = gen_reg_rtx (SImode);
29825 oldval = convert_modes (SImode, mode, oldval, true);
29826 /* FALLTHRU */
29827
29828 case SImode:
29829 /* Force the value into a register if needed. We waited until after
29830 the zero-extension above to do this properly. */
29831 if (!arm_add_operand (oldval, SImode))
29832 oldval = force_reg (SImode, oldval);
29833 break;
29834
29835 case DImode:
29836 if (!cmpdi_operand (oldval, mode))
29837 oldval = force_reg (mode, oldval);
29838 break;
29839
29840 default:
29841 gcc_unreachable ();
29842 }
29843
29844 switch (mode)
29845 {
29846 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
29847 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
29848 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
29849 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
29850 default:
29851 gcc_unreachable ();
29852 }
29853
29854 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
29855
29856 if (mode == QImode || mode == HImode)
29857 emit_move_insn (operands[1], gen_lowpart (mode, rval));
29858
29859 /* In all cases, we arrange for success to be signaled by Z set.
29860 This arrangement allows for the boolean result to be used directly
29861 in a subsequent branch, post optimization. */
29862 x = gen_rtx_REG (CCmode, CC_REGNUM);
29863 x = gen_rtx_EQ (SImode, x, const0_rtx);
29864 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
29865 }
29866
29867 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
29868 another memory store between the load-exclusive and store-exclusive can
29869 reset the monitor from Exclusive to Open state. This means we must wait
29870 until after reload to split the pattern, lest we get a register spill in
29871 the middle of the atomic sequence. */
29872
29873 void
29874 arm_split_compare_and_swap (rtx operands[])
29875 {
29876 rtx rval, mem, oldval, newval, scratch;
29877 enum machine_mode mode;
29878 enum memmodel mod_s, mod_f;
29879 bool is_weak;
29880 rtx label1, label2, x, cond;
29881
29882 rval = operands[0];
29883 mem = operands[1];
29884 oldval = operands[2];
29885 newval = operands[3];
29886 is_weak = (operands[4] != const0_rtx);
29887 mod_s = (enum memmodel) INTVAL (operands[5]);
29888 mod_f = (enum memmodel) INTVAL (operands[6]);
29889 scratch = operands[7];
29890 mode = GET_MODE (mem);
29891
29892 bool use_acquire = TARGET_HAVE_LDACQ
29893 && !(mod_s == MEMMODEL_RELAXED
29894 || mod_s == MEMMODEL_CONSUME
29895 || mod_s == MEMMODEL_RELEASE);
29896
29897 bool use_release = TARGET_HAVE_LDACQ
29898 && !(mod_s == MEMMODEL_RELAXED
29899 || mod_s == MEMMODEL_CONSUME
29900 || mod_s == MEMMODEL_ACQUIRE);
29901
29902 /* Checks whether a barrier is needed and emits one accordingly. */
29903 if (!(use_acquire || use_release))
29904 arm_pre_atomic_barrier (mod_s);
29905
29906 label1 = NULL_RTX;
29907 if (!is_weak)
29908 {
29909 label1 = gen_label_rtx ();
29910 emit_label (label1);
29911 }
29912 label2 = gen_label_rtx ();
29913
29914 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
29915
29916 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
29917 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29918 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29919 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
29920 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
29921
29922 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
29923
29924 /* Weak or strong, we want EQ to be true for success, so that we
29925 match the flags that we got from the compare above. */
29926 cond = gen_rtx_REG (CCmode, CC_REGNUM);
29927 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
29928 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
29929
29930 if (!is_weak)
29931 {
29932 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29933 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29934 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
29935 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
29936 }
29937
29938 if (mod_f != MEMMODEL_RELAXED)
29939 emit_label (label2);
29940
29941 /* Checks whether a barrier is needed and emits one accordingly. */
29942 if (!(use_acquire || use_release))
29943 arm_post_atomic_barrier (mod_s);
29944
29945 if (mod_f == MEMMODEL_RELAXED)
29946 emit_label (label2);
29947 }
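
/* For orientation (register names are arbitrary), a strong SImode
   compare-and-swap split by the routine above expands to roughly:

	1:	ldrex	r0, [r2]	@ ldaex when acquire semantics are used
		cmp	r0, r3
		bne	2f
		strex	r1, r4, [r2]	@ stlex when release semantics are used
		cmp	r1, #0
		bne	1b
	2:

   with explicit barriers around the sequence when the memory model does
   not permit the acquire/release forms.  */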
29948
29949 void
29950 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
29951 rtx value, rtx model_rtx, rtx cond)
29952 {
29953 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
29954 enum machine_mode mode = GET_MODE (mem);
29955 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
29956 rtx label, x;
29957
29958 bool use_acquire = TARGET_HAVE_LDACQ
29959 && !(model == MEMMODEL_RELAXED
29960 || model == MEMMODEL_CONSUME
29961 || model == MEMMODEL_RELEASE);
29962
29963 bool use_release = TARGET_HAVE_LDACQ
29964 && !(model == MEMMODEL_RELAXED
29965 || model == MEMMODEL_CONSUME
29966 || model == MEMMODEL_ACQUIRE);
29967
29968 /* Checks whether a barrier is needed and emits one accordingly. */
29969 if (!(use_acquire || use_release))
29970 arm_pre_atomic_barrier (model);
29971
29972 label = gen_label_rtx ();
29973 emit_label (label);
29974
29975 if (new_out)
29976 new_out = gen_lowpart (wmode, new_out);
29977 if (old_out)
29978 old_out = gen_lowpart (wmode, old_out);
29979 else
29980 old_out = new_out;
29981 value = simplify_gen_subreg (wmode, value, mode, 0);
29982
29983 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
29984
29985 switch (code)
29986 {
29987 case SET:
29988 new_out = value;
29989 break;
29990
29991 case NOT:
29992 x = gen_rtx_AND (wmode, old_out, value);
29993 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
29994 x = gen_rtx_NOT (wmode, new_out);
29995 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
29996 break;
29997
29998 case MINUS:
29999 if (CONST_INT_P (value))
30000 {
30001 value = GEN_INT (-INTVAL (value));
30002 code = PLUS;
30003 }
30004 /* FALLTHRU */
30005
30006 case PLUS:
30007 if (mode == DImode)
30008 {
30009 /* DImode plus/minus need to clobber flags. */
30010 /* The adddi3 and subdi3 patterns are incorrectly written so that
30011 they require matching operands, even when we could easily support
30012 three operands. Thankfully, this can be fixed up post-splitting,
30013 as the individual add+adc patterns do accept three operands and
30014 post-reload cprop can make these moves go away. */
30015 emit_move_insn (new_out, old_out);
30016 if (code == PLUS)
30017 x = gen_adddi3 (new_out, new_out, value);
30018 else
30019 x = gen_subdi3 (new_out, new_out, value);
30020 emit_insn (x);
30021 break;
30022 }
30023 /* FALLTHRU */
30024
30025 default:
30026 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30027 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30028 break;
30029 }
30030
30031 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30032 use_release);
30033
30034 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30035 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30036
30037 /* Checks whether a barrier is needed and emits one accordingly. */
30038 if (!(use_acquire || use_release))
30039 arm_post_atomic_barrier (model);
30040 }
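
/* Sketch of the loop produced for, say, an SImode atomic add (register
   names are arbitrary):

	1:	ldrex	r0, [r2]
		add	r1, r0, r3
		strex	r4, r1, [r2]
		cmp	r4, #0
		bne	1b

   again with barriers or acquire/release exclusives chosen from MODEL.  */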
30041 \f
30042 #define MAX_VECT_LEN 16
30043
30044 struct expand_vec_perm_d
30045 {
30046 rtx target, op0, op1;
30047 unsigned char perm[MAX_VECT_LEN];
30048 enum machine_mode vmode;
30049 unsigned char nelt;
30050 bool one_vector_p;
30051 bool testing_p;
30052 };
30053
30054 /* Generate a variable permutation. */
30055
30056 static void
30057 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30058 {
30059 enum machine_mode vmode = GET_MODE (target);
30060 bool one_vector_p = rtx_equal_p (op0, op1);
30061
30062 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30063 gcc_checking_assert (GET_MODE (op0) == vmode);
30064 gcc_checking_assert (GET_MODE (op1) == vmode);
30065 gcc_checking_assert (GET_MODE (sel) == vmode);
30066 gcc_checking_assert (TARGET_NEON);
30067
30068 if (one_vector_p)
30069 {
30070 if (vmode == V8QImode)
30071 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30072 else
30073 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30074 }
30075 else
30076 {
30077 rtx pair;
30078
30079 if (vmode == V8QImode)
30080 {
30081 pair = gen_reg_rtx (V16QImode);
30082 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30083 pair = gen_lowpart (TImode, pair);
30084 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30085 }
30086 else
30087 {
30088 pair = gen_reg_rtx (OImode);
30089 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30090 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30091 }
30092 }
30093 }
30094
30095 void
30096 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30097 {
30098 enum machine_mode vmode = GET_MODE (target);
30099 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30100 bool one_vector_p = rtx_equal_p (op0, op1);
30101 rtx rmask[MAX_VECT_LEN], mask;
30102
30103 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30104 numbering of elements for big-endian, we must reverse the order. */
30105 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30106
30107 /* The VTBL instruction does not use a modulo index, so we must take care
30108 of that ourselves. */
30109 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30110 for (i = 0; i < nelt; ++i)
30111 rmask[i] = mask;
30112 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30113 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30114
30115 arm_expand_vec_perm_1 (target, op0, op1, sel);
30116 }
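
/* For example, with two distinct V8QImode operands the selector is ANDed
   with a vector of 15 (2 * nelt - 1) before the VTBL, while with a single
   operand it is masked with 7, giving the modulo behaviour that
   VEC_PERM_EXPR requires.  */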
30117
30118 /* Generate or test for an insn that supports a constant permutation. */
30119
30120 /* Recognize patterns for the VUZP insns. */
30121
30122 static bool
30123 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30124 {
30125 unsigned int i, odd, mask, nelt = d->nelt;
30126 rtx out0, out1, in0, in1, x;
30127 rtx (*gen)(rtx, rtx, rtx, rtx);
30128
30129 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30130 return false;
30131
30132 /* Note that these are little-endian tests. Adjust for big-endian later. */
30133 if (d->perm[0] == 0)
30134 odd = 0;
30135 else if (d->perm[0] == 1)
30136 odd = 1;
30137 else
30138 return false;
30139 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30140
30141 for (i = 0; i < nelt; i++)
30142 {
30143 unsigned elt = (i * 2 + odd) & mask;
30144 if (d->perm[i] != elt)
30145 return false;
30146 }
30147
30148 /* Success! */
30149 if (d->testing_p)
30150 return true;
30151
30152 switch (d->vmode)
30153 {
30154 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30155 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30156 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30157 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30158 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30159 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30160 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30161 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30162 default:
30163 gcc_unreachable ();
30164 }
30165
30166 in0 = d->op0;
30167 in1 = d->op1;
30168 if (BYTES_BIG_ENDIAN)
30169 {
30170 x = in0, in0 = in1, in1 = x;
30171 odd = !odd;
30172 }
30173
30174 out0 = d->target;
30175 out1 = gen_reg_rtx (d->vmode);
30176 if (odd)
30177 x = out0, out0 = out1, out1 = x;
30178
30179 emit_insn (gen (out0, in0, in1, out1));
30180 return true;
30181 }
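
/* E.g. the two-operand V8HImode selector {0, 2, 4, 6, 8, 10, 12, 14} picks
   every even-numbered element; it is matched here with odd == 0 and is
   emitted as a single VUZP instead of falling back to a table lookup.  */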
30182
30183 /* Recognize patterns for the VZIP insns. */
30184
30185 static bool
30186 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30187 {
30188 unsigned int i, high, mask, nelt = d->nelt;
30189 rtx out0, out1, in0, in1, x;
30190 rtx (*gen)(rtx, rtx, rtx, rtx);
30191
30192 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30193 return false;
30194
30195 /* Note that these are little-endian tests. Adjust for big-endian later. */
30196 high = nelt / 2;
30197 if (d->perm[0] == high)
30198 ;
30199 else if (d->perm[0] == 0)
30200 high = 0;
30201 else
30202 return false;
30203 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30204
30205 for (i = 0; i < nelt / 2; i++)
30206 {
30207 unsigned elt = (i + high) & mask;
30208 if (d->perm[i * 2] != elt)
30209 return false;
30210 elt = (elt + nelt) & mask;
30211 if (d->perm[i * 2 + 1] != elt)
30212 return false;
30213 }
30214
30215 /* Success! */
30216 if (d->testing_p)
30217 return true;
30218
30219 switch (d->vmode)
30220 {
30221 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30222 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30223 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30224 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30225 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30226 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30227 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30228 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30229 default:
30230 gcc_unreachable ();
30231 }
30232
30233 in0 = d->op0;
30234 in1 = d->op1;
30235 if (BYTES_BIG_ENDIAN)
30236 {
30237 x = in0, in0 = in1, in1 = x;
30238 high = !high;
30239 }
30240
30241 out0 = d->target;
30242 out1 = gen_reg_rtx (d->vmode);
30243 if (high)
30244 x = out0, out0 = out1, out1 = x;
30245
30246 emit_insn (gen (out0, in0, in1, out1));
30247 return true;
30248 }
30249
30250 /* Recognize patterns for the VREV insns. */
30251
30252 static bool
30253 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30254 {
30255 unsigned int i, j, diff, nelt = d->nelt;
30256 rtx (*gen)(rtx, rtx, rtx);
30257
30258 if (!d->one_vector_p)
30259 return false;
30260
30261 diff = d->perm[0];
30262 switch (diff)
30263 {
30264 case 7:
30265 switch (d->vmode)
30266 {
30267 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30268 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30269 default:
30270 return false;
30271 }
30272 break;
30273 case 3:
30274 switch (d->vmode)
30275 {
30276 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30277 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30278 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30279 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30280 default:
30281 return false;
30282 }
30283 break;
30284 case 1:
30285 switch (d->vmode)
30286 {
30287 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30288 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30289 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30290 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30291 case V4SImode: gen = gen_neon_vrev64v4si; break;
30292 case V2SImode: gen = gen_neon_vrev64v2si; break;
30293 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30294 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30295 default:
30296 return false;
30297 }
30298 break;
30299 default:
30300 return false;
30301 }
30302
30303 for (i = 0; i < nelt ; i += diff + 1)
30304 for (j = 0; j <= diff; j += 1)
30305 {
30306 /* This is guaranteed to be true as the value of diff
30307 is 7, 3, 1 and we should have enough elements in the
30308 queue to generate this. Getting a vector mask with a
30309 value of diff other than these values implies that
30310 something is wrong by the time we get here. */
30311 gcc_assert (i + j < nelt);
30312 if (d->perm[i + j] != i + diff - j)
30313 return false;
30314 }
30315
30316 /* Success! */
30317 if (d->testing_p)
30318 return true;
30319
30320 /* ??? The third operand is an artifact of the builtin infrastructure
30321 and is ignored by the actual instruction. */
30322 emit_insn (gen (d->target, d->op0, const0_rtx));
30323 return true;
30324 }
30325
30326 /* Recognize patterns for the VTRN insns. */
30327
30328 static bool
30329 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30330 {
30331 unsigned int i, odd, mask, nelt = d->nelt;
30332 rtx out0, out1, in0, in1, x;
30333 rtx (*gen)(rtx, rtx, rtx, rtx);
30334
30335 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30336 return false;
30337
30338 /* Note that these are little-endian tests. Adjust for big-endian later. */
30339 if (d->perm[0] == 0)
30340 odd = 0;
30341 else if (d->perm[0] == 1)
30342 odd = 1;
30343 else
30344 return false;
30345 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30346
30347 for (i = 0; i < nelt; i += 2)
30348 {
30349 if (d->perm[i] != i + odd)
30350 return false;
30351 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30352 return false;
30353 }
30354
30355 /* Success! */
30356 if (d->testing_p)
30357 return true;
30358
30359 switch (d->vmode)
30360 {
30361 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30362 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30363 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30364 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30365 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30366 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30367 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30368 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30369 default:
30370 gcc_unreachable ();
30371 }
30372
30373 in0 = d->op0;
30374 in1 = d->op1;
30375 if (BYTES_BIG_ENDIAN)
30376 {
30377 x = in0, in0 = in1, in1 = x;
30378 odd = !odd;
30379 }
30380
30381 out0 = d->target;
30382 out1 = gen_reg_rtx (d->vmode);
30383 if (odd)
30384 x = out0, out0 = out1, out1 = x;
30385
30386 emit_insn (gen (out0, in0, in1, out1));
30387 return true;
30388 }
30389
30390 /* Recognize patterns for the VEXT insns. */
30391
30392 static bool
30393 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30394 {
30395 unsigned int i, nelt = d->nelt;
30396 rtx (*gen) (rtx, rtx, rtx, rtx);
30397 rtx offset;
30398
30399 unsigned int location;
30400
30401 unsigned int next = d->perm[0] + 1;
30402
30403 /* TODO: Handle GCC's numbering of elements for big-endian. */
30404 if (BYTES_BIG_ENDIAN)
30405 return false;
30406
30407 /* Check if the extracted indexes are increasing by one. */
30408 for (i = 1; i < nelt; next++, i++)
30409 {
30410 /* If we hit the most significant element of the 2nd vector in
30411 the previous iteration, no need to test further. */
30412 if (next == 2 * nelt)
30413 return false;
30414
30415 /* If we are operating on only one vector: it could be a
30416 rotation. If there are only two elements of size < 64, let
30417 arm_evpc_neon_vrev catch it. */
30418 if (d->one_vector_p && (next == nelt))
30419 {
30420 if ((nelt == 2) && (d->vmode != V2DImode))
30421 return false;
30422 else
30423 next = 0;
30424 }
30425
30426 if (d->perm[i] != next)
30427 return false;
30428 }
30429
30430 location = d->perm[0];
30431
30432 switch (d->vmode)
30433 {
30434 case V16QImode: gen = gen_neon_vextv16qi; break;
30435 case V8QImode: gen = gen_neon_vextv8qi; break;
30436 case V4HImode: gen = gen_neon_vextv4hi; break;
30437 case V8HImode: gen = gen_neon_vextv8hi; break;
30438 case V2SImode: gen = gen_neon_vextv2si; break;
30439 case V4SImode: gen = gen_neon_vextv4si; break;
30440 case V2SFmode: gen = gen_neon_vextv2sf; break;
30441 case V4SFmode: gen = gen_neon_vextv4sf; break;
30442 case V2DImode: gen = gen_neon_vextv2di; break;
30443 default:
30444 return false;
30445 }
30446
30447 /* Success! */
30448 if (d->testing_p)
30449 return true;
30450
30451 offset = GEN_INT (location);
30452 emit_insn (gen (d->target, d->op0, d->op1, offset));
30453 return true;
30454 }
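
/* E.g. the single-operand V4SImode selector {1, 2, 3, 0} is a rotation by
   one element; the loop above accepts it and it is emitted as a VEXT with
   offset 1, both source operands being the same register.  */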
30455
30456 /* The NEON VTBL instruction is a fully variable permutation that's even
30457 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30458 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30459 can do slightly better by expanding this as a constant where we don't
30460 have to apply a mask. */
30461
30462 static bool
30463 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30464 {
30465 rtx rperm[MAX_VECT_LEN], sel;
30466 enum machine_mode vmode = d->vmode;
30467 unsigned int i, nelt = d->nelt;
30468
30469 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30470 numbering of elements for big-endian, we must reverse the order. */
30471 if (BYTES_BIG_ENDIAN)
30472 return false;
30473
30474 if (d->testing_p)
30475 return true;
30476
30477 /* Generic code will try constant permutation twice: once with the
30478 original mode and again with the elements lowered to QImode.
30479 So wait and don't do the selector expansion ourselves. */
30480 if (vmode != V8QImode && vmode != V16QImode)
30481 return false;
30482
30483 for (i = 0; i < nelt; ++i)
30484 rperm[i] = GEN_INT (d->perm[i]);
30485 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30486 sel = force_reg (vmode, sel);
30487
30488 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30489 return true;
30490 }
30491
30492 static bool
30493 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30494 {
30495 /* Check if the input mask matches vext before reordering the
30496 operands. */
30497 if (TARGET_NEON)
30498 if (arm_evpc_neon_vext (d))
30499 return true;
30500
30501 /* The pattern matching functions above are written to look for a small
30502 number to begin the sequence (0, 1, N/2). If we begin with an index
30503 from the second operand, we can swap the operands. */
30504 if (d->perm[0] >= d->nelt)
30505 {
30506 unsigned i, nelt = d->nelt;
30507 rtx x;
30508
30509 for (i = 0; i < nelt; ++i)
30510 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
30511
30512 x = d->op0;
30513 d->op0 = d->op1;
30514 d->op1 = x;
30515 }
30516
30517 if (TARGET_NEON)
30518 {
30519 if (arm_evpc_neon_vuzp (d))
30520 return true;
30521 if (arm_evpc_neon_vzip (d))
30522 return true;
30523 if (arm_evpc_neon_vrev (d))
30524 return true;
30525 if (arm_evpc_neon_vtrn (d))
30526 return true;
30527 return arm_evpc_neon_vtbl (d);
30528 }
30529 return false;
30530 }
30531
30532 /* Expand a vec_perm_const pattern. */
30533
30534 bool
30535 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
30536 {
30537 struct expand_vec_perm_d d;
30538 int i, nelt, which;
30539
30540 d.target = target;
30541 d.op0 = op0;
30542 d.op1 = op1;
30543
30544 d.vmode = GET_MODE (target);
30545 gcc_assert (VECTOR_MODE_P (d.vmode));
30546 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30547 d.testing_p = false;
30548
30549 for (i = which = 0; i < nelt; ++i)
30550 {
30551 rtx e = XVECEXP (sel, 0, i);
30552 int ei = INTVAL (e) & (2 * nelt - 1);
30553 which |= (ei < nelt ? 1 : 2);
30554 d.perm[i] = ei;
30555 }
30556
30557 switch (which)
30558 {
30559 default:
30560 gcc_unreachable();
30561
30562 case 3:
30563 d.one_vector_p = false;
30564 if (!rtx_equal_p (op0, op1))
30565 break;
30566
30567 /* The elements of PERM do not suggest that only the first operand
30568 is used, but both operands are identical. Allow easier matching
30569 of the permutation by folding the permutation into the single
30570 input vector. */
30571 /* FALLTHRU */
30572 case 2:
30573 for (i = 0; i < nelt; ++i)
30574 d.perm[i] &= nelt - 1;
30575 d.op0 = op1;
30576 d.one_vector_p = true;
30577 break;
30578
30579 case 1:
30580 d.op1 = op0;
30581 d.one_vector_p = true;
30582 break;
30583 }
30584
30585 return arm_expand_vec_perm_const_1 (&d);
30586 }
30587
30588 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
30589
30590 static bool
30591 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
30592 const unsigned char *sel)
30593 {
30594 struct expand_vec_perm_d d;
30595 unsigned int i, nelt, which;
30596 bool ret;
30597
30598 d.vmode = vmode;
30599 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30600 d.testing_p = true;
30601 memcpy (d.perm, sel, nelt);
30602
30603 /* Categorize the set of elements in the selector. */
30604 for (i = which = 0; i < nelt; ++i)
30605 {
30606 unsigned char e = d.perm[i];
30607 gcc_assert (e < 2 * nelt);
30608 which |= (e < nelt ? 1 : 2);
30609 }
30610
30611 /* For all elements from second vector, fold the elements to first. */
30612 if (which == 2)
30613 for (i = 0; i < nelt; ++i)
30614 d.perm[i] -= nelt;
30615
30616 /* Check whether the mask can be applied to the vector type. */
30617 d.one_vector_p = (which != 3);
30618
30619 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30620 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
30621 if (!d.one_vector_p)
30622 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
30623
30624 start_sequence ();
30625 ret = arm_expand_vec_perm_const_1 (&d);
30626 end_sequence ();
30627
30628 return ret;
30629 }
30630
30631 bool
30632 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
30633 {
30634 /* If we are soft float and either have ldrd or the mode fits
30635 in a word, then all auto increment forms are ok. */
30636 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
30637 return true;
30638
30639 switch (code)
30640 {
30641 /* Post increment and Pre Decrement are supported for all
30642 instruction forms, except that vector modes do not support Pre Decrement. */
30643 case ARM_POST_INC:
30644 case ARM_PRE_DEC:
30645 if (VECTOR_MODE_P (mode))
30646 {
30647 if (code != ARM_PRE_DEC)
30648 return true;
30649 else
30650 return false;
30651 }
30652
30653 return true;
30654
30655 case ARM_POST_DEC:
30656 case ARM_PRE_INC:
30657 /* Without LDRD and mode size greater than
30658 word size, there is no point in auto-incrementing
30659 because ldm and stm will not have these forms. */
30660 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
30661 return false;
30662
30663 /* Vector and floating point modes do not support
30664 these auto increment forms. */
30665 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
30666 return false;
30667
30668 return true;
30669
30670 default:
30671 return false;
30672
30673 }
30674
30675 return false;
30676 }
30677
30678 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
30679 on ARM, since we know that shifts by negative amounts are no-ops.
30680 Additionally, the default expansion code is not available or suitable
30681 for post-reload insn splits (this can occur when the register allocator
30682 chooses not to do a shift in NEON).
30683
30684 This function is used in both initial expand and post-reload splits, and
30685 handles all kinds of 64-bit shifts.
30686
30687 Input requirements:
30688 - It is safe for the input and output to be the same register, but
30689 early-clobber rules apply for the shift amount and scratch registers.
30690 - Shift by register requires both scratch registers. In all other cases
30691 the scratch registers may be NULL.
30692 - Ashiftrt by a register also clobbers the CC register. */
30693 void
30694 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
30695 rtx amount, rtx scratch1, rtx scratch2)
30696 {
30697 rtx out_high = gen_highpart (SImode, out);
30698 rtx out_low = gen_lowpart (SImode, out);
30699 rtx in_high = gen_highpart (SImode, in);
30700 rtx in_low = gen_lowpart (SImode, in);
30701
30702 /* Terminology:
30703 in = the register pair containing the input value.
30704 out = the destination register pair.
30705 up = the high- or low-part of each pair.
30706 down = the opposite part to "up".
30707 In a shift, we can consider bits to shift from "up"-stream to
30708 "down"-stream, so in a left-shift "up" is the low-part and "down"
30709 is the high-part of each register pair. */
30710
30711 rtx out_up = code == ASHIFT ? out_low : out_high;
30712 rtx out_down = code == ASHIFT ? out_high : out_low;
30713 rtx in_up = code == ASHIFT ? in_low : in_high;
30714 rtx in_down = code == ASHIFT ? in_high : in_low;
30715
30716 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
30717 gcc_assert (out
30718 && (REG_P (out) || GET_CODE (out) == SUBREG)
30719 && GET_MODE (out) == DImode);
30720 gcc_assert (in
30721 && (REG_P (in) || GET_CODE (in) == SUBREG)
30722 && GET_MODE (in) == DImode);
30723 gcc_assert (amount
30724 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
30725 && GET_MODE (amount) == SImode)
30726 || CONST_INT_P (amount)));
30727 gcc_assert (scratch1 == NULL
30728 || (GET_CODE (scratch1) == SCRATCH)
30729 || (GET_MODE (scratch1) == SImode
30730 && REG_P (scratch1)));
30731 gcc_assert (scratch2 == NULL
30732 || (GET_CODE (scratch2) == SCRATCH)
30733 || (GET_MODE (scratch2) == SImode
30734 && REG_P (scratch2)));
30735 gcc_assert (!REG_P (out) || !REG_P (amount)
30736 || !HARD_REGISTER_P (out)
30737 || (REGNO (out) != REGNO (amount)
30738 && REGNO (out) + 1 != REGNO (amount)));
30739
30740 /* Macros to make following code more readable. */
30741 #define SUB_32(DEST,SRC) \
30742 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
30743 #define RSB_32(DEST,SRC) \
30744 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
30745 #define SUB_S_32(DEST,SRC) \
30746 gen_addsi3_compare0 ((DEST), (SRC), \
30747 GEN_INT (-32))
30748 #define SET(DEST,SRC) \
30749 gen_rtx_SET (SImode, (DEST), (SRC))
30750 #define SHIFT(CODE,SRC,AMOUNT) \
30751 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
30752 #define LSHIFT(CODE,SRC,AMOUNT) \
30753 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
30754 SImode, (SRC), (AMOUNT))
30755 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
30756 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
30757 SImode, (SRC), (AMOUNT))
30758 #define ORR(A,B) \
30759 gen_rtx_IOR (SImode, (A), (B))
30760 #define BRANCH(COND,LABEL) \
30761 gen_arm_cond_branch ((LABEL), \
30762 gen_rtx_ ## COND (CCmode, cc_reg, \
30763 const0_rtx), \
30764 cc_reg)
30765
30766 /* Shifts by register and shifts by constant are handled separately. */
30767 if (CONST_INT_P (amount))
30768 {
30769 /* We have a shift-by-constant. */
30770
30771 /* First, handle out-of-range shift amounts.
30772 In both cases we try to match the result an ARM instruction in a
30773 shift-by-register would give. This helps reduce execution
30774 differences between optimization levels, but it won't stop other
30775 parts of the compiler doing different things. This is "undefined"
30776 behaviour, in any case. */
30777 if (INTVAL (amount) <= 0)
30778 emit_insn (gen_movdi (out, in));
30779 else if (INTVAL (amount) >= 64)
30780 {
30781 if (code == ASHIFTRT)
30782 {
30783 rtx const31_rtx = GEN_INT (31);
30784 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
30785 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
30786 }
30787 else
30788 emit_insn (gen_movdi (out, const0_rtx));
30789 }
30790
30791 /* Now handle valid shifts. */
30792 else if (INTVAL (amount) < 32)
30793 {
30794 /* Shifts by a constant less than 32. */
30795 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
30796
30797 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30798 emit_insn (SET (out_down,
30799 ORR (REV_LSHIFT (code, in_up, reverse_amount),
30800 out_down)));
30801 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30802 }
30803 else
30804 {
30805 /* Shifts by a constant greater than 31. */
30806 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
30807
30808 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
30809 if (code == ASHIFTRT)
30810 emit_insn (gen_ashrsi3 (out_up, in_up,
30811 GEN_INT (31)));
30812 else
30813 emit_insn (SET (out_up, const0_rtx));
30814 }
30815 }
30816 else
30817 {
30818 /* We have a shift-by-register. */
30819 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
30820
30821 /* This alternative requires the scratch registers. */
30822 gcc_assert (scratch1 && REG_P (scratch1));
30823 gcc_assert (scratch2 && REG_P (scratch2));
30824
30825 /* We will need the values "amount-32" and "32-amount" later.
30826 Swapping them around now allows the later code to be more general. */
30827 switch (code)
30828 {
30829 case ASHIFT:
30830 emit_insn (SUB_32 (scratch1, amount));
30831 emit_insn (RSB_32 (scratch2, amount));
30832 break;
30833 case ASHIFTRT:
30834 emit_insn (RSB_32 (scratch1, amount));
30835 /* Also set CC from amount - 32, so the LT branch below tests amount < 32. */
30836 emit_insn (SUB_S_32 (scratch2, amount));
30837 break;
30838 case LSHIFTRT:
30839 emit_insn (RSB_32 (scratch1, amount));
30840 emit_insn (SUB_32 (scratch2, amount));
30841 break;
30842 default:
30843 gcc_unreachable ();
30844 }
30845
30846 /* Emit code like this:
30847
30848 arithmetic-left:
30849 out_down = in_down << amount;
30850 out_down = (in_up << (amount - 32)) | out_down;
30851 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30852 out_up = in_up << amount;
30853
30854 arithmetic-right:
30855 out_down = in_down >> amount;
30856 out_down = (in_up << (32 - amount)) | out_down;
30857 if (amount >= 32)
30858 out_down = ((signed)in_up >> (amount - 32)) | out_down;
30859 out_up = in_up >> amount;
30860
30861 logical-right:
30862 out_down = in_down >> amount;
30863 out_down = (in_up << (32 - amount)) | out_down;
30864 if (amount >= 32)
30865 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30866 out_up = in_up >> amount;
30867
30868 The ARM and Thumb2 variants are the same but implemented slightly
30869 differently. If this were only called during expand we could just
30870 use the Thumb2 case and let combine do the right thing, but this
30871 can also be called from post-reload splitters. */
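/* A minimal host-side sketch (not compiler code) of why the
unconditional LSHIFTRT variant above is safe: ARM register-controlled
shifts use only the bottom byte of the shift register, and a logical
shift by 32 or more yields 0, while an arithmetic shift yields the sign
fill. Modelling that:

static uint32_t lsr (uint32_t x, uint32_t n)
{ n &= 0xff; return n >= 32 ? 0 : x >> n; }
static uint32_t lsl (uint32_t x, uint32_t n)
{ n &= 0xff; return n >= 32 ? 0 : x << n; }

out_down = lsr (in_down, amount);
out_down |= lsl (in_up, 32 - amount);   contributes 0 when amount >= 32
out_down |= lsr (in_up, amount - 32);   contributes 0 when amount < 32
out_up = lsr (in_up, amount);

For ASHIFTRT the third term would sign-fill when amount < 32, which is
why that case branches around it using the CC set earlier. */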
30872
30873 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30874
30875 if (!TARGET_THUMB2)
30876 {
30877 /* Emit code for ARM mode. */
30878 emit_insn (SET (out_down,
30879 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
30880 if (code == ASHIFTRT)
30881 {
30882 rtx done_label = gen_label_rtx ();
30883 emit_jump_insn (BRANCH (LT, done_label));
30884 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
30885 out_down)));
30886 emit_label (done_label);
30887 }
30888 else
30889 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
30890 out_down)));
30891 }
30892 else
30893 {
30894 /* Emit code for Thumb2 mode.
30895 Thumb2 can't do the shift and the OR in a single insn. */
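/* Schematically (illustration only; register names are placeholders):

ARM mode, one insn:      orr   Rdown, Rdown, Rup, lsl Rs1
Thumb2 mode, two insns:  lsl   Rs1, Rup, Rs1
                         orr   Rdown, Rdown, Rs1

because Thumb2's flexible second operand only allows a shift by an
immediate, not by a register. */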
30896 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
30897 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
30898
30899 if (code == ASHIFTRT)
30900 {
30901 rtx done_label = gen_label_rtx ();
30902 emit_jump_insn (BRANCH (LT, done_label));
30903 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
30904 emit_insn (SET (out_down, ORR (out_down, scratch2)));
30905 emit_label (done_label);
30906 }
30907 else
30908 {
30909 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
30910 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
30911 }
30912 }
30913
30914 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30915 }
30916
30917 #undef SUB_32
30918 #undef RSB_32
30919 #undef SUB_S_32
30920 #undef SET
30921 #undef SHIFT
30922 #undef LSHIFT
30923 #undef REV_LSHIFT
30924 #undef ORR
30925 #undef BRANCH
30926 }
30927
30928
30929 /* Return true if *COMPARISON is a comparison the target can handle,
30930 forcing its operands into a valid form where necessary. */
30931 bool
30932 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
30933 {
30934 enum rtx_code code = GET_CODE (*comparison);
30935 int code_int;
30936 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
30937 ? GET_MODE (*op2) : GET_MODE (*op1);
30938
30939 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
30940
30941 if (code == UNEQ || code == LTGT)
30942 return false;
30943
30944 code_int = (int)code;
30945 arm_canonicalize_comparison (&code_int, op1, op2, 0);
30946 PUT_CODE (*comparison, (enum rtx_code)code_int);
30947
30948 switch (mode)
30949 {
30950 case SImode:
30951 if (!arm_add_operand (*op1, mode))
30952 *op1 = force_reg (mode, *op1);
30953 if (!arm_add_operand (*op2, mode))
30954 *op2 = force_reg (mode, *op2);
30955 return true;
30956
30957 case DImode:
30958 if (!cmpdi_operand (*op1, mode))
30959 *op1 = force_reg (mode, *op1);
30960 if (!cmpdi_operand (*op2, mode))
30961 *op2 = force_reg (mode, *op2);
30962 return true;
30963
30964 case SFmode:
30965 case DFmode:
30966 if (!arm_float_compare_operand (*op1, mode))
30967 *op1 = force_reg (mode, *op1);
30968 if (!arm_float_compare_operand (*op2, mode))
30969 *op2 = force_reg (mode, *op2);
30970 return true;
30971 default:
30972 break;
30973 }
30974
30975 return false;
30976
30977 }
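/* Illustrative use only (a sketch of how an expander in arm.md might
call this; the operand indices are assumptions, not taken from any
particular pattern):

if (!arm_validize_comparison (&operands[1],
                              &XEXP (operands[1], 0),
                              &XEXP (operands[1], 1)))
  FAIL;

i.e. the caller passes the comparison rtx and the addresses of its two
operands, and fails the expansion if the comparison cannot be handled. */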
30978
30979 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30980
30981 static unsigned HOST_WIDE_INT
30982 arm_asan_shadow_offset (void)
30983 {
30984 return (unsigned HOST_WIDE_INT) 1 << 29;
30985 }
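/* For reference: AddressSanitizer maps an address to its shadow byte as
shadow = (addr >> ASAN_SHADOW_SHIFT) + offset, with a shift of 3, so
with the 1 << 29 offset returned above an access at, say, 0x40000000
has its shadow byte at (0x40000000 >> 3) + 0x20000000 = 0x28000000.
The concrete address is an illustrative example only. */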
30986
30987 #include "gt-arm.h"