1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992-2019 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define IN_TARGET_CODE 1
25 #include "coretypes.h"
33 #include "stringpool.h"
39 #include "diagnostic-core.h"
40 #include "insn-attr.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
57 /* This file should be included last. */
58 #include "target-def.h"
60 /* Return nonzero if there is a bypass for the output of
61 OUT_INSN and the fp store IN_INSN. */
63 pa_fpstore_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
65 machine_mode store_mode
;
66 machine_mode other_mode
;
69 if (recog_memoized (in_insn
) < 0
70 || (get_attr_type (in_insn
) != TYPE_FPSTORE
71 && get_attr_type (in_insn
) != TYPE_FPSTORE_LOAD
)
72 || recog_memoized (out_insn
) < 0)
75 store_mode
= GET_MODE (SET_SRC (PATTERN (in_insn
)));
77 set
= single_set (out_insn
);
81 other_mode
= GET_MODE (SET_SRC (set
));
83 return (GET_MODE_SIZE (store_mode
) == GET_MODE_SIZE (other_mode
));
87 #ifndef DO_FRAME_NOTES
88 #ifdef INCOMING_RETURN_ADDR_RTX
89 #define DO_FRAME_NOTES 1
91 #define DO_FRAME_NOTES 0
95 static void pa_option_override (void);
96 static void copy_reg_pointer (rtx
, rtx
);
97 static void fix_range (const char *);
98 static int hppa_register_move_cost (machine_mode mode
, reg_class_t
,
100 static int hppa_address_cost (rtx
, machine_mode mode
, addr_space_t
, bool);
101 static bool hppa_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
102 static inline rtx
force_mode (machine_mode
, rtx
);
103 static void pa_reorg (void);
104 static void pa_combine_instructions (void);
105 static int pa_can_combine_p (rtx_insn
*, rtx_insn
*, rtx_insn
*, int, rtx
,
107 static bool forward_branch_p (rtx_insn
*);
108 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT
, unsigned *);
109 static void compute_zdepdi_operands (unsigned HOST_WIDE_INT
, unsigned *);
110 static int compute_cpymem_length (rtx_insn
*);
111 static int compute_clrmem_length (rtx_insn
*);
112 static bool pa_assemble_integer (rtx
, unsigned int, int);
113 static void remove_useless_addtr_insns (int);
114 static void store_reg (int, HOST_WIDE_INT
, int);
115 static void store_reg_modify (int, int, HOST_WIDE_INT
);
116 static void load_reg (int, HOST_WIDE_INT
, int);
117 static void set_reg_plus_d (int, int, HOST_WIDE_INT
, int);
118 static rtx
pa_function_value (const_tree
, const_tree
, bool);
119 static rtx
pa_libcall_value (machine_mode
, const_rtx
);
120 static bool pa_function_value_regno_p (const unsigned int);
121 static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED
;
122 static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED
;
123 static void update_total_code_bytes (unsigned int);
124 static void pa_output_function_epilogue (FILE *);
125 static int pa_adjust_cost (rtx_insn
*, int, rtx_insn
*, int, unsigned int);
126 static int pa_issue_rate (void);
127 static int pa_reloc_rw_mask (void);
128 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED
;
129 static section
*pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED
;
130 static section
*pa_select_section (tree
, int, unsigned HOST_WIDE_INT
)
132 static void pa_encode_section_info (tree
, rtx
, int);
133 static const char *pa_strip_name_encoding (const char *);
134 static bool pa_function_ok_for_sibcall (tree
, tree
);
135 static void pa_globalize_label (FILE *, const char *)
137 static void pa_asm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
138 HOST_WIDE_INT
, tree
);
139 #if !defined(USE_COLLECT2)
140 static void pa_asm_out_constructor (rtx
, int);
141 static void pa_asm_out_destructor (rtx
, int);
143 static void pa_init_builtins (void);
144 static rtx
pa_expand_builtin (tree
, rtx
, rtx
, machine_mode mode
, int);
145 static rtx
hppa_builtin_saveregs (void);
146 static void hppa_va_start (tree
, rtx
);
147 static tree
hppa_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
148 static bool pa_scalar_mode_supported_p (scalar_mode
);
149 static bool pa_commutative_p (const_rtx x
, int outer_code
);
150 static void copy_fp_args (rtx_insn
*) ATTRIBUTE_UNUSED
;
151 static int length_fp_args (rtx_insn
*) ATTRIBUTE_UNUSED
;
152 static rtx
hppa_legitimize_address (rtx
, rtx
, machine_mode
);
153 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED
;
154 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED
;
155 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED
;
156 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED
;
157 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED
;
158 static void pa_som_file_start (void) ATTRIBUTE_UNUSED
;
159 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED
;
160 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED
;
161 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED
;
162 static void output_deferred_plabels (void);
163 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED
;
164 static void pa_file_end (void);
165 static void pa_init_libfuncs (void);
166 static rtx
pa_struct_value_rtx (tree
, int);
167 static bool pa_pass_by_reference (cumulative_args_t
, machine_mode
,
169 static int pa_arg_partial_bytes (cumulative_args_t
, machine_mode
,
171 static void pa_function_arg_advance (cumulative_args_t
, machine_mode
,
173 static rtx
pa_function_arg (cumulative_args_t
, machine_mode
,
175 static pad_direction
pa_function_arg_padding (machine_mode
, const_tree
);
176 static unsigned int pa_function_arg_boundary (machine_mode
, const_tree
);
177 static struct machine_function
* pa_init_machine_status (void);
178 static reg_class_t
pa_secondary_reload (bool, rtx
, reg_class_t
,
180 secondary_reload_info
*);
181 static bool pa_secondary_memory_needed (machine_mode
,
182 reg_class_t
, reg_class_t
);
183 static void pa_extra_live_on_entry (bitmap
);
184 static machine_mode
pa_promote_function_mode (const_tree
,
188 static void pa_asm_trampoline_template (FILE *);
189 static void pa_trampoline_init (rtx
, tree
, rtx
);
190 static rtx
pa_trampoline_adjust_address (rtx
);
191 static rtx
pa_delegitimize_address (rtx
);
192 static bool pa_print_operand_punct_valid_p (unsigned char);
193 static rtx
pa_internal_arg_pointer (void);
194 static bool pa_can_eliminate (const int, const int);
195 static void pa_conditional_register_usage (void);
196 static machine_mode
pa_c_mode_for_suffix (char);
197 static section
*pa_function_section (tree
, enum node_frequency
, bool, bool);
198 static bool pa_cannot_force_const_mem (machine_mode
, rtx
);
199 static bool pa_legitimate_constant_p (machine_mode
, rtx
);
200 static unsigned int pa_section_type_flags (tree
, const char *, int);
201 static bool pa_legitimate_address_p (machine_mode
, rtx
, bool);
202 static bool pa_callee_copies (cumulative_args_t
, machine_mode
,
204 static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode
);
205 static bool pa_hard_regno_mode_ok (unsigned int, machine_mode
);
206 static bool pa_modes_tieable_p (machine_mode
, machine_mode
);
207 static bool pa_can_change_mode_class (machine_mode
, machine_mode
, reg_class_t
);
208 static HOST_WIDE_INT
pa_starting_frame_offset (void);
210 /* The following extra sections are only used for SOM. */
211 static GTY(()) section
*som_readonly_data_section
;
212 static GTY(()) section
*som_one_only_readonly_data_section
;
213 static GTY(()) section
*som_one_only_data_section
;
214 static GTY(()) section
*som_tm_clone_table_section
;
216 /* Counts for the number of callee-saved general and floating point
217 registers which were saved by the current function's prologue. */
218 static int gr_saved
, fr_saved
;
220 /* Boolean indicating whether the return pointer was saved by the
221 current function's prologue. */
222 static bool rp_saved
;
224 static rtx
find_addr_reg (rtx
);
226 /* Keep track of the number of bytes we have output in the CODE subspace
227 during this compilation so we'll know when to emit inline long-calls. */
228 unsigned long total_code_bytes
;
230 /* The last address of the previous function plus the number of bytes in
231 associated thunks that have been output. This is used to determine if
232 a thunk can use an IA-relative branch to reach its target function. */
233 static unsigned int last_address
;
235 /* Variables to handle plabels that we discover are necessary at assembly
236 output time. They are output after the current function. */
237 struct GTY(()) deferred_plabel
242 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel
*
244 static size_t n_deferred_plabels
= 0;
246 /* Initialize the GCC target structure. */
248 #undef TARGET_OPTION_OVERRIDE
249 #define TARGET_OPTION_OVERRIDE pa_option_override
251 #undef TARGET_ASM_ALIGNED_HI_OP
252 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
253 #undef TARGET_ASM_ALIGNED_SI_OP
254 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
255 #undef TARGET_ASM_ALIGNED_DI_OP
256 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
257 #undef TARGET_ASM_UNALIGNED_HI_OP
258 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
259 #undef TARGET_ASM_UNALIGNED_SI_OP
260 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
261 #undef TARGET_ASM_UNALIGNED_DI_OP
262 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
263 #undef TARGET_ASM_INTEGER
264 #define TARGET_ASM_INTEGER pa_assemble_integer
266 #undef TARGET_ASM_FUNCTION_EPILOGUE
267 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
269 #undef TARGET_FUNCTION_VALUE
270 #define TARGET_FUNCTION_VALUE pa_function_value
271 #undef TARGET_LIBCALL_VALUE
272 #define TARGET_LIBCALL_VALUE pa_libcall_value
273 #undef TARGET_FUNCTION_VALUE_REGNO_P
274 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
276 #undef TARGET_LEGITIMIZE_ADDRESS
277 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
279 #undef TARGET_SCHED_ADJUST_COST
280 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
281 #undef TARGET_SCHED_ISSUE_RATE
282 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
284 #undef TARGET_ENCODE_SECTION_INFO
285 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
286 #undef TARGET_STRIP_NAME_ENCODING
287 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
289 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
290 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
292 #undef TARGET_COMMUTATIVE_P
293 #define TARGET_COMMUTATIVE_P pa_commutative_p
295 #undef TARGET_ASM_OUTPUT_MI_THUNK
296 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
297 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
298 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
300 #undef TARGET_ASM_FILE_END
301 #define TARGET_ASM_FILE_END pa_file_end
303 #undef TARGET_ASM_RELOC_RW_MASK
304 #define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
306 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
307 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
309 #if !defined(USE_COLLECT2)
310 #undef TARGET_ASM_CONSTRUCTOR
311 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
312 #undef TARGET_ASM_DESTRUCTOR
313 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
316 #undef TARGET_INIT_BUILTINS
317 #define TARGET_INIT_BUILTINS pa_init_builtins
319 #undef TARGET_EXPAND_BUILTIN
320 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
322 #undef TARGET_REGISTER_MOVE_COST
323 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
324 #undef TARGET_RTX_COSTS
325 #define TARGET_RTX_COSTS hppa_rtx_costs
326 #undef TARGET_ADDRESS_COST
327 #define TARGET_ADDRESS_COST hppa_address_cost
329 #undef TARGET_MACHINE_DEPENDENT_REORG
330 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
332 #undef TARGET_INIT_LIBFUNCS
333 #define TARGET_INIT_LIBFUNCS pa_init_libfuncs
335 #undef TARGET_PROMOTE_FUNCTION_MODE
336 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
337 #undef TARGET_PROMOTE_PROTOTYPES
338 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
340 #undef TARGET_STRUCT_VALUE_RTX
341 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
342 #undef TARGET_RETURN_IN_MEMORY
343 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
344 #undef TARGET_MUST_PASS_IN_STACK
345 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
346 #undef TARGET_PASS_BY_REFERENCE
347 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
348 #undef TARGET_CALLEE_COPIES
349 #define TARGET_CALLEE_COPIES pa_callee_copies
350 #undef TARGET_ARG_PARTIAL_BYTES
351 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
352 #undef TARGET_FUNCTION_ARG
353 #define TARGET_FUNCTION_ARG pa_function_arg
354 #undef TARGET_FUNCTION_ARG_ADVANCE
355 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
356 #undef TARGET_FUNCTION_ARG_PADDING
357 #define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
358 #undef TARGET_FUNCTION_ARG_BOUNDARY
359 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
361 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
362 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
363 #undef TARGET_EXPAND_BUILTIN_VA_START
364 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
365 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
366 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
368 #undef TARGET_SCALAR_MODE_SUPPORTED_P
369 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
371 #undef TARGET_CANNOT_FORCE_CONST_MEM
372 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
374 #undef TARGET_SECONDARY_RELOAD
375 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
376 #undef TARGET_SECONDARY_MEMORY_NEEDED
377 #define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed
379 #undef TARGET_EXTRA_LIVE_ON_ENTRY
380 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
382 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
383 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
384 #undef TARGET_TRAMPOLINE_INIT
385 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
386 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
387 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
388 #undef TARGET_DELEGITIMIZE_ADDRESS
389 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
390 #undef TARGET_INTERNAL_ARG_POINTER
391 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
392 #undef TARGET_CAN_ELIMINATE
393 #define TARGET_CAN_ELIMINATE pa_can_eliminate
394 #undef TARGET_CONDITIONAL_REGISTER_USAGE
395 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
396 #undef TARGET_C_MODE_FOR_SUFFIX
397 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
398 #undef TARGET_ASM_FUNCTION_SECTION
399 #define TARGET_ASM_FUNCTION_SECTION pa_function_section
401 #undef TARGET_LEGITIMATE_CONSTANT_P
402 #define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
403 #undef TARGET_SECTION_TYPE_FLAGS
404 #define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
405 #undef TARGET_LEGITIMATE_ADDRESS_P
406 #define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
409 #define TARGET_LRA_P hook_bool_void_false
411 #undef TARGET_HARD_REGNO_NREGS
412 #define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
413 #undef TARGET_HARD_REGNO_MODE_OK
414 #define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
415 #undef TARGET_MODES_TIEABLE_P
416 #define TARGET_MODES_TIEABLE_P pa_modes_tieable_p
418 #undef TARGET_CAN_CHANGE_MODE_CLASS
419 #define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class
421 #undef TARGET_CONSTANT_ALIGNMENT
422 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
424 #undef TARGET_STARTING_FRAME_OFFSET
425 #define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset
427 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
428 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
430 struct gcc_target targetm
= TARGET_INITIALIZER
;
432 /* Parse the -mfixed-range= option string. */
/* NOTE(review): this block is a mangled extraction -- statements are split
   across lines (with original line numbers fused into the text) and several
   original lines (the static void header, braces, the comma loop and the
   error-return paths) are missing from this view.  Code is kept
   byte-identical; only comments are added.

   Purpose (from visible code): parse the -mfixed-range=REG1-REG2 option
   string, mark each register in the range as fixed and call-used, and set
   MASK_DISABLE_FPREGS when every FP register has been fixed.  */
435 fix_range (const char *const_str
)
/* Locals: writable copy of the option string plus cursors for the '-' and
   ',' separators.  */
438 char *str
, *dash
, *comma
;
440 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
441 REG2 are either register names or register numbers. The effect
442 of this option is to mark the registers in the range from REG1 to
443 REG2 as ``fixed'' so they won't be used by the compiler. This is
444 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
/* Copy the option string so it can be modified in place.  */
446 i
= strlen (const_str
);
447 str
= (char *) alloca (i
+ 1);
448 memcpy (str
, const_str
, i
+ 1);
/* Locate the REG1-REG2 separator; a missing '-' is a malformed option
   (warning below).  */
452 dash
= strchr (str
, '-');
455 warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
/* A ',' after the dash begins the next range (loop structure not visible
   here -- TODO confirm against upstream pa.c).  */
460 comma
= strchr (dash
+ 1, ',');
/* Decode both endpoints, warning on unknown register names.  */
464 first
= decode_reg_name (str
);
467 warning (0, "unknown register name: %s", str
);
471 last
= decode_reg_name (dash
+ 1);
474 warning (0, "unknown register name: %s", dash
+ 1);
482 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
/* Mark every register in the inclusive range fixed and call-used.  */
486 for (i
= first
; i
<= last
; ++i
)
487 fixed_regs
[i
] = call_used_regs
[i
] = 1;
496 /* Check if all floating point registers have been fixed. */
497 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
/* All FP regs fixed: disable FP register use entirely.  */
502 target_flags
|= MASK_DISABLE_FPREGS
;
505 /* Implement the TARGET_OPTION_OVERRIDE hook. */
/* NOTE(review): mangled extraction -- code kept byte-identical, comments
   only.  Implements TARGET_OPTION_OVERRIDE: replays deferred
   -mfixed-range= options, diagnoses unsupported flag combinations, forces
   the big-PIC model, disables hot/cold block partitioning, prunes asm ops
   that are unavailable on this target, and installs the machine_function
   initializer.  Several original lines (braces, #ifdef/TARGET_GAS guards)
   are missing from this view.  */
508 pa_option_override (void)
/* Walk the options deferred by the common option machinery.  */
511 cl_deferred_option
*opt
;
512 vec
<cl_deferred_option
> *v
513 = (vec
<cl_deferred_option
> *) pa_deferred_options
;
516 FOR_EACH_VEC_ELT (*v
, i
, opt
)
518 switch (opt
->opt_index
)
520 case OPT_mfixed_range_
:
521 fix_range (opt
->arg
);
/* PIC is incompatible with the portable runtime model.  */
529 if (flag_pic
&& TARGET_PORTABLE_RUNTIME
)
531 warning (0, "PIC code generation is not supported in the portable runtime model");
/* PIC is incompatible with fast indirect calls.  */
534 if (flag_pic
&& TARGET_FAST_INDIRECT_CALLS
)
536 warning (0, "PIC code generation is not compatible with fast indirect calls");
/* Debug info requires GAS; otherwise drop -g.  */
539 if (! TARGET_GAS
&& write_symbols
!= NO_DEBUG
)
541 warning (0, "%<-g%> is only supported when using GAS on this processor,");
542 warning (0, "%<-g%> option disabled");
543 write_symbols
= NO_DEBUG
;
546 /* We only support the "big PIC" model now. And we always generate PIC
547 code when in 64bit mode. */
548 if (flag_pic
== 1 || TARGET_64BIT
)
551 /* Disable -freorder-blocks-and-partition as we don't support hot and
552 cold partitioning. */
553 if (flag_reorder_blocks_and_partition
)
555 inform (input_location
,
556 "%<-freorder-blocks-and-partition%> does not work "
557 "on this architecture");
558 flag_reorder_blocks_and_partition
= 0;
559 flag_reorder_blocks
= 1;
562 /* We can't guarantee that .dword is available for 32-bit targets. */
563 if (UNITS_PER_WORD
== 4)
564 targetm
.asm_out
.aligned_op
.di
= NULL
;
566 /* The unaligned ops are only available when using GAS. */
/* Presumably guarded by !TARGET_GAS in the missing lines -- TODO
   confirm against upstream pa.c.  */
569 targetm
.asm_out
.unaligned_op
.hi
= NULL
;
570 targetm
.asm_out
.unaligned_op
.si
= NULL
;
571 targetm
.asm_out
.unaligned_op
.di
= NULL
;
/* Hook up per-function machine state allocation.  */
574 init_machine_status
= pa_init_machine_status
;
579 PA_BUILTIN_COPYSIGNQ
,
582 PA_BUILTIN_HUGE_VALQ
,
586 static GTY(()) tree pa_builtins
[(int) PA_BUILTIN_max
];
/* NOTE(review): mangled extraction -- code kept byte-identical, comments
   only.  Registers PA target builtins: remaps fputc_unlocked and
   isfinite/isfinitef assembler names, and, when the HP-UX long-double
   library is in use, adds the TFmode builtins __builtin_fabsq,
   __builtin_copysignq, __builtin_infq and __builtin_huge_valq backed by
   _U_Qfabs/_U_Qfcopysign.  Braces, #endif lines and some argument-list
   terminators are missing from this view.  */
589 pa_init_builtins (void)
591 #ifdef DONT_HAVE_FPUTC_UNLOCKED
/* Redirect fputc_unlocked to putc_unlocked when the former is absent.  */
593 tree decl
= builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED
);
594 set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED
, decl
,
595 builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED
));
/* HP-UX spells isfinite as _Isfinite/_Isfinitef.  */
602 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITE
)) != NULL_TREE
)
603 set_user_assembler_name (decl
, "_Isfinite");
604 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITEF
)) != NULL_TREE
)
605 set_user_assembler_name (decl
, "_Isfinitef");
609 if (HPUX_LONG_DOUBLE_LIBRARY
)
613 /* Under HPUX, the __float128 type is a synonym for "long double". */
614 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
617 /* TFmode support builtins. */
/* fabsq: long double -> long double.  */
618 ftype
= build_function_type_list (long_double_type_node
,
619 long_double_type_node
,
621 decl
= add_builtin_function ("__builtin_fabsq", ftype
,
622 PA_BUILTIN_FABSQ
, BUILT_IN_MD
,
623 "_U_Qfabs", NULL_TREE
);
624 TREE_READONLY (decl
) = 1;
625 pa_builtins
[PA_BUILTIN_FABSQ
] = decl
;
/* copysignq: (long double, long double) -> long double.  */
627 ftype
= build_function_type_list (long_double_type_node
,
628 long_double_type_node
,
629 long_double_type_node
,
631 decl
= add_builtin_function ("__builtin_copysignq", ftype
,
632 PA_BUILTIN_COPYSIGNQ
, BUILT_IN_MD
,
633 "_U_Qfcopysign", NULL_TREE
);
634 TREE_READONLY (decl
) = 1;
635 pa_builtins
[PA_BUILTIN_COPYSIGNQ
] = decl
;
/* infq / huge_valq: nullary, both share the same type.  */
637 ftype
= build_function_type_list (long_double_type_node
, NULL_TREE
);
638 decl
= add_builtin_function ("__builtin_infq", ftype
,
639 PA_BUILTIN_INFQ
, BUILT_IN_MD
,
641 pa_builtins
[PA_BUILTIN_INFQ
] = decl
;
643 decl
= add_builtin_function ("__builtin_huge_valq", ftype
,
644 PA_BUILTIN_HUGE_VALQ
, BUILT_IN_MD
,
646 pa_builtins
[PA_BUILTIN_HUGE_VALQ
] = decl
;
/* NOTE(review): mangled extraction -- code kept byte-identical, comments
   only.  Expands PA md builtins: fabsq/copysignq are lowered through a
   normal library call, while infq/huge_valq materialize a TFmode infinity
   from the constant pool into a register.  The switch head, the
   REAL_VALUE_TYPE inf setup, the returns and the default case are missing
   from this view -- TODO confirm against upstream pa.c.  */
651 pa_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
652 machine_mode mode ATTRIBUTE_UNUSED
,
653 int ignore ATTRIBUTE_UNUSED
)
/* Recover the builtin's decl and md function code from the call expr.  */
655 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
656 unsigned int fcode
= DECL_MD_FUNCTION_CODE (fndecl
);
/* fabsq/copysignq map to the _U_Qfabs/_U_Qfcopysign library routines.  */
660 case PA_BUILTIN_FABSQ
:
661 case PA_BUILTIN_COPYSIGNQ
:
662 return expand_call (exp
, target
, ignore
);
664 case PA_BUILTIN_INFQ
:
665 case PA_BUILTIN_HUGE_VALQ
:
667 machine_mode target_mode
= TYPE_MODE (TREE_TYPE (exp
));
/* Build the infinity constant and force it into memory, then load it.  */
672 tmp
= const_double_from_real_value (inf
, target_mode
);
674 tmp
= validize_mem (force_const_mem (target_mode
, tmp
));
677 target
= gen_reg_rtx (target_mode
);
679 emit_move_insn (target
, tmp
);
690 /* Function to init struct machine_function.
691 This will be called, via a pointer variable,
692 from push_function_context. */
694 static struct machine_function
*
695 pa_init_machine_status (void)
697 return ggc_cleared_alloc
<machine_function
> ();
700 /* If FROM is a probable pointer register, mark TO as a probable
701 pointer register with the same pointer alignment as FROM. */
704 copy_reg_pointer (rtx to
, rtx from
)
706 if (REG_POINTER (from
))
707 mark_reg_pointer (to
, REGNO_POINTER_ALIGN (REGNO (from
)));
710 /* Return 1 if X contains a symbolic expression. We know these
711 expressions will have one of a few well defined forms, so
712 we need only check those forms. */
714 pa_symbolic_expression_p (rtx x
)
717 /* Strip off any HIGH. */
718 if (GET_CODE (x
) == HIGH
)
721 return symbolic_operand (x
, VOIDmode
);
724 /* Accept any constant that can be moved in one instruction into a
727 pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival
)
729 /* OK if ldo, ldil, or zdepi, can be used. */
730 return (VAL_14_BITS_P (ival
)
731 || pa_ldil_cint_p (ival
)
732 || pa_zdepi_cint_p (ival
));
735 /* True iff ldil can be used to load this CONST_INT. The least
736 significant 11 bits of the value must be zero and the value must
737 not change sign when extended from 32 to 64 bits. */
739 pa_ldil_cint_p (unsigned HOST_WIDE_INT ival
)
741 unsigned HOST_WIDE_INT x
;
743 x
= ival
& (((unsigned HOST_WIDE_INT
) -1 << 31) | 0x7ff);
744 return x
== 0 || x
== ((unsigned HOST_WIDE_INT
) -1 << 31);
747 /* True iff zdepi can be used to generate this CONST_INT.
748 zdepi first sign extends a 5-bit signed number to a given field
749 length, then places this field anywhere in a zero. */
751 pa_zdepi_cint_p (unsigned HOST_WIDE_INT x
)
753 unsigned HOST_WIDE_INT lsb_mask
, t
;
755 /* This might not be obvious, but it's at least fast.
756 This function is critical; we don't have the time loops would take. */
758 t
= ((x
>> 4) + lsb_mask
) & ~(lsb_mask
- 1);
759 /* Return true iff t is a power of two. */
760 return ((t
& (t
- 1)) == 0);
763 /* True iff depi or extru can be used to compute (reg & mask).
764 Accept bit pattern like these:
769 pa_and_mask_p (unsigned HOST_WIDE_INT mask
)
772 mask
+= mask
& -mask
;
773 return (mask
& (mask
- 1)) == 0;
776 /* True iff depi can be used to compute (reg | MASK). */
778 pa_ior_mask_p (unsigned HOST_WIDE_INT mask
)
780 mask
+= mask
& -mask
;
781 return (mask
& (mask
- 1)) == 0;
784 /* Legitimize PIC addresses. If the address is already
785 position-independent, we return ORIG. Newly generated
786 position-independent addresses go to REG. If we need more
787 than one register, we lose. */
/* NOTE(review): mangled extraction -- code kept byte-identical, comments
   only.  Legitimize PIC addresses: labels get a raw set with REG_EQUAL
   (plus REG_LABEL_OPERAND bookkeeping during/after reload); SYMBOL_REFs
   are loaded from the DLT via a HIGH/LO_SUM+UNSPEC sequence, with an
   extra descriptor load for function labels; CONST PLUS expressions
   recurse on both operands.  Braces, several declarations (insn, pic_ref,
   tmp_reg, base), UNSPEC argument vectors and the returns are missing
   from this view -- TODO confirm against upstream pa.c.  */
790 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
/* TLS symbols must be handled elsewhere.  */
794 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig
));
796 /* Labels need special handling. */
797 if (pic_label_operand (orig
, mode
))
801 /* We do not want to go through the movXX expanders here since that
802 would create recursion.
804 Nor do we really want to call a generator for a named pattern
805 since that requires multiple patterns if we want to support
808 So instead we just emit the raw set, which avoids the movXX
809 expanders completely. */
810 mark_reg_pointer (reg
, BITS_PER_UNIT
);
811 insn
= emit_insn (gen_rtx_SET (reg
, orig
));
813 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
814 add_reg_note (insn
, REG_EQUAL
, orig
);
816 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
817 and update LABEL_NUSES because this is not done automatically. */
818 if (reload_in_progress
|| reload_completed
)
820 /* Extract LABEL_REF. */
821 if (GET_CODE (orig
) == CONST
)
822 orig
= XEXP (XEXP (orig
, 0), 0);
823 /* Extract CODE_LABEL. */
824 orig
= XEXP (orig
, 0);
825 add_reg_note (insn
, REG_LABEL_OPERAND
, orig
);
826 /* Make sure we have label and not a note. */
828 LABEL_NUSES (orig
)++;
830 crtl
->uses_pic_offset_table
= 1;
/* Symbolic addresses: load the address out of the DLT.  */
833 if (GET_CODE (orig
) == SYMBOL_REF
)
840 /* Before reload, allocate a temporary register for the intermediate
841 result. This allows the sequence to be deleted when the final
842 result is unused and the insns are trivially dead. */
843 tmp_reg
= ((reload_in_progress
|| reload_completed
)
844 ? reg
: gen_reg_rtx (Pmode
));
846 if (function_label_operand (orig
, VOIDmode
))
848 /* Force function label into memory in word mode. */
849 orig
= XEXP (force_const_mem (word_mode
, orig
), 0);
850 /* Load plabel address from DLT. */
851 emit_move_insn (tmp_reg
,
852 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
853 gen_rtx_HIGH (word_mode
, orig
)));
/* pic_ref = a const mem through LO_SUM of the DLT slot.  */
855 = gen_const_mem (Pmode
,
856 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
857 gen_rtx_UNSPEC (Pmode
,
860 emit_move_insn (reg
, pic_ref
);
861 /* Now load address of function descriptor. */
862 pic_ref
= gen_rtx_MEM (Pmode
, reg
);
866 /* Load symbol reference from DLT. */
867 emit_move_insn (tmp_reg
,
868 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
869 gen_rtx_HIGH (word_mode
, orig
)));
871 = gen_const_mem (Pmode
,
872 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
873 gen_rtx_UNSPEC (Pmode
,
878 crtl
->uses_pic_offset_table
= 1;
879 mark_reg_pointer (reg
, BITS_PER_UNIT
);
880 insn
= emit_move_insn (reg
, pic_ref
);
882 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
883 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
/* CONST expressions: recurse on the PLUS operands.  */
887 else if (GET_CODE (orig
) == CONST
)
891 if (GET_CODE (XEXP (orig
, 0)) == PLUS
892 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
896 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
898 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
899 orig
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
900 base
== reg
? 0 : reg
);
/* A small constant offset can stay as a displacement; otherwise force
   it into a register and add.  */
902 if (GET_CODE (orig
) == CONST_INT
)
904 if (INT_14_BITS (orig
))
905 return plus_constant (Pmode
, base
, INTVAL (orig
));
906 orig
= force_reg (Pmode
, orig
);
908 pic_ref
= gen_rtx_PLUS (Pmode
, base
, orig
);
909 /* Likewise, should we set special REG_NOTEs here? */
915 static GTY(()) rtx gen_tls_tga
;
918 gen_tls_get_addr (void)
921 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
926 hppa_tls_call (rtx arg
)
930 ret
= gen_reg_rtx (Pmode
);
931 emit_library_call_value (gen_tls_get_addr (), ret
,
932 LCT_CONST
, Pmode
, arg
, Pmode
);
/* NOTE(review): mangled extraction -- code kept byte-identical, comments
   only.  Legitimize a TLS SYMBOL_REF according to its TLS model:
   global-dynamic and local-dynamic call __tls_get_addr (via
   hppa_tls_call), initial-exec and local-exec compute the address from
   the thread pointer.  Braces, the returns, the PIC/non-PIC conditional
   heads and the default case are missing from this view -- TODO confirm
   against upstream pa.c.  */
938 legitimize_tls_address (rtx addr
)
940 rtx ret
, tmp
, t1
, t2
, tp
;
943 /* Currently, we can't handle anything but a SYMBOL_REF. */
944 if (GET_CODE (addr
) != SYMBOL_REF
)
947 switch (SYMBOL_REF_TLS_MODEL (addr
))
/* GD: load the module/offset descriptor, then call __tls_get_addr.  */
949 case TLS_MODEL_GLOBAL_DYNAMIC
:
950 tmp
= gen_reg_rtx (Pmode
);
952 emit_insn (gen_tgd_load_pic (tmp
, addr
));
954 emit_insn (gen_tgd_load (tmp
, addr
));
955 ret
= hppa_tls_call (tmp
);
/* LD: one __tls_get_addr call for the module base, then add the
   symbol's DTP-relative offset.  */
958 case TLS_MODEL_LOCAL_DYNAMIC
:
959 ret
= gen_reg_rtx (Pmode
);
960 tmp
= gen_reg_rtx (Pmode
);
963 emit_insn (gen_tld_load_pic (tmp
, addr
));
965 emit_insn (gen_tld_load (tmp
, addr
));
966 t1
= hppa_tls_call (tmp
);
969 t2
= gen_reg_rtx (Pmode
);
970 emit_libcall_block (insn
, t2
, t1
,
971 gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
973 emit_insn (gen_tld_offset_load (ret
, addr
, t2
));
/* IE: thread pointer plus an offset loaded from the GOT.  */
976 case TLS_MODEL_INITIAL_EXEC
:
977 tp
= gen_reg_rtx (Pmode
);
978 tmp
= gen_reg_rtx (Pmode
);
979 ret
= gen_reg_rtx (Pmode
);
980 emit_insn (gen_tp_load (tp
));
982 emit_insn (gen_tie_load_pic (tmp
, addr
));
984 emit_insn (gen_tie_load (tmp
, addr
));
985 emit_move_insn (ret
, gen_rtx_PLUS (Pmode
, tp
, tmp
));
/* LE: thread pointer plus a link-time constant offset.  */
988 case TLS_MODEL_LOCAL_EXEC
:
989 tp
= gen_reg_rtx (Pmode
);
990 ret
= gen_reg_rtx (Pmode
);
991 emit_insn (gen_tp_load (tp
));
992 emit_insn (gen_tle_load (ret
, addr
, tp
));
1002 /* Helper for hppa_legitimize_address. Given X, return true if it
1003 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
1005 This respectively represent canonical shift-add rtxs or scaled
1006 memory addresses. */
1008 mem_shadd_or_shadd_rtx_p (rtx x
)
1010 return ((GET_CODE (x
) == ASHIFT
1011 || GET_CODE (x
) == MULT
)
1012 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1013 && ((GET_CODE (x
) == ASHIFT
1014 && pa_shadd_constant_p (INTVAL (XEXP (x
, 1))))
1015 || (GET_CODE (x
) == MULT
1016 && pa_mem_shadd_constant_p (INTVAL (XEXP (x
, 1))))));
1019 /* Try machine-dependent ways of modifying an illegitimate address
1020 to be legitimate. If we find one, return the new, valid address.
1021 This macro is used in only one place: `memory_address' in explow.c.
1023 OLDX is the address as it was before break_out_memory_refs was called.
1024 In some cases it is useful to look at this to decide what needs to be done.
1026 It is always safe for this macro to do nothing. It exists to recognize
1027 opportunities to optimize the output.
1029 For the PA, transform:
1031 memory(X + <large int>)
1035 if (<large int> & mask) >= 16
1036 Y = (<large int> & ~mask) + mask + 1 Round up.
1038 Y = (<large int> & ~mask) Round down.
1040 memory (Z + (<large int> - Y));
1042 This is for CSE to find several similar references, and only use one Z.
1044 X can either be a SYMBOL_REF or REG, but because combine cannot
1045 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1046 D will not fit in 14 bits.
1048 MODE_FLOAT references allow displacements which fit in 5 bits, so use
1051 MODE_INT references allow displacements which fit in 14 bits, so use
1054 This relies on the fact that most mode MODE_FLOAT references will use FP
1055 registers and most mode MODE_INT references will use integer registers.
1056 (In the rare case of an FP register used in an integer MODE, we depend
1057 on secondary reloads to clean things up.)
1060 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1061 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
1062 addressing modes to be used).
1064 Note that the addresses passed into hppa_legitimize_address always
1065 come from a MEM, so we only have to match the MULT form on incoming
1066 addresses. But to be future proof we also match the ASHIFT form.
1068 However, this routine always places those shift-add sequences into
1069 registers, so we have to generate the ASHIFT form as our output.
1071 Put X and Z into registers. Then put the entire expression into
1075 hppa_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
1080 /* We need to canonicalize the order of operands in unscaled indexed
1081 addresses since the code that checks if an address is valid doesn't
1082 always try both orders. */
1083 if (!TARGET_NO_SPACE_REGS
1084 && GET_CODE (x
) == PLUS
1085 && GET_MODE (x
) == Pmode
1086 && REG_P (XEXP (x
, 0))
1087 && REG_P (XEXP (x
, 1))
1088 && REG_POINTER (XEXP (x
, 0))
1089 && !REG_POINTER (XEXP (x
, 1)))
1090 return gen_rtx_PLUS (Pmode
, XEXP (x
, 1), XEXP (x
, 0));
1092 if (tls_referenced_p (x
))
1093 return legitimize_tls_address (x
);
1095 return legitimize_pic_address (x
, mode
, gen_reg_rtx (Pmode
));
1097 /* Strip off CONST. */
1098 if (GET_CODE (x
) == CONST
)
1101 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1102 That should always be safe. */
1103 if (GET_CODE (x
) == PLUS
1104 && GET_CODE (XEXP (x
, 0)) == REG
1105 && GET_CODE (XEXP (x
, 1)) == SYMBOL_REF
)
1107 rtx reg
= force_reg (Pmode
, XEXP (x
, 1));
1108 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg
, XEXP (x
, 0)));
1111 /* Note we must reject symbols which represent function addresses
1112 since the assembler/linker can't handle arithmetic on plabels. */
1113 if (GET_CODE (x
) == PLUS
1114 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1115 && ((GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
1116 && !FUNCTION_NAME_P (XSTR (XEXP (x
, 0), 0)))
1117 || GET_CODE (XEXP (x
, 0)) == REG
))
1119 rtx int_part
, ptr_reg
;
1121 int offset
= INTVAL (XEXP (x
, 1));
1124 mask
= (GET_MODE_CLASS (mode
) == MODE_FLOAT
1125 && !INT14_OK_STRICT
? 0x1f : 0x3fff);
1127 /* Choose which way to round the offset. Round up if we
1128 are >= halfway to the next boundary. */
1129 if ((offset
& mask
) >= ((mask
+ 1) / 2))
1130 newoffset
= (offset
& ~ mask
) + mask
+ 1;
1132 newoffset
= (offset
& ~ mask
);
1134 /* If the newoffset will not fit in 14 bits (ldo), then
1135 handling this would take 4 or 5 instructions (2 to load
1136 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1137 add the new offset and the SYMBOL_REF.) Combine cannot
1138 handle 4->2 or 5->2 combinations, so do not create
1140 if (! VAL_14_BITS_P (newoffset
)
1141 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
)
1143 rtx const_part
= plus_constant (Pmode
, XEXP (x
, 0), newoffset
);
1146 gen_rtx_HIGH (Pmode
, const_part
));
1149 gen_rtx_LO_SUM (Pmode
,
1150 tmp_reg
, const_part
));
1154 if (! VAL_14_BITS_P (newoffset
))
1155 int_part
= force_reg (Pmode
, GEN_INT (newoffset
));
1157 int_part
= GEN_INT (newoffset
);
1159 ptr_reg
= force_reg (Pmode
,
1160 gen_rtx_PLUS (Pmode
,
1161 force_reg (Pmode
, XEXP (x
, 0)),
1164 return plus_constant (Pmode
, ptr_reg
, offset
- newoffset
);
1167 /* Handle (plus (mult (a) (mem_shadd_constant)) (b)). */
1169 if (GET_CODE (x
) == PLUS
1170 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1171 && (OBJECT_P (XEXP (x
, 1))
1172 || GET_CODE (XEXP (x
, 1)) == SUBREG
)
1173 && GET_CODE (XEXP (x
, 1)) != CONST
)
1175 /* If we were given a MULT, we must fix the constant
1176 as we're going to create the ASHIFT form. */
1177 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1178 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1179 shift_val
= exact_log2 (shift_val
);
1183 if (GET_CODE (reg1
) != REG
)
1184 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1186 reg2
= XEXP (XEXP (x
, 0), 0);
1187 if (GET_CODE (reg2
) != REG
)
1188 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1190 return force_reg (Pmode
,
1191 gen_rtx_PLUS (Pmode
,
1192 gen_rtx_ASHIFT (Pmode
, reg2
,
1193 GEN_INT (shift_val
)),
1197 /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).
1199 Only do so for floating point modes since this is more speculative
1200 and we lose if it's an integer store. */
1201 if (GET_CODE (x
) == PLUS
1202 && GET_CODE (XEXP (x
, 0)) == PLUS
1203 && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x
, 0), 0))
1204 && (mode
== SFmode
|| mode
== DFmode
))
1206 int shift_val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
1208 /* If we were given a MULT, we must fix the constant
1209 as we're going to create the ASHIFT form. */
1210 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
)
1211 shift_val
= exact_log2 (shift_val
);
1213 /* Try and figure out what to use as a base register. */
1214 rtx reg1
, reg2
, base
, idx
;
1216 reg1
= XEXP (XEXP (x
, 0), 1);
1221 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1222 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
1223 it's a base register below. */
1224 if (GET_CODE (reg1
) != REG
)
1225 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1227 if (GET_CODE (reg2
) != REG
)
1228 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1230 /* Figure out what the base and index are. */
1232 if (GET_CODE (reg1
) == REG
1233 && REG_POINTER (reg1
))
1236 idx
= gen_rtx_PLUS (Pmode
,
1237 gen_rtx_ASHIFT (Pmode
,
1238 XEXP (XEXP (XEXP (x
, 0), 0), 0),
1239 GEN_INT (shift_val
)),
1242 else if (GET_CODE (reg2
) == REG
1243 && REG_POINTER (reg2
))
1252 /* If the index adds a large constant, try to scale the
1253 constant so that it can be loaded with only one insn. */
1254 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1255 && VAL_14_BITS_P (INTVAL (XEXP (idx
, 1))
1256 / INTVAL (XEXP (XEXP (idx
, 0), 1)))
1257 && INTVAL (XEXP (idx
, 1)) % INTVAL (XEXP (XEXP (idx
, 0), 1)) == 0)
1259 /* Divide the CONST_INT by the scale factor, then add it to A. */
1260 int val
= INTVAL (XEXP (idx
, 1));
1261 val
/= (1 << shift_val
);
1263 reg1
= XEXP (XEXP (idx
, 0), 0);
1264 if (GET_CODE (reg1
) != REG
)
1265 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1267 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg1
, GEN_INT (val
)));
1269 /* We can now generate a simple scaled indexed address. */
1272 (Pmode
, gen_rtx_PLUS (Pmode
,
1273 gen_rtx_ASHIFT (Pmode
, reg1
,
1274 GEN_INT (shift_val
)),
1278 /* If B + C is still a valid base register, then add them. */
1279 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1280 && INTVAL (XEXP (idx
, 1)) <= 4096
1281 && INTVAL (XEXP (idx
, 1)) >= -4096)
1285 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, XEXP (idx
, 1)));
1287 reg2
= XEXP (XEXP (idx
, 0), 0);
1288 if (GET_CODE (reg2
) != CONST_INT
)
1289 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1291 return force_reg (Pmode
,
1292 gen_rtx_PLUS (Pmode
,
1293 gen_rtx_ASHIFT (Pmode
, reg2
,
1294 GEN_INT (shift_val
)),
1298 /* Get the index into a register, then add the base + index and
1299 return a register holding the result. */
1301 /* First get A into a register. */
1302 reg1
= XEXP (XEXP (idx
, 0), 0);
1303 if (GET_CODE (reg1
) != REG
)
1304 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1306 /* And get B into a register. */
1307 reg2
= XEXP (idx
, 1);
1308 if (GET_CODE (reg2
) != REG
)
1309 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1311 reg1
= force_reg (Pmode
,
1312 gen_rtx_PLUS (Pmode
,
1313 gen_rtx_ASHIFT (Pmode
, reg1
,
1314 GEN_INT (shift_val
)),
1317 /* Add the result to our base register and return. */
1318 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, reg1
));
1322 /* Uh-oh. We might have an address for x[n-100000]. This needs
1323 special handling to avoid creating an indexed memory address
1324 with x-100000 as the base.
1326 If the constant part is small enough, then it's still safe because
1327 there is a guard page at the beginning and end of the data segment.
1329 Scaled references are common enough that we want to try and rearrange the
1330 terms so that we can use indexing for these addresses too. Only
1331 do the optimization for floatint point modes. */
1333 if (GET_CODE (x
) == PLUS
1334 && pa_symbolic_expression_p (XEXP (x
, 1)))
1336 /* Ugly. We modify things here so that the address offset specified
1337 by the index expression is computed first, then added to x to form
1338 the entire address. */
1340 rtx regx1
, regx2
, regy1
, regy2
, y
;
1342 /* Strip off any CONST. */
1344 if (GET_CODE (y
) == CONST
)
1347 if (GET_CODE (y
) == PLUS
|| GET_CODE (y
) == MINUS
)
1349 /* See if this looks like
1350 (plus (mult (reg) (mem_shadd_const))
1351 (const (plus (symbol_ref) (const_int))))
1353 Where const_int is small. In that case the const
1354 expression is a valid pointer for indexing.
1356 If const_int is big, but can be divided evenly by shadd_const
1357 and added to (reg). This allows more scaled indexed addresses. */
1358 if (GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1359 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1360 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1361 && INTVAL (XEXP (y
, 1)) >= -4096
1362 && INTVAL (XEXP (y
, 1)) <= 4095)
1364 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1366 /* If we were given a MULT, we must fix the constant
1367 as we're going to create the ASHIFT form. */
1368 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1369 shift_val
= exact_log2 (shift_val
);
1374 if (GET_CODE (reg1
) != REG
)
1375 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1377 reg2
= XEXP (XEXP (x
, 0), 0);
1378 if (GET_CODE (reg2
) != REG
)
1379 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1383 gen_rtx_PLUS (Pmode
,
1384 gen_rtx_ASHIFT (Pmode
,
1386 GEN_INT (shift_val
)),
1389 else if ((mode
== DFmode
|| mode
== SFmode
)
1390 && GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1391 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1392 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1393 && INTVAL (XEXP (y
, 1)) % (1 << INTVAL (XEXP (XEXP (x
, 0), 1))) == 0)
1395 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1397 /* If we were given a MULT, we must fix the constant
1398 as we're going to create the ASHIFT form. */
1399 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1400 shift_val
= exact_log2 (shift_val
);
1403 = force_reg (Pmode
, GEN_INT (INTVAL (XEXP (y
, 1))
1404 / INTVAL (XEXP (XEXP (x
, 0), 1))));
1405 regx2
= XEXP (XEXP (x
, 0), 0);
1406 if (GET_CODE (regx2
) != REG
)
1407 regx2
= force_reg (Pmode
, force_operand (regx2
, 0));
1408 regx2
= force_reg (Pmode
, gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1412 gen_rtx_PLUS (Pmode
,
1413 gen_rtx_ASHIFT (Pmode
, regx2
,
1414 GEN_INT (shift_val
)),
1415 force_reg (Pmode
, XEXP (y
, 0))));
1417 else if (GET_CODE (XEXP (y
, 1)) == CONST_INT
1418 && INTVAL (XEXP (y
, 1)) >= -4096
1419 && INTVAL (XEXP (y
, 1)) <= 4095)
1421 /* This is safe because of the guard page at the
1422 beginning and end of the data space. Just
1423 return the original address. */
1428 /* Doesn't look like one we can optimize. */
1429 regx1
= force_reg (Pmode
, force_operand (XEXP (x
, 0), 0));
1430 regy1
= force_reg (Pmode
, force_operand (XEXP (y
, 0), 0));
1431 regy2
= force_reg (Pmode
, force_operand (XEXP (y
, 1), 0));
1432 regx1
= force_reg (Pmode
,
1433 gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1435 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, regx1
, regy1
));
1443 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1445 Compute extra cost of moving data between one register class
1448 Make moves from SAR so expensive they should never happen. We used to
1449 have 0xffff here, but that generates overflow in rare cases.
1451 Copies involving a FP register and a non-FP register are relatively
1452 expensive because they must go through memory.
1454 Other copies are reasonably cheap. */
1457 hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
1458 reg_class_t from
, reg_class_t to
)
1460 if (from
== SHIFT_REGS
)
1462 else if (to
== SHIFT_REGS
&& FP_REG_CLASS_P (from
))
1464 else if ((FP_REG_CLASS_P (from
) && ! FP_REG_CLASS_P (to
))
1465 || (FP_REG_CLASS_P (to
) && ! FP_REG_CLASS_P (from
)))
1471 /* For the HPPA, REG and REG+CONST is cost 0
1472 and addresses involving symbolic constants are cost 2.
1474 PIC addresses are very expensive.
1476 It is no coincidence that this has the same structure
1477 as pa_legitimate_address_p. */
1480 hppa_address_cost (rtx X
, machine_mode mode ATTRIBUTE_UNUSED
,
1481 addr_space_t as ATTRIBUTE_UNUSED
,
1482 bool speed ATTRIBUTE_UNUSED
)
1484 switch (GET_CODE (X
))
1497 /* Compute a (partial) cost for rtx X. Return true if the complete
1498 cost has been computed, and false if subexpressions should be
1499 scanned. In either case, *TOTAL contains the cost result. */
1502 hppa_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
1503 int opno ATTRIBUTE_UNUSED
,
1504 int *total
, bool speed ATTRIBUTE_UNUSED
)
1507 int code
= GET_CODE (x
);
1512 if (INTVAL (x
) == 0)
1514 else if (INT_14_BITS (x
))
1531 if ((x
== CONST0_RTX (DFmode
) || x
== CONST0_RTX (SFmode
))
1532 && outer_code
!= SET
)
1539 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1541 *total
= COSTS_N_INSNS (3);
1545 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1546 factor
= GET_MODE_SIZE (mode
) / 4;
1550 if (TARGET_PA_11
&& !TARGET_DISABLE_FPREGS
&& !TARGET_SOFT_FLOAT
)
1551 *total
= factor
* factor
* COSTS_N_INSNS (8);
1553 *total
= factor
* factor
* COSTS_N_INSNS (20);
1557 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1559 *total
= COSTS_N_INSNS (14);
1567 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1568 factor
= GET_MODE_SIZE (mode
) / 4;
1572 *total
= factor
* factor
* COSTS_N_INSNS (60);
1575 case PLUS
: /* this includes shNadd insns */
1577 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1579 *total
= COSTS_N_INSNS (3);
1583 /* A size N times larger than UNITS_PER_WORD needs N times as
1584 many insns, taking N times as long. */
1585 factor
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
1588 *total
= factor
* COSTS_N_INSNS (1);
1594 *total
= COSTS_N_INSNS (1);
1602 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1603 new rtx with the correct mode. */
1605 force_mode (machine_mode mode
, rtx orig
)
1607 if (mode
== GET_MODE (orig
))
1610 gcc_assert (REGNO (orig
) < FIRST_PSEUDO_REGISTER
);
1612 return gen_rtx_REG (mode
, REGNO (orig
));
1615 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1618 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
1620 return tls_referenced_p (x
);
1623 /* Emit insns to move operands[1] into operands[0].
1625 Return 1 if we have written out everything that needs to be done to
1626 do the move. Otherwise, return 0 and the caller will emit the move
1629 Note SCRATCH_REG may not be in the proper mode depending on how it
1630 will be used. This routine is responsible for creating a new copy
1631 of SCRATCH_REG in the proper mode. */
1634 pa_emit_move_sequence (rtx
*operands
, machine_mode mode
, rtx scratch_reg
)
1636 register rtx operand0
= operands
[0];
1637 register rtx operand1
= operands
[1];
1640 /* We can only handle indexed addresses in the destination operand
1641 of floating point stores. Thus, we need to break out indexed
1642 addresses from the destination operand. */
1643 if (GET_CODE (operand0
) == MEM
&& IS_INDEX_ADDR_P (XEXP (operand0
, 0)))
1645 gcc_assert (can_create_pseudo_p ());
1647 tem
= copy_to_mode_reg (Pmode
, XEXP (operand0
, 0));
1648 operand0
= replace_equiv_address (operand0
, tem
);
1651 /* On targets with non-equivalent space registers, break out unscaled
1652 indexed addresses from the source operand before the final CSE.
1653 We have to do this because the REG_POINTER flag is not correctly
1654 carried through various optimization passes and CSE may substitute
1655 a pseudo without the pointer set for one with the pointer set. As
1656 a result, we loose various opportunities to create insns with
1657 unscaled indexed addresses. */
1658 if (!TARGET_NO_SPACE_REGS
1659 && !cse_not_expected
1660 && GET_CODE (operand1
) == MEM
1661 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1662 && REG_P (XEXP (XEXP (operand1
, 0), 0))
1663 && REG_P (XEXP (XEXP (operand1
, 0), 1)))
1665 = replace_equiv_address (operand1
,
1666 copy_to_mode_reg (Pmode
, XEXP (operand1
, 0)));
1669 && reload_in_progress
&& GET_CODE (operand0
) == REG
1670 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
1671 operand0
= reg_equiv_mem (REGNO (operand0
));
1672 else if (scratch_reg
1673 && reload_in_progress
&& GET_CODE (operand0
) == SUBREG
1674 && GET_CODE (SUBREG_REG (operand0
)) == REG
1675 && REGNO (SUBREG_REG (operand0
)) >= FIRST_PSEUDO_REGISTER
)
1677 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1678 the code which tracks sets/uses for delete_output_reload. */
1679 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand0
),
1680 reg_equiv_mem (REGNO (SUBREG_REG (operand0
))),
1681 SUBREG_BYTE (operand0
));
1682 operand0
= alter_subreg (&temp
, true);
1686 && reload_in_progress
&& GET_CODE (operand1
) == REG
1687 && REGNO (operand1
) >= FIRST_PSEUDO_REGISTER
)
1688 operand1
= reg_equiv_mem (REGNO (operand1
));
1689 else if (scratch_reg
1690 && reload_in_progress
&& GET_CODE (operand1
) == SUBREG
1691 && GET_CODE (SUBREG_REG (operand1
)) == REG
1692 && REGNO (SUBREG_REG (operand1
)) >= FIRST_PSEUDO_REGISTER
)
1694 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1695 the code which tracks sets/uses for delete_output_reload. */
1696 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand1
),
1697 reg_equiv_mem (REGNO (SUBREG_REG (operand1
))),
1698 SUBREG_BYTE (operand1
));
1699 operand1
= alter_subreg (&temp
, true);
1702 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand0
) == MEM
1703 && ((tem
= find_replacement (&XEXP (operand0
, 0)))
1704 != XEXP (operand0
, 0)))
1705 operand0
= replace_equiv_address (operand0
, tem
);
1707 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand1
) == MEM
1708 && ((tem
= find_replacement (&XEXP (operand1
, 0)))
1709 != XEXP (operand1
, 0)))
1710 operand1
= replace_equiv_address (operand1
, tem
);
1712 /* Handle secondary reloads for loads/stores of FP registers from
1713 REG+D addresses where D does not fit in 5 or 14 bits, including
1714 (subreg (mem (addr))) cases, and reloads for other unsupported
1717 && FP_REG_P (operand0
)
1718 && (MEM_P (operand1
)
1719 || (GET_CODE (operand1
) == SUBREG
1720 && MEM_P (XEXP (operand1
, 0)))))
1724 if (GET_CODE (op1
) == SUBREG
)
1725 op1
= XEXP (op1
, 0);
1727 if (reg_plus_base_memory_operand (op1
, GET_MODE (op1
)))
1731 && INT_14_BITS (XEXP (XEXP (op1
, 0), 1)))
1732 && !INT_5_BITS (XEXP (XEXP (op1
, 0), 1)))
1734 /* SCRATCH_REG will hold an address and maybe the actual data.
1735 We want it in WORD_MODE regardless of what mode it was
1736 originally given to us. */
1737 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1739 /* D might not fit in 14 bits either; for such cases load D
1740 into scratch reg. */
1741 if (!INT_14_BITS (XEXP (XEXP (op1
, 0), 1)))
1743 emit_move_insn (scratch_reg
, XEXP (XEXP (op1
, 0), 1));
1744 emit_move_insn (scratch_reg
,
1745 gen_rtx_fmt_ee (GET_CODE (XEXP (op1
, 0)),
1747 XEXP (XEXP (op1
, 0), 0),
1751 emit_move_insn (scratch_reg
, XEXP (op1
, 0));
1752 op1
= replace_equiv_address (op1
, scratch_reg
);
1755 else if ((!INT14_OK_STRICT
&& symbolic_memory_operand (op1
, VOIDmode
))
1756 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1
, 0))
1757 || IS_INDEX_ADDR_P (XEXP (op1
, 0)))
1759 /* Load memory address into SCRATCH_REG. */
1760 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1761 emit_move_insn (scratch_reg
, XEXP (op1
, 0));
1762 op1
= replace_equiv_address (op1
, scratch_reg
);
1764 emit_insn (gen_rtx_SET (operand0
, op1
));
1767 else if (scratch_reg
1768 && FP_REG_P (operand1
)
1769 && (MEM_P (operand0
)
1770 || (GET_CODE (operand0
) == SUBREG
1771 && MEM_P (XEXP (operand0
, 0)))))
1775 if (GET_CODE (op0
) == SUBREG
)
1776 op0
= XEXP (op0
, 0);
1778 if (reg_plus_base_memory_operand (op0
, GET_MODE (op0
)))
1782 && INT_14_BITS (XEXP (XEXP (op0
, 0), 1)))
1783 && !INT_5_BITS (XEXP (XEXP (op0
, 0), 1)))
1785 /* SCRATCH_REG will hold an address and maybe the actual data.
1786 We want it in WORD_MODE regardless of what mode it was
1787 originally given to us. */
1788 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1790 /* D might not fit in 14 bits either; for such cases load D
1791 into scratch reg. */
1792 if (!INT_14_BITS (XEXP (XEXP (op0
, 0), 1)))
1794 emit_move_insn (scratch_reg
, XEXP (XEXP (op0
, 0), 1));
1795 emit_move_insn (scratch_reg
,
1796 gen_rtx_fmt_ee (GET_CODE (XEXP (op0
, 0)),
1798 XEXP (XEXP (op0
, 0), 0),
1802 emit_move_insn (scratch_reg
, XEXP (op0
, 0));
1803 op0
= replace_equiv_address (op0
, scratch_reg
);
1806 else if ((!INT14_OK_STRICT
&& symbolic_memory_operand (op0
, VOIDmode
))
1807 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0
, 0))
1808 || IS_INDEX_ADDR_P (XEXP (op0
, 0)))
1810 /* Load memory address into SCRATCH_REG. */
1811 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1812 emit_move_insn (scratch_reg
, XEXP (op0
, 0));
1813 op0
= replace_equiv_address (op0
, scratch_reg
);
1815 emit_insn (gen_rtx_SET (op0
, operand1
));
1818 /* Handle secondary reloads for loads of FP registers from constant
1819 expressions by forcing the constant into memory. For the most part,
1820 this is only necessary for SImode and DImode.
1822 Use scratch_reg to hold the address of the memory location. */
1823 else if (scratch_reg
1824 && CONSTANT_P (operand1
)
1825 && FP_REG_P (operand0
))
1827 rtx const_mem
, xoperands
[2];
1829 if (operand1
== CONST0_RTX (mode
))
1831 emit_insn (gen_rtx_SET (operand0
, operand1
));
1835 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1836 it in WORD_MODE regardless of what mode it was originally given
1838 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1840 /* Force the constant into memory and put the address of the
1841 memory location into scratch_reg. */
1842 const_mem
= force_const_mem (mode
, operand1
);
1843 xoperands
[0] = scratch_reg
;
1844 xoperands
[1] = XEXP (const_mem
, 0);
1845 pa_emit_move_sequence (xoperands
, Pmode
, 0);
1847 /* Now load the destination register. */
1848 emit_insn (gen_rtx_SET (operand0
,
1849 replace_equiv_address (const_mem
, scratch_reg
)));
1852 /* Handle secondary reloads for SAR. These occur when trying to load
1853 the SAR from memory or a constant. */
1854 else if (scratch_reg
1855 && GET_CODE (operand0
) == REG
1856 && REGNO (operand0
) < FIRST_PSEUDO_REGISTER
1857 && REGNO_REG_CLASS (REGNO (operand0
)) == SHIFT_REGS
1858 && (GET_CODE (operand1
) == MEM
|| GET_CODE (operand1
) == CONST_INT
))
1860 /* D might not fit in 14 bits either; for such cases load D into
1862 if (GET_CODE (operand1
) == MEM
1863 && !memory_address_p (GET_MODE (operand0
), XEXP (operand1
, 0)))
1865 /* We are reloading the address into the scratch register, so we
1866 want to make sure the scratch register is a full register. */
1867 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1869 emit_move_insn (scratch_reg
, XEXP (XEXP (operand1
, 0), 1));
1870 emit_move_insn (scratch_reg
, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1
,
1873 XEXP (XEXP (operand1
, 0),
1877 /* Now we are going to load the scratch register from memory,
1878 we want to load it in the same width as the original MEM,
1879 which must be the same as the width of the ultimate destination,
1881 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1883 emit_move_insn (scratch_reg
,
1884 replace_equiv_address (operand1
, scratch_reg
));
1888 /* We want to load the scratch register using the same mode as
1889 the ultimate destination. */
1890 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1892 emit_move_insn (scratch_reg
, operand1
);
1895 /* And emit the insn to set the ultimate destination. We know that
1896 the scratch register has the same mode as the destination at this
1898 emit_move_insn (operand0
, scratch_reg
);
1902 /* Handle the most common case: storing into a register. */
1903 if (register_operand (operand0
, mode
))
1905 /* Legitimize TLS symbol references. This happens for references
1906 that aren't a legitimate constant. */
1907 if (PA_SYMBOL_REF_TLS_P (operand1
))
1908 operand1
= legitimize_tls_address (operand1
);
1910 if (register_operand (operand1
, mode
)
1911 || (GET_CODE (operand1
) == CONST_INT
1912 && pa_cint_ok_for_move (UINTVAL (operand1
)))
1913 || (operand1
== CONST0_RTX (mode
))
1914 || (GET_CODE (operand1
) == HIGH
1915 && !symbolic_operand (XEXP (operand1
, 0), VOIDmode
))
1916 /* Only `general_operands' can come here, so MEM is ok. */
1917 || GET_CODE (operand1
) == MEM
)
1919 /* Various sets are created during RTL generation which don't
1920 have the REG_POINTER flag correctly set. After the CSE pass,
1921 instruction recognition can fail if we don't consistently
1922 set this flag when performing register copies. This should
1923 also improve the opportunities for creating insns that use
1924 unscaled indexing. */
1925 if (REG_P (operand0
) && REG_P (operand1
))
1927 if (REG_POINTER (operand1
)
1928 && !REG_POINTER (operand0
)
1929 && !HARD_REGISTER_P (operand0
))
1930 copy_reg_pointer (operand0
, operand1
);
1933 /* When MEMs are broken out, the REG_POINTER flag doesn't
1934 get set. In some cases, we can set the REG_POINTER flag
1935 from the declaration for the MEM. */
1936 if (REG_P (operand0
)
1937 && GET_CODE (operand1
) == MEM
1938 && !REG_POINTER (operand0
))
1940 tree decl
= MEM_EXPR (operand1
);
1942 /* Set the register pointer flag and register alignment
1943 if the declaration for this memory reference is a
1949 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1951 if (TREE_CODE (decl
) == COMPONENT_REF
)
1952 decl
= TREE_OPERAND (decl
, 1);
1954 type
= TREE_TYPE (decl
);
1955 type
= strip_array_types (type
);
1957 if (POINTER_TYPE_P (type
))
1958 mark_reg_pointer (operand0
, BITS_PER_UNIT
);
1962 emit_insn (gen_rtx_SET (operand0
, operand1
));
1966 else if (GET_CODE (operand0
) == MEM
)
1968 if (mode
== DFmode
&& operand1
== CONST0_RTX (mode
)
1969 && !(reload_in_progress
|| reload_completed
))
1971 rtx temp
= gen_reg_rtx (DFmode
);
1973 emit_insn (gen_rtx_SET (temp
, operand1
));
1974 emit_insn (gen_rtx_SET (operand0
, temp
));
1977 if (register_operand (operand1
, mode
) || operand1
== CONST0_RTX (mode
))
1979 /* Run this case quickly. */
1980 emit_insn (gen_rtx_SET (operand0
, operand1
));
1983 if (! (reload_in_progress
|| reload_completed
))
1985 operands
[0] = validize_mem (operand0
);
1986 operands
[1] = operand1
= force_reg (mode
, operand1
);
1990 /* Simplify the source if we need to.
1991 Note we do have to handle function labels here, even though we do
1992 not consider them legitimate constants. Loop optimizations can
1993 call the emit_move_xxx with one as a source. */
1994 if ((GET_CODE (operand1
) != HIGH
&& immediate_operand (operand1
, mode
))
1995 || (GET_CODE (operand1
) == HIGH
1996 && symbolic_operand (XEXP (operand1
, 0), mode
))
1997 || function_label_operand (operand1
, VOIDmode
)
1998 || tls_referenced_p (operand1
))
2002 if (GET_CODE (operand1
) == HIGH
)
2005 operand1
= XEXP (operand1
, 0);
2007 if (symbolic_operand (operand1
, mode
))
2009 /* Argh. The assembler and linker can't handle arithmetic
2012 So we force the plabel into memory, load operand0 from
2013 the memory location, then add in the constant part. */
2014 if ((GET_CODE (operand1
) == CONST
2015 && GET_CODE (XEXP (operand1
, 0)) == PLUS
2016 && function_label_operand (XEXP (XEXP (operand1
, 0), 0),
2018 || function_label_operand (operand1
, VOIDmode
))
2020 rtx temp
, const_part
;
2022 /* Figure out what (if any) scratch register to use. */
2023 if (reload_in_progress
|| reload_completed
)
2025 scratch_reg
= scratch_reg
? scratch_reg
: operand0
;
2026 /* SCRATCH_REG will hold an address and maybe the actual
2027 data. We want it in WORD_MODE regardless of what mode it
2028 was originally given to us. */
2029 scratch_reg
= force_mode (word_mode
, scratch_reg
);
2032 scratch_reg
= gen_reg_rtx (Pmode
);
2034 if (GET_CODE (operand1
) == CONST
)
2036 /* Save away the constant part of the expression. */
2037 const_part
= XEXP (XEXP (operand1
, 0), 1);
2038 gcc_assert (GET_CODE (const_part
) == CONST_INT
);
2040 /* Force the function label into memory. */
2041 temp
= force_const_mem (mode
, XEXP (XEXP (operand1
, 0), 0));
2045 /* No constant part. */
2046 const_part
= NULL_RTX
;
2048 /* Force the function label into memory. */
2049 temp
= force_const_mem (mode
, operand1
);
2053 /* Get the address of the memory location. PIC-ify it if
2055 temp
= XEXP (temp
, 0);
2057 temp
= legitimize_pic_address (temp
, mode
, scratch_reg
);
2059 /* Put the address of the memory location into our destination
2062 pa_emit_move_sequence (operands
, mode
, scratch_reg
);
2064 /* Now load from the memory location into our destination
2066 operands
[1] = gen_rtx_MEM (Pmode
, operands
[0]);
2067 pa_emit_move_sequence (operands
, mode
, scratch_reg
);
2069 /* And add back in the constant part. */
2070 if (const_part
!= NULL_RTX
)
2071 expand_inc (operand0
, const_part
);
2081 if (reload_in_progress
|| reload_completed
)
2083 temp
= scratch_reg
? scratch_reg
: operand0
;
2084 /* TEMP will hold an address and maybe the actual
2085 data. We want it in WORD_MODE regardless of what mode it
2086 was originally given to us. */
2087 temp
= force_mode (word_mode
, temp
);
2090 temp
= gen_reg_rtx (Pmode
);
2092 /* Force (const (plus (symbol) (const_int))) to memory
2093 if the const_int will not fit in 14 bits. Although
2094 this requires a relocation, the instruction sequence
2095 needed to load the value is shorter. */
2096 if (GET_CODE (operand1
) == CONST
2097 && GET_CODE (XEXP (operand1
, 0)) == PLUS
2098 && GET_CODE (XEXP (XEXP (operand1
, 0), 1)) == CONST_INT
2099 && !INT_14_BITS (XEXP (XEXP (operand1
, 0), 1)))
2101 rtx x
, m
= force_const_mem (mode
, operand1
);
2103 x
= legitimize_pic_address (XEXP (m
, 0), mode
, temp
);
2104 x
= replace_equiv_address (m
, x
);
2105 insn
= emit_move_insn (operand0
, x
);
2109 operands
[1] = legitimize_pic_address (operand1
, mode
, temp
);
2110 if (REG_P (operand0
) && REG_P (operands
[1]))
2111 copy_reg_pointer (operand0
, operands
[1]);
2112 insn
= emit_move_insn (operand0
, operands
[1]);
2115 /* Put a REG_EQUAL note on this insn. */
2116 set_unique_reg_note (insn
, REG_EQUAL
, operand1
);
2118 /* On the HPPA, references to data space are supposed to use dp,
2119 register 27, but showing it in the RTL inhibits various cse
2120 and loop optimizations. */
2125 if (reload_in_progress
|| reload_completed
)
2127 temp
= scratch_reg
? scratch_reg
: operand0
;
2128 /* TEMP will hold an address and maybe the actual
2129 data. We want it in WORD_MODE regardless of what mode it
2130 was originally given to us. */
2131 temp
= force_mode (word_mode
, temp
);
2134 temp
= gen_reg_rtx (mode
);
2136 /* Loading a SYMBOL_REF into a register makes that register
2137 safe to be used as the base in an indexed address.
2139 Don't mark hard registers though. That loses. */
2140 if (GET_CODE (operand0
) == REG
2141 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
2142 mark_reg_pointer (operand0
, BITS_PER_UNIT
);
2143 if (REGNO (temp
) >= FIRST_PSEUDO_REGISTER
)
2144 mark_reg_pointer (temp
, BITS_PER_UNIT
);
2147 set
= gen_rtx_SET (operand0
, temp
);
2149 set
= gen_rtx_SET (operand0
,
2150 gen_rtx_LO_SUM (mode
, temp
, operand1
));
2152 emit_insn (gen_rtx_SET (temp
, gen_rtx_HIGH (mode
, operand1
)));
2158 else if (tls_referenced_p (operand1
))
2163 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
2165 addend
= XEXP (XEXP (tmp
, 0), 1);
2166 tmp
= XEXP (XEXP (tmp
, 0), 0);
2169 gcc_assert (GET_CODE (tmp
) == SYMBOL_REF
);
2170 tmp
= legitimize_tls_address (tmp
);
2173 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
2174 tmp
= force_operand (tmp
, operands
[0]);
2178 else if (GET_CODE (operand1
) != CONST_INT
2179 || !pa_cint_ok_for_move (UINTVAL (operand1
)))
2184 HOST_WIDE_INT value
= 0;
2185 HOST_WIDE_INT insv
= 0;
2188 if (GET_CODE (operand1
) == CONST_INT
)
2189 value
= INTVAL (operand1
);
2192 && GET_CODE (operand1
) == CONST_INT
2193 && HOST_BITS_PER_WIDE_INT
> 32
2194 && GET_MODE_BITSIZE (GET_MODE (operand0
)) > 32)
2198 /* Extract the low order 32 bits of the value and sign extend.
2199 If the new value is the same as the original value, we can
2200 can use the original value as-is. If the new value is
2201 different, we use it and insert the most-significant 32-bits
2202 of the original value into the final result. */
2203 nval
= ((value
& (((HOST_WIDE_INT
) 2 << 31) - 1))
2204 ^ ((HOST_WIDE_INT
) 1 << 31)) - ((HOST_WIDE_INT
) 1 << 31);
2207 #if HOST_BITS_PER_WIDE_INT > 32
2208 insv
= value
>= 0 ? value
>> 32 : ~(~value
>> 32);
2212 operand1
= GEN_INT (nval
);
2216 if (reload_in_progress
|| reload_completed
)
2217 temp
= scratch_reg
? scratch_reg
: operand0
;
2219 temp
= gen_reg_rtx (mode
);
2221 /* We don't directly split DImode constants on 32-bit targets
2222 because PLUS uses an 11-bit immediate and the insn sequence
2223 generated is not as efficient as the one using HIGH/LO_SUM. */
2224 if (GET_CODE (operand1
) == CONST_INT
2225 && GET_MODE_BITSIZE (mode
) <= BITS_PER_WORD
2226 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
2229 /* Directly break constant into high and low parts. This
2230 provides better optimization opportunities because various
2231 passes recognize constants split with PLUS but not LO_SUM.
2232 We use a 14-bit signed low part except when the addition
2233 of 0x4000 to the high part might change the sign of the
2235 HOST_WIDE_INT low
= value
& 0x3fff;
2236 HOST_WIDE_INT high
= value
& ~ 0x3fff;
2240 if (high
== 0x7fffc000 || (mode
== HImode
&& high
== 0x4000))
2248 emit_insn (gen_rtx_SET (temp
, GEN_INT (high
)));
2249 operands
[1] = gen_rtx_PLUS (mode
, temp
, GEN_INT (low
));
2253 emit_insn (gen_rtx_SET (temp
, gen_rtx_HIGH (mode
, operand1
)));
2254 operands
[1] = gen_rtx_LO_SUM (mode
, temp
, operand1
);
2257 insn
= emit_move_insn (operands
[0], operands
[1]);
2259 /* Now insert the most significant 32 bits of the value
2260 into the register. When we don't have a second register
2261 available, it could take up to nine instructions to load
2262 a 64-bit integer constant. Prior to reload, we force
2263 constants that would take more than three instructions
2264 to load to the constant pool. During and after reload,
2265 we have to handle all possible values. */
2268 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2269 register and the value to be inserted is outside the
2270 range that can be loaded with three depdi instructions. */
2271 if (temp
!= operand0
&& (insv
>= 16384 || insv
< -16384))
2273 operand1
= GEN_INT (insv
);
2275 emit_insn (gen_rtx_SET (temp
,
2276 gen_rtx_HIGH (mode
, operand1
)));
2277 emit_move_insn (temp
, gen_rtx_LO_SUM (mode
, temp
, operand1
));
2279 insn
= emit_insn (gen_insvdi (operand0
, GEN_INT (32),
2282 insn
= emit_insn (gen_insvsi (operand0
, GEN_INT (32),
2287 int len
= 5, pos
= 27;
2289 /* Insert the bits using the depdi instruction. */
2292 HOST_WIDE_INT v5
= ((insv
& 31) ^ 16) - 16;
2293 HOST_WIDE_INT sign
= v5
< 0;
2295 /* Left extend the insertion. */
2296 insv
= (insv
>= 0 ? insv
>> len
: ~(~insv
>> len
));
2297 while (pos
> 0 && (insv
& 1) == sign
)
2299 insv
= (insv
>= 0 ? insv
>> 1 : ~(~insv
>> 1));
2305 insn
= emit_insn (gen_insvdi (operand0
,
2310 insn
= emit_insn (gen_insvsi (operand0
,
2315 len
= pos
> 0 && pos
< 5 ? pos
: 5;
2321 set_unique_reg_note (insn
, REG_EQUAL
, op1
);
2326 /* Now have insn-emit do whatever it normally does. */
2330 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2331 it will need a link/runtime reloc). */
2334 pa_reloc_needed (tree exp
)
2338 switch (TREE_CODE (exp
))
2343 case POINTER_PLUS_EXPR
:
2346 reloc
= pa_reloc_needed (TREE_OPERAND (exp
, 0));
2347 reloc
|= pa_reloc_needed (TREE_OPERAND (exp
, 1));
2351 case NON_LVALUE_EXPR
:
2352 reloc
= pa_reloc_needed (TREE_OPERAND (exp
, 0));
2358 unsigned HOST_WIDE_INT ix
;
2360 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp
), ix
, value
)
2362 reloc
|= pa_reloc_needed (value
);
2376 /* Return the best assembler insn template
2377 for moving operands[1] into operands[0] as a fullword. */
2379 pa_singlemove_string (rtx
*operands
)
2381 HOST_WIDE_INT intval
;
2383 if (GET_CODE (operands
[0]) == MEM
)
2384 return "stw %r1,%0";
2385 if (GET_CODE (operands
[1]) == MEM
)
2387 if (GET_CODE (operands
[1]) == CONST_DOUBLE
)
2391 gcc_assert (GET_MODE (operands
[1]) == SFmode
);
2393 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2395 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands
[1]), i
);
2397 operands
[1] = GEN_INT (i
);
2398 /* Fall through to CONST_INT case. */
2400 if (GET_CODE (operands
[1]) == CONST_INT
)
2402 intval
= INTVAL (operands
[1]);
2404 if (VAL_14_BITS_P (intval
))
2406 else if ((intval
& 0x7ff) == 0)
2407 return "ldil L'%1,%0";
2408 else if (pa_zdepi_cint_p (intval
))
2409 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2411 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2413 return "copy %1,%0";
2417 /* Compute position (in OP[1]) and width (in OP[2])
2418 useful for copying IMM to a register using the zdepi
2419 instructions. Store the immediate value to insert in OP[0]. */
2421 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2425 /* Find the least significant set bit in IMM. */
2426 for (lsb
= 0; lsb
< 32; lsb
++)
2433 /* Choose variants based on *sign* of the 5-bit field. */
2434 if ((imm
& 0x10) == 0)
2435 len
= (lsb
<= 28) ? 4 : 32 - lsb
;
2438 /* Find the width of the bitstring in IMM. */
2439 for (len
= 5; len
< 32 - lsb
; len
++)
2441 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2445 /* Sign extend IMM as a 5-bit value. */
2446 imm
= (imm
& 0xf) - 0x10;
2454 /* Compute position (in OP[1]) and width (in OP[2])
2455 useful for copying IMM to a register using the depdi,z
2456 instructions. Store the immediate value to insert in OP[0]. */
2459 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2461 int lsb
, len
, maxlen
;
2463 maxlen
= MIN (HOST_BITS_PER_WIDE_INT
, 64);
2465 /* Find the least significant set bit in IMM. */
2466 for (lsb
= 0; lsb
< maxlen
; lsb
++)
2473 /* Choose variants based on *sign* of the 5-bit field. */
2474 if ((imm
& 0x10) == 0)
2475 len
= (lsb
<= maxlen
- 4) ? 4 : maxlen
- lsb
;
2478 /* Find the width of the bitstring in IMM. */
2479 for (len
= 5; len
< maxlen
- lsb
; len
++)
2481 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2485 /* Extend length if host is narrow and IMM is negative. */
2486 if (HOST_BITS_PER_WIDE_INT
== 32 && len
== maxlen
- lsb
)
2489 /* Sign extend IMM as a 5-bit value. */
2490 imm
= (imm
& 0xf) - 0x10;
2498 /* Output assembler code to perform a doubleword move insn
2499 with operands OPERANDS. */
2502 pa_output_move_double (rtx
*operands
)
2504 enum { REGOP
, OFFSOP
, MEMOP
, CNSTOP
, RNDOP
} optype0
, optype1
;
2506 rtx addreg0
= 0, addreg1
= 0;
2509 /* First classify both operands. */
2511 if (REG_P (operands
[0]))
2513 else if (offsettable_memref_p (operands
[0]))
2515 else if (GET_CODE (operands
[0]) == MEM
)
2520 if (REG_P (operands
[1]))
2522 else if (CONSTANT_P (operands
[1]))
2524 else if (offsettable_memref_p (operands
[1]))
2526 else if (GET_CODE (operands
[1]) == MEM
)
2531 /* Check for the cases that the operand constraints are not
2532 supposed to allow to happen. */
2533 gcc_assert (optype0
== REGOP
|| optype1
== REGOP
);
2535 /* Handle copies between general and floating registers. */
2537 if (optype0
== REGOP
&& optype1
== REGOP
2538 && FP_REG_P (operands
[0]) ^ FP_REG_P (operands
[1]))
2540 if (FP_REG_P (operands
[0]))
2542 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands
);
2543 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands
);
2544 return "{fldds|fldd} -16(%%sp),%0";
2548 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands
);
2549 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands
);
2550 return "{ldws|ldw} -12(%%sp),%R0";
2554 /* Handle auto decrementing and incrementing loads and stores
2555 specifically, since the structure of the function doesn't work
2556 for them without major modification. Do it better when we learn
2557 this port about the general inc/dec addressing of PA.
2558 (This was written by tege. Chide him if it doesn't work.) */
2560 if (optype0
== MEMOP
)
2562 /* We have to output the address syntax ourselves, since print_operand
2563 doesn't deal with the addresses we want to use. Fix this later. */
2565 rtx addr
= XEXP (operands
[0], 0);
2566 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2568 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2570 operands
[0] = XEXP (addr
, 0);
2571 gcc_assert (GET_CODE (operands
[1]) == REG
2572 && GET_CODE (operands
[0]) == REG
);
2574 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2576 /* No overlap between high target register and address
2577 register. (We do this in a non-obvious way to
2578 save a register file writeback) */
2579 if (GET_CODE (addr
) == POST_INC
)
2580 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2581 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2583 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2585 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2587 operands
[0] = XEXP (addr
, 0);
2588 gcc_assert (GET_CODE (operands
[1]) == REG
2589 && GET_CODE (operands
[0]) == REG
);
2591 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2592 /* No overlap between high target register and address
2593 register. (We do this in a non-obvious way to save a
2594 register file writeback) */
2595 if (GET_CODE (addr
) == PRE_INC
)
2596 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2597 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2600 if (optype1
== MEMOP
)
2602 /* We have to output the address syntax ourselves, since print_operand
2603 doesn't deal with the addresses we want to use. Fix this later. */
2605 rtx addr
= XEXP (operands
[1], 0);
2606 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2608 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2610 operands
[1] = XEXP (addr
, 0);
2611 gcc_assert (GET_CODE (operands
[0]) == REG
2612 && GET_CODE (operands
[1]) == REG
);
2614 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2616 /* No overlap between high target register and address
2617 register. (We do this in a non-obvious way to
2618 save a register file writeback) */
2619 if (GET_CODE (addr
) == POST_INC
)
2620 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2621 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2625 /* This is an undefined situation. We should load into the
2626 address register *and* update that register. Probably
2627 we don't need to handle this at all. */
2628 if (GET_CODE (addr
) == POST_INC
)
2629 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2630 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2633 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2635 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2637 operands
[1] = XEXP (addr
, 0);
2638 gcc_assert (GET_CODE (operands
[0]) == REG
2639 && GET_CODE (operands
[1]) == REG
);
2641 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2643 /* No overlap between high target register and address
2644 register. (We do this in a non-obvious way to
2645 save a register file writeback) */
2646 if (GET_CODE (addr
) == PRE_INC
)
2647 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2648 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2652 /* This is an undefined situation. We should load into the
2653 address register *and* update that register. Probably
2654 we don't need to handle this at all. */
2655 if (GET_CODE (addr
) == PRE_INC
)
2656 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2657 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2660 else if (GET_CODE (addr
) == PLUS
2661 && GET_CODE (XEXP (addr
, 0)) == MULT
)
2665 /* Load address into left half of destination register. */
2666 xoperands
[0] = gen_rtx_SUBREG (SImode
, operands
[0], 0);
2667 xoperands
[1] = XEXP (addr
, 1);
2668 xoperands
[2] = XEXP (XEXP (addr
, 0), 0);
2669 xoperands
[3] = XEXP (XEXP (addr
, 0), 1);
2670 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2672 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2674 else if (GET_CODE (addr
) == PLUS
2675 && REG_P (XEXP (addr
, 0))
2676 && REG_P (XEXP (addr
, 1)))
2680 /* Load address into left half of destination register. */
2681 xoperands
[0] = gen_rtx_SUBREG (SImode
, operands
[0], 0);
2682 xoperands
[1] = XEXP (addr
, 0);
2683 xoperands
[2] = XEXP (addr
, 1);
2684 output_asm_insn ("{addl|add,l} %1,%2,%0",
2686 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2690 /* If an operand is an unoffsettable memory ref, find a register
2691 we can increment temporarily to make it refer to the second word. */
2693 if (optype0
== MEMOP
)
2694 addreg0
= find_addr_reg (XEXP (operands
[0], 0));
2696 if (optype1
== MEMOP
)
2697 addreg1
= find_addr_reg (XEXP (operands
[1], 0));
2699 /* Ok, we can do one word at a time.
2700 Normally we do the low-numbered word first.
2702 In either case, set up in LATEHALF the operands to use
2703 for the high-numbered word and in some cases alter the
2704 operands in OPERANDS to be suitable for the low-numbered word. */
2706 if (optype0
== REGOP
)
2707 latehalf
[0] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2708 else if (optype0
== OFFSOP
)
2709 latehalf
[0] = adjust_address_nv (operands
[0], SImode
, 4);
2711 latehalf
[0] = operands
[0];
2713 if (optype1
== REGOP
)
2714 latehalf
[1] = gen_rtx_REG (SImode
, REGNO (operands
[1]) + 1);
2715 else if (optype1
== OFFSOP
)
2716 latehalf
[1] = adjust_address_nv (operands
[1], SImode
, 4);
2717 else if (optype1
== CNSTOP
)
2719 if (GET_CODE (operands
[1]) == HIGH
)
2721 operands
[1] = XEXP (operands
[1], 0);
2724 split_double (operands
[1], &operands
[1], &latehalf
[1]);
2727 latehalf
[1] = operands
[1];
2729 /* If the first move would clobber the source of the second one,
2730 do them in the other order.
2732 This can happen in two cases:
2734 mem -> register where the first half of the destination register
2735 is the same register used in the memory's address. Reload
2736 can create such insns.
2738 mem in this case will be either register indirect or register
2739 indirect plus a valid offset.
2741 register -> register move where REGNO(dst) == REGNO(src + 1)
2742 someone (Tim/Tege?) claimed this can happen for parameter loads.
2744 Handle mem -> register case first. */
2745 if (optype0
== REGOP
2746 && (optype1
== MEMOP
|| optype1
== OFFSOP
)
2747 && refers_to_regno_p (REGNO (operands
[0]), operands
[1]))
2749 /* Do the late half first. */
2751 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
2752 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2756 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
2757 return pa_singlemove_string (operands
);
2760 /* Now handle register -> register case. */
2761 if (optype0
== REGOP
&& optype1
== REGOP
2762 && REGNO (operands
[0]) == REGNO (operands
[1]) + 1)
2764 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2765 return pa_singlemove_string (operands
);
2768 /* Normal case: do the two words, low-numbered first. */
2770 output_asm_insn (pa_singlemove_string (operands
), operands
);
2772 /* Make any unoffsettable addresses point at high-numbered word. */
2774 output_asm_insn ("ldo 4(%0),%0", &addreg0
);
2776 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
2778 /* Do high-numbered word. */
2780 output_asm_insn ("ldil L'%1,%0", latehalf
);
2782 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2784 /* Undo the adds we just did. */
2786 output_asm_insn ("ldo -4(%0),%0", &addreg0
);
2788 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
2794 pa_output_fp_move_double (rtx
*operands
)
2796 if (FP_REG_P (operands
[0]))
2798 if (FP_REG_P (operands
[1])
2799 || operands
[1] == CONST0_RTX (GET_MODE (operands
[0])))
2800 output_asm_insn ("fcpy,dbl %f1,%0", operands
);
2802 output_asm_insn ("fldd%F1 %1,%0", operands
);
2804 else if (FP_REG_P (operands
[1]))
2806 output_asm_insn ("fstd%F0 %1,%0", operands
);
2812 gcc_assert (operands
[1] == CONST0_RTX (GET_MODE (operands
[0])));
2814 /* This is a pain. You have to be prepared to deal with an
2815 arbitrary address here including pre/post increment/decrement.
2817 so avoid this in the MD. */
2818 gcc_assert (GET_CODE (operands
[0]) == REG
);
2820 xoperands
[1] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2821 xoperands
[0] = operands
[0];
2822 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands
);
2827 /* Return a REG that occurs in ADDR with coefficient 1.
2828 ADDR can be effectively incremented by incrementing REG. */
2831 find_addr_reg (rtx addr
)
2833 while (GET_CODE (addr
) == PLUS
)
2835 if (GET_CODE (XEXP (addr
, 0)) == REG
)
2836 addr
= XEXP (addr
, 0);
2837 else if (GET_CODE (XEXP (addr
, 1)) == REG
)
2838 addr
= XEXP (addr
, 1);
2839 else if (CONSTANT_P (XEXP (addr
, 0)))
2840 addr
= XEXP (addr
, 1);
2841 else if (CONSTANT_P (XEXP (addr
, 1)))
2842 addr
= XEXP (addr
, 0);
2846 gcc_assert (GET_CODE (addr
) == REG
);
2850 /* Emit code to perform a block move.
2852 OPERANDS[0] is the destination pointer as a REG, clobbered.
2853 OPERANDS[1] is the source pointer as a REG, clobbered.
2854 OPERANDS[2] is a register for temporary storage.
2855 OPERANDS[3] is a register for temporary storage.
2856 OPERANDS[4] is the size as a CONST_INT
2857 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2858 OPERANDS[6] is another temporary register. */
2861 pa_output_block_move (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
2863 int align
= INTVAL (operands
[5]);
2864 unsigned long n_bytes
= INTVAL (operands
[4]);
2866 /* We can't move more than a word at a time because the PA
2867 has no longer integer move insns. (Could use fp mem ops?) */
2868 if (align
> (TARGET_64BIT
? 8 : 4))
2869 align
= (TARGET_64BIT
? 8 : 4);
2871 /* Note that we know each loop below will execute at least twice
2872 (else we would have open-coded the copy). */
2876 /* Pre-adjust the loop counter. */
2877 operands
[4] = GEN_INT (n_bytes
- 16);
2878 output_asm_insn ("ldi %4,%2", operands
);
2881 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
2882 output_asm_insn ("ldd,ma 8(%1),%6", operands
);
2883 output_asm_insn ("std,ma %3,8(%0)", operands
);
2884 output_asm_insn ("addib,>= -16,%2,.-12", operands
);
2885 output_asm_insn ("std,ma %6,8(%0)", operands
);
2887 /* Handle the residual. There could be up to 7 bytes of
2888 residual to copy! */
2889 if (n_bytes
% 16 != 0)
2891 operands
[4] = GEN_INT (n_bytes
% 8);
2892 if (n_bytes
% 16 >= 8)
2893 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
2894 if (n_bytes
% 8 != 0)
2895 output_asm_insn ("ldd 0(%1),%6", operands
);
2896 if (n_bytes
% 16 >= 8)
2897 output_asm_insn ("std,ma %3,8(%0)", operands
);
2898 if (n_bytes
% 8 != 0)
2899 output_asm_insn ("stdby,e %6,%4(%0)", operands
);
2904 /* Pre-adjust the loop counter. */
2905 operands
[4] = GEN_INT (n_bytes
- 8);
2906 output_asm_insn ("ldi %4,%2", operands
);
2909 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
2910 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands
);
2911 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
2912 output_asm_insn ("addib,>= -8,%2,.-12", operands
);
2913 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands
);
2915 /* Handle the residual. There could be up to 7 bytes of
2916 residual to copy! */
2917 if (n_bytes
% 8 != 0)
2919 operands
[4] = GEN_INT (n_bytes
% 4);
2920 if (n_bytes
% 8 >= 4)
2921 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
2922 if (n_bytes
% 4 != 0)
2923 output_asm_insn ("ldw 0(%1),%6", operands
);
2924 if (n_bytes
% 8 >= 4)
2925 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
2926 if (n_bytes
% 4 != 0)
2927 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands
);
2932 /* Pre-adjust the loop counter. */
2933 operands
[4] = GEN_INT (n_bytes
- 4);
2934 output_asm_insn ("ldi %4,%2", operands
);
2937 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
2938 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands
);
2939 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
2940 output_asm_insn ("addib,>= -4,%2,.-12", operands
);
2941 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands
);
2943 /* Handle the residual. */
2944 if (n_bytes
% 4 != 0)
2946 if (n_bytes
% 4 >= 2)
2947 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
2948 if (n_bytes
% 2 != 0)
2949 output_asm_insn ("ldb 0(%1),%6", operands
);
2950 if (n_bytes
% 4 >= 2)
2951 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
2952 if (n_bytes
% 2 != 0)
2953 output_asm_insn ("stb %6,0(%0)", operands
);
2958 /* Pre-adjust the loop counter. */
2959 operands
[4] = GEN_INT (n_bytes
- 2);
2960 output_asm_insn ("ldi %4,%2", operands
);
2963 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands
);
2964 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands
);
2965 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands
);
2966 output_asm_insn ("addib,>= -2,%2,.-12", operands
);
2967 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands
);
2969 /* Handle the residual. */
2970 if (n_bytes
% 2 != 0)
2972 output_asm_insn ("ldb 0(%1),%3", operands
);
2973 output_asm_insn ("stb %3,0(%0)", operands
);
2982 /* Count the number of insns necessary to handle this block move.
2984 Basic structure is the same as emit_block_move, except that we
2985 count insns rather than emit them. */
2988 compute_cpymem_length (rtx_insn
*insn
)
2990 rtx pat
= PATTERN (insn
);
2991 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 7), 0));
2992 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 6), 0));
2993 unsigned int n_insns
= 0;
2995 /* We can't move more than four bytes at a time because the PA
2996 has no longer integer move insns. (Could use fp mem ops?) */
2997 if (align
> (TARGET_64BIT
? 8 : 4))
2998 align
= (TARGET_64BIT
? 8 : 4);
3000 /* The basic copying loop. */
3004 if (n_bytes
% (2 * align
) != 0)
3006 if ((n_bytes
% (2 * align
)) >= align
)
3009 if ((n_bytes
% align
) != 0)
3013 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3017 /* Emit code to perform a block clear.
3019 OPERANDS[0] is the destination pointer as a REG, clobbered.
3020 OPERANDS[1] is a register for temporary storage.
3021 OPERANDS[2] is the size as a CONST_INT
3022 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3025 pa_output_block_clear (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
3027 int align
= INTVAL (operands
[3]);
3028 unsigned long n_bytes
= INTVAL (operands
[2]);
3030 /* We can't clear more than a word at a time because the PA
3031 has no longer integer move insns. */
3032 if (align
> (TARGET_64BIT
? 8 : 4))
3033 align
= (TARGET_64BIT
? 8 : 4);
3035 /* Note that we know each loop below will execute at least twice
3036 (else we would have open-coded the copy). */
3040 /* Pre-adjust the loop counter. */
3041 operands
[2] = GEN_INT (n_bytes
- 16);
3042 output_asm_insn ("ldi %2,%1", operands
);
3045 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3046 output_asm_insn ("addib,>= -16,%1,.-4", operands
);
3047 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3049 /* Handle the residual. There could be up to 7 bytes of
3050 residual to copy! */
3051 if (n_bytes
% 16 != 0)
3053 operands
[2] = GEN_INT (n_bytes
% 8);
3054 if (n_bytes
% 16 >= 8)
3055 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3056 if (n_bytes
% 8 != 0)
3057 output_asm_insn ("stdby,e %%r0,%2(%0)", operands
);
3062 /* Pre-adjust the loop counter. */
3063 operands
[2] = GEN_INT (n_bytes
- 8);
3064 output_asm_insn ("ldi %2,%1", operands
);
3067 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3068 output_asm_insn ("addib,>= -8,%1,.-4", operands
);
3069 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3071 /* Handle the residual. There could be up to 7 bytes of
3072 residual to copy! */
3073 if (n_bytes
% 8 != 0)
3075 operands
[2] = GEN_INT (n_bytes
% 4);
3076 if (n_bytes
% 8 >= 4)
3077 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3078 if (n_bytes
% 4 != 0)
3079 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands
);
3084 /* Pre-adjust the loop counter. */
3085 operands
[2] = GEN_INT (n_bytes
- 4);
3086 output_asm_insn ("ldi %2,%1", operands
);
3089 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3090 output_asm_insn ("addib,>= -4,%1,.-4", operands
);
3091 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3093 /* Handle the residual. */
3094 if (n_bytes
% 4 != 0)
3096 if (n_bytes
% 4 >= 2)
3097 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3098 if (n_bytes
% 2 != 0)
3099 output_asm_insn ("stb %%r0,0(%0)", operands
);
3104 /* Pre-adjust the loop counter. */
3105 operands
[2] = GEN_INT (n_bytes
- 2);
3106 output_asm_insn ("ldi %2,%1", operands
);
3109 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
3110 output_asm_insn ("addib,>= -2,%1,.-4", operands
);
3111 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
3113 /* Handle the residual. */
3114 if (n_bytes
% 2 != 0)
3115 output_asm_insn ("stb %%r0,0(%0)", operands
);
3124 /* Count the number of insns necessary to handle this block move.
3126 Basic structure is the same as emit_block_move, except that we
3127 count insns rather than emit them. */
3130 compute_clrmem_length (rtx_insn
*insn
)
3132 rtx pat
= PATTERN (insn
);
3133 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 4), 0));
3134 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 3), 0));
3135 unsigned int n_insns
= 0;
3137 /* We can't clear more than a word at a time because the PA
3138 has no longer integer move insns. */
3139 if (align
> (TARGET_64BIT
? 8 : 4))
3140 align
= (TARGET_64BIT
? 8 : 4);
3142 /* The basic loop. */
3146 if (n_bytes
% (2 * align
) != 0)
3148 if ((n_bytes
% (2 * align
)) >= align
)
3151 if ((n_bytes
% align
) != 0)
3155 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3161 pa_output_and (rtx
*operands
)
3163 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3165 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3166 int ls0
, ls1
, ms0
, p
, len
;
3168 for (ls0
= 0; ls0
< 32; ls0
++)
3169 if ((mask
& (1 << ls0
)) == 0)
3172 for (ls1
= ls0
; ls1
< 32; ls1
++)
3173 if ((mask
& (1 << ls1
)) != 0)
3176 for (ms0
= ls1
; ms0
< 32; ms0
++)
3177 if ((mask
& (1 << ms0
)) == 0)
3180 gcc_assert (ms0
== 32);
3188 operands
[2] = GEN_INT (len
);
3189 return "{extru|extrw,u} %1,31,%2,%0";
3193 /* We could use this `depi' for the case above as well, but `depi'
3194 requires one more register file access than an `extru'. */
3199 operands
[2] = GEN_INT (p
);
3200 operands
[3] = GEN_INT (len
);
3201 return "{depi|depwi} 0,%2,%3,%0";
3205 return "and %1,%2,%0";
3208 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3209 storing the result in operands[0]. */
3211 pa_output_64bit_and (rtx
*operands
)
3213 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3215 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3216 int ls0
, ls1
, ms0
, p
, len
;
3218 for (ls0
= 0; ls0
< HOST_BITS_PER_WIDE_INT
; ls0
++)
3219 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls0
)) == 0)
3222 for (ls1
= ls0
; ls1
< HOST_BITS_PER_WIDE_INT
; ls1
++)
3223 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls1
)) != 0)
3226 for (ms0
= ls1
; ms0
< HOST_BITS_PER_WIDE_INT
; ms0
++)
3227 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ms0
)) == 0)
3230 gcc_assert (ms0
== HOST_BITS_PER_WIDE_INT
);
3232 if (ls1
== HOST_BITS_PER_WIDE_INT
)
3238 operands
[2] = GEN_INT (len
);
3239 return "extrd,u %1,63,%2,%0";
3243 /* We could use this `depi' for the case above as well, but `depi'
3244 requires one more register file access than an `extru'. */
3249 operands
[2] = GEN_INT (p
);
3250 operands
[3] = GEN_INT (len
);
3251 return "depdi 0,%2,%3,%0";
3255 return "and %1,%2,%0";
3259 pa_output_ior (rtx
*operands
)
3261 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3262 int bs0
, bs1
, p
, len
;
3264 if (INTVAL (operands
[2]) == 0)
3265 return "copy %1,%0";
3267 for (bs0
= 0; bs0
< 32; bs0
++)
3268 if ((mask
& (1 << bs0
)) != 0)
3271 for (bs1
= bs0
; bs1
< 32; bs1
++)
3272 if ((mask
& (1 << bs1
)) == 0)
3275 gcc_assert (bs1
== 32 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3280 operands
[2] = GEN_INT (p
);
3281 operands
[3] = GEN_INT (len
);
3282 return "{depi|depwi} -1,%2,%3,%0";
3285 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3286 storing the result in operands[0]. */
3288 pa_output_64bit_ior (rtx
*operands
)
3290 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3291 int bs0
, bs1
, p
, len
;
3293 if (INTVAL (operands
[2]) == 0)
3294 return "copy %1,%0";
3296 for (bs0
= 0; bs0
< HOST_BITS_PER_WIDE_INT
; bs0
++)
3297 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs0
)) != 0)
3300 for (bs1
= bs0
; bs1
< HOST_BITS_PER_WIDE_INT
; bs1
++)
3301 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs1
)) == 0)
3304 gcc_assert (bs1
== HOST_BITS_PER_WIDE_INT
3305 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3310 operands
[2] = GEN_INT (p
);
3311 operands
[3] = GEN_INT (len
);
3312 return "depdi -1,%2,%3,%0";
3315 /* Target hook for assembling integer objects. This code handles
3316 aligned SI and DI integers specially since function references
3317 must be preceded by P%. */
3320 pa_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
3325 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3326 call assemble_external and set the SYMBOL_REF_DECL to NULL before
3327 calling output_addr_const. Otherwise, it may call assemble_external
3328 in the midst of outputing the assembler code for the SYMBOL_REF.
3329 We restore the SYMBOL_REF_DECL after the output is done. */
3330 if (GET_CODE (x
) == SYMBOL_REF
)
3332 decl
= SYMBOL_REF_DECL (x
);
3335 assemble_external (decl
);
3336 SET_SYMBOL_REF_DECL (x
, NULL
);
3340 if (size
== UNITS_PER_WORD
3342 && function_label_operand (x
, VOIDmode
))
3344 fputs (size
== 8? "\t.dword\t" : "\t.word\t", asm_out_file
);
3346 /* We don't want an OPD when generating fast indirect calls. */
3347 if (!TARGET_FAST_INDIRECT_CALLS
)
3348 fputs ("P%", asm_out_file
);
3350 output_addr_const (asm_out_file
, x
);
3351 fputc ('\n', asm_out_file
);
3355 result
= default_assemble_integer (x
, size
, aligned_p
);
3358 SET_SYMBOL_REF_DECL (x
, decl
);
/* Output an ascii string of SIZE bytes from P to FILE as one or more
   HP assembler .STRING directives, escaping quotes, backslashes and
   non-printable bytes.  */

void
pa_output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      /* Escape at most four source bytes into PARTIAL_OUTPUT.  */
      int co = 0;
      int io = 0;
      int limit = (size - i < 4) ? size - i : 4;

      for (io = 0; io < limit; io++)
	{
	  unsigned int c = (unsigned char) p[i + io];

	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      /* Emit a \xNN escape for non-printable bytes.  */
	      unsigned int hexd;
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd = c / 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd = c % 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}
      /* Restart the directive before the assembler's input-line
	 length limit is reached.  */
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
3419 /* Try to rewrite floating point comparisons & branches to avoid
3420 useless add,tr insns.
3422 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3423 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3424 first attempt to remove useless add,tr insns. It is zero
3425 for the second pass as reorg sometimes leaves bogus REG_DEAD
3428 When CHECK_NOTES is zero we can only eliminate add,tr insns
3429 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3432 remove_useless_addtr_insns (int check_notes
)
3435 static int pass
= 0;
3437 /* This is fairly cheap, so always run it when optimizing. */
3441 int fbranch_count
= 0;
3443 /* Walk all the insns in this function looking for fcmp & fbranch
3444 instructions. Keep track of how many of each we find. */
3445 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3449 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3450 if (! NONJUMP_INSN_P (insn
) && ! JUMP_P (insn
))
3453 tmp
= PATTERN (insn
);
3455 /* It must be a set. */
3456 if (GET_CODE (tmp
) != SET
)
3459 /* If the destination is CCFP, then we've found an fcmp insn. */
3460 tmp
= SET_DEST (tmp
);
3461 if (GET_CODE (tmp
) == REG
&& REGNO (tmp
) == 0)
3467 tmp
= PATTERN (insn
);
3468 /* If this is an fbranch instruction, bump the fbranch counter. */
3469 if (GET_CODE (tmp
) == SET
3470 && SET_DEST (tmp
) == pc_rtx
3471 && GET_CODE (SET_SRC (tmp
)) == IF_THEN_ELSE
3472 && GET_CODE (XEXP (SET_SRC (tmp
), 0)) == NE
3473 && GET_CODE (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == REG
3474 && REGNO (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == 0)
3482 /* Find all floating point compare + branch insns. If possible,
3483 reverse the comparison & the branch to avoid add,tr insns. */
3484 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3489 /* Ignore anything that isn't an INSN. */
3490 if (! NONJUMP_INSN_P (insn
))
3493 tmp
= PATTERN (insn
);
3495 /* It must be a set. */
3496 if (GET_CODE (tmp
) != SET
)
3499 /* The destination must be CCFP, which is register zero. */
3500 tmp
= SET_DEST (tmp
);
3501 if (GET_CODE (tmp
) != REG
|| REGNO (tmp
) != 0)
3504 /* INSN should be a set of CCFP.
3506 See if the result of this insn is used in a reversed FP
3507 conditional branch. If so, reverse our condition and
3508 the branch. Doing so avoids useless add,tr insns. */
3509 next
= next_insn (insn
);
3512 /* Jumps, calls and labels stop our search. */
3513 if (JUMP_P (next
) || CALL_P (next
) || LABEL_P (next
))
3516 /* As does another fcmp insn. */
3517 if (NONJUMP_INSN_P (next
)
3518 && GET_CODE (PATTERN (next
)) == SET
3519 && GET_CODE (SET_DEST (PATTERN (next
))) == REG
3520 && REGNO (SET_DEST (PATTERN (next
))) == 0)
3523 next
= next_insn (next
);
3526 /* Is NEXT_INSN a branch? */
3527 if (next
&& JUMP_P (next
))
3529 rtx pattern
= PATTERN (next
);
3531 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3532 and CCFP dies, then reverse our conditional and the branch
3533 to avoid the add,tr. */
3534 if (GET_CODE (pattern
) == SET
3535 && SET_DEST (pattern
) == pc_rtx
3536 && GET_CODE (SET_SRC (pattern
)) == IF_THEN_ELSE
3537 && GET_CODE (XEXP (SET_SRC (pattern
), 0)) == NE
3538 && GET_CODE (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == REG
3539 && REGNO (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == 0
3540 && GET_CODE (XEXP (SET_SRC (pattern
), 1)) == PC
3541 && (fcmp_count
== fbranch_count
3543 && find_regno_note (next
, REG_DEAD
, 0))))
3545 /* Reverse the branch. */
3546 tmp
= XEXP (SET_SRC (pattern
), 1);
3547 XEXP (SET_SRC (pattern
), 1) = XEXP (SET_SRC (pattern
), 2);
3548 XEXP (SET_SRC (pattern
), 2) = tmp
;
3549 INSN_CODE (next
) = -1;
3551 /* Reverse our condition. */
3552 tmp
= PATTERN (insn
);
3553 PUT_CODE (XEXP (tmp
, 1),
3554 (reverse_condition_maybe_unordered
3555 (GET_CODE (XEXP (tmp
, 1)))));
3565 /* You may have trouble believing this, but this is the 32 bit HP-PA
3570 Variable arguments (optional; any number may be allocated)
3572 SP-(4*(N+9)) arg word N
3577 Fixed arguments (must be allocated; may remain unused)
3586 SP-32 External Data Pointer (DP)
3588 SP-24 External/stub RP (RP')
3592 SP-8 Calling Stub RP (RP'')
3597 SP-0 Stack Pointer (points to next available address)
3601 /* This function saves registers as follows. Registers marked with ' are
3602 this function's registers (as opposed to the previous function's).
3603 If a frame_pointer isn't needed, r4 is saved as a general register;
3604 the space for the frame pointer is still allocated, though, to keep
3610 SP (FP') Previous FP
3611 SP + 4 Alignment filler (sigh)
3612 SP + 8 Space for locals reserved here.
3616 SP + n All call saved register used.
3620 SP + o All call saved fp registers used.
3624 SP + p (SP') points to next available address.
3628 /* Global variables set by output_function_prologue(). */
3629 /* Size of frame. Need to know this to emit return insns from
3631 static HOST_WIDE_INT actual_fsize
, local_fsize
;
3632 static int save_fregs
;
3634 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3635 Handle case where DISP > 8k by using the add_high_const patterns.
3637 Note in DISP > 8k case, we will leave the high part of the address
3638 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3641 store_reg (int reg
, HOST_WIDE_INT disp
, int base
)
3643 rtx dest
, src
, basereg
;
3646 src
= gen_rtx_REG (word_mode
, reg
);
3647 basereg
= gen_rtx_REG (Pmode
, base
);
3648 if (VAL_14_BITS_P (disp
))
3650 dest
= gen_rtx_MEM (word_mode
, plus_constant (Pmode
, basereg
, disp
));
3651 insn
= emit_move_insn (dest
, src
);
3653 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3655 rtx delta
= GEN_INT (disp
);
3656 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3658 emit_move_insn (tmpreg
, delta
);
3659 insn
= emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
3662 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3663 gen_rtx_SET (tmpreg
,
3664 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
3665 RTX_FRAME_RELATED_P (insn
) = 1;
3667 dest
= gen_rtx_MEM (word_mode
, tmpreg
);
3668 insn
= emit_move_insn (dest
, src
);
3672 rtx delta
= GEN_INT (disp
);
3673 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
3674 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3676 emit_move_insn (tmpreg
, high
);
3677 dest
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
3678 insn
= emit_move_insn (dest
, src
);
3680 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3681 gen_rtx_SET (gen_rtx_MEM (word_mode
,
3682 gen_rtx_PLUS (word_mode
,
3689 RTX_FRAME_RELATED_P (insn
) = 1;
3692 /* Emit RTL to store REG at the memory location specified by BASE and then
3693 add MOD to BASE. MOD must be <= 8k. */
3696 store_reg_modify (int base
, int reg
, HOST_WIDE_INT mod
)
3698 rtx basereg
, srcreg
, delta
;
3701 gcc_assert (VAL_14_BITS_P (mod
));
3703 basereg
= gen_rtx_REG (Pmode
, base
);
3704 srcreg
= gen_rtx_REG (word_mode
, reg
);
3705 delta
= GEN_INT (mod
);
3707 insn
= emit_insn (gen_post_store (basereg
, srcreg
, delta
));
3710 RTX_FRAME_RELATED_P (insn
) = 1;
3712 /* RTX_FRAME_RELATED_P must be set on each frame related set
3713 in a parallel with more than one element. */
3714 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 0)) = 1;
3715 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
3719 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3720 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3721 whether to add a frame note or not.
3723 In the DISP > 8k case, we leave the high part of the address in %r1.
3724 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3727 set_reg_plus_d (int reg
, int base
, HOST_WIDE_INT disp
, int note
)
3731 if (VAL_14_BITS_P (disp
))
3733 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3734 plus_constant (Pmode
,
3735 gen_rtx_REG (Pmode
, base
), disp
));
3737 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3739 rtx basereg
= gen_rtx_REG (Pmode
, base
);
3740 rtx delta
= GEN_INT (disp
);
3741 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3743 emit_move_insn (tmpreg
, delta
);
3744 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3745 gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
3747 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3748 gen_rtx_SET (tmpreg
,
3749 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
3753 rtx basereg
= gen_rtx_REG (Pmode
, base
);
3754 rtx delta
= GEN_INT (disp
);
3755 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3757 emit_move_insn (tmpreg
,
3758 gen_rtx_PLUS (Pmode
, basereg
,
3759 gen_rtx_HIGH (Pmode
, delta
)));
3760 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3761 gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
3764 if (DO_FRAME_NOTES
&& note
)
3765 RTX_FRAME_RELATED_P (insn
) = 1;
3769 pa_compute_frame_size (poly_int64 size
, int *fregs_live
)
3774 /* The code in pa_expand_prologue and pa_expand_epilogue must
3775 be consistent with the rounding and size calculation done here.
3776 Change them at the same time. */
3778 /* We do our own stack alignment. First, round the size of the
3779 stack locals up to a word boundary. */
3780 size
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3782 /* Space for previous frame pointer + filler. If any frame is
3783 allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET. We
3784 waste some space here for the sake of HP compatibility. The
3785 first slot is only used when the frame pointer is needed. */
3786 if (size
|| frame_pointer_needed
)
3787 size
+= pa_starting_frame_offset ();
3789 /* If the current function calls __builtin_eh_return, then we need
3790 to allocate stack space for registers that will hold data for
3791 the exception handler. */
3792 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3796 for (i
= 0; EH_RETURN_DATA_REGNO (i
) != INVALID_REGNUM
; ++i
)
3798 size
+= i
* UNITS_PER_WORD
;
3801 /* Account for space used by the callee general register saves. */
3802 for (i
= 18, j
= frame_pointer_needed
? 4 : 3; i
>= j
; i
--)
3803 if (df_regs_ever_live_p (i
))
3804 size
+= UNITS_PER_WORD
;
3806 /* Account for space used by the callee floating point register saves. */
3807 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
3808 if (df_regs_ever_live_p (i
)
3809 || (!TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
3813 /* We always save both halves of the FP register, so always
3814 increment the frame size by 8 bytes. */
3818 /* If any of the floating registers are saved, account for the
3819 alignment needed for the floating point register save block. */
3822 size
= (size
+ 7) & ~7;
3827 /* The various ABIs include space for the outgoing parameters in the
3828 size of the current function's stack frame. We don't need to align
3829 for the outgoing arguments as their alignment is set by the final
3830 rounding for the frame as a whole. */
3831 size
+= crtl
->outgoing_args_size
;
3833 /* Allocate space for the fixed frame marker. This space must be
3834 allocated for any function that makes calls or allocates
3836 if (!crtl
->is_leaf
|| size
)
3837 size
+= TARGET_64BIT
? 48 : 32;
3839 /* Finally, round to the preferred stack boundary. */
3840 return ((size
+ PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
- 1)
3841 & ~(PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
- 1));
3844 /* Output function label, and associated .PROC and .CALLINFO statements. */
3847 pa_output_function_label (FILE *file
)
3849 /* The function's label and associated .PROC must never be
3850 separated and must be output *after* any profiling declarations
3851 to avoid changing spaces/subspaces within a procedure. */
3852 ASM_OUTPUT_LABEL (file
, XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0));
3853 fputs ("\t.PROC\n", file
);
3855 /* pa_expand_prologue does the dirty work now. We just need
3856 to output the assembler directives which denote the start
3858 fprintf (file
, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC
, actual_fsize
);
3860 fputs (",NO_CALLS", file
);
3862 fputs (",CALLS", file
);
3864 fputs (",SAVE_RP", file
);
3866 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3867 at the beginning of the frame and that it is used as the frame
3868 pointer for the frame. We do this because our current frame
3869 layout doesn't conform to that specified in the HP runtime
3870 documentation and we need a way to indicate to programs such as
3871 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3872 isn't used by HP compilers but is supported by the assembler.
3873 However, SAVE_SP is supposed to indicate that the previous stack
3874 pointer has been saved in the frame marker. */
3875 if (frame_pointer_needed
)
3876 fputs (",SAVE_SP", file
);
3878 /* Pass on information about the number of callee register saves
3879 performed in the prologue.
3881 The compiler is supposed to pass the highest register number
3882 saved, the assembler then has to adjust that number before
3883 entering it into the unwind descriptor (to account for any
3884 caller saved registers with lower register numbers than the
3885 first callee saved register). */
3887 fprintf (file
, ",ENTRY_GR=%d", gr_saved
+ 2);
3890 fprintf (file
, ",ENTRY_FR=%d", fr_saved
+ 11);
3892 fputs ("\n\t.ENTRY\n", file
);
3895 /* Output function prologue. */
3898 pa_output_function_prologue (FILE *file
)
3900 pa_output_function_label (file
);
3901 remove_useless_addtr_insns (0);
3904 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux. */
3907 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED
)
3909 remove_useless_addtr_insns (0);
3913 pa_expand_prologue (void)
3915 int merge_sp_adjust_with_store
= 0;
3916 HOST_WIDE_INT size
= get_frame_size ();
3917 HOST_WIDE_INT offset
;
3926 /* Compute total size for frame pointer, filler, locals and rounding to
3927 the next word boundary. Similar code appears in pa_compute_frame_size
3928 and must be changed in tandem with this code. */
3929 local_fsize
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3930 if (local_fsize
|| frame_pointer_needed
)
3931 local_fsize
+= pa_starting_frame_offset ();
3933 actual_fsize
= pa_compute_frame_size (size
, &save_fregs
);
3934 if (flag_stack_usage_info
)
3935 current_function_static_stack_size
= actual_fsize
;
3937 /* Compute a few things we will use often. */
3938 tmpreg
= gen_rtx_REG (word_mode
, 1);
3940 /* Save RP first. The calling conventions manual states RP will
3941 always be stored into the caller's frame at sp - 20 or sp - 16
3942 depending on which ABI is in use. */
3943 if (df_regs_ever_live_p (2) || crtl
->calls_eh_return
)
3945 store_reg (2, TARGET_64BIT
? -16 : -20, STACK_POINTER_REGNUM
);
3951 /* Allocate the local frame and set up the frame pointer if needed. */
3952 if (actual_fsize
!= 0)
3954 if (frame_pointer_needed
)
3956 /* Copy the old frame pointer temporarily into %r1. Set up the
3957 new stack pointer, then store away the saved old frame pointer
3958 into the stack at sp and at the same time update the stack
3959 pointer by actual_fsize bytes. Two versions, first
3960 handles small (<8k) frames. The second handles large (>=8k)
3962 insn
= emit_move_insn (tmpreg
, hard_frame_pointer_rtx
);
3964 RTX_FRAME_RELATED_P (insn
) = 1;
3966 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3968 RTX_FRAME_RELATED_P (insn
) = 1;
3970 if (VAL_14_BITS_P (actual_fsize
))
3971 store_reg_modify (STACK_POINTER_REGNUM
, 1, actual_fsize
);
3974 /* It is incorrect to store the saved frame pointer at *sp,
3975 then increment sp (writes beyond the current stack boundary).
3977 So instead use stwm to store at *sp and post-increment the
3978 stack pointer as an atomic operation. Then increment sp to
3979 finish allocating the new frame. */
3980 HOST_WIDE_INT adjust1
= 8192 - 64;
3981 HOST_WIDE_INT adjust2
= actual_fsize
- adjust1
;
3983 store_reg_modify (STACK_POINTER_REGNUM
, 1, adjust1
);
3984 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3988 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3989 we need to store the previous stack pointer (frame pointer)
3990 into the frame marker on targets that use the HP unwind
3991 library. This allows the HP unwind library to be used to
3992 unwind GCC frames. However, we are not fully compatible
3993 with the HP library because our frame layout differs from
3994 that specified in the HP runtime specification.
3996 We don't want a frame note on this instruction as the frame
3997 marker moves during dynamic stack allocation.
3999 This instruction also serves as a blockage to prevent
4000 register spills from being scheduled before the stack
4001 pointer is raised. This is necessary as we store
4002 registers using the frame pointer as a base register,
4003 and the frame pointer is set before sp is raised. */
4004 if (TARGET_HPUX_UNWIND_LIBRARY
)
4006 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
,
4007 GEN_INT (TARGET_64BIT
? -8 : -4));
4009 emit_move_insn (gen_rtx_MEM (word_mode
, addr
),
4010 hard_frame_pointer_rtx
);
4013 emit_insn (gen_blockage ());
4015 /* no frame pointer needed. */
4018 /* In some cases we can perform the first callee register save
4019 and allocating the stack frame at the same time. If so, just
4020 make a note of it and defer allocating the frame until saving
4021 the callee registers. */
4022 if (VAL_14_BITS_P (actual_fsize
) && local_fsize
== 0)
4023 merge_sp_adjust_with_store
= 1;
4024 /* Cannot optimize. Adjust the stack frame by actual_fsize
4027 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4032 /* Normal register save.
4034 Do not save the frame pointer in the frame_pointer_needed case. It
4035 was done earlier. */
4036 if (frame_pointer_needed
)
4038 offset
= local_fsize
;
4040 /* Saving the EH return data registers in the frame is the simplest
4041 way to get the frame unwind information emitted. We put them
4042 just before the general registers. */
4043 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4045 unsigned int i
, regno
;
4049 regno
= EH_RETURN_DATA_REGNO (i
);
4050 if (regno
== INVALID_REGNUM
)
4053 store_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
4054 offset
+= UNITS_PER_WORD
;
4058 for (i
= 18; i
>= 4; i
--)
4059 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4061 store_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
4062 offset
+= UNITS_PER_WORD
;
4065 /* Account for %r3 which is saved in a special place. */
4068 /* No frame pointer needed. */
4071 offset
= local_fsize
- actual_fsize
;
4073 /* Saving the EH return data registers in the frame is the simplest
4074 way to get the frame unwind information emitted. */
4075 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4077 unsigned int i
, regno
;
4081 regno
= EH_RETURN_DATA_REGNO (i
);
4082 if (regno
== INVALID_REGNUM
)
4085 /* If merge_sp_adjust_with_store is nonzero, then we can
4086 optimize the first save. */
4087 if (merge_sp_adjust_with_store
)
4089 store_reg_modify (STACK_POINTER_REGNUM
, regno
, -offset
);
4090 merge_sp_adjust_with_store
= 0;
4093 store_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4094 offset
+= UNITS_PER_WORD
;
4098 for (i
= 18; i
>= 3; i
--)
4099 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4101 /* If merge_sp_adjust_with_store is nonzero, then we can
4102 optimize the first GR save. */
4103 if (merge_sp_adjust_with_store
)
4105 store_reg_modify (STACK_POINTER_REGNUM
, i
, -offset
);
4106 merge_sp_adjust_with_store
= 0;
4109 store_reg (i
, offset
, STACK_POINTER_REGNUM
);
4110 offset
+= UNITS_PER_WORD
;
4114 /* If we wanted to merge the SP adjustment with a GR save, but we never
4115 did any GR saves, then just emit the adjustment here. */
4116 if (merge_sp_adjust_with_store
)
4117 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4121 /* The hppa calling conventions say that %r19, the pic offset
4122 register, is saved at sp - 32 (in this function's frame)
4123 when generating PIC code. FIXME: What is the correct thing
4124 to do for functions which make no calls and allocate no
4125 frame? Do we need to allocate a frame, or can we just omit
4126 the save? For now we'll just omit the save.
4128 We don't want a note on this insn as the frame marker can
4129 move if there is a dynamic stack allocation. */
4130 if (flag_pic
&& actual_fsize
!= 0 && !TARGET_64BIT
)
4132 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
, GEN_INT (-32));
4134 emit_move_insn (gen_rtx_MEM (word_mode
, addr
), pic_offset_table_rtx
);
4138 /* Align pointer properly (doubleword boundary). */
4139 offset
= (offset
+ 7) & ~7;
4141 /* Floating point register store. */
4146 /* First get the frame or stack pointer to the start of the FP register
4148 if (frame_pointer_needed
)
4150 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM
, offset
, 0);
4151 base
= hard_frame_pointer_rtx
;
4155 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
4156 base
= stack_pointer_rtx
;
4159 /* Now actually save the FP registers. */
4160 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4162 if (df_regs_ever_live_p (i
)
4163 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
4167 addr
= gen_rtx_MEM (DFmode
,
4168 gen_rtx_POST_INC (word_mode
, tmpreg
));
4169 reg
= gen_rtx_REG (DFmode
, i
);
4170 insn
= emit_move_insn (addr
, reg
);
4173 RTX_FRAME_RELATED_P (insn
) = 1;
4176 rtx mem
= gen_rtx_MEM (DFmode
,
4177 plus_constant (Pmode
, base
,
4179 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4180 gen_rtx_SET (mem
, reg
));
4184 rtx meml
= gen_rtx_MEM (SFmode
,
4185 plus_constant (Pmode
, base
,
4187 rtx memr
= gen_rtx_MEM (SFmode
,
4188 plus_constant (Pmode
, base
,
4190 rtx regl
= gen_rtx_REG (SFmode
, i
);
4191 rtx regr
= gen_rtx_REG (SFmode
, i
+ 1);
4192 rtx setl
= gen_rtx_SET (meml
, regl
);
4193 rtx setr
= gen_rtx_SET (memr
, regr
);
4196 RTX_FRAME_RELATED_P (setl
) = 1;
4197 RTX_FRAME_RELATED_P (setr
) = 1;
4198 vec
= gen_rtvec (2, setl
, setr
);
4199 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4200 gen_rtx_SEQUENCE (VOIDmode
, vec
));
4203 offset
+= GET_MODE_SIZE (DFmode
);
4210 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4211 Handle case where DISP > 8k by using the add_high_const patterns. */
4214 load_reg (int reg
, HOST_WIDE_INT disp
, int base
)
4216 rtx dest
= gen_rtx_REG (word_mode
, reg
);
4217 rtx basereg
= gen_rtx_REG (Pmode
, base
);
4220 if (VAL_14_BITS_P (disp
))
4221 src
= gen_rtx_MEM (word_mode
, plus_constant (Pmode
, basereg
, disp
));
4222 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
4224 rtx delta
= GEN_INT (disp
);
4225 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4227 emit_move_insn (tmpreg
, delta
);
4228 if (TARGET_DISABLE_INDEXING
)
4230 emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4231 src
= gen_rtx_MEM (word_mode
, tmpreg
);
4234 src
= gen_rtx_MEM (word_mode
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4238 rtx delta
= GEN_INT (disp
);
4239 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
4240 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4242 emit_move_insn (tmpreg
, high
);
4243 src
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
4246 emit_move_insn (dest
, src
);
4249 /* Update the total code bytes output to the text section. */
4252 update_total_code_bytes (unsigned int nbytes
)
4254 if ((TARGET_PORTABLE_RUNTIME
|| !TARGET_GAS
|| !TARGET_SOM
)
4255 && !IN_NAMED_SECTION_P (cfun
->decl
))
4257 unsigned int old_total
= total_code_bytes
;
4259 total_code_bytes
+= nbytes
;
4261 /* Be prepared to handle overflows. */
4262 if (old_total
> total_code_bytes
)
4263 total_code_bytes
= UINT_MAX
;
4267 /* This function generates the assembly code for function exit.
4268 Args are as for output_function_prologue ().
4270 The function epilogue should not depend on the current stack
4271 pointer! It should use the frame pointer only. This is mandatory
4272 because of alloca; we also take advantage of it to omit stack
4273 adjustments before returning. */
4276 pa_output_function_epilogue (FILE *file
)
4278 rtx_insn
*insn
= get_last_insn ();
4281 /* pa_expand_epilogue does the dirty work now. We just need
4282 to output the assembler directives which denote the end
4285 To make debuggers happy, emit a nop if the epilogue was completely
4286 eliminated due to a volatile call as the last insn in the
4287 current function. That way the return address (in %r2) will
4288 always point to a valid instruction in the current function. */
4290 /* Get the last real insn. */
4292 insn
= prev_real_insn (insn
);
4294 /* If it is a sequence, then look inside. */
4295 if (insn
&& NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == SEQUENCE
)
4296 insn
= as_a
<rtx_sequence
*> (PATTERN (insn
))-> insn (0);
4298 /* If insn is a CALL_INSN, then it must be a call to a volatile
4299 function (otherwise there would be epilogue insns). */
4300 if (insn
&& CALL_P (insn
))
4302 fputs ("\tnop\n", file
);
4308 fputs ("\t.EXIT\n\t.PROCEND\n", file
);
4310 if (TARGET_SOM
&& TARGET_GAS
)
4312 /* We are done with this subspace except possibly for some additional
4313 debug information. Forget that we are in this subspace to ensure
4314 that the next function is output in its own subspace. */
4316 cfun
->machine
->in_nsubspa
= 2;
4319 /* Thunks do their own insn accounting. */
4323 if (INSN_ADDRESSES_SET_P ())
4325 last_address
= extra_nop
? 4 : 0;
4326 insn
= get_last_nonnote_insn ();
4329 last_address
+= INSN_ADDRESSES (INSN_UID (insn
));
4331 last_address
+= insn_default_length (insn
);
4333 last_address
= ((last_address
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
4334 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
4337 last_address
= UINT_MAX
;
4339 /* Finally, update the total number of code bytes output so far. */
4340 update_total_code_bytes (last_address
);
4344 pa_expand_epilogue (void)
4347 HOST_WIDE_INT offset
;
4348 HOST_WIDE_INT ret_off
= 0;
4350 int merge_sp_adjust_with_load
= 0;
4352 /* We will use this often. */
4353 tmpreg
= gen_rtx_REG (word_mode
, 1);
4355 /* Try to restore RP early to avoid load/use interlocks when
4356 RP gets used in the return (bv) instruction. This appears to still
4357 be necessary even when we schedule the prologue and epilogue. */
4360 ret_off
= TARGET_64BIT
? -16 : -20;
4361 if (frame_pointer_needed
)
4363 load_reg (2, ret_off
, HARD_FRAME_POINTER_REGNUM
);
4368 /* No frame pointer, and stack is smaller than 8k. */
4369 if (VAL_14_BITS_P (ret_off
- actual_fsize
))
4371 load_reg (2, ret_off
- actual_fsize
, STACK_POINTER_REGNUM
);
4377 /* General register restores. */
4378 if (frame_pointer_needed
)
4380 offset
= local_fsize
;
4382 /* If the current function calls __builtin_eh_return, then we need
4383 to restore the saved EH data registers. */
4384 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4386 unsigned int i
, regno
;
4390 regno
= EH_RETURN_DATA_REGNO (i
);
4391 if (regno
== INVALID_REGNUM
)
4394 load_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
4395 offset
+= UNITS_PER_WORD
;
4399 for (i
= 18; i
>= 4; i
--)
4400 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4402 load_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
4403 offset
+= UNITS_PER_WORD
;
4408 offset
= local_fsize
- actual_fsize
;
4410 /* If the current function calls __builtin_eh_return, then we need
4411 to restore the saved EH data registers. */
4412 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4414 unsigned int i
, regno
;
4418 regno
= EH_RETURN_DATA_REGNO (i
);
4419 if (regno
== INVALID_REGNUM
)
4422 /* Only for the first load.
4423 merge_sp_adjust_with_load holds the register load
4424 with which we will merge the sp adjustment. */
4425 if (merge_sp_adjust_with_load
== 0
4427 && VAL_14_BITS_P (-actual_fsize
))
4428 merge_sp_adjust_with_load
= regno
;
4430 load_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4431 offset
+= UNITS_PER_WORD
;
4435 for (i
= 18; i
>= 3; i
--)
4437 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4439 /* Only for the first load.
4440 merge_sp_adjust_with_load holds the register load
4441 with which we will merge the sp adjustment. */
4442 if (merge_sp_adjust_with_load
== 0
4444 && VAL_14_BITS_P (-actual_fsize
))
4445 merge_sp_adjust_with_load
= i
;
4447 load_reg (i
, offset
, STACK_POINTER_REGNUM
);
4448 offset
+= UNITS_PER_WORD
;
4453 /* Align pointer properly (doubleword boundary). */
4454 offset
= (offset
+ 7) & ~7;
4456 /* FP register restores. */
4459 /* Adjust the register to index off of. */
4460 if (frame_pointer_needed
)
4461 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM
, offset
, 0);
4463 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
4465 /* Actually do the restores now. */
4466 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4467 if (df_regs_ever_live_p (i
)
4468 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
4470 rtx src
= gen_rtx_MEM (DFmode
,
4471 gen_rtx_POST_INC (word_mode
, tmpreg
));
4472 rtx dest
= gen_rtx_REG (DFmode
, i
);
4473 emit_move_insn (dest
, src
);
4477 /* Emit a blockage insn here to keep these insns from being moved to
4478 an earlier spot in the epilogue, or into the main instruction stream.
4480 This is necessary as we must not cut the stack back before all the
4481 restores are finished. */
4482 emit_insn (gen_blockage ());
4484 /* Reset stack pointer (and possibly frame pointer). The stack
4485 pointer is initially set to fp + 64 to avoid a race condition. */
4486 if (frame_pointer_needed
)
4488 rtx delta
= GEN_INT (-64);
4490 set_reg_plus_d (STACK_POINTER_REGNUM
, HARD_FRAME_POINTER_REGNUM
, 64, 0);
4491 emit_insn (gen_pre_load (hard_frame_pointer_rtx
,
4492 stack_pointer_rtx
, delta
));
4494 /* If we were deferring a callee register restore, do it now. */
4495 else if (merge_sp_adjust_with_load
)
4497 rtx delta
= GEN_INT (-actual_fsize
);
4498 rtx dest
= gen_rtx_REG (word_mode
, merge_sp_adjust_with_load
);
4500 emit_insn (gen_pre_load (dest
, stack_pointer_rtx
, delta
));
4502 else if (actual_fsize
!= 0)
4503 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4506 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4507 frame greater than 8k), do so now. */
4509 load_reg (2, ret_off
, STACK_POINTER_REGNUM
);
4511 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4513 rtx sa
= EH_RETURN_STACKADJ_RTX
;
4515 emit_insn (gen_blockage ());
4516 emit_insn (TARGET_64BIT
4517 ? gen_subdi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
)
4518 : gen_subsi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
));
4523 pa_can_use_return_insn (void)
4525 if (!reload_completed
)
4528 if (frame_pointer_needed
)
4531 if (df_regs_ever_live_p (2))
4537 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4541 hppa_pic_save_rtx (void)
4543 return get_hard_reg_initial_val (word_mode
, PIC_OFFSET_TABLE_REGNUM
);
4546 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4547 #define NO_DEFERRED_PROFILE_COUNTERS 0
4551 /* Vector of funcdef numbers. */
4552 static vec
<int> funcdef_nos
;
4554 /* Output deferred profile counters. */
4556 output_deferred_profile_counters (void)
4561 if (funcdef_nos
.is_empty ())
4564 switch_to_section (data_section
);
4565 align
= MIN (BIGGEST_ALIGNMENT
, LONG_TYPE_SIZE
);
4566 ASM_OUTPUT_ALIGN (asm_out_file
, floor_log2 (align
/ BITS_PER_UNIT
));
4568 for (i
= 0; funcdef_nos
.iterate (i
, &n
); i
++)
4570 targetm
.asm_out
.internal_label (asm_out_file
, "LP", n
);
4571 assemble_integer (const0_rtx
, LONG_TYPE_SIZE
/ BITS_PER_UNIT
, align
, 1);
4574 funcdef_nos
.release ();
4578 hppa_profile_hook (int label_no
)
4580 rtx_code_label
*label_rtx
= gen_label_rtx ();
4581 int reg_parm_stack_space
= REG_PARM_STACK_SPACE (NULL_TREE
);
4582 rtx arg_bytes
, begin_label_rtx
, mcount
, sym
;
4583 rtx_insn
*call_insn
;
4584 char begin_label_name
[16];
4585 bool use_mcount_pcrel_call
;
4587 /* Set up call destination. */
4588 sym
= gen_rtx_SYMBOL_REF (Pmode
, "_mcount");
4589 pa_encode_label (sym
);
4590 mcount
= gen_rtx_MEM (Pmode
, sym
);
4592 /* If we can reach _mcount with a pc-relative call, we can optimize
4593 loading the address of the current function. This requires linker
4594 long branch stub support. */
4595 if (!TARGET_PORTABLE_RUNTIME
4596 && !TARGET_LONG_CALLS
4597 && (TARGET_SOM
|| flag_function_sections
))
4598 use_mcount_pcrel_call
= TRUE
;
4600 use_mcount_pcrel_call
= FALSE
;
4602 ASM_GENERATE_INTERNAL_LABEL (begin_label_name
, FUNC_BEGIN_PROLOG_LABEL
,
4604 begin_label_rtx
= gen_rtx_SYMBOL_REF (SImode
, ggc_strdup (begin_label_name
));
4606 emit_move_insn (gen_rtx_REG (word_mode
, 26), gen_rtx_REG (word_mode
, 2));
4608 if (!use_mcount_pcrel_call
)
4610 /* The address of the function is loaded into %r25 with an instruction-
4611 relative sequence that avoids the use of relocations. We use SImode
4612 for the address of the function in both 32 and 64-bit code to avoid
4613 having to provide DImode versions of the lcla2 pattern. */
4615 emit_insn (gen_lcla2 (gen_rtx_REG (SImode
, 25), label_rtx
));
4617 emit_insn (gen_lcla1 (gen_rtx_REG (SImode
, 25), label_rtx
));
4620 if (!NO_DEFERRED_PROFILE_COUNTERS
)
4622 rtx count_label_rtx
, addr
, r24
;
4623 char count_label_name
[16];
4625 funcdef_nos
.safe_push (label_no
);
4626 ASM_GENERATE_INTERNAL_LABEL (count_label_name
, "LP", label_no
);
4627 count_label_rtx
= gen_rtx_SYMBOL_REF (Pmode
,
4628 ggc_strdup (count_label_name
));
4630 addr
= force_reg (Pmode
, count_label_rtx
);
4631 r24
= gen_rtx_REG (Pmode
, 24);
4632 emit_move_insn (r24
, addr
);
4634 arg_bytes
= GEN_INT (TARGET_64BIT
? 24 : 12);
4635 if (use_mcount_pcrel_call
)
4636 call_insn
= emit_call_insn (gen_call_mcount (mcount
, arg_bytes
,
4639 call_insn
= emit_call_insn (gen_call (mcount
, arg_bytes
));
4641 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), r24
);
4645 arg_bytes
= GEN_INT (TARGET_64BIT
? 16 : 8);
4646 if (use_mcount_pcrel_call
)
4647 call_insn
= emit_call_insn (gen_call_mcount (mcount
, arg_bytes
,
4650 call_insn
= emit_call_insn (gen_call (mcount
, arg_bytes
));
4653 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 25));
4654 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 26));
4656 /* Indicate the _mcount call cannot throw, nor will it execute a
4658 make_reg_eh_region_note_nothrow_nononlocal (call_insn
);
4660 /* Allocate space for fixed arguments. */
4661 if (reg_parm_stack_space
> crtl
->outgoing_args_size
)
4662 crtl
->outgoing_args_size
= reg_parm_stack_space
;
4665 /* Fetch the return address for the frame COUNT steps up from
4666 the current frame, after the prologue. FRAMEADDR is the
4667 frame pointer of the COUNT frame.
4669 We want to ignore any export stub remnants here. To handle this,
4670 we examine the code at the return address, and if it is an export
4671 stub, we return a memory rtx for the stub return address stored
4674 The value returned is used in two different ways:
4676 1. To find a function's caller.
4678 2. To change the return address for a function.
4680 This function handles most instances of case 1; however, it will
4681 fail if there are two levels of stubs to execute on the return
4682 path. The only way I believe that can happen is if the return value
4683 needs a parameter relocation, which never happens for C code.
4685 This function handles most instances of case 2; however, it will
4686 fail if we did not originally have stub code on the return path
4687 but will need stub code on the new return path. This can happen if
4688 the caller & callee are both in the main program, but the new
4689 return location is in a shared library. */
4692 pa_return_addr_rtx (int count
, rtx frameaddr
)
4699 /* The instruction stream at the return address of a PA1.X export stub is:
4701 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4702 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4703 0x00011820 | stub+16: mtsp r1,sr0
4704 0xe0400002 | stub+20: be,n 0(sr0,rp)
4706 0xe0400002 must be specified as -532676606 so that it won't be
4707 rejected as an invalid immediate operand on 64-bit hosts.
4709 The instruction stream at the return address of a PA2.0 export stub is:
4711 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4712 0xe840d002 | stub+12: bve,n (rp)
4715 HOST_WIDE_INT insns
[4];
4721 rp
= get_hard_reg_initial_val (Pmode
, 2);
4723 if (TARGET_64BIT
|| TARGET_NO_SPACE_REGS
)
4726 /* If there is no export stub then just use the value saved from
4727 the return pointer register. */
4729 saved_rp
= gen_reg_rtx (Pmode
);
4730 emit_move_insn (saved_rp
, rp
);
4732 /* Get pointer to the instruction stream. We have to mask out the
4733 privilege level from the two low order bits of the return address
4734 pointer here so that ins will point to the start of the first
4735 instruction that would have been executed if we returned. */
4736 ins
= copy_to_reg (gen_rtx_AND (Pmode
, rp
, MASK_RETURN_ADDR
));
4737 label
= gen_label_rtx ();
4741 insns
[0] = 0x4bc23fd1;
4742 insns
[1] = -398405630;
4747 insns
[0] = 0x4bc23fd1;
4748 insns
[1] = 0x004010a1;
4749 insns
[2] = 0x00011820;
4750 insns
[3] = -532676606;
4754 /* Check the instruction stream at the normal return address for the
4755 export stub. If it is an export stub, than our return address is
4756 really in -24[frameaddr]. */
4758 for (i
= 0; i
< len
; i
++)
4760 rtx op0
= gen_rtx_MEM (SImode
, plus_constant (Pmode
, ins
, i
* 4));
4761 rtx op1
= GEN_INT (insns
[i
]);
4762 emit_cmp_and_jump_insns (op0
, op1
, NE
, NULL
, SImode
, 0, label
);
4765 /* Here we know that our return address points to an export
4766 stub. We don't want to return the address of the export stub,
4767 but rather the return address of the export stub. That return
4768 address is stored at -24[frameaddr]. */
4770 emit_move_insn (saved_rp
,
4772 memory_address (Pmode
,
4773 plus_constant (Pmode
, frameaddr
,
4782 pa_emit_bcond_fp (rtx operands
[])
4784 enum rtx_code code
= GET_CODE (operands
[0]);
4785 rtx operand0
= operands
[1];
4786 rtx operand1
= operands
[2];
4787 rtx label
= operands
[3];
4789 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode
, 0),
4790 gen_rtx_fmt_ee (code
, CCFPmode
, operand0
, operand1
)));
4792 emit_jump_insn (gen_rtx_SET (pc_rtx
,
4793 gen_rtx_IF_THEN_ELSE (VOIDmode
,
4796 gen_rtx_REG (CCFPmode
, 0),
4798 gen_rtx_LABEL_REF (VOIDmode
, label
),
4803 /* Adjust the cost of a scheduling dependency. Return the new cost of
4804 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4807 pa_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
4810 enum attr_type attr_type
;
4812 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4813 true dependencies as they are described with bypasses now. */
4814 if (pa_cpu
>= PROCESSOR_8000
|| dep_type
== 0)
4817 if (! recog_memoized (insn
))
4820 attr_type
= get_attr_type (insn
);
4825 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4828 if (attr_type
== TYPE_FPLOAD
)
4830 rtx pat
= PATTERN (insn
);
4831 rtx dep_pat
= PATTERN (dep_insn
);
4832 if (GET_CODE (pat
) == PARALLEL
)
4834 /* This happens for the fldXs,mb patterns. */
4835 pat
= XVECEXP (pat
, 0, 0);
4837 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4838 /* If this happens, we have to extend this to schedule
4839 optimally. Return 0 for now. */
4842 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
4844 if (! recog_memoized (dep_insn
))
4846 switch (get_attr_type (dep_insn
))
4853 case TYPE_FPSQRTSGL
:
4854 case TYPE_FPSQRTDBL
:
4855 /* A fpload can't be issued until one cycle before a
4856 preceding arithmetic operation has finished if
4857 the target of the fpload is any of the sources
4858 (or destination) of the arithmetic operation. */
4859 return insn_default_latency (dep_insn
) - 1;
4866 else if (attr_type
== TYPE_FPALU
)
4868 rtx pat
= PATTERN (insn
);
4869 rtx dep_pat
= PATTERN (dep_insn
);
4870 if (GET_CODE (pat
) == PARALLEL
)
4872 /* This happens for the fldXs,mb patterns. */
4873 pat
= XVECEXP (pat
, 0, 0);
4875 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4876 /* If this happens, we have to extend this to schedule
4877 optimally. Return 0 for now. */
4880 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
4882 if (! recog_memoized (dep_insn
))
4884 switch (get_attr_type (dep_insn
))
4888 case TYPE_FPSQRTSGL
:
4889 case TYPE_FPSQRTDBL
:
4890 /* An ALU flop can't be issued until two cycles before a
4891 preceding divide or sqrt operation has finished if
4892 the target of the ALU flop is any of the sources
4893 (or destination) of the divide or sqrt operation. */
4894 return insn_default_latency (dep_insn
) - 2;
4902 /* For other anti dependencies, the cost is 0. */
4905 case REG_DEP_OUTPUT
:
4906 /* Output dependency; DEP_INSN writes a register that INSN writes some
4908 if (attr_type
== TYPE_FPLOAD
)
4910 rtx pat
= PATTERN (insn
);
4911 rtx dep_pat
= PATTERN (dep_insn
);
4912 if (GET_CODE (pat
) == PARALLEL
)
4914 /* This happens for the fldXs,mb patterns. */
4915 pat
= XVECEXP (pat
, 0, 0);
4917 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4918 /* If this happens, we have to extend this to schedule
4919 optimally. Return 0 for now. */
4922 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
4924 if (! recog_memoized (dep_insn
))
4926 switch (get_attr_type (dep_insn
))
4933 case TYPE_FPSQRTSGL
:
4934 case TYPE_FPSQRTDBL
:
4935 /* A fpload can't be issued until one cycle before a
4936 preceding arithmetic operation has finished if
4937 the target of the fpload is the destination of the
4938 arithmetic operation.
4940 Exception: For PA7100LC, PA7200 and PA7300, the cost
4941 is 3 cycles, unless they bundle together. We also
4942 pay the penalty if the second insn is a fpload. */
4943 return insn_default_latency (dep_insn
) - 1;
4950 else if (attr_type
== TYPE_FPALU
)
4952 rtx pat
= PATTERN (insn
);
4953 rtx dep_pat
= PATTERN (dep_insn
);
4954 if (GET_CODE (pat
) == PARALLEL
)
4956 /* This happens for the fldXs,mb patterns. */
4957 pat
= XVECEXP (pat
, 0, 0);
4959 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4960 /* If this happens, we have to extend this to schedule
4961 optimally. Return 0 for now. */
4964 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
4966 if (! recog_memoized (dep_insn
))
4968 switch (get_attr_type (dep_insn
))
4972 case TYPE_FPSQRTSGL
:
4973 case TYPE_FPSQRTDBL
:
4974 /* An ALU flop can't be issued until two cycles before a
4975 preceding divide or sqrt operation has finished if
4976 the target of the ALU flop is also the target of
4977 the divide or sqrt operation. */
4978 return insn_default_latency (dep_insn
) - 2;
4986 /* For other output dependencies, the cost is 0. */
4994 /* The 700 can only issue a single insn at a time.
4995 The 7XXX processors can issue two insns at a time.
4996 The 8000 can issue 4 insns at a time. */
4998 pa_issue_rate (void)
5002 case PROCESSOR_700
: return 1;
5003 case PROCESSOR_7100
: return 2;
5004 case PROCESSOR_7100LC
: return 2;
5005 case PROCESSOR_7200
: return 2;
5006 case PROCESSOR_7300
: return 2;
5007 case PROCESSOR_8000
: return 4;
5016 /* Return any length plus adjustment needed by INSN which already has
5017 its length computed as LENGTH. Return LENGTH if no adjustment is
5020 Also compute the length of an inline block move here as it is too
5021 complicated to express as a length attribute in pa.md. */
5023 pa_adjust_insn_length (rtx_insn
*insn
, int length
)
5025 rtx pat
= PATTERN (insn
);
5027 /* If length is negative or undefined, provide initial length. */
5028 if ((unsigned int) length
>= INT_MAX
)
5030 if (GET_CODE (pat
) == SEQUENCE
)
5031 insn
= as_a
<rtx_insn
*> (XVECEXP (pat
, 0, 0));
5033 switch (get_attr_type (insn
))
5036 length
= pa_attr_length_millicode_call (insn
);
5039 length
= pa_attr_length_call (insn
, 0);
5042 length
= pa_attr_length_call (insn
, 1);
5045 length
= pa_attr_length_indirect_call (insn
);
5047 case TYPE_SH_FUNC_ADRS
:
5048 length
= pa_attr_length_millicode_call (insn
) + 20;
5055 /* Block move pattern. */
5056 if (NONJUMP_INSN_P (insn
)
5057 && GET_CODE (pat
) == PARALLEL
5058 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
5059 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
5060 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == MEM
5061 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
5062 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == BLKmode
)
5063 length
+= compute_cpymem_length (insn
) - 4;
5064 /* Block clear pattern. */
5065 else if (NONJUMP_INSN_P (insn
)
5066 && GET_CODE (pat
) == PARALLEL
5067 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
5068 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
5069 && XEXP (XVECEXP (pat
, 0, 0), 1) == const0_rtx
5070 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
)
5071 length
+= compute_clrmem_length (insn
) - 4;
5072 /* Conditional branch with an unfilled delay slot. */
5073 else if (JUMP_P (insn
) && ! simplejump_p (insn
))
5075 /* Adjust a short backwards conditional with an unfilled delay slot. */
5076 if (GET_CODE (pat
) == SET
5078 && JUMP_LABEL (insn
) != NULL_RTX
5079 && ! forward_branch_p (insn
))
5081 else if (GET_CODE (pat
) == PARALLEL
5082 && get_attr_type (insn
) == TYPE_PARALLEL_BRANCH
5085 /* Adjust dbra insn with short backwards conditional branch with
5086 unfilled delay slot -- only for case where counter is in a
5087 general register register. */
5088 else if (GET_CODE (pat
) == PARALLEL
5089 && GET_CODE (XVECEXP (pat
, 0, 1)) == SET
5090 && GET_CODE (XEXP (XVECEXP (pat
, 0, 1), 0)) == REG
5091 && ! FP_REG_P (XEXP (XVECEXP (pat
, 0, 1), 0))
5093 && ! forward_branch_p (insn
))
5099 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5102 pa_print_operand_punct_valid_p (unsigned char code
)
5113 /* Print operand X (an rtx) in assembler syntax to file FILE.
5114 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5115 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5118 pa_print_operand (FILE *file
, rtx x
, int code
)
5123 /* Output a 'nop' if there's nothing for the delay slot. */
5124 if (dbr_sequence_length () == 0)
5125 fputs ("\n\tnop", file
);
5128 /* Output a nullification completer if there's nothing for the */
5129 /* delay slot or nullification is requested. */
5130 if (dbr_sequence_length () == 0 ||
5132 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence
, 0, 0))))
5136 /* Print out the second register name of a register pair.
5137 I.e., R (6) => 7. */
5138 fputs (reg_names
[REGNO (x
) + 1], file
);
5141 /* A register or zero. */
5143 || (x
== CONST0_RTX (DFmode
))
5144 || (x
== CONST0_RTX (SFmode
)))
5146 fputs ("%r0", file
);
5152 /* A register or zero (floating point). */
5154 || (x
== CONST0_RTX (DFmode
))
5155 || (x
== CONST0_RTX (SFmode
)))
5157 fputs ("%fr0", file
);
5166 xoperands
[0] = XEXP (XEXP (x
, 0), 0);
5167 xoperands
[1] = XVECEXP (XEXP (XEXP (x
, 0), 1), 0, 0);
5168 pa_output_global_address (file
, xoperands
[1], 0);
5169 fprintf (file
, "(%s)", reg_names
[REGNO (xoperands
[0])]);
5173 case 'C': /* Plain (C)ondition */
5175 switch (GET_CODE (x
))
5178 fputs ("=", file
); break;
5180 fputs ("<>", file
); break;
5182 fputs (">", file
); break;
5184 fputs (">=", file
); break;
5186 fputs (">>=", file
); break;
5188 fputs (">>", file
); break;
5190 fputs ("<", file
); break;
5192 fputs ("<=", file
); break;
5194 fputs ("<<=", file
); break;
5196 fputs ("<<", file
); break;
5201 case 'N': /* Condition, (N)egated */
5202 switch (GET_CODE (x
))
5205 fputs ("<>", file
); break;
5207 fputs ("=", file
); break;
5209 fputs ("<=", file
); break;
5211 fputs ("<", file
); break;
5213 fputs ("<<", file
); break;
5215 fputs ("<<=", file
); break;
5217 fputs (">=", file
); break;
5219 fputs (">", file
); break;
5221 fputs (">>", file
); break;
5223 fputs (">>=", file
); break;
5228 /* For floating point comparisons. Note that the output
5229 predicates are the complement of the desired mode. The
5230 conditions for GT, GE, LT, LE and LTGT cause an invalid
5231 operation exception if the result is unordered and this
5232 exception is enabled in the floating-point status register. */
5234 switch (GET_CODE (x
))
5237 fputs ("!=", file
); break;
5239 fputs ("=", file
); break;
5241 fputs ("!>", file
); break;
5243 fputs ("!>=", file
); break;
5245 fputs ("!<", file
); break;
5247 fputs ("!<=", file
); break;
5249 fputs ("!<>", file
); break;
5251 fputs ("!?<=", file
); break;
5253 fputs ("!?<", file
); break;
5255 fputs ("!?>=", file
); break;
5257 fputs ("!?>", file
); break;
5259 fputs ("!?=", file
); break;
5261 fputs ("!?", file
); break;
5263 fputs ("?", file
); break;
5268 case 'S': /* Condition, operands are (S)wapped. */
5269 switch (GET_CODE (x
))
5272 fputs ("=", file
); break;
5274 fputs ("<>", file
); break;
5276 fputs ("<", file
); break;
5278 fputs ("<=", file
); break;
5280 fputs ("<<=", file
); break;
5282 fputs ("<<", file
); break;
5284 fputs (">", file
); break;
5286 fputs (">=", file
); break;
5288 fputs (">>=", file
); break;
5290 fputs (">>", file
); break;
5295 case 'B': /* Condition, (B)oth swapped and negate. */
5296 switch (GET_CODE (x
))
5299 fputs ("<>", file
); break;
5301 fputs ("=", file
); break;
5303 fputs (">=", file
); break;
5305 fputs (">", file
); break;
5307 fputs (">>", file
); break;
5309 fputs (">>=", file
); break;
5311 fputs ("<=", file
); break;
5313 fputs ("<", file
); break;
5315 fputs ("<<", file
); break;
5317 fputs ("<<=", file
); break;
5323 gcc_assert (GET_CODE (x
) == CONST_INT
);
5324 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~INTVAL (x
));
5327 gcc_assert (GET_CODE (x
) == CONST_INT
);
5328 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - (INTVAL (x
) & 63));
5331 gcc_assert (GET_CODE (x
) == CONST_INT
);
5332 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - (INTVAL (x
) & 31));
5335 gcc_assert (GET_CODE (x
) == CONST_INT
5336 && (INTVAL (x
) == 1 || INTVAL (x
) == 2 || INTVAL (x
) == 3));
5337 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5340 gcc_assert (GET_CODE (x
) == CONST_INT
&& exact_log2 (INTVAL (x
)) >= 0);
5341 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
5344 gcc_assert (GET_CODE (x
) == CONST_INT
);
5345 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 63 - (INTVAL (x
) & 63));
5348 gcc_assert (GET_CODE (x
) == CONST_INT
);
5349 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 31 - (INTVAL (x
) & 31));
5352 if (GET_CODE (x
) == CONST_INT
)
5357 switch (GET_CODE (XEXP (x
, 0)))
5361 if (ASSEMBLER_DIALECT
== 0)
5362 fputs ("s,mb", file
);
5364 fputs (",mb", file
);
5368 if (ASSEMBLER_DIALECT
== 0)
5369 fputs ("s,ma", file
);
5371 fputs (",ma", file
);
5374 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5375 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5377 if (ASSEMBLER_DIALECT
== 0)
5380 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5381 || GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5383 if (ASSEMBLER_DIALECT
== 0)
5384 fputs ("x,s", file
);
5388 else if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5392 if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5398 pa_output_global_address (file
, x
, 0);
5401 pa_output_global_address (file
, x
, 1);
5403 case 0: /* Don't do anything special */
5408 compute_zdepwi_operands (INTVAL (x
), op
);
5409 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5415 compute_zdepdi_operands (INTVAL (x
), op
);
5416 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5420 /* We can get here from a .vtable_inherit due to our
5421 CONSTANT_ADDRESS_P rejecting perfectly good constant
5427 if (GET_CODE (x
) == REG
)
5429 fputs (reg_names
[REGNO (x
)], file
);
5430 if (TARGET_64BIT
&& FP_REG_P (x
) && GET_MODE_SIZE (GET_MODE (x
)) <= 4)
5436 && GET_MODE_SIZE (GET_MODE (x
)) <= 4
5437 && (REGNO (x
) & 1) == 0)
5440 else if (GET_CODE (x
) == MEM
)
5442 int size
= GET_MODE_SIZE (GET_MODE (x
));
5443 rtx base
= NULL_RTX
;
5444 switch (GET_CODE (XEXP (x
, 0)))
5448 base
= XEXP (XEXP (x
, 0), 0);
5449 fprintf (file
, "-%d(%s)", size
, reg_names
[REGNO (base
)]);
5453 base
= XEXP (XEXP (x
, 0), 0);
5454 fprintf (file
, "%d(%s)", size
, reg_names
[REGNO (base
)]);
5457 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
)
5458 fprintf (file
, "%s(%s)",
5459 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 0), 0))],
5460 reg_names
[REGNO (XEXP (XEXP (x
, 0), 1))]);
5461 else if (GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5462 fprintf (file
, "%s(%s)",
5463 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 1), 0))],
5464 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
5465 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5466 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5468 /* Because the REG_POINTER flag can get lost during reload,
5469 pa_legitimate_address_p canonicalizes the order of the
5470 index and base registers in the combined move patterns. */
5471 rtx base
= XEXP (XEXP (x
, 0), 1);
5472 rtx index
= XEXP (XEXP (x
, 0), 0);
5474 fprintf (file
, "%s(%s)",
5475 reg_names
[REGNO (index
)], reg_names
[REGNO (base
)]);
5478 output_address (GET_MODE (x
), XEXP (x
, 0));
5481 output_address (GET_MODE (x
), XEXP (x
, 0));
5486 output_addr_const (file
, x
);
5489 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5492 pa_output_global_address (FILE *file
, rtx x
, int round_constant
)
5495 /* Imagine (high (const (plus ...))). */
5496 if (GET_CODE (x
) == HIGH
)
5499 if (GET_CODE (x
) == SYMBOL_REF
&& read_only_operand (x
, VOIDmode
))
5500 output_addr_const (file
, x
);
5501 else if (GET_CODE (x
) == SYMBOL_REF
&& !flag_pic
)
5503 output_addr_const (file
, x
);
5504 fputs ("-$global$", file
);
5506 else if (GET_CODE (x
) == CONST
)
5508 const char *sep
= "";
5509 int offset
= 0; /* assembler wants -$global$ at end */
5510 rtx base
= NULL_RTX
;
5512 switch (GET_CODE (XEXP (XEXP (x
, 0), 0)))
5516 base
= XEXP (XEXP (x
, 0), 0);
5517 output_addr_const (file
, base
);
5520 offset
= INTVAL (XEXP (XEXP (x
, 0), 0));
5526 switch (GET_CODE (XEXP (XEXP (x
, 0), 1)))
5530 base
= XEXP (XEXP (x
, 0), 1);
5531 output_addr_const (file
, base
);
5534 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
5540 /* How bogus. The compiler is apparently responsible for
5541 rounding the constant if it uses an LR field selector.
5543 The linker and/or assembler seem a better place since
5544 they have to do this kind of thing already.
5546 If we fail to do this, HP's optimizing linker may eliminate
5547 an addil, but not update the ldw/stw/ldo instruction that
5548 uses the result of the addil. */
5550 offset
= ((offset
+ 0x1000) & ~0x1fff);
5552 switch (GET_CODE (XEXP (x
, 0)))
5565 gcc_assert (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
);
5573 if (!read_only_operand (base
, VOIDmode
) && !flag_pic
)
5574 fputs ("-$global$", file
);
5576 fprintf (file
, "%s%d", sep
, offset
);
5579 output_addr_const (file
, x
);
5582 /* Output boilerplate text to appear at the beginning of the file.
5583 There are several possible versions. */
5584 #define aputs(x) fputs(x, asm_out_file)
5586 pa_file_start_level (void)
5589 aputs ("\t.LEVEL 2.0w\n");
5590 else if (TARGET_PA_20
)
5591 aputs ("\t.LEVEL 2.0\n");
5592 else if (TARGET_PA_11
)
5593 aputs ("\t.LEVEL 1.1\n");
5595 aputs ("\t.LEVEL 1.0\n");
5599 pa_file_start_space (int sortspace
)
5601 aputs ("\t.SPACE $PRIVATE$");
5604 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5606 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5607 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5608 "\n\t.SPACE $TEXT$");
5611 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5612 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5616 pa_file_start_file (int want_version
)
5618 if (write_symbols
!= NO_DEBUG
)
5620 output_file_directive (asm_out_file
, main_input_filename
);
5622 aputs ("\t.version\t\"01.01\"\n");
5627 pa_file_start_mcount (const char *aswhat
)
5630 fprintf (asm_out_file
, "\t.IMPORT _mcount,%s\n", aswhat
);
5634 pa_elf_file_start (void)
5636 pa_file_start_level ();
5637 pa_file_start_mcount ("ENTRY");
5638 pa_file_start_file (0);
5642 pa_som_file_start (void)
5644 pa_file_start_level ();
5645 pa_file_start_space (0);
5646 aputs ("\t.IMPORT $global$,DATA\n"
5647 "\t.IMPORT $$dyncall,MILLICODE\n");
5648 pa_file_start_mcount ("CODE");
5649 pa_file_start_file (0);
5653 pa_linux_file_start (void)
5655 pa_file_start_file (1);
5656 pa_file_start_level ();
5657 pa_file_start_mcount ("CODE");
5661 pa_hpux64_gas_file_start (void)
5663 pa_file_start_level ();
5664 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5666 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file
, "_mcount", "function");
5668 pa_file_start_file (1);
5672 pa_hpux64_hpas_file_start (void)
5674 pa_file_start_level ();
5675 pa_file_start_space (1);
5676 pa_file_start_mcount ("CODE");
5677 pa_file_start_file (0);
5681 /* Search the deferred plabel list for SYMBOL and return its internal
5682 label. If an entry for SYMBOL is not found, a new entry is created. */
5685 pa_get_deferred_plabel (rtx symbol
)
5687 const char *fname
= XSTR (symbol
, 0);
5690 /* See if we have already put this function on the list of deferred
5691 plabels. This list is generally small, so a liner search is not
5692 too ugly. If it proves too slow replace it with something faster. */
5693 for (i
= 0; i
< n_deferred_plabels
; i
++)
5694 if (strcmp (fname
, XSTR (deferred_plabels
[i
].symbol
, 0)) == 0)
5697 /* If the deferred plabel list is empty, or this entry was not found
5698 on the list, create a new entry on the list. */
5699 if (deferred_plabels
== NULL
|| i
== n_deferred_plabels
)
5703 if (deferred_plabels
== 0)
5704 deferred_plabels
= ggc_alloc
<deferred_plabel
> ();
5706 deferred_plabels
= GGC_RESIZEVEC (struct deferred_plabel
,
5708 n_deferred_plabels
+ 1);
5710 i
= n_deferred_plabels
++;
5711 deferred_plabels
[i
].internal_label
= gen_label_rtx ();
5712 deferred_plabels
[i
].symbol
= symbol
;
5714 /* Gross. We have just implicitly taken the address of this
5715 function. Mark it in the same manner as assemble_name. */
5716 id
= maybe_get_identifier (targetm
.strip_name_encoding (fname
));
5718 mark_referenced (id
);
5721 return deferred_plabels
[i
].internal_label
;
5725 output_deferred_plabels (void)
5729 /* If we have some deferred plabels, then we need to switch into the
5730 data or readonly data section, and align it to a 4 byte boundary
5731 before outputting the deferred plabels. */
5732 if (n_deferred_plabels
)
5734 switch_to_section (flag_pic
? data_section
: readonly_data_section
);
5735 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
5738 /* Now output the deferred plabels. */
5739 for (i
= 0; i
< n_deferred_plabels
; i
++)
5741 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5742 CODE_LABEL_NUMBER (deferred_plabels
[i
].internal_label
));
5743 assemble_integer (deferred_plabels
[i
].symbol
,
5744 TARGET_64BIT
? 8 : 4, TARGET_64BIT
? 64 : 32, 1);
5748 /* Initialize optabs to point to emulation routines. */
5751 pa_init_libfuncs (void)
5753 if (HPUX_LONG_DOUBLE_LIBRARY
)
5755 set_optab_libfunc (add_optab
, TFmode
, "_U_Qfadd");
5756 set_optab_libfunc (sub_optab
, TFmode
, "_U_Qfsub");
5757 set_optab_libfunc (smul_optab
, TFmode
, "_U_Qfmpy");
5758 set_optab_libfunc (sdiv_optab
, TFmode
, "_U_Qfdiv");
5759 set_optab_libfunc (smin_optab
, TFmode
, "_U_Qmin");
5760 set_optab_libfunc (smax_optab
, TFmode
, "_U_Qfmax");
5761 set_optab_libfunc (sqrt_optab
, TFmode
, "_U_Qfsqrt");
5762 set_optab_libfunc (abs_optab
, TFmode
, "_U_Qfabs");
5763 set_optab_libfunc (neg_optab
, TFmode
, "_U_Qfneg");
5765 set_optab_libfunc (eq_optab
, TFmode
, "_U_Qfeq");
5766 set_optab_libfunc (ne_optab
, TFmode
, "_U_Qfne");
5767 set_optab_libfunc (gt_optab
, TFmode
, "_U_Qfgt");
5768 set_optab_libfunc (ge_optab
, TFmode
, "_U_Qfge");
5769 set_optab_libfunc (lt_optab
, TFmode
, "_U_Qflt");
5770 set_optab_libfunc (le_optab
, TFmode
, "_U_Qfle");
5771 set_optab_libfunc (unord_optab
, TFmode
, "_U_Qfunord");
5773 set_conv_libfunc (sext_optab
, TFmode
, SFmode
, "_U_Qfcnvff_sgl_to_quad");
5774 set_conv_libfunc (sext_optab
, TFmode
, DFmode
, "_U_Qfcnvff_dbl_to_quad");
5775 set_conv_libfunc (trunc_optab
, SFmode
, TFmode
, "_U_Qfcnvff_quad_to_sgl");
5776 set_conv_libfunc (trunc_optab
, DFmode
, TFmode
, "_U_Qfcnvff_quad_to_dbl");
5778 set_conv_libfunc (sfix_optab
, SImode
, TFmode
,
5779 TARGET_64BIT
? "__U_Qfcnvfxt_quad_to_sgl"
5780 : "_U_Qfcnvfxt_quad_to_sgl");
5781 set_conv_libfunc (sfix_optab
, DImode
, TFmode
,
5782 "_U_Qfcnvfxt_quad_to_dbl");
5783 set_conv_libfunc (ufix_optab
, SImode
, TFmode
,
5784 "_U_Qfcnvfxt_quad_to_usgl");
5785 set_conv_libfunc (ufix_optab
, DImode
, TFmode
,
5786 "_U_Qfcnvfxt_quad_to_udbl");
5788 set_conv_libfunc (sfloat_optab
, TFmode
, SImode
,
5789 "_U_Qfcnvxf_sgl_to_quad");
5790 set_conv_libfunc (sfloat_optab
, TFmode
, DImode
,
5791 "_U_Qfcnvxf_dbl_to_quad");
5792 set_conv_libfunc (ufloat_optab
, TFmode
, SImode
,
5793 "_U_Qfcnvxf_usgl_to_quad");
5794 set_conv_libfunc (ufloat_optab
, TFmode
, DImode
,
5795 "_U_Qfcnvxf_udbl_to_quad");
5798 if (TARGET_SYNC_LIBCALL
)
5799 init_sync_libfuncs (8);
5802 /* HP's millicode routines mean something special to the assembler.
5803 Keep track of which ones we have used. */
5805 enum millicodes
{ remI
, remU
, divI
, divU
, mulI
, end1000
};
5806 static void import_milli (enum millicodes
);
5807 static char imported
[(int) end1000
];
5808 static const char * const milli_names
[] = {"remI", "remU", "divI", "divU", "mulI"};
5809 static const char import_string
[] = ".IMPORT $$....,MILLICODE";
5810 #define MILLI_START 10
5813 import_milli (enum millicodes code
)
5815 char str
[sizeof (import_string
)];
5817 if (!imported
[(int) code
])
5819 imported
[(int) code
] = 1;
5820 strcpy (str
, import_string
);
5821 strncpy (str
+ MILLI_START
, milli_names
[(int) code
], 4);
5822 output_asm_insn (str
, 0);
5826 /* The register constraints have put the operands and return value in
5827 the proper registers. */
5830 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED
, rtx_insn
*insn
)
5832 import_milli (mulI
);
5833 return pa_output_millicode_call (insn
, gen_rtx_SYMBOL_REF (Pmode
, "$$mulI"));
5836 /* Emit the rtl for doing a division by a constant. */
5838 /* Do magic division millicodes exist for this value? */
5839 const int pa_magic_milli
[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5841 /* We'll use an array to keep track of the magic millicodes and
5842 whether or not we've used them already. [n][0] is signed, [n][1] is
5845 static int div_milli
[16][2];
5848 pa_emit_hpdiv_const (rtx
*operands
, int unsignedp
)
5850 if (GET_CODE (operands
[2]) == CONST_INT
5851 && INTVAL (operands
[2]) > 0
5852 && INTVAL (operands
[2]) < 16
5853 && pa_magic_milli
[INTVAL (operands
[2])])
5855 rtx ret
= gen_rtx_REG (SImode
, TARGET_64BIT
? 2 : 31);
5857 emit_move_insn (gen_rtx_REG (SImode
, 26), operands
[1]);
5861 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode
, 29),
5862 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
5864 gen_rtx_REG (SImode
, 26),
5866 gen_rtx_CLOBBER (VOIDmode
, operands
[4]),
5867 gen_rtx_CLOBBER (VOIDmode
, operands
[3]),
5868 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 26)),
5869 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 25)),
5870 gen_rtx_CLOBBER (VOIDmode
, ret
))));
5871 emit_move_insn (operands
[0], gen_rtx_REG (SImode
, 29));
5878 pa_output_div_insn (rtx
*operands
, int unsignedp
, rtx_insn
*insn
)
5882 /* If the divisor is a constant, try to use one of the special
5884 if (GET_CODE (operands
[0]) == CONST_INT
)
5886 static char buf
[100];
5887 divisor
= INTVAL (operands
[0]);
5888 if (!div_milli
[divisor
][unsignedp
])
5890 div_milli
[divisor
][unsignedp
] = 1;
5892 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands
);
5894 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands
);
5898 sprintf (buf
, "$$divU_" HOST_WIDE_INT_PRINT_DEC
,
5899 INTVAL (operands
[0]));
5900 return pa_output_millicode_call (insn
,
5901 gen_rtx_SYMBOL_REF (SImode
, buf
));
5905 sprintf (buf
, "$$divI_" HOST_WIDE_INT_PRINT_DEC
,
5906 INTVAL (operands
[0]));
5907 return pa_output_millicode_call (insn
,
5908 gen_rtx_SYMBOL_REF (SImode
, buf
));
5911 /* Divisor isn't a special constant. */
5916 import_milli (divU
);
5917 return pa_output_millicode_call (insn
,
5918 gen_rtx_SYMBOL_REF (SImode
, "$$divU"));
5922 import_milli (divI
);
5923 return pa_output_millicode_call (insn
,
5924 gen_rtx_SYMBOL_REF (SImode
, "$$divI"));
5929 /* Output a $$rem millicode to do mod. */
5932 pa_output_mod_insn (int unsignedp
, rtx_insn
*insn
)
5936 import_milli (remU
);
5937 return pa_output_millicode_call (insn
,
5938 gen_rtx_SYMBOL_REF (SImode
, "$$remU"));
5942 import_milli (remI
);
5943 return pa_output_millicode_call (insn
,
5944 gen_rtx_SYMBOL_REF (SImode
, "$$remI"));
5949 pa_output_arg_descriptor (rtx_insn
*call_insn
)
5951 const char *arg_regs
[4];
5952 machine_mode arg_mode
;
5954 int i
, output_flag
= 0;
5957 /* We neither need nor want argument location descriptors for the
5958 64bit runtime environment or the ELF32 environment. */
5959 if (TARGET_64BIT
|| TARGET_ELF32
)
5962 for (i
= 0; i
< 4; i
++)
5965 /* Specify explicitly that no argument relocations should take place
5966 if using the portable runtime calling conventions. */
5967 if (TARGET_PORTABLE_RUNTIME
)
5969 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5974 gcc_assert (CALL_P (call_insn
));
5975 for (link
= CALL_INSN_FUNCTION_USAGE (call_insn
);
5976 link
; link
= XEXP (link
, 1))
5978 rtx use
= XEXP (link
, 0);
5980 if (! (GET_CODE (use
) == USE
5981 && GET_CODE (XEXP (use
, 0)) == REG
5982 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
5985 arg_mode
= GET_MODE (XEXP (use
, 0));
5986 regno
= REGNO (XEXP (use
, 0));
5987 if (regno
>= 23 && regno
<= 26)
5989 arg_regs
[26 - regno
] = "GR";
5990 if (arg_mode
== DImode
)
5991 arg_regs
[25 - regno
] = "GR";
5993 else if (regno
>= 32 && regno
<= 39)
5995 if (arg_mode
== SFmode
)
5996 arg_regs
[(regno
- 32) / 2] = "FR";
5999 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6000 arg_regs
[(regno
- 34) / 2] = "FR";
6001 arg_regs
[(regno
- 34) / 2 + 1] = "FU";
6003 arg_regs
[(regno
- 34) / 2] = "FU";
6004 arg_regs
[(regno
- 34) / 2 + 1] = "FR";
6009 fputs ("\t.CALL ", asm_out_file
);
6010 for (i
= 0; i
< 4; i
++)
6015 fputc (',', asm_out_file
);
6016 fprintf (asm_out_file
, "ARGW%d=%s", i
, arg_regs
[i
]);
6019 fputc ('\n', asm_out_file
);
/* NOTE(review): the text below is a garbled extraction -- each original
   source line is split across several display lines, the embedded
   integers (6022, 6027, ...) are line numbers from the original file
   rather than code, and a number of lines (braces, returns, else arms,
   mode-switch labels) were lost entirely.  Only comments have been
   added here; every surviving token is untouched.  This implements the
   TARGET_SECONDARY_RELOAD hook for the PA port.  */
6022 /* Inform reload about cases where moving X with a mode MODE to or from
6023 a register in RCLASS requires an extra scratch or immediate register.
6024 Return the class needed for the immediate register. */
6027 pa_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass_i
,
6028 machine_mode mode
, secondary_reload_info
*sri
)
6031 enum reg_class rclass
= (enum reg_class
) rclass_i
;
6033 /* Handle the easy stuff first. */
6034 if (rclass
== R1_REGS
)
/* NOTE(review): the bodies of the two guards above (lines 6035-6045)
   were elided; presumably they return NO_REGS for the trivial cases --
   confirm against upstream pa.c.  */
6040 if (rclass
== BASE_REG_CLASS
&& regno
< FIRST_PSEUDO_REGISTER
)
6046 /* If we have something like (mem (mem (...)), we can safely assume the
6047 inner MEM will end up in a general register after reloading, so there's
6048 no need for a secondary reload. */
6049 if (GET_CODE (x
) == MEM
&& GET_CODE (XEXP (x
, 0)) == MEM
)
6052 /* Trying to load a constant into a FP register during PIC code
6053 generation requires %r1 as a scratch register. For float modes,
6054 the only legitimate constant is CONST0_RTX. However, there are
6055 a few patterns that accept constant double operands. */
6057 && FP_REG_CLASS_P (rclass
)
6058 && (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
))
/* NOTE(review): a switch over MODE was elided here (lines 6059-6082);
   the surviving arms select mode-specific reload_in patterns that use
   %r1 as the scratch register -- confirm labels against upstream.  */
6063 sri
->icode
= CODE_FOR_reload_insi_r1
;
6067 sri
->icode
= CODE_FOR_reload_indi_r1
;
6071 sri
->icode
= CODE_FOR_reload_insf_r1
;
6075 sri
->icode
= CODE_FOR_reload_indf_r1
;
6084 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6085 register when we're generating PIC code or when the operand isn't
6087 if (pa_symbolic_expression_p (x
))
6089 if (GET_CODE (x
) == HIGH
)
6092 if (flag_pic
|| !read_only_operand (x
, VOIDmode
))
/* NOTE(review): another elided mode switch; SImode/DImode arms pick the
   %r1-scratch reload patterns -- confirm against upstream.  */
6097 sri
->icode
= CODE_FOR_reload_insi_r1
;
6101 sri
->icode
= CODE_FOR_reload_indi_r1
;
6111 /* Profiling showed the PA port spends about 1.3% of its compilation
6112 time in true_regnum from calls inside pa_secondary_reload_class. */
6113 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
6114 regno
= true_regnum (x
);
6116 /* Handle reloads for floating point loads and stores. */
6117 if ((regno
>= FIRST_PSEUDO_REGISTER
|| regno
== -1)
6118 && FP_REG_CLASS_P (rclass
))
6124 /* We don't need a secondary reload for indexed memory addresses.
6126 When INT14_OK_STRICT is true, it might appear that we could
6127 directly allow register indirect memory addresses. However,
6128 this doesn't work because we don't support SUBREGs in
6129 floating-point register copies and reload doesn't tell us
6130 when it's going to use a SUBREG. */
6131 if (IS_INDEX_ADDR_P (x
))
6135 /* Request a secondary reload with a general scratch register
6136 for everything else. ??? Could symbolic operands be handled
6137 directly when generating non-pic PA 2.0 code? */
/* NOTE(review): the elided line 6138 presumably assigns sri->icode from
   the in_p-selected optab handler below -- confirm against upstream.  */
6139 ? direct_optab_handler (reload_in_optab
, mode
)
6140 : direct_optab_handler (reload_out_optab
, mode
));
6144 /* A SAR<->FP register copy requires an intermediate general register
6145 and secondary memory. We need a secondary reload with a general
6146 scratch register for spills. */
6147 if (rclass
== SHIFT_REGS
)
6150 if (regno
>= FIRST_PSEUDO_REGISTER
|| regno
< 0)
6153 ? direct_optab_handler (reload_in_optab
, mode
)
6154 : direct_optab_handler (reload_out_optab
, mode
));
6158 /* Handle FP copy. */
6159 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno
)))
6160 return GENERAL_REGS
;
6163 if (regno
>= 0 && regno
< FIRST_PSEUDO_REGISTER
6164 && REGNO_REG_CLASS (regno
) == SHIFT_REGS
6165 && FP_REG_CLASS_P (rclass
))
6166 return GENERAL_REGS
;
/* NOTE(review): the final "return NO_REGS;" and closing brace were
   elided by the extraction -- confirm against upstream pa.c.  */
6171 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
6174 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED
,
6175 reg_class_t class1 ATTRIBUTE_UNUSED
,
6176 reg_class_t class2 ATTRIBUTE_UNUSED
)
6178 #ifdef PA_SECONDARY_MEMORY_NEEDED
6179 return PA_SECONDARY_MEMORY_NEEDED (mode
, class1
, class2
);
6185 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6186 is only marked as live on entry by df-scan when it is a fixed
6187 register. It isn't a fixed register in the 64-bit runtime,
6188 so we need to mark it here. */
6191 pa_extra_live_on_entry (bitmap regs
)
6194 bitmap_set_bit (regs
, ARG_POINTER_REGNUM
);
6197 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6198 to prevent it from being deleted. */
6201 pa_eh_return_handler_rtx (void)
6205 tmp
= gen_rtx_PLUS (word_mode
, hard_frame_pointer_rtx
,
6206 TARGET_64BIT
? GEN_INT (-16) : GEN_INT (-20));
6207 tmp
= gen_rtx_MEM (word_mode
, tmp
);
6212 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6213 by invisible reference. As a GCC extension, we also pass anything
6214 with a zero or variable size by reference.
6216 The 64-bit runtime does not describe passing any types by invisible
6217 reference. The internals of GCC can't currently handle passing
6218 empty structures, and zero or variable length arrays when they are
6219 not passed entirely on the stack or by reference. Thus, as a GCC
6220 extension, we pass these types by reference. The HP compiler doesn't
6221 support these types, so hopefully there shouldn't be any compatibility
6222 issues. This may have to be revisited when HP releases a C99 compiler
6223 or updates the ABI. */
6226 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED
,
6227 machine_mode mode
, const_tree type
,
6228 bool named ATTRIBUTE_UNUSED
)
6233 size
= int_size_in_bytes (type
);
6235 size
= GET_MODE_SIZE (mode
);
6240 return size
<= 0 || size
> 8;
6243 /* Implement TARGET_FUNCTION_ARG_PADDING. */
6245 static pad_direction
6246 pa_function_arg_padding (machine_mode mode
, const_tree type
)
6251 && (AGGREGATE_TYPE_P (type
)
6252 || TREE_CODE (type
) == COMPLEX_TYPE
6253 || TREE_CODE (type
) == VECTOR_TYPE
)))
6255 /* Return PAD_NONE if justification is not required. */
6257 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
6258 && (int_size_in_bytes (type
) * BITS_PER_UNIT
) % PARM_BOUNDARY
== 0)
6261 /* The directions set here are ignored when a BLKmode argument larger
6262 than a word is placed in a register. Different code is used for
6263 the stack and registers. This makes it difficult to have a
6264 consistent data representation for both the stack and registers.
6265 For both runtimes, the justification and padding for arguments on
6266 the stack and in registers should be identical. */
6268 /* The 64-bit runtime specifies left justification for aggregates. */
6271 /* The 32-bit runtime architecture specifies right justification.
6272 When the argument is passed on the stack, the argument is padded
6273 with garbage on the left. The HP compiler pads with zeros. */
6274 return PAD_DOWNWARD
;
6277 if (GET_MODE_BITSIZE (mode
) < PARM_BOUNDARY
)
6278 return PAD_DOWNWARD
;
6284 /* Do what is necessary for `va_start'. We look at the current function
6285 to determine if stdargs or varargs is used and fill in an initial
6286 va_list. A pointer to this constructor is returned. */
6289 hppa_builtin_saveregs (void)
6292 tree fntype
= TREE_TYPE (current_function_decl
);
6293 int argadj
= ((!stdarg_p (fntype
))
6294 ? UNITS_PER_WORD
: 0);
6297 offset
= plus_constant (Pmode
, crtl
->args
.arg_offset_rtx
, argadj
);
6299 offset
= crtl
->args
.arg_offset_rtx
;
6305 /* Adjust for varargs/stdarg differences. */
6307 offset
= plus_constant (Pmode
, crtl
->args
.arg_offset_rtx
, -argadj
);
6309 offset
= crtl
->args
.arg_offset_rtx
;
6311 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6312 from the incoming arg pointer and growing to larger addresses. */
6313 for (i
= 26, off
= -64; i
>= 19; i
--, off
+= 8)
6314 emit_move_insn (gen_rtx_MEM (word_mode
,
6315 plus_constant (Pmode
,
6316 arg_pointer_rtx
, off
)),
6317 gen_rtx_REG (word_mode
, i
));
6319 /* The incoming args pointer points just beyond the flushback area;
6320 normally this is not a serious concern. However, when we are doing
6321 varargs/stdargs we want to make the arg pointer point to the start
6322 of the incoming argument area. */
6323 emit_move_insn (virtual_incoming_args_rtx
,
6324 plus_constant (Pmode
, arg_pointer_rtx
, -64));
6326 /* Now return a pointer to the first anonymous argument. */
6327 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6328 virtual_incoming_args_rtx
,
6329 offset
, 0, 0, OPTAB_LIB_WIDEN
));
6332 /* Store general registers on the stack. */
6333 dest
= gen_rtx_MEM (BLKmode
,
6334 plus_constant (Pmode
, crtl
->args
.internal_arg_pointer
,
6336 set_mem_alias_set (dest
, get_varargs_alias_set ());
6337 set_mem_align (dest
, BITS_PER_WORD
);
6338 move_block_from_reg (23, dest
, 4);
6340 /* move_block_from_reg will emit code to store the argument registers
6341 individually as scalar stores.
6343 However, other insns may later load from the same addresses for
6344 a structure load (passing a struct to a varargs routine).
6346 The alias code assumes that such aliasing can never happen, so we
6347 have to keep memory referencing insns from moving up beyond the
6348 last argument register store. So we emit a blockage insn here. */
6349 emit_insn (gen_blockage ());
6351 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6352 crtl
->args
.internal_arg_pointer
,
6353 offset
, 0, 0, OPTAB_LIB_WIDEN
));
6357 hppa_va_start (tree valist
, rtx nextarg
)
6359 nextarg
= expand_builtin_saveregs ();
6360 std_expand_builtin_va_start (valist
, nextarg
);
6364 hppa_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
6369 /* Args grow upward. We can use the generic routines. */
6370 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
6372 else /* !TARGET_64BIT */
6374 tree ptr
= build_pointer_type (type
);
6377 unsigned int size
, ofs
;
6380 indirect
= pass_va_arg_by_reference (type
);
6384 ptr
= build_pointer_type (type
);
6386 size
= int_size_in_bytes (type
);
6387 valist_type
= TREE_TYPE (valist
);
6389 /* Args grow down. Not handled by generic routines. */
6391 u
= fold_convert (sizetype
, size_in_bytes (type
));
6392 u
= fold_build1 (NEGATE_EXPR
, sizetype
, u
);
6393 t
= fold_build_pointer_plus (valist
, u
);
6395 /* Align to 4 or 8 byte boundary depending on argument size. */
6397 u
= build_int_cst (TREE_TYPE (t
), (HOST_WIDE_INT
)(size
> 4 ? -8 : -4));
6398 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
, u
);
6399 t
= fold_convert (valist_type
, t
);
6401 t
= build2 (MODIFY_EXPR
, valist_type
, valist
, t
);
6403 ofs
= (8 - size
) % 4;
6405 t
= fold_build_pointer_plus_hwi (t
, ofs
);
6407 t
= fold_convert (ptr
, t
);
6408 t
= build_va_arg_indirect_ref (t
);
6411 t
= build_va_arg_indirect_ref (t
);
6417 /* True if MODE is valid for the target. By "valid", we mean able to
6418 be manipulated in non-trivial ways. In particular, this means all
6419 the arithmetic is supported.
6421 Currently, TImode is not valid as the HP 64-bit runtime documentation
6422 doesn't document the alignment and calling conventions for this type.
6423 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6424 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6427 pa_scalar_mode_supported_p (scalar_mode mode
)
6429 int precision
= GET_MODE_PRECISION (mode
);
6431 switch (GET_MODE_CLASS (mode
))
6433 case MODE_PARTIAL_INT
:
6435 if (precision
== CHAR_TYPE_SIZE
)
6437 if (precision
== SHORT_TYPE_SIZE
)
6439 if (precision
== INT_TYPE_SIZE
)
6441 if (precision
== LONG_TYPE_SIZE
)
6443 if (precision
== LONG_LONG_TYPE_SIZE
)
6448 if (precision
== FLOAT_TYPE_SIZE
)
6450 if (precision
== DOUBLE_TYPE_SIZE
)
6452 if (precision
== LONG_DOUBLE_TYPE_SIZE
)
6456 case MODE_DECIMAL_FLOAT
:
6464 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6465 it branches into the delay slot. Otherwise, return FALSE. */
6468 branch_to_delay_slot_p (rtx_insn
*insn
)
6470 rtx_insn
*jump_insn
;
6472 if (dbr_sequence_length ())
6475 jump_insn
= next_active_insn (JUMP_LABEL_AS_INSN (insn
));
6478 insn
= next_active_insn (insn
);
6479 if (jump_insn
== insn
)
6482 /* We can't rely on the length of asms. So, we return FALSE when
6483 the branch is followed by an asm. */
6485 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6486 || asm_noperands (PATTERN (insn
)) >= 0
6487 || get_attr_length (insn
) > 0)
6494 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6496 This occurs when INSN has an unfilled delay slot and is followed
6497 by an asm. Disaster can occur if the asm is empty and the jump
6498 branches into the delay slot. So, we add a nop in the delay slot
6499 when this occurs. */
6502 branch_needs_nop_p (rtx_insn
*insn
)
6504 rtx_insn
*jump_insn
;
6506 if (dbr_sequence_length ())
6509 jump_insn
= next_active_insn (JUMP_LABEL_AS_INSN (insn
));
6512 insn
= next_active_insn (insn
);
6513 if (!insn
|| jump_insn
== insn
)
6516 if (!(GET_CODE (PATTERN (insn
)) == ASM_INPUT
6517 || asm_noperands (PATTERN (insn
)) >= 0)
6518 && get_attr_length (insn
) > 0)
6525 /* Return TRUE if INSN, a forward jump insn, can use nullification
6526 to skip the following instruction. This avoids an extra cycle due
6527 to a mis-predicted branch when we fall through. */
6530 use_skip_p (rtx_insn
*insn
)
6532 rtx_insn
*jump_insn
= next_active_insn (JUMP_LABEL_AS_INSN (insn
));
6536 insn
= next_active_insn (insn
);
6538 /* We can't rely on the length of asms, so we can't skip asms. */
6540 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6541 || asm_noperands (PATTERN (insn
)) >= 0)
6543 if (get_attr_length (insn
) == 4
6544 && jump_insn
== next_active_insn (insn
))
6546 if (get_attr_length (insn
) > 0)
/* NOTE(review): the text below is a garbled extraction -- each original
   source line is split across several display lines, the embedded
   integers (6553, 6561, ...) are line numbers from the original file
   rather than code, and many lines (braces, switch/case heads, else
   arms, returns) were lost entirely.  Only comments have been added;
   every surviving token is untouched.  This emits the assembler
   template for PA conditional branches (comclr/cmpb families).  */
6553 /* This routine handles all the normal conditional branch sequences we
6554 might need to generate. It handles compare immediate vs compare
6555 register, nullification of delay slots, varying length branches,
6556 negated branches, and all combinations of the above. It returns the
6557 output appropriate to emit the branch corresponding to all given
6561 pa_output_cbranch (rtx
*operands
, int negated
, rtx_insn
*insn
)
6563 static char buf
[100];
6565 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6566 int length
= get_attr_length (insn
);
6569 /* A conditional branch to the following instruction (e.g. the delay slot)
6570 is asking for a disaster. This can happen when not optimizing and
6571 when jump optimization fails.
6573 While it is usually safe to emit nothing, this can fail if the
6574 preceding instruction is a nullified branch with an empty delay
6575 slot and the same branch target as this branch. We could check
6576 for this but jump optimization should eliminate nop jumps. It
6577 is always safe to emit a nop. */
6578 if (branch_to_delay_slot_p (insn
))
6581 /* The doubleword form of the cmpib instruction doesn't have the LEU
6582 and GTU conditions while the cmpb instruction does. Since we accept
6583 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6584 if (GET_MODE (operands
[1]) == DImode
&& operands
[2] == const0_rtx
)
6585 operands
[2] = gen_rtx_REG (DImode
, 0);
6586 if (GET_MODE (operands
[2]) == DImode
&& operands
[1] == const0_rtx
)
6587 operands
[1] = gen_rtx_REG (DImode
, 0);
6589 /* If this is a long branch with its delay slot unfilled, set `nullify'
6590 as it can nullify the delay slot and save a nop. */
6591 if (length
== 8 && dbr_sequence_length () == 0)
6594 /* If this is a short forward conditional branch which did not get
6595 its delay slot filled, the delay slot can still be nullified. */
6596 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6597 nullify
= forward_branch_p (insn
);
6599 /* A forward branch over a single nullified insn can be done with a
6600 comclr instruction. This avoids a single cycle penalty due to
6601 mis-predicted branch if we fall through (branch not taken). */
6602 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
/* NOTE(review): a switch over LENGTH begins here; the case heads were
   elided by the extraction.  The surviving strcpy/strcat calls build
   the branch template incrementally in BUF.  */
6606 /* All short conditional branches except backwards with an unfilled
6610 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6612 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6613 if (GET_MODE (operands
[1]) == DImode
)
6616 strcat (buf
, "%B3");
6618 strcat (buf
, "%S3");
6620 strcat (buf
, " %2,%r1,%%r0");
6623 if (branch_needs_nop_p (insn
))
6624 strcat (buf
, ",n %2,%r1,%0%#");
6626 strcat (buf
, ",n %2,%r1,%0");
6629 strcat (buf
, " %2,%r1,%0");
6632 /* All long conditionals. Note a short backward branch with an
6633 unfilled delay slot is treated just like a long backward branch
6634 with an unfilled delay slot. */
6636 /* Handle weird backwards branch with a filled delay slot
6637 which is nullified. */
6638 if (dbr_sequence_length () != 0
6639 && ! forward_branch_p (insn
)
6642 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6643 if (GET_MODE (operands
[1]) == DImode
)
6646 strcat (buf
, "%S3");
6648 strcat (buf
, "%B3");
6649 strcat (buf
, ",n %2,%r1,.+12\n\tb %0");
6651 /* Handle short backwards branch with an unfilled delay slot.
6652 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6653 taken and untaken branches. */
6654 else if (dbr_sequence_length () == 0
6655 && ! forward_branch_p (insn
)
6656 && INSN_ADDRESSES_SET_P ()
6657 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6658 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6660 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6661 if (GET_MODE (operands
[1]) == DImode
)
6664 strcat (buf
, "%B3 %2,%r1,%0%#");
6666 strcat (buf
, "%S3 %2,%r1,%0%#");
6670 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6671 if (GET_MODE (operands
[1]) == DImode
)
6674 strcat (buf
, "%S3");
6676 strcat (buf
, "%B3");
6678 strcat (buf
, " %2,%r1,%%r0\n\tb,n %0");
6680 strcat (buf
, " %2,%r1,%%r0\n\tb %0");
6685 /* The reversed conditional branch must branch over one additional
6686 instruction if the delay slot is filled and needs to be extracted
6687 by pa_output_lbranch. If the delay slot is empty or this is a
6688 nullified forward branch, the instruction after the reversed
6689 condition branch must be nullified. */
6690 if (dbr_sequence_length () == 0
6691 || (nullify
&& forward_branch_p (insn
)))
6695 operands
[4] = GEN_INT (length
);
6700 operands
[4] = GEN_INT (length
+ 4);
6703 /* Create a reversed conditional branch which branches around
6704 the following insns. */
6705 if (GET_MODE (operands
[1]) != DImode
)
/* NOTE(review): the strcpy/output_asm_insn wrappers around the template
   strings below were elided; only the string arguments survive.  */
6711 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6714 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6720 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6723 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6732 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6735 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6741 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6744 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6748 output_asm_insn (buf
, operands
);
6749 return pa_output_lbranch (operands
[0], insn
, xdelay
);
6754 /* Output a PIC pc-relative instruction sequence to load the address of
6755 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
6756 or a code label. OPERANDS[1] specifies the register to use to load
6757 the program counter. OPERANDS[3] may be used for label generation
6758 The sequence is always three instructions in length. The program
6759 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6760 Register %r1 is clobbered. */
6763 pa_output_pic_pcrel_sequence (rtx
*operands
)
6765 gcc_assert (SYMBOL_REF_P (operands
[0]) || LABEL_P (operands
[0]));
6768 /* We can use mfia to determine the current program counter. */
6769 if (TARGET_SOM
|| !TARGET_GAS
)
6771 operands
[3] = gen_label_rtx ();
6772 targetm
.asm_out
.internal_label (asm_out_file
, "L",
6773 CODE_LABEL_NUMBER (operands
[3]));
6774 output_asm_insn ("mfia %1", operands
);
6775 output_asm_insn ("addil L'%0-%l3,%1", operands
);
6776 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands
);
6780 output_asm_insn ("mfia %1", operands
);
6781 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands
);
6782 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands
);
6787 /* We need to use a branch to determine the current program counter. */
6788 output_asm_insn ("{bl|b,l} .+8,%1", operands
);
6789 if (TARGET_SOM
|| !TARGET_GAS
)
6791 operands
[3] = gen_label_rtx ();
6792 output_asm_insn ("addil L'%0-%l3,%1", operands
);
6793 targetm
.asm_out
.internal_label (asm_out_file
, "L",
6794 CODE_LABEL_NUMBER (operands
[3]));
6795 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands
);
6799 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands
);
6800 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands
);
6805 /* This routine handles output of long unconditional branches that
6806 exceed the maximum range of a simple branch instruction. Since
6807 we don't have a register available for the branch, we save register
6808 %r1 in the frame marker, load the branch destination DEST into %r1,
6809 execute the branch, and restore %r1 in the delay slot of the branch.
6811 Since long branches may have an insn in the delay slot and the
6812 delay slot is used to restore %r1, we in general need to extract
6813 this insn and execute it before the branch. However, to facilitate
6814 use of this function by conditional branches, we also provide an
6815 option to not extract the delay insn so that it will be emitted
6816 after the long branch. So, if there is an insn in the delay slot,
6817 it is extracted if XDELAY is nonzero.
6819 The lengths of the various long-branch sequences are 20, 16 and 24
6820 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6823 pa_output_lbranch (rtx dest
, rtx_insn
*insn
, int xdelay
)
6827 xoperands
[0] = dest
;
6829 /* First, free up the delay slot. */
6830 if (xdelay
&& dbr_sequence_length () != 0)
6832 /* We can't handle a jump in the delay slot. */
6833 gcc_assert (! JUMP_P (NEXT_INSN (insn
)));
6835 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
6838 /* Now delete the delay insn. */
6839 SET_INSN_DELETED (NEXT_INSN (insn
));
6842 /* Output an insn to save %r1. The runtime documentation doesn't
6843 specify whether the "Clean Up" slot in the callers frame can
6844 be clobbered by the callee. It isn't copied by HP's builtin
6845 alloca, so this suggests that it can be clobbered if necessary.
6846 The "Static Link" location is copied by HP builtin alloca, so
6847 we avoid using it. Using the cleanup slot might be a problem
6848 if we have to interoperate with languages that pass cleanup
6849 information. However, it should be possible to handle these
6850 situations with GCC's asm feature.
6852 The "Current RP" slot is reserved for the called procedure, so
6853 we try to use it when we don't have a frame of our own. It's
6854 rather unlikely that we won't have a frame when we need to emit
6857 Really the way to go long term is a register scavenger; goto
6858 the target of the jump and find a register which we can use
6859 as a scratch to hold the value in %r1. Then, we wouldn't have
6860 to free up the delay slot or clobber a slot that may be needed
6861 for other purposes. */
6864 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6865 /* Use the return pointer slot in the frame marker. */
6866 output_asm_insn ("std %%r1,-16(%%r30)", xoperands
);
6868 /* Use the slot at -40 in the frame marker since HP builtin
6869 alloca doesn't copy it. */
6870 output_asm_insn ("std %%r1,-40(%%r30)", xoperands
);
6874 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6875 /* Use the return pointer slot in the frame marker. */
6876 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands
);
6878 /* Use the "Clean Up" slot in the frame marker. In GCC,
6879 the only other use of this location is for copying a
6880 floating point double argument from a floating-point
6881 register to two general registers. The copy is done
6882 as an "atomic" operation when outputting a call, so it
6883 won't interfere with our using the location here. */
6884 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands
);
6887 if (TARGET_PORTABLE_RUNTIME
)
6889 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
6890 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
6891 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
6895 xoperands
[1] = gen_rtx_REG (Pmode
, 1);
6896 xoperands
[2] = xoperands
[1];
6897 pa_output_pic_pcrel_sequence (xoperands
);
6898 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
6901 /* Now output a very long branch to the original target. */
6902 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands
);
6904 /* Now restore the value of %r1 in the delay slot. */
6907 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6908 return "ldd -16(%%r30),%%r1";
6910 return "ldd -40(%%r30),%%r1";
6914 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6915 return "ldw -20(%%r30),%%r1";
6917 return "ldw -12(%%r30),%%r1";
/* NOTE(review): the text below is a garbled extraction -- each original
   source line is split across several display lines, the embedded
   integers (6921, 6927, ...) are line numbers from the original file
   rather than code, and many lines (braces, switch/case heads, else
   arms, returns) were lost entirely.  Only comments have been added;
   every surviving token is untouched.  This emits the assembler
   template for branch-on-bit (bb/extr) conditional branches.  */
6921 /* This routine handles all the branch-on-bit conditional branch sequences we
6922 might need to generate. It handles nullification of delay slots,
6923 varying length branches, negated branches and all combinations of the
6924 above. it returns the appropriate output template to emit the branch. */
6927 pa_output_bb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx_insn
*insn
, int which
)
6929 static char buf
[100];
6931 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6932 int length
= get_attr_length (insn
);
6935 /* A conditional branch to the following instruction (e.g. the delay slot) is
6936 asking for a disaster. I do not think this can happen as this pattern
6937 is only used when optimizing; jump optimization should eliminate the
6938 jump. But be prepared just in case. */
6940 if (branch_to_delay_slot_p (insn
))
6943 /* If this is a long branch with its delay slot unfilled, set `nullify'
6944 as it can nullify the delay slot and save a nop. */
6945 if (length
== 8 && dbr_sequence_length () == 0)
6948 /* If this is a short forward conditional branch which did not get
6949 its delay slot filled, the delay slot can still be nullified. */
6950 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6951 nullify
= forward_branch_p (insn
);
6953 /* A forward branch over a single nullified insn can be done with a
6954 extrs instruction. This avoids a single cycle penalty due to
6955 mis-predicted branch if we fall through (branch not taken). */
6956 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
/* NOTE(review): a switch over LENGTH begins here; the case heads were
   elided by the extraction.  The strcpy/strcat calls build the branch
   template incrementally in BUF.  */
6961 /* All short conditional branches except backwards with an unfilled
6965 strcpy (buf
, "{extrs,|extrw,s,}");
6967 strcpy (buf
, "bb,");
6968 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
6969 strcpy (buf
, "extrd,s,*");
6970 else if (GET_MODE (operands
[0]) == DImode
)
6971 strcpy (buf
, "bb,*");
6972 if ((which
== 0 && negated
)
6973 || (which
== 1 && ! negated
))
6978 strcat (buf
, " %0,%1,1,%%r0");
6979 else if (nullify
&& negated
)
6981 if (branch_needs_nop_p (insn
))
6982 strcat (buf
, ",n %0,%1,%3%#");
6984 strcat (buf
, ",n %0,%1,%3");
6986 else if (nullify
&& ! negated
)
6988 if (branch_needs_nop_p (insn
))
6989 strcat (buf
, ",n %0,%1,%2%#");
6991 strcat (buf
, ",n %0,%1,%2");
6993 else if (! nullify
&& negated
)
6994 strcat (buf
, " %0,%1,%3");
6995 else if (! nullify
&& ! negated
)
6996 strcat (buf
, " %0,%1,%2");
6999 /* All long conditionals. Note a short backward branch with an
7000 unfilled delay slot is treated just like a long backward branch
7001 with an unfilled delay slot. */
7003 /* Handle weird backwards branch with a filled delay slot
7004 which is nullified. */
7005 if (dbr_sequence_length () != 0
7006 && ! forward_branch_p (insn
)
7009 strcpy (buf
, "bb,");
7010 if (GET_MODE (operands
[0]) == DImode
)
7012 if ((which
== 0 && negated
)
7013 || (which
== 1 && ! negated
))
7018 strcat (buf
, ",n %0,%1,.+12\n\tb %3");
7020 strcat (buf
, ",n %0,%1,.+12\n\tb %2");
7022 /* Handle short backwards branch with an unfilled delay slot.
7023 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7024 taken and untaken branches. */
7025 else if (dbr_sequence_length () == 0
7026 && ! forward_branch_p (insn
)
7027 && INSN_ADDRESSES_SET_P ()
7028 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7029 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7031 strcpy (buf
, "bb,");
7032 if (GET_MODE (operands
[0]) == DImode
)
7034 if ((which
== 0 && negated
)
7035 || (which
== 1 && ! negated
))
7040 strcat (buf
, " %0,%1,%3%#");
7042 strcat (buf
, " %0,%1,%2%#");
7046 if (GET_MODE (operands
[0]) == DImode
)
7047 strcpy (buf
, "extrd,s,*");
7049 strcpy (buf
, "{extrs,|extrw,s,}");
7050 if ((which
== 0 && negated
)
7051 || (which
== 1 && ! negated
))
7055 if (nullify
&& negated
)
7056 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %3");
7057 else if (nullify
&& ! negated
)
7058 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %2");
7060 strcat (buf
, " %0,%1,1,%%r0\n\tb %3");
7062 strcat (buf
, " %0,%1,1,%%r0\n\tb %2");
7067 /* The reversed conditional branch must branch over one additional
7068 instruction if the delay slot is filled and needs to be extracted
7069 by pa_output_lbranch. If the delay slot is empty or this is a
7070 nullified forward branch, the instruction after the reversed
7071 condition branch must be nullified. */
7072 if (dbr_sequence_length () == 0
7073 || (nullify
&& forward_branch_p (insn
)))
7077 operands
[4] = GEN_INT (length
);
7082 operands
[4] = GEN_INT (length
+ 4);
7085 if (GET_MODE (operands
[0]) == DImode
)
7086 strcpy (buf
, "bb,*");
7088 strcpy (buf
, "bb,");
7089 if ((which
== 0 && negated
)
7090 || (which
== 1 && !negated
))
7095 strcat (buf
, ",n %0,%1,.+%4");
7097 strcat (buf
, " %0,%1,.+%4");
7098 output_asm_insn (buf
, operands
);
7099 return pa_output_lbranch (negated
? operands
[3] : operands
[2],
7105 /* This routine handles all the branch-on-variable-bit conditional branch
7106 sequences we might need to generate. It handles nullification of delay
7107 slots, varying length branches, negated branches and all combinations
7108 of the above. it returns the appropriate output template to emit the
7112 pa_output_bvb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx_insn
*insn
,
7115 static char buf
[100];
7117 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7118 int length
= get_attr_length (insn
);
7121 /* A conditional branch to the following instruction (e.g. the delay slot) is
7122 asking for a disaster. I do not think this can happen as this pattern
7123 is only used when optimizing; jump optimization should eliminate the
7124 jump. But be prepared just in case. */
7126 if (branch_to_delay_slot_p (insn
))
7129 /* If this is a long branch with its delay slot unfilled, set `nullify'
7130 as it can nullify the delay slot and save a nop. */
7131 if (length
== 8 && dbr_sequence_length () == 0)
7134 /* If this is a short forward conditional branch which did not get
7135 its delay slot filled, the delay slot can still be nullified. */
7136 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7137 nullify
= forward_branch_p (insn
);
7139 /* A forward branch over a single nullified insn can be done with a
7140 extrs instruction. This avoids a single cycle penalty due to
7141 mis-predicted branch if we fall through (branch not taken). */
7142 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
7147 /* All short conditional branches except backwards with an unfilled
7151 strcpy (buf
, "{vextrs,|extrw,s,}");
7153 strcpy (buf
, "{bvb,|bb,}");
7154 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
7155 strcpy (buf
, "extrd,s,*");
7156 else if (GET_MODE (operands
[0]) == DImode
)
7157 strcpy (buf
, "bb,*");
7158 if ((which
== 0 && negated
)
7159 || (which
== 1 && ! negated
))
7164 strcat (buf
, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7165 else if (nullify
&& negated
)
7167 if (branch_needs_nop_p (insn
))
7168 strcat (buf
, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7170 strcat (buf
, "{,n %0,%3|,n %0,%%sar,%3}");
7172 else if (nullify
&& ! negated
)
7174 if (branch_needs_nop_p (insn
))
7175 strcat (buf
, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7177 strcat (buf
, "{,n %0,%2|,n %0,%%sar,%2}");
7179 else if (! nullify
&& negated
)
7180 strcat (buf
, "{ %0,%3| %0,%%sar,%3}");
7181 else if (! nullify
&& ! negated
)
7182 strcat (buf
, "{ %0,%2| %0,%%sar,%2}");
7185 /* All long conditionals. Note a short backward branch with an
7186 unfilled delay slot is treated just like a long backward branch
7187 with an unfilled delay slot. */
7189 /* Handle weird backwards branch with a filled delay slot
7190 which is nullified. */
7191 if (dbr_sequence_length () != 0
7192 && ! forward_branch_p (insn
)
7195 strcpy (buf
, "{bvb,|bb,}");
7196 if (GET_MODE (operands
[0]) == DImode
)
7198 if ((which
== 0 && negated
)
7199 || (which
== 1 && ! negated
))
7204 strcat (buf
, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7206 strcat (buf
, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7208 /* Handle short backwards branch with an unfilled delay slot.
7209 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7210 taken and untaken branches. */
7211 else if (dbr_sequence_length () == 0
7212 && ! forward_branch_p (insn
)
7213 && INSN_ADDRESSES_SET_P ()
7214 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7215 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7217 strcpy (buf
, "{bvb,|bb,}");
7218 if (GET_MODE (operands
[0]) == DImode
)
7220 if ((which
== 0 && negated
)
7221 || (which
== 1 && ! negated
))
7226 strcat (buf
, "{ %0,%3%#| %0,%%sar,%3%#}");
7228 strcat (buf
, "{ %0,%2%#| %0,%%sar,%2%#}");
7232 strcpy (buf
, "{vextrs,|extrw,s,}");
7233 if (GET_MODE (operands
[0]) == DImode
)
7234 strcpy (buf
, "extrd,s,*");
7235 if ((which
== 0 && negated
)
7236 || (which
== 1 && ! negated
))
7240 if (nullify
&& negated
)
7241 strcat (buf
, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7242 else if (nullify
&& ! negated
)
7243 strcat (buf
, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7245 strcat (buf
, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7247 strcat (buf
, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7252 /* The reversed conditional branch must branch over one additional
7253 instruction if the delay slot is filled and needs to be extracted
7254 by pa_output_lbranch. If the delay slot is empty or this is a
7255 nullified forward branch, the instruction after the reversed
7256 condition branch must be nullified. */
7257 if (dbr_sequence_length () == 0
7258 || (nullify
&& forward_branch_p (insn
)))
7262 operands
[4] = GEN_INT (length
);
7267 operands
[4] = GEN_INT (length
+ 4);
7270 if (GET_MODE (operands
[0]) == DImode
)
7271 strcpy (buf
, "bb,*");
7273 strcpy (buf
, "{bvb,|bb,}");
7274 if ((which
== 0 && negated
)
7275 || (which
== 1 && !negated
))
7280 strcat (buf
, ",n {%0,.+%4|%0,%%sar,.+%4}");
7282 strcat (buf
, " {%0,.+%4|%0,%%sar,.+%4}");
7283 output_asm_insn (buf
, operands
);
7284 return pa_output_lbranch (negated
? operands
[3] : operands
[2],
7290 /* Return the output template for emitting a dbra type insn.
7292 Note it may perform some output operations on its own before
7293 returning the final output string. */
/* Emit the output template for a "dbra" (decrement-and-branch) insn.
   WHICH_ALTERNATIVE selects GR (0), FP-register reload (1) or memory
   reload (2) for the loop counter.  May emit insns itself before
   returning the final template (see comment above this function).
   NOTE(review): this copy of the file is extraction-garbled -- the
   embedded upstream line numbers jump (e.g. 7333->7340, 7378->7382),
   so brace/else/assignment lines are missing.  Kept token-identical
   to this copy; restore missing lines from upstream GCC.  */
7295 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7297 int length = get_attr_length (insn);
7299 /* A conditional branch to the following instruction (e.g. the delay slot) is
7300 asking for a disaster. Be prepared! */
7302 if (branch_to_delay_slot_p (insn))
7304 if (which_alternative == 0)
7305 return "ldo %1(%0),%0";
7306 else if (which_alternative == 1)
7308 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7309 output_asm_insn ("ldw -16(%%r30),%4", operands);
7310 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7311 return "{fldws|fldw} -16(%%r30),%0";
7315 output_asm_insn ("ldw %0,%4", operands);
7316 return "ldo %1(%4),%4\n\tstw %4,%0";
7320 if (which_alternative == 0)
7322 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7325 /* If this is a long branch with its delay slot unfilled, set `nullify'
7326 as it can nullify the delay slot and save a nop. */
7327 if (length == 8 && dbr_sequence_length () == 0)
7330 /* If this is a short forward conditional branch which did not get
7331 its delay slot filled, the delay slot can still be nullified. */
7332 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7333 nullify = forward_branch_p (insn);
7340 if (branch_needs_nop_p (insn))
7341 return "addib,%C2,n %1,%0,%3%#";
7343 return "addib,%C2,n %1,%0,%3";
7346 return "addib,%C2 %1,%0,%3";
7349 /* Handle weird backwards branch with a filled delay slot
7350 which is nullified. */
7351 if (dbr_sequence_length () != 0
7352 && ! forward_branch_p (insn)
7354 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7355 /* Handle short backwards branch with an unfilled delay slot.
7356 Using a addb;nop rather than addi;bl saves 1 cycle for both
7357 taken and untaken branches. */
7358 else if (dbr_sequence_length () == 0
7359 && ! forward_branch_p (insn)
7360 && INSN_ADDRESSES_SET_P ()
7361 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7362 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7363 return "addib,%C2 %1,%0,%3%#";
7365 /* Handle normal cases. */
7367 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7369 return "addi,%N2 %1,%0,%0\n\tb %3";
7372 /* The reversed conditional branch must branch over one additional
7373 instruction if the delay slot is filled and needs to be extracted
7374 by pa_output_lbranch. If the delay slot is empty or this is a
7375 nullified forward branch, the instruction after the reversed
7376 condition branch must be nullified. */
7377 if (dbr_sequence_length () == 0
7378 || (nullify && forward_branch_p (insn)))
7382 operands[4] = GEN_INT (length);
7387 operands[4] = GEN_INT (length + 4);
7391 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7393 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7395 return pa_output_lbranch (operands[3], insn, xdelay);
7399 /* Deal with gross reload from FP register case. */
7400 else if (which_alternative == 1)
7402 /* Move loop counter from FP register to MEM then into a GR,
7403 increment the GR, store the GR into MEM, and finally reload
7404 the FP register from MEM from within the branch's delay slot. */
7405 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7407 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7409 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7410 else if (length == 28)
7411 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7414 operands[5] = GEN_INT (length - 16);
7415 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7416 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7417 return pa_output_lbranch (operands[3], insn, 0);
7420 /* Deal with gross reload from memory case. */
7423 /* Reload loop counter from memory, the store back to memory
7424 happens in the branch's delay slot. */
7425 output_asm_insn ("ldw %0,%4", operands);
7427 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7428 else if (length == 16)
7429 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7432 operands[5] = GEN_INT (length - 4);
7433 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7434 return pa_output_lbranch (operands[3], insn, 0);
7439 /* Return the output template for emitting a movb type insn.
7441 Note it may perform some output operations on its own before
7442 returning the final output string. */
/* Emit the output template for a "movb" (copy-and-branch) insn.
   WHICH_ALTERNATIVE selects GR (0), FP destination (1), memory (2)
   or SAR (default) as destination; REVERSE_COMPARISON flips the
   condition in operands[2] first.
   NOTE(review): this copy of the file is extraction-garbled -- the
   embedded upstream line numbers jump (e.g. 7484->7491, 7529->7533),
   so brace/else/assignment lines are missing.  Kept token-identical
   to this copy; restore missing lines from upstream GCC.  */
7444 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7445 int reverse_comparison)
7447 int length = get_attr_length (insn);
7449 /* A conditional branch to the following instruction (e.g. the delay slot) is
7450 asking for a disaster. Be prepared! */
7452 if (branch_to_delay_slot_p (insn))
7454 if (which_alternative == 0)
7455 return "copy %1,%0";
7456 else if (which_alternative == 1)
7458 output_asm_insn ("stw %1,-16(%%r30)", operands);
7459 return "{fldws|fldw} -16(%%r30),%0";
7461 else if (which_alternative == 2)
7467 /* Support the second variant. */
7468 if (reverse_comparison)
7469 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7471 if (which_alternative == 0)
7473 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7476 /* If this is a long branch with its delay slot unfilled, set `nullify'
7477 as it can nullify the delay slot and save a nop. */
7478 if (length == 8 && dbr_sequence_length () == 0)
7481 /* If this is a short forward conditional branch which did not get
7482 its delay slot filled, the delay slot can still be nullified. */
7483 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7484 nullify = forward_branch_p (insn);
7491 if (branch_needs_nop_p (insn))
7492 return "movb,%C2,n %1,%0,%3%#";
7494 return "movb,%C2,n %1,%0,%3";
7497 return "movb,%C2 %1,%0,%3";
7500 /* Handle weird backwards branch with a filled delay slot
7501 which is nullified. */
7502 if (dbr_sequence_length () != 0
7503 && ! forward_branch_p (insn)
7505 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7507 /* Handle short backwards branch with an unfilled delay slot.
7508 Using a movb;nop rather than or;bl saves 1 cycle for both
7509 taken and untaken branches. */
7510 else if (dbr_sequence_length () == 0
7511 && ! forward_branch_p (insn)
7512 && INSN_ADDRESSES_SET_P ()
7513 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7514 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7515 return "movb,%C2 %1,%0,%3%#";
7516 /* Handle normal cases. */
7518 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7520 return "or,%N2 %1,%%r0,%0\n\tb %3";
7523 /* The reversed conditional branch must branch over one additional
7524 instruction if the delay slot is filled and needs to be extracted
7525 by pa_output_lbranch. If the delay slot is empty or this is a
7526 nullified forward branch, the instruction after the reversed
7527 condition branch must be nullified. */
7528 if (dbr_sequence_length () == 0
7529 || (nullify && forward_branch_p (insn)))
7533 operands[4] = GEN_INT (length);
7538 operands[4] = GEN_INT (length + 4);
7542 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7544 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7546 return pa_output_lbranch (operands[3], insn, xdelay);
7549 /* Deal with gross reload for FP destination register case. */
7550 else if (which_alternative == 1)
7552 /* Move source register to MEM, perform the branch test, then
7553 finally load the FP register from MEM from within the branch's
7555 output_asm_insn ("stw %1,-16(%%r30)", operands);
7557 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7558 else if (length == 16)
7559 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7562 operands[4] = GEN_INT (length - 4);
7563 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7564 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7565 return pa_output_lbranch (operands[3], insn, 0);
7568 /* Deal with gross reload from memory case. */
7569 else if (which_alternative == 2)
7571 /* Reload loop counter from memory, the store back to memory
7572 happens in the branch's delay slot. */
7574 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7575 else if (length == 12)
7576 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7579 operands[4] = GEN_INT (length);
7580 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7582 return pa_output_lbranch (operands[3], insn, 0);
7585 /* Handle SAR as a destination. */
7589 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7590 else if (length == 12)
7591 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7594 operands[4] = GEN_INT (length);
7595 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7597 return pa_output_lbranch (operands[3], insn, 0);
7602 /* Copy any FP arguments in INSN into integer registers. */
7604 copy_fp_args (rtx_insn
*insn
)
7609 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7611 int arg_mode
, regno
;
7612 rtx use
= XEXP (link
, 0);
7614 if (! (GET_CODE (use
) == USE
7615 && GET_CODE (XEXP (use
, 0)) == REG
7616 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7619 arg_mode
= GET_MODE (XEXP (use
, 0));
7620 regno
= REGNO (XEXP (use
, 0));
7622 /* Is it a floating point register? */
7623 if (regno
>= 32 && regno
<= 39)
7625 /* Copy the FP register into an integer register via memory. */
7626 if (arg_mode
== SFmode
)
7628 xoperands
[0] = XEXP (use
, 0);
7629 xoperands
[1] = gen_rtx_REG (SImode
, 26 - (regno
- 32) / 2);
7630 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands
);
7631 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7635 xoperands
[0] = XEXP (use
, 0);
7636 xoperands
[1] = gen_rtx_REG (DImode
, 25 - (regno
- 34) / 2);
7637 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands
);
7638 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands
);
7639 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7645 /* Compute length of the FP argument copy sequence for INSN. */
7647 length_fp_args (rtx_insn
*insn
)
7652 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7654 int arg_mode
, regno
;
7655 rtx use
= XEXP (link
, 0);
7657 if (! (GET_CODE (use
) == USE
7658 && GET_CODE (XEXP (use
, 0)) == REG
7659 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7662 arg_mode
= GET_MODE (XEXP (use
, 0));
7663 regno
= REGNO (XEXP (use
, 0));
7665 /* Is it a floating point register? */
7666 if (regno
>= 32 && regno
<= 39)
7668 if (arg_mode
== SFmode
)
7678 /* Return the attribute length for the millicode call instruction INSN.
7679 The length must match the code generated by pa_output_millicode_call.
7680 We include the delay slot in the returned length as it is better to
7681 over estimate the length than to under estimate it. */
/* Return the attribute length (in bytes, including the delay slot) of
   the millicode call INSN; must match pa_output_millicode_call.
   NOTE(review): this listing is extraction-garbled -- the upstream
   line numbers jump (7692->7698, 7698->7703), so the brace/return
   lines with the actual length constants are missing.  Kept
   token-identical to this copy; restore from upstream GCC.  */
7684 pa_attr_length_millicode_call (rtx_insn *insn)
7686 unsigned long distance = -1;
7687 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7689 if (INSN_ADDRESSES_SET_P ())
7691 distance = (total + insn_current_reference_address (insn));
7692 if (distance < total)
7698 if (!TARGET_LONG_CALLS && distance < 7600000)
7703 else if (TARGET_PORTABLE_RUNTIME)
7707 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7717 /* INSN is a function call.
7719 CALL_DEST is the routine we are calling. */
/* Output a millicode call to CALL_DEST for INSN, choosing among the
   short pc-relative, 64-bit, portable-runtime, long-absolute and long
   PIC sequences; emits a trailing nop when the delay slot is empty.
   NOTE(review): extraction-garbled copy -- upstream line numbers jump
   (7741->7747, 7755->7757, ...), so brace/else/label-emission lines
   are missing.  Kept token-identical; restore from upstream GCC.  */
7722 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7724 int attr_length = get_attr_length (insn);
7725 int seq_length = dbr_sequence_length ();
7728 xoperands[0] = call_dest;
7730 /* Handle the common case where we are sure that the branch will
7731 reach the beginning of the $CODE$ subspace. The within reach
7732 form of the $$sh_func_adrs call has a length of 28. Because it
7733 has an attribute type of sh_func_adrs, it never has a nonzero
7734 sequence length (i.e., the delay slot is never filled). */
7735 if (!TARGET_LONG_CALLS
7736 && (attr_length == 8
7737 || (attr_length == 28
7738 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7740 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7741 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7747 /* It might seem that one insn could be saved by accessing
7748 the millicode function using the linkage table. However,
7749 this doesn't work in shared libraries and other dynamically
7750 loaded objects. Using a pc-relative sequence also avoids
7751 problems related to the implicit use of the gp register. */
7752 xoperands[1] = gen_rtx_REG (Pmode, 1);
7753 xoperands[2] = xoperands[1];
7754 pa_output_pic_pcrel_sequence (xoperands);
7755 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7757 else if (TARGET_PORTABLE_RUNTIME)
7759 /* Pure portable runtime doesn't allow be/ble; we also don't
7760 have PIC support in the assembler/linker, so this sequence
7763 /* Get the address of our target into %r1. */
7764 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7765 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7767 /* Get our return address into %r31. */
7768 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7769 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7771 /* Jump to our target address in %r1. */
7772 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7776 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7778 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7780 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7784 xoperands[1] = gen_rtx_REG (Pmode, 31);
7785 xoperands[2] = gen_rtx_REG (Pmode, 1);
7786 pa_output_pic_pcrel_sequence (xoperands);
7788 /* Adjust return address. */
7789 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7791 /* Jump to our target address in %r1. */
7792 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7796 if (seq_length == 0)
7797 output_asm_insn ("nop", xoperands);
7802 /* Return the attribute length of the call instruction INSN. The SIBCALL
7803 flag indicates whether INSN is a regular call or a sibling call. The
7804 length returned must be longer than the code actually generated by
7805 pa_output_call. Since branch shortening is done before delay branch
7806 sequencing, there is no way to determine whether or not the delay
7807 slot will be filled during branch shortening. Even when the delay
7808 slot is filled, we may have to add a nop if the delay slot contains
7809 a branch that can't reach its target. Thus, we always have to include
7810 the delay slot in the length estimate. This used to be done in
7811 pa_adjust_insn_length but we do it here now as some sequences always
7812 fill the delay slot and we can save four bytes in the estimate for
/* Return the attribute length of call INSN (SIBCALL nonzero for a
   sibling call); over-estimates are fine, under-estimates are not --
   see the comment block above.  Must stay in sync with pa_output_call.
   NOTE(review): extraction-garbled copy -- upstream line numbers jump
   (7854->7857, 7859->7861, 7875->7881), so the lines adding the
   per-sequence length constants are missing.  Kept token-identical;
   restore from upstream GCC.  */
7816 pa_attr_length_call (rtx_insn *insn, int sibcall)
7819 rtx call, call_dest;
7822 rtx pat = PATTERN (insn);
7823 unsigned long distance = -1;
7825 gcc_assert (CALL_P (insn));
7827 if (INSN_ADDRESSES_SET_P ())
7829 unsigned long total;
7831 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7832 distance = (total + insn_current_reference_address (insn));
7833 if (distance < total)
7837 gcc_assert (GET_CODE (pat) == PARALLEL);
7839 /* Get the call rtx. */
7840 call = XVECEXP (pat, 0, 0);
7841 if (GET_CODE (call) == SET)
7842 call = SET_SRC (call);
7844 gcc_assert (GET_CODE (call) == CALL);
7846 /* Determine if this is a local call. */
7847 call_dest = XEXP (XEXP (call, 0), 0);
7848 call_decl = SYMBOL_REF_DECL (call_dest);
7849 local_call = call_decl && targetm.binds_local_p (call_decl);
7851 /* pc-relative branch. */
7852 if (!TARGET_LONG_CALLS
7853 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7854 || distance < MAX_PCREL17F_OFFSET))
7857 /* 64-bit plabel sequence. */
7858 else if (TARGET_64BIT && !local_call)
7859 length += sibcall ? 28 : 24;
7861 /* non-pic long absolute branch sequence. */
7862 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7865 /* long pc-relative branch sequence. */
7866 else if (TARGET_LONG_PIC_SDIFF_CALL
7867 || (TARGET_GAS && !TARGET_SOM && local_call))
7871 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7875 /* 32-bit plabel sequence. */
7881 length += length_fp_args (insn);
7891 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7899 /* INSN is a function call.
7901 CALL_DEST is the routine we are calling. */
/* Output a call (or sibcall when SIBCALL is nonzero) to CALL_DEST for
   INSN.  Chooses among the short pc-relative branch, 64-bit plabel,
   long absolute, long PIC pc-relative and 32-bit plabel ($$dyncall
   style) sequences; emits a trailing nop when the delay slot is empty.
   NOTE(review): extraction-garbled copy -- upstream line numbers jump
   (7919->7923, 7942->7946, 8119->8127, ...), so brace/else/label and
   some output lines are missing.  Kept token-identical to this copy;
   restore missing lines from upstream GCC before building.  */
7904 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
7906 int seq_length = dbr_sequence_length ();
7907 tree call_decl = SYMBOL_REF_DECL (call_dest);
7908 int local_call = call_decl && targetm.binds_local_p (call_decl);
7911 xoperands[0] = call_dest;
7913 /* Handle the common case where we're sure that the branch will reach
7914 the beginning of the "$CODE$" subspace. This is the beginning of
7915 the current function if we are in a named section. */
7916 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7918 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7919 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7923 if (TARGET_64BIT && !local_call)
7925 /* ??? As far as I can tell, the HP linker doesn't support the
7926 long pc-relative sequence described in the 64-bit runtime
7927 architecture. So, we use a slightly longer indirect call. */
7928 xoperands[0] = pa_get_deferred_plabel (call_dest);
7929 xoperands[1] = gen_label_rtx ();
7931 /* If this isn't a sibcall, we put the load of %r27 into the
7932 delay slot. We can't do this in a sibcall as we don't
7933 have a second call-clobbered scratch register available.
7934 We don't need to do anything when generating fast indirect
7936 if (seq_length != 0 && !sibcall)
7938 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7941 /* Now delete the delay insn. */
7942 SET_INSN_DELETED (NEXT_INSN (insn));
7946 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7947 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7948 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7952 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7953 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7954 output_asm_insn ("bve (%%r1)", xoperands);
7958 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7959 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7960 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7966 int indirect_call = 0;
7968 /* Emit a long call. There are several different sequences
7969 of increasing length and complexity. In most cases,
7970 they don't allow an instruction in the delay slot. */
7971 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7972 && !TARGET_LONG_PIC_SDIFF_CALL
7973 && !(TARGET_GAS && !TARGET_SOM && local_call)
7981 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7983 /* A non-jump insn in the delay slot. By definition we can
7984 emit this insn before the call (and in fact before argument
7986 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7989 /* Now delete the delay insn. */
7990 SET_INSN_DELETED (NEXT_INSN (insn));
7994 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7996 /* This is the best sequence for making long calls in
7997 non-pic code. Unfortunately, GNU ld doesn't provide
7998 the stub needed for external calls, and GAS's support
7999 for this with the SOM linker is buggy. It is safe
8000 to use this for local calls. */
8001 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8003 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8007 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8010 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8012 output_asm_insn ("copy %%r31,%%r2", xoperands);
8018 /* The HP assembler and linker can handle relocations for
8019 the difference of two symbols. The HP assembler
8020 recognizes the sequence as a pc-relative call and
8021 the linker provides stubs when needed. */
8023 /* GAS currently can't generate the relocations that
8024 are needed for the SOM linker under HP-UX using this
8025 sequence. The GNU linker doesn't generate the stubs
8026 that are needed for external calls on TARGET_ELF32
8027 with this sequence. For now, we have to use a longer
8028 plabel sequence when using GAS for non local calls. */
8029 if (TARGET_LONG_PIC_SDIFF_CALL
8030 || (TARGET_GAS && !TARGET_SOM && local_call))
8032 xoperands[1] = gen_rtx_REG (Pmode, 1);
8033 xoperands[2] = xoperands[1];
8034 pa_output_pic_pcrel_sequence (xoperands);
8038 /* Emit a long plabel-based call sequence. This is
8039 essentially an inline implementation of $$dyncall.
8040 We don't actually try to call $$dyncall as this is
8041 as difficult as calling the function itself. */
8042 xoperands[0] = pa_get_deferred_plabel (call_dest);
8043 xoperands[1] = gen_label_rtx ();
8045 /* Since the call is indirect, FP arguments in registers
8046 need to be copied to the general registers. Then, the
8047 argument relocation stub will copy them back. */
8049 copy_fp_args (insn);
8053 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8054 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8055 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
8059 output_asm_insn ("addil LR'%0-$global$,%%r27",
8061 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
8065 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
8066 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
8067 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
8068 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
8070 if (!sibcall && !TARGET_PA_20)
8072 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8073 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8074 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8076 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8083 output_asm_insn ("bve (%%r1)", xoperands);
8088 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8089 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8093 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8098 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8099 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8104 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8105 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8107 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8111 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8112 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8114 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8117 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8119 output_asm_insn ("copy %%r31,%%r2", xoperands);
8127 if (seq_length == 0)
8128 output_asm_insn ("nop", xoperands);
8133 /* Return the attribute length of the indirect call instruction INSN.
8134 The length must match the code generated by output_indirect call.
8135 The returned length includes the delay slot. Currently, the delay
8136 slot of an indirect call sequence is not exposed and it is used by
8137 the sequence itself. */
/* Return the attribute length (including the delay slot) of the
   indirect call INSN; must match pa_output_indirect_call.
   NOTE(review): extraction-garbled copy -- the upstream line numbers
   jump (8148->8155, 8167->8170, 8175->8181), so the return statements
   with the actual length constants are missing.  Kept token-identical;
   restore from upstream GCC.  */
8140 pa_attr_length_indirect_call (rtx_insn *insn)
8142 unsigned long distance = -1;
8143 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8145 if (INSN_ADDRESSES_SET_P ())
8147 distance = (total + insn_current_reference_address (insn));
8148 if (distance < total)
8155 if (TARGET_FAST_INDIRECT_CALLS)
8158 if (TARGET_PORTABLE_RUNTIME)
8161 /* Inline version of $$dyncall. */
8162 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8165 if (!TARGET_LONG_CALLS
8166 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8167 || distance < MAX_PCREL17F_OFFSET))
8170 /* Out of reach, can use ble. */
8174 /* Inline version of $$dyncall. */
8175 if (TARGET_NO_SPACE_REGS || TARGET_PA_20)
8181 /* Long PIC pc-relative call. */
/* Output an indirect call through %r22 (or a 64-bit plabel call),
   selecting among fast, portable-runtime, inline-$$dyncall and
   $$dyncall-stub sequences based on target flags and the attribute
   length computed by pa_attr_length_indirect_call.
   NOTE(review): extraction-garbled copy -- upstream line numbers jump
   (8196->8200, 8238->8241, 8249->8252), so brace/else/length-dispatch
   lines are missing.  Kept token-identical; restore from upstream
   GCC before building.  */
8186 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8193 xoperands[0] = call_dest;
8194 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8195 "bve,l (%%r2),%%r2\n\t"
8196 "ldd 24(%0),%%r27", xoperands);
8200 /* First the special case for kernels, level 0 systems, etc. */
8201 if (TARGET_FAST_INDIRECT_CALLS)
8203 pa_output_arg_descriptor (insn);
8205 return "bve,l,n (%%r22),%%r2\n\tnop";
8206 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8209 if (TARGET_PORTABLE_RUNTIME)
8211 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8212 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8213 pa_output_arg_descriptor (insn);
8214 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8217 /* Maybe emit a fast inline version of $$dyncall. */
8218 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8220 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8221 "ldw 2(%%r22),%%r19\n\t"
8222 "ldw -2(%%r22),%%r22", xoperands);
8223 pa_output_arg_descriptor (insn);
8224 if (TARGET_NO_SPACE_REGS)
8227 return "bve,l,n (%%r22),%%r2\n\tnop";
8228 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8230 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8233 /* Now the normal case -- we can reach $$dyncall directly or
8234 we're sure that we can get there via a long-branch stub.
8236 No need to check target flags as the length uniquely identifies
8237 the remaining cases. */
8238 length = pa_attr_length_indirect_call (insn);
8241 pa_output_arg_descriptor (insn);
8243 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8244 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8245 variant of the B,L instruction can't be used on the SOM target. */
8246 if (TARGET_PA_20 && !TARGET_SOM)
8247 return "b,l,n $$dyncall,%%r2\n\tnop";
8249 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8252 /* Long millicode call, but we are not generating PIC or portable runtime
8256 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8257 pa_output_arg_descriptor (insn);
8258 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8261 /* Maybe emit a fast inline version of $$dyncall. The long PIC
8262 pc-relative call sequence is five instructions. The inline PA 2.0
8263 version of $$dyncall is also five instructions. The PA 1.X versions
8264 are longer but still an overall win. */
8265 if (TARGET_NO_SPACE_REGS || TARGET_PA_20 || !optimize_size)
8267 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8268 "ldw 2(%%r22),%%r19\n\t"
8269 "ldw -2(%%r22),%%r22", xoperands);
8270 if (TARGET_NO_SPACE_REGS)
8272 pa_output_arg_descriptor (insn);
8274 return "bve,l,n (%%r22),%%r2\n\tnop";
8275 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8279 pa_output_arg_descriptor (insn);
8280 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8282 output_asm_insn ("bl .+8,%%r2\n\t"
8283 "ldo 16(%%r2),%%r2\n\t"
8284 "ldsid (%%r22),%%r1\n\t"
8285 "mtsp %%r1,%%sr0", xoperands);
8286 pa_output_arg_descriptor (insn);
8287 return "be 0(%%sr0,%%r22)\n\tstw %%r2,-24(%%sp)";
8290 /* We need a long PIC call to $$dyncall. */
8291 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8292 xoperands[1] = gen_rtx_REG (Pmode, 2);
8293 xoperands[2] = gen_rtx_REG (Pmode, 1);
8294 pa_output_pic_pcrel_sequence (xoperands);
8295 pa_output_arg_descriptor (insn);
8296 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8299 /* In HPUX 8.0's shared library scheme, special relocations are needed
8300 for function labels if they might be passed to a function
8301 in a shared library (because shared libraries don't live in code
8302 space), and special magic is needed to construct their address. */
8305 pa_encode_label (rtx sym
)
8307 const char *str
= XSTR (sym
, 0);
8308 int len
= strlen (str
) + 1;
8311 p
= newstr
= XALLOCAVEC (char, len
+ 1);
8315 XSTR (sym
, 0) = ggc_alloc_string (newstr
, len
);
8319 pa_encode_section_info (tree decl
, rtx rtl
, int first
)
8321 int old_referenced
= 0;
8323 if (!first
&& MEM_P (rtl
) && GET_CODE (XEXP (rtl
, 0)) == SYMBOL_REF
)
8325 = SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) & SYMBOL_FLAG_REFERENCED
;
8327 default_encode_section_info (decl
, rtl
, first
);
8329 if (first
&& TEXT_SPACE_P (decl
))
8331 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
8332 if (TREE_CODE (decl
) == FUNCTION_DECL
)
8333 pa_encode_label (XEXP (rtl
, 0));
8335 else if (old_referenced
)
8336 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= old_referenced
;
8339 /* This is sort of inverse to pa_encode_section_info. */
const char *
pa_strip_name_encoding (const char *str)
{
  /* Drop at most one leading '@' (added by pa_encode_label) and one
     leading '*' (the user-label prefix marker); the comparisons are
     0/1 so each prefix character is skipped at most once.  */
  str += (*str == '@');
  str += (*str == '*');
  return str;
}
8349 /* Returns 1 if OP is a function label involved in a simple addition
8350 with a constant. Used to keep certain patterns from matching
8351 during instruction combination. */
8353 pa_is_function_label_plus_const (rtx op
)
8355 /* Strip off any CONST. */
8356 if (GET_CODE (op
) == CONST
)
8359 return (GET_CODE (op
) == PLUS
8360 && function_label_operand (XEXP (op
, 0), VOIDmode
)
8361 && GET_CODE (XEXP (op
, 1)) == CONST_INT
);
8364 /* Output assembly code for a thunk to FUNCTION. */
8367 pa_asm_output_mi_thunk (FILE *file
, tree thunk_fndecl
, HOST_WIDE_INT delta
,
8368 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
8371 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl
));
8372 static unsigned int current_thunk_number
;
8373 int val_14
= VAL_14_BITS_P (delta
);
8374 unsigned int old_last_address
= last_address
, nbytes
= 0;
8378 xoperands
[0] = XEXP (DECL_RTL (function
), 0);
8379 xoperands
[1] = XEXP (DECL_RTL (thunk_fndecl
), 0);
8380 xoperands
[2] = GEN_INT (delta
);
8382 assemble_start_function (thunk_fndecl
, fnname
);
8383 final_start_function (emit_barrier (), file
, 1);
8385 /* Output the thunk. We know that the function is in the same
8386 translation unit (i.e., the same space) as the thunk, and that
8387 thunks are output after their method. Thus, we don't need an
8388 external branch to reach the function. With SOM and GAS,
8389 functions and thunks are effectively in different sections.
8390 Thus, we can always use a IA-relative branch and the linker
8391 will add a long branch stub if necessary.
8393 However, we have to be careful when generating PIC code on the
8394 SOM port to ensure that the sequence does not transfer to an
8395 import stub for the target function as this could clobber the
8396 return value saved at SP-24. This would also apply to the
8397 32-bit linux port if the multi-space model is implemented. */
8398 if ((!TARGET_LONG_CALLS
&& TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8399 && !(flag_pic
&& TREE_PUBLIC (function
))
8400 && (TARGET_GAS
|| last_address
< 262132))
8401 || (!TARGET_LONG_CALLS
&& !TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8402 && ((targetm_common
.have_named_sections
8403 && DECL_SECTION_NAME (thunk_fndecl
) != NULL
8404 /* The GNU 64-bit linker has rather poor stub management.
8405 So, we use a long branch from thunks that aren't in
8406 the same section as the target function. */
8408 && (DECL_SECTION_NAME (thunk_fndecl
)
8409 != DECL_SECTION_NAME (function
)))
8410 || ((DECL_SECTION_NAME (thunk_fndecl
)
8411 == DECL_SECTION_NAME (function
))
8412 && last_address
< 262132)))
8413 /* In this case, we need to be able to reach the start of
8414 the stub table even though the function is likely closer
8415 and can be jumped to directly. */
8416 || (targetm_common
.have_named_sections
8417 && DECL_SECTION_NAME (thunk_fndecl
) == NULL
8418 && DECL_SECTION_NAME (function
) == NULL
8419 && total_code_bytes
< MAX_PCREL17F_OFFSET
)
8421 || (!targetm_common
.have_named_sections
8422 && total_code_bytes
< MAX_PCREL17F_OFFSET
))))
8425 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8427 output_asm_insn ("b %0", xoperands
);
8431 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8436 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8440 else if (TARGET_64BIT
)
8444 /* We only have one call-clobbered scratch register, so we can't
8445 make use of the delay slot if delta doesn't fit in 14 bits. */
8448 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8449 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8452 /* Load function address into %r1. */
8453 xop
[0] = xoperands
[0];
8454 xop
[1] = gen_rtx_REG (Pmode
, 1);
8456 pa_output_pic_pcrel_sequence (xop
);
8460 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
8461 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8466 output_asm_insn ("bv,n %%r0(%%r1)", xoperands
);
8470 else if (TARGET_PORTABLE_RUNTIME
)
8472 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
8473 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands
);
8476 output_asm_insn ("ldil L'%2,%%r26", xoperands
);
8478 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8482 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8487 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands
);
8491 else if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8493 /* The function is accessible from outside this module. The only
8494 way to avoid an import stub between the thunk and function is to
8495 call the function directly with an indirect sequence similar to
8496 that used by $$dyncall. This is possible because $$dyncall acts
8497 as the import stub in an indirect call. */
8498 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHN", current_thunk_number
);
8499 xoperands
[3] = gen_rtx_SYMBOL_REF (Pmode
, label
);
8500 output_asm_insn ("addil LT'%3,%%r19", xoperands
);
8501 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands
);
8502 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8503 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands
);
8504 output_asm_insn ("depi 0,31,2,%%r22", xoperands
);
8505 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands
);
8506 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8510 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8516 output_asm_insn ("bve (%%r22)", xoperands
);
8519 else if (TARGET_NO_SPACE_REGS
)
8521 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands
);
8526 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands
);
8527 output_asm_insn ("mtsp %%r21,%%sr0", xoperands
);
8528 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands
);
8533 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8535 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8541 /* Load function address into %r22. */
8542 xop
[0] = xoperands
[0];
8543 xop
[1] = gen_rtx_REG (Pmode
, 1);
8544 xop
[2] = gen_rtx_REG (Pmode
, 22);
8545 pa_output_pic_pcrel_sequence (xop
);
8548 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8550 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8554 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8559 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8566 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8568 output_asm_insn ("ldil L'%0,%%r22", xoperands
);
8569 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands
);
8573 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8578 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8583 final_end_function ();
8585 if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8587 switch_to_section (data_section
);
8588 output_asm_insn (".align 4", xoperands
);
8589 ASM_OUTPUT_LABEL (file
, label
);
8590 output_asm_insn (".word P'%0", xoperands
);
8593 current_thunk_number
++;
8594 nbytes
= ((nbytes
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
8595 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
8596 last_address
+= nbytes
;
8597 if (old_last_address
> last_address
)
8598 last_address
= UINT_MAX
;
8599 update_total_code_bytes (nbytes
);
8600 assemble_end_function (thunk_fndecl
, fnname
);
8603 /* Only direct calls to static functions are allowed to be sibling (tail)
8606 This restriction is necessary because some linker generated stubs will
8607 store return pointers into rp' in some cases which might clobber a
8608 live value already in rp'.
8610 In a sibcall the current function and the target function share stack
8611 space. Thus if the path to the current function and the path to the
8612 target function save a value in rp', they save the value into the
8613 same stack slot, which has undesirable consequences.
8615 Because of the deferred binding nature of shared libraries any function
8616 with external scope could be in a different load module and thus require
8617 rp' to be saved when calling that function. So sibcall optimizations
8618 can only be safe for static function.
8620 Note that GCC never needs return value relocations, so we don't have to
8621 worry about static calls with return value relocations (which require
8624 It is safe to perform a sibcall optimization when the target function
8625 will never return. */
8627 pa_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
8629 /* Sibcalls are not ok because the arg pointer register is not a fixed
8630 register. This prevents the sibcall optimization from occurring. In
8631 addition, there are problems with stub placement using GNU ld. This
8632 is because a normal sibcall branch uses a 17-bit relocation while
8633 a regular call branch uses a 22-bit relocation. As a result, more
8634 care needs to be taken in the placement of long-branch stubs. */
8638 if (TARGET_PORTABLE_RUNTIME
)
8641 /* Sibcalls are only ok within a translation unit. */
8642 return decl
&& targetm
.binds_local_p (decl
);
8645 /* ??? Addition is not commutative on the PA due to the weird implicit
8646 space register selection rules for memory addresses. Therefore, we
8647 don't consider a + b == b + a, as this might be inside a MEM. */
8649 pa_commutative_p (const_rtx x
, int outer_code
)
8651 return (COMMUTATIVE_P (x
)
8652 && (TARGET_NO_SPACE_REGS
8653 || (outer_code
!= UNKNOWN
&& outer_code
!= MEM
)
8654 || GET_CODE (x
) != PLUS
));
8657 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8658 use in fmpyadd instructions. */
8660 pa_fmpyaddoperands (rtx
*operands
)
8662 machine_mode mode
= GET_MODE (operands
[0]);
8664 /* Must be a floating point mode. */
8665 if (mode
!= SFmode
&& mode
!= DFmode
)
8668 /* All modes must be the same. */
8669 if (! (mode
== GET_MODE (operands
[1])
8670 && mode
== GET_MODE (operands
[2])
8671 && mode
== GET_MODE (operands
[3])
8672 && mode
== GET_MODE (operands
[4])
8673 && mode
== GET_MODE (operands
[5])))
8676 /* All operands must be registers. */
8677 if (! (GET_CODE (operands
[1]) == REG
8678 && GET_CODE (operands
[2]) == REG
8679 && GET_CODE (operands
[3]) == REG
8680 && GET_CODE (operands
[4]) == REG
8681 && GET_CODE (operands
[5]) == REG
))
8684 /* Only 2 real operands to the addition. One of the input operands must
8685 be the same as the output operand. */
8686 if (! rtx_equal_p (operands
[3], operands
[4])
8687 && ! rtx_equal_p (operands
[3], operands
[5]))
8690 /* Inout operand of add cannot conflict with any operands from multiply. */
8691 if (rtx_equal_p (operands
[3], operands
[0])
8692 || rtx_equal_p (operands
[3], operands
[1])
8693 || rtx_equal_p (operands
[3], operands
[2]))
8696 /* multiply cannot feed into addition operands. */
8697 if (rtx_equal_p (operands
[4], operands
[0])
8698 || rtx_equal_p (operands
[5], operands
[0]))
8701 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8703 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
8704 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
8705 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
8706 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
8707 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
8708 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
8711 /* Passed. Operands are suitable for fmpyadd. */
8715 #if !defined(USE_COLLECT2)
8717 pa_asm_out_constructor (rtx symbol
, int priority
)
8719 if (!function_label_operand (symbol
, VOIDmode
))
8720 pa_encode_label (symbol
);
8722 #ifdef CTORS_SECTION_ASM_OP
8723 default_ctor_section_asm_out_constructor (symbol
, priority
);
8725 # ifdef TARGET_ASM_NAMED_SECTION
8726 default_named_section_asm_out_constructor (symbol
, priority
);
8728 default_stabs_asm_out_constructor (symbol
, priority
);
8734 pa_asm_out_destructor (rtx symbol
, int priority
)
8736 if (!function_label_operand (symbol
, VOIDmode
))
8737 pa_encode_label (symbol
);
8739 #ifdef DTORS_SECTION_ASM_OP
8740 default_dtor_section_asm_out_destructor (symbol
, priority
);
8742 # ifdef TARGET_ASM_NAMED_SECTION
8743 default_named_section_asm_out_destructor (symbol
, priority
);
8745 default_stabs_asm_out_destructor (symbol
, priority
);
8751 /* This function places uninitialized global data in the bss section.
8752 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8753 function on the SOM port to prevent uninitialized global data from
8754 being placed in the data section. */
8757 pa_asm_output_aligned_bss (FILE *stream
,
8759 unsigned HOST_WIDE_INT size
,
8762 switch_to_section (bss_section
);
8763 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8765 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8766 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "object");
8769 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8770 ASM_OUTPUT_SIZE_DIRECTIVE (stream
, name
, size
);
8773 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8774 ASM_OUTPUT_LABEL (stream
, name
);
8775 fprintf (stream
, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
8778 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8779 that doesn't allow the alignment of global common storage to be directly
8780 specified. The SOM linker aligns common storage based on the rounded
8781 value of the NUM_BYTES parameter in the .comm directive. It's not
8782 possible to use the .align directive as it doesn't affect the alignment
8783 of the label associated with a .comm directive. */
8786 pa_asm_output_aligned_common (FILE *stream
,
8788 unsigned HOST_WIDE_INT size
,
8791 unsigned int max_common_align
;
8793 max_common_align
= TARGET_64BIT
? 128 : (size
>= 4096 ? 256 : 64);
8794 if (align
> max_common_align
)
8796 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8797 "for global common data. Using %u",
8798 align
/ BITS_PER_UNIT
, name
, max_common_align
/ BITS_PER_UNIT
);
8799 align
= max_common_align
;
8802 switch_to_section (bss_section
);
8804 assemble_name (stream
, name
);
8805 fprintf (stream
, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED
"\n",
8806 MAX (size
, align
/ BITS_PER_UNIT
));
8809 /* We can't use .comm for local common storage as the SOM linker effectively
8810 treats the symbol as universal and uses the same storage for local symbols
8811 with the same name in different object files. The .block directive
8812 reserves an uninitialized block of storage. However, it's not common
8813 storage. Fortunately, GCC never requests common storage with the same
8814 name in any given translation unit. */
8817 pa_asm_output_aligned_local (FILE *stream
,
8819 unsigned HOST_WIDE_INT size
,
8822 switch_to_section (bss_section
);
8823 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8826 fprintf (stream
, "%s", LOCAL_ASM_OP
);
8827 assemble_name (stream
, name
);
8828 fprintf (stream
, "\n");
8831 ASM_OUTPUT_LABEL (stream
, name
);
8832 fprintf (stream
, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
8835 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8836 use in fmpysub instructions. */
8838 pa_fmpysuboperands (rtx
*operands
)
8840 machine_mode mode
= GET_MODE (operands
[0]);
8842 /* Must be a floating point mode. */
8843 if (mode
!= SFmode
&& mode
!= DFmode
)
8846 /* All modes must be the same. */
8847 if (! (mode
== GET_MODE (operands
[1])
8848 && mode
== GET_MODE (operands
[2])
8849 && mode
== GET_MODE (operands
[3])
8850 && mode
== GET_MODE (operands
[4])
8851 && mode
== GET_MODE (operands
[5])))
8854 /* All operands must be registers. */
8855 if (! (GET_CODE (operands
[1]) == REG
8856 && GET_CODE (operands
[2]) == REG
8857 && GET_CODE (operands
[3]) == REG
8858 && GET_CODE (operands
[4]) == REG
8859 && GET_CODE (operands
[5]) == REG
))
8862 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8863 operation, so operands[4] must be the same as operand[3]. */
8864 if (! rtx_equal_p (operands
[3], operands
[4]))
8867 /* multiply cannot feed into subtraction. */
8868 if (rtx_equal_p (operands
[5], operands
[0]))
8871 /* Inout operand of sub cannot conflict with any operands from multiply. */
8872 if (rtx_equal_p (operands
[3], operands
[0])
8873 || rtx_equal_p (operands
[3], operands
[1])
8874 || rtx_equal_p (operands
[3], operands
[2]))
8877 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8879 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
8880 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
8881 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
8882 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
8883 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
8884 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
8887 /* Passed. Operands are suitable for fmpysub. */
8891 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8892 constants for a MULT embedded inside a memory address. */
8894 pa_mem_shadd_constant_p (int val
)
8896 if (val
== 2 || val
== 4 || val
== 8)
8902 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
8903 constants for shadd instructions. */
8905 pa_shadd_constant_p (int val
)
8907 if (val
== 1 || val
== 2 || val
== 3)
8913 /* Return TRUE if INSN branches forward. */
8916 forward_branch_p (rtx_insn
*insn
)
8918 rtx lab
= JUMP_LABEL (insn
);
8920 /* The INSN must have a jump label. */
8921 gcc_assert (lab
!= NULL_RTX
);
8923 if (INSN_ADDRESSES_SET_P ())
8924 return INSN_ADDRESSES (INSN_UID (lab
)) > INSN_ADDRESSES (INSN_UID (insn
));
8931 insn
= NEXT_INSN (insn
);
8937 /* Output an unconditional move and branch insn. */
8940 pa_output_parallel_movb (rtx
*operands
, rtx_insn
*insn
)
8942 int length
= get_attr_length (insn
);
8944 /* These are the cases in which we win. */
8946 return "mov%I1b,tr %1,%0,%2";
8948 /* None of the following cases win, but they don't lose either. */
8951 if (dbr_sequence_length () == 0)
8953 /* Nothing in the delay slot, fake it by putting the combined
8954 insn (the copy or add) in the delay slot of a bl. */
8955 if (GET_CODE (operands
[1]) == CONST_INT
)
8956 return "b %2\n\tldi %1,%0";
8958 return "b %2\n\tcopy %1,%0";
8962 /* Something in the delay slot, but we've got a long branch. */
8963 if (GET_CODE (operands
[1]) == CONST_INT
)
8964 return "ldi %1,%0\n\tb %2";
8966 return "copy %1,%0\n\tb %2";
8970 if (GET_CODE (operands
[1]) == CONST_INT
)
8971 output_asm_insn ("ldi %1,%0", operands
);
8973 output_asm_insn ("copy %1,%0", operands
);
8974 return pa_output_lbranch (operands
[2], insn
, 1);
8977 /* Output an unconditional add and branch insn. */
8980 pa_output_parallel_addb (rtx
*operands
, rtx_insn
*insn
)
8982 int length
= get_attr_length (insn
);
8984 /* To make life easy we want operand0 to be the shared input/output
8985 operand and operand1 to be the readonly operand. */
8986 if (operands
[0] == operands
[1])
8987 operands
[1] = operands
[2];
8989 /* These are the cases in which we win. */
8991 return "add%I1b,tr %1,%0,%3";
8993 /* None of the following cases win, but they don't lose either. */
8996 if (dbr_sequence_length () == 0)
8997 /* Nothing in the delay slot, fake it by putting the combined
8998 insn (the copy or add) in the delay slot of a bl. */
8999 return "b %3\n\tadd%I1 %1,%0,%0";
9001 /* Something in the delay slot, but we've got a long branch. */
9002 return "add%I1 %1,%0,%0\n\tb %3";
9005 output_asm_insn ("add%I1 %1,%0,%0", operands
);
9006 return pa_output_lbranch (operands
[3], insn
, 1);
9009 /* We use this hook to perform a PA specific optimization which is difficult
9010 to do in earlier passes. */
9015 remove_useless_addtr_insns (1);
9017 if (pa_cpu
< PROCESSOR_8000
)
9018 pa_combine_instructions ();
9021 /* The PA has a number of odd instructions which can perform multiple
9022 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9023 it may be profitable to combine two instructions into one instruction
9024 with two outputs. It's not profitable PA2.0 machines because the
9025 two outputs would take two slots in the reorder buffers.
9027 This routine finds instructions which can be combined and combines
9028 them. We only support some of the potential combinations, and we
9029 only try common ways to find suitable instructions.
9031 * addb can add two registers or a register and a small integer
9032 and jump to a nearby (+-8k) location. Normally the jump to the
9033 nearby location is conditional on the result of the add, but by
9034 using the "true" condition we can make the jump unconditional.
9035 Thus addb can perform two independent operations in one insn.
9037 * movb is similar to addb in that it can perform a reg->reg
9038 or small immediate->reg copy and jump to a nearby (+-8k location).
9040 * fmpyadd and fmpysub can perform a FP multiply and either an
9041 FP add or FP sub if the operands of the multiply and add/sub are
9042 independent (there are other minor restrictions). Note both
9043 the fmpy and fadd/fsub can in theory move to better spots according
9044 to data dependencies, but for now we require the fmpy stay at a
9047 * Many of the memory operations can perform pre & post updates
9048 of index registers. GCC's pre/post increment/decrement addressing
9049 is far too simple to take advantage of all the possibilities. This
9050 pass may not be suitable since those insns may not be independent.
9052 * comclr can compare two ints or an int and a register, nullify
9053 the following instruction and zero some other register. This
9054 is more difficult to use as it's harder to find an insn which
9055 will generate a comclr than finding something like an unconditional
9056 branch. (conditional moves & long branches create comclr insns).
9058 * Most arithmetic operations can conditionally skip the next
9059 instruction. They can be viewed as "perform this operation
9060 and conditionally jump to this nearby location" (where nearby
9061 is an insns away). These are difficult to use due to the
9062 branch length restrictions. */
9065 pa_combine_instructions (void)
9069 /* This can get expensive since the basic algorithm is on the
9070 order of O(n^2) (or worse). Only do it for -O2 or higher
9071 levels of optimization. */
9075 /* Walk down the list of insns looking for "anchor" insns which
9076 may be combined with "floating" insns. As the name implies,
9077 "anchor" instructions don't move, while "floating" insns may
9079 rtx par
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, NULL_RTX
, NULL_RTX
));
9080 rtx_insn
*new_rtx
= make_insn_raw (par
);
9082 for (anchor
= get_insns (); anchor
; anchor
= NEXT_INSN (anchor
))
9084 enum attr_pa_combine_type anchor_attr
;
9085 enum attr_pa_combine_type floater_attr
;
9087 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9088 Also ignore any special USE insns. */
9089 if ((! NONJUMP_INSN_P (anchor
) && ! JUMP_P (anchor
) && ! CALL_P (anchor
))
9090 || GET_CODE (PATTERN (anchor
)) == USE
9091 || GET_CODE (PATTERN (anchor
)) == CLOBBER
)
9094 anchor_attr
= get_attr_pa_combine_type (anchor
);
9095 /* See if anchor is an insn suitable for combination. */
9096 if (anchor_attr
== PA_COMBINE_TYPE_FMPY
9097 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9098 || (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
9099 && ! forward_branch_p (anchor
)))
9103 for (floater
= PREV_INSN (anchor
);
9105 floater
= PREV_INSN (floater
))
9107 if (NOTE_P (floater
)
9108 || (NONJUMP_INSN_P (floater
)
9109 && (GET_CODE (PATTERN (floater
)) == USE
9110 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
9113 /* Anything except a regular INSN will stop our search. */
9114 if (! NONJUMP_INSN_P (floater
))
9120 /* See if FLOATER is suitable for combination with the
9122 floater_attr
= get_attr_pa_combine_type (floater
);
9123 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9124 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9125 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9126 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9128 /* If ANCHOR and FLOATER can be combined, then we're
9129 done with this pass. */
9130 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9131 SET_DEST (PATTERN (floater
)),
9132 XEXP (SET_SRC (PATTERN (floater
)), 0),
9133 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9137 else if (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
9138 && floater_attr
== PA_COMBINE_TYPE_ADDMOVE
)
9140 if (GET_CODE (SET_SRC (PATTERN (floater
))) == PLUS
)
9142 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9143 SET_DEST (PATTERN (floater
)),
9144 XEXP (SET_SRC (PATTERN (floater
)), 0),
9145 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9150 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9151 SET_DEST (PATTERN (floater
)),
9152 SET_SRC (PATTERN (floater
)),
9153 SET_SRC (PATTERN (floater
))))
9159 /* If we didn't find anything on the backwards scan try forwards. */
9161 && (anchor_attr
== PA_COMBINE_TYPE_FMPY
9162 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
))
9164 for (floater
= anchor
; floater
; floater
= NEXT_INSN (floater
))
9166 if (NOTE_P (floater
)
9167 || (NONJUMP_INSN_P (floater
)
9168 && (GET_CODE (PATTERN (floater
)) == USE
9169 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
9173 /* Anything except a regular INSN will stop our search. */
9174 if (! NONJUMP_INSN_P (floater
))
9180 /* See if FLOATER is suitable for combination with the
9182 floater_attr
= get_attr_pa_combine_type (floater
);
9183 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9184 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9185 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9186 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9188 /* If ANCHOR and FLOATER can be combined, then we're
9189 done with this pass. */
9190 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 1,
9191 SET_DEST (PATTERN (floater
)),
9192 XEXP (SET_SRC (PATTERN (floater
)),
9194 XEXP (SET_SRC (PATTERN (floater
)),
9201 /* FLOATER will be nonzero if we found a suitable floating
9202 insn for combination with ANCHOR. */
9204 && (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9205 || anchor_attr
== PA_COMBINE_TYPE_FMPY
))
9207 /* Emit the new instruction and delete the old anchor. */
9208 rtvec vtemp
= gen_rtvec (2, copy_rtx (PATTERN (anchor
)),
9209 copy_rtx (PATTERN (floater
)));
9210 rtx temp
= gen_rtx_PARALLEL (VOIDmode
, vtemp
);
9211 emit_insn_before (temp
, anchor
);
9213 SET_INSN_DELETED (anchor
);
9215 /* Emit a special USE insn for FLOATER, then delete
9216 the floating insn. */
9217 temp
= copy_rtx (PATTERN (floater
));
9218 emit_insn_before (gen_rtx_USE (VOIDmode
, temp
), floater
);
9219 delete_insn (floater
);
9224 && anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
)
9226 /* Emit the new_jump instruction and delete the old anchor. */
9227 rtvec vtemp
= gen_rtvec (2, copy_rtx (PATTERN (anchor
)),
9228 copy_rtx (PATTERN (floater
)));
9229 rtx temp
= gen_rtx_PARALLEL (VOIDmode
, vtemp
);
9230 temp
= emit_jump_insn_before (temp
, anchor
);
9232 JUMP_LABEL (temp
) = JUMP_LABEL (anchor
);
9233 SET_INSN_DELETED (anchor
);
9235 /* Emit a special USE insn for FLOATER, then delete
9236 the floating insn. */
9237 temp
= copy_rtx (PATTERN (floater
));
9238 emit_insn_before (gen_rtx_USE (VOIDmode
, temp
), floater
);
9239 delete_insn (floater
);
9247 pa_can_combine_p (rtx_insn
*new_rtx
, rtx_insn
*anchor
, rtx_insn
*floater
,
9248 int reversed
, rtx dest
,
9251 int insn_code_number
;
9252 rtx_insn
*start
, *end
;
9254 /* Create a PARALLEL with the patterns of ANCHOR and
9255 FLOATER, try to recognize it, then test constraints
9256 for the resulting pattern.
9258 If the pattern doesn't match or the constraints
9259 aren't met keep searching for a suitable floater
9261 XVECEXP (PATTERN (new_rtx
), 0, 0) = PATTERN (anchor
);
9262 XVECEXP (PATTERN (new_rtx
), 0, 1) = PATTERN (floater
);
9263 INSN_CODE (new_rtx
) = -1;
9264 insn_code_number
= recog_memoized (new_rtx
);
9265 basic_block bb
= BLOCK_FOR_INSN (anchor
);
9266 if (insn_code_number
< 0
9267 || (extract_insn (new_rtx
),
9268 !constrain_operands (1, get_preferred_alternatives (new_rtx
, bb
))))
9282 /* There's up to three operands to consider. One
9283 output and two inputs.
9285 The output must not be used between FLOATER & ANCHOR
9286 exclusive. The inputs must not be set between
9287 FLOATER and ANCHOR exclusive. */
9289 if (reg_used_between_p (dest
, start
, end
))
9292 if (reg_set_between_p (src1
, start
, end
))
9295 if (reg_set_between_p (src2
, start
, end
))
9298 /* If we get here, then everything is good. */
9302 /* Return nonzero if references for INSN are delayed.
9304 Millicode insns are actually function calls with some special
9305 constraints on arguments and register usage.
9307 Millicode calls always expect their arguments in the integer argument
9308 registers, and always return their result in %r29 (ret1). They
9309 are expected to clobber their arguments, %r1, %r29, and the return
9310 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9312 This function tells reorg that the references to arguments and
9313 millicode calls do not appear to happen until after the millicode call.
9314 This allows reorg to put insns which set the argument registers into the
9315 delay slot of the millicode call -- thus they act more like traditional
9318 Note we cannot consider side effects of the insn to be delayed because
9319 the branch and link insn will clobber the return pointer. If we happened
9320 to use the return pointer in the delay slot of the call, then we lose.
9322 get_attr_type will try to recognize the given insn, so make sure to
9323 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9326 pa_insn_refs_are_delayed (rtx_insn
*insn
)
9328 return ((NONJUMP_INSN_P (insn
)
9329 && GET_CODE (PATTERN (insn
)) != SEQUENCE
9330 && GET_CODE (PATTERN (insn
)) != USE
9331 && GET_CODE (PATTERN (insn
)) != CLOBBER
9332 && get_attr_type (insn
) == TYPE_MILLI
));
9335 /* Promote the return value, but not the arguments. */
9338 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
9340 int *punsignedp ATTRIBUTE_UNUSED
,
9341 const_tree fntype ATTRIBUTE_UNUSED
,
9344 if (for_return
== 0)
9346 return promote_mode (type
, mode
, punsignedp
);
9349 /* On the HP-PA the value is found in register(s) 28(-29), unless
9350 the mode is SF or DF. Then the value is returned in fr4 (32).
9352 This must perform the same promotions as PROMOTE_MODE, else promoting
9353 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9355 Small structures must be returned in a PARALLEL on PA64 in order
9356 to match the HP Compiler ABI. */
9359 pa_function_value (const_tree valtype
,
9360 const_tree func ATTRIBUTE_UNUSED
,
9361 bool outgoing ATTRIBUTE_UNUSED
)
9363 machine_mode valmode
;
9365 if (AGGREGATE_TYPE_P (valtype
)
9366 || TREE_CODE (valtype
) == COMPLEX_TYPE
9367 || TREE_CODE (valtype
) == VECTOR_TYPE
)
9369 HOST_WIDE_INT valsize
= int_size_in_bytes (valtype
);
9371 /* Handle aggregates that fit exactly in a word or double word. */
9372 if ((valsize
& (UNITS_PER_WORD
- 1)) == 0)
9373 return gen_rtx_REG (TYPE_MODE (valtype
), 28);
9377 /* Aggregates with a size less than or equal to 128 bits are
9378 returned in GR 28(-29). They are left justified. The pad
9379 bits are undefined. Larger aggregates are returned in
9383 int ub
= valsize
<= UNITS_PER_WORD
? 1 : 2;
9385 for (i
= 0; i
< ub
; i
++)
9387 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9388 gen_rtx_REG (DImode
, 28 + i
),
9393 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec_v (ub
, loc
));
9395 else if (valsize
> UNITS_PER_WORD
)
9397 /* Aggregates 5 to 8 bytes in size are returned in general
9398 registers r28-r29 in the same manner as other non
9399 floating-point objects. The data is right-justified and
9400 zero-extended to 64 bits. This is opposite to the normal
9401 justification used on big endian targets and requires
9402 special treatment. */
9403 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9404 gen_rtx_REG (DImode
, 28), const0_rtx
);
9405 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
9409 if ((INTEGRAL_TYPE_P (valtype
)
9410 && GET_MODE_BITSIZE (TYPE_MODE (valtype
)) < BITS_PER_WORD
)
9411 || POINTER_TYPE_P (valtype
))
9412 valmode
= word_mode
;
9414 valmode
= TYPE_MODE (valtype
);
9416 if (TREE_CODE (valtype
) == REAL_TYPE
9417 && !AGGREGATE_TYPE_P (valtype
)
9418 && TYPE_MODE (valtype
) != TFmode
9419 && !TARGET_SOFT_FLOAT
)
9420 return gen_rtx_REG (valmode
, 32);
9422 return gen_rtx_REG (valmode
, 28);
9425 /* Implement the TARGET_LIBCALL_VALUE hook. */
9428 pa_libcall_value (machine_mode mode
,
9429 const_rtx fun ATTRIBUTE_UNUSED
)
9431 if (! TARGET_SOFT_FLOAT
9432 && (mode
== SFmode
|| mode
== DFmode
))
9433 return gen_rtx_REG (mode
, 32);
9435 return gen_rtx_REG (mode
, 28);
9438 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9441 pa_function_value_regno_p (const unsigned int regno
)
9444 || (! TARGET_SOFT_FLOAT
&& regno
== 32))
9450 /* Update the data in CUM to advance over an argument
9451 of mode MODE and data type TYPE.
9452 (TYPE is null for libcalls where that information may not be available.) */
9455 pa_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
9456 const_tree type
, bool named ATTRIBUTE_UNUSED
)
9458 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9459 int arg_size
= pa_function_arg_size (mode
, type
);
9461 cum
->nargs_prototype
--;
9462 cum
->words
+= (arg_size
9463 + ((cum
->words
& 01)
9464 && type
!= NULL_TREE
/* NOTE(review): TARGET_FUNCTION_ARG hook.  This block arrived
   line-mangled: original source line numbers (e.g. "9468") are fused
   into the text, and many physical lines were lost in extraction --
   opening/closing braces, the declarations of arg_size / alignment /
   gpr_reg_base / fpr_reg_base / retval / loc[], the if/else skeleton
   for the 32-bit vs 64-bit paths, and several multi-line sub-conditions
   (upstream lines 9570-9582, 9623, 9627, 9631-9634, 9655-9657 among
   others).  The surviving code is preserved byte-for-byte below; do not
   edit until it has been re-synced with the upstream file.  */
9468 /* Return the location of a parameter that is passed in a register or NULL
9469 if the parameter has any component that is passed in memory.
9471 This is new code and will be pushed to into the net sources after
9474 ??? We might want to restructure this so that it looks more like other
9477 pa_function_arg (cumulative_args_t cum_v
, machine_mode mode
,
9478 const_tree type
, bool named ATTRIBUTE_UNUSED
)
9480 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9481 int max_arg_words
= (TARGET_64BIT
? 8 : 4);
9488 if (mode
== VOIDmode
)
9491 arg_size
= pa_function_arg_size (mode
, type
);
9493 /* If this arg would be passed partially or totally on the stack, then
9494 this routine should return zero. pa_arg_partial_bytes will
9495 handle arguments which are split between regs and stack slots if
9496 the ABI mandates split arguments. */
9499 /* The 32-bit ABI does not split arguments. */
9500 if (cum
->words
+ arg_size
> max_arg_words
)
9506 alignment
= cum
->words
& 1;
9507 if (cum
->words
+ alignment
>= max_arg_words
)
9511 /* The 32bit ABIs and the 64bit ABIs are rather different,
9512 particularly in their handling of FP registers. We might
9513 be able to cleverly share code between them, but I'm not
9514 going to bother in the hope that splitting them up results
9515 in code that is more easily understood. */
9519 /* Advance the base registers to their current locations.
9521 Remember, gprs grow towards smaller register numbers while
9522 fprs grow to higher register numbers. Also remember that
9523 although FP regs are 32-bit addressable, we pretend that
9524 the registers are 64-bits wide. */
9525 gpr_reg_base
= 26 - cum
->words
;
9526 fpr_reg_base
= 32 + cum
->words
;
9528 /* Arguments wider than one word and small aggregates need special
9532 || (type
&& (AGGREGATE_TYPE_P (type
)
9533 || TREE_CODE (type
) == COMPLEX_TYPE
9534 || TREE_CODE (type
) == VECTOR_TYPE
)))
9536 /* Double-extended precision (80-bit), quad-precision (128-bit)
9537 and aggregates including complex numbers are aligned on
9538 128-bit boundaries. The first eight 64-bit argument slots
9539 are associated one-to-one, with general registers r26
9540 through r19, and also with floating-point registers fr4
9541 through fr11. Arguments larger than one word are always
9542 passed in general registers.
9544 Using a PARALLEL with a word mode register results in left
9545 justified data on a big-endian target. */
9548 int i
, offset
= 0, ub
= arg_size
;
9550 /* Align the base register. */
9551 gpr_reg_base
-= alignment
;
9553 ub
= MIN (ub
, max_arg_words
- cum
->words
- alignment
);
9554 for (i
= 0; i
< ub
; i
++)
9556 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9557 gen_rtx_REG (DImode
, gpr_reg_base
),
9563 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (ub
, loc
));
/* NOTE(review): the 32-bit (else) arm follows; its register-pair
   selection (upstream lines 9570-9582, choosing r25/r23 and the FP
   pair bases depending on cum->words) was lost in extraction.  */
9568 /* If the argument is larger than a word, then we know precisely
9569 which registers we must use. */
9583 /* Structures 5 to 8 bytes in size are passed in the general
9584 registers in the same manner as other non floating-point
9585 objects. The data is right-justified and zero-extended
9586 to 64 bits. This is opposite to the normal justification
9587 used on big endian targets and requires special treatment.
9588 We now define BLOCK_REG_PADDING to pad these objects.
9589 Aggregates, complex and vector types are passed in the same
9590 manner as structures. */
9592 || (type
&& (AGGREGATE_TYPE_P (type
)
9593 || TREE_CODE (type
) == COMPLEX_TYPE
9594 || TREE_CODE (type
) == VECTOR_TYPE
)))
9596 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9597 gen_rtx_REG (DImode
, gpr_reg_base
),
9599 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
9604 /* We have a single word (32 bits). A simple computation
9605 will get us the register #s we need. */
9606 gpr_reg_base
= 26 - cum
->words
;
9607 fpr_reg_base
= 32 + 2 * cum
->words
;
9611 /* Determine if the argument needs to be passed in both general and
9612 floating point registers. */
9613 if (((TARGET_PORTABLE_RUNTIME
|| TARGET_64BIT
|| TARGET_ELF32
)
9614 /* If we are doing soft-float with portable runtime, then there
9615 is no need to worry about FP regs. */
9616 && !TARGET_SOFT_FLOAT
9617 /* The parameter must be some kind of scalar float, else we just
9618 pass it in integer registers. */
9619 && GET_MODE_CLASS (mode
) == MODE_FLOAT
9620 /* The target function must not have a prototype. */
9621 && cum
->nargs_prototype
<= 0
9622 /* libcalls do not need to pass items in both FP and general
9624 && type
!= NULL_TREE
9625 /* All this hair applies to "outgoing" args only. This includes
9626 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9628 /* Also pass outgoing floating arguments in both registers in indirect
9629 calls with the 32 bit ABI and the HP assembler since there is no
9630 way to specify argument locations in static functions. */
9635 && GET_MODE_CLASS (mode
) == MODE_FLOAT
))
/* NOTE(review): the `retval = gen_rtx_PARALLEL (mode, gen_rtvec (2, ...'
   wrapper around the two EXPR_LISTs below was partially lost.  */
9641 gen_rtx_EXPR_LIST (VOIDmode
,
9642 gen_rtx_REG (mode
, fpr_reg_base
),
9644 gen_rtx_EXPR_LIST (VOIDmode
,
9645 gen_rtx_REG (mode
, gpr_reg_base
),
9650 /* See if we should pass this parameter in a general register. */
9651 if (TARGET_SOFT_FLOAT
9652 /* Indirect calls in the normal 32bit ABI require all arguments
9653 to be passed in general registers. */
9654 || (!TARGET_PORTABLE_RUNTIME
9658 /* If the parameter is not a scalar floating-point parameter,
9659 then it belongs in GPRs. */
9660 || GET_MODE_CLASS (mode
) != MODE_FLOAT
9661 /* Structure with single SFmode field belongs in GPR. */
9662 || (type
&& AGGREGATE_TYPE_P (type
)))
9663 retval
= gen_rtx_REG (mode
, gpr_reg_base
);
9665 retval
= gen_rtx_REG (mode
, fpr_reg_base
);
9670 /* Arguments larger than one word are double word aligned. */
9673 pa_function_arg_boundary (machine_mode mode
, const_tree type
)
9675 bool singleword
= (type
9676 ? (integer_zerop (TYPE_SIZE (type
))
9677 || !TREE_CONSTANT (TYPE_SIZE (type
))
9678 || int_size_in_bytes (type
) <= UNITS_PER_WORD
)
9679 : GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
);
9681 return singleword
? PARM_BOUNDARY
: MAX_PARM_BOUNDARY
;
9684 /* If this arg would be passed totally in registers or totally on the stack,
9685 then this routine should return zero. */
9688 pa_arg_partial_bytes (cumulative_args_t cum_v
, machine_mode mode
,
9689 tree type
, bool named ATTRIBUTE_UNUSED
)
9691 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9692 unsigned int max_arg_words
= 8;
9693 unsigned int offset
= 0;
9698 if (pa_function_arg_size (mode
, type
) > 1 && (cum
->words
& 1))
9701 if (cum
->words
+ offset
+ pa_function_arg_size (mode
, type
) <= max_arg_words
)
9702 /* Arg fits fully into registers. */
9704 else if (cum
->words
+ offset
>= max_arg_words
)
9705 /* Arg fully on the stack. */
9709 return (max_arg_words
- cum
->words
- offset
) * UNITS_PER_WORD
;
9713 /* A get_unnamed_section callback for switching to the text section.
9715 This function is only used with SOM. Because we don't support
9716 named subspaces, we can only create a new subspace or switch back
9717 to the default text subspace. */
9720 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
9722 gcc_assert (TARGET_SOM
);
9725 if (cfun
&& cfun
->machine
&& !cfun
->machine
->in_nsubspa
)
9727 /* We only want to emit a .nsubspa directive once at the
9728 start of the function. */
9729 cfun
->machine
->in_nsubspa
= 1;
9731 /* Create a new subspace for the text. This provides
9732 better stub placement and one-only functions. */
9734 && DECL_ONE_ONLY (cfun
->decl
)
9735 && !DECL_WEAK (cfun
->decl
))
9737 output_section_asm_op ("\t.SPACE $TEXT$\n"
9738 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9739 "ACCESS=44,SORT=24,COMDAT");
9745 /* There isn't a current function or the body of the current
9746 function has been completed. So, we are changing to the
9747 text section to output debugging information. Thus, we
9748 need to forget that we are in the text section so that
9749 varasm.c will call us when text_section is selected again. */
9750 gcc_assert (!cfun
|| !cfun
->machine
9751 || cfun
->machine
->in_nsubspa
== 2);
9754 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9757 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9760 /* A get_unnamed_section callback for switching to comdat data
9761 sections. This function is only used with SOM. */
9764 som_output_comdat_data_section_asm_op (const void *data
)
9767 output_section_asm_op (data
);
9770 /* Implement TARGET_ASM_INIT_SECTIONS. */
9773 pa_som_asm_init_sections (void)
9776 = get_unnamed_section (0, som_output_text_section_asm_op
, NULL
);
9778 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9779 is not being generated. */
9780 som_readonly_data_section
9781 = get_unnamed_section (0, output_section_asm_op
,
9782 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9784 /* When secondary definitions are not supported, SOM makes readonly
9785 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9787 som_one_only_readonly_data_section
9788 = get_unnamed_section (0, som_output_comdat_data_section_asm_op
,
9790 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9791 "ACCESS=0x2c,SORT=16,COMDAT");
9794 /* When secondary definitions are not supported, SOM makes data one-only
9795 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9796 som_one_only_data_section
9797 = get_unnamed_section (SECTION_WRITE
,
9798 som_output_comdat_data_section_asm_op
,
9799 "\t.SPACE $PRIVATE$\n"
9800 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9801 "ACCESS=31,SORT=24,COMDAT");
9804 som_tm_clone_table_section
9805 = get_unnamed_section (0, output_section_asm_op
,
9806 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9808 /* HPUX ld generates incorrect GOT entries for "T" fixups which
9809 reference data within the $TEXT$ space (for example constant
9810 strings in the $LIT$ subspace).
9812 The assemblers (GAS and HP as) both have problems with handling
9813 the difference of two symbols. This is the other correct way to
9814 reference constant data during PIC code generation.
9816 Thus, we can't put constant data needing relocation in the $TEXT$
9817 space during PIC generation.
9819 Previously, we placed all constant data into the $DATA$ subspace
9820 when generating PIC code. This reduces sharing, but it works
9821 correctly. Now we rely on pa_reloc_rw_mask() for section selection.
9822 This puts constant data not needing relocation into the $TEXT$ space. */
9823 readonly_data_section
= som_readonly_data_section
;
9825 /* We must not have a reference to an external symbol defined in a
9826 shared library in a readonly section, else the SOM linker will
9829 So, we force exception information into the data section. */
9830 exception_section
= data_section
;
9833 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9836 pa_som_tm_clone_table_section (void)
9838 return som_tm_clone_table_section
;
9841 /* On hpux10, the linker will give an error if we have a reference
9842 in the read-only data section to a symbol defined in a shared
9843 library. Therefore, expressions that might require a reloc
9844 cannot be placed in the read-only data section. */
9847 pa_select_section (tree exp
, int reloc
,
9848 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
9850 if (TREE_CODE (exp
) == VAR_DECL
9851 && TREE_READONLY (exp
)
9852 && !TREE_THIS_VOLATILE (exp
)
9853 && DECL_INITIAL (exp
)
9854 && (DECL_INITIAL (exp
) == error_mark_node
9855 || TREE_CONSTANT (DECL_INITIAL (exp
)))
9856 && !(reloc
& pa_reloc_rw_mask ()))
9859 && DECL_ONE_ONLY (exp
)
9860 && !DECL_WEAK (exp
))
9861 return som_one_only_readonly_data_section
;
9863 return readonly_data_section
;
9865 else if (CONSTANT_CLASS_P (exp
)
9866 && !(reloc
& pa_reloc_rw_mask ()))
9867 return readonly_data_section
;
9869 && TREE_CODE (exp
) == VAR_DECL
9870 && DECL_ONE_ONLY (exp
)
9871 && !DECL_WEAK (exp
))
9872 return som_one_only_data_section
;
9874 return data_section
;
9877 /* Implement pa_reloc_rw_mask. */
9880 pa_reloc_rw_mask (void)
9882 if (flag_pic
|| (TARGET_SOM
&& !TARGET_HPUX_11
))
9885 /* HP linker does not support global relocs in readonly memory. */
9886 return TARGET_SOM
? 2 : 0;
9890 pa_globalize_label (FILE *stream
, const char *name
)
9892 /* We only handle DATA objects here, functions are globalized in
9893 ASM_DECLARE_FUNCTION_NAME. */
9894 if (! FUNCTION_NAME_P (name
))
9896 fputs ("\t.EXPORT ", stream
);
9897 assemble_name (stream
, name
);
9898 fputs (",DATA\n", stream
);
9902 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9905 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED
,
9906 int incoming ATTRIBUTE_UNUSED
)
9908 return gen_rtx_REG (Pmode
, PA_STRUCT_VALUE_REGNUM
);
9911 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9914 pa_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
9916 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9917 PA64 ABI says that objects larger than 128 bits are returned in memory.
9918 Note, int_size_in_bytes can return -1 if the size of the object is
9919 variable or larger than the maximum value that can be expressed as
9920 a HOST_WIDE_INT. It can also return zero for an empty type. The
9921 simplest way to handle variable and empty types is to pass them in
9922 memory. This avoids problems in defining the boundaries of argument
9923 slots, allocating registers, etc. */
9924 return (int_size_in_bytes (type
) > (TARGET_64BIT
? 16 : 8)
9925 || int_size_in_bytes (type
) <= 0);
9928 /* Structure to hold declaration and name of external symbols that are
9929 emitted by GCC. We generate a vector of these symbols and output them
9930 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9931 This avoids putting out names that are never really used. */
9933 typedef struct GTY(()) extern_symbol
9939 /* Define gc'd vector type for extern_symbol. */
9941 /* Vector of extern_symbol pointers. */
9942 static GTY(()) vec
<extern_symbol
, va_gc
> *extern_symbols
;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
/* Mark DECL (name NAME) as an external reference (assembler output
   file FILE).  This saves the names to output at the end of the file
   if actually referenced.  */

static void
pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
{
  gcc_assert (file == asm_out_file);
  extern_symbol p = {decl, name};
  vec_safe_push (extern_symbols, p);
}
#endif
9958 /* Output text required at the end of an assembler file.
9959 This includes deferred plabels and .import directives for
9960 all external symbols that were actually referenced. */
/* NOTE(review): the function's defining line ("static void" plus its
   name -- presumably the TARGET_ASM_FILE_END worker; confirm against
   upstream) was lost in extraction, along with the declarations of the
   loop index `i' and extern_symbol pointer `p' and several braces and
   the closing #endif.  Lines kept byte-for-byte below.  */
9965 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9969 if (!NO_DEFERRED_PROFILE_COUNTERS
)
9970 output_deferred_profile_counters ();
9973 output_deferred_plabels ();
9975 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9976 for (i
= 0; vec_safe_iterate (extern_symbols
, i
, &p
); i
++)
9978 tree decl
= p
->decl
;
9980 if (!TREE_ASM_WRITTEN (decl
)
9981 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl
), 0)))
9982 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file
, decl
, p
->name
);
9985 vec_free (extern_symbols
);
9988 if (NEED_INDICATE_EXEC_STACK
)
9989 file_end_indicate_exec_stack ();
9992 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
9995 pa_can_change_mode_class (machine_mode from
, machine_mode to
,
10001 if (GET_MODE_SIZE (from
) == GET_MODE_SIZE (to
))
10004 /* Reject changes to/from modes with zero size. */
10005 if (!GET_MODE_SIZE (from
) || !GET_MODE_SIZE (to
))
10008 /* Reject changes to/from complex and vector modes. */
10009 if (COMPLEX_MODE_P (from
) || VECTOR_MODE_P (from
)
10010 || COMPLEX_MODE_P (to
) || VECTOR_MODE_P (to
))
10013 /* There is no way to load QImode or HImode values directly from memory
10014 to a FP register. SImode loads to the FP registers are not zero
10015 extended. On the 64-bit target, this conflicts with the definition
10016 of LOAD_EXTEND_OP. Thus, we reject all mode changes in the FP registers
10017 except for DImode to SImode on the 64-bit target. It is handled by
10018 register renaming in pa_print_operand. */
10019 if (MAYBE_FP_REG_CLASS_P (rclass
))
10020 return TARGET_64BIT
&& from
== DImode
&& to
== SImode
;
10022 /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10023 in specific sets of registers. Thus, we cannot allow changing
10024 to a larger mode when it's larger than a word. */
10025 if (GET_MODE_SIZE (to
) > UNITS_PER_WORD
10026 && GET_MODE_SIZE (to
) > GET_MODE_SIZE (from
))
10032 /* Implement TARGET_MODES_TIEABLE_P.
10034 We should return FALSE for QImode and HImode because these modes
10035 are not ok in the floating-point registers. However, this prevents
10036 tieing these modes to SImode and DImode in the general registers.
10037 So, this isn't a good idea. We rely on TARGET_HARD_REGNO_MODE_OK and
10038 TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10039 in the floating-point registers. */
10042 pa_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
10044 /* Don't tie modes in different classes. */
10045 if (GET_MODE_CLASS (mode1
) != GET_MODE_CLASS (mode2
))
10052 /* Length in units of the trampoline instruction code. */
10054 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
10057 /* Output assembler code for a block containing the constant parts
10058 of a trampoline, leaving space for the variable parts.
10060 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10061 and then branches to the specified routine.
10063 This code template is copied from text segment to stack location
10064 and then patched with pa_trampoline_init to contain valid values,
10065 and then entered as a subroutine.
10067 It is best to keep this as small as possible to avoid having to
10068 flush multiple lines in the cache. */
/* NOTE(review): TARGET_ASM_TRAMPOLINE_TEMPLATE worker.  The function
   header ("static void") and the branch structure selecting between the
   32-bit, PA 2.0 and 64-bit templates were lost in extraction, and the
   whitespace inside the assembler template strings may have been
   mangled (tabs vs spaces) -- these strings are emitted verbatim, so
   they must be re-verified against upstream byte-for-byte.  Surviving
   lines are preserved unchanged below.  */
10071 pa_asm_trampoline_template (FILE *f
)
/* 32-bit template: load the function descriptor word at 36(%r22).  */
10075 fputs ("\tldw 36(%r22),%r21\n", f
);
10076 fputs ("\tbb,>=,n %r21,30,.+16\n", f
);
10077 if (ASSEMBLER_DIALECT
== 0)
10078 fputs ("\tdepi 0,31,2,%r21\n", f
);
10080 fputs ("\tdepwi 0,31,2,%r21\n", f
);
10081 fputs ("\tldw 4(%r21),%r19\n", f
);
10082 fputs ("\tldw 0(%r21),%r21\n", f
);
/* NOTE(review): PA 2.0 tail (bve) -- its enclosing guard was lost.  */
10085 fputs ("\tbve (%r21)\n", f
);
10086 fputs ("\tldw 40(%r22),%r29\n", f
);
10087 fputs ("\t.word 0\n", f
);
10088 fputs ("\t.word 0\n", f
);
/* NOTE(review): pre-2.0 tail (ldsid/mtsp/be) -- enclosing else lost.  */
10092 fputs ("\tldsid (%r21),%r1\n", f
);
10093 fputs ("\tmtsp %r1,%sr0\n", f
);
10094 fputs ("\tbe 0(%sr0,%r21)\n", f
);
10095 fputs ("\tldw 40(%r22),%r29\n", f
);
10097 fputs ("\t.word 0\n", f
);
10098 fputs ("\t.word 0\n", f
);
10099 fputs ("\t.word 0\n", f
);
10100 fputs ("\t.word 0\n", f
);
/* NOTE(review): 64-bit template -- enclosing else/branch lost.  */
10104 fputs ("\t.dword 0\n", f
);
10105 fputs ("\t.dword 0\n", f
);
10106 fputs ("\t.dword 0\n", f
);
10107 fputs ("\t.dword 0\n", f
);
10108 fputs ("\tmfia %r31\n", f
);
10109 fputs ("\tldd 24(%r31),%r1\n", f
);
10110 fputs ("\tldd 24(%r1),%r27\n", f
);
10111 fputs ("\tldd 16(%r1),%r1\n", f
);
10112 fputs ("\tbve (%r1)\n", f
);
10113 fputs ("\tldd 32(%r31),%r31\n", f
);
10114 fputs ("\t.dword 0 ; fptr\n", f
);
10115 fputs ("\t.dword 0 ; static link\n", f
);
10119 /* Emit RTL insns to initialize the variable parts of a trampoline.
10120 FNADDR is an RTX for the address of the function's pure code.
10121 CXT is an RTX for the static chain value for the function.
10123 Move the function address to the trampoline template at offset 36.
10124 Move the static chain value to trampoline template at offset 40.
10125 Move the trampoline address to trampoline template at offset 44.
10126 Move r19 to trampoline template at offset 48. The latter two
10127 words create a plabel for the indirect call to the trampoline.
10129 A similar sequence is used for the 64-bit port but the plabel is
10130 at the beginning of the trampoline.
10132 Finally, the cache entries for the trampoline code are flushed.
10133 This is necessary to ensure that the trampoline instruction sequence
10134 is written to memory prior to any attempts at prefetching the code
10138 pa_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
10140 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
10141 rtx start_addr
= gen_reg_rtx (Pmode
);
10142 rtx end_addr
= gen_reg_rtx (Pmode
);
10143 rtx line_length
= gen_reg_rtx (Pmode
);
10146 emit_block_move (m_tramp
, assemble_trampoline_template (),
10147 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
10148 r_tramp
= force_reg (Pmode
, XEXP (m_tramp
, 0));
10152 tmp
= adjust_address (m_tramp
, Pmode
, 36);
10153 emit_move_insn (tmp
, fnaddr
);
10154 tmp
= adjust_address (m_tramp
, Pmode
, 40);
10155 emit_move_insn (tmp
, chain_value
);
10157 /* Create a fat pointer for the trampoline. */
10158 tmp
= adjust_address (m_tramp
, Pmode
, 44);
10159 emit_move_insn (tmp
, r_tramp
);
10160 tmp
= adjust_address (m_tramp
, Pmode
, 48);
10161 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 19));
10163 /* fdc and fic only use registers for the address to flush,
10164 they do not accept integer displacements. We align the
10165 start and end addresses to the beginning of their respective
10166 cache lines to minimize the number of lines flushed. */
10167 emit_insn (gen_andsi3 (start_addr
, r_tramp
,
10168 GEN_INT (-MIN_CACHELINE_SIZE
)));
10169 tmp
= force_reg (Pmode
, plus_constant (Pmode
, r_tramp
,
10170 TRAMPOLINE_CODE_SIZE
-1));
10171 emit_insn (gen_andsi3 (end_addr
, tmp
,
10172 GEN_INT (-MIN_CACHELINE_SIZE
)));
10173 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10174 emit_insn (gen_dcacheflushsi (start_addr
, end_addr
, line_length
));
10175 emit_insn (gen_icacheflushsi (start_addr
, end_addr
, line_length
,
10176 gen_reg_rtx (Pmode
),
10177 gen_reg_rtx (Pmode
)));
10181 tmp
= adjust_address (m_tramp
, Pmode
, 56);
10182 emit_move_insn (tmp
, fnaddr
);
10183 tmp
= adjust_address (m_tramp
, Pmode
, 64);
10184 emit_move_insn (tmp
, chain_value
);
10186 /* Create a fat pointer for the trampoline. */
10187 tmp
= adjust_address (m_tramp
, Pmode
, 16);
10188 emit_move_insn (tmp
, force_reg (Pmode
, plus_constant (Pmode
,
10190 tmp
= adjust_address (m_tramp
, Pmode
, 24);
10191 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 27));
10193 /* fdc and fic only use registers for the address to flush,
10194 they do not accept integer displacements. We align the
10195 start and end addresses to the beginning of their respective
10196 cache lines to minimize the number of lines flushed. */
10197 tmp
= force_reg (Pmode
, plus_constant (Pmode
, r_tramp
, 32));
10198 emit_insn (gen_anddi3 (start_addr
, tmp
,
10199 GEN_INT (-MIN_CACHELINE_SIZE
)));
10200 tmp
= force_reg (Pmode
, plus_constant (Pmode
, tmp
,
10201 TRAMPOLINE_CODE_SIZE
- 1));
10202 emit_insn (gen_anddi3 (end_addr
, tmp
,
10203 GEN_INT (-MIN_CACHELINE_SIZE
)));
10204 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10205 emit_insn (gen_dcacheflushdi (start_addr
, end_addr
, line_length
));
10206 emit_insn (gen_icacheflushdi (start_addr
, end_addr
, line_length
,
10207 gen_reg_rtx (Pmode
),
10208 gen_reg_rtx (Pmode
)));
10211 #ifdef HAVE_ENABLE_EXECUTE_STACK
10212 Â
emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
10213 LCT_NORMAL
, VOIDmode
, XEXP (m_tramp
, 0), Pmode
);
10217 /* Perform any machine-specific adjustment in the address of the trampoline.
10218 ADDR contains the address that was passed to pa_trampoline_init.
10219 Adjust the trampoline address to point to the plabel at offset 44. */
10222 pa_trampoline_adjust_address (rtx addr
)
10225 addr
= memory_address (Pmode
, plus_constant (Pmode
, addr
, 46));
10230 pa_delegitimize_address (rtx orig_x
)
10232 rtx x
= delegitimize_mem_from_attrs (orig_x
);
10234 if (GET_CODE (x
) == LO_SUM
10235 && GET_CODE (XEXP (x
, 1)) == UNSPEC
10236 && XINT (XEXP (x
, 1), 1) == UNSPEC_DLTIND14R
)
10237 return gen_const_mem (Pmode
, XVECEXP (XEXP (x
, 1), 0, 0));
10242 pa_internal_arg_pointer (void)
10244 /* The argument pointer and the hard frame pointer are the same in
10245 the 32-bit runtime, so we don't need a copy. */
10247 return copy_to_reg (virtual_incoming_args_rtx
);
10249 return virtual_incoming_args_rtx
;
10252 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10253 Frame pointer elimination is automatically handled. */
10256 pa_can_eliminate (const int from
, const int to
)
10258 /* The argument cannot be eliminated in the 64-bit runtime. */
10259 if (TARGET_64BIT
&& from
== ARG_POINTER_REGNUM
)
10262 return (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
10263 ? ! frame_pointer_needed
10267 /* Define the offset between two registers, FROM to be eliminated and its
10268 replacement TO, at the start of a routine. */
10270 pa_initial_elimination_offset (int from
, int to
)
10272 HOST_WIDE_INT offset
;
10274 if ((from
== HARD_FRAME_POINTER_REGNUM
|| from
== FRAME_POINTER_REGNUM
)
10275 && to
== STACK_POINTER_REGNUM
)
10276 offset
= -pa_compute_frame_size (get_frame_size (), 0);
10277 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
10280 gcc_unreachable ();
10286 pa_conditional_register_usage (void)
10290 if (!TARGET_64BIT
&& !TARGET_PA_11
)
10292 for (i
= 56; i
<= FP_REG_LAST
; i
++)
10293 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10294 for (i
= 33; i
< 56; i
+= 2)
10295 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10297 if (TARGET_DISABLE_FPREGS
|| TARGET_SOFT_FLOAT
)
10299 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
10300 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10303 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
10306 /* Target hook for c_mode_for_suffix. */
10308 static machine_mode
10309 pa_c_mode_for_suffix (char suffix
)
10311 if (HPUX_LONG_DOUBLE_LIBRARY
)
10320 /* Target hook for function_section. */
10323 pa_function_section (tree decl
, enum node_frequency freq
,
10324 bool startup
, bool exit
)
10326 /* Put functions in text section if target doesn't have named sections. */
10327 if (!targetm_common
.have_named_sections
)
10328 return text_section
;
10330 /* Force nested functions into the same section as the containing
10333 && DECL_SECTION_NAME (decl
) == NULL
10334 && DECL_CONTEXT (decl
) != NULL_TREE
10335 && TREE_CODE (DECL_CONTEXT (decl
)) == FUNCTION_DECL
10336 && DECL_SECTION_NAME (DECL_CONTEXT (decl
)) == NULL
)
10337 return function_section (DECL_CONTEXT (decl
));
10339 /* Otherwise, use the default function section. */
10340 return default_function_section (decl
, freq
, startup
, exit
);
10343 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10345 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10346 that need more than three instructions to load prior to reload. This
10347 limit is somewhat arbitrary. It takes three instructions to load a
10348 CONST_INT from memory but two are memory accesses. It may be better
10349 to increase the allowed range for CONST_INTS. We may also be able
10350 to handle CONST_DOUBLES. */
10353 pa_legitimate_constant_p (machine_mode mode
, rtx x
)
10355 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& x
!= CONST0_RTX (mode
))
10358 if (!NEW_HP_ASSEMBLER
&& !TARGET_GAS
&& GET_CODE (x
) == LABEL_REF
)
10361 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10362 legitimate constants. The other variants can't be handled by
10363 the move patterns after reload starts. */
10364 if (tls_referenced_p (x
))
10367 if (TARGET_64BIT
&& GET_CODE (x
) == CONST_DOUBLE
)
10371 && HOST_BITS_PER_WIDE_INT
> 32
10372 && GET_CODE (x
) == CONST_INT
10373 && !reload_in_progress
10374 && !reload_completed
10375 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x
))
10376 && !pa_cint_ok_for_move (UINTVAL (x
)))
10379 if (function_label_operand (x
, mode
))
10385 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10387 static unsigned int
10388 pa_section_type_flags (tree decl
, const char *name
, int reloc
)
10390 unsigned int flags
;
10392 flags
= default_section_type_flags (decl
, name
, reloc
);
10394 /* Function labels are placed in the constant pool. This can
10395 cause a section conflict if decls are put in ".data.rel.ro"
10396 or ".data.rel.ro.local" using the __attribute__ construct. */
10397 if (strcmp (name
, ".data.rel.ro") == 0
10398 || strcmp (name
, ".data.rel.ro.local") == 0)
10399 flags
|= SECTION_WRITE
| SECTION_RELRO
;
10404 /* pa_legitimate_address_p recognizes an RTL expression that is a
10405 valid memory address for an instruction. The MODE argument is the
10406 machine mode for the MEM expression that wants to use this address.
10408 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10409 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10410 available with floating point loads and stores, and integer loads.
10411 We get better code by allowing indexed addresses in the initial
10414 The acceptance of indexed addresses as legitimate implies that we
10415 must provide patterns for doing indexed integer stores, or the move
10416 expanders must force the address of an indexed store to a register.
10417 We have adopted the latter approach.
10419 Another function of pa_legitimate_address_p is to ensure that
10420 the base register is a valid pointer for indexed instructions.
10421 On targets that have non-equivalent space registers, we have to
10422 know at the time of assembler output which register in a REG+REG
10423 pair is the base register. The REG_POINTER flag is sometimes lost
10424 in reload and the following passes, so it can't be relied on during
10425 code generation. Thus, we either have to canonicalize the order
10426 of the registers in REG+REG indexed addresses, or treat REG+REG
10427 addresses separately and provide patterns for both permutations.
10429 The latter approach requires several hundred additional lines of
10430 code in pa.md. The downside to canonicalizing is that a PLUS
10431 in the wrong order can't combine to form to make a scaled indexed
10432 memory operand. As we won't need to canonicalize the operands if
10433 the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10435 We initially break out scaled indexed addresses in canonical order
10436 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10437 scaled indexed addresses during RTL generation. However, fold_rtx
10438 has its own opinion on how the operands of a PLUS should be ordered.
10439 If one of the operands is equivalent to a constant, it will make
10440 that operand the second operand. As the base register is likely to
10441 be equivalent to a SYMBOL_REF, we have made it the second operand.
10443 pa_legitimate_address_p accepts REG+REG as legitimate when the
10444 operands are in the order INDEX+BASE on targets with non-equivalent
10445 space registers, and in any order on targets with equivalent space
10446 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10448 We treat a SYMBOL_REF as legitimate if it is part of the current
10449 function's constant-pool, because such addresses can actually be
10450 output as REG+SMALLINT. */
10453 pa_legitimate_address_p (machine_mode mode
, rtx x
, bool strict
)
10456 && (strict
? STRICT_REG_OK_FOR_BASE_P (x
)
10457 : REG_OK_FOR_BASE_P (x
)))
10458 || ((GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
10459 || GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
)
10460 && REG_P (XEXP (x
, 0))
10461 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 0))
10462 : REG_OK_FOR_BASE_P (XEXP (x
, 0)))))
10465 if (GET_CODE (x
) == PLUS
)
10469 /* For REG+REG, the base register should be in XEXP (x, 1),
10470 so check it first. */
10471 if (REG_P (XEXP (x
, 1))
10472 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 1))
10473 : REG_OK_FOR_BASE_P (XEXP (x
, 1))))
10474 base
= XEXP (x
, 1), index
= XEXP (x
, 0);
10475 else if (REG_P (XEXP (x
, 0))
10476 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 0))
10477 : REG_OK_FOR_BASE_P (XEXP (x
, 0))))
10478 base
= XEXP (x
, 0), index
= XEXP (x
, 1);
10482 if (GET_CODE (index
) == CONST_INT
)
10484 if (INT_5_BITS (index
))
10487 /* When INT14_OK_STRICT is false, a secondary reload is needed
10488 to adjust the displacement of SImode and DImode floating point
10489 instructions but this may fail when the register also needs
10490 reloading. So, we return false when STRICT is true. We
10491 also reject long displacements for float mode addresses since
10492 the majority of accesses will use floating point instructions
10493 that don't support 14-bit offsets. */
10494 if (!INT14_OK_STRICT
10495 && (strict
|| !(reload_in_progress
|| reload_completed
))
10500 return base14_operand (index
, mode
);
10503 if (!TARGET_DISABLE_INDEXING
10504 /* Only accept the "canonical" INDEX+BASE operand order
10505 on targets with non-equivalent space registers. */
10506 && (TARGET_NO_SPACE_REGS
10508 : (base
== XEXP (x
, 1) && REG_P (index
)
10509 && (reload_completed
10510 || (reload_in_progress
&& HARD_REGISTER_P (base
))
10511 || REG_POINTER (base
))
10512 && (reload_completed
10513 || (reload_in_progress
&& HARD_REGISTER_P (index
))
10514 || !REG_POINTER (index
))))
10515 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode
)
10516 && (strict
? STRICT_REG_OK_FOR_INDEX_P (index
)
10517 : REG_OK_FOR_INDEX_P (index
))
10518 && borx_reg_operand (base
, Pmode
)
10519 && borx_reg_operand (index
, Pmode
))
10522 if (!TARGET_DISABLE_INDEXING
10523 && GET_CODE (index
) == MULT
10524 /* Only accept base operands with the REG_POINTER flag prior to
10525 reload on targets with non-equivalent space registers. */
10526 && (TARGET_NO_SPACE_REGS
10527 || (base
== XEXP (x
, 1)
10528 && (reload_completed
10529 || (reload_in_progress
&& HARD_REGISTER_P (base
))
10530 || REG_POINTER (base
))))
10531 && REG_P (XEXP (index
, 0))
10532 && GET_MODE (XEXP (index
, 0)) == Pmode
10533 && MODE_OK_FOR_SCALED_INDEXING_P (mode
)
10534 && (strict
? STRICT_REG_OK_FOR_INDEX_P (XEXP (index
, 0))
10535 : REG_OK_FOR_INDEX_P (XEXP (index
, 0)))
10536 && GET_CODE (XEXP (index
, 1)) == CONST_INT
10537 && INTVAL (XEXP (index
, 1))
10538 == (HOST_WIDE_INT
) GET_MODE_SIZE (mode
)
10539 && borx_reg_operand (base
, Pmode
))
10545 if (GET_CODE (x
) == LO_SUM
)
10547 rtx y
= XEXP (x
, 0);
10549 if (GET_CODE (y
) == SUBREG
)
10550 y
= SUBREG_REG (y
);
10553 && (strict
? STRICT_REG_OK_FOR_BASE_P (y
)
10554 : REG_OK_FOR_BASE_P (y
)))
10556 /* Needed for -fPIC */
10558 && GET_CODE (XEXP (x
, 1)) == UNSPEC
)
10561 if (!INT14_OK_STRICT
10562 && (strict
|| !(reload_in_progress
|| reload_completed
))
10567 if (CONSTANT_P (XEXP (x
, 1)))
10573 if (GET_CODE (x
) == CONST_INT
&& INT_5_BITS (x
))
10579 /* Look for machine dependent ways to make the invalid address AD a valid address.
10582 For the PA, transform:
10584 memory(X + <large int>)
10588 if (<large int> & mask) >= 16
10589 Y = (<large int> & ~mask) + mask + 1 Round up.
10591 Y = (<large int> & ~mask) Round down.
10593 memory (Z + (<large int> - Y));
10595 This makes reload inheritance and reload_cse work better since Z
10598 There may be more opportunities to improve code with this hook. */
/* Reload hook: try to legitimize the invalid address AD for MODE by
   splitting a large constant displacement into an aligned high part
   (which gets reloaded into a base register) plus a small remainder,
   as described in the comment above.  OPNUM and TYPE are passed
   through to push_reload.
   NOTE(review): lossy extraction -- the return type, braces, an
   "if (!optimize ..." style guard, an "else", a "!VAL_14_BITS_P"
   test line and the final return statements are missing below.  */
10601 pa_legitimize_reload_address (rtx ad
, machine_mode mode
,
10602 int opnum
, int type
,
10603 int ind_levels ATTRIBUTE_UNUSED
)
10605 long offset
, newoffset
, mask
;
10606 rtx new_rtx
, temp
= NULL_RTX
;
/* Floating-point accesses without 14-bit displacement support only
   allow a 5-bit displacement, hence the smaller mask.  */
10608 mask
= (GET_MODE_CLASS (mode
) == MODE_FLOAT
10609 && !INT14_OK_STRICT
? 0x1f : 0x3fff);
/* Fold a constant PLUS first so the full displacement is visible.  */
10611 if (optimize
&& GET_CODE (ad
) == PLUS
)
10612 temp
= simplify_binary_operation (PLUS
, Pmode
,
10613 XEXP (ad
, 0), XEXP (ad
, 1));
10615 new_rtx
= temp
? temp
: ad
;
/* Only REG + CONST_INT addresses are handled.  NOTE(review): the
   leading "if (" line(s) of this condition were lost in
   extraction.  */
10618 && GET_CODE (new_rtx
) == PLUS
10619 && GET_CODE (XEXP (new_rtx
, 0)) == REG
10620 && GET_CODE (XEXP (new_rtx
, 1)) == CONST_INT
)
10622 offset
= INTVAL (XEXP ((new_rtx
), 1));
10624 /* Choose rounding direction.  Round up if we are >= halfway.  */
10625 if ((offset
& mask
) >= ((mask
+ 1) / 2))
10626 newoffset
= (offset
& ~mask
) + mask
+ 1;
/* NOTE(review): the "else" line between the two assignments was lost
   in extraction.  */
10628 newoffset
= offset
& ~mask
;
10630 /* Ensure that long displacements are aligned.  */
10632 && (GET_MODE_CLASS (mode
) == MODE_FLOAT
10633 || (TARGET_64BIT
&& (mode
) == DImode
)))
10634 newoffset
&= ~(GET_MODE_SIZE (mode
) - 1);
/* If the rounded high part fits in 14 bits, rebuild the address as
   (REG + high) + low, and reload the inner PLUS into a base
   register.  */
10636 if (newoffset
!= 0 && VAL_14_BITS_P (newoffset
))
10638 temp
= gen_rtx_PLUS (Pmode
, XEXP (new_rtx
, 0),
10639 GEN_INT (newoffset
));
10640 ad
= gen_rtx_PLUS (Pmode
, temp
, GEN_INT (offset
- newoffset
));
10641 push_reload (XEXP (ad
, 0), 0, &XEXP (ad
, 0), 0,
10642 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
10643 opnum
, (enum reload_type
) type
);
10651 /* Output address vector.  */
/* Emit the assembly for an ADDR_VEC jump table LAB/BODY: align, define
   the table label, then one ASM_OUTPUT_ADDR_VEC_ELT per CODE_LABEL in
   the vector.  The .begin_brtab/.end_brtab directives bracket the
   branch table for the assembler.
   NOTE(review): lossy extraction -- the return type, braces, and the
   conditionals presumably guarding the fputs calls are missing.  */
10654 pa_output_addr_vec (rtx lab
, rtx body
)
10656 int idx
, vlen
= XVECLEN (body
, 0);
10659 fputs ("\t.align 4\n", asm_out_file
);
10660 targetm
.asm_out
.internal_label (asm_out_file
, "L", CODE_LABEL_NUMBER (lab
));
10662 fputs ("\t.begin_brtab\n", asm_out_file
);
/* One table entry per element of operand vector 0.  */
10663 for (idx
= 0; idx
< vlen
; idx
++)
10665 ASM_OUTPUT_ADDR_VEC_ELT
10666 (asm_out_file
, CODE_LABEL_NUMBER (XEXP (XVECEXP (body
, 0, idx
), 0)));
10669 fputs ("\t.end_brtab\n", asm_out_file
);
10672 /* Output address difference vector.  */
/* Emit the assembly for an ADDR_DIFF_VEC jump table LAB/BODY: each
   element is the difference between a case label (vector operand 1)
   and the base label taken from XEXP (body, 0).
   NOTE(review): lossy extraction -- the return type, braces and some
   guard lines are missing.  */
10675 pa_output_addr_diff_vec (rtx lab
, rtx body
)
10677 rtx base
= XEXP (XEXP (body
, 0), 0);
10678 int idx
, vlen
= XVECLEN (body
, 1);
10680 targetm
.asm_out
.internal_label (asm_out_file
, "L", CODE_LABEL_NUMBER (lab
));
10682 fputs ("\t.begin_brtab\n", asm_out_file
);
/* One label-difference entry per element of operand vector 1.  */
10683 for (idx
= 0; idx
< vlen
; idx
++)
10685 ASM_OUTPUT_ADDR_DIFF_ELT
/* NOTE(review): the first argument line(s) of this macro invocation
   (at least the output stream) were lost in extraction.  */
10688 CODE_LABEL_NUMBER (XEXP (XVECEXP (body
, 1, idx
), 0)),
10689 CODE_LABEL_NUMBER (base
));
10692 fputs ("\t.end_brtab\n", asm_out_file
);
10695 /* This is a helper function for the other atomic operations.  This function
10696 emits a loop that contains SEQ that iterates until a compare-and-swap
10697 operation at the end succeeds.  MEM is the memory to be modified.  SEQ is
10698 a set of instructions that takes a value from OLD_REG as an input and
10699 produces a value in NEW_REG as an output.  Before SEQ, OLD_REG will be
10700 set to the current contents of MEM.  After SEQ, a compare-and-swap will
10701 attempt to update MEM with NEW_REG.  The function returns true when the
10702 loop was generated successfully.  */
/* NOTE(review): lossy extraction -- the return type, braces, the
   emission of SEQ itself, the failure return and the final success
   return are missing from the fragments below.  */
10705 pa_expand_compare_and_swap_loop (rtx mem
, rtx old_reg
, rtx new_reg
, rtx seq
)
10707 machine_mode mode
= GET_MODE (mem
);
10708 rtx_code_label
*label
;
10709 rtx cmp_reg
, success
, oldval
;
10711 /* The loop we want to generate looks like
10717 (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10721 Note that we only do the plain load from memory once.  Subsequent
10722 iterations use the value loaded by the compare-and-swap pattern.  */
10724 label
= gen_label_rtx ();
10725 cmp_reg
= gen_reg_rtx (mode
);
/* Initial plain load; later iterations reuse the CAS result.  */
10727 emit_move_insn (cmp_reg
, mem
);
10728 emit_label (label
);
10729 emit_move_insn (old_reg
, cmp_reg
);
/* Expand a SYNC_SEQ_CST compare-and-swap; on failure OLDVAL holds the
   value actually observed in MEM.  */
10733 success
= NULL_RTX
;
10735 if (!expand_atomic_compare_and_swap (&success
, &oldval
, mem
, old_reg
,
10736 new_reg
, false, MEMMODEL_SYNC_SEQ_CST
,
10740 if (oldval
!= cmp_reg
)
10741 emit_move_insn (cmp_reg
, oldval
);
10743 /* Mark this jump predicted not taken.  */
10744 emit_cmp_and_jump_insns (success
, const0_rtx
, EQ
, const0_rtx
,
10745 GET_MODE (success
), 1, label
,
10746 profile_probability::guessed_never ());
10750 /* This function tries to implement an atomic exchange operation using a
10751 compare_and_swap loop.  VAL is written to *MEM.  The previous contents of
10752 *MEM are returned, using TARGET if possible.  No memory model is required
10753 since a compare_and_swap loop is seq-cst.  */
/* NOTE(review): lossy extraction -- the return type, braces, the
   success return and the final failure return are missing from the
   fragments below.  */
10756 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target
, rtx mem
, rtx val
)
10758 machine_mode mode
= GET_MODE (mem
);
/* Only usable when the target has a compare-and-swap pattern wide
   enough for MODE.  */
10760 if (can_compare_and_swap_p (mode
, true))
10762 if (!target
|| !register_operand (target
, mode
))
10763 target
= gen_reg_rtx (mode
);
/* SEQ is NULL_RTX: the "new value" is simply VAL, i.e. a plain
   exchange through the CAS loop helper above.  */
10764 if (pa_expand_compare_and_swap_loop (mem
, target
, val
, NULL_RTX
))
10771 /* Implement TARGET_CALLEE_COPIES.  The callee is responsible for copying
10772 arguments passed by hidden reference in the 32-bit HP runtime.  Users
10773 can override this behavior for better compatibility with openmp at the
10774 risk of library incompatibilities.  Arguments are always passed by value
10775 in the 64-bit HP runtime.  */
/* All hook arguments are ignored; the result depends only on the
   TARGET_CALLER_COPIES flag, whose inverse is returned.
   NOTE(review): the return type line and braces were lost in
   extraction.  */
10778 pa_callee_copies (cumulative_args_t cum ATTRIBUTE_UNUSED
,
10779 machine_mode mode ATTRIBUTE_UNUSED
,
10780 const_tree type ATTRIBUTE_UNUSED
,
10781 bool named ATTRIBUTE_UNUSED
)
10783 return !TARGET_CALLER_COPIES
;
10786 /* Implement TARGET_HARD_REGNO_NREGS.  */
/* Thin wrapper that delegates to the PA_HARD_REGNO_NREGS macro.
   REGNO is marked ATTRIBUTE_UNUSED even though it is passed to the
   macro; the attribute is harmless (it only suppresses warnings for
   macro expansions that ignore it).  NOTE(review): braces were lost
   in extraction.  */
10788 static unsigned int
10789 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED
, machine_mode mode
)
10791 return PA_HARD_REGNO_NREGS (regno
, mode
);
10794 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
/* Thin wrapper that delegates to the PA_HARD_REGNO_MODE_OK macro.
   NOTE(review): the return type line and braces were lost in
   extraction.  */
10797 pa_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
10799 return PA_HARD_REGNO_MODE_OK (regno
, mode
);
10802 /* Implement TARGET_STARTING_FRAME_OFFSET.
10804 On the 32-bit ports, we reserve one slot for the previous frame
10805 pointer and one fill slot.  The fill slot is for compatibility
10806 with HP compiled programs.  On the 64-bit ports, we reserve one
10807 slot for the previous frame pointer.  */
10809 static HOST_WIDE_INT
10810 pa_starting_frame_offset (void)
/* NOTE(review): the function body was lost entirely in extraction.
   Per the comment above it should return the reserved-slot frame
   offset, which differs between the 32-bit and 64-bit ports --
   confirm against the upstream source.  */
10815 /* Figure out the size in words of the function argument. The size
10816 returned by this function should always be greater than zero because
10817 we pass variable and zero sized objects by reference. */
10820 pa_function_arg_size (machine_mode mode
, const_tree type
)
10822 HOST_WIDE_INT size
;
10824 size
= mode
!= BLKmode
? GET_MODE_SIZE (mode
) : int_size_in_bytes (type
);
10825 return CEIL (size
, UNITS_PER_WORD
);