1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992-2015 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
27 #include "hard-reg-set.h"
28 #include "insn-config.h"
29 #include "conditions.h"
30 #include "insn-attr.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "stringpool.h"
50 #include "insn-codes.h"
53 #include "diagnostic-core.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "dominance.h"
66 #include "cfgcleanup.h"
67 #include "basic-block.h"
72 #include "target-def.h"
74 /* Return nonzero if there is a bypass for the output of
75 OUT_INSN and the fp store IN_INSN. */
77 pa_fpstore_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
79 machine_mode store_mode
;
80 machine_mode other_mode
;
83 if (recog_memoized (in_insn
) < 0
84 || (get_attr_type (in_insn
) != TYPE_FPSTORE
85 && get_attr_type (in_insn
) != TYPE_FPSTORE_LOAD
)
86 || recog_memoized (out_insn
) < 0)
89 store_mode
= GET_MODE (SET_SRC (PATTERN (in_insn
)));
91 set
= single_set (out_insn
);
95 other_mode
= GET_MODE (SET_SRC (set
));
97 return (GET_MODE_SIZE (store_mode
) == GET_MODE_SIZE (other_mode
));
101 #ifndef DO_FRAME_NOTES
102 #ifdef INCOMING_RETURN_ADDR_RTX
103 #define DO_FRAME_NOTES 1
105 #define DO_FRAME_NOTES 0
109 static void pa_option_override (void);
110 static void copy_reg_pointer (rtx
, rtx
);
111 static void fix_range (const char *);
112 static int hppa_register_move_cost (machine_mode mode
, reg_class_t
,
114 static int hppa_address_cost (rtx
, machine_mode mode
, addr_space_t
, bool);
115 static bool hppa_rtx_costs (rtx
, int, int, int, int *, bool);
116 static inline rtx
force_mode (machine_mode
, rtx
);
117 static void pa_reorg (void);
118 static void pa_combine_instructions (void);
119 static int pa_can_combine_p (rtx_insn
*, rtx_insn
*, rtx_insn
*, int, rtx
,
121 static bool forward_branch_p (rtx_insn
*);
122 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT
, unsigned *);
123 static void compute_zdepdi_operands (unsigned HOST_WIDE_INT
, unsigned *);
124 static int compute_movmem_length (rtx_insn
*);
125 static int compute_clrmem_length (rtx_insn
*);
126 static bool pa_assemble_integer (rtx
, unsigned int, int);
127 static void remove_useless_addtr_insns (int);
128 static void store_reg (int, HOST_WIDE_INT
, int);
129 static void store_reg_modify (int, int, HOST_WIDE_INT
);
130 static void load_reg (int, HOST_WIDE_INT
, int);
131 static void set_reg_plus_d (int, int, HOST_WIDE_INT
, int);
132 static rtx
pa_function_value (const_tree
, const_tree
, bool);
133 static rtx
pa_libcall_value (machine_mode
, const_rtx
);
134 static bool pa_function_value_regno_p (const unsigned int);
135 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT
);
136 static void update_total_code_bytes (unsigned int);
137 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT
);
138 static int pa_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int);
139 static int pa_adjust_priority (rtx_insn
*, int);
140 static int pa_issue_rate (void);
141 static int pa_reloc_rw_mask (void);
142 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED
;
143 static section
*pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED
;
144 static section
*pa_select_section (tree
, int, unsigned HOST_WIDE_INT
)
146 static void pa_encode_section_info (tree
, rtx
, int);
147 static const char *pa_strip_name_encoding (const char *);
148 static bool pa_function_ok_for_sibcall (tree
, tree
);
149 static void pa_globalize_label (FILE *, const char *)
151 static void pa_asm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
152 HOST_WIDE_INT
, tree
);
153 #if !defined(USE_COLLECT2)
154 static void pa_asm_out_constructor (rtx
, int);
155 static void pa_asm_out_destructor (rtx
, int);
157 static void pa_init_builtins (void);
158 static rtx
pa_expand_builtin (tree
, rtx
, rtx
, machine_mode mode
, int);
159 static rtx
hppa_builtin_saveregs (void);
160 static void hppa_va_start (tree
, rtx
);
161 static tree
hppa_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
162 static bool pa_scalar_mode_supported_p (machine_mode
);
163 static bool pa_commutative_p (const_rtx x
, int outer_code
);
164 static void copy_fp_args (rtx_insn
*) ATTRIBUTE_UNUSED
;
165 static int length_fp_args (rtx_insn
*) ATTRIBUTE_UNUSED
;
166 static rtx
hppa_legitimize_address (rtx
, rtx
, machine_mode
);
167 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED
;
168 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED
;
169 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED
;
170 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED
;
171 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED
;
172 static void pa_som_file_start (void) ATTRIBUTE_UNUSED
;
173 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED
;
174 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED
;
175 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED
;
176 static void output_deferred_plabels (void);
177 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED
;
178 #ifdef ASM_OUTPUT_EXTERNAL_REAL
179 static void pa_hpux_file_end (void);
181 static void pa_init_libfuncs (void);
182 static rtx
pa_struct_value_rtx (tree
, int);
183 static bool pa_pass_by_reference (cumulative_args_t
, machine_mode
,
185 static int pa_arg_partial_bytes (cumulative_args_t
, machine_mode
,
187 static void pa_function_arg_advance (cumulative_args_t
, machine_mode
,
189 static rtx
pa_function_arg (cumulative_args_t
, machine_mode
,
191 static unsigned int pa_function_arg_boundary (machine_mode
, const_tree
);
192 static struct machine_function
* pa_init_machine_status (void);
193 static reg_class_t
pa_secondary_reload (bool, rtx
, reg_class_t
,
195 secondary_reload_info
*);
196 static void pa_extra_live_on_entry (bitmap
);
197 static machine_mode
pa_promote_function_mode (const_tree
,
201 static void pa_asm_trampoline_template (FILE *);
202 static void pa_trampoline_init (rtx
, tree
, rtx
);
203 static rtx
pa_trampoline_adjust_address (rtx
);
204 static rtx
pa_delegitimize_address (rtx
);
205 static bool pa_print_operand_punct_valid_p (unsigned char);
206 static rtx
pa_internal_arg_pointer (void);
207 static bool pa_can_eliminate (const int, const int);
208 static void pa_conditional_register_usage (void);
209 static machine_mode
pa_c_mode_for_suffix (char);
210 static section
*pa_function_section (tree
, enum node_frequency
, bool, bool);
211 static bool pa_cannot_force_const_mem (machine_mode
, rtx
);
212 static bool pa_legitimate_constant_p (machine_mode
, rtx
);
213 static unsigned int pa_section_type_flags (tree
, const char *, int);
214 static bool pa_legitimate_address_p (machine_mode
, rtx
, bool);
216 /* The following extra sections are only used for SOM. */
217 static GTY(()) section
*som_readonly_data_section
;
218 static GTY(()) section
*som_one_only_readonly_data_section
;
219 static GTY(()) section
*som_one_only_data_section
;
220 static GTY(()) section
*som_tm_clone_table_section
;
222 /* Counts for the number of callee-saved general and floating point
223 registers which were saved by the current function's prologue. */
224 static int gr_saved
, fr_saved
;
226 /* Boolean indicating whether the return pointer was saved by the
227 current function's prologue. */
228 static bool rp_saved
;
230 static rtx
find_addr_reg (rtx
);
232 /* Keep track of the number of bytes we have output in the CODE subspace
233 during this compilation so we'll know when to emit inline long-calls. */
234 unsigned long total_code_bytes
;
236 /* The last address of the previous function plus the number of bytes in
237 associated thunks that have been output. This is used to determine if
238 a thunk can use an IA-relative branch to reach its target function. */
239 static unsigned int last_address
;
241 /* Variables to handle plabels that we discover are necessary at assembly
242 output time. They are output after the current function. */
243 struct GTY(()) deferred_plabel
248 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel
*
250 static size_t n_deferred_plabels
= 0;
252 /* Initialize the GCC target structure. */
254 #undef TARGET_OPTION_OVERRIDE
255 #define TARGET_OPTION_OVERRIDE pa_option_override
257 #undef TARGET_ASM_ALIGNED_HI_OP
258 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
259 #undef TARGET_ASM_ALIGNED_SI_OP
260 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
261 #undef TARGET_ASM_ALIGNED_DI_OP
262 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
263 #undef TARGET_ASM_UNALIGNED_HI_OP
264 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
265 #undef TARGET_ASM_UNALIGNED_SI_OP
266 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
267 #undef TARGET_ASM_UNALIGNED_DI_OP
268 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
269 #undef TARGET_ASM_INTEGER
270 #define TARGET_ASM_INTEGER pa_assemble_integer
272 #undef TARGET_ASM_FUNCTION_PROLOGUE
273 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
274 #undef TARGET_ASM_FUNCTION_EPILOGUE
275 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
277 #undef TARGET_FUNCTION_VALUE
278 #define TARGET_FUNCTION_VALUE pa_function_value
279 #undef TARGET_LIBCALL_VALUE
280 #define TARGET_LIBCALL_VALUE pa_libcall_value
281 #undef TARGET_FUNCTION_VALUE_REGNO_P
282 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
284 #undef TARGET_LEGITIMIZE_ADDRESS
285 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
287 #undef TARGET_SCHED_ADJUST_COST
288 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
289 #undef TARGET_SCHED_ADJUST_PRIORITY
290 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
291 #undef TARGET_SCHED_ISSUE_RATE
292 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
294 #undef TARGET_ENCODE_SECTION_INFO
295 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
296 #undef TARGET_STRIP_NAME_ENCODING
297 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
299 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
300 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
302 #undef TARGET_COMMUTATIVE_P
303 #define TARGET_COMMUTATIVE_P pa_commutative_p
305 #undef TARGET_ASM_OUTPUT_MI_THUNK
306 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
307 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
308 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
310 #undef TARGET_ASM_FILE_END
311 #ifdef ASM_OUTPUT_EXTERNAL_REAL
312 #define TARGET_ASM_FILE_END pa_hpux_file_end
314 #define TARGET_ASM_FILE_END output_deferred_plabels
317 #undef TARGET_ASM_RELOC_RW_MASK
318 #define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
320 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
321 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
323 #if !defined(USE_COLLECT2)
324 #undef TARGET_ASM_CONSTRUCTOR
325 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
326 #undef TARGET_ASM_DESTRUCTOR
327 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
330 #undef TARGET_INIT_BUILTINS
331 #define TARGET_INIT_BUILTINS pa_init_builtins
333 #undef TARGET_EXPAND_BUILTIN
334 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
336 #undef TARGET_REGISTER_MOVE_COST
337 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
338 #undef TARGET_RTX_COSTS
339 #define TARGET_RTX_COSTS hppa_rtx_costs
340 #undef TARGET_ADDRESS_COST
341 #define TARGET_ADDRESS_COST hppa_address_cost
343 #undef TARGET_MACHINE_DEPENDENT_REORG
344 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
346 #undef TARGET_INIT_LIBFUNCS
347 #define TARGET_INIT_LIBFUNCS pa_init_libfuncs
349 #undef TARGET_PROMOTE_FUNCTION_MODE
350 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
351 #undef TARGET_PROMOTE_PROTOTYPES
352 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
354 #undef TARGET_STRUCT_VALUE_RTX
355 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
356 #undef TARGET_RETURN_IN_MEMORY
357 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
358 #undef TARGET_MUST_PASS_IN_STACK
359 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
360 #undef TARGET_PASS_BY_REFERENCE
361 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
362 #undef TARGET_CALLEE_COPIES
363 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
364 #undef TARGET_ARG_PARTIAL_BYTES
365 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
366 #undef TARGET_FUNCTION_ARG
367 #define TARGET_FUNCTION_ARG pa_function_arg
368 #undef TARGET_FUNCTION_ARG_ADVANCE
369 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
370 #undef TARGET_FUNCTION_ARG_BOUNDARY
371 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
373 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
374 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
375 #undef TARGET_EXPAND_BUILTIN_VA_START
376 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
377 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
378 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
380 #undef TARGET_SCALAR_MODE_SUPPORTED_P
381 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
383 #undef TARGET_CANNOT_FORCE_CONST_MEM
384 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
386 #undef TARGET_SECONDARY_RELOAD
387 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
389 #undef TARGET_EXTRA_LIVE_ON_ENTRY
390 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
392 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
393 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
394 #undef TARGET_TRAMPOLINE_INIT
395 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
396 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
397 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
398 #undef TARGET_DELEGITIMIZE_ADDRESS
399 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
400 #undef TARGET_INTERNAL_ARG_POINTER
401 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
402 #undef TARGET_CAN_ELIMINATE
403 #define TARGET_CAN_ELIMINATE pa_can_eliminate
404 #undef TARGET_CONDITIONAL_REGISTER_USAGE
405 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
406 #undef TARGET_C_MODE_FOR_SUFFIX
407 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
408 #undef TARGET_ASM_FUNCTION_SECTION
409 #define TARGET_ASM_FUNCTION_SECTION pa_function_section
411 #undef TARGET_LEGITIMATE_CONSTANT_P
412 #define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
413 #undef TARGET_SECTION_TYPE_FLAGS
414 #define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
415 #undef TARGET_LEGITIMATE_ADDRESS_P
416 #define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
418 struct gcc_target targetm
= TARGET_INITIALIZER
;
420 /* Parse the -mfixed-range= option string. */
423 fix_range (const char *const_str
)
426 char *str
, *dash
, *comma
;
428 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
429 REG2 are either register names or register numbers. The effect
430 of this option is to mark the registers in the range from REG1 to
431 REG2 as ``fixed'' so they won't be used by the compiler. This is
432 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
434 i
= strlen (const_str
);
435 str
= (char *) alloca (i
+ 1);
436 memcpy (str
, const_str
, i
+ 1);
440 dash
= strchr (str
, '-');
443 warning (0, "value of -mfixed-range must have form REG1-REG2");
448 comma
= strchr (dash
+ 1, ',');
452 first
= decode_reg_name (str
);
455 warning (0, "unknown register name: %s", str
);
459 last
= decode_reg_name (dash
+ 1);
462 warning (0, "unknown register name: %s", dash
+ 1);
470 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
474 for (i
= first
; i
<= last
; ++i
)
475 fixed_regs
[i
] = call_used_regs
[i
] = 1;
484 /* Check if all floating point registers have been fixed. */
485 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
490 target_flags
|= MASK_DISABLE_FPREGS
;
493 /* Implement the TARGET_OPTION_OVERRIDE hook. */
496 pa_option_override (void)
499 cl_deferred_option
*opt
;
500 vec
<cl_deferred_option
> *v
501 = (vec
<cl_deferred_option
> *) pa_deferred_options
;
504 FOR_EACH_VEC_ELT (*v
, i
, opt
)
506 switch (opt
->opt_index
)
508 case OPT_mfixed_range_
:
509 fix_range (opt
->arg
);
517 if (flag_pic
&& TARGET_PORTABLE_RUNTIME
)
519 warning (0, "PIC code generation is not supported in the portable runtime model");
522 if (flag_pic
&& TARGET_FAST_INDIRECT_CALLS
)
524 warning (0, "PIC code generation is not compatible with fast indirect calls");
527 if (! TARGET_GAS
&& write_symbols
!= NO_DEBUG
)
529 warning (0, "-g is only supported when using GAS on this processor,");
530 warning (0, "-g option disabled");
531 write_symbols
= NO_DEBUG
;
534 /* We only support the "big PIC" model now. And we always generate PIC
535 code when in 64bit mode. */
536 if (flag_pic
== 1 || TARGET_64BIT
)
539 /* Disable -freorder-blocks-and-partition as we don't support hot and
540 cold partitioning. */
541 if (flag_reorder_blocks_and_partition
)
543 inform (input_location
,
544 "-freorder-blocks-and-partition does not work "
545 "on this architecture");
546 flag_reorder_blocks_and_partition
= 0;
547 flag_reorder_blocks
= 1;
550 /* We can't guarantee that .dword is available for 32-bit targets. */
551 if (UNITS_PER_WORD
== 4)
552 targetm
.asm_out
.aligned_op
.di
= NULL
;
554 /* The unaligned ops are only available when using GAS. */
557 targetm
.asm_out
.unaligned_op
.hi
= NULL
;
558 targetm
.asm_out
.unaligned_op
.si
= NULL
;
559 targetm
.asm_out
.unaligned_op
.di
= NULL
;
562 init_machine_status
= pa_init_machine_status
;
567 PA_BUILTIN_COPYSIGNQ
,
570 PA_BUILTIN_HUGE_VALQ
,
574 static GTY(()) tree pa_builtins
[(int) PA_BUILTIN_max
];
577 pa_init_builtins (void)
579 #ifdef DONT_HAVE_FPUTC_UNLOCKED
581 tree decl
= builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED
);
582 set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED
, decl
,
583 builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED
));
590 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITE
)) != NULL_TREE
)
591 set_user_assembler_name (decl
, "_Isfinite");
592 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITEF
)) != NULL_TREE
)
593 set_user_assembler_name (decl
, "_Isfinitef");
597 if (HPUX_LONG_DOUBLE_LIBRARY
)
601 /* Under HPUX, the __float128 type is a synonym for "long double". */
602 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
605 /* TFmode support builtins. */
606 ftype
= build_function_type_list (long_double_type_node
,
607 long_double_type_node
,
609 decl
= add_builtin_function ("__builtin_fabsq", ftype
,
610 PA_BUILTIN_FABSQ
, BUILT_IN_MD
,
611 "_U_Qfabs", NULL_TREE
);
612 TREE_READONLY (decl
) = 1;
613 pa_builtins
[PA_BUILTIN_FABSQ
] = decl
;
615 ftype
= build_function_type_list (long_double_type_node
,
616 long_double_type_node
,
617 long_double_type_node
,
619 decl
= add_builtin_function ("__builtin_copysignq", ftype
,
620 PA_BUILTIN_COPYSIGNQ
, BUILT_IN_MD
,
621 "_U_Qfcopysign", NULL_TREE
);
622 TREE_READONLY (decl
) = 1;
623 pa_builtins
[PA_BUILTIN_COPYSIGNQ
] = decl
;
625 ftype
= build_function_type_list (long_double_type_node
, NULL_TREE
);
626 decl
= add_builtin_function ("__builtin_infq", ftype
,
627 PA_BUILTIN_INFQ
, BUILT_IN_MD
,
629 pa_builtins
[PA_BUILTIN_INFQ
] = decl
;
631 decl
= add_builtin_function ("__builtin_huge_valq", ftype
,
632 PA_BUILTIN_HUGE_VALQ
, BUILT_IN_MD
,
634 pa_builtins
[PA_BUILTIN_HUGE_VALQ
] = decl
;
639 pa_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
640 machine_mode mode ATTRIBUTE_UNUSED
,
641 int ignore ATTRIBUTE_UNUSED
)
643 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
644 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
648 case PA_BUILTIN_FABSQ
:
649 case PA_BUILTIN_COPYSIGNQ
:
650 return expand_call (exp
, target
, ignore
);
652 case PA_BUILTIN_INFQ
:
653 case PA_BUILTIN_HUGE_VALQ
:
655 machine_mode target_mode
= TYPE_MODE (TREE_TYPE (exp
));
660 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, target_mode
);
662 tmp
= validize_mem (force_const_mem (target_mode
, tmp
));
665 target
= gen_reg_rtx (target_mode
);
667 emit_move_insn (target
, tmp
);
678 /* Function to init struct machine_function.
679 This will be called, via a pointer variable,
680 from push_function_context. */
682 static struct machine_function
*
683 pa_init_machine_status (void)
685 return ggc_cleared_alloc
<machine_function
> ();
688 /* If FROM is a probable pointer register, mark TO as a probable
689 pointer register with the same pointer alignment as FROM. */
692 copy_reg_pointer (rtx to
, rtx from
)
694 if (REG_POINTER (from
))
695 mark_reg_pointer (to
, REGNO_POINTER_ALIGN (REGNO (from
)));
698 /* Return 1 if X contains a symbolic expression. We know these
699 expressions will have one of a few well defined forms, so
700 we need only check those forms. */
702 pa_symbolic_expression_p (rtx x
)
705 /* Strip off any HIGH. */
706 if (GET_CODE (x
) == HIGH
)
709 return symbolic_operand (x
, VOIDmode
);
712 /* Accept any constant that can be moved in one instruction into a
715 pa_cint_ok_for_move (HOST_WIDE_INT ival
)
717 /* OK if ldo, ldil, or zdepi, can be used. */
718 return (VAL_14_BITS_P (ival
)
719 || pa_ldil_cint_p (ival
)
720 || pa_zdepi_cint_p (ival
));
723 /* True iff ldil can be used to load this CONST_INT. The least
724 significant 11 bits of the value must be zero and the value must
725 not change sign when extended from 32 to 64 bits. */
727 pa_ldil_cint_p (HOST_WIDE_INT ival
)
729 HOST_WIDE_INT x
= ival
& (((HOST_WIDE_INT
) -1 << 31) | 0x7ff);
731 return x
== 0 || x
== ((HOST_WIDE_INT
) -1 << 31);
734 /* True iff zdepi can be used to generate this CONST_INT.
735 zdepi first sign extends a 5-bit signed number to a given field
736 length, then places this field anywhere in a zero. */
738 pa_zdepi_cint_p (unsigned HOST_WIDE_INT x
)
740 unsigned HOST_WIDE_INT lsb_mask
, t
;
742 /* This might not be obvious, but it's at least fast.
743 This function is critical; we don't have the time loops would take. */
745 t
= ((x
>> 4) + lsb_mask
) & ~(lsb_mask
- 1);
746 /* Return true iff t is a power of two. */
747 return ((t
& (t
- 1)) == 0);
750 /* True iff depi or extru can be used to compute (reg & mask).
751 Accept bit pattern like these:
756 pa_and_mask_p (unsigned HOST_WIDE_INT mask
)
759 mask
+= mask
& -mask
;
760 return (mask
& (mask
- 1)) == 0;
763 /* True iff depi can be used to compute (reg | MASK). */
765 pa_ior_mask_p (unsigned HOST_WIDE_INT mask
)
767 mask
+= mask
& -mask
;
768 return (mask
& (mask
- 1)) == 0;
771 /* Legitimize PIC addresses. If the address is already
772 position-independent, we return ORIG. Newly generated
773 position-independent addresses go to REG. If we need more
774 than one register, we lose. */
777 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
781 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig
));
783 /* Labels need special handling. */
784 if (pic_label_operand (orig
, mode
))
788 /* We do not want to go through the movXX expanders here since that
789 would create recursion.
791 Nor do we really want to call a generator for a named pattern
792 since that requires multiple patterns if we want to support
795 So instead we just emit the raw set, which avoids the movXX
796 expanders completely. */
797 mark_reg_pointer (reg
, BITS_PER_UNIT
);
798 insn
= emit_insn (gen_rtx_SET (reg
, orig
));
800 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
801 add_reg_note (insn
, REG_EQUAL
, orig
);
803 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
804 and update LABEL_NUSES because this is not done automatically. */
805 if (reload_in_progress
|| reload_completed
)
807 /* Extract LABEL_REF. */
808 if (GET_CODE (orig
) == CONST
)
809 orig
= XEXP (XEXP (orig
, 0), 0);
810 /* Extract CODE_LABEL. */
811 orig
= XEXP (orig
, 0);
812 add_reg_note (insn
, REG_LABEL_OPERAND
, orig
);
813 /* Make sure we have label and not a note. */
815 LABEL_NUSES (orig
)++;
817 crtl
->uses_pic_offset_table
= 1;
820 if (GET_CODE (orig
) == SYMBOL_REF
)
827 /* Before reload, allocate a temporary register for the intermediate
828 result. This allows the sequence to be deleted when the final
829 result is unused and the insns are trivially dead. */
830 tmp_reg
= ((reload_in_progress
|| reload_completed
)
831 ? reg
: gen_reg_rtx (Pmode
));
833 if (function_label_operand (orig
, VOIDmode
))
835 /* Force function label into memory in word mode. */
836 orig
= XEXP (force_const_mem (word_mode
, orig
), 0);
837 /* Load plabel address from DLT. */
838 emit_move_insn (tmp_reg
,
839 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
840 gen_rtx_HIGH (word_mode
, orig
)));
842 = gen_const_mem (Pmode
,
843 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
844 gen_rtx_UNSPEC (Pmode
,
847 emit_move_insn (reg
, pic_ref
);
848 /* Now load address of function descriptor. */
849 pic_ref
= gen_rtx_MEM (Pmode
, reg
);
853 /* Load symbol reference from DLT. */
854 emit_move_insn (tmp_reg
,
855 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
856 gen_rtx_HIGH (word_mode
, orig
)));
858 = gen_const_mem (Pmode
,
859 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
860 gen_rtx_UNSPEC (Pmode
,
865 crtl
->uses_pic_offset_table
= 1;
866 mark_reg_pointer (reg
, BITS_PER_UNIT
);
867 insn
= emit_move_insn (reg
, pic_ref
);
869 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
870 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
874 else if (GET_CODE (orig
) == CONST
)
878 if (GET_CODE (XEXP (orig
, 0)) == PLUS
879 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
883 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
885 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
886 orig
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
887 base
== reg
? 0 : reg
);
889 if (GET_CODE (orig
) == CONST_INT
)
891 if (INT_14_BITS (orig
))
892 return plus_constant (Pmode
, base
, INTVAL (orig
));
893 orig
= force_reg (Pmode
, orig
);
895 pic_ref
= gen_rtx_PLUS (Pmode
, base
, orig
);
896 /* Likewise, should we set special REG_NOTEs here? */
902 static GTY(()) rtx gen_tls_tga
;
905 gen_tls_get_addr (void)
908 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
913 hppa_tls_call (rtx arg
)
917 ret
= gen_reg_rtx (Pmode
);
918 emit_library_call_value (gen_tls_get_addr (), ret
,
919 LCT_CONST
, Pmode
, 1, arg
, Pmode
);
925 legitimize_tls_address (rtx addr
)
927 rtx ret
, tmp
, t1
, t2
, tp
;
930 /* Currently, we can't handle anything but a SYMBOL_REF. */
931 if (GET_CODE (addr
) != SYMBOL_REF
)
934 switch (SYMBOL_REF_TLS_MODEL (addr
))
936 case TLS_MODEL_GLOBAL_DYNAMIC
:
937 tmp
= gen_reg_rtx (Pmode
);
939 emit_insn (gen_tgd_load_pic (tmp
, addr
));
941 emit_insn (gen_tgd_load (tmp
, addr
));
942 ret
= hppa_tls_call (tmp
);
945 case TLS_MODEL_LOCAL_DYNAMIC
:
946 ret
= gen_reg_rtx (Pmode
);
947 tmp
= gen_reg_rtx (Pmode
);
950 emit_insn (gen_tld_load_pic (tmp
, addr
));
952 emit_insn (gen_tld_load (tmp
, addr
));
953 t1
= hppa_tls_call (tmp
);
956 t2
= gen_reg_rtx (Pmode
);
957 emit_libcall_block (insn
, t2
, t1
,
958 gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
960 emit_insn (gen_tld_offset_load (ret
, addr
, t2
));
963 case TLS_MODEL_INITIAL_EXEC
:
964 tp
= gen_reg_rtx (Pmode
);
965 tmp
= gen_reg_rtx (Pmode
);
966 ret
= gen_reg_rtx (Pmode
);
967 emit_insn (gen_tp_load (tp
));
969 emit_insn (gen_tie_load_pic (tmp
, addr
));
971 emit_insn (gen_tie_load (tmp
, addr
));
972 emit_move_insn (ret
, gen_rtx_PLUS (Pmode
, tp
, tmp
));
975 case TLS_MODEL_LOCAL_EXEC
:
976 tp
= gen_reg_rtx (Pmode
);
977 ret
= gen_reg_rtx (Pmode
);
978 emit_insn (gen_tp_load (tp
));
979 emit_insn (gen_tle_load (ret
, addr
, tp
));
989 /* Helper for hppa_legitimize_address. Given X, return true if it
990 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
992 This respectively represent canonical shift-add rtxs or scaled
995 mem_shadd_or_shadd_rtx_p (rtx x
)
997 return ((GET_CODE (x
) == ASHIFT
998 || GET_CODE (x
) == MULT
)
999 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1000 && ((GET_CODE (x
) == ASHIFT
1001 && pa_shadd_constant_p (INTVAL (XEXP (x
, 1))))
1002 || (GET_CODE (x
) == MULT
1003 && pa_mem_shadd_constant_p (INTVAL (XEXP (x
, 1))))));
1006 /* Try machine-dependent ways of modifying an illegitimate address
1007 to be legitimate. If we find one, return the new, valid address.
1008 This macro is used in only one place: `memory_address' in explow.c.
1010 OLDX is the address as it was before break_out_memory_refs was called.
1011 In some cases it is useful to look at this to decide what needs to be done.
1013 It is always safe for this macro to do nothing. It exists to recognize
1014 opportunities to optimize the output.
1016 For the PA, transform:
1018 memory(X + <large int>)
1022 if (<large int> & mask) >= 16
1023 Y = (<large int> & ~mask) + mask + 1 Round up.
1025 Y = (<large int> & ~mask) Round down.
1027 memory (Z + (<large int> - Y));
1029 This is for CSE to find several similar references, and only use one Z.
1031 X can either be a SYMBOL_REF or REG, but because combine cannot
1032 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1033 D will not fit in 14 bits.
1035 MODE_FLOAT references allow displacements which fit in 5 bits, so use
1038 MODE_INT references allow displacements which fit in 14 bits, so use
1041 This relies on the fact that most mode MODE_FLOAT references will use FP
1042 registers and most mode MODE_INT references will use integer registers.
1043 (In the rare case of an FP register used in an integer MODE, we depend
1044 on secondary reloads to clean things up.)
1047 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1048 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
1049 addressing modes to be used).
1051 Note that the addresses passed into hppa_legitimize_address always
1052 come from a MEM, so we only have to match the MULT form on incoming
1053 addresses. But to be future proof we also match the ASHIFT form.
1055 However, this routine always places those shift-add sequences into
1056 registers, so we have to generate the ASHIFT form as our output.
1058 Put X and Z into registers. Then put the entire expression into
1062 hppa_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
1067 /* We need to canonicalize the order of operands in unscaled indexed
1068 addresses since the code that checks if an address is valid doesn't
1069 always try both orders. */
1070 if (!TARGET_NO_SPACE_REGS
1071 && GET_CODE (x
) == PLUS
1072 && GET_MODE (x
) == Pmode
1073 && REG_P (XEXP (x
, 0))
1074 && REG_P (XEXP (x
, 1))
1075 && REG_POINTER (XEXP (x
, 0))
1076 && !REG_POINTER (XEXP (x
, 1)))
1077 return gen_rtx_PLUS (Pmode
, XEXP (x
, 1), XEXP (x
, 0));
1079 if (tls_referenced_p (x
))
1080 return legitimize_tls_address (x
);
1082 return legitimize_pic_address (x
, mode
, gen_reg_rtx (Pmode
));
1084 /* Strip off CONST. */
1085 if (GET_CODE (x
) == CONST
)
1088 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1089 That should always be safe. */
1090 if (GET_CODE (x
) == PLUS
1091 && GET_CODE (XEXP (x
, 0)) == REG
1092 && GET_CODE (XEXP (x
, 1)) == SYMBOL_REF
)
1094 rtx reg
= force_reg (Pmode
, XEXP (x
, 1));
1095 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg
, XEXP (x
, 0)));
1098 /* Note we must reject symbols which represent function addresses
1099 since the assembler/linker can't handle arithmetic on plabels. */
1100 if (GET_CODE (x
) == PLUS
1101 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1102 && ((GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
1103 && !FUNCTION_NAME_P (XSTR (XEXP (x
, 0), 0)))
1104 || GET_CODE (XEXP (x
, 0)) == REG
))
1106 rtx int_part
, ptr_reg
;
1108 int offset
= INTVAL (XEXP (x
, 1));
1111 mask
= (GET_MODE_CLASS (mode
) == MODE_FLOAT
1112 && !INT14_OK_STRICT
? 0x1f : 0x3fff);
1114 /* Choose which way to round the offset. Round up if we
1115 are >= halfway to the next boundary. */
1116 if ((offset
& mask
) >= ((mask
+ 1) / 2))
1117 newoffset
= (offset
& ~ mask
) + mask
+ 1;
1119 newoffset
= (offset
& ~ mask
);
1121 /* If the newoffset will not fit in 14 bits (ldo), then
1122 handling this would take 4 or 5 instructions (2 to load
1123 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1124 add the new offset and the SYMBOL_REF.) Combine can
1125 not handle 4->2 or 5->2 combinations, so do not create
1127 if (! VAL_14_BITS_P (newoffset
)
1128 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
)
1130 rtx const_part
= plus_constant (Pmode
, XEXP (x
, 0), newoffset
);
1133 gen_rtx_HIGH (Pmode
, const_part
));
1136 gen_rtx_LO_SUM (Pmode
,
1137 tmp_reg
, const_part
));
1141 if (! VAL_14_BITS_P (newoffset
))
1142 int_part
= force_reg (Pmode
, GEN_INT (newoffset
));
1144 int_part
= GEN_INT (newoffset
);
1146 ptr_reg
= force_reg (Pmode
,
1147 gen_rtx_PLUS (Pmode
,
1148 force_reg (Pmode
, XEXP (x
, 0)),
1151 return plus_constant (Pmode
, ptr_reg
, offset
- newoffset
);
1154 /* Handle (plus (mult (a) (mem_shadd_constant)) (b)). */
1156 if (GET_CODE (x
) == PLUS
1157 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1158 && (OBJECT_P (XEXP (x
, 1))
1159 || GET_CODE (XEXP (x
, 1)) == SUBREG
)
1160 && GET_CODE (XEXP (x
, 1)) != CONST
)
1162 /* If we were given a MULT, we must fix the constant
1163 as we're going to create the ASHIFT form. */
1164 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1165 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1166 shift_val
= exact_log2 (shift_val
);
1170 if (GET_CODE (reg1
) != REG
)
1171 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1173 reg2
= XEXP (XEXP (x
, 0), 0);
1174 if (GET_CODE (reg2
) != REG
)
1175 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1177 return force_reg (Pmode
,
1178 gen_rtx_PLUS (Pmode
,
1179 gen_rtx_ASHIFT (Pmode
, reg2
,
1180 GEN_INT (shift_val
)),
1184 /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).
1186 Only do so for floating point modes since this is more speculative
1187 and we lose if it's an integer store. */
1188 if (GET_CODE (x
) == PLUS
1189 && GET_CODE (XEXP (x
, 0)) == PLUS
1190 && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x
, 0), 0))
1191 && (mode
== SFmode
|| mode
== DFmode
))
1193 int shift_val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
1195 /* If we were given a MULT, we must fix the constant
1196 as we're going to create the ASHIFT form. */
1197 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
)
1198 shift_val
= exact_log2 (shift_val
);
1200 /* Try and figure out what to use as a base register. */
1201 rtx reg1
, reg2
, base
, idx
;
1203 reg1
= XEXP (XEXP (x
, 0), 1);
1208 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1209 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
1210 it's a base register below. */
1211 if (GET_CODE (reg1
) != REG
)
1212 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1214 if (GET_CODE (reg2
) != REG
)
1215 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1217 /* Figure out what the base and index are. */
1219 if (GET_CODE (reg1
) == REG
1220 && REG_POINTER (reg1
))
1223 idx
= gen_rtx_PLUS (Pmode
,
1224 gen_rtx_ASHIFT (Pmode
,
1225 XEXP (XEXP (XEXP (x
, 0), 0), 0),
1226 GEN_INT (shift_val
)),
1229 else if (GET_CODE (reg2
) == REG
1230 && REG_POINTER (reg2
))
1239 /* If the index adds a large constant, try to scale the
1240 constant so that it can be loaded with only one insn. */
1241 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1242 && VAL_14_BITS_P (INTVAL (XEXP (idx
, 1))
1243 / INTVAL (XEXP (XEXP (idx
, 0), 1)))
1244 && INTVAL (XEXP (idx
, 1)) % INTVAL (XEXP (XEXP (idx
, 0), 1)) == 0)
1246 /* Divide the CONST_INT by the scale factor, then add it to A. */
1247 int val
= INTVAL (XEXP (idx
, 1));
1248 val
/= (1 << shift_val
);
1250 reg1
= XEXP (XEXP (idx
, 0), 0);
1251 if (GET_CODE (reg1
) != REG
)
1252 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1254 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg1
, GEN_INT (val
)));
1256 /* We can now generate a simple scaled indexed address. */
1259 (Pmode
, gen_rtx_PLUS (Pmode
,
1260 gen_rtx_ASHIFT (Pmode
, reg1
,
1261 GEN_INT (shift_val
)),
1265 /* If B + C is still a valid base register, then add them. */
1266 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1267 && INTVAL (XEXP (idx
, 1)) <= 4096
1268 && INTVAL (XEXP (idx
, 1)) >= -4096)
1272 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, XEXP (idx
, 1)));
1274 reg2
= XEXP (XEXP (idx
, 0), 0);
1275 if (GET_CODE (reg2
) != CONST_INT
)
1276 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1278 return force_reg (Pmode
,
1279 gen_rtx_PLUS (Pmode
,
1280 gen_rtx_ASHIFT (Pmode
, reg2
,
1281 GEN_INT (shift_val
)),
1285 /* Get the index into a register, then add the base + index and
1286 return a register holding the result. */
1288 /* First get A into a register. */
1289 reg1
= XEXP (XEXP (idx
, 0), 0);
1290 if (GET_CODE (reg1
) != REG
)
1291 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1293 /* And get B into a register. */
1294 reg2
= XEXP (idx
, 1);
1295 if (GET_CODE (reg2
) != REG
)
1296 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1298 reg1
= force_reg (Pmode
,
1299 gen_rtx_PLUS (Pmode
,
1300 gen_rtx_ASHIFT (Pmode
, reg1
,
1301 GEN_INT (shift_val
)),
1304 /* Add the result to our base register and return. */
1305 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, reg1
));
1309 /* Uh-oh. We might have an address for x[n-100000]. This needs
1310 special handling to avoid creating an indexed memory address
1311 with x-100000 as the base.
1313 If the constant part is small enough, then it's still safe because
1314 there is a guard page at the beginning and end of the data segment.
1316 Scaled references are common enough that we want to try and rearrange the
1317 terms so that we can use indexing for these addresses too. Only
1318 do the optimization for floatint point modes. */
1320 if (GET_CODE (x
) == PLUS
1321 && pa_symbolic_expression_p (XEXP (x
, 1)))
1323 /* Ugly. We modify things here so that the address offset specified
1324 by the index expression is computed first, then added to x to form
1325 the entire address. */
1327 rtx regx1
, regx2
, regy1
, regy2
, y
;
1329 /* Strip off any CONST. */
1331 if (GET_CODE (y
) == CONST
)
1334 if (GET_CODE (y
) == PLUS
|| GET_CODE (y
) == MINUS
)
1336 /* See if this looks like
1337 (plus (mult (reg) (mem_shadd_const))
1338 (const (plus (symbol_ref) (const_int))))
1340 Where const_int is small. In that case the const
1341 expression is a valid pointer for indexing.
1343 If const_int is big, but can be divided evenly by shadd_const
1344 and added to (reg). This allows more scaled indexed addresses. */
1345 if (GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1346 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1347 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1348 && INTVAL (XEXP (y
, 1)) >= -4096
1349 && INTVAL (XEXP (y
, 1)) <= 4095)
1351 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1353 /* If we were given a MULT, we must fix the constant
1354 as we're going to create the ASHIFT form. */
1355 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1356 shift_val
= exact_log2 (shift_val
);
1361 if (GET_CODE (reg1
) != REG
)
1362 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1364 reg2
= XEXP (XEXP (x
, 0), 0);
1365 if (GET_CODE (reg2
) != REG
)
1366 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1370 gen_rtx_PLUS (Pmode
,
1371 gen_rtx_ASHIFT (Pmode
,
1373 GEN_INT (shift_val
)),
1376 else if ((mode
== DFmode
|| mode
== SFmode
)
1377 && GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1378 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1379 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1380 && INTVAL (XEXP (y
, 1)) % (1 << INTVAL (XEXP (XEXP (x
, 0), 1))) == 0)
1382 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1384 /* If we were given a MULT, we must fix the constant
1385 as we're going to create the ASHIFT form. */
1386 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1387 shift_val
= exact_log2 (shift_val
);
1390 = force_reg (Pmode
, GEN_INT (INTVAL (XEXP (y
, 1))
1391 / INTVAL (XEXP (XEXP (x
, 0), 1))));
1392 regx2
= XEXP (XEXP (x
, 0), 0);
1393 if (GET_CODE (regx2
) != REG
)
1394 regx2
= force_reg (Pmode
, force_operand (regx2
, 0));
1395 regx2
= force_reg (Pmode
, gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1399 gen_rtx_PLUS (Pmode
,
1400 gen_rtx_ASHIFT (Pmode
, regx2
,
1401 GEN_INT (shift_val
)),
1402 force_reg (Pmode
, XEXP (y
, 0))));
1404 else if (GET_CODE (XEXP (y
, 1)) == CONST_INT
1405 && INTVAL (XEXP (y
, 1)) >= -4096
1406 && INTVAL (XEXP (y
, 1)) <= 4095)
1408 /* This is safe because of the guard page at the
1409 beginning and end of the data space. Just
1410 return the original address. */
1415 /* Doesn't look like one we can optimize. */
1416 regx1
= force_reg (Pmode
, force_operand (XEXP (x
, 0), 0));
1417 regy1
= force_reg (Pmode
, force_operand (XEXP (y
, 0), 0));
1418 regy2
= force_reg (Pmode
, force_operand (XEXP (y
, 1), 0));
1419 regx1
= force_reg (Pmode
,
1420 gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1422 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, regx1
, regy1
));
1430 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1432 Compute extra cost of moving data between one register class
1435 Make moves from SAR so expensive they should never happen. We used to
1436 have 0xffff here, but that generates overflow in rare cases.
1438 Copies involving a FP register and a non-FP register are relatively
1439 expensive because they must go through memory.
1441 Other copies are reasonably cheap. */
1444 hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
1445 reg_class_t from
, reg_class_t to
)
1447 if (from
== SHIFT_REGS
)
1449 else if (to
== SHIFT_REGS
&& FP_REG_CLASS_P (from
))
1451 else if ((FP_REG_CLASS_P (from
) && ! FP_REG_CLASS_P (to
))
1452 || (FP_REG_CLASS_P (to
) && ! FP_REG_CLASS_P (from
)))
1458 /* For the HPPA, REG and REG+CONST is cost 0
1459 and addresses involving symbolic constants are cost 2.
1461 PIC addresses are very expensive.
1463 It is no coincidence that this has the same structure
1464 as pa_legitimate_address_p. */
1467 hppa_address_cost (rtx X
, machine_mode mode ATTRIBUTE_UNUSED
,
1468 addr_space_t as ATTRIBUTE_UNUSED
,
1469 bool speed ATTRIBUTE_UNUSED
)
1471 switch (GET_CODE (X
))
1484 /* Compute a (partial) cost for rtx X. Return true if the complete
1485 cost has been computed, and false if subexpressions should be
1486 scanned. In either case, *TOTAL contains the cost result. */
1489 hppa_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
1490 int *total
, bool speed ATTRIBUTE_UNUSED
)
1497 if (INTVAL (x
) == 0)
1499 else if (INT_14_BITS (x
))
1516 if ((x
== CONST0_RTX (DFmode
) || x
== CONST0_RTX (SFmode
))
1517 && outer_code
!= SET
)
1524 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
1526 *total
= COSTS_N_INSNS (3);
1530 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1531 factor
= GET_MODE_SIZE (GET_MODE (x
)) / 4;
1535 if (TARGET_PA_11
&& !TARGET_DISABLE_FPREGS
&& !TARGET_SOFT_FLOAT
)
1536 *total
= factor
* factor
* COSTS_N_INSNS (8);
1538 *total
= factor
* factor
* COSTS_N_INSNS (20);
1542 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
1544 *total
= COSTS_N_INSNS (14);
1552 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1553 factor
= GET_MODE_SIZE (GET_MODE (x
)) / 4;
1557 *total
= factor
* factor
* COSTS_N_INSNS (60);
1560 case PLUS
: /* this includes shNadd insns */
1562 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
1564 *total
= COSTS_N_INSNS (3);
1568 /* A size N times larger than UNITS_PER_WORD needs N times as
1569 many insns, taking N times as long. */
1570 factor
= GET_MODE_SIZE (GET_MODE (x
)) / UNITS_PER_WORD
;
1573 *total
= factor
* COSTS_N_INSNS (1);
1579 *total
= COSTS_N_INSNS (1);
1587 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1588 new rtx with the correct mode. */
1590 force_mode (machine_mode mode
, rtx orig
)
1592 if (mode
== GET_MODE (orig
))
1595 gcc_assert (REGNO (orig
) < FIRST_PSEUDO_REGISTER
);
1597 return gen_rtx_REG (mode
, REGNO (orig
));
1600 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1603 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
1605 return tls_referenced_p (x
);
1608 /* Emit insns to move operands[1] into operands[0].
1610 Return 1 if we have written out everything that needs to be done to
1611 do the move. Otherwise, return 0 and the caller will emit the move
1614 Note SCRATCH_REG may not be in the proper mode depending on how it
1615 will be used. This routine is responsible for creating a new copy
1616 of SCRATCH_REG in the proper mode. */
1619 pa_emit_move_sequence (rtx
*operands
, machine_mode mode
, rtx scratch_reg
)
1621 register rtx operand0
= operands
[0];
1622 register rtx operand1
= operands
[1];
1625 /* We can only handle indexed addresses in the destination operand
1626 of floating point stores. Thus, we need to break out indexed
1627 addresses from the destination operand. */
1628 if (GET_CODE (operand0
) == MEM
&& IS_INDEX_ADDR_P (XEXP (operand0
, 0)))
1630 gcc_assert (can_create_pseudo_p ());
1632 tem
= copy_to_mode_reg (Pmode
, XEXP (operand0
, 0));
1633 operand0
= replace_equiv_address (operand0
, tem
);
1636 /* On targets with non-equivalent space registers, break out unscaled
1637 indexed addresses from the source operand before the final CSE.
1638 We have to do this because the REG_POINTER flag is not correctly
1639 carried through various optimization passes and CSE may substitute
1640 a pseudo without the pointer set for one with the pointer set. As
1641 a result, we loose various opportunities to create insns with
1642 unscaled indexed addresses. */
1643 if (!TARGET_NO_SPACE_REGS
1644 && !cse_not_expected
1645 && GET_CODE (operand1
) == MEM
1646 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1647 && REG_P (XEXP (XEXP (operand1
, 0), 0))
1648 && REG_P (XEXP (XEXP (operand1
, 0), 1)))
1650 = replace_equiv_address (operand1
,
1651 copy_to_mode_reg (Pmode
, XEXP (operand1
, 0)));
1654 && reload_in_progress
&& GET_CODE (operand0
) == REG
1655 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
1656 operand0
= reg_equiv_mem (REGNO (operand0
));
1657 else if (scratch_reg
1658 && reload_in_progress
&& GET_CODE (operand0
) == SUBREG
1659 && GET_CODE (SUBREG_REG (operand0
)) == REG
1660 && REGNO (SUBREG_REG (operand0
)) >= FIRST_PSEUDO_REGISTER
)
1662 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1663 the code which tracks sets/uses for delete_output_reload. */
1664 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand0
),
1665 reg_equiv_mem (REGNO (SUBREG_REG (operand0
))),
1666 SUBREG_BYTE (operand0
));
1667 operand0
= alter_subreg (&temp
, true);
1671 && reload_in_progress
&& GET_CODE (operand1
) == REG
1672 && REGNO (operand1
) >= FIRST_PSEUDO_REGISTER
)
1673 operand1
= reg_equiv_mem (REGNO (operand1
));
1674 else if (scratch_reg
1675 && reload_in_progress
&& GET_CODE (operand1
) == SUBREG
1676 && GET_CODE (SUBREG_REG (operand1
)) == REG
1677 && REGNO (SUBREG_REG (operand1
)) >= FIRST_PSEUDO_REGISTER
)
1679 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1680 the code which tracks sets/uses for delete_output_reload. */
1681 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand1
),
1682 reg_equiv_mem (REGNO (SUBREG_REG (operand1
))),
1683 SUBREG_BYTE (operand1
));
1684 operand1
= alter_subreg (&temp
, true);
1687 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand0
) == MEM
1688 && ((tem
= find_replacement (&XEXP (operand0
, 0)))
1689 != XEXP (operand0
, 0)))
1690 operand0
= replace_equiv_address (operand0
, tem
);
1692 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand1
) == MEM
1693 && ((tem
= find_replacement (&XEXP (operand1
, 0)))
1694 != XEXP (operand1
, 0)))
1695 operand1
= replace_equiv_address (operand1
, tem
);
1697 /* Handle secondary reloads for loads/stores of FP registers from
1698 REG+D addresses where D does not fit in 5 or 14 bits, including
1699 (subreg (mem (addr))) cases. */
1701 && fp_reg_operand (operand0
, mode
)
1702 && (MEM_P (operand1
)
1703 || (GET_CODE (operand1
) == SUBREG
1704 && MEM_P (XEXP (operand1
, 0))))
1705 && !floating_point_store_memory_operand (operand1
, mode
))
1707 if (GET_CODE (operand1
) == SUBREG
)
1708 operand1
= XEXP (operand1
, 0);
1710 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1711 it in WORD_MODE regardless of what mode it was originally given
1713 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1715 /* D might not fit in 14 bits either; for such cases load D into
1717 if (reg_plus_base_memory_operand (operand1
, mode
)
1720 && INT_14_BITS (XEXP (XEXP (operand1
, 0), 1))))
1722 emit_move_insn (scratch_reg
, XEXP (XEXP (operand1
, 0), 1));
1723 emit_move_insn (scratch_reg
,
1724 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1
, 0)),
1726 XEXP (XEXP (operand1
, 0), 0),
1730 emit_move_insn (scratch_reg
, XEXP (operand1
, 0));
1731 emit_insn (gen_rtx_SET (operand0
,
1732 replace_equiv_address (operand1
, scratch_reg
)));
1735 else if (scratch_reg
1736 && fp_reg_operand (operand1
, mode
)
1737 && (MEM_P (operand0
)
1738 || (GET_CODE (operand0
) == SUBREG
1739 && MEM_P (XEXP (operand0
, 0))))
1740 && !floating_point_store_memory_operand (operand0
, mode
))
1742 if (GET_CODE (operand0
) == SUBREG
)
1743 operand0
= XEXP (operand0
, 0);
1745 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1746 it in WORD_MODE regardless of what mode it was originally given
1748 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1750 /* D might not fit in 14 bits either; for such cases load D into
1752 if (reg_plus_base_memory_operand (operand0
, mode
)
1755 && INT_14_BITS (XEXP (XEXP (operand0
, 0), 1))))
1757 emit_move_insn (scratch_reg
, XEXP (XEXP (operand0
, 0), 1));
1758 emit_move_insn (scratch_reg
, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0
,
1761 XEXP (XEXP (operand0
, 0),
1766 emit_move_insn (scratch_reg
, XEXP (operand0
, 0));
1767 emit_insn (gen_rtx_SET (replace_equiv_address (operand0
, scratch_reg
),
1771 /* Handle secondary reloads for loads of FP registers from constant
1772 expressions by forcing the constant into memory. For the most part,
1773 this is only necessary for SImode and DImode.
1775 Use scratch_reg to hold the address of the memory location. */
1776 else if (scratch_reg
1777 && CONSTANT_P (operand1
)
1778 && fp_reg_operand (operand0
, mode
))
1780 rtx const_mem
, xoperands
[2];
1782 if (operand1
== CONST0_RTX (mode
))
1784 emit_insn (gen_rtx_SET (operand0
, operand1
));
1788 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1789 it in WORD_MODE regardless of what mode it was originally given
1791 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1793 /* Force the constant into memory and put the address of the
1794 memory location into scratch_reg. */
1795 const_mem
= force_const_mem (mode
, operand1
);
1796 xoperands
[0] = scratch_reg
;
1797 xoperands
[1] = XEXP (const_mem
, 0);
1798 pa_emit_move_sequence (xoperands
, Pmode
, 0);
1800 /* Now load the destination register. */
1801 emit_insn (gen_rtx_SET (operand0
,
1802 replace_equiv_address (const_mem
, scratch_reg
)));
1805 /* Handle secondary reloads for SAR. These occur when trying to load
1806 the SAR from memory or a constant. */
1807 else if (scratch_reg
1808 && GET_CODE (operand0
) == REG
1809 && REGNO (operand0
) < FIRST_PSEUDO_REGISTER
1810 && REGNO_REG_CLASS (REGNO (operand0
)) == SHIFT_REGS
1811 && (GET_CODE (operand1
) == MEM
|| GET_CODE (operand1
) == CONST_INT
))
1813 /* D might not fit in 14 bits either; for such cases load D into
1815 if (GET_CODE (operand1
) == MEM
1816 && !memory_address_p (GET_MODE (operand0
), XEXP (operand1
, 0)))
1818 /* We are reloading the address into the scratch register, so we
1819 want to make sure the scratch register is a full register. */
1820 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1822 emit_move_insn (scratch_reg
, XEXP (XEXP (operand1
, 0), 1));
1823 emit_move_insn (scratch_reg
, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1
,
1826 XEXP (XEXP (operand1
, 0),
1830 /* Now we are going to load the scratch register from memory,
1831 we want to load it in the same width as the original MEM,
1832 which must be the same as the width of the ultimate destination,
1834 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1836 emit_move_insn (scratch_reg
,
1837 replace_equiv_address (operand1
, scratch_reg
));
1841 /* We want to load the scratch register using the same mode as
1842 the ultimate destination. */
1843 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1845 emit_move_insn (scratch_reg
, operand1
);
1848 /* And emit the insn to set the ultimate destination. We know that
1849 the scratch register has the same mode as the destination at this
1851 emit_move_insn (operand0
, scratch_reg
);
1854 /* Handle the most common case: storing into a register. */
1855 else if (register_operand (operand0
, mode
))
1857 /* Legitimize TLS symbol references. This happens for references
1858 that aren't a legitimate constant. */
1859 if (PA_SYMBOL_REF_TLS_P (operand1
))
1860 operand1
= legitimize_tls_address (operand1
);
1862 if (register_operand (operand1
, mode
)
1863 || (GET_CODE (operand1
) == CONST_INT
1864 && pa_cint_ok_for_move (INTVAL (operand1
)))
1865 || (operand1
== CONST0_RTX (mode
))
1866 || (GET_CODE (operand1
) == HIGH
1867 && !symbolic_operand (XEXP (operand1
, 0), VOIDmode
))
1868 /* Only `general_operands' can come here, so MEM is ok. */
1869 || GET_CODE (operand1
) == MEM
)
1871 /* Various sets are created during RTL generation which don't
1872 have the REG_POINTER flag correctly set. After the CSE pass,
1873 instruction recognition can fail if we don't consistently
1874 set this flag when performing register copies. This should
1875 also improve the opportunities for creating insns that use
1876 unscaled indexing. */
1877 if (REG_P (operand0
) && REG_P (operand1
))
1879 if (REG_POINTER (operand1
)
1880 && !REG_POINTER (operand0
)
1881 && !HARD_REGISTER_P (operand0
))
1882 copy_reg_pointer (operand0
, operand1
);
1885 /* When MEMs are broken out, the REG_POINTER flag doesn't
1886 get set. In some cases, we can set the REG_POINTER flag
1887 from the declaration for the MEM. */
1888 if (REG_P (operand0
)
1889 && GET_CODE (operand1
) == MEM
1890 && !REG_POINTER (operand0
))
1892 tree decl
= MEM_EXPR (operand1
);
1894 /* Set the register pointer flag and register alignment
1895 if the declaration for this memory reference is a
1901 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1903 if (TREE_CODE (decl
) == COMPONENT_REF
)
1904 decl
= TREE_OPERAND (decl
, 1);
1906 type
= TREE_TYPE (decl
);
1907 type
= strip_array_types (type
);
1909 if (POINTER_TYPE_P (type
))
1913 type
= TREE_TYPE (type
);
1914 /* Using TYPE_ALIGN_OK is rather conservative as
1915 only the ada frontend actually sets it. */
1916 align
= (TYPE_ALIGN_OK (type
) ? TYPE_ALIGN (type
)
1918 mark_reg_pointer (operand0
, align
);
1923 emit_insn (gen_rtx_SET (operand0
, operand1
));
1927 else if (GET_CODE (operand0
) == MEM
)
1929 if (mode
== DFmode
&& operand1
== CONST0_RTX (mode
)
1930 && !(reload_in_progress
|| reload_completed
))
1932 rtx temp
= gen_reg_rtx (DFmode
);
1934 emit_insn (gen_rtx_SET (temp
, operand1
));
1935 emit_insn (gen_rtx_SET (operand0
, temp
));
1938 if (register_operand (operand1
, mode
) || operand1
== CONST0_RTX (mode
))
1940 /* Run this case quickly. */
1941 emit_insn (gen_rtx_SET (operand0
, operand1
));
1944 if (! (reload_in_progress
|| reload_completed
))
1946 operands
[0] = validize_mem (operand0
);
1947 operands
[1] = operand1
= force_reg (mode
, operand1
);
1951 /* Simplify the source if we need to.
1952 Note we do have to handle function labels here, even though we do
1953 not consider them legitimate constants. Loop optimizations can
1954 call the emit_move_xxx with one as a source. */
1955 if ((GET_CODE (operand1
) != HIGH
&& immediate_operand (operand1
, mode
))
1956 || (GET_CODE (operand1
) == HIGH
1957 && symbolic_operand (XEXP (operand1
, 0), mode
))
1958 || function_label_operand (operand1
, VOIDmode
)
1959 || tls_referenced_p (operand1
))
1963 if (GET_CODE (operand1
) == HIGH
)
1966 operand1
= XEXP (operand1
, 0);
1968 if (symbolic_operand (operand1
, mode
))
1970 /* Argh. The assembler and linker can't handle arithmetic
1973 So we force the plabel into memory, load operand0 from
1974 the memory location, then add in the constant part. */
1975 if ((GET_CODE (operand1
) == CONST
1976 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1977 && function_label_operand (XEXP (XEXP (operand1
, 0), 0),
1979 || function_label_operand (operand1
, VOIDmode
))
1981 rtx temp
, const_part
;
1983 /* Figure out what (if any) scratch register to use. */
1984 if (reload_in_progress
|| reload_completed
)
1986 scratch_reg
= scratch_reg
? scratch_reg
: operand0
;
1987 /* SCRATCH_REG will hold an address and maybe the actual
1988 data. We want it in WORD_MODE regardless of what mode it
1989 was originally given to us. */
1990 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1993 scratch_reg
= gen_reg_rtx (Pmode
);
1995 if (GET_CODE (operand1
) == CONST
)
1997 /* Save away the constant part of the expression. */
1998 const_part
= XEXP (XEXP (operand1
, 0), 1);
1999 gcc_assert (GET_CODE (const_part
) == CONST_INT
);
2001 /* Force the function label into memory. */
2002 temp
= force_const_mem (mode
, XEXP (XEXP (operand1
, 0), 0));
2006 /* No constant part. */
2007 const_part
= NULL_RTX
;
2009 /* Force the function label into memory. */
2010 temp
= force_const_mem (mode
, operand1
);
2014 /* Get the address of the memory location. PIC-ify it if
2016 temp
= XEXP (temp
, 0);
2018 temp
= legitimize_pic_address (temp
, mode
, scratch_reg
);
2020 /* Put the address of the memory location into our destination
2023 pa_emit_move_sequence (operands
, mode
, scratch_reg
);
2025 /* Now load from the memory location into our destination
2027 operands
[1] = gen_rtx_MEM (Pmode
, operands
[0]);
2028 pa_emit_move_sequence (operands
, mode
, scratch_reg
);
2030 /* And add back in the constant part. */
2031 if (const_part
!= NULL_RTX
)
2032 expand_inc (operand0
, const_part
);
2042 if (reload_in_progress
|| reload_completed
)
2044 temp
= scratch_reg
? scratch_reg
: operand0
;
2045 /* TEMP will hold an address and maybe the actual
2046 data. We want it in WORD_MODE regardless of what mode it
2047 was originally given to us. */
2048 temp
= force_mode (word_mode
, temp
);
2051 temp
= gen_reg_rtx (Pmode
);
2053 /* Force (const (plus (symbol) (const_int))) to memory
2054 if the const_int will not fit in 14 bits. Although
2055 this requires a relocation, the instruction sequence
2056 needed to load the value is shorter. */
2057 if (GET_CODE (operand1
) == CONST
2058 && GET_CODE (XEXP (operand1
, 0)) == PLUS
2059 && GET_CODE (XEXP (XEXP (operand1
, 0), 1)) == CONST_INT
2060 && !INT_14_BITS (XEXP (XEXP (operand1
, 0), 1)))
2062 rtx x
, m
= force_const_mem (mode
, operand1
);
2064 x
= legitimize_pic_address (XEXP (m
, 0), mode
, temp
);
2065 x
= replace_equiv_address (m
, x
);
2066 insn
= emit_move_insn (operand0
, x
);
2070 operands
[1] = legitimize_pic_address (operand1
, mode
, temp
);
2071 if (REG_P (operand0
) && REG_P (operands
[1]))
2072 copy_reg_pointer (operand0
, operands
[1]);
2073 insn
= emit_move_insn (operand0
, operands
[1]);
2076 /* Put a REG_EQUAL note on this insn. */
2077 set_unique_reg_note (insn
, REG_EQUAL
, operand1
);
2079 /* On the HPPA, references to data space are supposed to use dp,
2080 register 27, but showing it in the RTL inhibits various cse
2081 and loop optimizations. */
2086 if (reload_in_progress
|| reload_completed
)
2088 temp
= scratch_reg
? scratch_reg
: operand0
;
2089 /* TEMP will hold an address and maybe the actual
2090 data. We want it in WORD_MODE regardless of what mode it
2091 was originally given to us. */
2092 temp
= force_mode (word_mode
, temp
);
2095 temp
= gen_reg_rtx (mode
);
2097 /* Loading a SYMBOL_REF into a register makes that register
2098 safe to be used as the base in an indexed address.
2100 Don't mark hard registers though. That loses. */
2101 if (GET_CODE (operand0
) == REG
2102 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
2103 mark_reg_pointer (operand0
, BITS_PER_UNIT
);
2104 if (REGNO (temp
) >= FIRST_PSEUDO_REGISTER
)
2105 mark_reg_pointer (temp
, BITS_PER_UNIT
);
2108 set
= gen_rtx_SET (operand0
, temp
);
2110 set
= gen_rtx_SET (operand0
,
2111 gen_rtx_LO_SUM (mode
, temp
, operand1
));
2113 emit_insn (gen_rtx_SET (temp
, gen_rtx_HIGH (mode
, operand1
)));
2119 else if (tls_referenced_p (operand1
))
2124 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
2126 addend
= XEXP (XEXP (tmp
, 0), 1);
2127 tmp
= XEXP (XEXP (tmp
, 0), 0);
2130 gcc_assert (GET_CODE (tmp
) == SYMBOL_REF
);
2131 tmp
= legitimize_tls_address (tmp
);
2134 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
2135 tmp
= force_operand (tmp
, operands
[0]);
2139 else if (GET_CODE (operand1
) != CONST_INT
2140 || !pa_cint_ok_for_move (INTVAL (operand1
)))
2145 HOST_WIDE_INT value
= 0;
2146 HOST_WIDE_INT insv
= 0;
2149 if (GET_CODE (operand1
) == CONST_INT
)
2150 value
= INTVAL (operand1
);
2153 && GET_CODE (operand1
) == CONST_INT
2154 && HOST_BITS_PER_WIDE_INT
> 32
2155 && GET_MODE_BITSIZE (GET_MODE (operand0
)) > 32)
2159 /* Extract the low order 32 bits of the value and sign extend.
2160 If the new value is the same as the original value, we can
2161 can use the original value as-is. If the new value is
2162 different, we use it and insert the most-significant 32-bits
2163 of the original value into the final result. */
2164 nval
= ((value
& (((HOST_WIDE_INT
) 2 << 31) - 1))
2165 ^ ((HOST_WIDE_INT
) 1 << 31)) - ((HOST_WIDE_INT
) 1 << 31);
2168 #if HOST_BITS_PER_WIDE_INT > 32
2169 insv
= value
>= 0 ? value
>> 32 : ~(~value
>> 32);
2173 operand1
= GEN_INT (nval
);
2177 if (reload_in_progress
|| reload_completed
)
2178 temp
= scratch_reg
? scratch_reg
: operand0
;
2180 temp
= gen_reg_rtx (mode
);
2182 /* We don't directly split DImode constants on 32-bit targets
2183 because PLUS uses an 11-bit immediate and the insn sequence
2184 generated is not as efficient as the one using HIGH/LO_SUM. */
2185 if (GET_CODE (operand1
) == CONST_INT
2186 && GET_MODE_BITSIZE (mode
) <= BITS_PER_WORD
2187 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
2190 /* Directly break constant into high and low parts. This
2191 provides better optimization opportunities because various
2192 passes recognize constants split with PLUS but not LO_SUM.
2193 We use a 14-bit signed low part except when the addition
2194 of 0x4000 to the high part might change the sign of the
2196 HOST_WIDE_INT low
= value
& 0x3fff;
2197 HOST_WIDE_INT high
= value
& ~ 0x3fff;
2201 if (high
== 0x7fffc000 || (mode
== HImode
&& high
== 0x4000))
2209 emit_insn (gen_rtx_SET (temp
, GEN_INT (high
)));
2210 operands
[1] = gen_rtx_PLUS (mode
, temp
, GEN_INT (low
));
2214 emit_insn (gen_rtx_SET (temp
, gen_rtx_HIGH (mode
, operand1
)));
2215 operands
[1] = gen_rtx_LO_SUM (mode
, temp
, operand1
);
2218 insn
= emit_move_insn (operands
[0], operands
[1]);
2220 /* Now insert the most significant 32 bits of the value
2221 into the register. When we don't have a second register
2222 available, it could take up to nine instructions to load
2223 a 64-bit integer constant. Prior to reload, we force
2224 constants that would take more than three instructions
2225 to load to the constant pool. During and after reload,
2226 we have to handle all possible values. */
2229 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2230 register and the value to be inserted is outside the
2231 range that can be loaded with three depdi instructions. */
2232 if (temp
!= operand0
&& (insv
>= 16384 || insv
< -16384))
2234 operand1
= GEN_INT (insv
);
2236 emit_insn (gen_rtx_SET (temp
,
2237 gen_rtx_HIGH (mode
, operand1
)));
2238 emit_move_insn (temp
, gen_rtx_LO_SUM (mode
, temp
, operand1
));
2240 insn
= emit_insn (gen_insvdi (operand0
, GEN_INT (32),
2243 insn
= emit_insn (gen_insvsi (operand0
, GEN_INT (32),
2248 int len
= 5, pos
= 27;
2250 /* Insert the bits using the depdi instruction. */
2253 HOST_WIDE_INT v5
= ((insv
& 31) ^ 16) - 16;
2254 HOST_WIDE_INT sign
= v5
< 0;
2256 /* Left extend the insertion. */
2257 insv
= (insv
>= 0 ? insv
>> len
: ~(~insv
>> len
));
2258 while (pos
> 0 && (insv
& 1) == sign
)
2260 insv
= (insv
>= 0 ? insv
>> 1 : ~(~insv
>> 1));
2266 insn
= emit_insn (gen_insvdi (operand0
,
2271 insn
= emit_insn (gen_insvsi (operand0
,
2276 len
= pos
> 0 && pos
< 5 ? pos
: 5;
2282 set_unique_reg_note (insn
, REG_EQUAL
, op1
);
2287 /* Now have insn-emit do whatever it normally does. */
2291 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2292 it will need a link/runtime reloc). */
2295 pa_reloc_needed (tree exp
)
2299 switch (TREE_CODE (exp
))
2304 case POINTER_PLUS_EXPR
:
2307 reloc
= pa_reloc_needed (TREE_OPERAND (exp
, 0));
2308 reloc
|= pa_reloc_needed (TREE_OPERAND (exp
, 1));
2312 case NON_LVALUE_EXPR
:
2313 reloc
= pa_reloc_needed (TREE_OPERAND (exp
, 0));
2319 unsigned HOST_WIDE_INT ix
;
2321 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp
), ix
, value
)
2323 reloc
|= pa_reloc_needed (value
);
2337 /* Return the best assembler insn template
2338 for moving operands[1] into operands[0] as a fullword. */
2340 pa_singlemove_string (rtx
*operands
)
2342 HOST_WIDE_INT intval
;
2344 if (GET_CODE (operands
[0]) == MEM
)
2345 return "stw %r1,%0";
2346 if (GET_CODE (operands
[1]) == MEM
)
2348 if (GET_CODE (operands
[1]) == CONST_DOUBLE
)
2353 gcc_assert (GET_MODE (operands
[1]) == SFmode
);
2355 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2357 REAL_VALUE_FROM_CONST_DOUBLE (d
, operands
[1]);
2358 REAL_VALUE_TO_TARGET_SINGLE (d
, i
);
2360 operands
[1] = GEN_INT (i
);
2361 /* Fall through to CONST_INT case. */
2363 if (GET_CODE (operands
[1]) == CONST_INT
)
2365 intval
= INTVAL (operands
[1]);
2367 if (VAL_14_BITS_P (intval
))
2369 else if ((intval
& 0x7ff) == 0)
2370 return "ldil L'%1,%0";
2371 else if (pa_zdepi_cint_p (intval
))
2372 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2374 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2376 return "copy %1,%0";
2380 /* Compute position (in OP[1]) and width (in OP[2])
2381 useful for copying IMM to a register using the zdepi
2382 instructions. Store the immediate value to insert in OP[0]. */
2384 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2388 /* Find the least significant set bit in IMM. */
2389 for (lsb
= 0; lsb
< 32; lsb
++)
2396 /* Choose variants based on *sign* of the 5-bit field. */
2397 if ((imm
& 0x10) == 0)
2398 len
= (lsb
<= 28) ? 4 : 32 - lsb
;
2401 /* Find the width of the bitstring in IMM. */
2402 for (len
= 5; len
< 32 - lsb
; len
++)
2404 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2408 /* Sign extend IMM as a 5-bit value. */
2409 imm
= (imm
& 0xf) - 0x10;
2417 /* Compute position (in OP[1]) and width (in OP[2])
2418 useful for copying IMM to a register using the depdi,z
2419 instructions. Store the immediate value to insert in OP[0]. */
2422 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2424 int lsb
, len
, maxlen
;
2426 maxlen
= MIN (HOST_BITS_PER_WIDE_INT
, 64);
2428 /* Find the least significant set bit in IMM. */
2429 for (lsb
= 0; lsb
< maxlen
; lsb
++)
2436 /* Choose variants based on *sign* of the 5-bit field. */
2437 if ((imm
& 0x10) == 0)
2438 len
= (lsb
<= maxlen
- 4) ? 4 : maxlen
- lsb
;
2441 /* Find the width of the bitstring in IMM. */
2442 for (len
= 5; len
< maxlen
- lsb
; len
++)
2444 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2448 /* Extend length if host is narrow and IMM is negative. */
2449 if (HOST_BITS_PER_WIDE_INT
== 32 && len
== maxlen
- lsb
)
2452 /* Sign extend IMM as a 5-bit value. */
2453 imm
= (imm
& 0xf) - 0x10;
2461 /* Output assembler code to perform a doubleword move insn
2462 with operands OPERANDS. */
2465 pa_output_move_double (rtx
*operands
)
2467 enum { REGOP
, OFFSOP
, MEMOP
, CNSTOP
, RNDOP
} optype0
, optype1
;
2469 rtx addreg0
= 0, addreg1
= 0;
2471 /* First classify both operands. */
2473 if (REG_P (operands
[0]))
2475 else if (offsettable_memref_p (operands
[0]))
2477 else if (GET_CODE (operands
[0]) == MEM
)
2482 if (REG_P (operands
[1]))
2484 else if (CONSTANT_P (operands
[1]))
2486 else if (offsettable_memref_p (operands
[1]))
2488 else if (GET_CODE (operands
[1]) == MEM
)
2493 /* Check for the cases that the operand constraints are not
2494 supposed to allow to happen. */
2495 gcc_assert (optype0
== REGOP
|| optype1
== REGOP
);
2497 /* Handle copies between general and floating registers. */
2499 if (optype0
== REGOP
&& optype1
== REGOP
2500 && FP_REG_P (operands
[0]) ^ FP_REG_P (operands
[1]))
2502 if (FP_REG_P (operands
[0]))
2504 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands
);
2505 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands
);
2506 return "{fldds|fldd} -16(%%sp),%0";
2510 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands
);
2511 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands
);
2512 return "{ldws|ldw} -12(%%sp),%R0";
2516 /* Handle auto decrementing and incrementing loads and stores
2517 specifically, since the structure of the function doesn't work
2518 for them without major modification. Do it better when we learn
2519 this port about the general inc/dec addressing of PA.
2520 (This was written by tege. Chide him if it doesn't work.) */
2522 if (optype0
== MEMOP
)
2524 /* We have to output the address syntax ourselves, since print_operand
2525 doesn't deal with the addresses we want to use. Fix this later. */
2527 rtx addr
= XEXP (operands
[0], 0);
2528 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2530 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2532 operands
[0] = XEXP (addr
, 0);
2533 gcc_assert (GET_CODE (operands
[1]) == REG
2534 && GET_CODE (operands
[0]) == REG
);
2536 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2538 /* No overlap between high target register and address
2539 register. (We do this in a non-obvious way to
2540 save a register file writeback) */
2541 if (GET_CODE (addr
) == POST_INC
)
2542 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2543 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2545 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2547 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2549 operands
[0] = XEXP (addr
, 0);
2550 gcc_assert (GET_CODE (operands
[1]) == REG
2551 && GET_CODE (operands
[0]) == REG
);
2553 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2554 /* No overlap between high target register and address
2555 register. (We do this in a non-obvious way to save a
2556 register file writeback) */
2557 if (GET_CODE (addr
) == PRE_INC
)
2558 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2559 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2562 if (optype1
== MEMOP
)
2564 /* We have to output the address syntax ourselves, since print_operand
2565 doesn't deal with the addresses we want to use. Fix this later. */
2567 rtx addr
= XEXP (operands
[1], 0);
2568 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2570 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2572 operands
[1] = XEXP (addr
, 0);
2573 gcc_assert (GET_CODE (operands
[0]) == REG
2574 && GET_CODE (operands
[1]) == REG
);
2576 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2578 /* No overlap between high target register and address
2579 register. (We do this in a non-obvious way to
2580 save a register file writeback) */
2581 if (GET_CODE (addr
) == POST_INC
)
2582 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2583 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2587 /* This is an undefined situation. We should load into the
2588 address register *and* update that register. Probably
2589 we don't need to handle this at all. */
2590 if (GET_CODE (addr
) == POST_INC
)
2591 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2592 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2595 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2597 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2599 operands
[1] = XEXP (addr
, 0);
2600 gcc_assert (GET_CODE (operands
[0]) == REG
2601 && GET_CODE (operands
[1]) == REG
);
2603 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2605 /* No overlap between high target register and address
2606 register. (We do this in a non-obvious way to
2607 save a register file writeback) */
2608 if (GET_CODE (addr
) == PRE_INC
)
2609 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2610 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2614 /* This is an undefined situation. We should load into the
2615 address register *and* update that register. Probably
2616 we don't need to handle this at all. */
2617 if (GET_CODE (addr
) == PRE_INC
)
2618 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2619 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2622 else if (GET_CODE (addr
) == PLUS
2623 && GET_CODE (XEXP (addr
, 0)) == MULT
)
2627 /* Load address into left half of destination register. */
2628 xoperands
[0] = gen_rtx_SUBREG (SImode
, operands
[0], 0);
2629 xoperands
[1] = XEXP (addr
, 1);
2630 xoperands
[2] = XEXP (XEXP (addr
, 0), 0);
2631 xoperands
[3] = XEXP (XEXP (addr
, 0), 1);
2632 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2634 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2636 else if (GET_CODE (addr
) == PLUS
2637 && REG_P (XEXP (addr
, 0))
2638 && REG_P (XEXP (addr
, 1)))
2642 /* Load address into left half of destination register. */
2643 xoperands
[0] = gen_rtx_SUBREG (SImode
, operands
[0], 0);
2644 xoperands
[1] = XEXP (addr
, 0);
2645 xoperands
[2] = XEXP (addr
, 1);
2646 output_asm_insn ("{addl|add,l} %1,%2,%0",
2648 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2652 /* If an operand is an unoffsettable memory ref, find a register
2653 we can increment temporarily to make it refer to the second word. */
2655 if (optype0
== MEMOP
)
2656 addreg0
= find_addr_reg (XEXP (operands
[0], 0));
2658 if (optype1
== MEMOP
)
2659 addreg1
= find_addr_reg (XEXP (operands
[1], 0));
2661 /* Ok, we can do one word at a time.
2662 Normally we do the low-numbered word first.
2664 In either case, set up in LATEHALF the operands to use
2665 for the high-numbered word and in some cases alter the
2666 operands in OPERANDS to be suitable for the low-numbered word. */
2668 if (optype0
== REGOP
)
2669 latehalf
[0] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2670 else if (optype0
== OFFSOP
)
2671 latehalf
[0] = adjust_address_nv (operands
[0], SImode
, 4);
2673 latehalf
[0] = operands
[0];
2675 if (optype1
== REGOP
)
2676 latehalf
[1] = gen_rtx_REG (SImode
, REGNO (operands
[1]) + 1);
2677 else if (optype1
== OFFSOP
)
2678 latehalf
[1] = adjust_address_nv (operands
[1], SImode
, 4);
2679 else if (optype1
== CNSTOP
)
2680 split_double (operands
[1], &operands
[1], &latehalf
[1]);
2682 latehalf
[1] = operands
[1];
2684 /* If the first move would clobber the source of the second one,
2685 do them in the other order.
2687 This can happen in two cases:
2689 mem -> register where the first half of the destination register
2690 is the same register used in the memory's address. Reload
2691 can create such insns.
2693 mem in this case will be either register indirect or register
2694 indirect plus a valid offset.
2696 register -> register move where REGNO(dst) == REGNO(src + 1)
2697 someone (Tim/Tege?) claimed this can happen for parameter loads.
2699 Handle mem -> register case first. */
2700 if (optype0
== REGOP
2701 && (optype1
== MEMOP
|| optype1
== OFFSOP
)
2702 && refers_to_regno_p (REGNO (operands
[0]), operands
[1]))
2704 /* Do the late half first. */
2706 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
2707 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2711 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
2712 return pa_singlemove_string (operands
);
2715 /* Now handle register -> register case. */
2716 if (optype0
== REGOP
&& optype1
== REGOP
2717 && REGNO (operands
[0]) == REGNO (operands
[1]) + 1)
2719 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2720 return pa_singlemove_string (operands
);
2723 /* Normal case: do the two words, low-numbered first. */
2725 output_asm_insn (pa_singlemove_string (operands
), operands
);
2727 /* Make any unoffsettable addresses point at high-numbered word. */
2729 output_asm_insn ("ldo 4(%0),%0", &addreg0
);
2731 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
2734 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2736 /* Undo the adds we just did. */
2738 output_asm_insn ("ldo -4(%0),%0", &addreg0
);
2740 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
2746 pa_output_fp_move_double (rtx
*operands
)
2748 if (FP_REG_P (operands
[0]))
2750 if (FP_REG_P (operands
[1])
2751 || operands
[1] == CONST0_RTX (GET_MODE (operands
[0])))
2752 output_asm_insn ("fcpy,dbl %f1,%0", operands
);
2754 output_asm_insn ("fldd%F1 %1,%0", operands
);
2756 else if (FP_REG_P (operands
[1]))
2758 output_asm_insn ("fstd%F0 %1,%0", operands
);
2764 gcc_assert (operands
[1] == CONST0_RTX (GET_MODE (operands
[0])));
2766 /* This is a pain. You have to be prepared to deal with an
2767 arbitrary address here including pre/post increment/decrement.
2769 so avoid this in the MD. */
2770 gcc_assert (GET_CODE (operands
[0]) == REG
);
2772 xoperands
[1] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2773 xoperands
[0] = operands
[0];
2774 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands
);
2779 /* Return a REG that occurs in ADDR with coefficient 1.
2780 ADDR can be effectively incremented by incrementing REG. */
2783 find_addr_reg (rtx addr
)
2785 while (GET_CODE (addr
) == PLUS
)
2787 if (GET_CODE (XEXP (addr
, 0)) == REG
)
2788 addr
= XEXP (addr
, 0);
2789 else if (GET_CODE (XEXP (addr
, 1)) == REG
)
2790 addr
= XEXP (addr
, 1);
2791 else if (CONSTANT_P (XEXP (addr
, 0)))
2792 addr
= XEXP (addr
, 1);
2793 else if (CONSTANT_P (XEXP (addr
, 1)))
2794 addr
= XEXP (addr
, 0);
2798 gcc_assert (GET_CODE (addr
) == REG
);
2802 /* Emit code to perform a block move.
2804 OPERANDS[0] is the destination pointer as a REG, clobbered.
2805 OPERANDS[1] is the source pointer as a REG, clobbered.
2806 OPERANDS[2] is a register for temporary storage.
2807 OPERANDS[3] is a register for temporary storage.
2808 OPERANDS[4] is the size as a CONST_INT
2809 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2810 OPERANDS[6] is another temporary register. */
2813 pa_output_block_move (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
2815 int align
= INTVAL (operands
[5]);
2816 unsigned long n_bytes
= INTVAL (operands
[4]);
2818 /* We can't move more than a word at a time because the PA
2819 has no longer integer move insns. (Could use fp mem ops?) */
2820 if (align
> (TARGET_64BIT
? 8 : 4))
2821 align
= (TARGET_64BIT
? 8 : 4);
2823 /* Note that we know each loop below will execute at least twice
2824 (else we would have open-coded the copy). */
2828 /* Pre-adjust the loop counter. */
2829 operands
[4] = GEN_INT (n_bytes
- 16);
2830 output_asm_insn ("ldi %4,%2", operands
);
2833 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
2834 output_asm_insn ("ldd,ma 8(%1),%6", operands
);
2835 output_asm_insn ("std,ma %3,8(%0)", operands
);
2836 output_asm_insn ("addib,>= -16,%2,.-12", operands
);
2837 output_asm_insn ("std,ma %6,8(%0)", operands
);
2839 /* Handle the residual. There could be up to 7 bytes of
2840 residual to copy! */
2841 if (n_bytes
% 16 != 0)
2843 operands
[4] = GEN_INT (n_bytes
% 8);
2844 if (n_bytes
% 16 >= 8)
2845 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
2846 if (n_bytes
% 8 != 0)
2847 output_asm_insn ("ldd 0(%1),%6", operands
);
2848 if (n_bytes
% 16 >= 8)
2849 output_asm_insn ("std,ma %3,8(%0)", operands
);
2850 if (n_bytes
% 8 != 0)
2851 output_asm_insn ("stdby,e %6,%4(%0)", operands
);
2856 /* Pre-adjust the loop counter. */
2857 operands
[4] = GEN_INT (n_bytes
- 8);
2858 output_asm_insn ("ldi %4,%2", operands
);
2861 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
2862 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands
);
2863 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
2864 output_asm_insn ("addib,>= -8,%2,.-12", operands
);
2865 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands
);
2867 /* Handle the residual. There could be up to 7 bytes of
2868 residual to copy! */
2869 if (n_bytes
% 8 != 0)
2871 operands
[4] = GEN_INT (n_bytes
% 4);
2872 if (n_bytes
% 8 >= 4)
2873 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
2874 if (n_bytes
% 4 != 0)
2875 output_asm_insn ("ldw 0(%1),%6", operands
);
2876 if (n_bytes
% 8 >= 4)
2877 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
2878 if (n_bytes
% 4 != 0)
2879 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands
);
2884 /* Pre-adjust the loop counter. */
2885 operands
[4] = GEN_INT (n_bytes
- 4);
2886 output_asm_insn ("ldi %4,%2", operands
);
2889 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
2890 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands
);
2891 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
2892 output_asm_insn ("addib,>= -4,%2,.-12", operands
);
2893 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands
);
2895 /* Handle the residual. */
2896 if (n_bytes
% 4 != 0)
2898 if (n_bytes
% 4 >= 2)
2899 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
2900 if (n_bytes
% 2 != 0)
2901 output_asm_insn ("ldb 0(%1),%6", operands
);
2902 if (n_bytes
% 4 >= 2)
2903 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
2904 if (n_bytes
% 2 != 0)
2905 output_asm_insn ("stb %6,0(%0)", operands
);
2910 /* Pre-adjust the loop counter. */
2911 operands
[4] = GEN_INT (n_bytes
- 2);
2912 output_asm_insn ("ldi %4,%2", operands
);
2915 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands
);
2916 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands
);
2917 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands
);
2918 output_asm_insn ("addib,>= -2,%2,.-12", operands
);
2919 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands
);
2921 /* Handle the residual. */
2922 if (n_bytes
% 2 != 0)
2924 output_asm_insn ("ldb 0(%1),%3", operands
);
2925 output_asm_insn ("stb %3,0(%0)", operands
);
2934 /* Count the number of insns necessary to handle this block move.
2936 Basic structure is the same as emit_block_move, except that we
2937 count insns rather than emit them. */
2940 compute_movmem_length (rtx_insn
*insn
)
2942 rtx pat
= PATTERN (insn
);
2943 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 7), 0));
2944 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 6), 0));
2945 unsigned int n_insns
= 0;
2947 /* We can't move more than four bytes at a time because the PA
2948 has no longer integer move insns. (Could use fp mem ops?) */
2949 if (align
> (TARGET_64BIT
? 8 : 4))
2950 align
= (TARGET_64BIT
? 8 : 4);
2952 /* The basic copying loop. */
2956 if (n_bytes
% (2 * align
) != 0)
2958 if ((n_bytes
% (2 * align
)) >= align
)
2961 if ((n_bytes
% align
) != 0)
2965 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2969 /* Emit code to perform a block clear.
2971 OPERANDS[0] is the destination pointer as a REG, clobbered.
2972 OPERANDS[1] is a register for temporary storage.
2973 OPERANDS[2] is the size as a CONST_INT
2974 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2977 pa_output_block_clear (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
2979 int align
= INTVAL (operands
[3]);
2980 unsigned long n_bytes
= INTVAL (operands
[2]);
2982 /* We can't clear more than a word at a time because the PA
2983 has no longer integer move insns. */
2984 if (align
> (TARGET_64BIT
? 8 : 4))
2985 align
= (TARGET_64BIT
? 8 : 4);
2987 /* Note that we know each loop below will execute at least twice
2988 (else we would have open-coded the copy). */
2992 /* Pre-adjust the loop counter. */
2993 operands
[2] = GEN_INT (n_bytes
- 16);
2994 output_asm_insn ("ldi %2,%1", operands
);
2997 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
2998 output_asm_insn ("addib,>= -16,%1,.-4", operands
);
2999 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3001 /* Handle the residual. There could be up to 7 bytes of
3002 residual to copy! */
3003 if (n_bytes
% 16 != 0)
3005 operands
[2] = GEN_INT (n_bytes
% 8);
3006 if (n_bytes
% 16 >= 8)
3007 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3008 if (n_bytes
% 8 != 0)
3009 output_asm_insn ("stdby,e %%r0,%2(%0)", operands
);
3014 /* Pre-adjust the loop counter. */
3015 operands
[2] = GEN_INT (n_bytes
- 8);
3016 output_asm_insn ("ldi %2,%1", operands
);
3019 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3020 output_asm_insn ("addib,>= -8,%1,.-4", operands
);
3021 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3023 /* Handle the residual. There could be up to 7 bytes of
3024 residual to copy! */
3025 if (n_bytes
% 8 != 0)
3027 operands
[2] = GEN_INT (n_bytes
% 4);
3028 if (n_bytes
% 8 >= 4)
3029 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3030 if (n_bytes
% 4 != 0)
3031 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands
);
3036 /* Pre-adjust the loop counter. */
3037 operands
[2] = GEN_INT (n_bytes
- 4);
3038 output_asm_insn ("ldi %2,%1", operands
);
3041 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3042 output_asm_insn ("addib,>= -4,%1,.-4", operands
);
3043 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3045 /* Handle the residual. */
3046 if (n_bytes
% 4 != 0)
3048 if (n_bytes
% 4 >= 2)
3049 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3050 if (n_bytes
% 2 != 0)
3051 output_asm_insn ("stb %%r0,0(%0)", operands
);
3056 /* Pre-adjust the loop counter. */
3057 operands
[2] = GEN_INT (n_bytes
- 2);
3058 output_asm_insn ("ldi %2,%1", operands
);
3061 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
3062 output_asm_insn ("addib,>= -2,%1,.-4", operands
);
3063 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
3065 /* Handle the residual. */
3066 if (n_bytes
% 2 != 0)
3067 output_asm_insn ("stb %%r0,0(%0)", operands
);
3076 /* Count the number of insns necessary to handle this block move.
3078 Basic structure is the same as emit_block_move, except that we
3079 count insns rather than emit them. */
3082 compute_clrmem_length (rtx_insn
*insn
)
3084 rtx pat
= PATTERN (insn
);
3085 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 4), 0));
3086 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 3), 0));
3087 unsigned int n_insns
= 0;
3089 /* We can't clear more than a word at a time because the PA
3090 has no longer integer move insns. */
3091 if (align
> (TARGET_64BIT
? 8 : 4))
3092 align
= (TARGET_64BIT
? 8 : 4);
3094 /* The basic loop. */
3098 if (n_bytes
% (2 * align
) != 0)
3100 if ((n_bytes
% (2 * align
)) >= align
)
3103 if ((n_bytes
% align
) != 0)
3107 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3113 pa_output_and (rtx
*operands
)
3115 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3117 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3118 int ls0
, ls1
, ms0
, p
, len
;
3120 for (ls0
= 0; ls0
< 32; ls0
++)
3121 if ((mask
& (1 << ls0
)) == 0)
3124 for (ls1
= ls0
; ls1
< 32; ls1
++)
3125 if ((mask
& (1 << ls1
)) != 0)
3128 for (ms0
= ls1
; ms0
< 32; ms0
++)
3129 if ((mask
& (1 << ms0
)) == 0)
3132 gcc_assert (ms0
== 32);
3140 operands
[2] = GEN_INT (len
);
3141 return "{extru|extrw,u} %1,31,%2,%0";
3145 /* We could use this `depi' for the case above as well, but `depi'
3146 requires one more register file access than an `extru'. */
3151 operands
[2] = GEN_INT (p
);
3152 operands
[3] = GEN_INT (len
);
3153 return "{depi|depwi} 0,%2,%3,%0";
3157 return "and %1,%2,%0";
3160 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3161 storing the result in operands[0]. */
3163 pa_output_64bit_and (rtx
*operands
)
3165 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3167 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3168 int ls0
, ls1
, ms0
, p
, len
;
3170 for (ls0
= 0; ls0
< HOST_BITS_PER_WIDE_INT
; ls0
++)
3171 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls0
)) == 0)
3174 for (ls1
= ls0
; ls1
< HOST_BITS_PER_WIDE_INT
; ls1
++)
3175 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls1
)) != 0)
3178 for (ms0
= ls1
; ms0
< HOST_BITS_PER_WIDE_INT
; ms0
++)
3179 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ms0
)) == 0)
3182 gcc_assert (ms0
== HOST_BITS_PER_WIDE_INT
);
3184 if (ls1
== HOST_BITS_PER_WIDE_INT
)
3190 operands
[2] = GEN_INT (len
);
3191 return "extrd,u %1,63,%2,%0";
3195 /* We could use this `depi' for the case above as well, but `depi'
3196 requires one more register file access than an `extru'. */
3201 operands
[2] = GEN_INT (p
);
3202 operands
[3] = GEN_INT (len
);
3203 return "depdi 0,%2,%3,%0";
3207 return "and %1,%2,%0";
3211 pa_output_ior (rtx
*operands
)
3213 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3214 int bs0
, bs1
, p
, len
;
3216 if (INTVAL (operands
[2]) == 0)
3217 return "copy %1,%0";
3219 for (bs0
= 0; bs0
< 32; bs0
++)
3220 if ((mask
& (1 << bs0
)) != 0)
3223 for (bs1
= bs0
; bs1
< 32; bs1
++)
3224 if ((mask
& (1 << bs1
)) == 0)
3227 gcc_assert (bs1
== 32 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3232 operands
[2] = GEN_INT (p
);
3233 operands
[3] = GEN_INT (len
);
3234 return "{depi|depwi} -1,%2,%3,%0";
3237 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3238 storing the result in operands[0]. */
3240 pa_output_64bit_ior (rtx
*operands
)
3242 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3243 int bs0
, bs1
, p
, len
;
3245 if (INTVAL (operands
[2]) == 0)
3246 return "copy %1,%0";
3248 for (bs0
= 0; bs0
< HOST_BITS_PER_WIDE_INT
; bs0
++)
3249 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs0
)) != 0)
3252 for (bs1
= bs0
; bs1
< HOST_BITS_PER_WIDE_INT
; bs1
++)
3253 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs1
)) == 0)
3256 gcc_assert (bs1
== HOST_BITS_PER_WIDE_INT
3257 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3262 operands
[2] = GEN_INT (p
);
3263 operands
[3] = GEN_INT (len
);
3264 return "depdi -1,%2,%3,%0";
3267 /* Target hook for assembling integer objects. This code handles
3268 aligned SI and DI integers specially since function references
3269 must be preceded by P%. */
3272 pa_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
3274 if (size
== UNITS_PER_WORD
3276 && function_label_operand (x
, VOIDmode
))
3278 fputs (size
== 8? "\t.dword\t" : "\t.word\t", asm_out_file
);
3280 /* We don't want an OPD when generating fast indirect calls. */
3281 if (!TARGET_FAST_INDIRECT_CALLS
)
3282 fputs ("P%", asm_out_file
);
3284 output_addr_const (asm_out_file
, x
);
3285 fputc ('\n', asm_out_file
);
3288 return default_assemble_integer (x
, size
, aligned_p
);
/* Output an ascii string.  */

void
pa_output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int co = 0;
      int io = 0;
      for (io = 0, co = 0; io < MIN (4, size - i); io++)
	{
	  register unsigned int c = (unsigned char) p[i + io];

	  /* Quote " and \ for the assembler; printable characters
	     pass through, everything else becomes \xNN.  */
	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      unsigned int hexd;
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd = c / 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd = c % 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}
      /* Break the line before the assembler's input limit; restart a
	 fresh .STRING directive (243 leaves room for the longest
	 4-character escape expansion).  */
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
3347 /* Try to rewrite floating point comparisons & branches to avoid
3348 useless add,tr insns.
3350 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3351 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3352 first attempt to remove useless add,tr insns. It is zero
3353 for the second pass as reorg sometimes leaves bogus REG_DEAD
3356 When CHECK_NOTES is zero we can only eliminate add,tr insns
3357 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3360 remove_useless_addtr_insns (int check_notes
)
3363 static int pass
= 0;
3365 /* This is fairly cheap, so always run it when optimizing. */
3369 int fbranch_count
= 0;
3371 /* Walk all the insns in this function looking for fcmp & fbranch
3372 instructions. Keep track of how many of each we find. */
3373 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3377 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3378 if (! NONJUMP_INSN_P (insn
) && ! JUMP_P (insn
))
3381 tmp
= PATTERN (insn
);
3383 /* It must be a set. */
3384 if (GET_CODE (tmp
) != SET
)
3387 /* If the destination is CCFP, then we've found an fcmp insn. */
3388 tmp
= SET_DEST (tmp
);
3389 if (GET_CODE (tmp
) == REG
&& REGNO (tmp
) == 0)
3395 tmp
= PATTERN (insn
);
3396 /* If this is an fbranch instruction, bump the fbranch counter. */
3397 if (GET_CODE (tmp
) == SET
3398 && SET_DEST (tmp
) == pc_rtx
3399 && GET_CODE (SET_SRC (tmp
)) == IF_THEN_ELSE
3400 && GET_CODE (XEXP (SET_SRC (tmp
), 0)) == NE
3401 && GET_CODE (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == REG
3402 && REGNO (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == 0)
3410 /* Find all floating point compare + branch insns. If possible,
3411 reverse the comparison & the branch to avoid add,tr insns. */
3412 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3417 /* Ignore anything that isn't an INSN. */
3418 if (! NONJUMP_INSN_P (insn
))
3421 tmp
= PATTERN (insn
);
3423 /* It must be a set. */
3424 if (GET_CODE (tmp
) != SET
)
3427 /* The destination must be CCFP, which is register zero. */
3428 tmp
= SET_DEST (tmp
);
3429 if (GET_CODE (tmp
) != REG
|| REGNO (tmp
) != 0)
3432 /* INSN should be a set of CCFP.
3434 See if the result of this insn is used in a reversed FP
3435 conditional branch. If so, reverse our condition and
3436 the branch. Doing so avoids useless add,tr insns. */
3437 next
= next_insn (insn
);
3440 /* Jumps, calls and labels stop our search. */
3441 if (JUMP_P (next
) || CALL_P (next
) || LABEL_P (next
))
3444 /* As does another fcmp insn. */
3445 if (NONJUMP_INSN_P (next
)
3446 && GET_CODE (PATTERN (next
)) == SET
3447 && GET_CODE (SET_DEST (PATTERN (next
))) == REG
3448 && REGNO (SET_DEST (PATTERN (next
))) == 0)
3451 next
= next_insn (next
);
3454 /* Is NEXT_INSN a branch? */
3455 if (next
&& JUMP_P (next
))
3457 rtx pattern
= PATTERN (next
);
3459 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3460 and CCFP dies, then reverse our conditional and the branch
3461 to avoid the add,tr. */
3462 if (GET_CODE (pattern
) == SET
3463 && SET_DEST (pattern
) == pc_rtx
3464 && GET_CODE (SET_SRC (pattern
)) == IF_THEN_ELSE
3465 && GET_CODE (XEXP (SET_SRC (pattern
), 0)) == NE
3466 && GET_CODE (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == REG
3467 && REGNO (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == 0
3468 && GET_CODE (XEXP (SET_SRC (pattern
), 1)) == PC
3469 && (fcmp_count
== fbranch_count
3471 && find_regno_note (next
, REG_DEAD
, 0))))
3473 /* Reverse the branch. */
3474 tmp
= XEXP (SET_SRC (pattern
), 1);
3475 XEXP (SET_SRC (pattern
), 1) = XEXP (SET_SRC (pattern
), 2);
3476 XEXP (SET_SRC (pattern
), 2) = tmp
;
3477 INSN_CODE (next
) = -1;
3479 /* Reverse our condition. */
3480 tmp
= PATTERN (insn
);
3481 PUT_CODE (XEXP (tmp
, 1),
3482 (reverse_condition_maybe_unordered
3483 (GET_CODE (XEXP (tmp
, 1)))));
/* You may have trouble believing this, but this is the 32 bit HP-PA
   stack layout.  %sp grows towards higher addresses.

	Variable arguments (optional; any number may be allocated)

	SP-(4*(N+9))	arg word N

	Fixed arguments (must be allocated; may remain unused)

	SP-32		External Data Pointer (DP)
	SP-24		External/stub RP (RP')
	SP-8		Calling Stub RP (RP'')
	SP-0		Stack Pointer (points to next available address)  */

/* This function saves registers as follows.  Registers marked with ' are
   this function's registers (as opposed to the previous function's).
   If a frame_pointer isn't needed, r4 is saved as a general register;
   the space for the frame pointer is still allocated, though, to keep
   things simple.

	SP (FP')	Previous FP
	SP + 4		Alignment filler (sigh)
	SP + 8		Space for locals reserved here.
	SP + n		All call saved register used.
	SP + o		All call saved fp registers used.
	SP + p (SP')	points to next available address.  */
3556 /* Global variables set by output_function_prologue(). */
3557 /* Size of frame. Need to know this to emit return insns from
3559 static HOST_WIDE_INT actual_fsize
, local_fsize
;
3560 static int save_fregs
;
3562 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3563 Handle case where DISP > 8k by using the add_high_const patterns.
3565 Note in DISP > 8k case, we will leave the high part of the address
3566 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3569 store_reg (int reg
, HOST_WIDE_INT disp
, int base
)
3571 rtx dest
, src
, basereg
;
3574 src
= gen_rtx_REG (word_mode
, reg
);
3575 basereg
= gen_rtx_REG (Pmode
, base
);
3576 if (VAL_14_BITS_P (disp
))
3578 dest
= gen_rtx_MEM (word_mode
, plus_constant (Pmode
, basereg
, disp
));
3579 insn
= emit_move_insn (dest
, src
);
3581 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3583 rtx delta
= GEN_INT (disp
);
3584 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3586 emit_move_insn (tmpreg
, delta
);
3587 insn
= emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
3590 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3591 gen_rtx_SET (tmpreg
,
3592 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
3593 RTX_FRAME_RELATED_P (insn
) = 1;
3595 dest
= gen_rtx_MEM (word_mode
, tmpreg
);
3596 insn
= emit_move_insn (dest
, src
);
3600 rtx delta
= GEN_INT (disp
);
3601 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
3602 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3604 emit_move_insn (tmpreg
, high
);
3605 dest
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
3606 insn
= emit_move_insn (dest
, src
);
3608 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3609 gen_rtx_SET (gen_rtx_MEM (word_mode
,
3610 gen_rtx_PLUS (word_mode
,
3617 RTX_FRAME_RELATED_P (insn
) = 1;
3620 /* Emit RTL to store REG at the memory location specified by BASE and then
3621 add MOD to BASE. MOD must be <= 8k. */
3624 store_reg_modify (int base
, int reg
, HOST_WIDE_INT mod
)
3626 rtx basereg
, srcreg
, delta
;
3629 gcc_assert (VAL_14_BITS_P (mod
));
3631 basereg
= gen_rtx_REG (Pmode
, base
);
3632 srcreg
= gen_rtx_REG (word_mode
, reg
);
3633 delta
= GEN_INT (mod
);
3635 insn
= emit_insn (gen_post_store (basereg
, srcreg
, delta
));
3638 RTX_FRAME_RELATED_P (insn
) = 1;
3640 /* RTX_FRAME_RELATED_P must be set on each frame related set
3641 in a parallel with more than one element. */
3642 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 0)) = 1;
3643 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
3647 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3648 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3649 whether to add a frame note or not.
3651 In the DISP > 8k case, we leave the high part of the address in %r1.
3652 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3655 set_reg_plus_d (int reg
, int base
, HOST_WIDE_INT disp
, int note
)
3659 if (VAL_14_BITS_P (disp
))
3661 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3662 plus_constant (Pmode
,
3663 gen_rtx_REG (Pmode
, base
), disp
));
3665 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3667 rtx basereg
= gen_rtx_REG (Pmode
, base
);
3668 rtx delta
= GEN_INT (disp
);
3669 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3671 emit_move_insn (tmpreg
, delta
);
3672 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3673 gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
3675 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3676 gen_rtx_SET (tmpreg
,
3677 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
3681 rtx basereg
= gen_rtx_REG (Pmode
, base
);
3682 rtx delta
= GEN_INT (disp
);
3683 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3685 emit_move_insn (tmpreg
,
3686 gen_rtx_PLUS (Pmode
, basereg
,
3687 gen_rtx_HIGH (Pmode
, delta
)));
3688 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3689 gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
3692 if (DO_FRAME_NOTES
&& note
)
3693 RTX_FRAME_RELATED_P (insn
) = 1;
3697 pa_compute_frame_size (HOST_WIDE_INT size
, int *fregs_live
)
3702 /* The code in pa_expand_prologue and pa_expand_epilogue must
3703 be consistent with the rounding and size calculation done here.
3704 Change them at the same time. */
3706 /* We do our own stack alignment. First, round the size of the
3707 stack locals up to a word boundary. */
3708 size
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3710 /* Space for previous frame pointer + filler. If any frame is
3711 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3712 waste some space here for the sake of HP compatibility. The
3713 first slot is only used when the frame pointer is needed. */
3714 if (size
|| frame_pointer_needed
)
3715 size
+= STARTING_FRAME_OFFSET
;
3717 /* If the current function calls __builtin_eh_return, then we need
3718 to allocate stack space for registers that will hold data for
3719 the exception handler. */
3720 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3724 for (i
= 0; EH_RETURN_DATA_REGNO (i
) != INVALID_REGNUM
; ++i
)
3726 size
+= i
* UNITS_PER_WORD
;
3729 /* Account for space used by the callee general register saves. */
3730 for (i
= 18, j
= frame_pointer_needed
? 4 : 3; i
>= j
; i
--)
3731 if (df_regs_ever_live_p (i
))
3732 size
+= UNITS_PER_WORD
;
3734 /* Account for space used by the callee floating point register saves. */
3735 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
3736 if (df_regs_ever_live_p (i
)
3737 || (!TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
3741 /* We always save both halves of the FP register, so always
3742 increment the frame size by 8 bytes. */
3746 /* If any of the floating registers are saved, account for the
3747 alignment needed for the floating point register save block. */
3750 size
= (size
+ 7) & ~7;
3755 /* The various ABIs include space for the outgoing parameters in the
3756 size of the current function's stack frame. We don't need to align
3757 for the outgoing arguments as their alignment is set by the final
3758 rounding for the frame as a whole. */
3759 size
+= crtl
->outgoing_args_size
;
3761 /* Allocate space for the fixed frame marker. This space must be
3762 allocated for any function that makes calls or allocates
3764 if (!crtl
->is_leaf
|| size
)
3765 size
+= TARGET_64BIT
? 48 : 32;
3767 /* Finally, round to the preferred stack boundary. */
3768 return ((size
+ PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
- 1)
3769 & ~(PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
- 1));
3772 /* Generate the assembly code for function entry. FILE is a stdio
3773 stream to output the code to. SIZE is an int: how many units of
3774 temporary storage to allocate.
3776 Refer to the array `regs_ever_live' to determine which registers to
3777 save; `regs_ever_live[I]' is nonzero if register number I is ever
3778 used in the function. This function is responsible for knowing
3779 which registers should not be saved even if used. */
3781 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3782 of memory. If any fpu reg is used in the function, we allocate
3783 such a block here, at the bottom of the frame, just in case it's needed.
3785 If this function is a leaf procedure, then we may choose not
3786 to do a "save" insn. The decision about whether or not
3787 to do this is made in regclass.c. */
3790 pa_output_function_prologue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
3792 /* The function's label and associated .PROC must never be
3793 separated and must be output *after* any profiling declarations
3794 to avoid changing spaces/subspaces within a procedure. */
3795 ASM_OUTPUT_LABEL (file
, XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0));
3796 fputs ("\t.PROC\n", file
);
3798 /* pa_expand_prologue does the dirty work now. We just need
3799 to output the assembler directives which denote the start
3801 fprintf (file
, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC
, actual_fsize
);
3803 fputs (",NO_CALLS", file
);
3805 fputs (",CALLS", file
);
3807 fputs (",SAVE_RP", file
);
3809 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3810 at the beginning of the frame and that it is used as the frame
3811 pointer for the frame. We do this because our current frame
3812 layout doesn't conform to that specified in the HP runtime
3813 documentation and we need a way to indicate to programs such as
3814 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3815 isn't used by HP compilers but is supported by the assembler.
3816 However, SAVE_SP is supposed to indicate that the previous stack
3817 pointer has been saved in the frame marker. */
3818 if (frame_pointer_needed
)
3819 fputs (",SAVE_SP", file
);
3821 /* Pass on information about the number of callee register saves
3822 performed in the prologue.
3824 The compiler is supposed to pass the highest register number
3825 saved, the assembler then has to adjust that number before
3826 entering it into the unwind descriptor (to account for any
3827 caller saved registers with lower register numbers than the
3828 first callee saved register). */
3830 fprintf (file
, ",ENTRY_GR=%d", gr_saved
+ 2);
3833 fprintf (file
, ",ENTRY_FR=%d", fr_saved
+ 11);
3835 fputs ("\n\t.ENTRY\n", file
);
3837 remove_useless_addtr_insns (0);
3841 pa_expand_prologue (void)
3843 int merge_sp_adjust_with_store
= 0;
3844 HOST_WIDE_INT size
= get_frame_size ();
3845 HOST_WIDE_INT offset
;
3854 /* Compute total size for frame pointer, filler, locals and rounding to
3855 the next word boundary. Similar code appears in pa_compute_frame_size
3856 and must be changed in tandem with this code. */
3857 local_fsize
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3858 if (local_fsize
|| frame_pointer_needed
)
3859 local_fsize
+= STARTING_FRAME_OFFSET
;
3861 actual_fsize
= pa_compute_frame_size (size
, &save_fregs
);
3862 if (flag_stack_usage_info
)
3863 current_function_static_stack_size
= actual_fsize
;
3865 /* Compute a few things we will use often. */
3866 tmpreg
= gen_rtx_REG (word_mode
, 1);
3868 /* Save RP first. The calling conventions manual states RP will
3869 always be stored into the caller's frame at sp - 20 or sp - 16
3870 depending on which ABI is in use. */
3871 if (df_regs_ever_live_p (2) || crtl
->calls_eh_return
)
3873 store_reg (2, TARGET_64BIT
? -16 : -20, STACK_POINTER_REGNUM
);
3879 /* Allocate the local frame and set up the frame pointer if needed. */
3880 if (actual_fsize
!= 0)
3882 if (frame_pointer_needed
)
3884 /* Copy the old frame pointer temporarily into %r1. Set up the
3885 new stack pointer, then store away the saved old frame pointer
3886 into the stack at sp and at the same time update the stack
3887 pointer by actual_fsize bytes. Two versions, first
3888 handles small (<8k) frames. The second handles large (>=8k)
3890 insn
= emit_move_insn (tmpreg
, hard_frame_pointer_rtx
);
3892 RTX_FRAME_RELATED_P (insn
) = 1;
3894 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3896 RTX_FRAME_RELATED_P (insn
) = 1;
3898 if (VAL_14_BITS_P (actual_fsize
))
3899 store_reg_modify (STACK_POINTER_REGNUM
, 1, actual_fsize
);
3902 /* It is incorrect to store the saved frame pointer at *sp,
3903 then increment sp (writes beyond the current stack boundary).
3905 So instead use stwm to store at *sp and post-increment the
3906 stack pointer as an atomic operation. Then increment sp to
3907 finish allocating the new frame. */
3908 HOST_WIDE_INT adjust1
= 8192 - 64;
3909 HOST_WIDE_INT adjust2
= actual_fsize
- adjust1
;
3911 store_reg_modify (STACK_POINTER_REGNUM
, 1, adjust1
);
3912 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3916 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3917 we need to store the previous stack pointer (frame pointer)
3918 into the frame marker on targets that use the HP unwind
3919 library. This allows the HP unwind library to be used to
3920 unwind GCC frames. However, we are not fully compatible
3921 with the HP library because our frame layout differs from
3922 that specified in the HP runtime specification.
3924 We don't want a frame note on this instruction as the frame
3925 marker moves during dynamic stack allocation.
3927 This instruction also serves as a blockage to prevent
3928 register spills from being scheduled before the stack
3929 pointer is raised. This is necessary as we store
3930 registers using the frame pointer as a base register,
3931 and the frame pointer is set before sp is raised. */
3932 if (TARGET_HPUX_UNWIND_LIBRARY
)
3934 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
,
3935 GEN_INT (TARGET_64BIT
? -8 : -4));
3937 emit_move_insn (gen_rtx_MEM (word_mode
, addr
),
3938 hard_frame_pointer_rtx
);
3941 emit_insn (gen_blockage ());
3943 /* no frame pointer needed. */
3946 /* In some cases we can perform the first callee register save
3947 and allocating the stack frame at the same time. If so, just
3948 make a note of it and defer allocating the frame until saving
3949 the callee registers. */
3950 if (VAL_14_BITS_P (actual_fsize
) && local_fsize
== 0)
3951 merge_sp_adjust_with_store
= 1;
3952 /* Can not optimize. Adjust the stack frame by actual_fsize
3955 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3960 /* Normal register save.
3962 Do not save the frame pointer in the frame_pointer_needed case. It
3963 was done earlier. */
3964 if (frame_pointer_needed
)
3966 offset
= local_fsize
;
3968 /* Saving the EH return data registers in the frame is the simplest
3969 way to get the frame unwind information emitted. We put them
3970 just before the general registers. */
3971 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3973 unsigned int i
, regno
;
3977 regno
= EH_RETURN_DATA_REGNO (i
);
3978 if (regno
== INVALID_REGNUM
)
3981 store_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
3982 offset
+= UNITS_PER_WORD
;
3986 for (i
= 18; i
>= 4; i
--)
3987 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
3989 store_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
3990 offset
+= UNITS_PER_WORD
;
3993 /* Account for %r3 which is saved in a special place. */
3996 /* No frame pointer needed. */
3999 offset
= local_fsize
- actual_fsize
;
4001 /* Saving the EH return data registers in the frame is the simplest
4002 way to get the frame unwind information emitted. */
4003 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4005 unsigned int i
, regno
;
4009 regno
= EH_RETURN_DATA_REGNO (i
);
4010 if (regno
== INVALID_REGNUM
)
4013 /* If merge_sp_adjust_with_store is nonzero, then we can
4014 optimize the first save. */
4015 if (merge_sp_adjust_with_store
)
4017 store_reg_modify (STACK_POINTER_REGNUM
, regno
, -offset
);
4018 merge_sp_adjust_with_store
= 0;
4021 store_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4022 offset
+= UNITS_PER_WORD
;
4026 for (i
= 18; i
>= 3; i
--)
4027 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4029 /* If merge_sp_adjust_with_store is nonzero, then we can
4030 optimize the first GR save. */
4031 if (merge_sp_adjust_with_store
)
4033 store_reg_modify (STACK_POINTER_REGNUM
, i
, -offset
);
4034 merge_sp_adjust_with_store
= 0;
4037 store_reg (i
, offset
, STACK_POINTER_REGNUM
);
4038 offset
+= UNITS_PER_WORD
;
4042 /* If we wanted to merge the SP adjustment with a GR save, but we never
4043 did any GR saves, then just emit the adjustment here. */
4044 if (merge_sp_adjust_with_store
)
4045 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4049 /* The hppa calling conventions say that %r19, the pic offset
4050 register, is saved at sp - 32 (in this function's frame)
4051 when generating PIC code. FIXME: What is the correct thing
4052 to do for functions which make no calls and allocate no
4053 frame? Do we need to allocate a frame, or can we just omit
4054 the save? For now we'll just omit the save.
4056 We don't want a note on this insn as the frame marker can
4057 move if there is a dynamic stack allocation. */
4058 if (flag_pic
&& actual_fsize
!= 0 && !TARGET_64BIT
)
4060 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
, GEN_INT (-32));
4062 emit_move_insn (gen_rtx_MEM (word_mode
, addr
), pic_offset_table_rtx
);
4066 /* Align pointer properly (doubleword boundary). */
4067 offset
= (offset
+ 7) & ~7;
4069 /* Floating point register store. */
4074 /* First get the frame or stack pointer to the start of the FP register
4076 if (frame_pointer_needed
)
4078 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM
, offset
, 0);
4079 base
= hard_frame_pointer_rtx
;
4083 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
4084 base
= stack_pointer_rtx
;
4087 /* Now actually save the FP registers. */
4088 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4090 if (df_regs_ever_live_p (i
)
4091 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
4095 addr
= gen_rtx_MEM (DFmode
,
4096 gen_rtx_POST_INC (word_mode
, tmpreg
));
4097 reg
= gen_rtx_REG (DFmode
, i
);
4098 insn
= emit_move_insn (addr
, reg
);
4101 RTX_FRAME_RELATED_P (insn
) = 1;
4104 rtx mem
= gen_rtx_MEM (DFmode
,
4105 plus_constant (Pmode
, base
,
4107 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4108 gen_rtx_SET (mem
, reg
));
4112 rtx meml
= gen_rtx_MEM (SFmode
,
4113 plus_constant (Pmode
, base
,
4115 rtx memr
= gen_rtx_MEM (SFmode
,
4116 plus_constant (Pmode
, base
,
4118 rtx regl
= gen_rtx_REG (SFmode
, i
);
4119 rtx regr
= gen_rtx_REG (SFmode
, i
+ 1);
4120 rtx setl
= gen_rtx_SET (meml
, regl
);
4121 rtx setr
= gen_rtx_SET (memr
, regr
);
4124 RTX_FRAME_RELATED_P (setl
) = 1;
4125 RTX_FRAME_RELATED_P (setr
) = 1;
4126 vec
= gen_rtvec (2, setl
, setr
);
4127 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4128 gen_rtx_SEQUENCE (VOIDmode
, vec
));
4131 offset
+= GET_MODE_SIZE (DFmode
);
4138 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4139 Handle case where DISP > 8k by using the add_high_const patterns. */
4142 load_reg (int reg
, HOST_WIDE_INT disp
, int base
)
4144 rtx dest
= gen_rtx_REG (word_mode
, reg
);
4145 rtx basereg
= gen_rtx_REG (Pmode
, base
);
4148 if (VAL_14_BITS_P (disp
))
4149 src
= gen_rtx_MEM (word_mode
, plus_constant (Pmode
, basereg
, disp
));
4150 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
4152 rtx delta
= GEN_INT (disp
);
4153 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4155 emit_move_insn (tmpreg
, delta
);
4156 if (TARGET_DISABLE_INDEXING
)
4158 emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4159 src
= gen_rtx_MEM (word_mode
, tmpreg
);
4162 src
= gen_rtx_MEM (word_mode
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4166 rtx delta
= GEN_INT (disp
);
4167 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
4168 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4170 emit_move_insn (tmpreg
, high
);
4171 src
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
4174 emit_move_insn (dest
, src
);
4177 /* Update the total code bytes output to the text section. */
4180 update_total_code_bytes (unsigned int nbytes
)
4182 if ((TARGET_PORTABLE_RUNTIME
|| !TARGET_GAS
|| !TARGET_SOM
)
4183 && !IN_NAMED_SECTION_P (cfun
->decl
))
4185 unsigned int old_total
= total_code_bytes
;
4187 total_code_bytes
+= nbytes
;
4189 /* Be prepared to handle overflows. */
4190 if (old_total
> total_code_bytes
)
4191 total_code_bytes
= UINT_MAX
;
4195 /* This function generates the assembly code for function exit.
4196 Args are as for output_function_prologue ().
4198 The function epilogue should not depend on the current stack
4199 pointer! It should use the frame pointer only. This is mandatory
4200 because of alloca; we also take advantage of it to omit stack
4201 adjustments before returning. */
4204 pa_output_function_epilogue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
4206 rtx_insn
*insn
= get_last_insn ();
4209 /* pa_expand_epilogue does the dirty work now. We just need
4210 to output the assembler directives which denote the end
4213 To make debuggers happy, emit a nop if the epilogue was completely
4214 eliminated due to a volatile call as the last insn in the
4215 current function. That way the return address (in %r2) will
4216 always point to a valid instruction in the current function. */
4218 /* Get the last real insn. */
4220 insn
= prev_real_insn (insn
);
4222 /* If it is a sequence, then look inside. */
4223 if (insn
&& NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == SEQUENCE
)
4224 insn
= as_a
<rtx_sequence
*> (PATTERN (insn
))-> insn (0);
4226 /* If insn is a CALL_INSN, then it must be a call to a volatile
4227 function (otherwise there would be epilogue insns). */
4228 if (insn
&& CALL_P (insn
))
4230 fputs ("\tnop\n", file
);
4236 fputs ("\t.EXIT\n\t.PROCEND\n", file
);
4238 if (TARGET_SOM
&& TARGET_GAS
)
4240 /* We are done with this subspace except possibly for some additional
4241 debug information. Forget that we are in this subspace to ensure
4242 that the next function is output in its own subspace. */
4244 cfun
->machine
->in_nsubspa
= 2;
4247 /* Thunks do their own insn accounting. */
4251 if (INSN_ADDRESSES_SET_P ())
4253 last_address
= extra_nop
? 4 : 0;
4254 insn
= get_last_nonnote_insn ();
4257 last_address
+= INSN_ADDRESSES (INSN_UID (insn
));
4259 last_address
+= insn_default_length (insn
);
4261 last_address
= ((last_address
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
4262 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
4265 last_address
= UINT_MAX
;
4267 /* Finally, update the total number of code bytes output so far. */
4268 update_total_code_bytes (last_address
);
4272 pa_expand_epilogue (void)
4275 HOST_WIDE_INT offset
;
4276 HOST_WIDE_INT ret_off
= 0;
4278 int merge_sp_adjust_with_load
= 0;
4280 /* We will use this often. */
4281 tmpreg
= gen_rtx_REG (word_mode
, 1);
4283 /* Try to restore RP early to avoid load/use interlocks when
4284 RP gets used in the return (bv) instruction. This appears to still
4285 be necessary even when we schedule the prologue and epilogue. */
4288 ret_off
= TARGET_64BIT
? -16 : -20;
4289 if (frame_pointer_needed
)
4291 load_reg (2, ret_off
, HARD_FRAME_POINTER_REGNUM
);
4296 /* No frame pointer, and stack is smaller than 8k. */
4297 if (VAL_14_BITS_P (ret_off
- actual_fsize
))
4299 load_reg (2, ret_off
- actual_fsize
, STACK_POINTER_REGNUM
);
4305 /* General register restores. */
4306 if (frame_pointer_needed
)
4308 offset
= local_fsize
;
4310 /* If the current function calls __builtin_eh_return, then we need
4311 to restore the saved EH data registers. */
4312 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4314 unsigned int i
, regno
;
4318 regno
= EH_RETURN_DATA_REGNO (i
);
4319 if (regno
== INVALID_REGNUM
)
4322 load_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
4323 offset
+= UNITS_PER_WORD
;
4327 for (i
= 18; i
>= 4; i
--)
4328 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4330 load_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
4331 offset
+= UNITS_PER_WORD
;
4336 offset
= local_fsize
- actual_fsize
;
4338 /* If the current function calls __builtin_eh_return, then we need
4339 to restore the saved EH data registers. */
4340 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4342 unsigned int i
, regno
;
4346 regno
= EH_RETURN_DATA_REGNO (i
);
4347 if (regno
== INVALID_REGNUM
)
4350 /* Only for the first load.
4351 merge_sp_adjust_with_load holds the register load
4352 with which we will merge the sp adjustment. */
4353 if (merge_sp_adjust_with_load
== 0
4355 && VAL_14_BITS_P (-actual_fsize
))
4356 merge_sp_adjust_with_load
= regno
;
4358 load_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4359 offset
+= UNITS_PER_WORD
;
4363 for (i
= 18; i
>= 3; i
--)
4365 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4367 /* Only for the first load.
4368 merge_sp_adjust_with_load holds the register load
4369 with which we will merge the sp adjustment. */
4370 if (merge_sp_adjust_with_load
== 0
4372 && VAL_14_BITS_P (-actual_fsize
))
4373 merge_sp_adjust_with_load
= i
;
4375 load_reg (i
, offset
, STACK_POINTER_REGNUM
);
4376 offset
+= UNITS_PER_WORD
;
4381 /* Align pointer properly (doubleword boundary). */
4382 offset
= (offset
+ 7) & ~7;
4384 /* FP register restores. */
4387 /* Adjust the register to index off of. */
4388 if (frame_pointer_needed
)
4389 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM
, offset
, 0);
4391 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
4393 /* Actually do the restores now. */
4394 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4395 if (df_regs_ever_live_p (i
)
4396 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
4398 rtx src
= gen_rtx_MEM (DFmode
,
4399 gen_rtx_POST_INC (word_mode
, tmpreg
));
4400 rtx dest
= gen_rtx_REG (DFmode
, i
);
4401 emit_move_insn (dest
, src
);
4405 /* Emit a blockage insn here to keep these insns from being moved to
4406 an earlier spot in the epilogue, or into the main instruction stream.
4408 This is necessary as we must not cut the stack back before all the
4409 restores are finished. */
4410 emit_insn (gen_blockage ());
4412 /* Reset stack pointer (and possibly frame pointer). The stack
4413 pointer is initially set to fp + 64 to avoid a race condition. */
4414 if (frame_pointer_needed
)
4416 rtx delta
= GEN_INT (-64);
4418 set_reg_plus_d (STACK_POINTER_REGNUM
, HARD_FRAME_POINTER_REGNUM
, 64, 0);
4419 emit_insn (gen_pre_load (hard_frame_pointer_rtx
,
4420 stack_pointer_rtx
, delta
));
4422 /* If we were deferring a callee register restore, do it now. */
4423 else if (merge_sp_adjust_with_load
)
4425 rtx delta
= GEN_INT (-actual_fsize
);
4426 rtx dest
= gen_rtx_REG (word_mode
, merge_sp_adjust_with_load
);
4428 emit_insn (gen_pre_load (dest
, stack_pointer_rtx
, delta
));
4430 else if (actual_fsize
!= 0)
4431 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4434 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4435 frame greater than 8k), do so now. */
4437 load_reg (2, ret_off
, STACK_POINTER_REGNUM
);
4439 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4441 rtx sa
= EH_RETURN_STACKADJ_RTX
;
4443 emit_insn (gen_blockage ());
4444 emit_insn (TARGET_64BIT
4445 ? gen_subdi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
)
4446 : gen_subsi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
));
4451 pa_can_use_return_insn (void)
4453 if (!reload_completed
)
4456 if (frame_pointer_needed
)
4459 if (df_regs_ever_live_p (2))
4465 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4469 hppa_pic_save_rtx (void)
4471 return get_hard_reg_initial_val (word_mode
, PIC_OFFSET_TABLE_REGNUM
);
4474 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4475 #define NO_DEFERRED_PROFILE_COUNTERS 0
4479 /* Vector of funcdef numbers. */
4480 static vec
<int> funcdef_nos
;
4482 /* Output deferred profile counters. */
4484 output_deferred_profile_counters (void)
4489 if (funcdef_nos
.is_empty ())
4492 switch_to_section (data_section
);
4493 align
= MIN (BIGGEST_ALIGNMENT
, LONG_TYPE_SIZE
);
4494 ASM_OUTPUT_ALIGN (asm_out_file
, floor_log2 (align
/ BITS_PER_UNIT
));
4496 for (i
= 0; funcdef_nos
.iterate (i
, &n
); i
++)
4498 targetm
.asm_out
.internal_label (asm_out_file
, "LP", n
);
4499 assemble_integer (const0_rtx
, LONG_TYPE_SIZE
/ BITS_PER_UNIT
, align
, 1);
4502 funcdef_nos
.release ();
4506 hppa_profile_hook (int label_no
)
4508 /* We use SImode for the address of the function in both 32 and
4509 64-bit code to avoid having to provide DImode versions of the
4510 lcla2 and load_offset_label_address insn patterns. */
4511 rtx reg
= gen_reg_rtx (SImode
);
4512 rtx_code_label
*label_rtx
= gen_label_rtx ();
4513 rtx begin_label_rtx
;
4514 rtx_insn
*call_insn
;
4515 char begin_label_name
[16];
4517 ASM_GENERATE_INTERNAL_LABEL (begin_label_name
, FUNC_BEGIN_PROLOG_LABEL
,
4519 begin_label_rtx
= gen_rtx_SYMBOL_REF (SImode
, ggc_strdup (begin_label_name
));
4522 emit_move_insn (arg_pointer_rtx
,
4523 gen_rtx_PLUS (word_mode
, virtual_outgoing_args_rtx
,
4526 emit_move_insn (gen_rtx_REG (word_mode
, 26), gen_rtx_REG (word_mode
, 2));
4528 /* The address of the function is loaded into %r25 with an instruction-
4529 relative sequence that avoids the use of relocations. The sequence
4530 is split so that the load_offset_label_address instruction can
4531 occupy the delay slot of the call to _mcount. */
4533 emit_insn (gen_lcla2 (reg
, label_rtx
));
4535 emit_insn (gen_lcla1 (reg
, label_rtx
));
4537 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode
, 25),
4538 reg
, begin_label_rtx
, label_rtx
));
4540 #if !NO_DEFERRED_PROFILE_COUNTERS
4542 rtx count_label_rtx
, addr
, r24
;
4543 char count_label_name
[16];
4545 funcdef_nos
.safe_push (label_no
);
4546 ASM_GENERATE_INTERNAL_LABEL (count_label_name
, "LP", label_no
);
4547 count_label_rtx
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (count_label_name
));
4549 addr
= force_reg (Pmode
, count_label_rtx
);
4550 r24
= gen_rtx_REG (Pmode
, 24);
4551 emit_move_insn (r24
, addr
);
4554 emit_call_insn (gen_call (gen_rtx_MEM (Pmode
,
4555 gen_rtx_SYMBOL_REF (Pmode
,
4557 GEN_INT (TARGET_64BIT
? 24 : 12)));
4559 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), r24
);
4564 emit_call_insn (gen_call (gen_rtx_MEM (Pmode
,
4565 gen_rtx_SYMBOL_REF (Pmode
,
4567 GEN_INT (TARGET_64BIT
? 16 : 8)));
4571 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 25));
4572 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 26));
4574 /* Indicate the _mcount call cannot throw, nor will it execute a
4576 make_reg_eh_region_note_nothrow_nononlocal (call_insn
);
4579 /* Fetch the return address for the frame COUNT steps up from
4580 the current frame, after the prologue. FRAMEADDR is the
4581 frame pointer of the COUNT frame.
4583 We want to ignore any export stub remnants here. To handle this,
4584 we examine the code at the return address, and if it is an export
4585 stub, we return a memory rtx for the stub return address stored
4588 The value returned is used in two different ways:
4590 1. To find a function's caller.
4592 2. To change the return address for a function.
4594 This function handles most instances of case 1; however, it will
4595 fail if there are two levels of stubs to execute on the return
4596 path. The only way I believe that can happen is if the return value
4597 needs a parameter relocation, which never happens for C code.
4599 This function handles most instances of case 2; however, it will
4600 fail if we did not originally have stub code on the return path
4601 but will need stub code on the new return path. This can happen if
4602 the caller & callee are both in the main program, but the new
4603 return location is in a shared library. */
4606 pa_return_addr_rtx (int count
, rtx frameaddr
)
4613 /* The instruction stream at the return address of a PA1.X export stub is:
4615 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4616 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4617 0x00011820 | stub+16: mtsp r1,sr0
4618 0xe0400002 | stub+20: be,n 0(sr0,rp)
4620 0xe0400002 must be specified as -532676606 so that it won't be
4621 rejected as an invalid immediate operand on 64-bit hosts.
4623 The instruction stream at the return address of a PA2.0 export stub is:
4625 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4626 0xe840d002 | stub+12: bve,n (rp)
4629 HOST_WIDE_INT insns
[4];
4635 rp
= get_hard_reg_initial_val (Pmode
, 2);
4637 if (TARGET_64BIT
|| TARGET_NO_SPACE_REGS
)
4640 /* If there is no export stub then just use the value saved from
4641 the return pointer register. */
4643 saved_rp
= gen_reg_rtx (Pmode
);
4644 emit_move_insn (saved_rp
, rp
);
4646 /* Get pointer to the instruction stream. We have to mask out the
4647 privilege level from the two low order bits of the return address
4648 pointer here so that ins will point to the start of the first
4649 instruction that would have been executed if we returned. */
4650 ins
= copy_to_reg (gen_rtx_AND (Pmode
, rp
, MASK_RETURN_ADDR
));
4651 label
= gen_label_rtx ();
4655 insns
[0] = 0x4bc23fd1;
4656 insns
[1] = -398405630;
4661 insns
[0] = 0x4bc23fd1;
4662 insns
[1] = 0x004010a1;
4663 insns
[2] = 0x00011820;
4664 insns
[3] = -532676606;
4668 /* Check the instruction stream at the normal return address for the
4669 export stub. If it is an export stub, than our return address is
4670 really in -24[frameaddr]. */
4672 for (i
= 0; i
< len
; i
++)
4674 rtx op0
= gen_rtx_MEM (SImode
, plus_constant (Pmode
, ins
, i
* 4));
4675 rtx op1
= GEN_INT (insns
[i
]);
4676 emit_cmp_and_jump_insns (op0
, op1
, NE
, NULL
, SImode
, 0, label
);
4679 /* Here we know that our return address points to an export
4680 stub. We don't want to return the address of the export stub,
4681 but rather the return address of the export stub. That return
4682 address is stored at -24[frameaddr]. */
4684 emit_move_insn (saved_rp
,
4686 memory_address (Pmode
,
4687 plus_constant (Pmode
, frameaddr
,
4696 pa_emit_bcond_fp (rtx operands
[])
4698 enum rtx_code code
= GET_CODE (operands
[0]);
4699 rtx operand0
= operands
[1];
4700 rtx operand1
= operands
[2];
4701 rtx label
= operands
[3];
4703 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode
, 0),
4704 gen_rtx_fmt_ee (code
, CCFPmode
, operand0
, operand1
)));
4706 emit_jump_insn (gen_rtx_SET (pc_rtx
,
4707 gen_rtx_IF_THEN_ELSE (VOIDmode
,
4710 gen_rtx_REG (CCFPmode
, 0),
4712 gen_rtx_LABEL_REF (VOIDmode
, label
),
4717 /* Adjust the cost of a scheduling dependency. Return the new cost of
4718 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4721 pa_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep_insn
, int cost
)
4723 enum attr_type attr_type
;
4725 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4726 true dependencies as they are described with bypasses now. */
4727 if (pa_cpu
>= PROCESSOR_8000
|| REG_NOTE_KIND (link
) == 0)
4730 if (! recog_memoized (insn
))
4733 attr_type
= get_attr_type (insn
);
4735 switch (REG_NOTE_KIND (link
))
4738 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4741 if (attr_type
== TYPE_FPLOAD
)
4743 rtx pat
= PATTERN (insn
);
4744 rtx dep_pat
= PATTERN (dep_insn
);
4745 if (GET_CODE (pat
) == PARALLEL
)
4747 /* This happens for the fldXs,mb patterns. */
4748 pat
= XVECEXP (pat
, 0, 0);
4750 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4751 /* If this happens, we have to extend this to schedule
4752 optimally. Return 0 for now. */
4755 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
4757 if (! recog_memoized (dep_insn
))
4759 switch (get_attr_type (dep_insn
))
4766 case TYPE_FPSQRTSGL
:
4767 case TYPE_FPSQRTDBL
:
4768 /* A fpload can't be issued until one cycle before a
4769 preceding arithmetic operation has finished if
4770 the target of the fpload is any of the sources
4771 (or destination) of the arithmetic operation. */
4772 return insn_default_latency (dep_insn
) - 1;
4779 else if (attr_type
== TYPE_FPALU
)
4781 rtx pat
= PATTERN (insn
);
4782 rtx dep_pat
= PATTERN (dep_insn
);
4783 if (GET_CODE (pat
) == PARALLEL
)
4785 /* This happens for the fldXs,mb patterns. */
4786 pat
= XVECEXP (pat
, 0, 0);
4788 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4789 /* If this happens, we have to extend this to schedule
4790 optimally. Return 0 for now. */
4793 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
4795 if (! recog_memoized (dep_insn
))
4797 switch (get_attr_type (dep_insn
))
4801 case TYPE_FPSQRTSGL
:
4802 case TYPE_FPSQRTDBL
:
4803 /* An ALU flop can't be issued until two cycles before a
4804 preceding divide or sqrt operation has finished if
4805 the target of the ALU flop is any of the sources
4806 (or destination) of the divide or sqrt operation. */
4807 return insn_default_latency (dep_insn
) - 2;
4815 /* For other anti dependencies, the cost is 0. */
4818 case REG_DEP_OUTPUT
:
4819 /* Output dependency; DEP_INSN writes a register that INSN writes some
4821 if (attr_type
== TYPE_FPLOAD
)
4823 rtx pat
= PATTERN (insn
);
4824 rtx dep_pat
= PATTERN (dep_insn
);
4825 if (GET_CODE (pat
) == PARALLEL
)
4827 /* This happens for the fldXs,mb patterns. */
4828 pat
= XVECEXP (pat
, 0, 0);
4830 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4831 /* If this happens, we have to extend this to schedule
4832 optimally. Return 0 for now. */
4835 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
4837 if (! recog_memoized (dep_insn
))
4839 switch (get_attr_type (dep_insn
))
4846 case TYPE_FPSQRTSGL
:
4847 case TYPE_FPSQRTDBL
:
4848 /* A fpload can't be issued until one cycle before a
4849 preceding arithmetic operation has finished if
4850 the target of the fpload is the destination of the
4851 arithmetic operation.
4853 Exception: For PA7100LC, PA7200 and PA7300, the cost
4854 is 3 cycles, unless they bundle together. We also
4855 pay the penalty if the second insn is a fpload. */
4856 return insn_default_latency (dep_insn
) - 1;
4863 else if (attr_type
== TYPE_FPALU
)
4865 rtx pat
= PATTERN (insn
);
4866 rtx dep_pat
= PATTERN (dep_insn
);
4867 if (GET_CODE (pat
) == PARALLEL
)
4869 /* This happens for the fldXs,mb patterns. */
4870 pat
= XVECEXP (pat
, 0, 0);
4872 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4873 /* If this happens, we have to extend this to schedule
4874 optimally. Return 0 for now. */
4877 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
4879 if (! recog_memoized (dep_insn
))
4881 switch (get_attr_type (dep_insn
))
4885 case TYPE_FPSQRTSGL
:
4886 case TYPE_FPSQRTDBL
:
4887 /* An ALU flop can't be issued until two cycles before a
4888 preceding divide or sqrt operation has finished if
4889 the target of the ALU flop is also the target of
4890 the divide or sqrt operation. */
4891 return insn_default_latency (dep_insn
) - 2;
4899 /* For other output dependencies, the cost is 0. */
4907 /* Adjust scheduling priorities. We use this to try and keep addil
4908 and the next use of %r1 close together. */
4910 pa_adjust_priority (rtx_insn
*insn
, int priority
)
4912 rtx set
= single_set (insn
);
4916 src
= SET_SRC (set
);
4917 dest
= SET_DEST (set
);
4918 if (GET_CODE (src
) == LO_SUM
4919 && symbolic_operand (XEXP (src
, 1), VOIDmode
)
4920 && ! read_only_operand (XEXP (src
, 1), VOIDmode
))
4923 else if (GET_CODE (src
) == MEM
4924 && GET_CODE (XEXP (src
, 0)) == LO_SUM
4925 && symbolic_operand (XEXP (XEXP (src
, 0), 1), VOIDmode
)
4926 && ! read_only_operand (XEXP (XEXP (src
, 0), 1), VOIDmode
))
4929 else if (GET_CODE (dest
) == MEM
4930 && GET_CODE (XEXP (dest
, 0)) == LO_SUM
4931 && symbolic_operand (XEXP (XEXP (dest
, 0), 1), VOIDmode
)
4932 && ! read_only_operand (XEXP (XEXP (dest
, 0), 1), VOIDmode
))
4938 /* The 700 can only issue a single insn at a time.
4939 The 7XXX processors can issue two insns at a time.
4940 The 8000 can issue 4 insns at a time. */
4942 pa_issue_rate (void)
4946 case PROCESSOR_700
: return 1;
4947 case PROCESSOR_7100
: return 2;
4948 case PROCESSOR_7100LC
: return 2;
4949 case PROCESSOR_7200
: return 2;
4950 case PROCESSOR_7300
: return 2;
4951 case PROCESSOR_8000
: return 4;
4960 /* Return any length plus adjustment needed by INSN which already has
4961 its length computed as LENGTH. Return LENGTH if no adjustment is
4964 Also compute the length of an inline block move here as it is too
4965 complicated to express as a length attribute in pa.md. */
4967 pa_adjust_insn_length (rtx_insn
*insn
, int length
)
4969 rtx pat
= PATTERN (insn
);
4971 /* If length is negative or undefined, provide initial length. */
4972 if ((unsigned int) length
>= INT_MAX
)
4974 if (GET_CODE (pat
) == SEQUENCE
)
4975 insn
= as_a
<rtx_insn
*> (XVECEXP (pat
, 0, 0));
4977 switch (get_attr_type (insn
))
4980 length
= pa_attr_length_millicode_call (insn
);
4983 length
= pa_attr_length_call (insn
, 0);
4986 length
= pa_attr_length_call (insn
, 1);
4989 length
= pa_attr_length_indirect_call (insn
);
4991 case TYPE_SH_FUNC_ADRS
:
4992 length
= pa_attr_length_millicode_call (insn
) + 20;
4999 /* Block move pattern. */
5000 if (NONJUMP_INSN_P (insn
)
5001 && GET_CODE (pat
) == PARALLEL
5002 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
5003 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
5004 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == MEM
5005 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
5006 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == BLKmode
)
5007 length
+= compute_movmem_length (insn
) - 4;
5008 /* Block clear pattern. */
5009 else if (NONJUMP_INSN_P (insn
)
5010 && GET_CODE (pat
) == PARALLEL
5011 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
5012 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
5013 && XEXP (XVECEXP (pat
, 0, 0), 1) == const0_rtx
5014 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
)
5015 length
+= compute_clrmem_length (insn
) - 4;
5016 /* Conditional branch with an unfilled delay slot. */
5017 else if (JUMP_P (insn
) && ! simplejump_p (insn
))
5019 /* Adjust a short backwards conditional with an unfilled delay slot. */
5020 if (GET_CODE (pat
) == SET
5022 && JUMP_LABEL (insn
) != NULL_RTX
5023 && ! forward_branch_p (insn
))
5025 else if (GET_CODE (pat
) == PARALLEL
5026 && get_attr_type (insn
) == TYPE_PARALLEL_BRANCH
5029 /* Adjust dbra insn with short backwards conditional branch with
5030 unfilled delay slot -- only for case where counter is in a
5031 general register register. */
5032 else if (GET_CODE (pat
) == PARALLEL
5033 && GET_CODE (XVECEXP (pat
, 0, 1)) == SET
5034 && GET_CODE (XEXP (XVECEXP (pat
, 0, 1), 0)) == REG
5035 && ! FP_REG_P (XEXP (XVECEXP (pat
, 0, 1), 0))
5037 && ! forward_branch_p (insn
))
5043 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5046 pa_print_operand_punct_valid_p (unsigned char code
)
5057 /* Print operand X (an rtx) in assembler syntax to file FILE.
5058 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5059 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5062 pa_print_operand (FILE *file
, rtx x
, int code
)
5067 /* Output a 'nop' if there's nothing for the delay slot. */
5068 if (dbr_sequence_length () == 0)
5069 fputs ("\n\tnop", file
);
5072 /* Output a nullification completer if there's nothing for the */
5073 /* delay slot or nullification is requested. */
5074 if (dbr_sequence_length () == 0 ||
5076 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence
, 0, 0))))
5080 /* Print out the second register name of a register pair.
5081 I.e., R (6) => 7. */
5082 fputs (reg_names
[REGNO (x
) + 1], file
);
5085 /* A register or zero. */
5087 || (x
== CONST0_RTX (DFmode
))
5088 || (x
== CONST0_RTX (SFmode
)))
5090 fputs ("%r0", file
);
5096 /* A register or zero (floating point). */
5098 || (x
== CONST0_RTX (DFmode
))
5099 || (x
== CONST0_RTX (SFmode
)))
5101 fputs ("%fr0", file
);
5110 xoperands
[0] = XEXP (XEXP (x
, 0), 0);
5111 xoperands
[1] = XVECEXP (XEXP (XEXP (x
, 0), 1), 0, 0);
5112 pa_output_global_address (file
, xoperands
[1], 0);
5113 fprintf (file
, "(%s)", reg_names
[REGNO (xoperands
[0])]);
5117 case 'C': /* Plain (C)ondition */
5119 switch (GET_CODE (x
))
5122 fputs ("=", file
); break;
5124 fputs ("<>", file
); break;
5126 fputs (">", file
); break;
5128 fputs (">=", file
); break;
5130 fputs (">>=", file
); break;
5132 fputs (">>", file
); break;
5134 fputs ("<", file
); break;
5136 fputs ("<=", file
); break;
5138 fputs ("<<=", file
); break;
5140 fputs ("<<", file
); break;
5145 case 'N': /* Condition, (N)egated */
5146 switch (GET_CODE (x
))
5149 fputs ("<>", file
); break;
5151 fputs ("=", file
); break;
5153 fputs ("<=", file
); break;
5155 fputs ("<", file
); break;
5157 fputs ("<<", file
); break;
5159 fputs ("<<=", file
); break;
5161 fputs (">=", file
); break;
5163 fputs (">", file
); break;
5165 fputs (">>", file
); break;
5167 fputs (">>=", file
); break;
5172 /* For floating point comparisons. Note that the output
5173 predicates are the complement of the desired mode. The
5174 conditions for GT, GE, LT, LE and LTGT cause an invalid
5175 operation exception if the result is unordered and this
5176 exception is enabled in the floating-point status register. */
5178 switch (GET_CODE (x
))
5181 fputs ("!=", file
); break;
5183 fputs ("=", file
); break;
5185 fputs ("!>", file
); break;
5187 fputs ("!>=", file
); break;
5189 fputs ("!<", file
); break;
5191 fputs ("!<=", file
); break;
5193 fputs ("!<>", file
); break;
5195 fputs ("!?<=", file
); break;
5197 fputs ("!?<", file
); break;
5199 fputs ("!?>=", file
); break;
5201 fputs ("!?>", file
); break;
5203 fputs ("!?=", file
); break;
5205 fputs ("!?", file
); break;
5207 fputs ("?", file
); break;
5212 case 'S': /* Condition, operands are (S)wapped. */
5213 switch (GET_CODE (x
))
5216 fputs ("=", file
); break;
5218 fputs ("<>", file
); break;
5220 fputs ("<", file
); break;
5222 fputs ("<=", file
); break;
5224 fputs ("<<=", file
); break;
5226 fputs ("<<", file
); break;
5228 fputs (">", file
); break;
5230 fputs (">=", file
); break;
5232 fputs (">>=", file
); break;
5234 fputs (">>", file
); break;
5239 case 'B': /* Condition, (B)oth swapped and negate. */
5240 switch (GET_CODE (x
))
5243 fputs ("<>", file
); break;
5245 fputs ("=", file
); break;
5247 fputs (">=", file
); break;
5249 fputs (">", file
); break;
5251 fputs (">>", file
); break;
5253 fputs (">>=", file
); break;
5255 fputs ("<=", file
); break;
5257 fputs ("<", file
); break;
5259 fputs ("<<", file
); break;
5261 fputs ("<<=", file
); break;
5267 gcc_assert (GET_CODE (x
) == CONST_INT
);
5268 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~INTVAL (x
));
5271 gcc_assert (GET_CODE (x
) == CONST_INT
);
5272 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - (INTVAL (x
) & 63));
5275 gcc_assert (GET_CODE (x
) == CONST_INT
);
5276 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - (INTVAL (x
) & 31));
5279 gcc_assert (GET_CODE (x
) == CONST_INT
5280 && (INTVAL (x
) == 1 || INTVAL (x
) == 2 || INTVAL (x
) == 3));
5281 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5284 gcc_assert (GET_CODE (x
) == CONST_INT
&& exact_log2 (INTVAL (x
)) >= 0);
5285 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
5288 gcc_assert (GET_CODE (x
) == CONST_INT
);
5289 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 63 - (INTVAL (x
) & 63));
5292 gcc_assert (GET_CODE (x
) == CONST_INT
);
5293 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 31 - (INTVAL (x
) & 31));
5296 if (GET_CODE (x
) == CONST_INT
)
5301 switch (GET_CODE (XEXP (x
, 0)))
5305 if (ASSEMBLER_DIALECT
== 0)
5306 fputs ("s,mb", file
);
5308 fputs (",mb", file
);
5312 if (ASSEMBLER_DIALECT
== 0)
5313 fputs ("s,ma", file
);
5315 fputs (",ma", file
);
5318 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5319 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5321 if (ASSEMBLER_DIALECT
== 0)
5324 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5325 || GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5327 if (ASSEMBLER_DIALECT
== 0)
5328 fputs ("x,s", file
);
5332 else if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5336 if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5342 pa_output_global_address (file
, x
, 0);
5345 pa_output_global_address (file
, x
, 1);
5347 case 0: /* Don't do anything special */
5352 compute_zdepwi_operands (INTVAL (x
), op
);
5353 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5359 compute_zdepdi_operands (INTVAL (x
), op
);
5360 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5364 /* We can get here from a .vtable_inherit due to our
5365 CONSTANT_ADDRESS_P rejecting perfectly good constant
5371 if (GET_CODE (x
) == REG
)
5373 fputs (reg_names
[REGNO (x
)], file
);
5374 if (TARGET_64BIT
&& FP_REG_P (x
) && GET_MODE_SIZE (GET_MODE (x
)) <= 4)
5380 && GET_MODE_SIZE (GET_MODE (x
)) <= 4
5381 && (REGNO (x
) & 1) == 0)
5384 else if (GET_CODE (x
) == MEM
)
5386 int size
= GET_MODE_SIZE (GET_MODE (x
));
5387 rtx base
= NULL_RTX
;
5388 switch (GET_CODE (XEXP (x
, 0)))
5392 base
= XEXP (XEXP (x
, 0), 0);
5393 fprintf (file
, "-%d(%s)", size
, reg_names
[REGNO (base
)]);
5397 base
= XEXP (XEXP (x
, 0), 0);
5398 fprintf (file
, "%d(%s)", size
, reg_names
[REGNO (base
)]);
5401 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
)
5402 fprintf (file
, "%s(%s)",
5403 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 0), 0))],
5404 reg_names
[REGNO (XEXP (XEXP (x
, 0), 1))]);
5405 else if (GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5406 fprintf (file
, "%s(%s)",
5407 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 1), 0))],
5408 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
5409 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5410 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5412 /* Because the REG_POINTER flag can get lost during reload,
5413 pa_legitimate_address_p canonicalizes the order of the
5414 index and base registers in the combined move patterns. */
5415 rtx base
= XEXP (XEXP (x
, 0), 1);
5416 rtx index
= XEXP (XEXP (x
, 0), 0);
5418 fprintf (file
, "%s(%s)",
5419 reg_names
[REGNO (index
)], reg_names
[REGNO (base
)]);
5422 output_address (XEXP (x
, 0));
5425 output_address (XEXP (x
, 0));
5430 output_addr_const (file
, x
);
5433 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5436 pa_output_global_address (FILE *file
, rtx x
, int round_constant
)
5439 /* Imagine (high (const (plus ...))). */
5440 if (GET_CODE (x
) == HIGH
)
5443 if (GET_CODE (x
) == SYMBOL_REF
&& read_only_operand (x
, VOIDmode
))
5444 output_addr_const (file
, x
);
5445 else if (GET_CODE (x
) == SYMBOL_REF
&& !flag_pic
)
5447 output_addr_const (file
, x
);
5448 fputs ("-$global$", file
);
5450 else if (GET_CODE (x
) == CONST
)
5452 const char *sep
= "";
5453 int offset
= 0; /* assembler wants -$global$ at end */
5454 rtx base
= NULL_RTX
;
5456 switch (GET_CODE (XEXP (XEXP (x
, 0), 0)))
5460 base
= XEXP (XEXP (x
, 0), 0);
5461 output_addr_const (file
, base
);
5464 offset
= INTVAL (XEXP (XEXP (x
, 0), 0));
5470 switch (GET_CODE (XEXP (XEXP (x
, 0), 1)))
5474 base
= XEXP (XEXP (x
, 0), 1);
5475 output_addr_const (file
, base
);
5478 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
5484 /* How bogus. The compiler is apparently responsible for
5485 rounding the constant if it uses an LR field selector.
5487 The linker and/or assembler seem a better place since
5488 they have to do this kind of thing already.
5490 If we fail to do this, HP's optimizing linker may eliminate
5491 an addil, but not update the ldw/stw/ldo instruction that
5492 uses the result of the addil. */
5494 offset
= ((offset
+ 0x1000) & ~0x1fff);
5496 switch (GET_CODE (XEXP (x
, 0)))
5509 gcc_assert (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
);
5517 if (!read_only_operand (base
, VOIDmode
) && !flag_pic
)
5518 fputs ("-$global$", file
);
5520 fprintf (file
, "%s%d", sep
, offset
);
5523 output_addr_const (file
, x
);
5526 /* Output boilerplate text to appear at the beginning of the file.
5527 There are several possible versions. */
5528 #define aputs(x) fputs(x, asm_out_file)
5530 pa_file_start_level (void)
5533 aputs ("\t.LEVEL 2.0w\n");
5534 else if (TARGET_PA_20
)
5535 aputs ("\t.LEVEL 2.0\n");
5536 else if (TARGET_PA_11
)
5537 aputs ("\t.LEVEL 1.1\n");
5539 aputs ("\t.LEVEL 1.0\n");
5543 pa_file_start_space (int sortspace
)
5545 aputs ("\t.SPACE $PRIVATE$");
5548 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5550 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5551 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5552 "\n\t.SPACE $TEXT$");
5555 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5556 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5560 pa_file_start_file (int want_version
)
5562 if (write_symbols
!= NO_DEBUG
)
5564 output_file_directive (asm_out_file
, main_input_filename
);
5566 aputs ("\t.version\t\"01.01\"\n");
5571 pa_file_start_mcount (const char *aswhat
)
5574 fprintf (asm_out_file
, "\t.IMPORT _mcount,%s\n", aswhat
);
5578 pa_elf_file_start (void)
5580 pa_file_start_level ();
5581 pa_file_start_mcount ("ENTRY");
5582 pa_file_start_file (0);
5586 pa_som_file_start (void)
5588 pa_file_start_level ();
5589 pa_file_start_space (0);
5590 aputs ("\t.IMPORT $global$,DATA\n"
5591 "\t.IMPORT $$dyncall,MILLICODE\n");
5592 pa_file_start_mcount ("CODE");
5593 pa_file_start_file (0);
5597 pa_linux_file_start (void)
5599 pa_file_start_file (1);
5600 pa_file_start_level ();
5601 pa_file_start_mcount ("CODE");
5605 pa_hpux64_gas_file_start (void)
5607 pa_file_start_level ();
5608 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5610 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file
, "_mcount", "function");
5612 pa_file_start_file (1);
5616 pa_hpux64_hpas_file_start (void)
5618 pa_file_start_level ();
5619 pa_file_start_space (1);
5620 pa_file_start_mcount ("CODE");
5621 pa_file_start_file (0);
5625 /* Search the deferred plabel list for SYMBOL and return its internal
5626 label. If an entry for SYMBOL is not found, a new entry is created. */
5629 pa_get_deferred_plabel (rtx symbol
)
5631 const char *fname
= XSTR (symbol
, 0);
5634 /* See if we have already put this function on the list of deferred
5635 plabels. This list is generally small, so a liner search is not
5636 too ugly. If it proves too slow replace it with something faster. */
5637 for (i
= 0; i
< n_deferred_plabels
; i
++)
5638 if (strcmp (fname
, XSTR (deferred_plabels
[i
].symbol
, 0)) == 0)
5641 /* If the deferred plabel list is empty, or this entry was not found
5642 on the list, create a new entry on the list. */
5643 if (deferred_plabels
== NULL
|| i
== n_deferred_plabels
)
5647 if (deferred_plabels
== 0)
5648 deferred_plabels
= ggc_alloc
<deferred_plabel
> ();
5650 deferred_plabels
= GGC_RESIZEVEC (struct deferred_plabel
,
5652 n_deferred_plabels
+ 1);
5654 i
= n_deferred_plabels
++;
5655 deferred_plabels
[i
].internal_label
= gen_label_rtx ();
5656 deferred_plabels
[i
].symbol
= symbol
;
5658 /* Gross. We have just implicitly taken the address of this
5659 function. Mark it in the same manner as assemble_name. */
5660 id
= maybe_get_identifier (targetm
.strip_name_encoding (fname
));
5662 mark_referenced (id
);
5665 return deferred_plabels
[i
].internal_label
;
5669 output_deferred_plabels (void)
5673 /* If we have some deferred plabels, then we need to switch into the
5674 data or readonly data section, and align it to a 4 byte boundary
5675 before outputting the deferred plabels. */
5676 if (n_deferred_plabels
)
5678 switch_to_section (flag_pic
? data_section
: readonly_data_section
);
5679 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
5682 /* Now output the deferred plabels. */
5683 for (i
= 0; i
< n_deferred_plabels
; i
++)
5685 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5686 CODE_LABEL_NUMBER (deferred_plabels
[i
].internal_label
));
5687 assemble_integer (deferred_plabels
[i
].symbol
,
5688 TARGET_64BIT
? 8 : 4, TARGET_64BIT
? 64 : 32, 1);
5692 /* Initialize optabs to point to emulation routines. */
5695 pa_init_libfuncs (void)
5697 if (HPUX_LONG_DOUBLE_LIBRARY
)
5699 set_optab_libfunc (add_optab
, TFmode
, "_U_Qfadd");
5700 set_optab_libfunc (sub_optab
, TFmode
, "_U_Qfsub");
5701 set_optab_libfunc (smul_optab
, TFmode
, "_U_Qfmpy");
5702 set_optab_libfunc (sdiv_optab
, TFmode
, "_U_Qfdiv");
5703 set_optab_libfunc (smin_optab
, TFmode
, "_U_Qmin");
5704 set_optab_libfunc (smax_optab
, TFmode
, "_U_Qfmax");
5705 set_optab_libfunc (sqrt_optab
, TFmode
, "_U_Qfsqrt");
5706 set_optab_libfunc (abs_optab
, TFmode
, "_U_Qfabs");
5707 set_optab_libfunc (neg_optab
, TFmode
, "_U_Qfneg");
5709 set_optab_libfunc (eq_optab
, TFmode
, "_U_Qfeq");
5710 set_optab_libfunc (ne_optab
, TFmode
, "_U_Qfne");
5711 set_optab_libfunc (gt_optab
, TFmode
, "_U_Qfgt");
5712 set_optab_libfunc (ge_optab
, TFmode
, "_U_Qfge");
5713 set_optab_libfunc (lt_optab
, TFmode
, "_U_Qflt");
5714 set_optab_libfunc (le_optab
, TFmode
, "_U_Qfle");
5715 set_optab_libfunc (unord_optab
, TFmode
, "_U_Qfunord");
5717 set_conv_libfunc (sext_optab
, TFmode
, SFmode
, "_U_Qfcnvff_sgl_to_quad");
5718 set_conv_libfunc (sext_optab
, TFmode
, DFmode
, "_U_Qfcnvff_dbl_to_quad");
5719 set_conv_libfunc (trunc_optab
, SFmode
, TFmode
, "_U_Qfcnvff_quad_to_sgl");
5720 set_conv_libfunc (trunc_optab
, DFmode
, TFmode
, "_U_Qfcnvff_quad_to_dbl");
5722 set_conv_libfunc (sfix_optab
, SImode
, TFmode
,
5723 TARGET_64BIT
? "__U_Qfcnvfxt_quad_to_sgl"
5724 : "_U_Qfcnvfxt_quad_to_sgl");
5725 set_conv_libfunc (sfix_optab
, DImode
, TFmode
,
5726 "_U_Qfcnvfxt_quad_to_dbl");
5727 set_conv_libfunc (ufix_optab
, SImode
, TFmode
,
5728 "_U_Qfcnvfxt_quad_to_usgl");
5729 set_conv_libfunc (ufix_optab
, DImode
, TFmode
,
5730 "_U_Qfcnvfxt_quad_to_udbl");
5732 set_conv_libfunc (sfloat_optab
, TFmode
, SImode
,
5733 "_U_Qfcnvxf_sgl_to_quad");
5734 set_conv_libfunc (sfloat_optab
, TFmode
, DImode
,
5735 "_U_Qfcnvxf_dbl_to_quad");
5736 set_conv_libfunc (ufloat_optab
, TFmode
, SImode
,
5737 "_U_Qfcnvxf_usgl_to_quad");
5738 set_conv_libfunc (ufloat_optab
, TFmode
, DImode
,
5739 "_U_Qfcnvxf_udbl_to_quad");
5742 if (TARGET_SYNC_LIBCALL
)
5743 init_sync_libfuncs (UNITS_PER_WORD
);
5746 /* HP's millicode routines mean something special to the assembler.
5747 Keep track of which ones we have used. */
5749 enum millicodes
{ remI
, remU
, divI
, divU
, mulI
, end1000
};
5750 static void import_milli (enum millicodes
);
5751 static char imported
[(int) end1000
];
5752 static const char * const milli_names
[] = {"remI", "remU", "divI", "divU", "mulI"};
5753 static const char import_string
[] = ".IMPORT $$....,MILLICODE";
5754 #define MILLI_START 10
5757 import_milli (enum millicodes code
)
5759 char str
[sizeof (import_string
)];
5761 if (!imported
[(int) code
])
5763 imported
[(int) code
] = 1;
5764 strcpy (str
, import_string
);
5765 strncpy (str
+ MILLI_START
, milli_names
[(int) code
], 4);
5766 output_asm_insn (str
, 0);
5770 /* The register constraints have put the operands and return value in
5771 the proper registers. */
5774 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED
, rtx_insn
*insn
)
5776 import_milli (mulI
);
5777 return pa_output_millicode_call (insn
, gen_rtx_SYMBOL_REF (Pmode
, "$$mulI"));
5780 /* Emit the rtl for doing a division by a constant. */
5782 /* Do magic division millicodes exist for this value? */
5783 const int pa_magic_milli
[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5785 /* We'll use an array to keep track of the magic millicodes and
5786 whether or not we've used them already. [n][0] is signed, [n][1] is
5789 static int div_milli
[16][2];
5792 pa_emit_hpdiv_const (rtx
*operands
, int unsignedp
)
5794 if (GET_CODE (operands
[2]) == CONST_INT
5795 && INTVAL (operands
[2]) > 0
5796 && INTVAL (operands
[2]) < 16
5797 && pa_magic_milli
[INTVAL (operands
[2])])
5799 rtx ret
= gen_rtx_REG (SImode
, TARGET_64BIT
? 2 : 31);
5801 emit_move_insn (gen_rtx_REG (SImode
, 26), operands
[1]);
5805 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode
, 29),
5806 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
5808 gen_rtx_REG (SImode
, 26),
5810 gen_rtx_CLOBBER (VOIDmode
, operands
[4]),
5811 gen_rtx_CLOBBER (VOIDmode
, operands
[3]),
5812 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 26)),
5813 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 25)),
5814 gen_rtx_CLOBBER (VOIDmode
, ret
))));
5815 emit_move_insn (operands
[0], gen_rtx_REG (SImode
, 29));
5822 pa_output_div_insn (rtx
*operands
, int unsignedp
, rtx_insn
*insn
)
5826 /* If the divisor is a constant, try to use one of the special
5828 if (GET_CODE (operands
[0]) == CONST_INT
)
5830 static char buf
[100];
5831 divisor
= INTVAL (operands
[0]);
5832 if (!div_milli
[divisor
][unsignedp
])
5834 div_milli
[divisor
][unsignedp
] = 1;
5836 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands
);
5838 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands
);
5842 sprintf (buf
, "$$divU_" HOST_WIDE_INT_PRINT_DEC
,
5843 INTVAL (operands
[0]));
5844 return pa_output_millicode_call (insn
,
5845 gen_rtx_SYMBOL_REF (SImode
, buf
));
5849 sprintf (buf
, "$$divI_" HOST_WIDE_INT_PRINT_DEC
,
5850 INTVAL (operands
[0]));
5851 return pa_output_millicode_call (insn
,
5852 gen_rtx_SYMBOL_REF (SImode
, buf
));
5855 /* Divisor isn't a special constant. */
5860 import_milli (divU
);
5861 return pa_output_millicode_call (insn
,
5862 gen_rtx_SYMBOL_REF (SImode
, "$$divU"));
5866 import_milli (divI
);
5867 return pa_output_millicode_call (insn
,
5868 gen_rtx_SYMBOL_REF (SImode
, "$$divI"));
5873 /* Output a $$rem millicode to do mod. */
5876 pa_output_mod_insn (int unsignedp
, rtx_insn
*insn
)
5880 import_milli (remU
);
5881 return pa_output_millicode_call (insn
,
5882 gen_rtx_SYMBOL_REF (SImode
, "$$remU"));
5886 import_milli (remI
);
5887 return pa_output_millicode_call (insn
,
5888 gen_rtx_SYMBOL_REF (SImode
, "$$remI"));
5893 pa_output_arg_descriptor (rtx_insn
*call_insn
)
5895 const char *arg_regs
[4];
5896 machine_mode arg_mode
;
5898 int i
, output_flag
= 0;
5901 /* We neither need nor want argument location descriptors for the
5902 64bit runtime environment or the ELF32 environment. */
5903 if (TARGET_64BIT
|| TARGET_ELF32
)
5906 for (i
= 0; i
< 4; i
++)
5909 /* Specify explicitly that no argument relocations should take place
5910 if using the portable runtime calling conventions. */
5911 if (TARGET_PORTABLE_RUNTIME
)
5913 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5918 gcc_assert (CALL_P (call_insn
));
5919 for (link
= CALL_INSN_FUNCTION_USAGE (call_insn
);
5920 link
; link
= XEXP (link
, 1))
5922 rtx use
= XEXP (link
, 0);
5924 if (! (GET_CODE (use
) == USE
5925 && GET_CODE (XEXP (use
, 0)) == REG
5926 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
5929 arg_mode
= GET_MODE (XEXP (use
, 0));
5930 regno
= REGNO (XEXP (use
, 0));
5931 if (regno
>= 23 && regno
<= 26)
5933 arg_regs
[26 - regno
] = "GR";
5934 if (arg_mode
== DImode
)
5935 arg_regs
[25 - regno
] = "GR";
5937 else if (regno
>= 32 && regno
<= 39)
5939 if (arg_mode
== SFmode
)
5940 arg_regs
[(regno
- 32) / 2] = "FR";
5943 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5944 arg_regs
[(regno
- 34) / 2] = "FR";
5945 arg_regs
[(regno
- 34) / 2 + 1] = "FU";
5947 arg_regs
[(regno
- 34) / 2] = "FU";
5948 arg_regs
[(regno
- 34) / 2 + 1] = "FR";
5953 fputs ("\t.CALL ", asm_out_file
);
5954 for (i
= 0; i
< 4; i
++)
5959 fputc (',', asm_out_file
);
5960 fprintf (asm_out_file
, "ARGW%d=%s", i
, arg_regs
[i
]);
5963 fputc ('\n', asm_out_file
);
5966 /* Inform reload about cases where moving X with a mode MODE to or from
5967 a register in RCLASS requires an extra scratch or immediate register.
5968 Return the class needed for the immediate register. */
5971 pa_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass_i
,
5972 machine_mode mode
, secondary_reload_info
*sri
)
5975 enum reg_class rclass
= (enum reg_class
) rclass_i
;
5977 /* Handle the easy stuff first. */
5978 if (rclass
== R1_REGS
)
5984 if (rclass
== BASE_REG_CLASS
&& regno
< FIRST_PSEUDO_REGISTER
)
5990 /* If we have something like (mem (mem (...)), we can safely assume the
5991 inner MEM will end up in a general register after reloading, so there's
5992 no need for a secondary reload. */
5993 if (GET_CODE (x
) == MEM
&& GET_CODE (XEXP (x
, 0)) == MEM
)
5996 /* Trying to load a constant into a FP register during PIC code
5997 generation requires %r1 as a scratch register. For float modes,
5998 the only legitimate constant is CONST0_RTX. However, there are
5999 a few patterns that accept constant double operands. */
6001 && FP_REG_CLASS_P (rclass
)
6002 && (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
))
6007 sri
->icode
= CODE_FOR_reload_insi_r1
;
6011 sri
->icode
= CODE_FOR_reload_indi_r1
;
6015 sri
->icode
= CODE_FOR_reload_insf_r1
;
6019 sri
->icode
= CODE_FOR_reload_indf_r1
;
6028 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6029 register when we're generating PIC code or when the operand isn't
6031 if (pa_symbolic_expression_p (x
))
6033 if (GET_CODE (x
) == HIGH
)
6036 if (flag_pic
|| !read_only_operand (x
, VOIDmode
))
6041 sri
->icode
= CODE_FOR_reload_insi_r1
;
6045 sri
->icode
= CODE_FOR_reload_indi_r1
;
6055 /* Profiling showed the PA port spends about 1.3% of its compilation
6056 time in true_regnum from calls inside pa_secondary_reload_class. */
6057 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
6058 regno
= true_regnum (x
);
6060 /* Handle reloads for floating point loads and stores. */
6061 if ((regno
>= FIRST_PSEUDO_REGISTER
|| regno
== -1)
6062 && FP_REG_CLASS_P (rclass
))
6068 /* We don't need a secondary reload for indexed memory addresses.
6070 When INT14_OK_STRICT is true, it might appear that we could
6071 directly allow register indirect memory addresses. However,
6072 this doesn't work because we don't support SUBREGs in
6073 floating-point register copies and reload doesn't tell us
6074 when it's going to use a SUBREG. */
6075 if (IS_INDEX_ADDR_P (x
))
6079 /* Request a secondary reload with a general scratch register
6080 for everything else. ??? Could symbolic operands be handled
6081 directly when generating non-pic PA 2.0 code? */
6083 ? direct_optab_handler (reload_in_optab
, mode
)
6084 : direct_optab_handler (reload_out_optab
, mode
));
6088 /* A SAR<->FP register copy requires an intermediate general register
6089 and secondary memory. We need a secondary reload with a general
6090 scratch register for spills. */
6091 if (rclass
== SHIFT_REGS
)
6094 if (regno
>= FIRST_PSEUDO_REGISTER
|| regno
< 0)
6097 ? direct_optab_handler (reload_in_optab
, mode
)
6098 : direct_optab_handler (reload_out_optab
, mode
));
6102 /* Handle FP copy. */
6103 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno
)))
6104 return GENERAL_REGS
;
6107 if (regno
>= 0 && regno
< FIRST_PSEUDO_REGISTER
6108 && REGNO_REG_CLASS (regno
) == SHIFT_REGS
6109 && FP_REG_CLASS_P (rclass
))
6110 return GENERAL_REGS
;
6115 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6116 is only marked as live on entry by df-scan when it is a fixed
6117 register. It isn't a fixed register in the 64-bit runtime,
6118 so we need to mark it here. */
6121 pa_extra_live_on_entry (bitmap regs
)
6124 bitmap_set_bit (regs
, ARG_POINTER_REGNUM
);
6127 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6128 to prevent it from being deleted. */
6131 pa_eh_return_handler_rtx (void)
6135 tmp
= gen_rtx_PLUS (word_mode
, hard_frame_pointer_rtx
,
6136 TARGET_64BIT
? GEN_INT (-16) : GEN_INT (-20));
6137 tmp
= gen_rtx_MEM (word_mode
, tmp
);
6142 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6143 by invisible reference. As a GCC extension, we also pass anything
6144 with a zero or variable size by reference.
6146 The 64-bit runtime does not describe passing any types by invisible
6147 reference. The internals of GCC can't currently handle passing
6148 empty structures, and zero or variable length arrays when they are
6149 not passed entirely on the stack or by reference. Thus, as a GCC
6150 extension, we pass these types by reference. The HP compiler doesn't
6151 support these types, so hopefully there shouldn't be any compatibility
6152 issues. This may have to be revisited when HP releases a C99 compiler
6153 or updates the ABI. */
6156 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED
,
6157 machine_mode mode
, const_tree type
,
6158 bool named ATTRIBUTE_UNUSED
)
6163 size
= int_size_in_bytes (type
);
6165 size
= GET_MODE_SIZE (mode
);
6170 return size
<= 0 || size
> 8;
6174 pa_function_arg_padding (machine_mode mode
, const_tree type
)
6179 && (AGGREGATE_TYPE_P (type
)
6180 || TREE_CODE (type
) == COMPLEX_TYPE
6181 || TREE_CODE (type
) == VECTOR_TYPE
)))
6183 /* Return none if justification is not required. */
6185 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
6186 && (int_size_in_bytes (type
) * BITS_PER_UNIT
) % PARM_BOUNDARY
== 0)
6189 /* The directions set here are ignored when a BLKmode argument larger
6190 than a word is placed in a register. Different code is used for
6191 the stack and registers. This makes it difficult to have a
6192 consistent data representation for both the stack and registers.
6193 For both runtimes, the justification and padding for arguments on
6194 the stack and in registers should be identical. */
6196 /* The 64-bit runtime specifies left justification for aggregates. */
6199 /* The 32-bit runtime architecture specifies right justification.
6200 When the argument is passed on the stack, the argument is padded
6201 with garbage on the left. The HP compiler pads with zeros. */
6205 if (GET_MODE_BITSIZE (mode
) < PARM_BOUNDARY
)
6212 /* Do what is necessary for `va_start'. We look at the current function
6213 to determine if stdargs or varargs is used and fill in an initial
6214 va_list. A pointer to this constructor is returned. */
6217 hppa_builtin_saveregs (void)
6220 tree fntype
= TREE_TYPE (current_function_decl
);
6221 int argadj
= ((!stdarg_p (fntype
))
6222 ? UNITS_PER_WORD
: 0);
6225 offset
= plus_constant (Pmode
, crtl
->args
.arg_offset_rtx
, argadj
);
6227 offset
= crtl
->args
.arg_offset_rtx
;
6233 /* Adjust for varargs/stdarg differences. */
6235 offset
= plus_constant (Pmode
, crtl
->args
.arg_offset_rtx
, -argadj
);
6237 offset
= crtl
->args
.arg_offset_rtx
;
6239 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6240 from the incoming arg pointer and growing to larger addresses. */
6241 for (i
= 26, off
= -64; i
>= 19; i
--, off
+= 8)
6242 emit_move_insn (gen_rtx_MEM (word_mode
,
6243 plus_constant (Pmode
,
6244 arg_pointer_rtx
, off
)),
6245 gen_rtx_REG (word_mode
, i
));
6247 /* The incoming args pointer points just beyond the flushback area;
6248 normally this is not a serious concern. However, when we are doing
6249 varargs/stdargs we want to make the arg pointer point to the start
6250 of the incoming argument area. */
6251 emit_move_insn (virtual_incoming_args_rtx
,
6252 plus_constant (Pmode
, arg_pointer_rtx
, -64));
6254 /* Now return a pointer to the first anonymous argument. */
6255 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6256 virtual_incoming_args_rtx
,
6257 offset
, 0, 0, OPTAB_LIB_WIDEN
));
6260 /* Store general registers on the stack. */
6261 dest
= gen_rtx_MEM (BLKmode
,
6262 plus_constant (Pmode
, crtl
->args
.internal_arg_pointer
,
6264 set_mem_alias_set (dest
, get_varargs_alias_set ());
6265 set_mem_align (dest
, BITS_PER_WORD
);
6266 move_block_from_reg (23, dest
, 4);
6268 /* move_block_from_reg will emit code to store the argument registers
6269 individually as scalar stores.
6271 However, other insns may later load from the same addresses for
6272 a structure load (passing a struct to a varargs routine).
6274 The alias code assumes that such aliasing can never happen, so we
6275 have to keep memory referencing insns from moving up beyond the
6276 last argument register store. So we emit a blockage insn here. */
6277 emit_insn (gen_blockage ());
6279 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6280 crtl
->args
.internal_arg_pointer
,
6281 offset
, 0, 0, OPTAB_LIB_WIDEN
));
6285 hppa_va_start (tree valist
, rtx nextarg
)
6287 nextarg
= expand_builtin_saveregs ();
6288 std_expand_builtin_va_start (valist
, nextarg
);
6292 hppa_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
6297 /* Args grow upward. We can use the generic routines. */
6298 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
6300 else /* !TARGET_64BIT */
6302 tree ptr
= build_pointer_type (type
);
6305 unsigned int size
, ofs
;
6308 indirect
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, 0);
6312 ptr
= build_pointer_type (type
);
6314 size
= int_size_in_bytes (type
);
6315 valist_type
= TREE_TYPE (valist
);
6317 /* Args grow down. Not handled by generic routines. */
6319 u
= fold_convert (sizetype
, size_in_bytes (type
));
6320 u
= fold_build1 (NEGATE_EXPR
, sizetype
, u
);
6321 t
= fold_build_pointer_plus (valist
, u
);
6323 /* Align to 4 or 8 byte boundary depending on argument size. */
6325 u
= build_int_cst (TREE_TYPE (t
), (HOST_WIDE_INT
)(size
> 4 ? -8 : -4));
6326 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
, u
);
6327 t
= fold_convert (valist_type
, t
);
6329 t
= build2 (MODIFY_EXPR
, valist_type
, valist
, t
);
6331 ofs
= (8 - size
) % 4;
6333 t
= fold_build_pointer_plus_hwi (t
, ofs
);
6335 t
= fold_convert (ptr
, t
);
6336 t
= build_va_arg_indirect_ref (t
);
6339 t
= build_va_arg_indirect_ref (t
);
6345 /* True if MODE is valid for the target. By "valid", we mean able to
6346 be manipulated in non-trivial ways. In particular, this means all
6347 the arithmetic is supported.
6349 Currently, TImode is not valid as the HP 64-bit runtime documentation
6350 doesn't document the alignment and calling conventions for this type.
6351 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6352 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6355 pa_scalar_mode_supported_p (machine_mode mode
)
6357 int precision
= GET_MODE_PRECISION (mode
);
6359 switch (GET_MODE_CLASS (mode
))
6361 case MODE_PARTIAL_INT
:
6363 if (precision
== CHAR_TYPE_SIZE
)
6365 if (precision
== SHORT_TYPE_SIZE
)
6367 if (precision
== INT_TYPE_SIZE
)
6369 if (precision
== LONG_TYPE_SIZE
)
6371 if (precision
== LONG_LONG_TYPE_SIZE
)
6376 if (precision
== FLOAT_TYPE_SIZE
)
6378 if (precision
== DOUBLE_TYPE_SIZE
)
6380 if (precision
== LONG_DOUBLE_TYPE_SIZE
)
6384 case MODE_DECIMAL_FLOAT
:
6392 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6393 it branches into the delay slot. Otherwise, return FALSE. */
6396 branch_to_delay_slot_p (rtx_insn
*insn
)
6398 rtx_insn
*jump_insn
;
6400 if (dbr_sequence_length ())
6403 jump_insn
= next_active_insn (JUMP_LABEL (insn
));
6406 insn
= next_active_insn (insn
);
6407 if (jump_insn
== insn
)
6410 /* We can't rely on the length of asms. So, we return FALSE when
6411 the branch is followed by an asm. */
6413 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6414 || extract_asm_operands (PATTERN (insn
)) != NULL_RTX
6415 || get_attr_length (insn
) > 0)
6422 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6424 This occurs when INSN has an unfilled delay slot and is followed
6425 by an asm. Disaster can occur if the asm is empty and the jump
6426 branches into the delay slot. So, we add a nop in the delay slot
6427 when this occurs. */
6430 branch_needs_nop_p (rtx_insn
*insn
)
6432 rtx_insn
*jump_insn
;
6434 if (dbr_sequence_length ())
6437 jump_insn
= next_active_insn (JUMP_LABEL (insn
));
6440 insn
= next_active_insn (insn
);
6441 if (!insn
|| jump_insn
== insn
)
6444 if (!(GET_CODE (PATTERN (insn
)) == ASM_INPUT
6445 || extract_asm_operands (PATTERN (insn
)) != NULL_RTX
)
6446 && get_attr_length (insn
) > 0)
6453 /* Return TRUE if INSN, a forward jump insn, can use nullification
6454 to skip the following instruction. This avoids an extra cycle due
6455 to a mis-predicted branch when we fall through. */
6458 use_skip_p (rtx_insn
*insn
)
6460 rtx_insn
*jump_insn
= next_active_insn (JUMP_LABEL (insn
));
6464 insn
= next_active_insn (insn
);
6466 /* We can't rely on the length of asms, so we can't skip asms. */
6468 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6469 || extract_asm_operands (PATTERN (insn
)) != NULL_RTX
)
6471 if (get_attr_length (insn
) == 4
6472 && jump_insn
== next_active_insn (insn
))
6474 if (get_attr_length (insn
) > 0)
6481 /* This routine handles all the normal conditional branch sequences we
6482 might need to generate. It handles compare immediate vs compare
6483 register, nullification of delay slots, varying length branches,
6484 negated branches, and all combinations of the above. It returns the
6485 output appropriate to emit the branch corresponding to all given
6489 pa_output_cbranch (rtx
*operands
, int negated
, rtx_insn
*insn
)
6491 static char buf
[100];
6493 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6494 int length
= get_attr_length (insn
);
6497 /* A conditional branch to the following instruction (e.g. the delay slot)
6498 is asking for a disaster. This can happen when not optimizing and
6499 when jump optimization fails.
6501 While it is usually safe to emit nothing, this can fail if the
6502 preceding instruction is a nullified branch with an empty delay
6503 slot and the same branch target as this branch. We could check
6504 for this but jump optimization should eliminate nop jumps. It
6505 is always safe to emit a nop. */
6506 if (branch_to_delay_slot_p (insn
))
6509 /* The doubleword form of the cmpib instruction doesn't have the LEU
6510 and GTU conditions while the cmpb instruction does. Since we accept
6511 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6512 if (GET_MODE (operands
[1]) == DImode
&& operands
[2] == const0_rtx
)
6513 operands
[2] = gen_rtx_REG (DImode
, 0);
6514 if (GET_MODE (operands
[2]) == DImode
&& operands
[1] == const0_rtx
)
6515 operands
[1] = gen_rtx_REG (DImode
, 0);
6517 /* If this is a long branch with its delay slot unfilled, set `nullify'
6518 as it can nullify the delay slot and save a nop. */
6519 if (length
== 8 && dbr_sequence_length () == 0)
6522 /* If this is a short forward conditional branch which did not get
6523 its delay slot filled, the delay slot can still be nullified. */
6524 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6525 nullify
= forward_branch_p (insn
);
6527 /* A forward branch over a single nullified insn can be done with a
6528 comclr instruction. This avoids a single cycle penalty due to
6529 mis-predicted branch if we fall through (branch not taken). */
6530 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
6534 /* All short conditional branches except backwards with an unfilled
6538 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6540 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6541 if (GET_MODE (operands
[1]) == DImode
)
6544 strcat (buf
, "%B3");
6546 strcat (buf
, "%S3");
6548 strcat (buf
, " %2,%r1,%%r0");
6551 if (branch_needs_nop_p (insn
))
6552 strcat (buf
, ",n %2,%r1,%0%#");
6554 strcat (buf
, ",n %2,%r1,%0");
6557 strcat (buf
, " %2,%r1,%0");
6560 /* All long conditionals. Note a short backward branch with an
6561 unfilled delay slot is treated just like a long backward branch
6562 with an unfilled delay slot. */
6564 /* Handle weird backwards branch with a filled delay slot
6565 which is nullified. */
6566 if (dbr_sequence_length () != 0
6567 && ! forward_branch_p (insn
)
6570 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6571 if (GET_MODE (operands
[1]) == DImode
)
6574 strcat (buf
, "%S3");
6576 strcat (buf
, "%B3");
6577 strcat (buf
, ",n %2,%r1,.+12\n\tb %0");
6579 /* Handle short backwards branch with an unfilled delay slot.
6580 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6581 taken and untaken branches. */
6582 else if (dbr_sequence_length () == 0
6583 && ! forward_branch_p (insn
)
6584 && INSN_ADDRESSES_SET_P ()
6585 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6586 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6588 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6589 if (GET_MODE (operands
[1]) == DImode
)
6592 strcat (buf
, "%B3 %2,%r1,%0%#");
6594 strcat (buf
, "%S3 %2,%r1,%0%#");
6598 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6599 if (GET_MODE (operands
[1]) == DImode
)
6602 strcat (buf
, "%S3");
6604 strcat (buf
, "%B3");
6606 strcat (buf
, " %2,%r1,%%r0\n\tb,n %0");
6608 strcat (buf
, " %2,%r1,%%r0\n\tb %0");
6613 /* The reversed conditional branch must branch over one additional
6614 instruction if the delay slot is filled and needs to be extracted
6615 by pa_output_lbranch. If the delay slot is empty or this is a
6616 nullified forward branch, the instruction after the reversed
6617 condition branch must be nullified. */
6618 if (dbr_sequence_length () == 0
6619 || (nullify
&& forward_branch_p (insn
)))
6623 operands
[4] = GEN_INT (length
);
6628 operands
[4] = GEN_INT (length
+ 4);
6631 /* Create a reversed conditional branch which branches around
6632 the following insns. */
6633 if (GET_MODE (operands
[1]) != DImode
)
6639 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6642 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6648 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6651 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6660 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6663 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6669 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6672 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6676 output_asm_insn (buf
, operands
);
6677 return pa_output_lbranch (operands
[0], insn
, xdelay
);
6682 /* This routine handles output of long unconditional branches that
6683 exceed the maximum range of a simple branch instruction. Since
6684 we don't have a register available for the branch, we save register
6685 %r1 in the frame marker, load the branch destination DEST into %r1,
6686 execute the branch, and restore %r1 in the delay slot of the branch.
6688 Since long branches may have an insn in the delay slot and the
6689 delay slot is used to restore %r1, we in general need to extract
6690 this insn and execute it before the branch. However, to facilitate
6691 use of this function by conditional branches, we also provide an
6692 option to not extract the delay insn so that it will be emitted
6693 after the long branch. So, if there is an insn in the delay slot,
6694 it is extracted if XDELAY is nonzero.
6696 The lengths of the various long-branch sequences are 20, 16 and 24
6697 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6700 pa_output_lbranch (rtx dest
, rtx_insn
*insn
, int xdelay
)
6704 xoperands
[0] = dest
;
6706 /* First, free up the delay slot. */
6707 if (xdelay
&& dbr_sequence_length () != 0)
6709 /* We can't handle a jump in the delay slot. */
6710 gcc_assert (! JUMP_P (NEXT_INSN (insn
)));
6712 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
6715 /* Now delete the delay insn. */
6716 SET_INSN_DELETED (NEXT_INSN (insn
));
6719 /* Output an insn to save %r1. The runtime documentation doesn't
6720 specify whether the "Clean Up" slot in the callers frame can
6721 be clobbered by the callee. It isn't copied by HP's builtin
6722 alloca, so this suggests that it can be clobbered if necessary.
6723 The "Static Link" location is copied by HP builtin alloca, so
6724 we avoid using it. Using the cleanup slot might be a problem
6725 if we have to interoperate with languages that pass cleanup
6726 information. However, it should be possible to handle these
6727 situations with GCC's asm feature.
6729 The "Current RP" slot is reserved for the called procedure, so
6730 we try to use it when we don't have a frame of our own. It's
6731 rather unlikely that we won't have a frame when we need to emit
6734 Really the way to go long term is a register scavenger; goto
6735 the target of the jump and find a register which we can use
6736 as a scratch to hold the value in %r1. Then, we wouldn't have
6737 to free up the delay slot or clobber a slot that may be needed
6738 for other purposes. */
6741 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6742 /* Use the return pointer slot in the frame marker. */
6743 output_asm_insn ("std %%r1,-16(%%r30)", xoperands
);
6745 /* Use the slot at -40 in the frame marker since HP builtin
6746 alloca doesn't copy it. */
6747 output_asm_insn ("std %%r1,-40(%%r30)", xoperands
);
6751 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6752 /* Use the return pointer slot in the frame marker. */
6753 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands
);
6755 /* Use the "Clean Up" slot in the frame marker. In GCC,
6756 the only other use of this location is for copying a
6757 floating point double argument from a floating-point
6758 register to two general registers. The copy is done
6759 as an "atomic" operation when outputting a call, so it
6760 won't interfere with our using the location here. */
6761 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands
);
6764 if (TARGET_PORTABLE_RUNTIME
)
6766 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
6767 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
6768 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
6772 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
6773 if (TARGET_SOM
|| !TARGET_GAS
)
6775 xoperands
[1] = gen_label_rtx ();
6776 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands
);
6777 targetm
.asm_out
.internal_label (asm_out_file
, "L",
6778 CODE_LABEL_NUMBER (xoperands
[1]));
6779 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands
);
6783 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands
);
6784 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
6786 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
6789 /* Now output a very long branch to the original target. */
6790 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands
);
6792 /* Now restore the value of %r1 in the delay slot. */
6795 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6796 return "ldd -16(%%r30),%%r1";
6798 return "ldd -40(%%r30),%%r1";
6802 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6803 return "ldw -20(%%r30),%%r1";
6805 return "ldw -12(%%r30),%%r1";
6809 /* This routine handles all the branch-on-bit conditional branch sequences we
6810 might need to generate. It handles nullification of delay slots,
6811 varying length branches, negated branches and all combinations of the
6812 above. it returns the appropriate output template to emit the branch. */
6815 pa_output_bb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx_insn
*insn
, int which
)
6817 static char buf
[100];
6819 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6820 int length
= get_attr_length (insn
);
6823 /* A conditional branch to the following instruction (e.g. the delay slot) is
6824 asking for a disaster. I do not think this can happen as this pattern
6825 is only used when optimizing; jump optimization should eliminate the
6826 jump. But be prepared just in case. */
6828 if (branch_to_delay_slot_p (insn
))
6831 /* If this is a long branch with its delay slot unfilled, set `nullify'
6832 as it can nullify the delay slot and save a nop. */
6833 if (length
== 8 && dbr_sequence_length () == 0)
6836 /* If this is a short forward conditional branch which did not get
6837 its delay slot filled, the delay slot can still be nullified. */
6838 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6839 nullify
= forward_branch_p (insn
);
6841 /* A forward branch over a single nullified insn can be done with a
6842 extrs instruction. This avoids a single cycle penalty due to
6843 mis-predicted branch if we fall through (branch not taken). */
6844 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
6849 /* All short conditional branches except backwards with an unfilled
6853 strcpy (buf
, "{extrs,|extrw,s,}");
6855 strcpy (buf
, "bb,");
6856 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
6857 strcpy (buf
, "extrd,s,*");
6858 else if (GET_MODE (operands
[0]) == DImode
)
6859 strcpy (buf
, "bb,*");
6860 if ((which
== 0 && negated
)
6861 || (which
== 1 && ! negated
))
6866 strcat (buf
, " %0,%1,1,%%r0");
6867 else if (nullify
&& negated
)
6869 if (branch_needs_nop_p (insn
))
6870 strcat (buf
, ",n %0,%1,%3%#");
6872 strcat (buf
, ",n %0,%1,%3");
6874 else if (nullify
&& ! negated
)
6876 if (branch_needs_nop_p (insn
))
6877 strcat (buf
, ",n %0,%1,%2%#");
6879 strcat (buf
, ",n %0,%1,%2");
6881 else if (! nullify
&& negated
)
6882 strcat (buf
, " %0,%1,%3");
6883 else if (! nullify
&& ! negated
)
6884 strcat (buf
, " %0,%1,%2");
6887 /* All long conditionals. Note a short backward branch with an
6888 unfilled delay slot is treated just like a long backward branch
6889 with an unfilled delay slot. */
6891 /* Handle weird backwards branch with a filled delay slot
6892 which is nullified. */
6893 if (dbr_sequence_length () != 0
6894 && ! forward_branch_p (insn
)
6897 strcpy (buf
, "bb,");
6898 if (GET_MODE (operands
[0]) == DImode
)
6900 if ((which
== 0 && negated
)
6901 || (which
== 1 && ! negated
))
6906 strcat (buf
, ",n %0,%1,.+12\n\tb %3");
6908 strcat (buf
, ",n %0,%1,.+12\n\tb %2");
6910 /* Handle short backwards branch with an unfilled delay slot.
6911 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6912 taken and untaken branches. */
6913 else if (dbr_sequence_length () == 0
6914 && ! forward_branch_p (insn
)
6915 && INSN_ADDRESSES_SET_P ()
6916 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6917 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6919 strcpy (buf
, "bb,");
6920 if (GET_MODE (operands
[0]) == DImode
)
6922 if ((which
== 0 && negated
)
6923 || (which
== 1 && ! negated
))
6928 strcat (buf
, " %0,%1,%3%#");
6930 strcat (buf
, " %0,%1,%2%#");
6934 if (GET_MODE (operands
[0]) == DImode
)
6935 strcpy (buf
, "extrd,s,*");
6937 strcpy (buf
, "{extrs,|extrw,s,}");
6938 if ((which
== 0 && negated
)
6939 || (which
== 1 && ! negated
))
6943 if (nullify
&& negated
)
6944 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %3");
6945 else if (nullify
&& ! negated
)
6946 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %2");
6948 strcat (buf
, " %0,%1,1,%%r0\n\tb %3");
6950 strcat (buf
, " %0,%1,1,%%r0\n\tb %2");
6955 /* The reversed conditional branch must branch over one additional
6956 instruction if the delay slot is filled and needs to be extracted
6957 by pa_output_lbranch. If the delay slot is empty or this is a
6958 nullified forward branch, the instruction after the reversed
6959 condition branch must be nullified. */
6960 if (dbr_sequence_length () == 0
6961 || (nullify
&& forward_branch_p (insn
)))
6965 operands
[4] = GEN_INT (length
);
6970 operands
[4] = GEN_INT (length
+ 4);
6973 if (GET_MODE (operands
[0]) == DImode
)
6974 strcpy (buf
, "bb,*");
6976 strcpy (buf
, "bb,");
6977 if ((which
== 0 && negated
)
6978 || (which
== 1 && !negated
))
6983 strcat (buf
, ",n %0,%1,.+%4");
6985 strcat (buf
, " %0,%1,.+%4");
6986 output_asm_insn (buf
, operands
);
6987 return pa_output_lbranch (negated
? operands
[3] : operands
[2],
6993 /* This routine handles all the branch-on-variable-bit conditional branch
6994 sequences we might need to generate. It handles nullification of delay
6995 slots, varying length branches, negated branches and all combinations
6996 of the above. it returns the appropriate output template to emit the
7000 pa_output_bvb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx_insn
*insn
,
7003 static char buf
[100];
7005 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7006 int length
= get_attr_length (insn
);
7009 /* A conditional branch to the following instruction (e.g. the delay slot) is
7010 asking for a disaster. I do not think this can happen as this pattern
7011 is only used when optimizing; jump optimization should eliminate the
7012 jump. But be prepared just in case. */
7014 if (branch_to_delay_slot_p (insn
))
7017 /* If this is a long branch with its delay slot unfilled, set `nullify'
7018 as it can nullify the delay slot and save a nop. */
7019 if (length
== 8 && dbr_sequence_length () == 0)
7022 /* If this is a short forward conditional branch which did not get
7023 its delay slot filled, the delay slot can still be nullified. */
7024 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7025 nullify
= forward_branch_p (insn
);
7027 /* A forward branch over a single nullified insn can be done with a
7028 extrs instruction. This avoids a single cycle penalty due to
7029 mis-predicted branch if we fall through (branch not taken). */
7030 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
7035 /* All short conditional branches except backwards with an unfilled
7039 strcpy (buf
, "{vextrs,|extrw,s,}");
7041 strcpy (buf
, "{bvb,|bb,}");
7042 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
7043 strcpy (buf
, "extrd,s,*");
7044 else if (GET_MODE (operands
[0]) == DImode
)
7045 strcpy (buf
, "bb,*");
7046 if ((which
== 0 && negated
)
7047 || (which
== 1 && ! negated
))
7052 strcat (buf
, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7053 else if (nullify
&& negated
)
7055 if (branch_needs_nop_p (insn
))
7056 strcat (buf
, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7058 strcat (buf
, "{,n %0,%3|,n %0,%%sar,%3}");
7060 else if (nullify
&& ! negated
)
7062 if (branch_needs_nop_p (insn
))
7063 strcat (buf
, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7065 strcat (buf
, "{,n %0,%2|,n %0,%%sar,%2}");
7067 else if (! nullify
&& negated
)
7068 strcat (buf
, "{ %0,%3| %0,%%sar,%3}");
7069 else if (! nullify
&& ! negated
)
7070 strcat (buf
, "{ %0,%2| %0,%%sar,%2}");
7073 /* All long conditionals. Note a short backward branch with an
7074 unfilled delay slot is treated just like a long backward branch
7075 with an unfilled delay slot. */
7077 /* Handle weird backwards branch with a filled delay slot
7078 which is nullified. */
7079 if (dbr_sequence_length () != 0
7080 && ! forward_branch_p (insn
)
7083 strcpy (buf
, "{bvb,|bb,}");
7084 if (GET_MODE (operands
[0]) == DImode
)
7086 if ((which
== 0 && negated
)
7087 || (which
== 1 && ! negated
))
7092 strcat (buf
, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7094 strcat (buf
, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7096 /* Handle short backwards branch with an unfilled delay slot.
7097 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7098 taken and untaken branches. */
7099 else if (dbr_sequence_length () == 0
7100 && ! forward_branch_p (insn
)
7101 && INSN_ADDRESSES_SET_P ()
7102 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7103 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7105 strcpy (buf
, "{bvb,|bb,}");
7106 if (GET_MODE (operands
[0]) == DImode
)
7108 if ((which
== 0 && negated
)
7109 || (which
== 1 && ! negated
))
7114 strcat (buf
, "{ %0,%3%#| %0,%%sar,%3%#}");
7116 strcat (buf
, "{ %0,%2%#| %0,%%sar,%2%#}");
7120 strcpy (buf
, "{vextrs,|extrw,s,}");
7121 if (GET_MODE (operands
[0]) == DImode
)
7122 strcpy (buf
, "extrd,s,*");
7123 if ((which
== 0 && negated
)
7124 || (which
== 1 && ! negated
))
7128 if (nullify
&& negated
)
7129 strcat (buf
, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7130 else if (nullify
&& ! negated
)
7131 strcat (buf
, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7133 strcat (buf
, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7135 strcat (buf
, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7140 /* The reversed conditional branch must branch over one additional
7141 instruction if the delay slot is filled and needs to be extracted
7142 by pa_output_lbranch. If the delay slot is empty or this is a
7143 nullified forward branch, the instruction after the reversed
7144 condition branch must be nullified. */
7145 if (dbr_sequence_length () == 0
7146 || (nullify
&& forward_branch_p (insn
)))
7150 operands
[4] = GEN_INT (length
);
7155 operands
[4] = GEN_INT (length
+ 4);
7158 if (GET_MODE (operands
[0]) == DImode
)
7159 strcpy (buf
, "bb,*");
7161 strcpy (buf
, "{bvb,|bb,}");
7162 if ((which
== 0 && negated
)
7163 || (which
== 1 && !negated
))
7168 strcat (buf
, ",n {%0,.+%4|%0,%%sar,.+%4}");
7170 strcat (buf
, " {%0,.+%4|%0,%%sar,.+%4}");
7171 output_asm_insn (buf
, operands
);
7172 return pa_output_lbranch (negated
? operands
[3] : operands
[2],
/* NOTE(review): this chunk is a lossy extraction of GCC's HPPA backend
   (pa.c).  Original lines are split across several physical lines and many
   interior lines (braces, else-arms, some statements) were dropped, so the
   text below is not compilable as-is.  Code left byte-identical; comments
   only.  Recover the pristine function from the GCC source tree before
   making behavioral changes.  */
/* pa_output_dbra: emits the assembler template for a decrement-and-branch
   (dbra-type) insn.  Visible structure: a degenerate branch-to-delay-slot
   case first, then three alternatives selected by WHICH_ALTERNATIVE —
   0: addib/addi on a general register, 1: loop counter lives in an FP
   register (spill through -16(%r30)), 2: loop counter lives in memory.
   Out-of-range branches are handed to pa_output_lbranch.  */
7178 /* Return the output template for emitting a dbra type insn.
7180 Note it may perform some output operations on its own before
7181 returning the final output string. */
7183 pa_output_dbra (rtx
*operands
, rtx_insn
*insn
, int which_alternative
)
7185 int length
= get_attr_length (insn
);
7187 /* A conditional branch to the following instruction (e.g. the delay slot) is
7188 asking for a disaster. Be prepared! */
7190 if (branch_to_delay_slot_p (insn
))
7192 if (which_alternative
== 0)
7193 return "ldo %1(%0),%0";
7194 else if (which_alternative
== 1)
7196 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands
);
7197 output_asm_insn ("ldw -16(%%r30),%4", operands
);
7198 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands
);
7199 return "{fldws|fldw} -16(%%r30),%0";
7203 output_asm_insn ("ldw %0,%4", operands
);
7204 return "ldo %1(%4),%4\n\tstw %4,%0";
7208 if (which_alternative
== 0)
/* Alternative 0: counter in a general register; nullification logic for
   the delay slot follows.  */
7210 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7213 /* If this is a long branch with its delay slot unfilled, set `nullify'
7214 as it can nullify the delay slot and save a nop. */
7215 if (length
== 8 && dbr_sequence_length () == 0)
7218 /* If this is a short forward conditional branch which did not get
7219 its delay slot filled, the delay slot can still be nullified. */
7220 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7221 nullify
= forward_branch_p (insn
);
7228 if (branch_needs_nop_p (insn
))
7229 return "addib,%C2,n %1,%0,%3%#";
7231 return "addib,%C2,n %1,%0,%3";
7234 return "addib,%C2 %1,%0,%3";
7237 /* Handle weird backwards branch with a fulled delay slot
7238 which is nullified. */
7239 if (dbr_sequence_length () != 0
7240 && ! forward_branch_p (insn
)
7242 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7243 /* Handle short backwards branch with an unfilled delay slot.
7244 Using a addb;nop rather than addi;bl saves 1 cycle for both
7245 taken and untaken branches. */
7246 else if (dbr_sequence_length () == 0
7247 && ! forward_branch_p (insn
)
7248 && INSN_ADDRESSES_SET_P ()
7249 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7250 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7251 return "addib,%C2 %1,%0,%3%#";
7253 /* Handle normal cases. */
7255 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7257 return "addi,%N2 %1,%0,%0\n\tb %3";
7260 /* The reversed conditional branch must branch over one additional
7261 instruction if the delay slot is filled and needs to be extracted
7262 by pa_output_lbranch. If the delay slot is empty or this is a
7263 nullified forward branch, the instruction after the reversed
7264 condition branch must be nullified. */
7265 if (dbr_sequence_length () == 0
7266 || (nullify
&& forward_branch_p (insn
)))
7270 operands
[4] = GEN_INT (length
);
7275 operands
[4] = GEN_INT (length
+ 4);
7279 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands
);
7281 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands
);
7283 return pa_output_lbranch (operands
[3], insn
, xdelay
);
7287 /* Deal with gross reload from FP register case. */
7288 else if (which_alternative
== 1)
7290 /* Move loop counter from FP register to MEM then into a GR,
7291 increment the GR, store the GR into MEM, and finally reload
7292 the FP register from MEM from within the branch's delay slot. */
7293 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7295 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands
);
7297 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7298 else if (length
== 28)
7299 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7302 operands
[5] = GEN_INT (length
- 16);
7303 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands
);
7304 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands
);
7305 return pa_output_lbranch (operands
[3], insn
, 0);
7308 /* Deal with gross reload from memory case. */
7311 /* Reload loop counter from memory, the store back to memory
7312 happens in the branch's delay slot. */
7313 output_asm_insn ("ldw %0,%4", operands
);
7315 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7316 else if (length
== 16)
7317 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7320 operands
[5] = GEN_INT (length
- 4);
7321 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands
);
7322 return pa_output_lbranch (operands
[3], insn
, 0);
/* NOTE(review): lossy extraction — see the note on pa_output_dbra in the
   upstream file; lines are split and several interior lines are missing,
   so this text is not compilable.  Code left byte-identical; comments
   only.  */
/* pa_output_movb: emits the assembler template for a movb-type insn
   (conditionally move a register and branch).  REVERSE_COMPARISON flips
   the condition code in operands[2] before template selection.  Visible
   alternatives: 0 = GR destination (movb/or), 1 = FP destination via a
   spill through -16(%r30), 2 = memory destination, and a trailing case
   that targets the SAR (shift-amount) register via mtsar.  */
7327 /* Return the output template for emitting a movb type insn.
7329 Note it may perform some output operations on its own before
7330 returning the final output string. */
7332 pa_output_movb (rtx
*operands
, rtx_insn
*insn
, int which_alternative
,
7333 int reverse_comparison
)
7335 int length
= get_attr_length (insn
);
7337 /* A conditional branch to the following instruction (e.g. the delay slot) is
7338 asking for a disaster. Be prepared! */
7340 if (branch_to_delay_slot_p (insn
))
7342 if (which_alternative
== 0)
7343 return "copy %1,%0";
7344 else if (which_alternative
== 1)
7346 output_asm_insn ("stw %1,-16(%%r30)", operands
);
7347 return "{fldws|fldw} -16(%%r30),%0";
7349 else if (which_alternative
== 2)
7355 /* Support the second variant. */
7356 if (reverse_comparison
)
7357 PUT_CODE (operands
[2], reverse_condition (GET_CODE (operands
[2])));
7359 if (which_alternative
== 0)
7361 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7364 /* If this is a long branch with its delay slot unfilled, set `nullify'
7365 as it can nullify the delay slot and save a nop. */
7366 if (length
== 8 && dbr_sequence_length () == 0)
7369 /* If this is a short forward conditional branch which did not get
7370 its delay slot filled, the delay slot can still be nullified. */
7371 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7372 nullify
= forward_branch_p (insn
);
7379 if (branch_needs_nop_p (insn
))
7380 return "movb,%C2,n %1,%0,%3%#";
7382 return "movb,%C2,n %1,%0,%3";
7385 return "movb,%C2 %1,%0,%3";
7388 /* Handle weird backwards branch with a filled delay slot
7389 which is nullified. */
7390 if (dbr_sequence_length () != 0
7391 && ! forward_branch_p (insn
)
7393 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7395 /* Handle short backwards branch with an unfilled delay slot.
7396 Using a movb;nop rather than or;bl saves 1 cycle for both
7397 taken and untaken branches. */
7398 else if (dbr_sequence_length () == 0
7399 && ! forward_branch_p (insn
)
7400 && INSN_ADDRESSES_SET_P ()
7401 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7402 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7403 return "movb,%C2 %1,%0,%3%#";
7404 /* Handle normal cases. */
7406 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7408 return "or,%N2 %1,%%r0,%0\n\tb %3";
7411 /* The reversed conditional branch must branch over one additional
7412 instruction if the delay slot is filled and needs to be extracted
7413 by pa_output_lbranch. If the delay slot is empty or this is a
7414 nullified forward branch, the instruction after the reversed
7415 condition branch must be nullified. */
7416 if (dbr_sequence_length () == 0
7417 || (nullify
&& forward_branch_p (insn
)))
7421 operands
[4] = GEN_INT (length
);
7426 operands
[4] = GEN_INT (length
+ 4);
7430 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands
);
7432 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands
);
7434 return pa_output_lbranch (operands
[3], insn
, xdelay
);
7437 /* Deal with gross reload for FP destination register case. */
7438 else if (which_alternative
== 1)
7440 /* Move source register to MEM, perform the branch test, then
7441 finally load the FP register from MEM from within the branch's
7443 output_asm_insn ("stw %1,-16(%%r30)", operands
);
7445 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7446 else if (length
== 16)
7447 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7450 operands
[4] = GEN_INT (length
- 4);
7451 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands
);
7452 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands
);
7453 return pa_output_lbranch (operands
[3], insn
, 0);
7456 /* Deal with gross reload from memory case. */
7457 else if (which_alternative
== 2)
7459 /* Reload loop counter from memory, the store back to memory
7460 happens in the branch's delay slot. */
7462 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7463 else if (length
== 12)
7464 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7467 operands
[4] = GEN_INT (length
);
7468 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7470 return pa_output_lbranch (operands
[3], insn
, 0);
7473 /* Handle SAR as a destination. */
7477 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7478 else if (length
== 12)
7479 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7482 operands
[4] = GEN_INT (length
);
7483 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7485 return pa_output_lbranch (operands
[3], insn
, 0);
/* NOTE(review): lossy extraction; lines split and some interior lines
   (braces, else-arms) missing — not compilable as-is.  Comments only.  */
/* copy_fp_args: walks CALL_INSN_FUNCTION_USAGE of INSN and, for each USE
   of an FP argument register (regno 32..39), emits code to copy the value
   into the corresponding integer argument register by spilling through
   -16(%sr0,%r30).  SFmode uses a single ldw; the other (presumably DFmode)
   path stores with fstd and reloads both word halves (%1 and %R1).  */
7490 /* Copy any FP arguments in INSN into integer registers. */
7492 copy_fp_args (rtx_insn
*insn
)
7497 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7499 int arg_mode
, regno
;
7500 rtx use
= XEXP (link
, 0);
7502 if (! (GET_CODE (use
) == USE
7503 && GET_CODE (XEXP (use
, 0)) == REG
7504 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7507 arg_mode
= GET_MODE (XEXP (use
, 0));
7508 regno
= REGNO (XEXP (use
, 0));
7510 /* Is it a floating point register? */
7511 if (regno
>= 32 && regno
<= 39)
7513 /* Copy the FP register into an integer register via memory. */
7514 if (arg_mode
== SFmode
)
7516 xoperands
[0] = XEXP (use
, 0);
7517 xoperands
[1] = gen_rtx_REG (SImode
, 26 - (regno
- 32) / 2);
7518 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands
);
7519 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7523 xoperands
[0] = XEXP (use
, 0);
7524 xoperands
[1] = gen_rtx_REG (DImode
, 25 - (regno
- 34) / 2);
7525 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands
);
7526 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands
);
7527 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
/* NOTE(review): lossy extraction; the tail of this function (the per-mode
   length accumulation and the return statement) is missing from this
   chunk.  Comments only.  */
/* length_fp_args: companion to copy_fp_args — scans the same
   CALL_INSN_FUNCTION_USAGE list and (per the upstream comment) computes
   the byte length of the FP-argument copy sequence that copy_fp_args
   would emit for INSN.  */
7533 /* Compute length of the FP argument copy sequence for INSN. */
7535 length_fp_args (rtx_insn
*insn
)
7540 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7542 int arg_mode
, regno
;
7543 rtx use
= XEXP (link
, 0);
7545 if (! (GET_CODE (use
) == USE
7546 && GET_CODE (XEXP (use
, 0)) == REG
7547 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7550 arg_mode
= GET_MODE (XEXP (use
, 0));
7551 regno
= REGNO (XEXP (use
, 0));
7553 /* Is it a floating point register? */
7554 if (regno
>= 32 && regno
<= 39)
7556 if (arg_mode
== SFmode
)
/* NOTE(review): lossy extraction; the concrete `return N;` lines for each
   case were dropped by the extractor — only the conditions survive.
   Comments only.  */
/* pa_attr_length_millicode_call: attribute-length (in bytes, including
   the delay slot) for a millicode call INSN.  DISTANCE is estimated from
   insn addresses when available; -1 (as unsigned long) means "unknown /
   out of range".  The visible conditions branch on TARGET_LONG_CALLS,
   TARGET_PORTABLE_RUNTIME and the pc-relative reach limits (7600000 for
   PA 2.0 b,l; MAX_PCREL17F_OFFSET for the 17-bit form).  */
7566 /* Return the attribute length for the millicode call instruction INSN.
7567 The length must match the code generated by pa_output_millicode_call.
7568 We include the delay slot in the returned length as it is better to
7569 over estimate the length than to under estimate it. */
7572 pa_attr_length_millicode_call (rtx_insn
*insn
)
7574 unsigned long distance
= -1;
7575 unsigned long total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7577 if (INSN_ADDRESSES_SET_P ())
7579 distance
= (total
+ insn_current_reference_address (insn
));
7580 if (distance
< total
)
7586 if (!TARGET_LONG_CALLS
&& distance
< 7600000)
7591 else if (TARGET_PORTABLE_RUNTIME
)
7595 if (!TARGET_LONG_CALLS
&& distance
< MAX_PCREL17F_OFFSET
)
/* NOTE(review): lossy extraction; several lines (braces, else-arms, some
   operand lines) are missing, so branch structure below is only partially
   visible.  Comments only.  */
/* pa_output_millicode_call: emits the assembly for a millicode call to
   CALL_DEST.  Visible strategies: short {bl|b,l} when the target is in
   reach; a pc-relative addil/ldo + bve,l sequence (with either a
   $PIC_pcrel$0 or an internal-label relocation); a portable-runtime
   ldil/ldo + bv sequence; a be,l/ble absolute sequence; and a long
   GAS/SOM-aware sequence.  The link register is %r2 on TARGET_64BIT and
   %r31 otherwise (xoperands[2]).  A trailing nop is emitted when the
   delay slot is unfilled (seq_length == 0).  */
7605 /* INSN is a function call.
7607 CALL_DEST is the routine we are calling. */
7610 pa_output_millicode_call (rtx_insn
*insn
, rtx call_dest
)
7612 int attr_length
= get_attr_length (insn
);
7613 int seq_length
= dbr_sequence_length ();
7616 xoperands
[0] = call_dest
;
7617 xoperands
[2] = gen_rtx_REG (Pmode
, TARGET_64BIT
? 2 : 31);
7619 /* Handle the common case where we are sure that the branch will
7620 reach the beginning of the $CODE$ subspace. The within reach
7621 form of the $$sh_func_adrs call has a length of 28. Because it
7622 has an attribute type of sh_func_adrs, it never has a nonzero
7623 sequence length (i.e., the delay slot is never filled). */
7624 if (!TARGET_LONG_CALLS
7625 && (attr_length
== 8
7626 || (attr_length
== 28
7627 && get_attr_type (insn
) == TYPE_SH_FUNC_ADRS
)))
7629 output_asm_insn ("{bl|b,l} %0,%2", xoperands
);
7635 /* It might seem that one insn could be saved by accessing
7636 the millicode function using the linkage table. However,
7637 this doesn't work in shared libraries and other dynamically
7638 loaded objects. Using a pc-relative sequence also avoids
7639 problems related to the implicit use of the gp register. */
7640 output_asm_insn ("b,l .+8,%%r1", xoperands
);
7644 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
7645 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
7649 xoperands
[1] = gen_label_rtx ();
7650 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7651 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7652 CODE_LABEL_NUMBER (xoperands
[1]));
7653 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7656 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
7658 else if (TARGET_PORTABLE_RUNTIME
)
7660 /* Pure portable runtime doesn't allow be/ble; we also don't
7661 have PIC support in the assembler/linker, so this sequence
7664 /* Get the address of our target into %r1. */
7665 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7666 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
7668 /* Get our return address into %r31. */
7669 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands
);
7670 output_asm_insn ("addi 8,%%r31,%%r31", xoperands
);
7672 /* Jump to our target address in %r1. */
7673 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
7677 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7679 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands
);
7681 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands
);
7685 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7686 output_asm_insn ("addi 16,%%r1,%%r31", xoperands
);
7688 if (TARGET_SOM
|| !TARGET_GAS
)
7690 /* The HP assembler can generate relocations for the
7691 difference of two symbols. GAS can do this for a
7692 millicode symbol but not an arbitrary external
7693 symbol when generating SOM output. */
7694 xoperands
[1] = gen_label_rtx ();
7695 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7696 CODE_LABEL_NUMBER (xoperands
[1]));
7697 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7698 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7702 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands
);
7703 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7707 /* Jump to our target address in %r1. */
7708 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
7712 if (seq_length
== 0)
7713 output_asm_insn ("nop", xoperands
);
/* NOTE(review): lossy extraction; the base-length initialization and the
   `length += N` / `return length` lines for several cases were dropped.
   Comments only.  */
/* pa_attr_length_call: attribute length (bytes, delay slot included) for
   call INSN; SIBCALL distinguishes sibling calls.  Digs the CALL rtx out
   of a PARALLEL pattern, determines whether the target binds locally, and
   then sizes the sequence: short pc-relative branch, 64-bit plabel,
   non-pic long absolute, long pc-relative (sdiff / GAS pcrel), or the
   32-bit plabel sequence (which also adds length_fp_args for indirect
   argument relocation).  */
7718 /* Return the attribute length of the call instruction INSN. The SIBCALL
7719 flag indicates whether INSN is a regular call or a sibling call. The
7720 length returned must be longer than the code actually generated by
7721 pa_output_call. Since branch shortening is done before delay branch
7722 sequencing, there is no way to determine whether or not the delay
7723 slot will be filled during branch shortening. Even when the delay
7724 slot is filled, we may have to add a nop if the delay slot contains
7725 a branch that can't reach its target. Thus, we always have to include
7726 the delay slot in the length estimate. This used to be done in
7727 pa_adjust_insn_length but we do it here now as some sequences always
7728 fill the delay slot and we can save four bytes in the estimate for
7732 pa_attr_length_call (rtx_insn
*insn
, int sibcall
)
7735 rtx call
, call_dest
;
7738 rtx pat
= PATTERN (insn
);
7739 unsigned long distance
= -1;
7741 gcc_assert (CALL_P (insn
));
7743 if (INSN_ADDRESSES_SET_P ())
7745 unsigned long total
;
7747 total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7748 distance
= (total
+ insn_current_reference_address (insn
));
7749 if (distance
< total
)
7753 gcc_assert (GET_CODE (pat
) == PARALLEL
);
7755 /* Get the call rtx. */
7756 call
= XVECEXP (pat
, 0, 0);
7757 if (GET_CODE (call
) == SET
)
7758 call
= SET_SRC (call
);
7760 gcc_assert (GET_CODE (call
) == CALL
);
7762 /* Determine if this is a local call. */
7763 call_dest
= XEXP (XEXP (call
, 0), 0);
7764 call_decl
= SYMBOL_REF_DECL (call_dest
);
7765 local_call
= call_decl
&& targetm
.binds_local_p (call_decl
);
7767 /* pc-relative branch. */
7768 if (!TARGET_LONG_CALLS
7769 && ((TARGET_PA_20
&& !sibcall
&& distance
< 7600000)
7770 || distance
< MAX_PCREL17F_OFFSET
))
7773 /* 64-bit plabel sequence. */
7774 else if (TARGET_64BIT
&& !local_call
)
7775 length
+= sibcall
? 28 : 24;
7777 /* non-pic long absolute branch sequence. */
7778 else if ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7781 /* long pc-relative branch sequence. */
7782 else if (TARGET_LONG_PIC_SDIFF_CALL
7783 || (TARGET_GAS
&& !TARGET_SOM
7784 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
)))
7788 if (!TARGET_PA_20
&& !TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
7792 /* 32-bit plabel sequence. */
7798 length
+= length_fp_args (insn
);
7808 if (!TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
/* NOTE(review): lossy extraction; numerous lines (braces, else-arms,
   some output_asm_insn operand lines) are missing, so the nesting below
   is only partially visible.  Comments only.  */
/* pa_output_call: emits the assembly for a (possibly sibling) call to
   CALL_DEST.  Visible strategies, mirroring pa_attr_length_call:
   - short {bl|b,l} when pa_attr_length_call (insn, sibcall) == 8;
   - TARGET_64BIT non-local: deferred-plabel indirect call via %r27/%r1
     (ldd/bve), optionally moving a non-jump delay-slot insn ahead of the
     call with final_scan_insn and deleting it;
   - long absolute ldil/be sequences for non-pic or local calls;
   - long pc-relative sequences (TARGET_LONG_PIC_SDIFF_CALL internal-label
     form, or the GAS $PIC_pcrel$0 form);
   - a 32-bit plabel sequence that inlines the $$dyncall protocol
     (copy_fp_args for indirect FP args, LT'/RT' linkage-table loads,
     the bb,>=,n plabel test, space-register juggling via ldsid/mtsp,
     and be/ble through %sr4 or %sr0).
   A trailing nop covers an unfilled delay slot.  */
7816 /* INSN is a function call.
7818 CALL_DEST is the routine we are calling. */
7821 pa_output_call (rtx_insn
*insn
, rtx call_dest
, int sibcall
)
7823 int seq_length
= dbr_sequence_length ();
7824 tree call_decl
= SYMBOL_REF_DECL (call_dest
);
7825 int local_call
= call_decl
&& targetm
.binds_local_p (call_decl
);
7828 xoperands
[0] = call_dest
;
7830 /* Handle the common case where we're sure that the branch will reach
7831 the beginning of the "$CODE$" subspace. This is the beginning of
7832 the current function if we are in a named section. */
7833 if (!TARGET_LONG_CALLS
&& pa_attr_length_call (insn
, sibcall
) == 8)
7835 xoperands
[1] = gen_rtx_REG (word_mode
, sibcall
? 0 : 2);
7836 output_asm_insn ("{bl|b,l} %0,%1", xoperands
);
7840 if (TARGET_64BIT
&& !local_call
)
7842 /* ??? As far as I can tell, the HP linker doesn't support the
7843 long pc-relative sequence described in the 64-bit runtime
7844 architecture. So, we use a slightly longer indirect call. */
7845 xoperands
[0] = pa_get_deferred_plabel (call_dest
);
7846 xoperands
[1] = gen_label_rtx ();
7848 /* If this isn't a sibcall, we put the load of %r27 into the
7849 delay slot. We can't do this in a sibcall as we don't
7850 have a second call-clobbered scratch register available.
7851 We don't need to do anything when generating fast indirect
7853 if (seq_length
!= 0 && !sibcall
)
7855 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
7858 /* Now delete the delay insn. */
7859 SET_INSN_DELETED (NEXT_INSN (insn
));
7863 output_asm_insn ("addil LT'%0,%%r27", xoperands
);
7864 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands
);
7865 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands
);
7869 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands
);
7870 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands
);
7871 output_asm_insn ("bve (%%r1)", xoperands
);
7875 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands
);
7876 output_asm_insn ("bve,l (%%r2),%%r2", xoperands
);
7877 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands
);
7883 int indirect_call
= 0;
7885 /* Emit a long call. There are several different sequences
7886 of increasing length and complexity. In most cases,
7887 they don't allow an instruction in the delay slot. */
7888 if (!((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7889 && !TARGET_LONG_PIC_SDIFF_CALL
7890 && !(TARGET_GAS
&& !TARGET_SOM
7891 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
))
7899 || ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)))
7901 /* A non-jump insn in the delay slot. By definition we can
7902 emit this insn before the call (and in fact before argument
7904 final_scan_insn (NEXT_INSN (insn
), asm_out_file
, optimize
, 0,
7907 /* Now delete the delay insn. */
7908 SET_INSN_DELETED (NEXT_INSN (insn
));
7912 if ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7914 /* This is the best sequence for making long calls in
7915 non-pic code. Unfortunately, GNU ld doesn't provide
7916 the stub needed for external calls, and GAS's support
7917 for this with the SOM linker is buggy. It is safe
7918 to use this for local calls. */
7919 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7921 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands
);
7925 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7928 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands
);
7930 output_asm_insn ("copy %%r31,%%r2", xoperands
);
7936 if (TARGET_LONG_PIC_SDIFF_CALL
)
7938 /* The HP assembler and linker can handle relocations
7939 for the difference of two symbols. The HP assembler
7940 recognizes the sequence as a pc-relative call and
7941 the linker provides stubs when needed. */
7942 xoperands
[1] = gen_label_rtx ();
7943 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7944 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7945 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7946 CODE_LABEL_NUMBER (xoperands
[1]));
7947 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7949 else if (TARGET_GAS
&& !TARGET_SOM
7950 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
))
7952 /* GAS currently can't generate the relocations that
7953 are needed for the SOM linker under HP-UX using this
7954 sequence. The GNU linker doesn't generate the stubs
7955 that are needed for external calls on TARGET_ELF32
7956 with this sequence. For now, we have to use a
7957 longer plabel sequence when using GAS. */
7958 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7959 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7961 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7966 /* Emit a long plabel-based call sequence. This is
7967 essentially an inline implementation of $$dyncall.
7968 We don't actually try to call $$dyncall as this is
7969 as difficult as calling the function itself. */
7970 xoperands
[0] = pa_get_deferred_plabel (call_dest
);
7971 xoperands
[1] = gen_label_rtx ();
7973 /* Since the call is indirect, FP arguments in registers
7974 need to be copied to the general registers. Then, the
7975 argument relocation stub will copy them back. */
7977 copy_fp_args (insn
);
7981 output_asm_insn ("addil LT'%0,%%r19", xoperands
);
7982 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands
);
7983 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands
);
7987 output_asm_insn ("addil LR'%0-$global$,%%r27",
7989 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7993 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands
);
7994 output_asm_insn ("depi 0,31,2,%%r1", xoperands
);
7995 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands
);
7996 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands
);
7998 if (!sibcall
&& !TARGET_PA_20
)
8000 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands
);
8001 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
8002 output_asm_insn ("addi 8,%%r2,%%r2", xoperands
);
8004 output_asm_insn ("addi 16,%%r2,%%r2", xoperands
);
8011 output_asm_insn ("bve (%%r1)", xoperands
);
8016 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
8017 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands
);
8021 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
8026 if (!TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
8027 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8032 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
8033 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands
);
8035 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands
);
8039 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
8040 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands
);
8042 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands
);
8045 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands
);
8047 output_asm_insn ("copy %%r31,%%r2", xoperands
);
8055 if (seq_length
== 0)
8056 output_asm_insn ("nop", xoperands
);
/* NOTE(review): lossy extraction; the `return N;` lines for each case
   were dropped — only the selecting conditions survive.  Comments
   only.  */
/* pa_attr_length_indirect_call: attribute length (bytes, delay slot
   included — the delay slot of an indirect call is consumed by the
   sequence itself) for indirect call INSN.  Distance estimation matches
   pa_attr_length_millicode_call; cases distinguish fast indirect calls,
   in-reach $$dyncall, portable runtime, and the out-of-reach ble form.  */
8061 /* Return the attribute length of the indirect call instruction INSN.
8062 The length must match the code generated by output_indirect call.
8063 The returned length includes the delay slot. Currently, the delay
8064 slot of an indirect call sequence is not exposed and it is used by
8065 the sequence itself. */
8068 pa_attr_length_indirect_call (rtx_insn
*insn
)
8070 unsigned long distance
= -1;
8071 unsigned long total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
8073 if (INSN_ADDRESSES_SET_P ())
8075 distance
= (total
+ insn_current_reference_address (insn
));
8076 if (distance
< total
)
8083 if (TARGET_FAST_INDIRECT_CALLS
8084 || (!TARGET_LONG_CALLS
8085 && !TARGET_PORTABLE_RUNTIME
8086 && ((TARGET_PA_20
&& !TARGET_SOM
&& distance
< 7600000)
8087 || distance
< MAX_PCREL17F_OFFSET
)))
8093 if (TARGET_PORTABLE_RUNTIME
)
8096 /* Out of reach, can use ble. */
/* NOTE(review): lossy extraction; the leading comment/return type and a
   few interior lines are missing.  Comments only.  */
/* pa_output_indirect_call: emits the assembly for an indirect call
   through $$dyncall (target address in %r22 by the 32-bit convention
   visible below).  Cases are selected by pa_attr_length_indirect_call:
   64-bit plabel ldd/bve,l; fast indirect ble; 8-byte b,l/bl to $$dyncall
   (PA 2.0 non-SOM uses b,l with %r2 since the HP linker may substitute
   BLE, which links through %r31); 12-byte absolute ldil/ble; 16-byte
   portable-runtime blr/bv; and a long PIC sequence (internal label for
   SOM/HP-as, $PIC_pcrel$0 for GAS).  */
8101 pa_output_indirect_call (rtx_insn
*insn
, rtx call_dest
)
8107 xoperands
[0] = call_dest
;
8108 output_asm_insn ("ldd 16(%0),%%r2", xoperands
);
8109 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands
);
8113 /* First the special case for kernels, level 0 systems, etc. */
8114 if (TARGET_FAST_INDIRECT_CALLS
)
8115 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8117 /* Now the normal case -- we can reach $$dyncall directly or
8118 we're sure that we can get there via a long-branch stub.
8120 No need to check target flags as the length uniquely identifies
8121 the remaining cases. */
8122 if (pa_attr_length_indirect_call (insn
) == 8)
8124 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8125 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8126 variant of the B,L instruction can't be used on the SOM target. */
8127 if (TARGET_PA_20
&& !TARGET_SOM
)
8128 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8130 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8133 /* Long millicode call, but we are not generating PIC or portable runtime
8135 if (pa_attr_length_indirect_call (insn
) == 12)
8136 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8138 /* Long millicode call for portable runtime. */
8139 if (pa_attr_length_indirect_call (insn
) == 16)
8140 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8142 /* We need a long PIC call to $$dyncall. */
8143 xoperands
[0] = NULL_RTX
;
8144 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands
);
8145 if (TARGET_SOM
|| !TARGET_GAS
)
8147 xoperands
[0] = gen_label_rtx ();
8148 output_asm_insn ("addil L'$$dyncall-%0,%%r2", xoperands
);
8149 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8150 CODE_LABEL_NUMBER (xoperands
[0]));
8151 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands
);
8155 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r2", xoperands
);
8156 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8159 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
8160 output_asm_insn ("ldo 12(%%r2),%%r2", xoperands
);
/* NOTE(review): lossy extraction; the lines that actually build NEWSTR
   (presumably prepending the '@' marker) are missing from this chunk.
   Comments only.  */
/* pa_encode_label: rewrites the assembler name of function symbol SYM so
   the HP-UX 8.0 shared-library scheme can recognize function labels
   (visible: allocate len+1 scratch, then replace XSTR (sym, 0) with a
   GC-allocated copy of the rebuilt string).  */
8164 /* In HPUX 8.0's shared library scheme, special relocations are needed
8165 for function labels if they might be passed to a function
8166 in a shared library (because shared libraries don't live in code
8167 space), and special magic is needed to construct their address. */
8170 pa_encode_label (rtx sym
)
8172 const char *str
= XSTR (sym
, 0);
8173 int len
= strlen (str
) + 1;
8176 p
= newstr
= XALLOCAVEC (char, len
+ 1);
8180 XSTR (sym
, 0) = ggc_alloc_string (newstr
, len
);
/* NOTE(review): lossy extraction; braces and the assignment's left-hand
   line at original 8189/8190 are split.  Comments only.  */
/* pa_encode_section_info: target hook.  Preserves SYMBOL_FLAG_REFERENCED
   across default_encode_section_info on re-encoding (!first), and on the
   first encoding of a text-space DECL sets SYMBOL_REF_FLAG and runs
   function labels through pa_encode_label.  */
8184 pa_encode_section_info (tree decl
, rtx rtl
, int first
)
8186 int old_referenced
= 0;
8188 if (!first
&& MEM_P (rtl
) && GET_CODE (XEXP (rtl
, 0)) == SYMBOL_REF
)
8190 = SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) & SYMBOL_FLAG_REFERENCED
;
8192 default_encode_section_info (decl
, rtl
, first
);
8194 if (first
&& TEXT_SPACE_P (decl
))
8196 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
8197 if (TREE_CODE (decl
) == FUNCTION_DECL
)
8198 pa_encode_label (XEXP (rtl
, 0));
8200 else if (old_referenced
)
8201 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= old_referenced
;
/* NOTE(review): lossy extraction; the return statement is missing from
   this chunk.  Comments only.  */
/* pa_strip_name_encoding: inverse of pa_encode_section_info's name
   mangling — skips a leading '@' (and then a leading '*') before the
   real symbol name.  */
8204 /* This is sort of inverse to pa_encode_section_info. */
8207 pa_strip_name_encoding (const char *str
)
8209 str
+= (*str
== '@');
8210 str
+= (*str
== '*');
/* NOTE(review): lossy extraction; the CONST-stripping statement body is
   missing (only its guard survives).  Comments only.  */
/* pa_is_function_label_plus_const: predicate — nonzero when OP is
   (optionally CONST-wrapped) PLUS of a function label and a CONST_INT;
   used to keep certain patterns from matching during combine.  */
8214 /* Returns 1 if OP is a function label involved in a simple addition
8215 with a constant. Used to keep certain patterns from matching
8216 during instruction combination. */
8218 pa_is_function_label_plus_const (rtx op
)
8220 /* Strip off any CONST. */
8221 if (GET_CODE (op
) == CONST
)
8224 return (GET_CODE (op
) == PLUS
8225 && function_label_operand (XEXP (op
, 0), VOIDmode
)
8226 && GET_CODE (XEXP (op
, 1)) == CONST_INT
);
8229 /* Output assembly code for a thunk to FUNCTION. */
8232 pa_asm_output_mi_thunk (FILE *file
, tree thunk_fndecl
, HOST_WIDE_INT delta
,
8233 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
8236 static unsigned int current_thunk_number
;
8237 int val_14
= VAL_14_BITS_P (delta
);
8238 unsigned int old_last_address
= last_address
, nbytes
= 0;
8242 xoperands
[0] = XEXP (DECL_RTL (function
), 0);
8243 xoperands
[1] = XEXP (DECL_RTL (thunk_fndecl
), 0);
8244 xoperands
[2] = GEN_INT (delta
);
8246 final_start_function (emit_barrier (), file
, 1);
8248 /* Output the thunk. We know that the function is in the same
8249 translation unit (i.e., the same space) as the thunk, and that
8250 thunks are output after their method. Thus, we don't need an
8251 external branch to reach the function. With SOM and GAS,
8252 functions and thunks are effectively in different sections.
8253 Thus, we can always use a IA-relative branch and the linker
8254 will add a long branch stub if necessary.
8256 However, we have to be careful when generating PIC code on the
8257 SOM port to ensure that the sequence does not transfer to an
8258 import stub for the target function as this could clobber the
8259 return value saved at SP-24. This would also apply to the
8260 32-bit linux port if the multi-space model is implemented. */
8261 if ((!TARGET_LONG_CALLS
&& TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8262 && !(flag_pic
&& TREE_PUBLIC (function
))
8263 && (TARGET_GAS
|| last_address
< 262132))
8264 || (!TARGET_LONG_CALLS
&& !TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8265 && ((targetm_common
.have_named_sections
8266 && DECL_SECTION_NAME (thunk_fndecl
) != NULL
8267 /* The GNU 64-bit linker has rather poor stub management.
8268 So, we use a long branch from thunks that aren't in
8269 the same section as the target function. */
8271 && (DECL_SECTION_NAME (thunk_fndecl
)
8272 != DECL_SECTION_NAME (function
)))
8273 || ((DECL_SECTION_NAME (thunk_fndecl
)
8274 == DECL_SECTION_NAME (function
))
8275 && last_address
< 262132)))
8276 /* In this case, we need to be able to reach the start of
8277 the stub table even though the function is likely closer
8278 and can be jumped to directly. */
8279 || (targetm_common
.have_named_sections
8280 && DECL_SECTION_NAME (thunk_fndecl
) == NULL
8281 && DECL_SECTION_NAME (function
) == NULL
8282 && total_code_bytes
< MAX_PCREL17F_OFFSET
)
8284 || (!targetm_common
.have_named_sections
8285 && total_code_bytes
< MAX_PCREL17F_OFFSET
))))
8288 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8290 output_asm_insn ("b %0", xoperands
);
8294 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8299 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8303 else if (TARGET_64BIT
)
8305 /* We only have one call-clobbered scratch register, so we can't
8306 make use of the delay slot if delta doesn't fit in 14 bits. */
8309 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8310 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8313 output_asm_insn ("b,l .+8,%%r1", xoperands
);
8317 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
8318 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
8322 xoperands
[3] = GEN_INT (val_14
? 8 : 16);
8323 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands
);
8328 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
8329 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8334 output_asm_insn ("bv,n %%r0(%%r1)", xoperands
);
8338 else if (TARGET_PORTABLE_RUNTIME
)
8340 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
8341 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands
);
8344 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8346 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8350 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8355 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8359 else if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8361 /* The function is accessible from outside this module. The only
8362 way to avoid an import stub between the thunk and function is to
8363 call the function directly with an indirect sequence similar to
8364 that used by $$dyncall. This is possible because $$dyncall acts
8365 as the import stub in an indirect call. */
8366 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHN", current_thunk_number
);
8367 xoperands
[3] = gen_rtx_SYMBOL_REF (Pmode
, label
);
8368 output_asm_insn ("addil LT'%3,%%r19", xoperands
);
8369 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands
);
8370 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8371 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands
);
8372 output_asm_insn ("depi 0,31,2,%%r22", xoperands
);
8373 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands
);
8374 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8378 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8384 output_asm_insn ("bve (%%r22)", xoperands
);
8387 else if (TARGET_NO_SPACE_REGS
)
8389 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands
);
8394 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands
);
8395 output_asm_insn ("mtsp %%r21,%%sr0", xoperands
);
8396 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands
);
8401 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8403 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8407 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
8409 if (TARGET_SOM
|| !TARGET_GAS
)
8411 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands
);
8412 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands
);
8416 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
8417 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands
);
8421 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8423 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8427 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8432 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8439 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8441 output_asm_insn ("ldil L'%0,%%r22", xoperands
);
8442 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands
);
8446 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8451 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8456 final_end_function ();
8458 if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8460 switch_to_section (data_section
);
8461 output_asm_insn (".align 4", xoperands
);
8462 ASM_OUTPUT_LABEL (file
, label
);
8463 output_asm_insn (".word P'%0", xoperands
);
8466 current_thunk_number
++;
8467 nbytes
= ((nbytes
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
8468 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
8469 last_address
+= nbytes
;
8470 if (old_last_address
> last_address
)
8471 last_address
= UINT_MAX
;
8472 update_total_code_bytes (nbytes
);
8475 /* Only direct calls to static functions are allowed to be sibling (tail)
8478 This restriction is necessary because some linker generated stubs will
8479 store return pointers into rp' in some cases which might clobber a
8480 live value already in rp'.
8482 In a sibcall the current function and the target function share stack
8483 space. Thus if the path to the current function and the path to the
8484 target function save a value in rp', they save the value into the
8485 same stack slot, which has undesirable consequences.
8487 Because of the deferred binding nature of shared libraries any function
8488 with external scope could be in a different load module and thus require
8489 rp' to be saved when calling that function. So sibcall optimizations
8490 can only be safe for static function.
8492 Note that GCC never needs return value relocations, so we don't have to
8493 worry about static calls with return value relocations (which require
8496 It is safe to perform a sibcall optimization when the target function
8497 will never return. */
8499 pa_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
8501 if (TARGET_PORTABLE_RUNTIME
)
8504 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
8505 single subspace mode and the call is not indirect. As far as I know,
8506 there is no operating system support for the multiple subspace mode.
8507 It might be possible to support indirect calls if we didn't use
8508 $$dyncall (see the indirect sequence generated in pa_output_call). */
8510 return (decl
!= NULL_TREE
);
8512 /* Sibcalls are not ok because the arg pointer register is not a fixed
8513 register. This prevents the sibcall optimization from occurring. In
8514 addition, there are problems with stub placement using GNU ld. This
8515 is because a normal sibcall branch uses a 17-bit relocation while
8516 a regular call branch uses a 22-bit relocation. As a result, more
8517 care needs to be taken in the placement of long-branch stubs. */
8521 /* Sibcalls are only ok within a translation unit. */
8522 return (decl
&& !TREE_PUBLIC (decl
));
8525 /* ??? Addition is not commutative on the PA due to the weird implicit
8526 space register selection rules for memory addresses. Therefore, we
8527 don't consider a + b == b + a, as this might be inside a MEM. */
8529 pa_commutative_p (const_rtx x
, int outer_code
)
8531 return (COMMUTATIVE_P (x
)
8532 && (TARGET_NO_SPACE_REGS
8533 || (outer_code
!= UNKNOWN
&& outer_code
!= MEM
)
8534 || GET_CODE (x
) != PLUS
));
8537 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8538 use in fmpyadd instructions. */
8540 pa_fmpyaddoperands (rtx
*operands
)
8542 machine_mode mode
= GET_MODE (operands
[0]);
8544 /* Must be a floating point mode. */
8545 if (mode
!= SFmode
&& mode
!= DFmode
)
8548 /* All modes must be the same. */
8549 if (! (mode
== GET_MODE (operands
[1])
8550 && mode
== GET_MODE (operands
[2])
8551 && mode
== GET_MODE (operands
[3])
8552 && mode
== GET_MODE (operands
[4])
8553 && mode
== GET_MODE (operands
[5])))
8556 /* All operands must be registers. */
8557 if (! (GET_CODE (operands
[1]) == REG
8558 && GET_CODE (operands
[2]) == REG
8559 && GET_CODE (operands
[3]) == REG
8560 && GET_CODE (operands
[4]) == REG
8561 && GET_CODE (operands
[5]) == REG
))
8564 /* Only 2 real operands to the addition. One of the input operands must
8565 be the same as the output operand. */
8566 if (! rtx_equal_p (operands
[3], operands
[4])
8567 && ! rtx_equal_p (operands
[3], operands
[5]))
8570 /* Inout operand of add cannot conflict with any operands from multiply. */
8571 if (rtx_equal_p (operands
[3], operands
[0])
8572 || rtx_equal_p (operands
[3], operands
[1])
8573 || rtx_equal_p (operands
[3], operands
[2]))
8576 /* multiply cannot feed into addition operands. */
8577 if (rtx_equal_p (operands
[4], operands
[0])
8578 || rtx_equal_p (operands
[5], operands
[0]))
8581 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8583 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
8584 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
8585 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
8586 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
8587 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
8588 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
8591 /* Passed. Operands are suitable for fmpyadd. */
8595 #if !defined(USE_COLLECT2)
8597 pa_asm_out_constructor (rtx symbol
, int priority
)
8599 if (!function_label_operand (symbol
, VOIDmode
))
8600 pa_encode_label (symbol
);
8602 #ifdef CTORS_SECTION_ASM_OP
8603 default_ctor_section_asm_out_constructor (symbol
, priority
);
8605 # ifdef TARGET_ASM_NAMED_SECTION
8606 default_named_section_asm_out_constructor (symbol
, priority
);
8608 default_stabs_asm_out_constructor (symbol
, priority
);
8614 pa_asm_out_destructor (rtx symbol
, int priority
)
8616 if (!function_label_operand (symbol
, VOIDmode
))
8617 pa_encode_label (symbol
);
8619 #ifdef DTORS_SECTION_ASM_OP
8620 default_dtor_section_asm_out_destructor (symbol
, priority
);
8622 # ifdef TARGET_ASM_NAMED_SECTION
8623 default_named_section_asm_out_destructor (symbol
, priority
);
8625 default_stabs_asm_out_destructor (symbol
, priority
);
8631 /* This function places uninitialized global data in the bss section.
8632 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8633 function on the SOM port to prevent uninitialized global data from
8634 being placed in the data section. */
8637 pa_asm_output_aligned_bss (FILE *stream
,
8639 unsigned HOST_WIDE_INT size
,
8642 switch_to_section (bss_section
);
8643 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8645 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8646 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "object");
8649 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8650 ASM_OUTPUT_SIZE_DIRECTIVE (stream
, name
, size
);
8653 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8654 ASM_OUTPUT_LABEL (stream
, name
);
8655 fprintf (stream
, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
8658 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8659 that doesn't allow the alignment of global common storage to be directly
8660 specified. The SOM linker aligns common storage based on the rounded
8661 value of the NUM_BYTES parameter in the .comm directive. It's not
8662 possible to use the .align directive as it doesn't affect the alignment
8663 of the label associated with a .comm directive. */
8666 pa_asm_output_aligned_common (FILE *stream
,
8668 unsigned HOST_WIDE_INT size
,
8671 unsigned int max_common_align
;
8673 max_common_align
= TARGET_64BIT
? 128 : (size
>= 4096 ? 256 : 64);
8674 if (align
> max_common_align
)
8676 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8677 "for global common data. Using %u",
8678 align
/ BITS_PER_UNIT
, name
, max_common_align
/ BITS_PER_UNIT
);
8679 align
= max_common_align
;
8682 switch_to_section (bss_section
);
8684 assemble_name (stream
, name
);
8685 fprintf (stream
, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED
"\n",
8686 MAX (size
, align
/ BITS_PER_UNIT
));
8689 /* We can't use .comm for local common storage as the SOM linker effectively
8690 treats the symbol as universal and uses the same storage for local symbols
8691 with the same name in different object files. The .block directive
8692 reserves an uninitialized block of storage. However, it's not common
8693 storage. Fortunately, GCC never requests common storage with the same
8694 name in any given translation unit. */
8697 pa_asm_output_aligned_local (FILE *stream
,
8699 unsigned HOST_WIDE_INT size
,
8702 switch_to_section (bss_section
);
8703 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8706 fprintf (stream
, "%s", LOCAL_ASM_OP
);
8707 assemble_name (stream
, name
);
8708 fprintf (stream
, "\n");
8711 ASM_OUTPUT_LABEL (stream
, name
);
8712 fprintf (stream
, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
8715 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8716 use in fmpysub instructions. */
8718 pa_fmpysuboperands (rtx
*operands
)
8720 machine_mode mode
= GET_MODE (operands
[0]);
8722 /* Must be a floating point mode. */
8723 if (mode
!= SFmode
&& mode
!= DFmode
)
8726 /* All modes must be the same. */
8727 if (! (mode
== GET_MODE (operands
[1])
8728 && mode
== GET_MODE (operands
[2])
8729 && mode
== GET_MODE (operands
[3])
8730 && mode
== GET_MODE (operands
[4])
8731 && mode
== GET_MODE (operands
[5])))
8734 /* All operands must be registers. */
8735 if (! (GET_CODE (operands
[1]) == REG
8736 && GET_CODE (operands
[2]) == REG
8737 && GET_CODE (operands
[3]) == REG
8738 && GET_CODE (operands
[4]) == REG
8739 && GET_CODE (operands
[5]) == REG
))
8742 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8743 operation, so operands[4] must be the same as operand[3]. */
8744 if (! rtx_equal_p (operands
[3], operands
[4]))
8747 /* multiply cannot feed into subtraction. */
8748 if (rtx_equal_p (operands
[5], operands
[0]))
8751 /* Inout operand of sub cannot conflict with any operands from multiply. */
8752 if (rtx_equal_p (operands
[3], operands
[0])
8753 || rtx_equal_p (operands
[3], operands
[1])
8754 || rtx_equal_p (operands
[3], operands
[2]))
8757 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8759 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
8760 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
8761 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
8762 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
8763 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
8764 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
8767 /* Passed. Operands are suitable for fmpysub. */
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for a MULT embedded inside a memory address.  */
int
pa_mem_shadd_constant_p (int val)
{
  if (val == 2 || val == 4 || val == 8)
    return 1;
  else
    return 0;
}
/* Return 1 if the given constant is 1, 2, or 3.  These are the valid
   constants for shadd instructions.  */
int
pa_shadd_constant_p (int val)
{
  if (val == 1 || val == 2 || val == 3)
    return 1;
  else
    return 0;
}
8793 /* Return TRUE if INSN branches forward. */
8796 forward_branch_p (rtx_insn
*insn
)
8798 rtx lab
= JUMP_LABEL (insn
);
8800 /* The INSN must have a jump label. */
8801 gcc_assert (lab
!= NULL_RTX
);
8803 if (INSN_ADDRESSES_SET_P ())
8804 return INSN_ADDRESSES (INSN_UID (lab
)) > INSN_ADDRESSES (INSN_UID (insn
));
8811 insn
= NEXT_INSN (insn
);
8817 /* Output an unconditional move and branch insn. */
8820 pa_output_parallel_movb (rtx
*operands
, rtx_insn
*insn
)
8822 int length
= get_attr_length (insn
);
8824 /* These are the cases in which we win. */
8826 return "mov%I1b,tr %1,%0,%2";
8828 /* None of the following cases win, but they don't lose either. */
8831 if (dbr_sequence_length () == 0)
8833 /* Nothing in the delay slot, fake it by putting the combined
8834 insn (the copy or add) in the delay slot of a bl. */
8835 if (GET_CODE (operands
[1]) == CONST_INT
)
8836 return "b %2\n\tldi %1,%0";
8838 return "b %2\n\tcopy %1,%0";
8842 /* Something in the delay slot, but we've got a long branch. */
8843 if (GET_CODE (operands
[1]) == CONST_INT
)
8844 return "ldi %1,%0\n\tb %2";
8846 return "copy %1,%0\n\tb %2";
8850 if (GET_CODE (operands
[1]) == CONST_INT
)
8851 output_asm_insn ("ldi %1,%0", operands
);
8853 output_asm_insn ("copy %1,%0", operands
);
8854 return pa_output_lbranch (operands
[2], insn
, 1);
8857 /* Output an unconditional add and branch insn. */
8860 pa_output_parallel_addb (rtx
*operands
, rtx_insn
*insn
)
8862 int length
= get_attr_length (insn
);
8864 /* To make life easy we want operand0 to be the shared input/output
8865 operand and operand1 to be the readonly operand. */
8866 if (operands
[0] == operands
[1])
8867 operands
[1] = operands
[2];
8869 /* These are the cases in which we win. */
8871 return "add%I1b,tr %1,%0,%3";
8873 /* None of the following cases win, but they don't lose either. */
8876 if (dbr_sequence_length () == 0)
8877 /* Nothing in the delay slot, fake it by putting the combined
8878 insn (the copy or add) in the delay slot of a bl. */
8879 return "b %3\n\tadd%I1 %1,%0,%0";
8881 /* Something in the delay slot, but we've got a long branch. */
8882 return "add%I1 %1,%0,%0\n\tb %3";
8885 output_asm_insn ("add%I1 %1,%0,%0", operands
);
8886 return pa_output_lbranch (operands
[3], insn
, 1);
8889 /* We use this hook to perform a PA specific optimization which is difficult
8890 to do in earlier passes. */
8895 remove_useless_addtr_insns (1);
8897 if (pa_cpu
< PROCESSOR_8000
)
8898 pa_combine_instructions ();
8901 /* The PA has a number of odd instructions which can perform multiple
8902 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8903 it may be profitable to combine two instructions into one instruction
8904 with two outputs. It's not profitable PA2.0 machines because the
8905 two outputs would take two slots in the reorder buffers.
8907 This routine finds instructions which can be combined and combines
8908 them. We only support some of the potential combinations, and we
8909 only try common ways to find suitable instructions.
8911 * addb can add two registers or a register and a small integer
8912 and jump to a nearby (+-8k) location. Normally the jump to the
8913 nearby location is conditional on the result of the add, but by
8914 using the "true" condition we can make the jump unconditional.
8915 Thus addb can perform two independent operations in one insn.
8917 * movb is similar to addb in that it can perform a reg->reg
8918 or small immediate->reg copy and jump to a nearby (+-8k location).
8920 * fmpyadd and fmpysub can perform a FP multiply and either an
8921 FP add or FP sub if the operands of the multiply and add/sub are
8922 independent (there are other minor restrictions). Note both
8923 the fmpy and fadd/fsub can in theory move to better spots according
8924 to data dependencies, but for now we require the fmpy stay at a
8927 * Many of the memory operations can perform pre & post updates
8928 of index registers. GCC's pre/post increment/decrement addressing
8929 is far too simple to take advantage of all the possibilities. This
8930 pass may not be suitable since those insns may not be independent.
8932 * comclr can compare two ints or an int and a register, nullify
8933 the following instruction and zero some other register. This
8934 is more difficult to use as it's harder to find an insn which
8935 will generate a comclr than finding something like an unconditional
8936 branch. (conditional moves & long branches create comclr insns).
8938 * Most arithmetic operations can conditionally skip the next
8939 instruction. They can be viewed as "perform this operation
8940 and conditionally jump to this nearby location" (where nearby
8941 is an insns away). These are difficult to use due to the
8942 branch length restrictions. */
8945 pa_combine_instructions (void)
8949 /* This can get expensive since the basic algorithm is on the
8950 order of O(n^2) (or worse). Only do it for -O2 or higher
8951 levels of optimization. */
8955 /* Walk down the list of insns looking for "anchor" insns which
8956 may be combined with "floating" insns. As the name implies,
8957 "anchor" instructions don't move, while "floating" insns may
8959 rtx par
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, NULL_RTX
, NULL_RTX
));
8960 rtx_insn
*new_rtx
= make_insn_raw (par
);
8962 for (anchor
= get_insns (); anchor
; anchor
= NEXT_INSN (anchor
))
8964 enum attr_pa_combine_type anchor_attr
;
8965 enum attr_pa_combine_type floater_attr
;
8967 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8968 Also ignore any special USE insns. */
8969 if ((! NONJUMP_INSN_P (anchor
) && ! JUMP_P (anchor
) && ! CALL_P (anchor
))
8970 || GET_CODE (PATTERN (anchor
)) == USE
8971 || GET_CODE (PATTERN (anchor
)) == CLOBBER
)
8974 anchor_attr
= get_attr_pa_combine_type (anchor
);
8975 /* See if anchor is an insn suitable for combination. */
8976 if (anchor_attr
== PA_COMBINE_TYPE_FMPY
8977 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
8978 || (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
8979 && ! forward_branch_p (anchor
)))
8983 for (floater
= PREV_INSN (anchor
);
8985 floater
= PREV_INSN (floater
))
8987 if (NOTE_P (floater
)
8988 || (NONJUMP_INSN_P (floater
)
8989 && (GET_CODE (PATTERN (floater
)) == USE
8990 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
8993 /* Anything except a regular INSN will stop our search. */
8994 if (! NONJUMP_INSN_P (floater
))
9000 /* See if FLOATER is suitable for combination with the
9002 floater_attr
= get_attr_pa_combine_type (floater
);
9003 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9004 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9005 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9006 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9008 /* If ANCHOR and FLOATER can be combined, then we're
9009 done with this pass. */
9010 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9011 SET_DEST (PATTERN (floater
)),
9012 XEXP (SET_SRC (PATTERN (floater
)), 0),
9013 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9017 else if (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
9018 && floater_attr
== PA_COMBINE_TYPE_ADDMOVE
)
9020 if (GET_CODE (SET_SRC (PATTERN (floater
))) == PLUS
)
9022 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9023 SET_DEST (PATTERN (floater
)),
9024 XEXP (SET_SRC (PATTERN (floater
)), 0),
9025 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9030 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9031 SET_DEST (PATTERN (floater
)),
9032 SET_SRC (PATTERN (floater
)),
9033 SET_SRC (PATTERN (floater
))))
9039 /* If we didn't find anything on the backwards scan try forwards. */
9041 && (anchor_attr
== PA_COMBINE_TYPE_FMPY
9042 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
))
9044 for (floater
= anchor
; floater
; floater
= NEXT_INSN (floater
))
9046 if (NOTE_P (floater
)
9047 || (NONJUMP_INSN_P (floater
)
9048 && (GET_CODE (PATTERN (floater
)) == USE
9049 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
9053 /* Anything except a regular INSN will stop our search. */
9054 if (! NONJUMP_INSN_P (floater
))
9060 /* See if FLOATER is suitable for combination with the
9062 floater_attr
= get_attr_pa_combine_type (floater
);
9063 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9064 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9065 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9066 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9068 /* If ANCHOR and FLOATER can be combined, then we're
9069 done with this pass. */
9070 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 1,
9071 SET_DEST (PATTERN (floater
)),
9072 XEXP (SET_SRC (PATTERN (floater
)),
9074 XEXP (SET_SRC (PATTERN (floater
)),
9081 /* FLOATER will be nonzero if we found a suitable floating
9082 insn for combination with ANCHOR. */
9084 && (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9085 || anchor_attr
== PA_COMBINE_TYPE_FMPY
))
9087 /* Emit the new instruction and delete the old anchor. */
9088 emit_insn_before (gen_rtx_PARALLEL
9090 gen_rtvec (2, PATTERN (anchor
),
9091 PATTERN (floater
))),
9094 SET_INSN_DELETED (anchor
);
9096 /* Emit a special USE insn for FLOATER, then delete
9097 the floating insn. */
9098 emit_insn_before (gen_rtx_USE (VOIDmode
, floater
), floater
);
9099 delete_insn (floater
);
9104 && anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
)
9107 /* Emit the new_jump instruction and delete the old anchor. */
9109 = emit_jump_insn_before (gen_rtx_PARALLEL
9111 gen_rtvec (2, PATTERN (anchor
),
9112 PATTERN (floater
))),
9115 JUMP_LABEL (temp
) = JUMP_LABEL (anchor
);
9116 SET_INSN_DELETED (anchor
);
9118 /* Emit a special USE insn for FLOATER, then delete
9119 the floating insn. */
9120 emit_insn_before (gen_rtx_USE (VOIDmode
, floater
), floater
);
9121 delete_insn (floater
);
9129 pa_can_combine_p (rtx_insn
*new_rtx
, rtx_insn
*anchor
, rtx_insn
*floater
,
9130 int reversed
, rtx dest
,
9133 int insn_code_number
;
9134 rtx_insn
*start
, *end
;
9136 /* Create a PARALLEL with the patterns of ANCHOR and
9137 FLOATER, try to recognize it, then test constraints
9138 for the resulting pattern.
9140 If the pattern doesn't match or the constraints
9141 aren't met keep searching for a suitable floater
9143 XVECEXP (PATTERN (new_rtx
), 0, 0) = PATTERN (anchor
);
9144 XVECEXP (PATTERN (new_rtx
), 0, 1) = PATTERN (floater
);
9145 INSN_CODE (new_rtx
) = -1;
9146 insn_code_number
= recog_memoized (new_rtx
);
9147 basic_block bb
= BLOCK_FOR_INSN (anchor
);
9148 if (insn_code_number
< 0
9149 || (extract_insn (new_rtx
),
9150 !constrain_operands (1, get_preferred_alternatives (new_rtx
, bb
))))
9164 /* There's up to three operands to consider. One
9165 output and two inputs.
9167 The output must not be used between FLOATER & ANCHOR
9168 exclusive. The inputs must not be set between
9169 FLOATER and ANCHOR exclusive. */
9171 if (reg_used_between_p (dest
, start
, end
))
9174 if (reg_set_between_p (src1
, start
, end
))
9177 if (reg_set_between_p (src2
, start
, end
))
9180 /* If we get here, then everything is good. */
9184 /* Return nonzero if references for INSN are delayed.
9186 Millicode insns are actually function calls with some special
9187 constraints on arguments and register usage.
9189 Millicode calls always expect their arguments in the integer argument
9190 registers, and always return their result in %r29 (ret1). They
9191 are expected to clobber their arguments, %r1, %r29, and the return
9192 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9194 This function tells reorg that the references to arguments and
9195 millicode calls do not appear to happen until after the millicode call.
9196 This allows reorg to put insns which set the argument registers into the
9197 delay slot of the millicode call -- thus they act more like traditional
9200 Note we cannot consider side effects of the insn to be delayed because
9201 the branch and link insn will clobber the return pointer. If we happened
9202 to use the return pointer in the delay slot of the call, then we lose.
9204 get_attr_type will try to recognize the given insn, so make sure to
9205 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9208 pa_insn_refs_are_delayed (rtx_insn
*insn
)
9210 return ((NONJUMP_INSN_P (insn
)
9211 && GET_CODE (PATTERN (insn
)) != SEQUENCE
9212 && GET_CODE (PATTERN (insn
)) != USE
9213 && GET_CODE (PATTERN (insn
)) != CLOBBER
9214 && get_attr_type (insn
) == TYPE_MILLI
));
9217 /* Promote the return value, but not the arguments. */
9220 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
9222 int *punsignedp ATTRIBUTE_UNUSED
,
9223 const_tree fntype ATTRIBUTE_UNUSED
,
9226 if (for_return
== 0)
9228 return promote_mode (type
, mode
, punsignedp
);
9231 /* On the HP-PA the value is found in register(s) 28(-29), unless
9232 the mode is SF or DF. Then the value is returned in fr4 (32).
9234 This must perform the same promotions as PROMOTE_MODE, else promoting
9235 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9237 Small structures must be returned in a PARALLEL on PA64 in order
9238 to match the HP Compiler ABI. */
9241 pa_function_value (const_tree valtype
,
9242 const_tree func ATTRIBUTE_UNUSED
,
9243 bool outgoing ATTRIBUTE_UNUSED
)
9245 machine_mode valmode
;
9247 if (AGGREGATE_TYPE_P (valtype
)
9248 || TREE_CODE (valtype
) == COMPLEX_TYPE
9249 || TREE_CODE (valtype
) == VECTOR_TYPE
)
9251 HOST_WIDE_INT valsize
= int_size_in_bytes (valtype
);
9253 /* Handle aggregates that fit exactly in a word or double word. */
9254 if ((valsize
& (UNITS_PER_WORD
- 1)) == 0)
9255 return gen_rtx_REG (TYPE_MODE (valtype
), 28);
9259 /* Aggregates with a size less than or equal to 128 bits are
9260 returned in GR 28(-29). They are left justified. The pad
9261 bits are undefined. Larger aggregates are returned in
9265 int ub
= valsize
<= UNITS_PER_WORD
? 1 : 2;
9267 for (i
= 0; i
< ub
; i
++)
9269 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9270 gen_rtx_REG (DImode
, 28 + i
),
9275 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec_v (ub
, loc
));
9277 else if (valsize
> UNITS_PER_WORD
)
9279 /* Aggregates 5 to 8 bytes in size are returned in general
9280 registers r28-r29 in the same manner as other non
9281 floating-point objects. The data is right-justified and
9282 zero-extended to 64 bits. This is opposite to the normal
9283 justification used on big endian targets and requires
9284 special treatment. */
9285 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9286 gen_rtx_REG (DImode
, 28), const0_rtx
);
9287 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
9291 if ((INTEGRAL_TYPE_P (valtype
)
9292 && GET_MODE_BITSIZE (TYPE_MODE (valtype
)) < BITS_PER_WORD
)
9293 || POINTER_TYPE_P (valtype
))
9294 valmode
= word_mode
;
9296 valmode
= TYPE_MODE (valtype
);
9298 if (TREE_CODE (valtype
) == REAL_TYPE
9299 && !AGGREGATE_TYPE_P (valtype
)
9300 && TYPE_MODE (valtype
) != TFmode
9301 && !TARGET_SOFT_FLOAT
)
9302 return gen_rtx_REG (valmode
, 32);
9304 return gen_rtx_REG (valmode
, 28);
9307 /* Implement the TARGET_LIBCALL_VALUE hook. */
9310 pa_libcall_value (machine_mode mode
,
9311 const_rtx fun ATTRIBUTE_UNUSED
)
9313 if (! TARGET_SOFT_FLOAT
9314 && (mode
== SFmode
|| mode
== DFmode
))
9315 return gen_rtx_REG (mode
, 32);
9317 return gen_rtx_REG (mode
, 28);
9320 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9323 pa_function_value_regno_p (const unsigned int regno
)
9326 || (! TARGET_SOFT_FLOAT
&& regno
== 32))
9332 /* Update the data in CUM to advance over an argument
9333 of mode MODE and data type TYPE.
9334 (TYPE is null for libcalls where that information may not be available.) */
9337 pa_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
9338 const_tree type
, bool named ATTRIBUTE_UNUSED
)
9340 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9341 int arg_size
= FUNCTION_ARG_SIZE (mode
, type
);
9343 cum
->nargs_prototype
--;
9344 cum
->words
+= (arg_size
9345 + ((cum
->words
& 01)
9346 && type
!= NULL_TREE
9350 /* Return the location of a parameter that is passed in a register or NULL
9351 if the parameter has any component that is passed in memory.
9353 This is new code and will be pushed to into the net sources after
9356 ??? We might want to restructure this so that it looks more like other
9359 pa_function_arg (cumulative_args_t cum_v
, machine_mode mode
,
9360 const_tree type
, bool named ATTRIBUTE_UNUSED
)
9362 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9363 int max_arg_words
= (TARGET_64BIT
? 8 : 4);
9370 if (mode
== VOIDmode
)
9373 arg_size
= FUNCTION_ARG_SIZE (mode
, type
);
9375 /* If this arg would be passed partially or totally on the stack, then
9376 this routine should return zero. pa_arg_partial_bytes will
9377 handle arguments which are split between regs and stack slots if
9378 the ABI mandates split arguments. */
9381 /* The 32-bit ABI does not split arguments. */
9382 if (cum
->words
+ arg_size
> max_arg_words
)
9388 alignment
= cum
->words
& 1;
9389 if (cum
->words
+ alignment
>= max_arg_words
)
9393 /* The 32bit ABIs and the 64bit ABIs are rather different,
9394 particularly in their handling of FP registers. We might
9395 be able to cleverly share code between them, but I'm not
9396 going to bother in the hope that splitting them up results
9397 in code that is more easily understood. */
9401 /* Advance the base registers to their current locations.
9403 Remember, gprs grow towards smaller register numbers while
9404 fprs grow to higher register numbers. Also remember that
9405 although FP regs are 32-bit addressable, we pretend that
9406 the registers are 64-bits wide. */
9407 gpr_reg_base
= 26 - cum
->words
;
9408 fpr_reg_base
= 32 + cum
->words
;
9410 /* Arguments wider than one word and small aggregates need special
9414 || (type
&& (AGGREGATE_TYPE_P (type
)
9415 || TREE_CODE (type
) == COMPLEX_TYPE
9416 || TREE_CODE (type
) == VECTOR_TYPE
)))
9418 /* Double-extended precision (80-bit), quad-precision (128-bit)
9419 and aggregates including complex numbers are aligned on
9420 128-bit boundaries. The first eight 64-bit argument slots
9421 are associated one-to-one, with general registers r26
9422 through r19, and also with floating-point registers fr4
9423 through fr11. Arguments larger than one word are always
9424 passed in general registers.
9426 Using a PARALLEL with a word mode register results in left
9427 justified data on a big-endian target. */
9430 int i
, offset
= 0, ub
= arg_size
;
9432 /* Align the base register. */
9433 gpr_reg_base
-= alignment
;
9435 ub
= MIN (ub
, max_arg_words
- cum
->words
- alignment
);
9436 for (i
= 0; i
< ub
; i
++)
9438 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9439 gen_rtx_REG (DImode
, gpr_reg_base
),
9445 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (ub
, loc
));
9450 /* If the argument is larger than a word, then we know precisely
9451 which registers we must use. */
9465 /* Structures 5 to 8 bytes in size are passed in the general
9466 registers in the same manner as other non floating-point
9467 objects. The data is right-justified and zero-extended
9468 to 64 bits. This is opposite to the normal justification
9469 used on big endian targets and requires special treatment.
9470 We now define BLOCK_REG_PADDING to pad these objects.
9471 Aggregates, complex and vector types are passed in the same
9472 manner as structures. */
9474 || (type
&& (AGGREGATE_TYPE_P (type
)
9475 || TREE_CODE (type
) == COMPLEX_TYPE
9476 || TREE_CODE (type
) == VECTOR_TYPE
)))
9478 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9479 gen_rtx_REG (DImode
, gpr_reg_base
),
9481 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
9486 /* We have a single word (32 bits). A simple computation
9487 will get us the register #s we need. */
9488 gpr_reg_base
= 26 - cum
->words
;
9489 fpr_reg_base
= 32 + 2 * cum
->words
;
9493 /* Determine if the argument needs to be passed in both general and
9494 floating point registers. */
9495 if (((TARGET_PORTABLE_RUNTIME
|| TARGET_64BIT
|| TARGET_ELF32
)
9496 /* If we are doing soft-float with portable runtime, then there
9497 is no need to worry about FP regs. */
9498 && !TARGET_SOFT_FLOAT
9499 /* The parameter must be some kind of scalar float, else we just
9500 pass it in integer registers. */
9501 && GET_MODE_CLASS (mode
) == MODE_FLOAT
9502 /* The target function must not have a prototype. */
9503 && cum
->nargs_prototype
<= 0
9504 /* libcalls do not need to pass items in both FP and general
9506 && type
!= NULL_TREE
9507 /* All this hair applies to "outgoing" args only. This includes
9508 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9510 /* Also pass outgoing floating arguments in both registers in indirect
9511 calls with the 32 bit ABI and the HP assembler since there is no
9512 way to the specify argument locations in static functions. */
9517 && GET_MODE_CLASS (mode
) == MODE_FLOAT
))
9523 gen_rtx_EXPR_LIST (VOIDmode
,
9524 gen_rtx_REG (mode
, fpr_reg_base
),
9526 gen_rtx_EXPR_LIST (VOIDmode
,
9527 gen_rtx_REG (mode
, gpr_reg_base
),
9532 /* See if we should pass this parameter in a general register. */
9533 if (TARGET_SOFT_FLOAT
9534 /* Indirect calls in the normal 32bit ABI require all arguments
9535 to be passed in general registers. */
9536 || (!TARGET_PORTABLE_RUNTIME
9540 /* If the parameter is not a scalar floating-point parameter,
9541 then it belongs in GPRs. */
9542 || GET_MODE_CLASS (mode
) != MODE_FLOAT
9543 /* Structure with single SFmode field belongs in GPR. */
9544 || (type
&& AGGREGATE_TYPE_P (type
)))
9545 retval
= gen_rtx_REG (mode
, gpr_reg_base
);
9547 retval
= gen_rtx_REG (mode
, fpr_reg_base
);
9552 /* Arguments larger than one word are double word aligned. */
9555 pa_function_arg_boundary (machine_mode mode
, const_tree type
)
9557 bool singleword
= (type
9558 ? (integer_zerop (TYPE_SIZE (type
))
9559 || !TREE_CONSTANT (TYPE_SIZE (type
))
9560 || int_size_in_bytes (type
) <= UNITS_PER_WORD
)
9561 : GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
);
9563 return singleword
? PARM_BOUNDARY
: MAX_PARM_BOUNDARY
;
9566 /* If this arg would be passed totally in registers or totally on the stack,
9567 then this routine should return zero. */
9570 pa_arg_partial_bytes (cumulative_args_t cum_v
, machine_mode mode
,
9571 tree type
, bool named ATTRIBUTE_UNUSED
)
9573 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9574 unsigned int max_arg_words
= 8;
9575 unsigned int offset
= 0;
9580 if (FUNCTION_ARG_SIZE (mode
, type
) > 1 && (cum
->words
& 1))
9583 if (cum
->words
+ offset
+ FUNCTION_ARG_SIZE (mode
, type
) <= max_arg_words
)
9584 /* Arg fits fully into registers. */
9586 else if (cum
->words
+ offset
>= max_arg_words
)
9587 /* Arg fully on the stack. */
9591 return (max_arg_words
- cum
->words
- offset
) * UNITS_PER_WORD
;
9595 /* A get_unnamed_section callback for switching to the text section.
9597 This function is only used with SOM. Because we don't support
9598 named subspaces, we can only create a new subspace or switch back
9599 to the default text subspace. */
9602 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
9604 gcc_assert (TARGET_SOM
);
9607 if (cfun
&& cfun
->machine
&& !cfun
->machine
->in_nsubspa
)
9609 /* We only want to emit a .nsubspa directive once at the
9610 start of the function. */
9611 cfun
->machine
->in_nsubspa
= 1;
9613 /* Create a new subspace for the text. This provides
9614 better stub placement and one-only functions. */
9616 && DECL_ONE_ONLY (cfun
->decl
)
9617 && !DECL_WEAK (cfun
->decl
))
9619 output_section_asm_op ("\t.SPACE $TEXT$\n"
9620 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9621 "ACCESS=44,SORT=24,COMDAT");
9627 /* There isn't a current function or the body of the current
9628 function has been completed. So, we are changing to the
9629 text section to output debugging information. Thus, we
9630 need to forget that we are in the text section so that
9631 varasm.c will call us when text_section is selected again. */
9632 gcc_assert (!cfun
|| !cfun
->machine
9633 || cfun
->machine
->in_nsubspa
== 2);
9636 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9639 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9642 /* A get_unnamed_section callback for switching to comdat data
9643 sections. This function is only used with SOM. */
9646 som_output_comdat_data_section_asm_op (const void *data
)
9649 output_section_asm_op (data
);
9652 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9655 pa_som_asm_init_sections (void)
9658 = get_unnamed_section (0, som_output_text_section_asm_op
, NULL
);
9660 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9661 is not being generated. */
9662 som_readonly_data_section
9663 = get_unnamed_section (0, output_section_asm_op
,
9664 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9666 /* When secondary definitions are not supported, SOM makes readonly
9667 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9669 som_one_only_readonly_data_section
9670 = get_unnamed_section (0, som_output_comdat_data_section_asm_op
,
9672 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9673 "ACCESS=0x2c,SORT=16,COMDAT");
9676 /* When secondary definitions are not supported, SOM makes data one-only
9677 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9678 som_one_only_data_section
9679 = get_unnamed_section (SECTION_WRITE
,
9680 som_output_comdat_data_section_asm_op
,
9681 "\t.SPACE $PRIVATE$\n"
9682 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9683 "ACCESS=31,SORT=24,COMDAT");
9686 som_tm_clone_table_section
9687 = get_unnamed_section (0, output_section_asm_op
,
9688 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9690 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9691 which reference data within the $TEXT$ space (for example constant
9692 strings in the $LIT$ subspace).
9694 The assemblers (GAS and HP as) both have problems with handling
9695 the difference of two symbols which is the other correct way to
9696 reference constant data during PIC code generation.
9698 So, there's no way to reference constant data which is in the
9699 $TEXT$ space during PIC generation. Instead place all constant
9700 data into the $PRIVATE$ subspace (this reduces sharing, but it
9701 works correctly). */
9702 readonly_data_section
= flag_pic
? data_section
: som_readonly_data_section
;
9704 /* We must not have a reference to an external symbol defined in a
9705 shared library in a readonly section, else the SOM linker will
9708 So, we force exception information into the data section. */
9709 exception_section
= data_section
;
9712 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9715 pa_som_tm_clone_table_section (void)
9717 return som_tm_clone_table_section
;
9720 /* On hpux10, the linker will give an error if we have a reference
9721 in the read-only data section to a symbol defined in a shared
9722 library. Therefore, expressions that might require a reloc can
9723 not be placed in the read-only data section. */
9726 pa_select_section (tree exp
, int reloc
,
9727 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
9729 if (TREE_CODE (exp
) == VAR_DECL
9730 && TREE_READONLY (exp
)
9731 && !TREE_THIS_VOLATILE (exp
)
9732 && DECL_INITIAL (exp
)
9733 && (DECL_INITIAL (exp
) == error_mark_node
9734 || TREE_CONSTANT (DECL_INITIAL (exp
)))
9738 && DECL_ONE_ONLY (exp
)
9739 && !DECL_WEAK (exp
))
9740 return som_one_only_readonly_data_section
;
9742 return readonly_data_section
;
9744 else if (CONSTANT_CLASS_P (exp
) && !reloc
)
9745 return readonly_data_section
;
9747 && TREE_CODE (exp
) == VAR_DECL
9748 && DECL_ONE_ONLY (exp
)
9749 && !DECL_WEAK (exp
))
9750 return som_one_only_data_section
;
9752 return data_section
;
/* Implement pa_reloc_rw_mask.  */

static int
pa_reloc_rw_mask (void)
{
  /* We force (const (plus (symbol) (const_int))) to memory when the
     const_int doesn't fit in a 14-bit integer.  The SOM linker can't
     handle this construct in read-only memory and we want to avoid
     this for ELF.  So, we always force an RTX needing relocation to
     the data section.  */
  return 3;
}
9769 pa_globalize_label (FILE *stream
, const char *name
)
9771 /* We only handle DATA objects here, functions are globalized in
9772 ASM_DECLARE_FUNCTION_NAME. */
9773 if (! FUNCTION_NAME_P (name
))
9775 fputs ("\t.EXPORT ", stream
);
9776 assemble_name (stream
, name
);
9777 fputs (",DATA\n", stream
);
9781 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9784 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED
,
9785 int incoming ATTRIBUTE_UNUSED
)
9787 return gen_rtx_REG (Pmode
, PA_STRUCT_VALUE_REGNUM
);
9790 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9793 pa_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
9795 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9796 PA64 ABI says that objects larger than 128 bits are returned in memory.
9797 Note, int_size_in_bytes can return -1 if the size of the object is
9798 variable or larger than the maximum value that can be expressed as
9799 a HOST_WIDE_INT. It can also return zero for an empty type. The
9800 simplest way to handle variable and empty types is to pass them in
9801 memory. This avoids problems in defining the boundaries of argument
9802 slots, allocating registers, etc. */
9803 return (int_size_in_bytes (type
) > (TARGET_64BIT
? 16 : 8)
9804 || int_size_in_bytes (type
) <= 0);
9807 /* Structure to hold declaration and name of external symbols that are
9808 emitted by GCC. We generate a vector of these symbols and output them
9809 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9810 This avoids putting out names that are never really used. */
9812 typedef struct GTY(()) extern_symbol
9818 /* Define gc'd vector type for extern_symbol. */
9820 /* Vector of extern_symbol pointers. */
9821 static GTY(()) vec
<extern_symbol
, va_gc
> *extern_symbols
;
9823 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9824 /* Mark DECL (name NAME) as an external reference (assembler output
9825 file FILE). This saves the names to output at the end of the file
9826 if actually referenced. */
9829 pa_hpux_asm_output_external (FILE *file
, tree decl
, const char *name
)
9831 gcc_assert (file
== asm_out_file
);
9832 extern_symbol p
= {decl
, name
};
9833 vec_safe_push (extern_symbols
, p
);
9836 /* Output text required at the end of an assembler file.
9837 This includes deferred plabels and .import directives for
9838 all external symbols that were actually referenced. */
9841 pa_hpux_file_end (void)
9846 if (!NO_DEFERRED_PROFILE_COUNTERS
)
9847 output_deferred_profile_counters ();
9849 output_deferred_plabels ();
9851 for (i
= 0; vec_safe_iterate (extern_symbols
, i
, &p
); i
++)
9853 tree decl
= p
->decl
;
9855 if (!TREE_ASM_WRITTEN (decl
)
9856 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl
), 0)))
9857 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file
, decl
, p
->name
);
9860 vec_free (extern_symbols
);
9864 /* Return true if a change from mode FROM to mode TO for a register
9865 in register class RCLASS is invalid. */
9868 pa_cannot_change_mode_class (machine_mode from
, machine_mode to
,
9869 enum reg_class rclass
)
9874 /* Reject changes to/from complex and vector modes. */
9875 if (COMPLEX_MODE_P (from
) || VECTOR_MODE_P (from
)
9876 || COMPLEX_MODE_P (to
) || VECTOR_MODE_P (to
))
9879 if (GET_MODE_SIZE (from
) == GET_MODE_SIZE (to
))
9882 /* There is no way to load QImode or HImode values directly from
9883 memory. SImode loads to the FP registers are not zero extended.
9884 On the 64-bit target, this conflicts with the definition of
9885 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9886 with different sizes in the floating-point registers. */
9887 if (MAYBE_FP_REG_CLASS_P (rclass
))
9890 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9891 in specific sets of registers. Thus, we cannot allow changing
9892 to a larger mode when it's larger than a word. */
9893 if (GET_MODE_SIZE (to
) > UNITS_PER_WORD
9894 && GET_MODE_SIZE (to
) > GET_MODE_SIZE (from
))
9900 /* Returns TRUE if it is a good idea to tie two pseudo registers
9901 when one has mode MODE1 and one has mode MODE2.
9902 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9903 for any hard reg, then this must be FALSE for correct output.
9905 We should return FALSE for QImode and HImode because these modes
9906 are not ok in the floating-point registers. However, this prevents
9907 tieing these modes to SImode and DImode in the general registers.
9908 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9909 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9910 in the floating-point registers. */
9913 pa_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
9915 /* Don't tie modes in different classes. */
9916 if (GET_MODE_CLASS (mode1
) != GET_MODE_CLASS (mode2
))
9923 /* Length in units of the trampoline instruction code. */
9925 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9928 /* Output assembler code for a block containing the constant parts
9929 of a trampoline, leaving space for the variable parts.\
9931 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9932 and then branches to the specified routine.
9934 This code template is copied from text segment to stack location
9935 and then patched with pa_trampoline_init to contain valid values,
9936 and then entered as a subroutine.
9938 It is best to keep this as small as possible to avoid having to
9939 flush multiple lines in the cache. */
9942 pa_asm_trampoline_template (FILE *f
)
9946 fputs ("\tldw 36(%r22),%r21\n", f
);
9947 fputs ("\tbb,>=,n %r21,30,.+16\n", f
);
9948 if (ASSEMBLER_DIALECT
== 0)
9949 fputs ("\tdepi 0,31,2,%r21\n", f
);
9951 fputs ("\tdepwi 0,31,2,%r21\n", f
);
9952 fputs ("\tldw 4(%r21),%r19\n", f
);
9953 fputs ("\tldw 0(%r21),%r21\n", f
);
9956 fputs ("\tbve (%r21)\n", f
);
9957 fputs ("\tldw 40(%r22),%r29\n", f
);
9958 fputs ("\t.word 0\n", f
);
9959 fputs ("\t.word 0\n", f
);
9963 fputs ("\tldsid (%r21),%r1\n", f
);
9964 fputs ("\tmtsp %r1,%sr0\n", f
);
9965 fputs ("\tbe 0(%sr0,%r21)\n", f
);
9966 fputs ("\tldw 40(%r22),%r29\n", f
);
9968 fputs ("\t.word 0\n", f
);
9969 fputs ("\t.word 0\n", f
);
9970 fputs ("\t.word 0\n", f
);
9971 fputs ("\t.word 0\n", f
);
9975 fputs ("\t.dword 0\n", f
);
9976 fputs ("\t.dword 0\n", f
);
9977 fputs ("\t.dword 0\n", f
);
9978 fputs ("\t.dword 0\n", f
);
9979 fputs ("\tmfia %r31\n", f
);
9980 fputs ("\tldd 24(%r31),%r1\n", f
);
9981 fputs ("\tldd 24(%r1),%r27\n", f
);
9982 fputs ("\tldd 16(%r1),%r1\n", f
);
9983 fputs ("\tbve (%r1)\n", f
);
9984 fputs ("\tldd 32(%r31),%r31\n", f
);
9985 fputs ("\t.dword 0 ; fptr\n", f
);
9986 fputs ("\t.dword 0 ; static link\n", f
);
9990 /* Emit RTL insns to initialize the variable parts of a trampoline.
9991 FNADDR is an RTX for the address of the function's pure code.
9992 CXT is an RTX for the static chain value for the function.
9994 Move the function address to the trampoline template at offset 36.
9995 Move the static chain value to trampoline template at offset 40.
9996 Move the trampoline address to trampoline template at offset 44.
9997 Move r19 to trampoline template at offset 48. The latter two
9998 words create a plabel for the indirect call to the trampoline.
10000 A similar sequence is used for the 64-bit port but the plabel is
10001 at the beginning of the trampoline.
10003 Finally, the cache entries for the trampoline code are flushed.
10004 This is necessary to ensure that the trampoline instruction sequence
10005 is written to memory prior to any attempts at prefetching the code
10009 pa_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
10011 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
10012 rtx start_addr
= gen_reg_rtx (Pmode
);
10013 rtx end_addr
= gen_reg_rtx (Pmode
);
10014 rtx line_length
= gen_reg_rtx (Pmode
);
10017 emit_block_move (m_tramp
, assemble_trampoline_template (),
10018 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
10019 r_tramp
= force_reg (Pmode
, XEXP (m_tramp
, 0));
10023 tmp
= adjust_address (m_tramp
, Pmode
, 36);
10024 emit_move_insn (tmp
, fnaddr
);
10025 tmp
= adjust_address (m_tramp
, Pmode
, 40);
10026 emit_move_insn (tmp
, chain_value
);
10028 /* Create a fat pointer for the trampoline. */
10029 tmp
= adjust_address (m_tramp
, Pmode
, 44);
10030 emit_move_insn (tmp
, r_tramp
);
10031 tmp
= adjust_address (m_tramp
, Pmode
, 48);
10032 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 19));
10034 /* fdc and fic only use registers for the address to flush,
10035 they do not accept integer displacements. We align the
10036 start and end addresses to the beginning of their respective
10037 cache lines to minimize the number of lines flushed. */
10038 emit_insn (gen_andsi3 (start_addr
, r_tramp
,
10039 GEN_INT (-MIN_CACHELINE_SIZE
)));
10040 tmp
= force_reg (Pmode
, plus_constant (Pmode
, r_tramp
,
10041 TRAMPOLINE_CODE_SIZE
-1));
10042 emit_insn (gen_andsi3 (end_addr
, tmp
,
10043 GEN_INT (-MIN_CACHELINE_SIZE
)));
10044 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10045 emit_insn (gen_dcacheflushsi (start_addr
, end_addr
, line_length
));
10046 emit_insn (gen_icacheflushsi (start_addr
, end_addr
, line_length
,
10047 gen_reg_rtx (Pmode
),
10048 gen_reg_rtx (Pmode
)));
10052 tmp
= adjust_address (m_tramp
, Pmode
, 56);
10053 emit_move_insn (tmp
, fnaddr
);
10054 tmp
= adjust_address (m_tramp
, Pmode
, 64);
10055 emit_move_insn (tmp
, chain_value
);
10057 /* Create a fat pointer for the trampoline. */
10058 tmp
= adjust_address (m_tramp
, Pmode
, 16);
10059 emit_move_insn (tmp
, force_reg (Pmode
, plus_constant (Pmode
,
10061 tmp
= adjust_address (m_tramp
, Pmode
, 24);
10062 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 27));
10064 /* fdc and fic only use registers for the address to flush,
10065 they do not accept integer displacements. We align the
10066 start and end addresses to the beginning of their respective
10067 cache lines to minimize the number of lines flushed. */
10068 tmp
= force_reg (Pmode
, plus_constant (Pmode
, r_tramp
, 32));
10069 emit_insn (gen_anddi3 (start_addr
, tmp
,
10070 GEN_INT (-MIN_CACHELINE_SIZE
)));
10071 tmp
= force_reg (Pmode
, plus_constant (Pmode
, tmp
,
10072 TRAMPOLINE_CODE_SIZE
- 1));
10073 emit_insn (gen_anddi3 (end_addr
, tmp
,
10074 GEN_INT (-MIN_CACHELINE_SIZE
)));
10075 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10076 emit_insn (gen_dcacheflushdi (start_addr
, end_addr
, line_length
));
10077 emit_insn (gen_icacheflushdi (start_addr
, end_addr
, line_length
,
10078 gen_reg_rtx (Pmode
),
10079 gen_reg_rtx (Pmode
)));
10082 #ifdef HAVE_ENABLE_EXECUTE_STACK
10083 Â
emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
10084 Â Â Â Â LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
10088 /* Perform any machine-specific adjustment in the address of the trampoline.
10089 ADDR contains the address that was passed to pa_trampoline_init.
10090 Adjust the trampoline address to point to the plabel at offset 44. */
10093 pa_trampoline_adjust_address (rtx addr
)
10096 addr
= memory_address (Pmode
, plus_constant (Pmode
, addr
, 46));
10101 pa_delegitimize_address (rtx orig_x
)
10103 rtx x
= delegitimize_mem_from_attrs (orig_x
);
10105 if (GET_CODE (x
) == LO_SUM
10106 && GET_CODE (XEXP (x
, 1)) == UNSPEC
10107 && XINT (XEXP (x
, 1), 1) == UNSPEC_DLTIND14R
)
10108 return gen_const_mem (Pmode
, XVECEXP (XEXP (x
, 1), 0, 0));
10113 pa_internal_arg_pointer (void)
10115 /* The argument pointer and the hard frame pointer are the same in
10116 the 32-bit runtime, so we don't need a copy. */
10118 return copy_to_reg (virtual_incoming_args_rtx
);
10120 return virtual_incoming_args_rtx
;
10123 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10124 Frame pointer elimination is automatically handled. */
10127 pa_can_eliminate (const int from
, const int to
)
10129 /* The argument cannot be eliminated in the 64-bit runtime. */
10130 if (TARGET_64BIT
&& from
== ARG_POINTER_REGNUM
)
10133 return (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
10134 ? ! frame_pointer_needed
10138 /* Define the offset between two registers, FROM to be eliminated and its
10139 replacement TO, at the start of a routine. */
10141 pa_initial_elimination_offset (int from
, int to
)
10143 HOST_WIDE_INT offset
;
10145 if ((from
== HARD_FRAME_POINTER_REGNUM
|| from
== FRAME_POINTER_REGNUM
)
10146 && to
== STACK_POINTER_REGNUM
)
10147 offset
= -pa_compute_frame_size (get_frame_size (), 0);
10148 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
10151 gcc_unreachable ();
10157 pa_conditional_register_usage (void)
10161 if (!TARGET_64BIT
&& !TARGET_PA_11
)
10163 for (i
= 56; i
<= FP_REG_LAST
; i
++)
10164 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10165 for (i
= 33; i
< 56; i
+= 2)
10166 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10168 if (TARGET_DISABLE_FPREGS
|| TARGET_SOFT_FLOAT
)
10170 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
10171 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10174 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
10177 /* Target hook for c_mode_for_suffix. */
10179 static machine_mode
10180 pa_c_mode_for_suffix (char suffix
)
10182 if (HPUX_LONG_DOUBLE_LIBRARY
)
10191 /* Target hook for function_section. */
10194 pa_function_section (tree decl
, enum node_frequency freq
,
10195 bool startup
, bool exit
)
10197 /* Put functions in text section if target doesn't have named sections. */
10198 if (!targetm_common
.have_named_sections
)
10199 return text_section
;
10201 /* Force nested functions into the same section as the containing
10204 && DECL_SECTION_NAME (decl
) == NULL
10205 && DECL_CONTEXT (decl
) != NULL_TREE
10206 && TREE_CODE (DECL_CONTEXT (decl
)) == FUNCTION_DECL
10207 && DECL_SECTION_NAME (DECL_CONTEXT (decl
)) == NULL
)
10208 return function_section (DECL_CONTEXT (decl
));
10210 /* Otherwise, use the default function section. */
10211 return default_function_section (decl
, freq
, startup
, exit
);
10214 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10216 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10217 that need more than three instructions to load prior to reload. This
10218 limit is somewhat arbitrary. It takes three instructions to load a
10219 CONST_INT from memory but two are memory accesses. It may be better
10220 to increase the allowed range for CONST_INTS. We may also be able
10221 to handle CONST_DOUBLES. */
10224 pa_legitimate_constant_p (machine_mode mode
, rtx x
)
10226 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& x
!= CONST0_RTX (mode
))
10229 if (!NEW_HP_ASSEMBLER
&& !TARGET_GAS
&& GET_CODE (x
) == LABEL_REF
)
10232 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10233 legitimate constants. The other variants can't be handled by
10234 the move patterns after reload starts. */
10235 if (tls_referenced_p (x
))
10238 if (TARGET_64BIT
&& GET_CODE (x
) == CONST_DOUBLE
)
10242 && HOST_BITS_PER_WIDE_INT
> 32
10243 && GET_CODE (x
) == CONST_INT
10244 && !reload_in_progress
10245 && !reload_completed
10246 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x
))
10247 && !pa_cint_ok_for_move (INTVAL (x
)))
10250 if (function_label_operand (x
, mode
))
10256 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10258 static unsigned int
10259 pa_section_type_flags (tree decl
, const char *name
, int reloc
)
10261 unsigned int flags
;
10263 flags
= default_section_type_flags (decl
, name
, reloc
);
10265 /* Function labels are placed in the constant pool. This can
10266 cause a section conflict if decls are put in ".data.rel.ro"
10267 or ".data.rel.ro.local" using the __attribute__ construct. */
10268 if (strcmp (name
, ".data.rel.ro") == 0
10269 || strcmp (name
, ".data.rel.ro.local") == 0)
10270 flags
|= SECTION_WRITE
| SECTION_RELRO
;
10275 /* pa_legitimate_address_p recognizes an RTL expression that is a
10276 valid memory address for an instruction. The MODE argument is the
10277 machine mode for the MEM expression that wants to use this address.
10279 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10280 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10281 available with floating point loads and stores, and integer loads.
10282 We get better code by allowing indexed addresses in the initial
10285 The acceptance of indexed addresses as legitimate implies that we
10286 must provide patterns for doing indexed integer stores, or the move
10287 expanders must force the address of an indexed store to a register.
10288 We have adopted the latter approach.
10290 Another function of pa_legitimate_address_p is to ensure that
10291 the base register is a valid pointer for indexed instructions.
10292 On targets that have non-equivalent space registers, we have to
10293 know at the time of assembler output which register in a REG+REG
10294 pair is the base register. The REG_POINTER flag is sometimes lost
10295 in reload and the following passes, so it can't be relied on during
10296 code generation. Thus, we either have to canonicalize the order
10297 of the registers in REG+REG indexed addresses, or treat REG+REG
10298 addresses separately and provide patterns for both permutations.
10300 The latter approach requires several hundred additional lines of
10301 code in pa.md. The downside to canonicalizing is that a PLUS
10302 in the wrong order can't combine to form to make a scaled indexed
10303 memory operand. As we won't need to canonicalize the operands if
10304 the REG_POINTER lossage can be fixed, it seems better canonicalize.
10306 We initially break out scaled indexed addresses in canonical order
10307 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10308 scaled indexed addresses during RTL generation. However, fold_rtx
10309 has its own opinion on how the operands of a PLUS should be ordered.
10310 If one of the operands is equivalent to a constant, it will make
10311 that operand the second operand. As the base register is likely to
10312 be equivalent to a SYMBOL_REF, we have made it the second operand.
10314 pa_legitimate_address_p accepts REG+REG as legitimate when the
10315 operands are in the order INDEX+BASE on targets with non-equivalent
10316 space registers, and in any order on targets with equivalent space
10317 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10319 We treat a SYMBOL_REF as legitimate if it is part of the current
10320 function's constant-pool, because such addresses can actually be
10321 output as REG+SMALLINT. */
10324 pa_legitimate_address_p (machine_mode mode
, rtx x
, bool strict
)
10327 && (strict
? STRICT_REG_OK_FOR_BASE_P (x
)
10328 : REG_OK_FOR_BASE_P (x
)))
10329 || ((GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
10330 || GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
)
10331 && REG_P (XEXP (x
, 0))
10332 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 0))
10333 : REG_OK_FOR_BASE_P (XEXP (x
, 0)))))
10336 if (GET_CODE (x
) == PLUS
)
10340 /* For REG+REG, the base register should be in XEXP (x, 1),
10341 so check it first. */
10342 if (REG_P (XEXP (x
, 1))
10343 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 1))
10344 : REG_OK_FOR_BASE_P (XEXP (x
, 1))))
10345 base
= XEXP (x
, 1), index
= XEXP (x
, 0);
10346 else if (REG_P (XEXP (x
, 0))
10347 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 0))
10348 : REG_OK_FOR_BASE_P (XEXP (x
, 0))))
10349 base
= XEXP (x
, 0), index
= XEXP (x
, 1);
10353 if (GET_CODE (index
) == CONST_INT
)
10355 if (INT_5_BITS (index
))
10358 /* When INT14_OK_STRICT is false, a secondary reload is needed
10359 to adjust the displacement of SImode and DImode floating point
10360 instructions but this may fail when the register also needs
10361 reloading. So, we return false when STRICT is true. We
10362 also reject long displacements for float mode addresses since
10363 the majority of accesses will use floating point instructions
10364 that don't support 14-bit offsets. */
10365 if (!INT14_OK_STRICT
10366 && (strict
|| !(reload_in_progress
|| reload_completed
))
10371 return base14_operand (index
, mode
);
10374 if (!TARGET_DISABLE_INDEXING
10375 /* Only accept the "canonical" INDEX+BASE operand order
10376 on targets with non-equivalent space registers. */
10377 && (TARGET_NO_SPACE_REGS
10379 : (base
== XEXP (x
, 1) && REG_P (index
)
10380 && (reload_completed
10381 || (reload_in_progress
&& HARD_REGISTER_P (base
))
10382 || REG_POINTER (base
))
10383 && (reload_completed
10384 || (reload_in_progress
&& HARD_REGISTER_P (index
))
10385 || !REG_POINTER (index
))))
10386 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode
)
10387 && (strict
? STRICT_REG_OK_FOR_INDEX_P (index
)
10388 : REG_OK_FOR_INDEX_P (index
))
10389 && borx_reg_operand (base
, Pmode
)
10390 && borx_reg_operand (index
, Pmode
))
10393 if (!TARGET_DISABLE_INDEXING
10394 && GET_CODE (index
) == MULT
10395 && MODE_OK_FOR_SCALED_INDEXING_P (mode
)
10396 && REG_P (XEXP (index
, 0))
10397 && GET_MODE (XEXP (index
, 0)) == Pmode
10398 && (strict
? STRICT_REG_OK_FOR_INDEX_P (XEXP (index
, 0))
10399 : REG_OK_FOR_INDEX_P (XEXP (index
, 0)))
10400 && GET_CODE (XEXP (index
, 1)) == CONST_INT
10401 && INTVAL (XEXP (index
, 1))
10402 == (HOST_WIDE_INT
) GET_MODE_SIZE (mode
)
10403 && borx_reg_operand (base
, Pmode
))
10409 if (GET_CODE (x
) == LO_SUM
)
10411 rtx y
= XEXP (x
, 0);
10413 if (GET_CODE (y
) == SUBREG
)
10414 y
= SUBREG_REG (y
);
10417 && (strict
? STRICT_REG_OK_FOR_BASE_P (y
)
10418 : REG_OK_FOR_BASE_P (y
)))
10420 /* Needed for -fPIC */
10422 && GET_CODE (XEXP (x
, 1)) == UNSPEC
)
10425 if (!INT14_OK_STRICT
10426 && (strict
|| !(reload_in_progress
|| reload_completed
))
10431 if (CONSTANT_P (XEXP (x
, 1)))
10437 if (GET_CODE (x
) == CONST_INT
&& INT_5_BITS (x
))
10443 /* Look for machine dependent ways to make the invalid address AD a
10446 For the PA, transform:
10448 memory(X + <large int>)
10452 if (<large int> & mask) >= 16
10453 Y = (<large int> & ~mask) + mask + 1 Round up.
10455 Y = (<large int> & ~mask) Round down.
10457 memory (Z + (<large int> - Y));
10459 This makes reload inheritance and reload_cse work better since Z
10462 There may be more opportunities to improve code with this hook. */
10465 pa_legitimize_reload_address (rtx ad
, machine_mode mode
,
10466 int opnum
, int type
,
10467 int ind_levels ATTRIBUTE_UNUSED
)
10469 long offset
, newoffset
, mask
;
10470 rtx new_rtx
, temp
= NULL_RTX
;
10472 mask
= (GET_MODE_CLASS (mode
) == MODE_FLOAT
10473 && !INT14_OK_STRICT
? 0x1f : 0x3fff);
10475 if (optimize
&& GET_CODE (ad
) == PLUS
)
10476 temp
= simplify_binary_operation (PLUS
, Pmode
,
10477 XEXP (ad
, 0), XEXP (ad
, 1));
10479 new_rtx
= temp
? temp
: ad
;
10482 && GET_CODE (new_rtx
) == PLUS
10483 && GET_CODE (XEXP (new_rtx
, 0)) == REG
10484 && GET_CODE (XEXP (new_rtx
, 1)) == CONST_INT
)
10486 offset
= INTVAL (XEXP ((new_rtx
), 1));
10488 /* Choose rounding direction. Round up if we are >= halfway. */
10489 if ((offset
& mask
) >= ((mask
+ 1) / 2))
10490 newoffset
= (offset
& ~mask
) + mask
+ 1;
10492 newoffset
= offset
& ~mask
;
10494 /* Ensure that long displacements are aligned. */
10496 && (GET_MODE_CLASS (mode
) == MODE_FLOAT
10497 || (TARGET_64BIT
&& (mode
) == DImode
)))
10498 newoffset
&= ~(GET_MODE_SIZE (mode
) - 1);
10500 if (newoffset
!= 0 && VAL_14_BITS_P (newoffset
))
10502 temp
= gen_rtx_PLUS (Pmode
, XEXP (new_rtx
, 0),
10503 GEN_INT (newoffset
));
10504 ad
= gen_rtx_PLUS (Pmode
, temp
, GEN_INT (offset
- newoffset
));
10505 push_reload (XEXP (ad
, 0), 0, &XEXP (ad
, 0), 0,
10506 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
10507 opnum
, (enum reload_type
) type
);
10515 /* Output address vector. */
10518 pa_output_addr_vec (rtx lab
, rtx body
)
10520 int idx
, vlen
= XVECLEN (body
, 0);
10522 targetm
.asm_out
.internal_label (asm_out_file
, "L", CODE_LABEL_NUMBER (lab
));
10524 fputs ("\t.begin_brtab\n", asm_out_file
);
10525 for (idx
= 0; idx
< vlen
; idx
++)
10527 ASM_OUTPUT_ADDR_VEC_ELT
10528 (asm_out_file
, CODE_LABEL_NUMBER (XEXP (XVECEXP (body
, 0, idx
), 0)));
10531 fputs ("\t.end_brtab\n", asm_out_file
);
10534 /* Output address difference vector. */
10537 pa_output_addr_diff_vec (rtx lab
, rtx body
)
10539 rtx base
= XEXP (XEXP (body
, 0), 0);
10540 int idx
, vlen
= XVECLEN (body
, 1);
10542 targetm
.asm_out
.internal_label (asm_out_file
, "L", CODE_LABEL_NUMBER (lab
));
10544 fputs ("\t.begin_brtab\n", asm_out_file
);
10545 for (idx
= 0; idx
< vlen
; idx
++)
10547 ASM_OUTPUT_ADDR_DIFF_ELT
10550 CODE_LABEL_NUMBER (XEXP (XVECEXP (body
, 1, idx
), 0)),
10551 CODE_LABEL_NUMBER (base
));
10554 fputs ("\t.end_brtab\n", asm_out_file
);