1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992-2015 Free Software Foundation, Inc.
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "insn-config.h"
30 #include "conditions.h"
31 #include "insn-attr.h"
34 #include "fold-const.h"
35 #include "stor-layout.h"
36 #include "stringpool.h"
48 #include "insn-codes.h"
51 #include "diagnostic-core.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
61 #include "cfgcleanup.h"
65 /* This file should be included last. */
66 #include "target-def.h"
68 /* Return nonzero if there is a bypass for the output of
69 OUT_INSN and the fp store IN_INSN. */
71 pa_fpstore_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
73 machine_mode store_mode
;
74 machine_mode other_mode
;
77 if (recog_memoized (in_insn
) < 0
78 || (get_attr_type (in_insn
) != TYPE_FPSTORE
79 && get_attr_type (in_insn
) != TYPE_FPSTORE_LOAD
)
80 || recog_memoized (out_insn
) < 0)
83 store_mode
= GET_MODE (SET_SRC (PATTERN (in_insn
)));
85 set
= single_set (out_insn
);
89 other_mode
= GET_MODE (SET_SRC (set
));
91 return (GET_MODE_SIZE (store_mode
) == GET_MODE_SIZE (other_mode
));
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
103 static void pa_option_override (void);
104 static void copy_reg_pointer (rtx
, rtx
);
105 static void fix_range (const char *);
106 static int hppa_register_move_cost (machine_mode mode
, reg_class_t
,
108 static int hppa_address_cost (rtx
, machine_mode mode
, addr_space_t
, bool);
109 static bool hppa_rtx_costs (rtx
, int, int, int, int *, bool);
110 static inline rtx
force_mode (machine_mode
, rtx
);
111 static void pa_reorg (void);
112 static void pa_combine_instructions (void);
113 static int pa_can_combine_p (rtx_insn
*, rtx_insn
*, rtx_insn
*, int, rtx
,
115 static bool forward_branch_p (rtx_insn
*);
116 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT
, unsigned *);
117 static void compute_zdepdi_operands (unsigned HOST_WIDE_INT
, unsigned *);
118 static int compute_movmem_length (rtx_insn
*);
119 static int compute_clrmem_length (rtx_insn
*);
120 static bool pa_assemble_integer (rtx
, unsigned int, int);
121 static void remove_useless_addtr_insns (int);
122 static void store_reg (int, HOST_WIDE_INT
, int);
123 static void store_reg_modify (int, int, HOST_WIDE_INT
);
124 static void load_reg (int, HOST_WIDE_INT
, int);
125 static void set_reg_plus_d (int, int, HOST_WIDE_INT
, int);
126 static rtx
pa_function_value (const_tree
, const_tree
, bool);
127 static rtx
pa_libcall_value (machine_mode
, const_rtx
);
128 static bool pa_function_value_regno_p (const unsigned int);
129 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT
);
130 static void update_total_code_bytes (unsigned int);
131 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT
);
132 static int pa_adjust_cost (rtx_insn
*, rtx
, rtx_insn
*, int);
133 static int pa_adjust_priority (rtx_insn
*, int);
134 static int pa_issue_rate (void);
135 static int pa_reloc_rw_mask (void);
136 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED
;
137 static section
*pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED
;
138 static section
*pa_select_section (tree
, int, unsigned HOST_WIDE_INT
)
140 static void pa_encode_section_info (tree
, rtx
, int);
141 static const char *pa_strip_name_encoding (const char *);
142 static bool pa_function_ok_for_sibcall (tree
, tree
);
143 static void pa_globalize_label (FILE *, const char *)
145 static void pa_asm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
146 HOST_WIDE_INT
, tree
);
147 #if !defined(USE_COLLECT2)
148 static void pa_asm_out_constructor (rtx
, int);
149 static void pa_asm_out_destructor (rtx
, int);
151 static void pa_init_builtins (void);
152 static rtx
pa_expand_builtin (tree
, rtx
, rtx
, machine_mode mode
, int);
153 static rtx
hppa_builtin_saveregs (void);
154 static void hppa_va_start (tree
, rtx
);
155 static tree
hppa_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
156 static bool pa_scalar_mode_supported_p (machine_mode
);
157 static bool pa_commutative_p (const_rtx x
, int outer_code
);
158 static void copy_fp_args (rtx_insn
*) ATTRIBUTE_UNUSED
;
159 static int length_fp_args (rtx_insn
*) ATTRIBUTE_UNUSED
;
160 static rtx
hppa_legitimize_address (rtx
, rtx
, machine_mode
);
161 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED
;
162 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED
;
163 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED
;
164 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED
;
165 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED
;
166 static void pa_som_file_start (void) ATTRIBUTE_UNUSED
;
167 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED
;
168 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED
;
169 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED
;
170 static void output_deferred_plabels (void);
171 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED
;
172 #ifdef ASM_OUTPUT_EXTERNAL_REAL
173 static void pa_hpux_file_end (void);
175 static void pa_init_libfuncs (void);
176 static rtx
pa_struct_value_rtx (tree
, int);
177 static bool pa_pass_by_reference (cumulative_args_t
, machine_mode
,
179 static int pa_arg_partial_bytes (cumulative_args_t
, machine_mode
,
181 static void pa_function_arg_advance (cumulative_args_t
, machine_mode
,
183 static rtx
pa_function_arg (cumulative_args_t
, machine_mode
,
185 static unsigned int pa_function_arg_boundary (machine_mode
, const_tree
);
186 static struct machine_function
* pa_init_machine_status (void);
187 static reg_class_t
pa_secondary_reload (bool, rtx
, reg_class_t
,
189 secondary_reload_info
*);
190 static void pa_extra_live_on_entry (bitmap
);
191 static machine_mode
pa_promote_function_mode (const_tree
,
195 static void pa_asm_trampoline_template (FILE *);
196 static void pa_trampoline_init (rtx
, tree
, rtx
);
197 static rtx
pa_trampoline_adjust_address (rtx
);
198 static rtx
pa_delegitimize_address (rtx
);
199 static bool pa_print_operand_punct_valid_p (unsigned char);
200 static rtx
pa_internal_arg_pointer (void);
201 static bool pa_can_eliminate (const int, const int);
202 static void pa_conditional_register_usage (void);
203 static machine_mode
pa_c_mode_for_suffix (char);
204 static section
*pa_function_section (tree
, enum node_frequency
, bool, bool);
205 static bool pa_cannot_force_const_mem (machine_mode
, rtx
);
206 static bool pa_legitimate_constant_p (machine_mode
, rtx
);
207 static unsigned int pa_section_type_flags (tree
, const char *, int);
208 static bool pa_legitimate_address_p (machine_mode
, rtx
, bool);
210 /* The following extra sections are only used for SOM. */
211 static GTY(()) section
*som_readonly_data_section
;
212 static GTY(()) section
*som_one_only_readonly_data_section
;
213 static GTY(()) section
*som_one_only_data_section
;
214 static GTY(()) section
*som_tm_clone_table_section
;
216 /* Counts for the number of callee-saved general and floating point
217 registers which were saved by the current function's prologue. */
218 static int gr_saved
, fr_saved
;
220 /* Boolean indicating whether the return pointer was saved by the
221 current function's prologue. */
222 static bool rp_saved
;
224 static rtx
find_addr_reg (rtx
);
226 /* Keep track of the number of bytes we have output in the CODE subspace
227 during this compilation so we'll know when to emit inline long-calls. */
228 unsigned long total_code_bytes
;
230 /* The last address of the previous function plus the number of bytes in
231 associated thunks that have been output. This is used to determine if
232 a thunk can use an IA-relative branch to reach its target function. */
233 static unsigned int last_address
;
235 /* Variables to handle plabels that we discover are necessary at assembly
236 output time. They are output after the current function. */
237 struct GTY(()) deferred_plabel
242 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel
*
244 static size_t n_deferred_plabels
= 0;
246 /* Initialize the GCC target structure. */
248 #undef TARGET_OPTION_OVERRIDE
249 #define TARGET_OPTION_OVERRIDE pa_option_override
251 #undef TARGET_ASM_ALIGNED_HI_OP
252 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
253 #undef TARGET_ASM_ALIGNED_SI_OP
254 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
255 #undef TARGET_ASM_ALIGNED_DI_OP
256 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
257 #undef TARGET_ASM_UNALIGNED_HI_OP
258 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
259 #undef TARGET_ASM_UNALIGNED_SI_OP
260 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
261 #undef TARGET_ASM_UNALIGNED_DI_OP
262 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
263 #undef TARGET_ASM_INTEGER
264 #define TARGET_ASM_INTEGER pa_assemble_integer
266 #undef TARGET_ASM_FUNCTION_PROLOGUE
267 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
268 #undef TARGET_ASM_FUNCTION_EPILOGUE
269 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
271 #undef TARGET_FUNCTION_VALUE
272 #define TARGET_FUNCTION_VALUE pa_function_value
273 #undef TARGET_LIBCALL_VALUE
274 #define TARGET_LIBCALL_VALUE pa_libcall_value
275 #undef TARGET_FUNCTION_VALUE_REGNO_P
276 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
278 #undef TARGET_LEGITIMIZE_ADDRESS
279 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
281 #undef TARGET_SCHED_ADJUST_COST
282 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
283 #undef TARGET_SCHED_ADJUST_PRIORITY
284 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
285 #undef TARGET_SCHED_ISSUE_RATE
286 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
288 #undef TARGET_ENCODE_SECTION_INFO
289 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
290 #undef TARGET_STRIP_NAME_ENCODING
291 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
293 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
294 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
296 #undef TARGET_COMMUTATIVE_P
297 #define TARGET_COMMUTATIVE_P pa_commutative_p
299 #undef TARGET_ASM_OUTPUT_MI_THUNK
300 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
301 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
302 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
304 #undef TARGET_ASM_FILE_END
305 #ifdef ASM_OUTPUT_EXTERNAL_REAL
306 #define TARGET_ASM_FILE_END pa_hpux_file_end
308 #define TARGET_ASM_FILE_END output_deferred_plabels
311 #undef TARGET_ASM_RELOC_RW_MASK
312 #define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
314 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
315 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
317 #if !defined(USE_COLLECT2)
318 #undef TARGET_ASM_CONSTRUCTOR
319 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
320 #undef TARGET_ASM_DESTRUCTOR
321 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
324 #undef TARGET_INIT_BUILTINS
325 #define TARGET_INIT_BUILTINS pa_init_builtins
327 #undef TARGET_EXPAND_BUILTIN
328 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
330 #undef TARGET_REGISTER_MOVE_COST
331 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
332 #undef TARGET_RTX_COSTS
333 #define TARGET_RTX_COSTS hppa_rtx_costs
334 #undef TARGET_ADDRESS_COST
335 #define TARGET_ADDRESS_COST hppa_address_cost
337 #undef TARGET_MACHINE_DEPENDENT_REORG
338 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
340 #undef TARGET_INIT_LIBFUNCS
341 #define TARGET_INIT_LIBFUNCS pa_init_libfuncs
343 #undef TARGET_PROMOTE_FUNCTION_MODE
344 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
345 #undef TARGET_PROMOTE_PROTOTYPES
346 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
348 #undef TARGET_STRUCT_VALUE_RTX
349 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
350 #undef TARGET_RETURN_IN_MEMORY
351 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
352 #undef TARGET_MUST_PASS_IN_STACK
353 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
354 #undef TARGET_PASS_BY_REFERENCE
355 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
356 #undef TARGET_CALLEE_COPIES
357 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
358 #undef TARGET_ARG_PARTIAL_BYTES
359 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
360 #undef TARGET_FUNCTION_ARG
361 #define TARGET_FUNCTION_ARG pa_function_arg
362 #undef TARGET_FUNCTION_ARG_ADVANCE
363 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
364 #undef TARGET_FUNCTION_ARG_BOUNDARY
365 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
367 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
368 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
369 #undef TARGET_EXPAND_BUILTIN_VA_START
370 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
371 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
372 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
374 #undef TARGET_SCALAR_MODE_SUPPORTED_P
375 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
377 #undef TARGET_CANNOT_FORCE_CONST_MEM
378 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
380 #undef TARGET_SECONDARY_RELOAD
381 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
383 #undef TARGET_EXTRA_LIVE_ON_ENTRY
384 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
386 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
387 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
388 #undef TARGET_TRAMPOLINE_INIT
389 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
390 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
391 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
392 #undef TARGET_DELEGITIMIZE_ADDRESS
393 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
394 #undef TARGET_INTERNAL_ARG_POINTER
395 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
396 #undef TARGET_CAN_ELIMINATE
397 #define TARGET_CAN_ELIMINATE pa_can_eliminate
398 #undef TARGET_CONDITIONAL_REGISTER_USAGE
399 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
400 #undef TARGET_C_MODE_FOR_SUFFIX
401 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
402 #undef TARGET_ASM_FUNCTION_SECTION
403 #define TARGET_ASM_FUNCTION_SECTION pa_function_section
405 #undef TARGET_LEGITIMATE_CONSTANT_P
406 #define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
407 #undef TARGET_SECTION_TYPE_FLAGS
408 #define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
409 #undef TARGET_LEGITIMATE_ADDRESS_P
410 #define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
412 struct gcc_target targetm
= TARGET_INITIALIZER
;
414 /* Parse the -mfixed-range= option string. */
417 fix_range (const char *const_str
)
420 char *str
, *dash
, *comma
;
422 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
423 REG2 are either register names or register numbers. The effect
424 of this option is to mark the registers in the range from REG1 to
425 REG2 as ``fixed'' so they won't be used by the compiler. This is
426 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
428 i
= strlen (const_str
);
429 str
= (char *) alloca (i
+ 1);
430 memcpy (str
, const_str
, i
+ 1);
434 dash
= strchr (str
, '-');
437 warning (0, "value of -mfixed-range must have form REG1-REG2");
442 comma
= strchr (dash
+ 1, ',');
446 first
= decode_reg_name (str
);
449 warning (0, "unknown register name: %s", str
);
453 last
= decode_reg_name (dash
+ 1);
456 warning (0, "unknown register name: %s", dash
+ 1);
464 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
468 for (i
= first
; i
<= last
; ++i
)
469 fixed_regs
[i
] = call_used_regs
[i
] = 1;
478 /* Check if all floating point registers have been fixed. */
479 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
484 target_flags
|= MASK_DISABLE_FPREGS
;
487 /* Implement the TARGET_OPTION_OVERRIDE hook. */
490 pa_option_override (void)
493 cl_deferred_option
*opt
;
494 vec
<cl_deferred_option
> *v
495 = (vec
<cl_deferred_option
> *) pa_deferred_options
;
498 FOR_EACH_VEC_ELT (*v
, i
, opt
)
500 switch (opt
->opt_index
)
502 case OPT_mfixed_range_
:
503 fix_range (opt
->arg
);
511 if (flag_pic
&& TARGET_PORTABLE_RUNTIME
)
513 warning (0, "PIC code generation is not supported in the portable runtime model");
516 if (flag_pic
&& TARGET_FAST_INDIRECT_CALLS
)
518 warning (0, "PIC code generation is not compatible with fast indirect calls");
521 if (! TARGET_GAS
&& write_symbols
!= NO_DEBUG
)
523 warning (0, "-g is only supported when using GAS on this processor,");
524 warning (0, "-g option disabled");
525 write_symbols
= NO_DEBUG
;
528 /* We only support the "big PIC" model now. And we always generate PIC
529 code when in 64bit mode. */
530 if (flag_pic
== 1 || TARGET_64BIT
)
533 /* Disable -freorder-blocks-and-partition as we don't support hot and
534 cold partitioning. */
535 if (flag_reorder_blocks_and_partition
)
537 inform (input_location
,
538 "-freorder-blocks-and-partition does not work "
539 "on this architecture");
540 flag_reorder_blocks_and_partition
= 0;
541 flag_reorder_blocks
= 1;
544 /* We can't guarantee that .dword is available for 32-bit targets. */
545 if (UNITS_PER_WORD
== 4)
546 targetm
.asm_out
.aligned_op
.di
= NULL
;
548 /* The unaligned ops are only available when using GAS. */
551 targetm
.asm_out
.unaligned_op
.hi
= NULL
;
552 targetm
.asm_out
.unaligned_op
.si
= NULL
;
553 targetm
.asm_out
.unaligned_op
.di
= NULL
;
556 init_machine_status
= pa_init_machine_status
;
561 PA_BUILTIN_COPYSIGNQ
,
564 PA_BUILTIN_HUGE_VALQ
,
568 static GTY(()) tree pa_builtins
[(int) PA_BUILTIN_max
];
571 pa_init_builtins (void)
573 #ifdef DONT_HAVE_FPUTC_UNLOCKED
575 tree decl
= builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED
);
576 set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED
, decl
,
577 builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED
));
584 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITE
)) != NULL_TREE
)
585 set_user_assembler_name (decl
, "_Isfinite");
586 if ((decl
= builtin_decl_explicit (BUILT_IN_FINITEF
)) != NULL_TREE
)
587 set_user_assembler_name (decl
, "_Isfinitef");
591 if (HPUX_LONG_DOUBLE_LIBRARY
)
595 /* Under HPUX, the __float128 type is a synonym for "long double". */
596 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
599 /* TFmode support builtins. */
600 ftype
= build_function_type_list (long_double_type_node
,
601 long_double_type_node
,
603 decl
= add_builtin_function ("__builtin_fabsq", ftype
,
604 PA_BUILTIN_FABSQ
, BUILT_IN_MD
,
605 "_U_Qfabs", NULL_TREE
);
606 TREE_READONLY (decl
) = 1;
607 pa_builtins
[PA_BUILTIN_FABSQ
] = decl
;
609 ftype
= build_function_type_list (long_double_type_node
,
610 long_double_type_node
,
611 long_double_type_node
,
613 decl
= add_builtin_function ("__builtin_copysignq", ftype
,
614 PA_BUILTIN_COPYSIGNQ
, BUILT_IN_MD
,
615 "_U_Qfcopysign", NULL_TREE
);
616 TREE_READONLY (decl
) = 1;
617 pa_builtins
[PA_BUILTIN_COPYSIGNQ
] = decl
;
619 ftype
= build_function_type_list (long_double_type_node
, NULL_TREE
);
620 decl
= add_builtin_function ("__builtin_infq", ftype
,
621 PA_BUILTIN_INFQ
, BUILT_IN_MD
,
623 pa_builtins
[PA_BUILTIN_INFQ
] = decl
;
625 decl
= add_builtin_function ("__builtin_huge_valq", ftype
,
626 PA_BUILTIN_HUGE_VALQ
, BUILT_IN_MD
,
628 pa_builtins
[PA_BUILTIN_HUGE_VALQ
] = decl
;
633 pa_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
634 machine_mode mode ATTRIBUTE_UNUSED
,
635 int ignore ATTRIBUTE_UNUSED
)
637 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
638 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
642 case PA_BUILTIN_FABSQ
:
643 case PA_BUILTIN_COPYSIGNQ
:
644 return expand_call (exp
, target
, ignore
);
646 case PA_BUILTIN_INFQ
:
647 case PA_BUILTIN_HUGE_VALQ
:
649 machine_mode target_mode
= TYPE_MODE (TREE_TYPE (exp
));
654 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, target_mode
);
656 tmp
= validize_mem (force_const_mem (target_mode
, tmp
));
659 target
= gen_reg_rtx (target_mode
);
661 emit_move_insn (target
, tmp
);
672 /* Function to init struct machine_function.
673 This will be called, via a pointer variable,
674 from push_function_context. */
676 static struct machine_function
*
677 pa_init_machine_status (void)
679 return ggc_cleared_alloc
<machine_function
> ();
682 /* If FROM is a probable pointer register, mark TO as a probable
683 pointer register with the same pointer alignment as FROM. */
686 copy_reg_pointer (rtx to
, rtx from
)
688 if (REG_POINTER (from
))
689 mark_reg_pointer (to
, REGNO_POINTER_ALIGN (REGNO (from
)));
692 /* Return 1 if X contains a symbolic expression. We know these
693 expressions will have one of a few well defined forms, so
694 we need only check those forms. */
696 pa_symbolic_expression_p (rtx x
)
699 /* Strip off any HIGH. */
700 if (GET_CODE (x
) == HIGH
)
703 return symbolic_operand (x
, VOIDmode
);
706 /* Accept any constant that can be moved in one instruction into a
709 pa_cint_ok_for_move (HOST_WIDE_INT ival
)
711 /* OK if ldo, ldil, or zdepi, can be used. */
712 return (VAL_14_BITS_P (ival
)
713 || pa_ldil_cint_p (ival
)
714 || pa_zdepi_cint_p (ival
));
717 /* True iff ldil can be used to load this CONST_INT. The least
718 significant 11 bits of the value must be zero and the value must
719 not change sign when extended from 32 to 64 bits. */
721 pa_ldil_cint_p (HOST_WIDE_INT ival
)
723 HOST_WIDE_INT x
= ival
& (((HOST_WIDE_INT
) -1 << 31) | 0x7ff);
725 return x
== 0 || x
== ((HOST_WIDE_INT
) -1 << 31);
728 /* True iff zdepi can be used to generate this CONST_INT.
729 zdepi first sign extends a 5-bit signed number to a given field
730 length, then places this field anywhere in a zero. */
732 pa_zdepi_cint_p (unsigned HOST_WIDE_INT x
)
734 unsigned HOST_WIDE_INT lsb_mask
, t
;
736 /* This might not be obvious, but it's at least fast.
737 This function is critical; we don't have the time loops would take. */
739 t
= ((x
>> 4) + lsb_mask
) & ~(lsb_mask
- 1);
740 /* Return true iff t is a power of two. */
741 return ((t
& (t
- 1)) == 0);
744 /* True iff depi or extru can be used to compute (reg & mask).
745 Accept bit pattern like these:
750 pa_and_mask_p (unsigned HOST_WIDE_INT mask
)
753 mask
+= mask
& -mask
;
754 return (mask
& (mask
- 1)) == 0;
757 /* True iff depi can be used to compute (reg | MASK). */
759 pa_ior_mask_p (unsigned HOST_WIDE_INT mask
)
761 mask
+= mask
& -mask
;
762 return (mask
& (mask
- 1)) == 0;
765 /* Legitimize PIC addresses. If the address is already
766 position-independent, we return ORIG. Newly generated
767 position-independent addresses go to REG. If we need more
768 than one register, we lose. */
771 legitimize_pic_address (rtx orig
, machine_mode mode
, rtx reg
)
775 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig
));
777 /* Labels need special handling. */
778 if (pic_label_operand (orig
, mode
))
782 /* We do not want to go through the movXX expanders here since that
783 would create recursion.
785 Nor do we really want to call a generator for a named pattern
786 since that requires multiple patterns if we want to support
789 So instead we just emit the raw set, which avoids the movXX
790 expanders completely. */
791 mark_reg_pointer (reg
, BITS_PER_UNIT
);
792 insn
= emit_insn (gen_rtx_SET (reg
, orig
));
794 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
795 add_reg_note (insn
, REG_EQUAL
, orig
);
797 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
798 and update LABEL_NUSES because this is not done automatically. */
799 if (reload_in_progress
|| reload_completed
)
801 /* Extract LABEL_REF. */
802 if (GET_CODE (orig
) == CONST
)
803 orig
= XEXP (XEXP (orig
, 0), 0);
804 /* Extract CODE_LABEL. */
805 orig
= XEXP (orig
, 0);
806 add_reg_note (insn
, REG_LABEL_OPERAND
, orig
);
807 /* Make sure we have label and not a note. */
809 LABEL_NUSES (orig
)++;
811 crtl
->uses_pic_offset_table
= 1;
814 if (GET_CODE (orig
) == SYMBOL_REF
)
821 /* Before reload, allocate a temporary register for the intermediate
822 result. This allows the sequence to be deleted when the final
823 result is unused and the insns are trivially dead. */
824 tmp_reg
= ((reload_in_progress
|| reload_completed
)
825 ? reg
: gen_reg_rtx (Pmode
));
827 if (function_label_operand (orig
, VOIDmode
))
829 /* Force function label into memory in word mode. */
830 orig
= XEXP (force_const_mem (word_mode
, orig
), 0);
831 /* Load plabel address from DLT. */
832 emit_move_insn (tmp_reg
,
833 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
834 gen_rtx_HIGH (word_mode
, orig
)));
836 = gen_const_mem (Pmode
,
837 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
838 gen_rtx_UNSPEC (Pmode
,
841 emit_move_insn (reg
, pic_ref
);
842 /* Now load address of function descriptor. */
843 pic_ref
= gen_rtx_MEM (Pmode
, reg
);
847 /* Load symbol reference from DLT. */
848 emit_move_insn (tmp_reg
,
849 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
850 gen_rtx_HIGH (word_mode
, orig
)));
852 = gen_const_mem (Pmode
,
853 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
854 gen_rtx_UNSPEC (Pmode
,
859 crtl
->uses_pic_offset_table
= 1;
860 mark_reg_pointer (reg
, BITS_PER_UNIT
);
861 insn
= emit_move_insn (reg
, pic_ref
);
863 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
864 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
868 else if (GET_CODE (orig
) == CONST
)
872 if (GET_CODE (XEXP (orig
, 0)) == PLUS
873 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
877 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
879 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
880 orig
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
881 base
== reg
? 0 : reg
);
883 if (GET_CODE (orig
) == CONST_INT
)
885 if (INT_14_BITS (orig
))
886 return plus_constant (Pmode
, base
, INTVAL (orig
));
887 orig
= force_reg (Pmode
, orig
);
889 pic_ref
= gen_rtx_PLUS (Pmode
, base
, orig
);
890 /* Likewise, should we set special REG_NOTEs here? */
896 static GTY(()) rtx gen_tls_tga
;
899 gen_tls_get_addr (void)
902 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
907 hppa_tls_call (rtx arg
)
911 ret
= gen_reg_rtx (Pmode
);
912 emit_library_call_value (gen_tls_get_addr (), ret
,
913 LCT_CONST
, Pmode
, 1, arg
, Pmode
);
919 legitimize_tls_address (rtx addr
)
921 rtx ret
, tmp
, t1
, t2
, tp
;
924 /* Currently, we can't handle anything but a SYMBOL_REF. */
925 if (GET_CODE (addr
) != SYMBOL_REF
)
928 switch (SYMBOL_REF_TLS_MODEL (addr
))
930 case TLS_MODEL_GLOBAL_DYNAMIC
:
931 tmp
= gen_reg_rtx (Pmode
);
933 emit_insn (gen_tgd_load_pic (tmp
, addr
));
935 emit_insn (gen_tgd_load (tmp
, addr
));
936 ret
= hppa_tls_call (tmp
);
939 case TLS_MODEL_LOCAL_DYNAMIC
:
940 ret
= gen_reg_rtx (Pmode
);
941 tmp
= gen_reg_rtx (Pmode
);
944 emit_insn (gen_tld_load_pic (tmp
, addr
));
946 emit_insn (gen_tld_load (tmp
, addr
));
947 t1
= hppa_tls_call (tmp
);
950 t2
= gen_reg_rtx (Pmode
);
951 emit_libcall_block (insn
, t2
, t1
,
952 gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
954 emit_insn (gen_tld_offset_load (ret
, addr
, t2
));
957 case TLS_MODEL_INITIAL_EXEC
:
958 tp
= gen_reg_rtx (Pmode
);
959 tmp
= gen_reg_rtx (Pmode
);
960 ret
= gen_reg_rtx (Pmode
);
961 emit_insn (gen_tp_load (tp
));
963 emit_insn (gen_tie_load_pic (tmp
, addr
));
965 emit_insn (gen_tie_load (tmp
, addr
));
966 emit_move_insn (ret
, gen_rtx_PLUS (Pmode
, tp
, tmp
));
969 case TLS_MODEL_LOCAL_EXEC
:
970 tp
= gen_reg_rtx (Pmode
);
971 ret
= gen_reg_rtx (Pmode
);
972 emit_insn (gen_tp_load (tp
));
973 emit_insn (gen_tle_load (ret
, addr
, tp
));
983 /* Helper for hppa_legitimize_address. Given X, return true if it
984 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
986 This respectively represent canonical shift-add rtxs or scaled
989 mem_shadd_or_shadd_rtx_p (rtx x
)
991 return ((GET_CODE (x
) == ASHIFT
992 || GET_CODE (x
) == MULT
)
993 && GET_CODE (XEXP (x
, 1)) == CONST_INT
994 && ((GET_CODE (x
) == ASHIFT
995 && pa_shadd_constant_p (INTVAL (XEXP (x
, 1))))
996 || (GET_CODE (x
) == MULT
997 && pa_mem_shadd_constant_p (INTVAL (XEXP (x
, 1))))));
1000 /* Try machine-dependent ways of modifying an illegitimate address
1001 to be legitimate. If we find one, return the new, valid address.
1002 This macro is used in only one place: `memory_address' in explow.c.
1004 OLDX is the address as it was before break_out_memory_refs was called.
1005 In some cases it is useful to look at this to decide what needs to be done.
1007 It is always safe for this macro to do nothing. It exists to recognize
1008 opportunities to optimize the output.
1010 For the PA, transform:
1012 memory(X + <large int>)
1016 if (<large int> & mask) >= 16
1017 Y = (<large int> & ~mask) + mask + 1 Round up.
1019 Y = (<large int> & ~mask) Round down.
1021 memory (Z + (<large int> - Y));
1023 This is for CSE to find several similar references, and only use one Z.
1025 X can either be a SYMBOL_REF or REG, but because combine cannot
1026 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1027 D will not fit in 14 bits.
1029 MODE_FLOAT references allow displacements which fit in 5 bits, so use
1032 MODE_INT references allow displacements which fit in 14 bits, so use
1035 This relies on the fact that most mode MODE_FLOAT references will use FP
1036 registers and most mode MODE_INT references will use integer registers.
1037 (In the rare case of an FP register used in an integer MODE, we depend
1038 on secondary reloads to clean things up.)
1041 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1042 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
1043 addressing modes to be used).
1045 Note that the addresses passed into hppa_legitimize_address always
1046 come from a MEM, so we only have to match the MULT form on incoming
1047 addresses. But to be future proof we also match the ASHIFT form.
1049 However, this routine always places those shift-add sequences into
1050 registers, so we have to generate the ASHIFT form as our output.
1052 Put X and Z into registers. Then put the entire expression into
1056 hppa_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
1061 /* We need to canonicalize the order of operands in unscaled indexed
1062 addresses since the code that checks if an address is valid doesn't
1063 always try both orders. */
1064 if (!TARGET_NO_SPACE_REGS
1065 && GET_CODE (x
) == PLUS
1066 && GET_MODE (x
) == Pmode
1067 && REG_P (XEXP (x
, 0))
1068 && REG_P (XEXP (x
, 1))
1069 && REG_POINTER (XEXP (x
, 0))
1070 && !REG_POINTER (XEXP (x
, 1)))
1071 return gen_rtx_PLUS (Pmode
, XEXP (x
, 1), XEXP (x
, 0));
1073 if (tls_referenced_p (x
))
1074 return legitimize_tls_address (x
);
1076 return legitimize_pic_address (x
, mode
, gen_reg_rtx (Pmode
));
1078 /* Strip off CONST. */
1079 if (GET_CODE (x
) == CONST
)
1082 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1083 That should always be safe. */
1084 if (GET_CODE (x
) == PLUS
1085 && GET_CODE (XEXP (x
, 0)) == REG
1086 && GET_CODE (XEXP (x
, 1)) == SYMBOL_REF
)
1088 rtx reg
= force_reg (Pmode
, XEXP (x
, 1));
1089 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg
, XEXP (x
, 0)));
1092 /* Note we must reject symbols which represent function addresses
1093 since the assembler/linker can't handle arithmetic on plabels. */
1094 if (GET_CODE (x
) == PLUS
1095 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1096 && ((GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
1097 && !FUNCTION_NAME_P (XSTR (XEXP (x
, 0), 0)))
1098 || GET_CODE (XEXP (x
, 0)) == REG
))
1100 rtx int_part
, ptr_reg
;
1102 int offset
= INTVAL (XEXP (x
, 1));
1105 mask
= (GET_MODE_CLASS (mode
) == MODE_FLOAT
1106 && !INT14_OK_STRICT
? 0x1f : 0x3fff);
1108 /* Choose which way to round the offset. Round up if we
1109 are >= halfway to the next boundary. */
1110 if ((offset
& mask
) >= ((mask
+ 1) / 2))
1111 newoffset
= (offset
& ~ mask
) + mask
+ 1;
1113 newoffset
= (offset
& ~ mask
);
1115 /* If the newoffset will not fit in 14 bits (ldo), then
1116 handling this would take 4 or 5 instructions (2 to load
1117 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1118 add the new offset and the SYMBOL_REF.) Combine can
1119 not handle 4->2 or 5->2 combinations, so do not create
1121 if (! VAL_14_BITS_P (newoffset
)
1122 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
)
1124 rtx const_part
= plus_constant (Pmode
, XEXP (x
, 0), newoffset
);
1127 gen_rtx_HIGH (Pmode
, const_part
));
1130 gen_rtx_LO_SUM (Pmode
,
1131 tmp_reg
, const_part
));
1135 if (! VAL_14_BITS_P (newoffset
))
1136 int_part
= force_reg (Pmode
, GEN_INT (newoffset
));
1138 int_part
= GEN_INT (newoffset
);
1140 ptr_reg
= force_reg (Pmode
,
1141 gen_rtx_PLUS (Pmode
,
1142 force_reg (Pmode
, XEXP (x
, 0)),
1145 return plus_constant (Pmode
, ptr_reg
, offset
- newoffset
);
1148 /* Handle (plus (mult (a) (mem_shadd_constant)) (b)). */
1150 if (GET_CODE (x
) == PLUS
1151 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1152 && (OBJECT_P (XEXP (x
, 1))
1153 || GET_CODE (XEXP (x
, 1)) == SUBREG
)
1154 && GET_CODE (XEXP (x
, 1)) != CONST
)
1156 /* If we were given a MULT, we must fix the constant
1157 as we're going to create the ASHIFT form. */
1158 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1159 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1160 shift_val
= exact_log2 (shift_val
);
1164 if (GET_CODE (reg1
) != REG
)
1165 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1167 reg2
= XEXP (XEXP (x
, 0), 0);
1168 if (GET_CODE (reg2
) != REG
)
1169 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1171 return force_reg (Pmode
,
1172 gen_rtx_PLUS (Pmode
,
1173 gen_rtx_ASHIFT (Pmode
, reg2
,
1174 GEN_INT (shift_val
)),
1178 /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).
1180 Only do so for floating point modes since this is more speculative
1181 and we lose if it's an integer store. */
1182 if (GET_CODE (x
) == PLUS
1183 && GET_CODE (XEXP (x
, 0)) == PLUS
1184 && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x
, 0), 0))
1185 && (mode
== SFmode
|| mode
== DFmode
))
1187 int shift_val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
1189 /* If we were given a MULT, we must fix the constant
1190 as we're going to create the ASHIFT form. */
1191 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
)
1192 shift_val
= exact_log2 (shift_val
);
1194 /* Try and figure out what to use as a base register. */
1195 rtx reg1
, reg2
, base
, idx
;
1197 reg1
= XEXP (XEXP (x
, 0), 1);
1202 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1203 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
1204 it's a base register below. */
1205 if (GET_CODE (reg1
) != REG
)
1206 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1208 if (GET_CODE (reg2
) != REG
)
1209 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1211 /* Figure out what the base and index are. */
1213 if (GET_CODE (reg1
) == REG
1214 && REG_POINTER (reg1
))
1217 idx
= gen_rtx_PLUS (Pmode
,
1218 gen_rtx_ASHIFT (Pmode
,
1219 XEXP (XEXP (XEXP (x
, 0), 0), 0),
1220 GEN_INT (shift_val
)),
1223 else if (GET_CODE (reg2
) == REG
1224 && REG_POINTER (reg2
))
1233 /* If the index adds a large constant, try to scale the
1234 constant so that it can be loaded with only one insn. */
1235 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1236 && VAL_14_BITS_P (INTVAL (XEXP (idx
, 1))
1237 / INTVAL (XEXP (XEXP (idx
, 0), 1)))
1238 && INTVAL (XEXP (idx
, 1)) % INTVAL (XEXP (XEXP (idx
, 0), 1)) == 0)
1240 /* Divide the CONST_INT by the scale factor, then add it to A. */
1241 int val
= INTVAL (XEXP (idx
, 1));
1242 val
/= (1 << shift_val
);
1244 reg1
= XEXP (XEXP (idx
, 0), 0);
1245 if (GET_CODE (reg1
) != REG
)
1246 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1248 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg1
, GEN_INT (val
)));
1250 /* We can now generate a simple scaled indexed address. */
1253 (Pmode
, gen_rtx_PLUS (Pmode
,
1254 gen_rtx_ASHIFT (Pmode
, reg1
,
1255 GEN_INT (shift_val
)),
1259 /* If B + C is still a valid base register, then add them. */
1260 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1261 && INTVAL (XEXP (idx
, 1)) <= 4096
1262 && INTVAL (XEXP (idx
, 1)) >= -4096)
1266 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, XEXP (idx
, 1)));
1268 reg2
= XEXP (XEXP (idx
, 0), 0);
1269 if (GET_CODE (reg2
) != CONST_INT
)
1270 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1272 return force_reg (Pmode
,
1273 gen_rtx_PLUS (Pmode
,
1274 gen_rtx_ASHIFT (Pmode
, reg2
,
1275 GEN_INT (shift_val
)),
1279 /* Get the index into a register, then add the base + index and
1280 return a register holding the result. */
1282 /* First get A into a register. */
1283 reg1
= XEXP (XEXP (idx
, 0), 0);
1284 if (GET_CODE (reg1
) != REG
)
1285 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1287 /* And get B into a register. */
1288 reg2
= XEXP (idx
, 1);
1289 if (GET_CODE (reg2
) != REG
)
1290 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1292 reg1
= force_reg (Pmode
,
1293 gen_rtx_PLUS (Pmode
,
1294 gen_rtx_ASHIFT (Pmode
, reg1
,
1295 GEN_INT (shift_val
)),
1298 /* Add the result to our base register and return. */
1299 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, reg1
));
1303 /* Uh-oh. We might have an address for x[n-100000]. This needs
1304 special handling to avoid creating an indexed memory address
1305 with x-100000 as the base.
1307 If the constant part is small enough, then it's still safe because
1308 there is a guard page at the beginning and end of the data segment.
1310 Scaled references are common enough that we want to try and rearrange the
1311 terms so that we can use indexing for these addresses too. Only
1312 do the optimization for floatint point modes. */
1314 if (GET_CODE (x
) == PLUS
1315 && pa_symbolic_expression_p (XEXP (x
, 1)))
1317 /* Ugly. We modify things here so that the address offset specified
1318 by the index expression is computed first, then added to x to form
1319 the entire address. */
1321 rtx regx1
, regx2
, regy1
, regy2
, y
;
1323 /* Strip off any CONST. */
1325 if (GET_CODE (y
) == CONST
)
1328 if (GET_CODE (y
) == PLUS
|| GET_CODE (y
) == MINUS
)
1330 /* See if this looks like
1331 (plus (mult (reg) (mem_shadd_const))
1332 (const (plus (symbol_ref) (const_int))))
1334 Where const_int is small. In that case the const
1335 expression is a valid pointer for indexing.
1337 If const_int is big, but can be divided evenly by shadd_const
1338 and added to (reg). This allows more scaled indexed addresses. */
1339 if (GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1340 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1341 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1342 && INTVAL (XEXP (y
, 1)) >= -4096
1343 && INTVAL (XEXP (y
, 1)) <= 4095)
1345 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1347 /* If we were given a MULT, we must fix the constant
1348 as we're going to create the ASHIFT form. */
1349 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1350 shift_val
= exact_log2 (shift_val
);
1355 if (GET_CODE (reg1
) != REG
)
1356 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1358 reg2
= XEXP (XEXP (x
, 0), 0);
1359 if (GET_CODE (reg2
) != REG
)
1360 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1364 gen_rtx_PLUS (Pmode
,
1365 gen_rtx_ASHIFT (Pmode
,
1367 GEN_INT (shift_val
)),
1370 else if ((mode
== DFmode
|| mode
== SFmode
)
1371 && GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1372 && mem_shadd_or_shadd_rtx_p (XEXP (x
, 0))
1373 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1374 && INTVAL (XEXP (y
, 1)) % (1 << INTVAL (XEXP (XEXP (x
, 0), 1))) == 0)
1376 int shift_val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1378 /* If we were given a MULT, we must fix the constant
1379 as we're going to create the ASHIFT form. */
1380 if (GET_CODE (XEXP (x
, 0)) == MULT
)
1381 shift_val
= exact_log2 (shift_val
);
1384 = force_reg (Pmode
, GEN_INT (INTVAL (XEXP (y
, 1))
1385 / INTVAL (XEXP (XEXP (x
, 0), 1))));
1386 regx2
= XEXP (XEXP (x
, 0), 0);
1387 if (GET_CODE (regx2
) != REG
)
1388 regx2
= force_reg (Pmode
, force_operand (regx2
, 0));
1389 regx2
= force_reg (Pmode
, gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1393 gen_rtx_PLUS (Pmode
,
1394 gen_rtx_ASHIFT (Pmode
, regx2
,
1395 GEN_INT (shift_val
)),
1396 force_reg (Pmode
, XEXP (y
, 0))));
1398 else if (GET_CODE (XEXP (y
, 1)) == CONST_INT
1399 && INTVAL (XEXP (y
, 1)) >= -4096
1400 && INTVAL (XEXP (y
, 1)) <= 4095)
1402 /* This is safe because of the guard page at the
1403 beginning and end of the data space. Just
1404 return the original address. */
1409 /* Doesn't look like one we can optimize. */
1410 regx1
= force_reg (Pmode
, force_operand (XEXP (x
, 0), 0));
1411 regy1
= force_reg (Pmode
, force_operand (XEXP (y
, 0), 0));
1412 regy2
= force_reg (Pmode
, force_operand (XEXP (y
, 1), 0));
1413 regx1
= force_reg (Pmode
,
1414 gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1416 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, regx1
, regy1
));
1424 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1426 Compute extra cost of moving data between one register class
1429 Make moves from SAR so expensive they should never happen. We used to
1430 have 0xffff here, but that generates overflow in rare cases.
1432 Copies involving a FP register and a non-FP register are relatively
1433 expensive because they must go through memory.
1435 Other copies are reasonably cheap. */
1438 hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
1439 reg_class_t from
, reg_class_t to
)
1441 if (from
== SHIFT_REGS
)
1443 else if (to
== SHIFT_REGS
&& FP_REG_CLASS_P (from
))
1445 else if ((FP_REG_CLASS_P (from
) && ! FP_REG_CLASS_P (to
))
1446 || (FP_REG_CLASS_P (to
) && ! FP_REG_CLASS_P (from
)))
1452 /* For the HPPA, REG and REG+CONST is cost 0
1453 and addresses involving symbolic constants are cost 2.
1455 PIC addresses are very expensive.
1457 It is no coincidence that this has the same structure
1458 as pa_legitimate_address_p. */
1461 hppa_address_cost (rtx X
, machine_mode mode ATTRIBUTE_UNUSED
,
1462 addr_space_t as ATTRIBUTE_UNUSED
,
1463 bool speed ATTRIBUTE_UNUSED
)
1465 switch (GET_CODE (X
))
1478 /* Compute a (partial) cost for rtx X. Return true if the complete
1479 cost has been computed, and false if subexpressions should be
1480 scanned. In either case, *TOTAL contains the cost result. */
1483 hppa_rtx_costs (rtx x
, int code
, int outer_code
, int opno ATTRIBUTE_UNUSED
,
1484 int *total
, bool speed ATTRIBUTE_UNUSED
)
1491 if (INTVAL (x
) == 0)
1493 else if (INT_14_BITS (x
))
1510 if ((x
== CONST0_RTX (DFmode
) || x
== CONST0_RTX (SFmode
))
1511 && outer_code
!= SET
)
1518 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
1520 *total
= COSTS_N_INSNS (3);
1524 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1525 factor
= GET_MODE_SIZE (GET_MODE (x
)) / 4;
1529 if (TARGET_PA_11
&& !TARGET_DISABLE_FPREGS
&& !TARGET_SOFT_FLOAT
)
1530 *total
= factor
* factor
* COSTS_N_INSNS (8);
1532 *total
= factor
* factor
* COSTS_N_INSNS (20);
1536 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
1538 *total
= COSTS_N_INSNS (14);
1546 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1547 factor
= GET_MODE_SIZE (GET_MODE (x
)) / 4;
1551 *total
= factor
* factor
* COSTS_N_INSNS (60);
1554 case PLUS
: /* this includes shNadd insns */
1556 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
1558 *total
= COSTS_N_INSNS (3);
1562 /* A size N times larger than UNITS_PER_WORD needs N times as
1563 many insns, taking N times as long. */
1564 factor
= GET_MODE_SIZE (GET_MODE (x
)) / UNITS_PER_WORD
;
1567 *total
= factor
* COSTS_N_INSNS (1);
1573 *total
= COSTS_N_INSNS (1);
1581 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1582 new rtx with the correct mode. */
1584 force_mode (machine_mode mode
, rtx orig
)
1586 if (mode
== GET_MODE (orig
))
1589 gcc_assert (REGNO (orig
) < FIRST_PSEUDO_REGISTER
);
1591 return gen_rtx_REG (mode
, REGNO (orig
));
1594 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1597 pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
1599 return tls_referenced_p (x
);
1602 /* Emit insns to move operands[1] into operands[0].
1604 Return 1 if we have written out everything that needs to be done to
1605 do the move. Otherwise, return 0 and the caller will emit the move
1608 Note SCRATCH_REG may not be in the proper mode depending on how it
1609 will be used. This routine is responsible for creating a new copy
1610 of SCRATCH_REG in the proper mode. */
1613 pa_emit_move_sequence (rtx
*operands
, machine_mode mode
, rtx scratch_reg
)
1615 register rtx operand0
= operands
[0];
1616 register rtx operand1
= operands
[1];
1619 /* We can only handle indexed addresses in the destination operand
1620 of floating point stores. Thus, we need to break out indexed
1621 addresses from the destination operand. */
1622 if (GET_CODE (operand0
) == MEM
&& IS_INDEX_ADDR_P (XEXP (operand0
, 0)))
1624 gcc_assert (can_create_pseudo_p ());
1626 tem
= copy_to_mode_reg (Pmode
, XEXP (operand0
, 0));
1627 operand0
= replace_equiv_address (operand0
, tem
);
1630 /* On targets with non-equivalent space registers, break out unscaled
1631 indexed addresses from the source operand before the final CSE.
1632 We have to do this because the REG_POINTER flag is not correctly
1633 carried through various optimization passes and CSE may substitute
1634 a pseudo without the pointer set for one with the pointer set. As
1635 a result, we loose various opportunities to create insns with
1636 unscaled indexed addresses. */
1637 if (!TARGET_NO_SPACE_REGS
1638 && !cse_not_expected
1639 && GET_CODE (operand1
) == MEM
1640 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1641 && REG_P (XEXP (XEXP (operand1
, 0), 0))
1642 && REG_P (XEXP (XEXP (operand1
, 0), 1)))
1644 = replace_equiv_address (operand1
,
1645 copy_to_mode_reg (Pmode
, XEXP (operand1
, 0)));
1648 && reload_in_progress
&& GET_CODE (operand0
) == REG
1649 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
1650 operand0
= reg_equiv_mem (REGNO (operand0
));
1651 else if (scratch_reg
1652 && reload_in_progress
&& GET_CODE (operand0
) == SUBREG
1653 && GET_CODE (SUBREG_REG (operand0
)) == REG
1654 && REGNO (SUBREG_REG (operand0
)) >= FIRST_PSEUDO_REGISTER
)
1656 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1657 the code which tracks sets/uses for delete_output_reload. */
1658 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand0
),
1659 reg_equiv_mem (REGNO (SUBREG_REG (operand0
))),
1660 SUBREG_BYTE (operand0
));
1661 operand0
= alter_subreg (&temp
, true);
1665 && reload_in_progress
&& GET_CODE (operand1
) == REG
1666 && REGNO (operand1
) >= FIRST_PSEUDO_REGISTER
)
1667 operand1
= reg_equiv_mem (REGNO (operand1
));
1668 else if (scratch_reg
1669 && reload_in_progress
&& GET_CODE (operand1
) == SUBREG
1670 && GET_CODE (SUBREG_REG (operand1
)) == REG
1671 && REGNO (SUBREG_REG (operand1
)) >= FIRST_PSEUDO_REGISTER
)
1673 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1674 the code which tracks sets/uses for delete_output_reload. */
1675 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand1
),
1676 reg_equiv_mem (REGNO (SUBREG_REG (operand1
))),
1677 SUBREG_BYTE (operand1
));
1678 operand1
= alter_subreg (&temp
, true);
1681 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand0
) == MEM
1682 && ((tem
= find_replacement (&XEXP (operand0
, 0)))
1683 != XEXP (operand0
, 0)))
1684 operand0
= replace_equiv_address (operand0
, tem
);
1686 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand1
) == MEM
1687 && ((tem
= find_replacement (&XEXP (operand1
, 0)))
1688 != XEXP (operand1
, 0)))
1689 operand1
= replace_equiv_address (operand1
, tem
);
1691 /* Handle secondary reloads for loads/stores of FP registers from
1692 REG+D addresses where D does not fit in 5 or 14 bits, including
1693 (subreg (mem (addr))) cases. */
1695 && fp_reg_operand (operand0
, mode
)
1696 && (MEM_P (operand1
)
1697 || (GET_CODE (operand1
) == SUBREG
1698 && MEM_P (XEXP (operand1
, 0))))
1699 && !floating_point_store_memory_operand (operand1
, mode
))
1701 if (GET_CODE (operand1
) == SUBREG
)
1702 operand1
= XEXP (operand1
, 0);
1704 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1705 it in WORD_MODE regardless of what mode it was originally given
1707 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1709 /* D might not fit in 14 bits either; for such cases load D into
1711 if (reg_plus_base_memory_operand (operand1
, mode
)
1714 && INT_14_BITS (XEXP (XEXP (operand1
, 0), 1))))
1716 emit_move_insn (scratch_reg
, XEXP (XEXP (operand1
, 0), 1));
1717 emit_move_insn (scratch_reg
,
1718 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1
, 0)),
1720 XEXP (XEXP (operand1
, 0), 0),
1724 emit_move_insn (scratch_reg
, XEXP (operand1
, 0));
1725 emit_insn (gen_rtx_SET (operand0
,
1726 replace_equiv_address (operand1
, scratch_reg
)));
1729 else if (scratch_reg
1730 && fp_reg_operand (operand1
, mode
)
1731 && (MEM_P (operand0
)
1732 || (GET_CODE (operand0
) == SUBREG
1733 && MEM_P (XEXP (operand0
, 0))))
1734 && !floating_point_store_memory_operand (operand0
, mode
))
1736 if (GET_CODE (operand0
) == SUBREG
)
1737 operand0
= XEXP (operand0
, 0);
1739 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1740 it in WORD_MODE regardless of what mode it was originally given
1742 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1744 /* D might not fit in 14 bits either; for such cases load D into
1746 if (reg_plus_base_memory_operand (operand0
, mode
)
1749 && INT_14_BITS (XEXP (XEXP (operand0
, 0), 1))))
1751 emit_move_insn (scratch_reg
, XEXP (XEXP (operand0
, 0), 1));
1752 emit_move_insn (scratch_reg
, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0
,
1755 XEXP (XEXP (operand0
, 0),
1760 emit_move_insn (scratch_reg
, XEXP (operand0
, 0));
1761 emit_insn (gen_rtx_SET (replace_equiv_address (operand0
, scratch_reg
),
1765 /* Handle secondary reloads for loads of FP registers from constant
1766 expressions by forcing the constant into memory. For the most part,
1767 this is only necessary for SImode and DImode.
1769 Use scratch_reg to hold the address of the memory location. */
1770 else if (scratch_reg
1771 && CONSTANT_P (operand1
)
1772 && fp_reg_operand (operand0
, mode
))
1774 rtx const_mem
, xoperands
[2];
1776 if (operand1
== CONST0_RTX (mode
))
1778 emit_insn (gen_rtx_SET (operand0
, operand1
));
1782 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1783 it in WORD_MODE regardless of what mode it was originally given
1785 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1787 /* Force the constant into memory and put the address of the
1788 memory location into scratch_reg. */
1789 const_mem
= force_const_mem (mode
, operand1
);
1790 xoperands
[0] = scratch_reg
;
1791 xoperands
[1] = XEXP (const_mem
, 0);
1792 pa_emit_move_sequence (xoperands
, Pmode
, 0);
1794 /* Now load the destination register. */
1795 emit_insn (gen_rtx_SET (operand0
,
1796 replace_equiv_address (const_mem
, scratch_reg
)));
1799 /* Handle secondary reloads for SAR. These occur when trying to load
1800 the SAR from memory or a constant. */
1801 else if (scratch_reg
1802 && GET_CODE (operand0
) == REG
1803 && REGNO (operand0
) < FIRST_PSEUDO_REGISTER
1804 && REGNO_REG_CLASS (REGNO (operand0
)) == SHIFT_REGS
1805 && (GET_CODE (operand1
) == MEM
|| GET_CODE (operand1
) == CONST_INT
))
1807 /* D might not fit in 14 bits either; for such cases load D into
1809 if (GET_CODE (operand1
) == MEM
1810 && !memory_address_p (GET_MODE (operand0
), XEXP (operand1
, 0)))
1812 /* We are reloading the address into the scratch register, so we
1813 want to make sure the scratch register is a full register. */
1814 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1816 emit_move_insn (scratch_reg
, XEXP (XEXP (operand1
, 0), 1));
1817 emit_move_insn (scratch_reg
, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1
,
1820 XEXP (XEXP (operand1
, 0),
1824 /* Now we are going to load the scratch register from memory,
1825 we want to load it in the same width as the original MEM,
1826 which must be the same as the width of the ultimate destination,
1828 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1830 emit_move_insn (scratch_reg
,
1831 replace_equiv_address (operand1
, scratch_reg
));
1835 /* We want to load the scratch register using the same mode as
1836 the ultimate destination. */
1837 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1839 emit_move_insn (scratch_reg
, operand1
);
1842 /* And emit the insn to set the ultimate destination. We know that
1843 the scratch register has the same mode as the destination at this
1845 emit_move_insn (operand0
, scratch_reg
);
1848 /* Handle the most common case: storing into a register. */
1849 else if (register_operand (operand0
, mode
))
1851 /* Legitimize TLS symbol references. This happens for references
1852 that aren't a legitimate constant. */
1853 if (PA_SYMBOL_REF_TLS_P (operand1
))
1854 operand1
= legitimize_tls_address (operand1
);
1856 if (register_operand (operand1
, mode
)
1857 || (GET_CODE (operand1
) == CONST_INT
1858 && pa_cint_ok_for_move (INTVAL (operand1
)))
1859 || (operand1
== CONST0_RTX (mode
))
1860 || (GET_CODE (operand1
) == HIGH
1861 && !symbolic_operand (XEXP (operand1
, 0), VOIDmode
))
1862 /* Only `general_operands' can come here, so MEM is ok. */
1863 || GET_CODE (operand1
) == MEM
)
1865 /* Various sets are created during RTL generation which don't
1866 have the REG_POINTER flag correctly set. After the CSE pass,
1867 instruction recognition can fail if we don't consistently
1868 set this flag when performing register copies. This should
1869 also improve the opportunities for creating insns that use
1870 unscaled indexing. */
1871 if (REG_P (operand0
) && REG_P (operand1
))
1873 if (REG_POINTER (operand1
)
1874 && !REG_POINTER (operand0
)
1875 && !HARD_REGISTER_P (operand0
))
1876 copy_reg_pointer (operand0
, operand1
);
1879 /* When MEMs are broken out, the REG_POINTER flag doesn't
1880 get set. In some cases, we can set the REG_POINTER flag
1881 from the declaration for the MEM. */
1882 if (REG_P (operand0
)
1883 && GET_CODE (operand1
) == MEM
1884 && !REG_POINTER (operand0
))
1886 tree decl
= MEM_EXPR (operand1
);
1888 /* Set the register pointer flag and register alignment
1889 if the declaration for this memory reference is a
1895 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1897 if (TREE_CODE (decl
) == COMPONENT_REF
)
1898 decl
= TREE_OPERAND (decl
, 1);
1900 type
= TREE_TYPE (decl
);
1901 type
= strip_array_types (type
);
1903 if (POINTER_TYPE_P (type
))
1907 type
= TREE_TYPE (type
);
1908 /* Using TYPE_ALIGN_OK is rather conservative as
1909 only the ada frontend actually sets it. */
1910 align
= (TYPE_ALIGN_OK (type
) ? TYPE_ALIGN (type
)
1912 mark_reg_pointer (operand0
, align
);
1917 emit_insn (gen_rtx_SET (operand0
, operand1
));
1921 else if (GET_CODE (operand0
) == MEM
)
1923 if (mode
== DFmode
&& operand1
== CONST0_RTX (mode
)
1924 && !(reload_in_progress
|| reload_completed
))
1926 rtx temp
= gen_reg_rtx (DFmode
);
1928 emit_insn (gen_rtx_SET (temp
, operand1
));
1929 emit_insn (gen_rtx_SET (operand0
, temp
));
1932 if (register_operand (operand1
, mode
) || operand1
== CONST0_RTX (mode
))
1934 /* Run this case quickly. */
1935 emit_insn (gen_rtx_SET (operand0
, operand1
));
1938 if (! (reload_in_progress
|| reload_completed
))
1940 operands
[0] = validize_mem (operand0
);
1941 operands
[1] = operand1
= force_reg (mode
, operand1
);
1945 /* Simplify the source if we need to.
1946 Note we do have to handle function labels here, even though we do
1947 not consider them legitimate constants. Loop optimizations can
1948 call the emit_move_xxx with one as a source. */
1949 if ((GET_CODE (operand1
) != HIGH
&& immediate_operand (operand1
, mode
))
1950 || (GET_CODE (operand1
) == HIGH
1951 && symbolic_operand (XEXP (operand1
, 0), mode
))
1952 || function_label_operand (operand1
, VOIDmode
)
1953 || tls_referenced_p (operand1
))
1957 if (GET_CODE (operand1
) == HIGH
)
1960 operand1
= XEXP (operand1
, 0);
1962 if (symbolic_operand (operand1
, mode
))
1964 /* Argh. The assembler and linker can't handle arithmetic
1967 So we force the plabel into memory, load operand0 from
1968 the memory location, then add in the constant part. */
1969 if ((GET_CODE (operand1
) == CONST
1970 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1971 && function_label_operand (XEXP (XEXP (operand1
, 0), 0),
1973 || function_label_operand (operand1
, VOIDmode
))
1975 rtx temp
, const_part
;
1977 /* Figure out what (if any) scratch register to use. */
1978 if (reload_in_progress
|| reload_completed
)
1980 scratch_reg
= scratch_reg
? scratch_reg
: operand0
;
1981 /* SCRATCH_REG will hold an address and maybe the actual
1982 data. We want it in WORD_MODE regardless of what mode it
1983 was originally given to us. */
1984 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1987 scratch_reg
= gen_reg_rtx (Pmode
);
1989 if (GET_CODE (operand1
) == CONST
)
1991 /* Save away the constant part of the expression. */
1992 const_part
= XEXP (XEXP (operand1
, 0), 1);
1993 gcc_assert (GET_CODE (const_part
) == CONST_INT
);
1995 /* Force the function label into memory. */
1996 temp
= force_const_mem (mode
, XEXP (XEXP (operand1
, 0), 0));
2000 /* No constant part. */
2001 const_part
= NULL_RTX
;
2003 /* Force the function label into memory. */
2004 temp
= force_const_mem (mode
, operand1
);
2008 /* Get the address of the memory location. PIC-ify it if
2010 temp
= XEXP (temp
, 0);
2012 temp
= legitimize_pic_address (temp
, mode
, scratch_reg
);
2014 /* Put the address of the memory location into our destination
2017 pa_emit_move_sequence (operands
, mode
, scratch_reg
);
2019 /* Now load from the memory location into our destination
2021 operands
[1] = gen_rtx_MEM (Pmode
, operands
[0]);
2022 pa_emit_move_sequence (operands
, mode
, scratch_reg
);
2024 /* And add back in the constant part. */
2025 if (const_part
!= NULL_RTX
)
2026 expand_inc (operand0
, const_part
);
2036 if (reload_in_progress
|| reload_completed
)
2038 temp
= scratch_reg
? scratch_reg
: operand0
;
2039 /* TEMP will hold an address and maybe the actual
2040 data. We want it in WORD_MODE regardless of what mode it
2041 was originally given to us. */
2042 temp
= force_mode (word_mode
, temp
);
2045 temp
= gen_reg_rtx (Pmode
);
2047 /* Force (const (plus (symbol) (const_int))) to memory
2048 if the const_int will not fit in 14 bits. Although
2049 this requires a relocation, the instruction sequence
2050 needed to load the value is shorter. */
2051 if (GET_CODE (operand1
) == CONST
2052 && GET_CODE (XEXP (operand1
, 0)) == PLUS
2053 && GET_CODE (XEXP (XEXP (operand1
, 0), 1)) == CONST_INT
2054 && !INT_14_BITS (XEXP (XEXP (operand1
, 0), 1)))
2056 rtx x
, m
= force_const_mem (mode
, operand1
);
2058 x
= legitimize_pic_address (XEXP (m
, 0), mode
, temp
);
2059 x
= replace_equiv_address (m
, x
);
2060 insn
= emit_move_insn (operand0
, x
);
2064 operands
[1] = legitimize_pic_address (operand1
, mode
, temp
);
2065 if (REG_P (operand0
) && REG_P (operands
[1]))
2066 copy_reg_pointer (operand0
, operands
[1]);
2067 insn
= emit_move_insn (operand0
, operands
[1]);
2070 /* Put a REG_EQUAL note on this insn. */
2071 set_unique_reg_note (insn
, REG_EQUAL
, operand1
);
2073 /* On the HPPA, references to data space are supposed to use dp,
2074 register 27, but showing it in the RTL inhibits various cse
2075 and loop optimizations. */
2080 if (reload_in_progress
|| reload_completed
)
2082 temp
= scratch_reg
? scratch_reg
: operand0
;
2083 /* TEMP will hold an address and maybe the actual
2084 data. We want it in WORD_MODE regardless of what mode it
2085 was originally given to us. */
2086 temp
= force_mode (word_mode
, temp
);
2089 temp
= gen_reg_rtx (mode
);
2091 /* Loading a SYMBOL_REF into a register makes that register
2092 safe to be used as the base in an indexed address.
2094 Don't mark hard registers though. That loses. */
2095 if (GET_CODE (operand0
) == REG
2096 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
2097 mark_reg_pointer (operand0
, BITS_PER_UNIT
);
2098 if (REGNO (temp
) >= FIRST_PSEUDO_REGISTER
)
2099 mark_reg_pointer (temp
, BITS_PER_UNIT
);
2102 set
= gen_rtx_SET (operand0
, temp
);
2104 set
= gen_rtx_SET (operand0
,
2105 gen_rtx_LO_SUM (mode
, temp
, operand1
));
2107 emit_insn (gen_rtx_SET (temp
, gen_rtx_HIGH (mode
, operand1
)));
2113 else if (tls_referenced_p (operand1
))
2118 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
2120 addend
= XEXP (XEXP (tmp
, 0), 1);
2121 tmp
= XEXP (XEXP (tmp
, 0), 0);
2124 gcc_assert (GET_CODE (tmp
) == SYMBOL_REF
);
2125 tmp
= legitimize_tls_address (tmp
);
2128 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
2129 tmp
= force_operand (tmp
, operands
[0]);
2133 else if (GET_CODE (operand1
) != CONST_INT
2134 || !pa_cint_ok_for_move (INTVAL (operand1
)))
2139 HOST_WIDE_INT value
= 0;
2140 HOST_WIDE_INT insv
= 0;
2143 if (GET_CODE (operand1
) == CONST_INT
)
2144 value
= INTVAL (operand1
);
2147 && GET_CODE (operand1
) == CONST_INT
2148 && HOST_BITS_PER_WIDE_INT
> 32
2149 && GET_MODE_BITSIZE (GET_MODE (operand0
)) > 32)
2153 /* Extract the low order 32 bits of the value and sign extend.
2154 If the new value is the same as the original value, we can
2155 can use the original value as-is. If the new value is
2156 different, we use it and insert the most-significant 32-bits
2157 of the original value into the final result. */
2158 nval
= ((value
& (((HOST_WIDE_INT
) 2 << 31) - 1))
2159 ^ ((HOST_WIDE_INT
) 1 << 31)) - ((HOST_WIDE_INT
) 1 << 31);
2162 #if HOST_BITS_PER_WIDE_INT > 32
2163 insv
= value
>= 0 ? value
>> 32 : ~(~value
>> 32);
2167 operand1
= GEN_INT (nval
);
2171 if (reload_in_progress
|| reload_completed
)
2172 temp
= scratch_reg
? scratch_reg
: operand0
;
2174 temp
= gen_reg_rtx (mode
);
2176 /* We don't directly split DImode constants on 32-bit targets
2177 because PLUS uses an 11-bit immediate and the insn sequence
2178 generated is not as efficient as the one using HIGH/LO_SUM. */
2179 if (GET_CODE (operand1
) == CONST_INT
2180 && GET_MODE_BITSIZE (mode
) <= BITS_PER_WORD
2181 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
2184 /* Directly break constant into high and low parts. This
2185 provides better optimization opportunities because various
2186 passes recognize constants split with PLUS but not LO_SUM.
2187 We use a 14-bit signed low part except when the addition
2188 of 0x4000 to the high part might change the sign of the
2190 HOST_WIDE_INT low
= value
& 0x3fff;
2191 HOST_WIDE_INT high
= value
& ~ 0x3fff;
2195 if (high
== 0x7fffc000 || (mode
== HImode
&& high
== 0x4000))
2203 emit_insn (gen_rtx_SET (temp
, GEN_INT (high
)));
2204 operands
[1] = gen_rtx_PLUS (mode
, temp
, GEN_INT (low
));
2208 emit_insn (gen_rtx_SET (temp
, gen_rtx_HIGH (mode
, operand1
)));
2209 operands
[1] = gen_rtx_LO_SUM (mode
, temp
, operand1
);
2212 insn
= emit_move_insn (operands
[0], operands
[1]);
2214 /* Now insert the most significant 32 bits of the value
2215 into the register. When we don't have a second register
2216 available, it could take up to nine instructions to load
2217 a 64-bit integer constant. Prior to reload, we force
2218 constants that would take more than three instructions
2219 to load to the constant pool. During and after reload,
2220 we have to handle all possible values. */
2223 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2224 register and the value to be inserted is outside the
2225 range that can be loaded with three depdi instructions. */
2226 if (temp
!= operand0
&& (insv
>= 16384 || insv
< -16384))
2228 operand1
= GEN_INT (insv
);
2230 emit_insn (gen_rtx_SET (temp
,
2231 gen_rtx_HIGH (mode
, operand1
)));
2232 emit_move_insn (temp
, gen_rtx_LO_SUM (mode
, temp
, operand1
));
2234 insn
= emit_insn (gen_insvdi (operand0
, GEN_INT (32),
2237 insn
= emit_insn (gen_insvsi (operand0
, GEN_INT (32),
2242 int len
= 5, pos
= 27;
2244 /* Insert the bits using the depdi instruction. */
2247 HOST_WIDE_INT v5
= ((insv
& 31) ^ 16) - 16;
2248 HOST_WIDE_INT sign
= v5
< 0;
2250 /* Left extend the insertion. */
2251 insv
= (insv
>= 0 ? insv
>> len
: ~(~insv
>> len
));
2252 while (pos
> 0 && (insv
& 1) == sign
)
2254 insv
= (insv
>= 0 ? insv
>> 1 : ~(~insv
>> 1));
2260 insn
= emit_insn (gen_insvdi (operand0
,
2265 insn
= emit_insn (gen_insvsi (operand0
,
2270 len
= pos
> 0 && pos
< 5 ? pos
: 5;
2276 set_unique_reg_note (insn
, REG_EQUAL
, op1
);
2281 /* Now have insn-emit do whatever it normally does. */
2285 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2286 it will need a link/runtime reloc). */
2289 pa_reloc_needed (tree exp
)
2293 switch (TREE_CODE (exp
))
2298 case POINTER_PLUS_EXPR
:
2301 reloc
= pa_reloc_needed (TREE_OPERAND (exp
, 0));
2302 reloc
|= pa_reloc_needed (TREE_OPERAND (exp
, 1));
2306 case NON_LVALUE_EXPR
:
2307 reloc
= pa_reloc_needed (TREE_OPERAND (exp
, 0));
2313 unsigned HOST_WIDE_INT ix
;
2315 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp
), ix
, value
)
2317 reloc
|= pa_reloc_needed (value
);
2331 /* Return the best assembler insn template
2332 for moving operands[1] into operands[0] as a fullword. */
2334 pa_singlemove_string (rtx
*operands
)
2336 HOST_WIDE_INT intval
;
2338 if (GET_CODE (operands
[0]) == MEM
)
2339 return "stw %r1,%0";
2340 if (GET_CODE (operands
[1]) == MEM
)
2342 if (GET_CODE (operands
[1]) == CONST_DOUBLE
)
2347 gcc_assert (GET_MODE (operands
[1]) == SFmode
);
2349 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2351 REAL_VALUE_FROM_CONST_DOUBLE (d
, operands
[1]);
2352 REAL_VALUE_TO_TARGET_SINGLE (d
, i
);
2354 operands
[1] = GEN_INT (i
);
2355 /* Fall through to CONST_INT case. */
2357 if (GET_CODE (operands
[1]) == CONST_INT
)
2359 intval
= INTVAL (operands
[1]);
2361 if (VAL_14_BITS_P (intval
))
2363 else if ((intval
& 0x7ff) == 0)
2364 return "ldil L'%1,%0";
2365 else if (pa_zdepi_cint_p (intval
))
2366 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2368 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2370 return "copy %1,%0";
2374 /* Compute position (in OP[1]) and width (in OP[2])
2375 useful for copying IMM to a register using the zdepi
2376 instructions. Store the immediate value to insert in OP[0]. */
2378 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2382 /* Find the least significant set bit in IMM. */
2383 for (lsb
= 0; lsb
< 32; lsb
++)
2390 /* Choose variants based on *sign* of the 5-bit field. */
2391 if ((imm
& 0x10) == 0)
2392 len
= (lsb
<= 28) ? 4 : 32 - lsb
;
2395 /* Find the width of the bitstring in IMM. */
2396 for (len
= 5; len
< 32 - lsb
; len
++)
2398 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2402 /* Sign extend IMM as a 5-bit value. */
2403 imm
= (imm
& 0xf) - 0x10;
2411 /* Compute position (in OP[1]) and width (in OP[2])
2412 useful for copying IMM to a register using the depdi,z
2413 instructions. Store the immediate value to insert in OP[0]. */
2416 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2418 int lsb
, len
, maxlen
;
2420 maxlen
= MIN (HOST_BITS_PER_WIDE_INT
, 64);
2422 /* Find the least significant set bit in IMM. */
2423 for (lsb
= 0; lsb
< maxlen
; lsb
++)
2430 /* Choose variants based on *sign* of the 5-bit field. */
2431 if ((imm
& 0x10) == 0)
2432 len
= (lsb
<= maxlen
- 4) ? 4 : maxlen
- lsb
;
2435 /* Find the width of the bitstring in IMM. */
2436 for (len
= 5; len
< maxlen
- lsb
; len
++)
2438 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2442 /* Extend length if host is narrow and IMM is negative. */
2443 if (HOST_BITS_PER_WIDE_INT
== 32 && len
== maxlen
- lsb
)
2446 /* Sign extend IMM as a 5-bit value. */
2447 imm
= (imm
& 0xf) - 0x10;
2455 /* Output assembler code to perform a doubleword move insn
2456 with operands OPERANDS. */
2459 pa_output_move_double (rtx
*operands
)
2461 enum { REGOP
, OFFSOP
, MEMOP
, CNSTOP
, RNDOP
} optype0
, optype1
;
2463 rtx addreg0
= 0, addreg1
= 0;
2465 /* First classify both operands. */
2467 if (REG_P (operands
[0]))
2469 else if (offsettable_memref_p (operands
[0]))
2471 else if (GET_CODE (operands
[0]) == MEM
)
2476 if (REG_P (operands
[1]))
2478 else if (CONSTANT_P (operands
[1]))
2480 else if (offsettable_memref_p (operands
[1]))
2482 else if (GET_CODE (operands
[1]) == MEM
)
2487 /* Check for the cases that the operand constraints are not
2488 supposed to allow to happen. */
2489 gcc_assert (optype0
== REGOP
|| optype1
== REGOP
);
2491 /* Handle copies between general and floating registers. */
2493 if (optype0
== REGOP
&& optype1
== REGOP
2494 && FP_REG_P (operands
[0]) ^ FP_REG_P (operands
[1]))
2496 if (FP_REG_P (operands
[0]))
2498 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands
);
2499 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands
);
2500 return "{fldds|fldd} -16(%%sp),%0";
2504 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands
);
2505 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands
);
2506 return "{ldws|ldw} -12(%%sp),%R0";
2510 /* Handle auto decrementing and incrementing loads and stores
2511 specifically, since the structure of the function doesn't work
2512 for them without major modification. Do it better when we learn
2513 this port about the general inc/dec addressing of PA.
2514 (This was written by tege. Chide him if it doesn't work.) */
2516 if (optype0
== MEMOP
)
2518 /* We have to output the address syntax ourselves, since print_operand
2519 doesn't deal with the addresses we want to use. Fix this later. */
2521 rtx addr
= XEXP (operands
[0], 0);
2522 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2524 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2526 operands
[0] = XEXP (addr
, 0);
2527 gcc_assert (GET_CODE (operands
[1]) == REG
2528 && GET_CODE (operands
[0]) == REG
);
2530 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2532 /* No overlap between high target register and address
2533 register. (We do this in a non-obvious way to
2534 save a register file writeback) */
2535 if (GET_CODE (addr
) == POST_INC
)
2536 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2537 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2539 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2541 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2543 operands
[0] = XEXP (addr
, 0);
2544 gcc_assert (GET_CODE (operands
[1]) == REG
2545 && GET_CODE (operands
[0]) == REG
);
2547 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2548 /* No overlap between high target register and address
2549 register. (We do this in a non-obvious way to save a
2550 register file writeback) */
2551 if (GET_CODE (addr
) == PRE_INC
)
2552 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2553 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2556 if (optype1
== MEMOP
)
2558 /* We have to output the address syntax ourselves, since print_operand
2559 doesn't deal with the addresses we want to use. Fix this later. */
2561 rtx addr
= XEXP (operands
[1], 0);
2562 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2564 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2566 operands
[1] = XEXP (addr
, 0);
2567 gcc_assert (GET_CODE (operands
[0]) == REG
2568 && GET_CODE (operands
[1]) == REG
);
2570 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2572 /* No overlap between high target register and address
2573 register. (We do this in a non-obvious way to
2574 save a register file writeback) */
2575 if (GET_CODE (addr
) == POST_INC
)
2576 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2577 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2581 /* This is an undefined situation. We should load into the
2582 address register *and* update that register. Probably
2583 we don't need to handle this at all. */
2584 if (GET_CODE (addr
) == POST_INC
)
2585 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2586 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2589 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2591 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2593 operands
[1] = XEXP (addr
, 0);
2594 gcc_assert (GET_CODE (operands
[0]) == REG
2595 && GET_CODE (operands
[1]) == REG
);
2597 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2599 /* No overlap between high target register and address
2600 register. (We do this in a non-obvious way to
2601 save a register file writeback) */
2602 if (GET_CODE (addr
) == PRE_INC
)
2603 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2604 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2608 /* This is an undefined situation. We should load into the
2609 address register *and* update that register. Probably
2610 we don't need to handle this at all. */
2611 if (GET_CODE (addr
) == PRE_INC
)
2612 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2613 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2616 else if (GET_CODE (addr
) == PLUS
2617 && GET_CODE (XEXP (addr
, 0)) == MULT
)
2621 /* Load address into left half of destination register. */
2622 xoperands
[0] = gen_rtx_SUBREG (SImode
, operands
[0], 0);
2623 xoperands
[1] = XEXP (addr
, 1);
2624 xoperands
[2] = XEXP (XEXP (addr
, 0), 0);
2625 xoperands
[3] = XEXP (XEXP (addr
, 0), 1);
2626 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2628 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2630 else if (GET_CODE (addr
) == PLUS
2631 && REG_P (XEXP (addr
, 0))
2632 && REG_P (XEXP (addr
, 1)))
2636 /* Load address into left half of destination register. */
2637 xoperands
[0] = gen_rtx_SUBREG (SImode
, operands
[0], 0);
2638 xoperands
[1] = XEXP (addr
, 0);
2639 xoperands
[2] = XEXP (addr
, 1);
2640 output_asm_insn ("{addl|add,l} %1,%2,%0",
2642 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2646 /* If an operand is an unoffsettable memory ref, find a register
2647 we can increment temporarily to make it refer to the second word. */
2649 if (optype0
== MEMOP
)
2650 addreg0
= find_addr_reg (XEXP (operands
[0], 0));
2652 if (optype1
== MEMOP
)
2653 addreg1
= find_addr_reg (XEXP (operands
[1], 0));
2655 /* Ok, we can do one word at a time.
2656 Normally we do the low-numbered word first.
2658 In either case, set up in LATEHALF the operands to use
2659 for the high-numbered word and in some cases alter the
2660 operands in OPERANDS to be suitable for the low-numbered word. */
2662 if (optype0
== REGOP
)
2663 latehalf
[0] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2664 else if (optype0
== OFFSOP
)
2665 latehalf
[0] = adjust_address_nv (operands
[0], SImode
, 4);
2667 latehalf
[0] = operands
[0];
2669 if (optype1
== REGOP
)
2670 latehalf
[1] = gen_rtx_REG (SImode
, REGNO (operands
[1]) + 1);
2671 else if (optype1
== OFFSOP
)
2672 latehalf
[1] = adjust_address_nv (operands
[1], SImode
, 4);
2673 else if (optype1
== CNSTOP
)
2674 split_double (operands
[1], &operands
[1], &latehalf
[1]);
2676 latehalf
[1] = operands
[1];
2678 /* If the first move would clobber the source of the second one,
2679 do them in the other order.
2681 This can happen in two cases:
2683 mem -> register where the first half of the destination register
2684 is the same register used in the memory's address. Reload
2685 can create such insns.
2687 mem in this case will be either register indirect or register
2688 indirect plus a valid offset.
2690 register -> register move where REGNO(dst) == REGNO(src + 1)
2691 someone (Tim/Tege?) claimed this can happen for parameter loads.
2693 Handle mem -> register case first. */
2694 if (optype0
== REGOP
2695 && (optype1
== MEMOP
|| optype1
== OFFSOP
)
2696 && refers_to_regno_p (REGNO (operands
[0]), operands
[1]))
2698 /* Do the late half first. */
2700 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
2701 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2705 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
2706 return pa_singlemove_string (operands
);
2709 /* Now handle register -> register case. */
2710 if (optype0
== REGOP
&& optype1
== REGOP
2711 && REGNO (operands
[0]) == REGNO (operands
[1]) + 1)
2713 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2714 return pa_singlemove_string (operands
);
2717 /* Normal case: do the two words, low-numbered first. */
2719 output_asm_insn (pa_singlemove_string (operands
), operands
);
2721 /* Make any unoffsettable addresses point at high-numbered word. */
2723 output_asm_insn ("ldo 4(%0),%0", &addreg0
);
2725 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
2728 output_asm_insn (pa_singlemove_string (latehalf
), latehalf
);
2730 /* Undo the adds we just did. */
2732 output_asm_insn ("ldo -4(%0),%0", &addreg0
);
2734 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
2740 pa_output_fp_move_double (rtx
*operands
)
2742 if (FP_REG_P (operands
[0]))
2744 if (FP_REG_P (operands
[1])
2745 || operands
[1] == CONST0_RTX (GET_MODE (operands
[0])))
2746 output_asm_insn ("fcpy,dbl %f1,%0", operands
);
2748 output_asm_insn ("fldd%F1 %1,%0", operands
);
2750 else if (FP_REG_P (operands
[1]))
2752 output_asm_insn ("fstd%F0 %1,%0", operands
);
2758 gcc_assert (operands
[1] == CONST0_RTX (GET_MODE (operands
[0])));
2760 /* This is a pain. You have to be prepared to deal with an
2761 arbitrary address here including pre/post increment/decrement.
2763 so avoid this in the MD. */
2764 gcc_assert (GET_CODE (operands
[0]) == REG
);
2766 xoperands
[1] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2767 xoperands
[0] = operands
[0];
2768 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands
);
2773 /* Return a REG that occurs in ADDR with coefficient 1.
2774 ADDR can be effectively incremented by incrementing REG. */
2777 find_addr_reg (rtx addr
)
2779 while (GET_CODE (addr
) == PLUS
)
2781 if (GET_CODE (XEXP (addr
, 0)) == REG
)
2782 addr
= XEXP (addr
, 0);
2783 else if (GET_CODE (XEXP (addr
, 1)) == REG
)
2784 addr
= XEXP (addr
, 1);
2785 else if (CONSTANT_P (XEXP (addr
, 0)))
2786 addr
= XEXP (addr
, 1);
2787 else if (CONSTANT_P (XEXP (addr
, 1)))
2788 addr
= XEXP (addr
, 0);
2792 gcc_assert (GET_CODE (addr
) == REG
);
2796 /* Emit code to perform a block move.
2798 OPERANDS[0] is the destination pointer as a REG, clobbered.
2799 OPERANDS[1] is the source pointer as a REG, clobbered.
2800 OPERANDS[2] is a register for temporary storage.
2801 OPERANDS[3] is a register for temporary storage.
2802 OPERANDS[4] is the size as a CONST_INT
2803 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2804 OPERANDS[6] is another temporary register. */
2807 pa_output_block_move (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
2809 int align
= INTVAL (operands
[5]);
2810 unsigned long n_bytes
= INTVAL (operands
[4]);
2812 /* We can't move more than a word at a time because the PA
2813 has no longer integer move insns. (Could use fp mem ops?) */
2814 if (align
> (TARGET_64BIT
? 8 : 4))
2815 align
= (TARGET_64BIT
? 8 : 4);
2817 /* Note that we know each loop below will execute at least twice
2818 (else we would have open-coded the copy). */
2822 /* Pre-adjust the loop counter. */
2823 operands
[4] = GEN_INT (n_bytes
- 16);
2824 output_asm_insn ("ldi %4,%2", operands
);
2827 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
2828 output_asm_insn ("ldd,ma 8(%1),%6", operands
);
2829 output_asm_insn ("std,ma %3,8(%0)", operands
);
2830 output_asm_insn ("addib,>= -16,%2,.-12", operands
);
2831 output_asm_insn ("std,ma %6,8(%0)", operands
);
2833 /* Handle the residual. There could be up to 7 bytes of
2834 residual to copy! */
2835 if (n_bytes
% 16 != 0)
2837 operands
[4] = GEN_INT (n_bytes
% 8);
2838 if (n_bytes
% 16 >= 8)
2839 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
2840 if (n_bytes
% 8 != 0)
2841 output_asm_insn ("ldd 0(%1),%6", operands
);
2842 if (n_bytes
% 16 >= 8)
2843 output_asm_insn ("std,ma %3,8(%0)", operands
);
2844 if (n_bytes
% 8 != 0)
2845 output_asm_insn ("stdby,e %6,%4(%0)", operands
);
2850 /* Pre-adjust the loop counter. */
2851 operands
[4] = GEN_INT (n_bytes
- 8);
2852 output_asm_insn ("ldi %4,%2", operands
);
2855 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
2856 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands
);
2857 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
2858 output_asm_insn ("addib,>= -8,%2,.-12", operands
);
2859 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands
);
2861 /* Handle the residual. There could be up to 7 bytes of
2862 residual to copy! */
2863 if (n_bytes
% 8 != 0)
2865 operands
[4] = GEN_INT (n_bytes
% 4);
2866 if (n_bytes
% 8 >= 4)
2867 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
2868 if (n_bytes
% 4 != 0)
2869 output_asm_insn ("ldw 0(%1),%6", operands
);
2870 if (n_bytes
% 8 >= 4)
2871 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
2872 if (n_bytes
% 4 != 0)
2873 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands
);
2878 /* Pre-adjust the loop counter. */
2879 operands
[4] = GEN_INT (n_bytes
- 4);
2880 output_asm_insn ("ldi %4,%2", operands
);
2883 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
2884 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands
);
2885 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
2886 output_asm_insn ("addib,>= -4,%2,.-12", operands
);
2887 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands
);
2889 /* Handle the residual. */
2890 if (n_bytes
% 4 != 0)
2892 if (n_bytes
% 4 >= 2)
2893 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
2894 if (n_bytes
% 2 != 0)
2895 output_asm_insn ("ldb 0(%1),%6", operands
);
2896 if (n_bytes
% 4 >= 2)
2897 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
2898 if (n_bytes
% 2 != 0)
2899 output_asm_insn ("stb %6,0(%0)", operands
);
2904 /* Pre-adjust the loop counter. */
2905 operands
[4] = GEN_INT (n_bytes
- 2);
2906 output_asm_insn ("ldi %4,%2", operands
);
2909 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands
);
2910 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands
);
2911 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands
);
2912 output_asm_insn ("addib,>= -2,%2,.-12", operands
);
2913 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands
);
2915 /* Handle the residual. */
2916 if (n_bytes
% 2 != 0)
2918 output_asm_insn ("ldb 0(%1),%3", operands
);
2919 output_asm_insn ("stb %3,0(%0)", operands
);
2928 /* Count the number of insns necessary to handle this block move.
2930 Basic structure is the same as emit_block_move, except that we
2931 count insns rather than emit them. */
2934 compute_movmem_length (rtx_insn
*insn
)
2936 rtx pat
= PATTERN (insn
);
2937 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 7), 0));
2938 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 6), 0));
2939 unsigned int n_insns
= 0;
2941 /* We can't move more than four bytes at a time because the PA
2942 has no longer integer move insns. (Could use fp mem ops?) */
2943 if (align
> (TARGET_64BIT
? 8 : 4))
2944 align
= (TARGET_64BIT
? 8 : 4);
2946 /* The basic copying loop. */
2950 if (n_bytes
% (2 * align
) != 0)
2952 if ((n_bytes
% (2 * align
)) >= align
)
2955 if ((n_bytes
% align
) != 0)
2959 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2963 /* Emit code to perform a block clear.
2965 OPERANDS[0] is the destination pointer as a REG, clobbered.
2966 OPERANDS[1] is a register for temporary storage.
2967 OPERANDS[2] is the size as a CONST_INT
2968 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2971 pa_output_block_clear (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
2973 int align
= INTVAL (operands
[3]);
2974 unsigned long n_bytes
= INTVAL (operands
[2]);
2976 /* We can't clear more than a word at a time because the PA
2977 has no longer integer move insns. */
2978 if (align
> (TARGET_64BIT
? 8 : 4))
2979 align
= (TARGET_64BIT
? 8 : 4);
2981 /* Note that we know each loop below will execute at least twice
2982 (else we would have open-coded the copy). */
2986 /* Pre-adjust the loop counter. */
2987 operands
[2] = GEN_INT (n_bytes
- 16);
2988 output_asm_insn ("ldi %2,%1", operands
);
2991 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
2992 output_asm_insn ("addib,>= -16,%1,.-4", operands
);
2993 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
2995 /* Handle the residual. There could be up to 7 bytes of
2996 residual to copy! */
2997 if (n_bytes
% 16 != 0)
2999 operands
[2] = GEN_INT (n_bytes
% 8);
3000 if (n_bytes
% 16 >= 8)
3001 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
3002 if (n_bytes
% 8 != 0)
3003 output_asm_insn ("stdby,e %%r0,%2(%0)", operands
);
3008 /* Pre-adjust the loop counter. */
3009 operands
[2] = GEN_INT (n_bytes
- 8);
3010 output_asm_insn ("ldi %2,%1", operands
);
3013 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3014 output_asm_insn ("addib,>= -8,%1,.-4", operands
);
3015 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3017 /* Handle the residual. There could be up to 7 bytes of
3018 residual to copy! */
3019 if (n_bytes
% 8 != 0)
3021 operands
[2] = GEN_INT (n_bytes
% 4);
3022 if (n_bytes
% 8 >= 4)
3023 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
3024 if (n_bytes
% 4 != 0)
3025 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands
);
3030 /* Pre-adjust the loop counter. */
3031 operands
[2] = GEN_INT (n_bytes
- 4);
3032 output_asm_insn ("ldi %2,%1", operands
);
3035 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3036 output_asm_insn ("addib,>= -4,%1,.-4", operands
);
3037 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3039 /* Handle the residual. */
3040 if (n_bytes
% 4 != 0)
3042 if (n_bytes
% 4 >= 2)
3043 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
3044 if (n_bytes
% 2 != 0)
3045 output_asm_insn ("stb %%r0,0(%0)", operands
);
3050 /* Pre-adjust the loop counter. */
3051 operands
[2] = GEN_INT (n_bytes
- 2);
3052 output_asm_insn ("ldi %2,%1", operands
);
3055 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
3056 output_asm_insn ("addib,>= -2,%1,.-4", operands
);
3057 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
3059 /* Handle the residual. */
3060 if (n_bytes
% 2 != 0)
3061 output_asm_insn ("stb %%r0,0(%0)", operands
);
3070 /* Count the number of insns necessary to handle this block move.
3072 Basic structure is the same as emit_block_move, except that we
3073 count insns rather than emit them. */
3076 compute_clrmem_length (rtx_insn
*insn
)
3078 rtx pat
= PATTERN (insn
);
3079 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 4), 0));
3080 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 3), 0));
3081 unsigned int n_insns
= 0;
3083 /* We can't clear more than a word at a time because the PA
3084 has no longer integer move insns. */
3085 if (align
> (TARGET_64BIT
? 8 : 4))
3086 align
= (TARGET_64BIT
? 8 : 4);
3088 /* The basic loop. */
3092 if (n_bytes
% (2 * align
) != 0)
3094 if ((n_bytes
% (2 * align
)) >= align
)
3097 if ((n_bytes
% align
) != 0)
3101 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3107 pa_output_and (rtx
*operands
)
3109 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3111 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3112 int ls0
, ls1
, ms0
, p
, len
;
3114 for (ls0
= 0; ls0
< 32; ls0
++)
3115 if ((mask
& (1 << ls0
)) == 0)
3118 for (ls1
= ls0
; ls1
< 32; ls1
++)
3119 if ((mask
& (1 << ls1
)) != 0)
3122 for (ms0
= ls1
; ms0
< 32; ms0
++)
3123 if ((mask
& (1 << ms0
)) == 0)
3126 gcc_assert (ms0
== 32);
3134 operands
[2] = GEN_INT (len
);
3135 return "{extru|extrw,u} %1,31,%2,%0";
3139 /* We could use this `depi' for the case above as well, but `depi'
3140 requires one more register file access than an `extru'. */
3145 operands
[2] = GEN_INT (p
);
3146 operands
[3] = GEN_INT (len
);
3147 return "{depi|depwi} 0,%2,%3,%0";
3151 return "and %1,%2,%0";
3154 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3155 storing the result in operands[0]. */
3157 pa_output_64bit_and (rtx
*operands
)
3159 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3161 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3162 int ls0
, ls1
, ms0
, p
, len
;
3164 for (ls0
= 0; ls0
< HOST_BITS_PER_WIDE_INT
; ls0
++)
3165 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls0
)) == 0)
3168 for (ls1
= ls0
; ls1
< HOST_BITS_PER_WIDE_INT
; ls1
++)
3169 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls1
)) != 0)
3172 for (ms0
= ls1
; ms0
< HOST_BITS_PER_WIDE_INT
; ms0
++)
3173 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ms0
)) == 0)
3176 gcc_assert (ms0
== HOST_BITS_PER_WIDE_INT
);
3178 if (ls1
== HOST_BITS_PER_WIDE_INT
)
3184 operands
[2] = GEN_INT (len
);
3185 return "extrd,u %1,63,%2,%0";
3189 /* We could use this `depi' for the case above as well, but `depi'
3190 requires one more register file access than an `extru'. */
3195 operands
[2] = GEN_INT (p
);
3196 operands
[3] = GEN_INT (len
);
3197 return "depdi 0,%2,%3,%0";
3201 return "and %1,%2,%0";
3205 pa_output_ior (rtx
*operands
)
3207 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3208 int bs0
, bs1
, p
, len
;
3210 if (INTVAL (operands
[2]) == 0)
3211 return "copy %1,%0";
3213 for (bs0
= 0; bs0
< 32; bs0
++)
3214 if ((mask
& (1 << bs0
)) != 0)
3217 for (bs1
= bs0
; bs1
< 32; bs1
++)
3218 if ((mask
& (1 << bs1
)) == 0)
3221 gcc_assert (bs1
== 32 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3226 operands
[2] = GEN_INT (p
);
3227 operands
[3] = GEN_INT (len
);
3228 return "{depi|depwi} -1,%2,%3,%0";
3231 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3232 storing the result in operands[0]. */
3234 pa_output_64bit_ior (rtx
*operands
)
3236 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3237 int bs0
, bs1
, p
, len
;
3239 if (INTVAL (operands
[2]) == 0)
3240 return "copy %1,%0";
3242 for (bs0
= 0; bs0
< HOST_BITS_PER_WIDE_INT
; bs0
++)
3243 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs0
)) != 0)
3246 for (bs1
= bs0
; bs1
< HOST_BITS_PER_WIDE_INT
; bs1
++)
3247 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs1
)) == 0)
3250 gcc_assert (bs1
== HOST_BITS_PER_WIDE_INT
3251 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3256 operands
[2] = GEN_INT (p
);
3257 operands
[3] = GEN_INT (len
);
3258 return "depdi -1,%2,%3,%0";
3261 /* Target hook for assembling integer objects. This code handles
3262 aligned SI and DI integers specially since function references
3263 must be preceded by P%. */
3266 pa_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
3268 if (size
== UNITS_PER_WORD
3270 && function_label_operand (x
, VOIDmode
))
3272 fputs (size
== 8? "\t.dword\t" : "\t.word\t", asm_out_file
);
3274 /* We don't want an OPD when generating fast indirect calls. */
3275 if (!TARGET_FAST_INDIRECT_CALLS
)
3276 fputs ("P%", asm_out_file
);
3278 output_addr_const (asm_out_file
, x
);
3279 fputc ('\n', asm_out_file
);
3282 return default_assemble_integer (x
, size
, aligned_p
);
/* Output an ascii string.  */

void
pa_output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int co = 0;
      int io = 0;

      /* Escape up to four input characters into PARTIAL_OUTPUT.  */
      for (io = 0, co = 0; io < MIN (4, size - i); io++)
	{
	  register unsigned int c = (unsigned char) p[i + io];

	  /* Quote and backslash must themselves be escaped.  */
	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      /* Non-printable: emit a \xNN hex escape.  */
	      unsigned int hexd;
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd = c / 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd = c % 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}
      /* Start a new .STRING directive before the line gets too long
	 for the HP assembler.  */
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
3341 /* Try to rewrite floating point comparisons & branches to avoid
3342 useless add,tr insns.
3344 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3345 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3346 first attempt to remove useless add,tr insns. It is zero
3347 for the second pass as reorg sometimes leaves bogus REG_DEAD
3350 When CHECK_NOTES is zero we can only eliminate add,tr insns
3351 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3354 remove_useless_addtr_insns (int check_notes
)
3357 static int pass
= 0;
3359 /* This is fairly cheap, so always run it when optimizing. */
3363 int fbranch_count
= 0;
3365 /* Walk all the insns in this function looking for fcmp & fbranch
3366 instructions. Keep track of how many of each we find. */
3367 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3371 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3372 if (! NONJUMP_INSN_P (insn
) && ! JUMP_P (insn
))
3375 tmp
= PATTERN (insn
);
3377 /* It must be a set. */
3378 if (GET_CODE (tmp
) != SET
)
3381 /* If the destination is CCFP, then we've found an fcmp insn. */
3382 tmp
= SET_DEST (tmp
);
3383 if (GET_CODE (tmp
) == REG
&& REGNO (tmp
) == 0)
3389 tmp
= PATTERN (insn
);
3390 /* If this is an fbranch instruction, bump the fbranch counter. */
3391 if (GET_CODE (tmp
) == SET
3392 && SET_DEST (tmp
) == pc_rtx
3393 && GET_CODE (SET_SRC (tmp
)) == IF_THEN_ELSE
3394 && GET_CODE (XEXP (SET_SRC (tmp
), 0)) == NE
3395 && GET_CODE (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == REG
3396 && REGNO (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == 0)
3404 /* Find all floating point compare + branch insns. If possible,
3405 reverse the comparison & the branch to avoid add,tr insns. */
3406 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3411 /* Ignore anything that isn't an INSN. */
3412 if (! NONJUMP_INSN_P (insn
))
3415 tmp
= PATTERN (insn
);
3417 /* It must be a set. */
3418 if (GET_CODE (tmp
) != SET
)
3421 /* The destination must be CCFP, which is register zero. */
3422 tmp
= SET_DEST (tmp
);
3423 if (GET_CODE (tmp
) != REG
|| REGNO (tmp
) != 0)
3426 /* INSN should be a set of CCFP.
3428 See if the result of this insn is used in a reversed FP
3429 conditional branch. If so, reverse our condition and
3430 the branch. Doing so avoids useless add,tr insns. */
3431 next
= next_insn (insn
);
3434 /* Jumps, calls and labels stop our search. */
3435 if (JUMP_P (next
) || CALL_P (next
) || LABEL_P (next
))
3438 /* As does another fcmp insn. */
3439 if (NONJUMP_INSN_P (next
)
3440 && GET_CODE (PATTERN (next
)) == SET
3441 && GET_CODE (SET_DEST (PATTERN (next
))) == REG
3442 && REGNO (SET_DEST (PATTERN (next
))) == 0)
3445 next
= next_insn (next
);
3448 /* Is NEXT_INSN a branch? */
3449 if (next
&& JUMP_P (next
))
3451 rtx pattern
= PATTERN (next
);
3453 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3454 and CCFP dies, then reverse our conditional and the branch
3455 to avoid the add,tr. */
3456 if (GET_CODE (pattern
) == SET
3457 && SET_DEST (pattern
) == pc_rtx
3458 && GET_CODE (SET_SRC (pattern
)) == IF_THEN_ELSE
3459 && GET_CODE (XEXP (SET_SRC (pattern
), 0)) == NE
3460 && GET_CODE (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == REG
3461 && REGNO (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == 0
3462 && GET_CODE (XEXP (SET_SRC (pattern
), 1)) == PC
3463 && (fcmp_count
== fbranch_count
3465 && find_regno_note (next
, REG_DEAD
, 0))))
3467 /* Reverse the branch. */
3468 tmp
= XEXP (SET_SRC (pattern
), 1);
3469 XEXP (SET_SRC (pattern
), 1) = XEXP (SET_SRC (pattern
), 2);
3470 XEXP (SET_SRC (pattern
), 2) = tmp
;
3471 INSN_CODE (next
) = -1;
3473 /* Reverse our condition. */
3474 tmp
= PATTERN (insn
);
3475 PUT_CODE (XEXP (tmp
, 1),
3476 (reverse_condition_maybe_unordered
3477 (GET_CODE (XEXP (tmp
, 1)))));
3487 /* You may have trouble believing this, but this is the 32 bit HP-PA
3492 Variable arguments (optional; any number may be allocated)
3494 SP-(4*(N+9)) arg word N
3499 Fixed arguments (must be allocated; may remain unused)
3508 SP-32 External Data Pointer (DP)
3510 SP-24 External/stub RP (RP')
3514 SP-8 Calling Stub RP (RP'')
3519 SP-0 Stack Pointer (points to next available address)
3523 /* This function saves registers as follows. Registers marked with ' are
3524 this function's registers (as opposed to the previous function's).
3525 If a frame_pointer isn't needed, r4 is saved as a general register;
3526 the space for the frame pointer is still allocated, though, to keep
3532 SP (FP') Previous FP
3533 SP + 4 Alignment filler (sigh)
3534 SP + 8 Space for locals reserved here.
3538 SP + n All call saved register used.
3542 SP + o All call saved fp registers used.
3546 SP + p (SP') points to next available address.
3550 /* Global variables set by output_function_prologue(). */
3551 /* Size of frame. Need to know this to emit return insns from
3553 static HOST_WIDE_INT actual_fsize
, local_fsize
;
3554 static int save_fregs
;
3556 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3557 Handle case where DISP > 8k by using the add_high_const patterns.
3559 Note in DISP > 8k case, we will leave the high part of the address
3560 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3563 store_reg (int reg
, HOST_WIDE_INT disp
, int base
)
3565 rtx dest
, src
, basereg
;
3568 src
= gen_rtx_REG (word_mode
, reg
);
3569 basereg
= gen_rtx_REG (Pmode
, base
);
3570 if (VAL_14_BITS_P (disp
))
3572 dest
= gen_rtx_MEM (word_mode
, plus_constant (Pmode
, basereg
, disp
));
3573 insn
= emit_move_insn (dest
, src
);
3575 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3577 rtx delta
= GEN_INT (disp
);
3578 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3580 emit_move_insn (tmpreg
, delta
);
3581 insn
= emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
3584 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3585 gen_rtx_SET (tmpreg
,
3586 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
3587 RTX_FRAME_RELATED_P (insn
) = 1;
3589 dest
= gen_rtx_MEM (word_mode
, tmpreg
);
3590 insn
= emit_move_insn (dest
, src
);
3594 rtx delta
= GEN_INT (disp
);
3595 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
3596 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3598 emit_move_insn (tmpreg
, high
);
3599 dest
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
3600 insn
= emit_move_insn (dest
, src
);
3602 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3603 gen_rtx_SET (gen_rtx_MEM (word_mode
,
3604 gen_rtx_PLUS (word_mode
,
3611 RTX_FRAME_RELATED_P (insn
) = 1;
3614 /* Emit RTL to store REG at the memory location specified by BASE and then
3615 add MOD to BASE. MOD must be <= 8k. */
3618 store_reg_modify (int base
, int reg
, HOST_WIDE_INT mod
)
3620 rtx basereg
, srcreg
, delta
;
3623 gcc_assert (VAL_14_BITS_P (mod
));
3625 basereg
= gen_rtx_REG (Pmode
, base
);
3626 srcreg
= gen_rtx_REG (word_mode
, reg
);
3627 delta
= GEN_INT (mod
);
3629 insn
= emit_insn (gen_post_store (basereg
, srcreg
, delta
));
3632 RTX_FRAME_RELATED_P (insn
) = 1;
3634 /* RTX_FRAME_RELATED_P must be set on each frame related set
3635 in a parallel with more than one element. */
3636 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 0)) = 1;
3637 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
3641 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3642 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3643 whether to add a frame note or not.
3645 In the DISP > 8k case, we leave the high part of the address in %r1.
3646 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3649 set_reg_plus_d (int reg
, int base
, HOST_WIDE_INT disp
, int note
)
3653 if (VAL_14_BITS_P (disp
))
3655 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3656 plus_constant (Pmode
,
3657 gen_rtx_REG (Pmode
, base
), disp
));
3659 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3661 rtx basereg
= gen_rtx_REG (Pmode
, base
);
3662 rtx delta
= GEN_INT (disp
);
3663 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3665 emit_move_insn (tmpreg
, delta
);
3666 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3667 gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
3669 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3670 gen_rtx_SET (tmpreg
,
3671 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
3675 rtx basereg
= gen_rtx_REG (Pmode
, base
);
3676 rtx delta
= GEN_INT (disp
);
3677 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3679 emit_move_insn (tmpreg
,
3680 gen_rtx_PLUS (Pmode
, basereg
,
3681 gen_rtx_HIGH (Pmode
, delta
)));
3682 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3683 gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
3686 if (DO_FRAME_NOTES
&& note
)
3687 RTX_FRAME_RELATED_P (insn
) = 1;
3691 pa_compute_frame_size (HOST_WIDE_INT size
, int *fregs_live
)
3696 /* The code in pa_expand_prologue and pa_expand_epilogue must
3697 be consistent with the rounding and size calculation done here.
3698 Change them at the same time. */
3700 /* We do our own stack alignment. First, round the size of the
3701 stack locals up to a word boundary. */
3702 size
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3704 /* Space for previous frame pointer + filler. If any frame is
3705 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3706 waste some space here for the sake of HP compatibility. The
3707 first slot is only used when the frame pointer is needed. */
3708 if (size
|| frame_pointer_needed
)
3709 size
+= STARTING_FRAME_OFFSET
;
3711 /* If the current function calls __builtin_eh_return, then we need
3712 to allocate stack space for registers that will hold data for
3713 the exception handler. */
3714 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3718 for (i
= 0; EH_RETURN_DATA_REGNO (i
) != INVALID_REGNUM
; ++i
)
3720 size
+= i
* UNITS_PER_WORD
;
3723 /* Account for space used by the callee general register saves. */
3724 for (i
= 18, j
= frame_pointer_needed
? 4 : 3; i
>= j
; i
--)
3725 if (df_regs_ever_live_p (i
))
3726 size
+= UNITS_PER_WORD
;
3728 /* Account for space used by the callee floating point register saves. */
3729 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
3730 if (df_regs_ever_live_p (i
)
3731 || (!TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
3735 /* We always save both halves of the FP register, so always
3736 increment the frame size by 8 bytes. */
3740 /* If any of the floating registers are saved, account for the
3741 alignment needed for the floating point register save block. */
3744 size
= (size
+ 7) & ~7;
3749 /* The various ABIs include space for the outgoing parameters in the
3750 size of the current function's stack frame. We don't need to align
3751 for the outgoing arguments as their alignment is set by the final
3752 rounding for the frame as a whole. */
3753 size
+= crtl
->outgoing_args_size
;
3755 /* Allocate space for the fixed frame marker. This space must be
3756 allocated for any function that makes calls or allocates
3758 if (!crtl
->is_leaf
|| size
)
3759 size
+= TARGET_64BIT
? 48 : 32;
3761 /* Finally, round to the preferred stack boundary. */
3762 return ((size
+ PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
- 1)
3763 & ~(PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
- 1));
3766 /* Generate the assembly code for function entry. FILE is a stdio
3767 stream to output the code to. SIZE is an int: how many units of
3768 temporary storage to allocate.
3770 Refer to the array `regs_ever_live' to determine which registers to
3771 save; `regs_ever_live[I]' is nonzero if register number I is ever
3772 used in the function. This function is responsible for knowing
3773 which registers should not be saved even if used. */
3775 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3776 of memory. If any fpu reg is used in the function, we allocate
3777 such a block here, at the bottom of the frame, just in case it's needed.
3779 If this function is a leaf procedure, then we may choose not
3780 to do a "save" insn. The decision about whether or not
3781 to do this is made in regclass.c. */
3784 pa_output_function_prologue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
3786 /* The function's label and associated .PROC must never be
3787 separated and must be output *after* any profiling declarations
3788 to avoid changing spaces/subspaces within a procedure. */
3789 ASM_OUTPUT_LABEL (file
, XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0));
3790 fputs ("\t.PROC\n", file
);
3792 /* pa_expand_prologue does the dirty work now. We just need
3793 to output the assembler directives which denote the start
3795 fprintf (file
, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC
, actual_fsize
);
3797 fputs (",NO_CALLS", file
);
3799 fputs (",CALLS", file
);
3801 fputs (",SAVE_RP", file
);
3803 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3804 at the beginning of the frame and that it is used as the frame
3805 pointer for the frame. We do this because our current frame
3806 layout doesn't conform to that specified in the HP runtime
3807 documentation and we need a way to indicate to programs such as
3808 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3809 isn't used by HP compilers but is supported by the assembler.
3810 However, SAVE_SP is supposed to indicate that the previous stack
3811 pointer has been saved in the frame marker. */
3812 if (frame_pointer_needed
)
3813 fputs (",SAVE_SP", file
);
3815 /* Pass on information about the number of callee register saves
3816 performed in the prologue.
3818 The compiler is supposed to pass the highest register number
3819 saved, the assembler then has to adjust that number before
3820 entering it into the unwind descriptor (to account for any
3821 caller saved registers with lower register numbers than the
3822 first callee saved register). */
3824 fprintf (file
, ",ENTRY_GR=%d", gr_saved
+ 2);
3827 fprintf (file
, ",ENTRY_FR=%d", fr_saved
+ 11);
3829 fputs ("\n\t.ENTRY\n", file
);
3831 remove_useless_addtr_insns (0);
3835 pa_expand_prologue (void)
3837 int merge_sp_adjust_with_store
= 0;
3838 HOST_WIDE_INT size
= get_frame_size ();
3839 HOST_WIDE_INT offset
;
3848 /* Compute total size for frame pointer, filler, locals and rounding to
3849 the next word boundary. Similar code appears in pa_compute_frame_size
3850 and must be changed in tandem with this code. */
3851 local_fsize
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3852 if (local_fsize
|| frame_pointer_needed
)
3853 local_fsize
+= STARTING_FRAME_OFFSET
;
3855 actual_fsize
= pa_compute_frame_size (size
, &save_fregs
);
3856 if (flag_stack_usage_info
)
3857 current_function_static_stack_size
= actual_fsize
;
3859 /* Compute a few things we will use often. */
3860 tmpreg
= gen_rtx_REG (word_mode
, 1);
3862 /* Save RP first. The calling conventions manual states RP will
3863 always be stored into the caller's frame at sp - 20 or sp - 16
3864 depending on which ABI is in use. */
3865 if (df_regs_ever_live_p (2) || crtl
->calls_eh_return
)
3867 store_reg (2, TARGET_64BIT
? -16 : -20, STACK_POINTER_REGNUM
);
3873 /* Allocate the local frame and set up the frame pointer if needed. */
3874 if (actual_fsize
!= 0)
3876 if (frame_pointer_needed
)
3878 /* Copy the old frame pointer temporarily into %r1. Set up the
3879 new stack pointer, then store away the saved old frame pointer
3880 into the stack at sp and at the same time update the stack
3881 pointer by actual_fsize bytes. Two versions, first
3882 handles small (<8k) frames. The second handles large (>=8k)
3884 insn
= emit_move_insn (tmpreg
, hard_frame_pointer_rtx
);
3886 RTX_FRAME_RELATED_P (insn
) = 1;
3888 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3890 RTX_FRAME_RELATED_P (insn
) = 1;
3892 if (VAL_14_BITS_P (actual_fsize
))
3893 store_reg_modify (STACK_POINTER_REGNUM
, 1, actual_fsize
);
3896 /* It is incorrect to store the saved frame pointer at *sp,
3897 then increment sp (writes beyond the current stack boundary).
3899 So instead use stwm to store at *sp and post-increment the
3900 stack pointer as an atomic operation. Then increment sp to
3901 finish allocating the new frame. */
3902 HOST_WIDE_INT adjust1
= 8192 - 64;
3903 HOST_WIDE_INT adjust2
= actual_fsize
- adjust1
;
3905 store_reg_modify (STACK_POINTER_REGNUM
, 1, adjust1
);
3906 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3910 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3911 we need to store the previous stack pointer (frame pointer)
3912 into the frame marker on targets that use the HP unwind
3913 library. This allows the HP unwind library to be used to
3914 unwind GCC frames. However, we are not fully compatible
3915 with the HP library because our frame layout differs from
3916 that specified in the HP runtime specification.
3918 We don't want a frame note on this instruction as the frame
3919 marker moves during dynamic stack allocation.
3921 This instruction also serves as a blockage to prevent
3922 register spills from being scheduled before the stack
3923 pointer is raised. This is necessary as we store
3924 registers using the frame pointer as a base register,
3925 and the frame pointer is set before sp is raised. */
3926 if (TARGET_HPUX_UNWIND_LIBRARY
)
3928 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
,
3929 GEN_INT (TARGET_64BIT
? -8 : -4));
3931 emit_move_insn (gen_rtx_MEM (word_mode
, addr
),
3932 hard_frame_pointer_rtx
);
3935 emit_insn (gen_blockage ());
3937 /* no frame pointer needed. */
3940 /* In some cases we can perform the first callee register save
3941 and allocating the stack frame at the same time. If so, just
3942 make a note of it and defer allocating the frame until saving
3943 the callee registers. */
3944 if (VAL_14_BITS_P (actual_fsize
) && local_fsize
== 0)
3945 merge_sp_adjust_with_store
= 1;
3946 /* Can not optimize. Adjust the stack frame by actual_fsize
3949 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3954 /* Normal register save.
3956 Do not save the frame pointer in the frame_pointer_needed case. It
3957 was done earlier. */
3958 if (frame_pointer_needed
)
3960 offset
= local_fsize
;
3962 /* Saving the EH return data registers in the frame is the simplest
3963 way to get the frame unwind information emitted. We put them
3964 just before the general registers. */
3965 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3967 unsigned int i
, regno
;
3971 regno
= EH_RETURN_DATA_REGNO (i
);
3972 if (regno
== INVALID_REGNUM
)
3975 store_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
3976 offset
+= UNITS_PER_WORD
;
3980 for (i
= 18; i
>= 4; i
--)
3981 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
3983 store_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
3984 offset
+= UNITS_PER_WORD
;
3987 /* Account for %r3 which is saved in a special place. */
3990 /* No frame pointer needed. */
3993 offset
= local_fsize
- actual_fsize
;
3995 /* Saving the EH return data registers in the frame is the simplest
3996 way to get the frame unwind information emitted. */
3997 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3999 unsigned int i
, regno
;
4003 regno
= EH_RETURN_DATA_REGNO (i
);
4004 if (regno
== INVALID_REGNUM
)
4007 /* If merge_sp_adjust_with_store is nonzero, then we can
4008 optimize the first save. */
4009 if (merge_sp_adjust_with_store
)
4011 store_reg_modify (STACK_POINTER_REGNUM
, regno
, -offset
);
4012 merge_sp_adjust_with_store
= 0;
4015 store_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4016 offset
+= UNITS_PER_WORD
;
4020 for (i
= 18; i
>= 3; i
--)
4021 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4023 /* If merge_sp_adjust_with_store is nonzero, then we can
4024 optimize the first GR save. */
4025 if (merge_sp_adjust_with_store
)
4027 store_reg_modify (STACK_POINTER_REGNUM
, i
, -offset
);
4028 merge_sp_adjust_with_store
= 0;
4031 store_reg (i
, offset
, STACK_POINTER_REGNUM
);
4032 offset
+= UNITS_PER_WORD
;
4036 /* If we wanted to merge the SP adjustment with a GR save, but we never
4037 did any GR saves, then just emit the adjustment here. */
4038 if (merge_sp_adjust_with_store
)
4039 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4043 /* The hppa calling conventions say that %r19, the pic offset
4044 register, is saved at sp - 32 (in this function's frame)
4045 when generating PIC code. FIXME: What is the correct thing
4046 to do for functions which make no calls and allocate no
4047 frame? Do we need to allocate a frame, or can we just omit
4048 the save? For now we'll just omit the save.
4050 We don't want a note on this insn as the frame marker can
4051 move if there is a dynamic stack allocation. */
4052 if (flag_pic
&& actual_fsize
!= 0 && !TARGET_64BIT
)
4054 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
, GEN_INT (-32));
4056 emit_move_insn (gen_rtx_MEM (word_mode
, addr
), pic_offset_table_rtx
);
4060 /* Align pointer properly (doubleword boundary). */
4061 offset
= (offset
+ 7) & ~7;
4063 /* Floating point register store. */
4068 /* First get the frame or stack pointer to the start of the FP register
4070 if (frame_pointer_needed
)
4072 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM
, offset
, 0);
4073 base
= hard_frame_pointer_rtx
;
4077 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
4078 base
= stack_pointer_rtx
;
4081 /* Now actually save the FP registers. */
4082 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4084 if (df_regs_ever_live_p (i
)
4085 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
4089 addr
= gen_rtx_MEM (DFmode
,
4090 gen_rtx_POST_INC (word_mode
, tmpreg
));
4091 reg
= gen_rtx_REG (DFmode
, i
);
4092 insn
= emit_move_insn (addr
, reg
);
4095 RTX_FRAME_RELATED_P (insn
) = 1;
4098 rtx mem
= gen_rtx_MEM (DFmode
,
4099 plus_constant (Pmode
, base
,
4101 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4102 gen_rtx_SET (mem
, reg
));
4106 rtx meml
= gen_rtx_MEM (SFmode
,
4107 plus_constant (Pmode
, base
,
4109 rtx memr
= gen_rtx_MEM (SFmode
,
4110 plus_constant (Pmode
, base
,
4112 rtx regl
= gen_rtx_REG (SFmode
, i
);
4113 rtx regr
= gen_rtx_REG (SFmode
, i
+ 1);
4114 rtx setl
= gen_rtx_SET (meml
, regl
);
4115 rtx setr
= gen_rtx_SET (memr
, regr
);
4118 RTX_FRAME_RELATED_P (setl
) = 1;
4119 RTX_FRAME_RELATED_P (setr
) = 1;
4120 vec
= gen_rtvec (2, setl
, setr
);
4121 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4122 gen_rtx_SEQUENCE (VOIDmode
, vec
));
4125 offset
+= GET_MODE_SIZE (DFmode
);
4132 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4133 Handle case where DISP > 8k by using the add_high_const patterns. */
4136 load_reg (int reg
, HOST_WIDE_INT disp
, int base
)
4138 rtx dest
= gen_rtx_REG (word_mode
, reg
);
4139 rtx basereg
= gen_rtx_REG (Pmode
, base
);
4142 if (VAL_14_BITS_P (disp
))
4143 src
= gen_rtx_MEM (word_mode
, plus_constant (Pmode
, basereg
, disp
));
4144 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
4146 rtx delta
= GEN_INT (disp
);
4147 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4149 emit_move_insn (tmpreg
, delta
);
4150 if (TARGET_DISABLE_INDEXING
)
4152 emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4153 src
= gen_rtx_MEM (word_mode
, tmpreg
);
4156 src
= gen_rtx_MEM (word_mode
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4160 rtx delta
= GEN_INT (disp
);
4161 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
4162 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4164 emit_move_insn (tmpreg
, high
);
4165 src
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
4168 emit_move_insn (dest
, src
);
4171 /* Update the total code bytes output to the text section. */
4174 update_total_code_bytes (unsigned int nbytes
)
4176 if ((TARGET_PORTABLE_RUNTIME
|| !TARGET_GAS
|| !TARGET_SOM
)
4177 && !IN_NAMED_SECTION_P (cfun
->decl
))
4179 unsigned int old_total
= total_code_bytes
;
4181 total_code_bytes
+= nbytes
;
4183 /* Be prepared to handle overflows. */
4184 if (old_total
> total_code_bytes
)
4185 total_code_bytes
= UINT_MAX
;
4189 /* This function generates the assembly code for function exit.
4190 Args are as for output_function_prologue ().
4192 The function epilogue should not depend on the current stack
4193 pointer! It should use the frame pointer only. This is mandatory
4194 because of alloca; we also take advantage of it to omit stack
4195 adjustments before returning. */
4198 pa_output_function_epilogue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
4200 rtx_insn
*insn
= get_last_insn ();
4203 /* pa_expand_epilogue does the dirty work now. We just need
4204 to output the assembler directives which denote the end
4207 To make debuggers happy, emit a nop if the epilogue was completely
4208 eliminated due to a volatile call as the last insn in the
4209 current function. That way the return address (in %r2) will
4210 always point to a valid instruction in the current function. */
4212 /* Get the last real insn. */
4214 insn
= prev_real_insn (insn
);
4216 /* If it is a sequence, then look inside. */
4217 if (insn
&& NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == SEQUENCE
)
4218 insn
= as_a
<rtx_sequence
*> (PATTERN (insn
))-> insn (0);
4220 /* If insn is a CALL_INSN, then it must be a call to a volatile
4221 function (otherwise there would be epilogue insns). */
4222 if (insn
&& CALL_P (insn
))
4224 fputs ("\tnop\n", file
);
4230 fputs ("\t.EXIT\n\t.PROCEND\n", file
);
4232 if (TARGET_SOM
&& TARGET_GAS
)
4234 /* We are done with this subspace except possibly for some additional
4235 debug information. Forget that we are in this subspace to ensure
4236 that the next function is output in its own subspace. */
4238 cfun
->machine
->in_nsubspa
= 2;
4241 /* Thunks do their own insn accounting. */
4245 if (INSN_ADDRESSES_SET_P ())
4247 last_address
= extra_nop
? 4 : 0;
4248 insn
= get_last_nonnote_insn ();
4251 last_address
+= INSN_ADDRESSES (INSN_UID (insn
));
4253 last_address
+= insn_default_length (insn
);
4255 last_address
= ((last_address
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
4256 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
4259 last_address
= UINT_MAX
;
4261 /* Finally, update the total number of code bytes output so far. */
4262 update_total_code_bytes (last_address
);
4266 pa_expand_epilogue (void)
4269 HOST_WIDE_INT offset
;
4270 HOST_WIDE_INT ret_off
= 0;
4272 int merge_sp_adjust_with_load
= 0;
4274 /* We will use this often. */
4275 tmpreg
= gen_rtx_REG (word_mode
, 1);
4277 /* Try to restore RP early to avoid load/use interlocks when
4278 RP gets used in the return (bv) instruction. This appears to still
4279 be necessary even when we schedule the prologue and epilogue. */
4282 ret_off
= TARGET_64BIT
? -16 : -20;
4283 if (frame_pointer_needed
)
4285 load_reg (2, ret_off
, HARD_FRAME_POINTER_REGNUM
);
4290 /* No frame pointer, and stack is smaller than 8k. */
4291 if (VAL_14_BITS_P (ret_off
- actual_fsize
))
4293 load_reg (2, ret_off
- actual_fsize
, STACK_POINTER_REGNUM
);
4299 /* General register restores. */
4300 if (frame_pointer_needed
)
4302 offset
= local_fsize
;
4304 /* If the current function calls __builtin_eh_return, then we need
4305 to restore the saved EH data registers. */
4306 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4308 unsigned int i
, regno
;
4312 regno
= EH_RETURN_DATA_REGNO (i
);
4313 if (regno
== INVALID_REGNUM
)
4316 load_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
4317 offset
+= UNITS_PER_WORD
;
4321 for (i
= 18; i
>= 4; i
--)
4322 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4324 load_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
4325 offset
+= UNITS_PER_WORD
;
4330 offset
= local_fsize
- actual_fsize
;
4332 /* If the current function calls __builtin_eh_return, then we need
4333 to restore the saved EH data registers. */
4334 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4336 unsigned int i
, regno
;
4340 regno
= EH_RETURN_DATA_REGNO (i
);
4341 if (regno
== INVALID_REGNUM
)
4344 /* Only for the first load.
4345 merge_sp_adjust_with_load holds the register load
4346 with which we will merge the sp adjustment. */
4347 if (merge_sp_adjust_with_load
== 0
4349 && VAL_14_BITS_P (-actual_fsize
))
4350 merge_sp_adjust_with_load
= regno
;
4352 load_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4353 offset
+= UNITS_PER_WORD
;
4357 for (i
= 18; i
>= 3; i
--)
4359 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4361 /* Only for the first load.
4362 merge_sp_adjust_with_load holds the register load
4363 with which we will merge the sp adjustment. */
4364 if (merge_sp_adjust_with_load
== 0
4366 && VAL_14_BITS_P (-actual_fsize
))
4367 merge_sp_adjust_with_load
= i
;
4369 load_reg (i
, offset
, STACK_POINTER_REGNUM
);
4370 offset
+= UNITS_PER_WORD
;
4375 /* Align pointer properly (doubleword boundary). */
4376 offset
= (offset
+ 7) & ~7;
4378 /* FP register restores. */
4381 /* Adjust the register to index off of. */
4382 if (frame_pointer_needed
)
4383 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM
, offset
, 0);
4385 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
4387 /* Actually do the restores now. */
4388 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4389 if (df_regs_ever_live_p (i
)
4390 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
4392 rtx src
= gen_rtx_MEM (DFmode
,
4393 gen_rtx_POST_INC (word_mode
, tmpreg
));
4394 rtx dest
= gen_rtx_REG (DFmode
, i
);
4395 emit_move_insn (dest
, src
);
4399 /* Emit a blockage insn here to keep these insns from being moved to
4400 an earlier spot in the epilogue, or into the main instruction stream.
4402 This is necessary as we must not cut the stack back before all the
4403 restores are finished. */
4404 emit_insn (gen_blockage ());
4406 /* Reset stack pointer (and possibly frame pointer). The stack
4407 pointer is initially set to fp + 64 to avoid a race condition. */
4408 if (frame_pointer_needed
)
4410 rtx delta
= GEN_INT (-64);
4412 set_reg_plus_d (STACK_POINTER_REGNUM
, HARD_FRAME_POINTER_REGNUM
, 64, 0);
4413 emit_insn (gen_pre_load (hard_frame_pointer_rtx
,
4414 stack_pointer_rtx
, delta
));
4416 /* If we were deferring a callee register restore, do it now. */
4417 else if (merge_sp_adjust_with_load
)
4419 rtx delta
= GEN_INT (-actual_fsize
);
4420 rtx dest
= gen_rtx_REG (word_mode
, merge_sp_adjust_with_load
);
4422 emit_insn (gen_pre_load (dest
, stack_pointer_rtx
, delta
));
4424 else if (actual_fsize
!= 0)
4425 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4428 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4429 frame greater than 8k), do so now. */
4431 load_reg (2, ret_off
, STACK_POINTER_REGNUM
);
4433 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4435 rtx sa
= EH_RETURN_STACKADJ_RTX
;
4437 emit_insn (gen_blockage ());
4438 emit_insn (TARGET_64BIT
4439 ? gen_subdi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
)
4440 : gen_subsi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
));
4445 pa_can_use_return_insn (void)
4447 if (!reload_completed
)
4450 if (frame_pointer_needed
)
4453 if (df_regs_ever_live_p (2))
4459 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4463 hppa_pic_save_rtx (void)
4465 return get_hard_reg_initial_val (word_mode
, PIC_OFFSET_TABLE_REGNUM
);
4468 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4469 #define NO_DEFERRED_PROFILE_COUNTERS 0
4473 /* Vector of funcdef numbers. */
4474 static vec
<int> funcdef_nos
;
4476 /* Output deferred profile counters. */
4478 output_deferred_profile_counters (void)
4483 if (funcdef_nos
.is_empty ())
4486 switch_to_section (data_section
);
4487 align
= MIN (BIGGEST_ALIGNMENT
, LONG_TYPE_SIZE
);
4488 ASM_OUTPUT_ALIGN (asm_out_file
, floor_log2 (align
/ BITS_PER_UNIT
));
4490 for (i
= 0; funcdef_nos
.iterate (i
, &n
); i
++)
4492 targetm
.asm_out
.internal_label (asm_out_file
, "LP", n
);
4493 assemble_integer (const0_rtx
, LONG_TYPE_SIZE
/ BITS_PER_UNIT
, align
, 1);
4496 funcdef_nos
.release ();
4500 hppa_profile_hook (int label_no
)
4502 /* We use SImode for the address of the function in both 32 and
4503 64-bit code to avoid having to provide DImode versions of the
4504 lcla2 and load_offset_label_address insn patterns. */
4505 rtx reg
= gen_reg_rtx (SImode
);
4506 rtx_code_label
*label_rtx
= gen_label_rtx ();
4507 rtx begin_label_rtx
;
4508 rtx_insn
*call_insn
;
4509 char begin_label_name
[16];
4511 ASM_GENERATE_INTERNAL_LABEL (begin_label_name
, FUNC_BEGIN_PROLOG_LABEL
,
4513 begin_label_rtx
= gen_rtx_SYMBOL_REF (SImode
, ggc_strdup (begin_label_name
));
4516 emit_move_insn (arg_pointer_rtx
,
4517 gen_rtx_PLUS (word_mode
, virtual_outgoing_args_rtx
,
4520 emit_move_insn (gen_rtx_REG (word_mode
, 26), gen_rtx_REG (word_mode
, 2));
4522 /* The address of the function is loaded into %r25 with an instruction-
4523 relative sequence that avoids the use of relocations. The sequence
4524 is split so that the load_offset_label_address instruction can
4525 occupy the delay slot of the call to _mcount. */
4527 emit_insn (gen_lcla2 (reg
, label_rtx
));
4529 emit_insn (gen_lcla1 (reg
, label_rtx
));
4531 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode
, 25),
4532 reg
, begin_label_rtx
, label_rtx
));
4534 #if !NO_DEFERRED_PROFILE_COUNTERS
4536 rtx count_label_rtx
, addr
, r24
;
4537 char count_label_name
[16];
4539 funcdef_nos
.safe_push (label_no
);
4540 ASM_GENERATE_INTERNAL_LABEL (count_label_name
, "LP", label_no
);
4541 count_label_rtx
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (count_label_name
));
4543 addr
= force_reg (Pmode
, count_label_rtx
);
4544 r24
= gen_rtx_REG (Pmode
, 24);
4545 emit_move_insn (r24
, addr
);
4548 emit_call_insn (gen_call (gen_rtx_MEM (Pmode
,
4549 gen_rtx_SYMBOL_REF (Pmode
,
4551 GEN_INT (TARGET_64BIT
? 24 : 12)));
4553 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), r24
);
4558 emit_call_insn (gen_call (gen_rtx_MEM (Pmode
,
4559 gen_rtx_SYMBOL_REF (Pmode
,
4561 GEN_INT (TARGET_64BIT
? 16 : 8)));
4565 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 25));
4566 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 26));
4568 /* Indicate the _mcount call cannot throw, nor will it execute a
4570 make_reg_eh_region_note_nothrow_nononlocal (call_insn
);
4573 /* Fetch the return address for the frame COUNT steps up from
4574 the current frame, after the prologue. FRAMEADDR is the
4575 frame pointer of the COUNT frame.
4577 We want to ignore any export stub remnants here. To handle this,
4578 we examine the code at the return address, and if it is an export
4579 stub, we return a memory rtx for the stub return address stored
4582 The value returned is used in two different ways:
4584 1. To find a function's caller.
4586 2. To change the return address for a function.
4588 This function handles most instances of case 1; however, it will
4589 fail if there are two levels of stubs to execute on the return
4590 path. The only way I believe that can happen is if the return value
4591 needs a parameter relocation, which never happens for C code.
4593 This function handles most instances of case 2; however, it will
4594 fail if we did not originally have stub code on the return path
4595 but will need stub code on the new return path. This can happen if
4596 the caller & callee are both in the main program, but the new
4597 return location is in a shared library. */
4600 pa_return_addr_rtx (int count
, rtx frameaddr
)
4607 /* The instruction stream at the return address of a PA1.X export stub is:
4609 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4610 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4611 0x00011820 | stub+16: mtsp r1,sr0
4612 0xe0400002 | stub+20: be,n 0(sr0,rp)
4614 0xe0400002 must be specified as -532676606 so that it won't be
4615 rejected as an invalid immediate operand on 64-bit hosts.
4617 The instruction stream at the return address of a PA2.0 export stub is:
4619 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4620 0xe840d002 | stub+12: bve,n (rp)
4623 HOST_WIDE_INT insns
[4];
4629 rp
= get_hard_reg_initial_val (Pmode
, 2);
4631 if (TARGET_64BIT
|| TARGET_NO_SPACE_REGS
)
4634 /* If there is no export stub then just use the value saved from
4635 the return pointer register. */
4637 saved_rp
= gen_reg_rtx (Pmode
);
4638 emit_move_insn (saved_rp
, rp
);
4640 /* Get pointer to the instruction stream. We have to mask out the
4641 privilege level from the two low order bits of the return address
4642 pointer here so that ins will point to the start of the first
4643 instruction that would have been executed if we returned. */
4644 ins
= copy_to_reg (gen_rtx_AND (Pmode
, rp
, MASK_RETURN_ADDR
));
4645 label
= gen_label_rtx ();
4649 insns
[0] = 0x4bc23fd1;
4650 insns
[1] = -398405630;
4655 insns
[0] = 0x4bc23fd1;
4656 insns
[1] = 0x004010a1;
4657 insns
[2] = 0x00011820;
4658 insns
[3] = -532676606;
4662 /* Check the instruction stream at the normal return address for the
4663 export stub. If it is an export stub, than our return address is
4664 really in -24[frameaddr]. */
4666 for (i
= 0; i
< len
; i
++)
4668 rtx op0
= gen_rtx_MEM (SImode
, plus_constant (Pmode
, ins
, i
* 4));
4669 rtx op1
= GEN_INT (insns
[i
]);
4670 emit_cmp_and_jump_insns (op0
, op1
, NE
, NULL
, SImode
, 0, label
);
4673 /* Here we know that our return address points to an export
4674 stub. We don't want to return the address of the export stub,
4675 but rather the return address of the export stub. That return
4676 address is stored at -24[frameaddr]. */
4678 emit_move_insn (saved_rp
,
4680 memory_address (Pmode
,
4681 plus_constant (Pmode
, frameaddr
,
4690 pa_emit_bcond_fp (rtx operands
[])
4692 enum rtx_code code
= GET_CODE (operands
[0]);
4693 rtx operand0
= operands
[1];
4694 rtx operand1
= operands
[2];
4695 rtx label
= operands
[3];
4697 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode
, 0),
4698 gen_rtx_fmt_ee (code
, CCFPmode
, operand0
, operand1
)));
4700 emit_jump_insn (gen_rtx_SET (pc_rtx
,
4701 gen_rtx_IF_THEN_ELSE (VOIDmode
,
4704 gen_rtx_REG (CCFPmode
, 0),
4706 gen_rtx_LABEL_REF (VOIDmode
, label
),
4711 /* Adjust the cost of a scheduling dependency. Return the new cost of
4712 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4715 pa_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep_insn
, int cost
)
4717 enum attr_type attr_type
;
4719 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4720 true dependencies as they are described with bypasses now. */
4721 if (pa_cpu
>= PROCESSOR_8000
|| REG_NOTE_KIND (link
) == 0)
4724 if (! recog_memoized (insn
))
4727 attr_type
= get_attr_type (insn
);
4729 switch (REG_NOTE_KIND (link
))
4732 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4735 if (attr_type
== TYPE_FPLOAD
)
4737 rtx pat
= PATTERN (insn
);
4738 rtx dep_pat
= PATTERN (dep_insn
);
4739 if (GET_CODE (pat
) == PARALLEL
)
4741 /* This happens for the fldXs,mb patterns. */
4742 pat
= XVECEXP (pat
, 0, 0);
4744 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4745 /* If this happens, we have to extend this to schedule
4746 optimally. Return 0 for now. */
4749 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
4751 if (! recog_memoized (dep_insn
))
4753 switch (get_attr_type (dep_insn
))
4760 case TYPE_FPSQRTSGL
:
4761 case TYPE_FPSQRTDBL
:
4762 /* A fpload can't be issued until one cycle before a
4763 preceding arithmetic operation has finished if
4764 the target of the fpload is any of the sources
4765 (or destination) of the arithmetic operation. */
4766 return insn_default_latency (dep_insn
) - 1;
4773 else if (attr_type
== TYPE_FPALU
)
4775 rtx pat
= PATTERN (insn
);
4776 rtx dep_pat
= PATTERN (dep_insn
);
4777 if (GET_CODE (pat
) == PARALLEL
)
4779 /* This happens for the fldXs,mb patterns. */
4780 pat
= XVECEXP (pat
, 0, 0);
4782 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4783 /* If this happens, we have to extend this to schedule
4784 optimally. Return 0 for now. */
4787 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
4789 if (! recog_memoized (dep_insn
))
4791 switch (get_attr_type (dep_insn
))
4795 case TYPE_FPSQRTSGL
:
4796 case TYPE_FPSQRTDBL
:
4797 /* An ALU flop can't be issued until two cycles before a
4798 preceding divide or sqrt operation has finished if
4799 the target of the ALU flop is any of the sources
4800 (or destination) of the divide or sqrt operation. */
4801 return insn_default_latency (dep_insn
) - 2;
4809 /* For other anti dependencies, the cost is 0. */
4812 case REG_DEP_OUTPUT
:
4813 /* Output dependency; DEP_INSN writes a register that INSN writes some
4815 if (attr_type
== TYPE_FPLOAD
)
4817 rtx pat
= PATTERN (insn
);
4818 rtx dep_pat
= PATTERN (dep_insn
);
4819 if (GET_CODE (pat
) == PARALLEL
)
4821 /* This happens for the fldXs,mb patterns. */
4822 pat
= XVECEXP (pat
, 0, 0);
4824 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4825 /* If this happens, we have to extend this to schedule
4826 optimally. Return 0 for now. */
4829 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
4831 if (! recog_memoized (dep_insn
))
4833 switch (get_attr_type (dep_insn
))
4840 case TYPE_FPSQRTSGL
:
4841 case TYPE_FPSQRTDBL
:
4842 /* A fpload can't be issued until one cycle before a
4843 preceding arithmetic operation has finished if
4844 the target of the fpload is the destination of the
4845 arithmetic operation.
4847 Exception: For PA7100LC, PA7200 and PA7300, the cost
4848 is 3 cycles, unless they bundle together. We also
4849 pay the penalty if the second insn is a fpload. */
4850 return insn_default_latency (dep_insn
) - 1;
4857 else if (attr_type
== TYPE_FPALU
)
4859 rtx pat
= PATTERN (insn
);
4860 rtx dep_pat
= PATTERN (dep_insn
);
4861 if (GET_CODE (pat
) == PARALLEL
)
4863 /* This happens for the fldXs,mb patterns. */
4864 pat
= XVECEXP (pat
, 0, 0);
4866 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4867 /* If this happens, we have to extend this to schedule
4868 optimally. Return 0 for now. */
4871 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
4873 if (! recog_memoized (dep_insn
))
4875 switch (get_attr_type (dep_insn
))
4879 case TYPE_FPSQRTSGL
:
4880 case TYPE_FPSQRTDBL
:
4881 /* An ALU flop can't be issued until two cycles before a
4882 preceding divide or sqrt operation has finished if
4883 the target of the ALU flop is also the target of
4884 the divide or sqrt operation. */
4885 return insn_default_latency (dep_insn
) - 2;
4893 /* For other output dependencies, the cost is 0. */
4901 /* Adjust scheduling priorities. We use this to try and keep addil
4902 and the next use of %r1 close together. */
4904 pa_adjust_priority (rtx_insn
*insn
, int priority
)
4906 rtx set
= single_set (insn
);
4910 src
= SET_SRC (set
);
4911 dest
= SET_DEST (set
);
4912 if (GET_CODE (src
) == LO_SUM
4913 && symbolic_operand (XEXP (src
, 1), VOIDmode
)
4914 && ! read_only_operand (XEXP (src
, 1), VOIDmode
))
4917 else if (GET_CODE (src
) == MEM
4918 && GET_CODE (XEXP (src
, 0)) == LO_SUM
4919 && symbolic_operand (XEXP (XEXP (src
, 0), 1), VOIDmode
)
4920 && ! read_only_operand (XEXP (XEXP (src
, 0), 1), VOIDmode
))
4923 else if (GET_CODE (dest
) == MEM
4924 && GET_CODE (XEXP (dest
, 0)) == LO_SUM
4925 && symbolic_operand (XEXP (XEXP (dest
, 0), 1), VOIDmode
)
4926 && ! read_only_operand (XEXP (XEXP (dest
, 0), 1), VOIDmode
))
4932 /* The 700 can only issue a single insn at a time.
4933 The 7XXX processors can issue two insns at a time.
4934 The 8000 can issue 4 insns at a time. */
4936 pa_issue_rate (void)
4940 case PROCESSOR_700
: return 1;
4941 case PROCESSOR_7100
: return 2;
4942 case PROCESSOR_7100LC
: return 2;
4943 case PROCESSOR_7200
: return 2;
4944 case PROCESSOR_7300
: return 2;
4945 case PROCESSOR_8000
: return 4;
4954 /* Return any length plus adjustment needed by INSN which already has
4955 its length computed as LENGTH. Return LENGTH if no adjustment is
4958 Also compute the length of an inline block move here as it is too
4959 complicated to express as a length attribute in pa.md. */
4961 pa_adjust_insn_length (rtx_insn
*insn
, int length
)
4963 rtx pat
= PATTERN (insn
);
4965 /* If length is negative or undefined, provide initial length. */
4966 if ((unsigned int) length
>= INT_MAX
)
4968 if (GET_CODE (pat
) == SEQUENCE
)
4969 insn
= as_a
<rtx_insn
*> (XVECEXP (pat
, 0, 0));
4971 switch (get_attr_type (insn
))
4974 length
= pa_attr_length_millicode_call (insn
);
4977 length
= pa_attr_length_call (insn
, 0);
4980 length
= pa_attr_length_call (insn
, 1);
4983 length
= pa_attr_length_indirect_call (insn
);
4985 case TYPE_SH_FUNC_ADRS
:
4986 length
= pa_attr_length_millicode_call (insn
) + 20;
4993 /* Block move pattern. */
4994 if (NONJUMP_INSN_P (insn
)
4995 && GET_CODE (pat
) == PARALLEL
4996 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
4997 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
4998 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == MEM
4999 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
5000 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == BLKmode
)
5001 length
+= compute_movmem_length (insn
) - 4;
5002 /* Block clear pattern. */
5003 else if (NONJUMP_INSN_P (insn
)
5004 && GET_CODE (pat
) == PARALLEL
5005 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
5006 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
5007 && XEXP (XVECEXP (pat
, 0, 0), 1) == const0_rtx
5008 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
)
5009 length
+= compute_clrmem_length (insn
) - 4;
5010 /* Conditional branch with an unfilled delay slot. */
5011 else if (JUMP_P (insn
) && ! simplejump_p (insn
))
5013 /* Adjust a short backwards conditional with an unfilled delay slot. */
5014 if (GET_CODE (pat
) == SET
5016 && JUMP_LABEL (insn
) != NULL_RTX
5017 && ! forward_branch_p (insn
))
5019 else if (GET_CODE (pat
) == PARALLEL
5020 && get_attr_type (insn
) == TYPE_PARALLEL_BRANCH
5023 /* Adjust dbra insn with short backwards conditional branch with
5024 unfilled delay slot -- only for case where counter is in a
5025 general register register. */
5026 else if (GET_CODE (pat
) == PARALLEL
5027 && GET_CODE (XVECEXP (pat
, 0, 1)) == SET
5028 && GET_CODE (XEXP (XVECEXP (pat
, 0, 1), 0)) == REG
5029 && ! FP_REG_P (XEXP (XVECEXP (pat
, 0, 1), 0))
5031 && ! forward_branch_p (insn
))
5037 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5040 pa_print_operand_punct_valid_p (unsigned char code
)
5051 /* Print operand X (an rtx) in assembler syntax to file FILE.
5052 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5053 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5056 pa_print_operand (FILE *file
, rtx x
, int code
)
5061 /* Output a 'nop' if there's nothing for the delay slot. */
5062 if (dbr_sequence_length () == 0)
5063 fputs ("\n\tnop", file
);
5066 /* Output a nullification completer if there's nothing for the */
5067 /* delay slot or nullification is requested. */
5068 if (dbr_sequence_length () == 0 ||
5070 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence
, 0, 0))))
5074 /* Print out the second register name of a register pair.
5075 I.e., R (6) => 7. */
5076 fputs (reg_names
[REGNO (x
) + 1], file
);
5079 /* A register or zero. */
5081 || (x
== CONST0_RTX (DFmode
))
5082 || (x
== CONST0_RTX (SFmode
)))
5084 fputs ("%r0", file
);
5090 /* A register or zero (floating point). */
5092 || (x
== CONST0_RTX (DFmode
))
5093 || (x
== CONST0_RTX (SFmode
)))
5095 fputs ("%fr0", file
);
5104 xoperands
[0] = XEXP (XEXP (x
, 0), 0);
5105 xoperands
[1] = XVECEXP (XEXP (XEXP (x
, 0), 1), 0, 0);
5106 pa_output_global_address (file
, xoperands
[1], 0);
5107 fprintf (file
, "(%s)", reg_names
[REGNO (xoperands
[0])]);
5111 case 'C': /* Plain (C)ondition */
5113 switch (GET_CODE (x
))
5116 fputs ("=", file
); break;
5118 fputs ("<>", file
); break;
5120 fputs (">", file
); break;
5122 fputs (">=", file
); break;
5124 fputs (">>=", file
); break;
5126 fputs (">>", file
); break;
5128 fputs ("<", file
); break;
5130 fputs ("<=", file
); break;
5132 fputs ("<<=", file
); break;
5134 fputs ("<<", file
); break;
5139 case 'N': /* Condition, (N)egated */
5140 switch (GET_CODE (x
))
5143 fputs ("<>", file
); break;
5145 fputs ("=", file
); break;
5147 fputs ("<=", file
); break;
5149 fputs ("<", file
); break;
5151 fputs ("<<", file
); break;
5153 fputs ("<<=", file
); break;
5155 fputs (">=", file
); break;
5157 fputs (">", file
); break;
5159 fputs (">>", file
); break;
5161 fputs (">>=", file
); break;
5166 /* For floating point comparisons. Note that the output
5167 predicates are the complement of the desired mode. The
5168 conditions for GT, GE, LT, LE and LTGT cause an invalid
5169 operation exception if the result is unordered and this
5170 exception is enabled in the floating-point status register. */
5172 switch (GET_CODE (x
))
5175 fputs ("!=", file
); break;
5177 fputs ("=", file
); break;
5179 fputs ("!>", file
); break;
5181 fputs ("!>=", file
); break;
5183 fputs ("!<", file
); break;
5185 fputs ("!<=", file
); break;
5187 fputs ("!<>", file
); break;
5189 fputs ("!?<=", file
); break;
5191 fputs ("!?<", file
); break;
5193 fputs ("!?>=", file
); break;
5195 fputs ("!?>", file
); break;
5197 fputs ("!?=", file
); break;
5199 fputs ("!?", file
); break;
5201 fputs ("?", file
); break;
5206 case 'S': /* Condition, operands are (S)wapped. */
5207 switch (GET_CODE (x
))
5210 fputs ("=", file
); break;
5212 fputs ("<>", file
); break;
5214 fputs ("<", file
); break;
5216 fputs ("<=", file
); break;
5218 fputs ("<<=", file
); break;
5220 fputs ("<<", file
); break;
5222 fputs (">", file
); break;
5224 fputs (">=", file
); break;
5226 fputs (">>=", file
); break;
5228 fputs (">>", file
); break;
5233 case 'B': /* Condition, (B)oth swapped and negate. */
5234 switch (GET_CODE (x
))
5237 fputs ("<>", file
); break;
5239 fputs ("=", file
); break;
5241 fputs (">=", file
); break;
5243 fputs (">", file
); break;
5245 fputs (">>", file
); break;
5247 fputs (">>=", file
); break;
5249 fputs ("<=", file
); break;
5251 fputs ("<", file
); break;
5253 fputs ("<<", file
); break;
5255 fputs ("<<=", file
); break;
5261 gcc_assert (GET_CODE (x
) == CONST_INT
);
5262 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~INTVAL (x
));
5265 gcc_assert (GET_CODE (x
) == CONST_INT
);
5266 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - (INTVAL (x
) & 63));
5269 gcc_assert (GET_CODE (x
) == CONST_INT
);
5270 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - (INTVAL (x
) & 31));
5273 gcc_assert (GET_CODE (x
) == CONST_INT
5274 && (INTVAL (x
) == 1 || INTVAL (x
) == 2 || INTVAL (x
) == 3));
5275 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5278 gcc_assert (GET_CODE (x
) == CONST_INT
&& exact_log2 (INTVAL (x
)) >= 0);
5279 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
5282 gcc_assert (GET_CODE (x
) == CONST_INT
);
5283 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 63 - (INTVAL (x
) & 63));
5286 gcc_assert (GET_CODE (x
) == CONST_INT
);
5287 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 31 - (INTVAL (x
) & 31));
5290 if (GET_CODE (x
) == CONST_INT
)
5295 switch (GET_CODE (XEXP (x
, 0)))
5299 if (ASSEMBLER_DIALECT
== 0)
5300 fputs ("s,mb", file
);
5302 fputs (",mb", file
);
5306 if (ASSEMBLER_DIALECT
== 0)
5307 fputs ("s,ma", file
);
5309 fputs (",ma", file
);
5312 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5313 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5315 if (ASSEMBLER_DIALECT
== 0)
5318 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5319 || GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5321 if (ASSEMBLER_DIALECT
== 0)
5322 fputs ("x,s", file
);
5326 else if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5330 if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5336 pa_output_global_address (file
, x
, 0);
5339 pa_output_global_address (file
, x
, 1);
5341 case 0: /* Don't do anything special */
5346 compute_zdepwi_operands (INTVAL (x
), op
);
5347 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5353 compute_zdepdi_operands (INTVAL (x
), op
);
5354 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5358 /* We can get here from a .vtable_inherit due to our
5359 CONSTANT_ADDRESS_P rejecting perfectly good constant
5365 if (GET_CODE (x
) == REG
)
5367 fputs (reg_names
[REGNO (x
)], file
);
5368 if (TARGET_64BIT
&& FP_REG_P (x
) && GET_MODE_SIZE (GET_MODE (x
)) <= 4)
5374 && GET_MODE_SIZE (GET_MODE (x
)) <= 4
5375 && (REGNO (x
) & 1) == 0)
5378 else if (GET_CODE (x
) == MEM
)
5380 int size
= GET_MODE_SIZE (GET_MODE (x
));
5381 rtx base
= NULL_RTX
;
5382 switch (GET_CODE (XEXP (x
, 0)))
5386 base
= XEXP (XEXP (x
, 0), 0);
5387 fprintf (file
, "-%d(%s)", size
, reg_names
[REGNO (base
)]);
5391 base
= XEXP (XEXP (x
, 0), 0);
5392 fprintf (file
, "%d(%s)", size
, reg_names
[REGNO (base
)]);
5395 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
)
5396 fprintf (file
, "%s(%s)",
5397 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 0), 0))],
5398 reg_names
[REGNO (XEXP (XEXP (x
, 0), 1))]);
5399 else if (GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5400 fprintf (file
, "%s(%s)",
5401 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 1), 0))],
5402 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
5403 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5404 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5406 /* Because the REG_POINTER flag can get lost during reload,
5407 pa_legitimate_address_p canonicalizes the order of the
5408 index and base registers in the combined move patterns. */
5409 rtx base
= XEXP (XEXP (x
, 0), 1);
5410 rtx index
= XEXP (XEXP (x
, 0), 0);
5412 fprintf (file
, "%s(%s)",
5413 reg_names
[REGNO (index
)], reg_names
[REGNO (base
)]);
5416 output_address (XEXP (x
, 0));
5419 output_address (XEXP (x
, 0));
5424 output_addr_const (file
, x
);
5427 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5430 pa_output_global_address (FILE *file
, rtx x
, int round_constant
)
5433 /* Imagine (high (const (plus ...))). */
5434 if (GET_CODE (x
) == HIGH
)
5437 if (GET_CODE (x
) == SYMBOL_REF
&& read_only_operand (x
, VOIDmode
))
5438 output_addr_const (file
, x
);
5439 else if (GET_CODE (x
) == SYMBOL_REF
&& !flag_pic
)
5441 output_addr_const (file
, x
);
5442 fputs ("-$global$", file
);
5444 else if (GET_CODE (x
) == CONST
)
5446 const char *sep
= "";
5447 int offset
= 0; /* assembler wants -$global$ at end */
5448 rtx base
= NULL_RTX
;
5450 switch (GET_CODE (XEXP (XEXP (x
, 0), 0)))
5454 base
= XEXP (XEXP (x
, 0), 0);
5455 output_addr_const (file
, base
);
5458 offset
= INTVAL (XEXP (XEXP (x
, 0), 0));
5464 switch (GET_CODE (XEXP (XEXP (x
, 0), 1)))
5468 base
= XEXP (XEXP (x
, 0), 1);
5469 output_addr_const (file
, base
);
5472 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
5478 /* How bogus. The compiler is apparently responsible for
5479 rounding the constant if it uses an LR field selector.
5481 The linker and/or assembler seem a better place since
5482 they have to do this kind of thing already.
5484 If we fail to do this, HP's optimizing linker may eliminate
5485 an addil, but not update the ldw/stw/ldo instruction that
5486 uses the result of the addil. */
5488 offset
= ((offset
+ 0x1000) & ~0x1fff);
5490 switch (GET_CODE (XEXP (x
, 0)))
5503 gcc_assert (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
);
5511 if (!read_only_operand (base
, VOIDmode
) && !flag_pic
)
5512 fputs ("-$global$", file
);
5514 fprintf (file
, "%s%d", sep
, offset
);
5517 output_addr_const (file
, x
);
5520 /* Output boilerplate text to appear at the beginning of the file.
5521 There are several possible versions. */
5522 #define aputs(x) fputs(x, asm_out_file)
5524 pa_file_start_level (void)
5527 aputs ("\t.LEVEL 2.0w\n");
5528 else if (TARGET_PA_20
)
5529 aputs ("\t.LEVEL 2.0\n");
5530 else if (TARGET_PA_11
)
5531 aputs ("\t.LEVEL 1.1\n");
5533 aputs ("\t.LEVEL 1.0\n");
5537 pa_file_start_space (int sortspace
)
5539 aputs ("\t.SPACE $PRIVATE$");
5542 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5544 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5545 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5546 "\n\t.SPACE $TEXT$");
5549 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5550 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5554 pa_file_start_file (int want_version
)
5556 if (write_symbols
!= NO_DEBUG
)
5558 output_file_directive (asm_out_file
, main_input_filename
);
5560 aputs ("\t.version\t\"01.01\"\n");
5565 pa_file_start_mcount (const char *aswhat
)
5568 fprintf (asm_out_file
, "\t.IMPORT _mcount,%s\n", aswhat
);
/* TARGET_ASM_FILE_START for ELF targets.  */
static void
pa_elf_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_mcount ("ENTRY");
  pa_file_start_file (0);
}
/* TARGET_ASM_FILE_START for SOM targets.  */
static void
pa_som_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (0);
  aputs ("\t.IMPORT $global$,DATA\n"
	 "\t.IMPORT $$dyncall,MILLICODE\n");
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
/* TARGET_ASM_FILE_START for Linux targets.  */
static void
pa_linux_file_start (void)
{
  pa_file_start_file (1);
  pa_file_start_level ();
  pa_file_start_mcount ("CODE");
}
/* TARGET_ASM_FILE_START for 64-bit HP-UX with GNU as.  */
static void
pa_hpux64_gas_file_start (void)
{
  pa_file_start_level ();
#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  if (profile_flag)
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
#endif
  pa_file_start_file (1);
}
/* TARGET_ASM_FILE_START for 64-bit HP-UX with the HP assembler.  */
static void
pa_hpux64_hpas_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (1);
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
5619 /* Search the deferred plabel list for SYMBOL and return its internal
5620 label. If an entry for SYMBOL is not found, a new entry is created. */
5623 pa_get_deferred_plabel (rtx symbol
)
5625 const char *fname
= XSTR (symbol
, 0);
5628 /* See if we have already put this function on the list of deferred
5629 plabels. This list is generally small, so a liner search is not
5630 too ugly. If it proves too slow replace it with something faster. */
5631 for (i
= 0; i
< n_deferred_plabels
; i
++)
5632 if (strcmp (fname
, XSTR (deferred_plabels
[i
].symbol
, 0)) == 0)
5635 /* If the deferred plabel list is empty, or this entry was not found
5636 on the list, create a new entry on the list. */
5637 if (deferred_plabels
== NULL
|| i
== n_deferred_plabels
)
5641 if (deferred_plabels
== 0)
5642 deferred_plabels
= ggc_alloc
<deferred_plabel
> ();
5644 deferred_plabels
= GGC_RESIZEVEC (struct deferred_plabel
,
5646 n_deferred_plabels
+ 1);
5648 i
= n_deferred_plabels
++;
5649 deferred_plabels
[i
].internal_label
= gen_label_rtx ();
5650 deferred_plabels
[i
].symbol
= symbol
;
5652 /* Gross. We have just implicitly taken the address of this
5653 function. Mark it in the same manner as assemble_name. */
5654 id
= maybe_get_identifier (targetm
.strip_name_encoding (fname
));
5656 mark_referenced (id
);
5659 return deferred_plabels
[i
].internal_label
;
5663 output_deferred_plabels (void)
5667 /* If we have some deferred plabels, then we need to switch into the
5668 data or readonly data section, and align it to a 4 byte boundary
5669 before outputting the deferred plabels. */
5670 if (n_deferred_plabels
)
5672 switch_to_section (flag_pic
? data_section
: readonly_data_section
);
5673 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
5676 /* Now output the deferred plabels. */
5677 for (i
= 0; i
< n_deferred_plabels
; i
++)
5679 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5680 CODE_LABEL_NUMBER (deferred_plabels
[i
].internal_label
));
5681 assemble_integer (deferred_plabels
[i
].symbol
,
5682 TARGET_64BIT
? 8 : 4, TARGET_64BIT
? 64 : 32, 1);
5686 /* Initialize optabs to point to emulation routines. */
5689 pa_init_libfuncs (void)
5691 if (HPUX_LONG_DOUBLE_LIBRARY
)
5693 set_optab_libfunc (add_optab
, TFmode
, "_U_Qfadd");
5694 set_optab_libfunc (sub_optab
, TFmode
, "_U_Qfsub");
5695 set_optab_libfunc (smul_optab
, TFmode
, "_U_Qfmpy");
5696 set_optab_libfunc (sdiv_optab
, TFmode
, "_U_Qfdiv");
5697 set_optab_libfunc (smin_optab
, TFmode
, "_U_Qmin");
5698 set_optab_libfunc (smax_optab
, TFmode
, "_U_Qfmax");
5699 set_optab_libfunc (sqrt_optab
, TFmode
, "_U_Qfsqrt");
5700 set_optab_libfunc (abs_optab
, TFmode
, "_U_Qfabs");
5701 set_optab_libfunc (neg_optab
, TFmode
, "_U_Qfneg");
5703 set_optab_libfunc (eq_optab
, TFmode
, "_U_Qfeq");
5704 set_optab_libfunc (ne_optab
, TFmode
, "_U_Qfne");
5705 set_optab_libfunc (gt_optab
, TFmode
, "_U_Qfgt");
5706 set_optab_libfunc (ge_optab
, TFmode
, "_U_Qfge");
5707 set_optab_libfunc (lt_optab
, TFmode
, "_U_Qflt");
5708 set_optab_libfunc (le_optab
, TFmode
, "_U_Qfle");
5709 set_optab_libfunc (unord_optab
, TFmode
, "_U_Qfunord");
5711 set_conv_libfunc (sext_optab
, TFmode
, SFmode
, "_U_Qfcnvff_sgl_to_quad");
5712 set_conv_libfunc (sext_optab
, TFmode
, DFmode
, "_U_Qfcnvff_dbl_to_quad");
5713 set_conv_libfunc (trunc_optab
, SFmode
, TFmode
, "_U_Qfcnvff_quad_to_sgl");
5714 set_conv_libfunc (trunc_optab
, DFmode
, TFmode
, "_U_Qfcnvff_quad_to_dbl");
5716 set_conv_libfunc (sfix_optab
, SImode
, TFmode
,
5717 TARGET_64BIT
? "__U_Qfcnvfxt_quad_to_sgl"
5718 : "_U_Qfcnvfxt_quad_to_sgl");
5719 set_conv_libfunc (sfix_optab
, DImode
, TFmode
,
5720 "_U_Qfcnvfxt_quad_to_dbl");
5721 set_conv_libfunc (ufix_optab
, SImode
, TFmode
,
5722 "_U_Qfcnvfxt_quad_to_usgl");
5723 set_conv_libfunc (ufix_optab
, DImode
, TFmode
,
5724 "_U_Qfcnvfxt_quad_to_udbl");
5726 set_conv_libfunc (sfloat_optab
, TFmode
, SImode
,
5727 "_U_Qfcnvxf_sgl_to_quad");
5728 set_conv_libfunc (sfloat_optab
, TFmode
, DImode
,
5729 "_U_Qfcnvxf_dbl_to_quad");
5730 set_conv_libfunc (ufloat_optab
, TFmode
, SImode
,
5731 "_U_Qfcnvxf_usgl_to_quad");
5732 set_conv_libfunc (ufloat_optab
, TFmode
, DImode
,
5733 "_U_Qfcnvxf_udbl_to_quad");
5736 if (TARGET_SYNC_LIBCALL
)
5737 init_sync_libfuncs (UNITS_PER_WORD
);
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli (enum millicodes);
/* One flag per millicode routine: nonzero once its .IMPORT was emitted.  */
static char imported[(int) end1000];
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
/* Template; the 4-char routine name is patched in at MILLI_START.  */
static const char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10
5751 import_milli (enum millicodes code
)
5753 char str
[sizeof (import_string
)];
5755 if (!imported
[(int) code
])
5757 imported
[(int) code
] = 1;
5758 strcpy (str
, import_string
);
5759 strncpy (str
+ MILLI_START
, milli_names
[(int) code
], 4);
5760 output_asm_insn (str
, 0);
5764 /* The register constraints have put the operands and return value in
5765 the proper registers. */
5768 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED
, rtx_insn
*insn
)
5770 import_milli (mulI
);
5771 return pa_output_millicode_call (insn
, gen_rtx_SYMBOL_REF (Pmode
, "$$mulI"));
/* Emit the rtl for doing a division by a constant.  */

/* Do magic division millicodes exist for this value? */
const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned.  */

static int div_milli[16][2];
5786 pa_emit_hpdiv_const (rtx
*operands
, int unsignedp
)
5788 if (GET_CODE (operands
[2]) == CONST_INT
5789 && INTVAL (operands
[2]) > 0
5790 && INTVAL (operands
[2]) < 16
5791 && pa_magic_milli
[INTVAL (operands
[2])])
5793 rtx ret
= gen_rtx_REG (SImode
, TARGET_64BIT
? 2 : 31);
5795 emit_move_insn (gen_rtx_REG (SImode
, 26), operands
[1]);
5799 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode
, 29),
5800 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
5802 gen_rtx_REG (SImode
, 26),
5804 gen_rtx_CLOBBER (VOIDmode
, operands
[4]),
5805 gen_rtx_CLOBBER (VOIDmode
, operands
[3]),
5806 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 26)),
5807 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 25)),
5808 gen_rtx_CLOBBER (VOIDmode
, ret
))));
5809 emit_move_insn (operands
[0], gen_rtx_REG (SImode
, 29));
5816 pa_output_div_insn (rtx
*operands
, int unsignedp
, rtx_insn
*insn
)
5820 /* If the divisor is a constant, try to use one of the special
5822 if (GET_CODE (operands
[0]) == CONST_INT
)
5824 static char buf
[100];
5825 divisor
= INTVAL (operands
[0]);
5826 if (!div_milli
[divisor
][unsignedp
])
5828 div_milli
[divisor
][unsignedp
] = 1;
5830 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands
);
5832 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands
);
5836 sprintf (buf
, "$$divU_" HOST_WIDE_INT_PRINT_DEC
,
5837 INTVAL (operands
[0]));
5838 return pa_output_millicode_call (insn
,
5839 gen_rtx_SYMBOL_REF (SImode
, buf
));
5843 sprintf (buf
, "$$divI_" HOST_WIDE_INT_PRINT_DEC
,
5844 INTVAL (operands
[0]));
5845 return pa_output_millicode_call (insn
,
5846 gen_rtx_SYMBOL_REF (SImode
, buf
));
5849 /* Divisor isn't a special constant. */
5854 import_milli (divU
);
5855 return pa_output_millicode_call (insn
,
5856 gen_rtx_SYMBOL_REF (SImode
, "$$divU"));
5860 import_milli (divI
);
5861 return pa_output_millicode_call (insn
,
5862 gen_rtx_SYMBOL_REF (SImode
, "$$divI"));
5867 /* Output a $$rem millicode to do mod. */
5870 pa_output_mod_insn (int unsignedp
, rtx_insn
*insn
)
5874 import_milli (remU
);
5875 return pa_output_millicode_call (insn
,
5876 gen_rtx_SYMBOL_REF (SImode
, "$$remU"));
5880 import_milli (remI
);
5881 return pa_output_millicode_call (insn
,
5882 gen_rtx_SYMBOL_REF (SImode
, "$$remI"));
5887 pa_output_arg_descriptor (rtx_insn
*call_insn
)
5889 const char *arg_regs
[4];
5890 machine_mode arg_mode
;
5892 int i
, output_flag
= 0;
5895 /* We neither need nor want argument location descriptors for the
5896 64bit runtime environment or the ELF32 environment. */
5897 if (TARGET_64BIT
|| TARGET_ELF32
)
5900 for (i
= 0; i
< 4; i
++)
5903 /* Specify explicitly that no argument relocations should take place
5904 if using the portable runtime calling conventions. */
5905 if (TARGET_PORTABLE_RUNTIME
)
5907 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5912 gcc_assert (CALL_P (call_insn
));
5913 for (link
= CALL_INSN_FUNCTION_USAGE (call_insn
);
5914 link
; link
= XEXP (link
, 1))
5916 rtx use
= XEXP (link
, 0);
5918 if (! (GET_CODE (use
) == USE
5919 && GET_CODE (XEXP (use
, 0)) == REG
5920 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
5923 arg_mode
= GET_MODE (XEXP (use
, 0));
5924 regno
= REGNO (XEXP (use
, 0));
5925 if (regno
>= 23 && regno
<= 26)
5927 arg_regs
[26 - regno
] = "GR";
5928 if (arg_mode
== DImode
)
5929 arg_regs
[25 - regno
] = "GR";
5931 else if (regno
>= 32 && regno
<= 39)
5933 if (arg_mode
== SFmode
)
5934 arg_regs
[(regno
- 32) / 2] = "FR";
5937 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5938 arg_regs
[(regno
- 34) / 2] = "FR";
5939 arg_regs
[(regno
- 34) / 2 + 1] = "FU";
5941 arg_regs
[(regno
- 34) / 2] = "FU";
5942 arg_regs
[(regno
- 34) / 2 + 1] = "FR";
5947 fputs ("\t.CALL ", asm_out_file
);
5948 for (i
= 0; i
< 4; i
++)
5953 fputc (',', asm_out_file
);
5954 fprintf (asm_out_file
, "ARGW%d=%s", i
, arg_regs
[i
]);
5957 fputc ('\n', asm_out_file
);
5960 /* Inform reload about cases where moving X with a mode MODE to or from
5961 a register in RCLASS requires an extra scratch or immediate register.
5962 Return the class needed for the immediate register. */
5965 pa_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass_i
,
5966 machine_mode mode
, secondary_reload_info
*sri
)
5969 enum reg_class rclass
= (enum reg_class
) rclass_i
;
5971 /* Handle the easy stuff first. */
5972 if (rclass
== R1_REGS
)
5978 if (rclass
== BASE_REG_CLASS
&& regno
< FIRST_PSEUDO_REGISTER
)
5984 /* If we have something like (mem (mem (...)), we can safely assume the
5985 inner MEM will end up in a general register after reloading, so there's
5986 no need for a secondary reload. */
5987 if (GET_CODE (x
) == MEM
&& GET_CODE (XEXP (x
, 0)) == MEM
)
5990 /* Trying to load a constant into a FP register during PIC code
5991 generation requires %r1 as a scratch register. For float modes,
5992 the only legitimate constant is CONST0_RTX. However, there are
5993 a few patterns that accept constant double operands. */
5995 && FP_REG_CLASS_P (rclass
)
5996 && (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
))
6001 sri
->icode
= CODE_FOR_reload_insi_r1
;
6005 sri
->icode
= CODE_FOR_reload_indi_r1
;
6009 sri
->icode
= CODE_FOR_reload_insf_r1
;
6013 sri
->icode
= CODE_FOR_reload_indf_r1
;
6022 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6023 register when we're generating PIC code or when the operand isn't
6025 if (pa_symbolic_expression_p (x
))
6027 if (GET_CODE (x
) == HIGH
)
6030 if (flag_pic
|| !read_only_operand (x
, VOIDmode
))
6035 sri
->icode
= CODE_FOR_reload_insi_r1
;
6039 sri
->icode
= CODE_FOR_reload_indi_r1
;
6049 /* Profiling showed the PA port spends about 1.3% of its compilation
6050 time in true_regnum from calls inside pa_secondary_reload_class. */
6051 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
6052 regno
= true_regnum (x
);
6054 /* Handle reloads for floating point loads and stores. */
6055 if ((regno
>= FIRST_PSEUDO_REGISTER
|| regno
== -1)
6056 && FP_REG_CLASS_P (rclass
))
6062 /* We don't need a secondary reload for indexed memory addresses.
6064 When INT14_OK_STRICT is true, it might appear that we could
6065 directly allow register indirect memory addresses. However,
6066 this doesn't work because we don't support SUBREGs in
6067 floating-point register copies and reload doesn't tell us
6068 when it's going to use a SUBREG. */
6069 if (IS_INDEX_ADDR_P (x
))
6073 /* Request a secondary reload with a general scratch register
6074 for everything else. ??? Could symbolic operands be handled
6075 directly when generating non-pic PA 2.0 code? */
6077 ? direct_optab_handler (reload_in_optab
, mode
)
6078 : direct_optab_handler (reload_out_optab
, mode
));
6082 /* A SAR<->FP register copy requires an intermediate general register
6083 and secondary memory. We need a secondary reload with a general
6084 scratch register for spills. */
6085 if (rclass
== SHIFT_REGS
)
6088 if (regno
>= FIRST_PSEUDO_REGISTER
|| regno
< 0)
6091 ? direct_optab_handler (reload_in_optab
, mode
)
6092 : direct_optab_handler (reload_out_optab
, mode
));
6096 /* Handle FP copy. */
6097 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno
)))
6098 return GENERAL_REGS
;
6101 if (regno
>= 0 && regno
< FIRST_PSEUDO_REGISTER
6102 && REGNO_REG_CLASS (regno
) == SHIFT_REGS
6103 && FP_REG_CLASS_P (rclass
))
6104 return GENERAL_REGS
;
6109 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6110 is only marked as live on entry by df-scan when it is a fixed
6111 register. It isn't a fixed register in the 64-bit runtime,
6112 so we need to mark it here. */
6115 pa_extra_live_on_entry (bitmap regs
)
6118 bitmap_set_bit (regs
, ARG_POINTER_REGNUM
);
6121 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6122 to prevent it from being deleted. */
6125 pa_eh_return_handler_rtx (void)
6129 tmp
= gen_rtx_PLUS (word_mode
, hard_frame_pointer_rtx
,
6130 TARGET_64BIT
? GEN_INT (-16) : GEN_INT (-20));
6131 tmp
= gen_rtx_MEM (word_mode
, tmp
);
6136 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6137 by invisible reference. As a GCC extension, we also pass anything
6138 with a zero or variable size by reference.
6140 The 64-bit runtime does not describe passing any types by invisible
6141 reference. The internals of GCC can't currently handle passing
6142 empty structures, and zero or variable length arrays when they are
6143 not passed entirely on the stack or by reference. Thus, as a GCC
6144 extension, we pass these types by reference. The HP compiler doesn't
6145 support these types, so hopefully there shouldn't be any compatibility
6146 issues. This may have to be revisited when HP releases a C99 compiler
6147 or updates the ABI. */
6150 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED
,
6151 machine_mode mode
, const_tree type
,
6152 bool named ATTRIBUTE_UNUSED
)
6157 size
= int_size_in_bytes (type
);
6159 size
= GET_MODE_SIZE (mode
);
6164 return size
<= 0 || size
> 8;
6168 pa_function_arg_padding (machine_mode mode
, const_tree type
)
6173 && (AGGREGATE_TYPE_P (type
)
6174 || TREE_CODE (type
) == COMPLEX_TYPE
6175 || TREE_CODE (type
) == VECTOR_TYPE
)))
6177 /* Return none if justification is not required. */
6179 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
6180 && (int_size_in_bytes (type
) * BITS_PER_UNIT
) % PARM_BOUNDARY
== 0)
6183 /* The directions set here are ignored when a BLKmode argument larger
6184 than a word is placed in a register. Different code is used for
6185 the stack and registers. This makes it difficult to have a
6186 consistent data representation for both the stack and registers.
6187 For both runtimes, the justification and padding for arguments on
6188 the stack and in registers should be identical. */
6190 /* The 64-bit runtime specifies left justification for aggregates. */
6193 /* The 32-bit runtime architecture specifies right justification.
6194 When the argument is passed on the stack, the argument is padded
6195 with garbage on the left. The HP compiler pads with zeros. */
6199 if (GET_MODE_BITSIZE (mode
) < PARM_BOUNDARY
)
6206 /* Do what is necessary for `va_start'. We look at the current function
6207 to determine if stdargs or varargs is used and fill in an initial
6208 va_list. A pointer to this constructor is returned. */
6211 hppa_builtin_saveregs (void)
6214 tree fntype
= TREE_TYPE (current_function_decl
);
6215 int argadj
= ((!stdarg_p (fntype
))
6216 ? UNITS_PER_WORD
: 0);
6219 offset
= plus_constant (Pmode
, crtl
->args
.arg_offset_rtx
, argadj
);
6221 offset
= crtl
->args
.arg_offset_rtx
;
6227 /* Adjust for varargs/stdarg differences. */
6229 offset
= plus_constant (Pmode
, crtl
->args
.arg_offset_rtx
, -argadj
);
6231 offset
= crtl
->args
.arg_offset_rtx
;
6233 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6234 from the incoming arg pointer and growing to larger addresses. */
6235 for (i
= 26, off
= -64; i
>= 19; i
--, off
+= 8)
6236 emit_move_insn (gen_rtx_MEM (word_mode
,
6237 plus_constant (Pmode
,
6238 arg_pointer_rtx
, off
)),
6239 gen_rtx_REG (word_mode
, i
));
6241 /* The incoming args pointer points just beyond the flushback area;
6242 normally this is not a serious concern. However, when we are doing
6243 varargs/stdargs we want to make the arg pointer point to the start
6244 of the incoming argument area. */
6245 emit_move_insn (virtual_incoming_args_rtx
,
6246 plus_constant (Pmode
, arg_pointer_rtx
, -64));
6248 /* Now return a pointer to the first anonymous argument. */
6249 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6250 virtual_incoming_args_rtx
,
6251 offset
, 0, 0, OPTAB_LIB_WIDEN
));
6254 /* Store general registers on the stack. */
6255 dest
= gen_rtx_MEM (BLKmode
,
6256 plus_constant (Pmode
, crtl
->args
.internal_arg_pointer
,
6258 set_mem_alias_set (dest
, get_varargs_alias_set ());
6259 set_mem_align (dest
, BITS_PER_WORD
);
6260 move_block_from_reg (23, dest
, 4);
6262 /* move_block_from_reg will emit code to store the argument registers
6263 individually as scalar stores.
6265 However, other insns may later load from the same addresses for
6266 a structure load (passing a struct to a varargs routine).
6268 The alias code assumes that such aliasing can never happen, so we
6269 have to keep memory referencing insns from moving up beyond the
6270 last argument register store. So we emit a blockage insn here. */
6271 emit_insn (gen_blockage ());
6273 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6274 crtl
->args
.internal_arg_pointer
,
6275 offset
, 0, 0, OPTAB_LIB_WIDEN
));
6279 hppa_va_start (tree valist
, rtx nextarg
)
6281 nextarg
= expand_builtin_saveregs ();
6282 std_expand_builtin_va_start (valist
, nextarg
);
6286 hppa_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
6291 /* Args grow upward. We can use the generic routines. */
6292 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
6294 else /* !TARGET_64BIT */
6296 tree ptr
= build_pointer_type (type
);
6299 unsigned int size
, ofs
;
6302 indirect
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, 0);
6306 ptr
= build_pointer_type (type
);
6308 size
= int_size_in_bytes (type
);
6309 valist_type
= TREE_TYPE (valist
);
6311 /* Args grow down. Not handled by generic routines. */
6313 u
= fold_convert (sizetype
, size_in_bytes (type
));
6314 u
= fold_build1 (NEGATE_EXPR
, sizetype
, u
);
6315 t
= fold_build_pointer_plus (valist
, u
);
6317 /* Align to 4 or 8 byte boundary depending on argument size. */
6319 u
= build_int_cst (TREE_TYPE (t
), (HOST_WIDE_INT
)(size
> 4 ? -8 : -4));
6320 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
, u
);
6321 t
= fold_convert (valist_type
, t
);
6323 t
= build2 (MODIFY_EXPR
, valist_type
, valist
, t
);
6325 ofs
= (8 - size
) % 4;
6327 t
= fold_build_pointer_plus_hwi (t
, ofs
);
6329 t
= fold_convert (ptr
, t
);
6330 t
= build_va_arg_indirect_ref (t
);
6333 t
= build_va_arg_indirect_ref (t
);
6339 /* True if MODE is valid for the target. By "valid", we mean able to
6340 be manipulated in non-trivial ways. In particular, this means all
6341 the arithmetic is supported.
6343 Currently, TImode is not valid as the HP 64-bit runtime documentation
6344 doesn't document the alignment and calling conventions for this type.
6345 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6346 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6349 pa_scalar_mode_supported_p (machine_mode mode
)
6351 int precision
= GET_MODE_PRECISION (mode
);
6353 switch (GET_MODE_CLASS (mode
))
6355 case MODE_PARTIAL_INT
:
6357 if (precision
== CHAR_TYPE_SIZE
)
6359 if (precision
== SHORT_TYPE_SIZE
)
6361 if (precision
== INT_TYPE_SIZE
)
6363 if (precision
== LONG_TYPE_SIZE
)
6365 if (precision
== LONG_LONG_TYPE_SIZE
)
6370 if (precision
== FLOAT_TYPE_SIZE
)
6372 if (precision
== DOUBLE_TYPE_SIZE
)
6374 if (precision
== LONG_DOUBLE_TYPE_SIZE
)
6378 case MODE_DECIMAL_FLOAT
:
6386 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6387 it branches into the delay slot. Otherwise, return FALSE. */
6390 branch_to_delay_slot_p (rtx_insn
*insn
)
6392 rtx_insn
*jump_insn
;
6394 if (dbr_sequence_length ())
6397 jump_insn
= next_active_insn (JUMP_LABEL (insn
));
6400 insn
= next_active_insn (insn
);
6401 if (jump_insn
== insn
)
6404 /* We can't rely on the length of asms. So, we return FALSE when
6405 the branch is followed by an asm. */
6407 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6408 || extract_asm_operands (PATTERN (insn
)) != NULL_RTX
6409 || get_attr_length (insn
) > 0)
6416 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6418 This occurs when INSN has an unfilled delay slot and is followed
6419 by an asm. Disaster can occur if the asm is empty and the jump
6420 branches into the delay slot. So, we add a nop in the delay slot
6421 when this occurs. */
6424 branch_needs_nop_p (rtx_insn
*insn
)
6426 rtx_insn
*jump_insn
;
6428 if (dbr_sequence_length ())
6431 jump_insn
= next_active_insn (JUMP_LABEL (insn
));
6434 insn
= next_active_insn (insn
);
6435 if (!insn
|| jump_insn
== insn
)
6438 if (!(GET_CODE (PATTERN (insn
)) == ASM_INPUT
6439 || extract_asm_operands (PATTERN (insn
)) != NULL_RTX
)
6440 && get_attr_length (insn
) > 0)
6447 /* Return TRUE if INSN, a forward jump insn, can use nullification
6448 to skip the following instruction. This avoids an extra cycle due
6449 to a mis-predicted branch when we fall through. */
6452 use_skip_p (rtx_insn
*insn
)
6454 rtx_insn
*jump_insn
= next_active_insn (JUMP_LABEL (insn
));
6458 insn
= next_active_insn (insn
);
6460 /* We can't rely on the length of asms, so we can't skip asms. */
6462 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6463 || extract_asm_operands (PATTERN (insn
)) != NULL_RTX
)
6465 if (get_attr_length (insn
) == 4
6466 && jump_insn
== next_active_insn (insn
))
6468 if (get_attr_length (insn
) > 0)
6475 /* This routine handles all the normal conditional branch sequences we
6476 might need to generate. It handles compare immediate vs compare
6477 register, nullification of delay slots, varying length branches,
6478 negated branches, and all combinations of the above. It returns the
6479 output appropriate to emit the branch corresponding to all given
6483 pa_output_cbranch (rtx
*operands
, int negated
, rtx_insn
*insn
)
6485 static char buf
[100];
6487 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6488 int length
= get_attr_length (insn
);
6491 /* A conditional branch to the following instruction (e.g. the delay slot)
6492 is asking for a disaster. This can happen when not optimizing and
6493 when jump optimization fails.
6495 While it is usually safe to emit nothing, this can fail if the
6496 preceding instruction is a nullified branch with an empty delay
6497 slot and the same branch target as this branch. We could check
6498 for this but jump optimization should eliminate nop jumps. It
6499 is always safe to emit a nop. */
6500 if (branch_to_delay_slot_p (insn
))
6503 /* The doubleword form of the cmpib instruction doesn't have the LEU
6504 and GTU conditions while the cmpb instruction does. Since we accept
6505 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6506 if (GET_MODE (operands
[1]) == DImode
&& operands
[2] == const0_rtx
)
6507 operands
[2] = gen_rtx_REG (DImode
, 0);
6508 if (GET_MODE (operands
[2]) == DImode
&& operands
[1] == const0_rtx
)
6509 operands
[1] = gen_rtx_REG (DImode
, 0);
6511 /* If this is a long branch with its delay slot unfilled, set `nullify'
6512 as it can nullify the delay slot and save a nop. */
6513 if (length
== 8 && dbr_sequence_length () == 0)
6516 /* If this is a short forward conditional branch which did not get
6517 its delay slot filled, the delay slot can still be nullified. */
6518 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6519 nullify
= forward_branch_p (insn
);
6521 /* A forward branch over a single nullified insn can be done with a
6522 comclr instruction. This avoids a single cycle penalty due to
6523 mis-predicted branch if we fall through (branch not taken). */
6524 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
6528 /* All short conditional branches except backwards with an unfilled
6532 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6534 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6535 if (GET_MODE (operands
[1]) == DImode
)
6538 strcat (buf
, "%B3");
6540 strcat (buf
, "%S3");
6542 strcat (buf
, " %2,%r1,%%r0");
6545 if (branch_needs_nop_p (insn
))
6546 strcat (buf
, ",n %2,%r1,%0%#");
6548 strcat (buf
, ",n %2,%r1,%0");
6551 strcat (buf
, " %2,%r1,%0");
6554 /* All long conditionals. Note a short backward branch with an
6555 unfilled delay slot is treated just like a long backward branch
6556 with an unfilled delay slot. */
6558 /* Handle weird backwards branch with a filled delay slot
6559 which is nullified. */
6560 if (dbr_sequence_length () != 0
6561 && ! forward_branch_p (insn
)
6564 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6565 if (GET_MODE (operands
[1]) == DImode
)
6568 strcat (buf
, "%S3");
6570 strcat (buf
, "%B3");
6571 strcat (buf
, ",n %2,%r1,.+12\n\tb %0");
6573 /* Handle short backwards branch with an unfilled delay slot.
6574 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6575 taken and untaken branches. */
6576 else if (dbr_sequence_length () == 0
6577 && ! forward_branch_p (insn
)
6578 && INSN_ADDRESSES_SET_P ()
6579 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6580 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6582 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6583 if (GET_MODE (operands
[1]) == DImode
)
6586 strcat (buf
, "%B3 %2,%r1,%0%#");
6588 strcat (buf
, "%S3 %2,%r1,%0%#");
6592 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6593 if (GET_MODE (operands
[1]) == DImode
)
6596 strcat (buf
, "%S3");
6598 strcat (buf
, "%B3");
6600 strcat (buf
, " %2,%r1,%%r0\n\tb,n %0");
6602 strcat (buf
, " %2,%r1,%%r0\n\tb %0");
6607 /* The reversed conditional branch must branch over one additional
6608 instruction if the delay slot is filled and needs to be extracted
6609 by pa_output_lbranch. If the delay slot is empty or this is a
6610 nullified forward branch, the instruction after the reversed
6611 condition branch must be nullified. */
6612 if (dbr_sequence_length () == 0
6613 || (nullify
&& forward_branch_p (insn
)))
6617 operands
[4] = GEN_INT (length
);
6622 operands
[4] = GEN_INT (length
+ 4);
6625 /* Create a reversed conditional branch which branches around
6626 the following insns. */
6627 if (GET_MODE (operands
[1]) != DImode
)
6633 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6636 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6642 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6645 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6654 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6657 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6663 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6666 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6670 output_asm_insn (buf
, operands
);
6671 return pa_output_lbranch (operands
[0], insn
, xdelay
);
6676 /* This routine handles output of long unconditional branches that
6677 exceed the maximum range of a simple branch instruction. Since
6678 we don't have a register available for the branch, we save register
6679 %r1 in the frame marker, load the branch destination DEST into %r1,
6680 execute the branch, and restore %r1 in the delay slot of the branch.
6682 Since long branches may have an insn in the delay slot and the
6683 delay slot is used to restore %r1, we in general need to extract
6684 this insn and execute it before the branch. However, to facilitate
6685 use of this function by conditional branches, we also provide an
6686 option to not extract the delay insn so that it will be emitted
6687 after the long branch. So, if there is an insn in the delay slot,
6688 it is extracted if XDELAY is nonzero.
6690 The lengths of the various long-branch sequences are 20, 16 and 24
6691 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6694 pa_output_lbranch (rtx dest
, rtx_insn
*insn
, int xdelay
)
6698 xoperands
[0] = dest
;
6700 /* First, free up the delay slot. */
6701 if (xdelay
&& dbr_sequence_length () != 0)
6703 /* We can't handle a jump in the delay slot. */
6704 gcc_assert (! JUMP_P (NEXT_INSN (insn
)));
6706 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
6709 /* Now delete the delay insn. */
6710 SET_INSN_DELETED (NEXT_INSN (insn
));
6713 /* Output an insn to save %r1. The runtime documentation doesn't
6714 specify whether the "Clean Up" slot in the callers frame can
6715 be clobbered by the callee. It isn't copied by HP's builtin
6716 alloca, so this suggests that it can be clobbered if necessary.
6717 The "Static Link" location is copied by HP builtin alloca, so
6718 we avoid using it. Using the cleanup slot might be a problem
6719 if we have to interoperate with languages that pass cleanup
6720 information. However, it should be possible to handle these
6721 situations with GCC's asm feature.
6723 The "Current RP" slot is reserved for the called procedure, so
6724 we try to use it when we don't have a frame of our own. It's
6725 rather unlikely that we won't have a frame when we need to emit
6728 Really the way to go long term is a register scavenger; goto
6729 the target of the jump and find a register which we can use
6730 as a scratch to hold the value in %r1. Then, we wouldn't have
6731 to free up the delay slot or clobber a slot that may be needed
6732 for other purposes. */
6735 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6736 /* Use the return pointer slot in the frame marker. */
6737 output_asm_insn ("std %%r1,-16(%%r30)", xoperands
);
6739 /* Use the slot at -40 in the frame marker since HP builtin
6740 alloca doesn't copy it. */
6741 output_asm_insn ("std %%r1,-40(%%r30)", xoperands
);
6745 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6746 /* Use the return pointer slot in the frame marker. */
6747 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands
);
6749 /* Use the "Clean Up" slot in the frame marker. In GCC,
6750 the only other use of this location is for copying a
6751 floating point double argument from a floating-point
6752 register to two general registers. The copy is done
6753 as an "atomic" operation when outputting a call, so it
6754 won't interfere with our using the location here. */
6755 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands
);
6758 if (TARGET_PORTABLE_RUNTIME
)
6760 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
6761 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
6762 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
6766 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
6767 if (TARGET_SOM
|| !TARGET_GAS
)
6769 xoperands
[1] = gen_label_rtx ();
6770 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands
);
6771 targetm
.asm_out
.internal_label (asm_out_file
, "L",
6772 CODE_LABEL_NUMBER (xoperands
[1]));
6773 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands
);
6777 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands
);
6778 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
6780 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
6783 /* Now output a very long branch to the original target. */
6784 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands
);
6786 /* Now restore the value of %r1 in the delay slot. */
6789 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6790 return "ldd -16(%%r30),%%r1";
6792 return "ldd -40(%%r30),%%r1";
6796 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6797 return "ldw -20(%%r30),%%r1";
6799 return "ldw -12(%%r30),%%r1";
6803 /* This routine handles all the branch-on-bit conditional branch sequences we
6804 might need to generate. It handles nullification of delay slots,
6805 varying length branches, negated branches and all combinations of the
6806 above. it returns the appropriate output template to emit the branch. */
6809 pa_output_bb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx_insn
*insn
, int which
)
6811 static char buf
[100];
6813 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6814 int length
= get_attr_length (insn
);
6817 /* A conditional branch to the following instruction (e.g. the delay slot) is
6818 asking for a disaster. I do not think this can happen as this pattern
6819 is only used when optimizing; jump optimization should eliminate the
6820 jump. But be prepared just in case. */
6822 if (branch_to_delay_slot_p (insn
))
6825 /* If this is a long branch with its delay slot unfilled, set `nullify'
6826 as it can nullify the delay slot and save a nop. */
6827 if (length
== 8 && dbr_sequence_length () == 0)
6830 /* If this is a short forward conditional branch which did not get
6831 its delay slot filled, the delay slot can still be nullified. */
6832 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6833 nullify
= forward_branch_p (insn
);
6835 /* A forward branch over a single nullified insn can be done with a
6836 extrs instruction. This avoids a single cycle penalty due to
6837 mis-predicted branch if we fall through (branch not taken). */
6838 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
6843 /* All short conditional branches except backwards with an unfilled
6847 strcpy (buf
, "{extrs,|extrw,s,}");
6849 strcpy (buf
, "bb,");
6850 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
6851 strcpy (buf
, "extrd,s,*");
6852 else if (GET_MODE (operands
[0]) == DImode
)
6853 strcpy (buf
, "bb,*");
6854 if ((which
== 0 && negated
)
6855 || (which
== 1 && ! negated
))
6860 strcat (buf
, " %0,%1,1,%%r0");
6861 else if (nullify
&& negated
)
6863 if (branch_needs_nop_p (insn
))
6864 strcat (buf
, ",n %0,%1,%3%#");
6866 strcat (buf
, ",n %0,%1,%3");
6868 else if (nullify
&& ! negated
)
6870 if (branch_needs_nop_p (insn
))
6871 strcat (buf
, ",n %0,%1,%2%#");
6873 strcat (buf
, ",n %0,%1,%2");
6875 else if (! nullify
&& negated
)
6876 strcat (buf
, " %0,%1,%3");
6877 else if (! nullify
&& ! negated
)
6878 strcat (buf
, " %0,%1,%2");
6881 /* All long conditionals. Note a short backward branch with an
6882 unfilled delay slot is treated just like a long backward branch
6883 with an unfilled delay slot. */
6885 /* Handle weird backwards branch with a filled delay slot
6886 which is nullified. */
6887 if (dbr_sequence_length () != 0
6888 && ! forward_branch_p (insn
)
6891 strcpy (buf
, "bb,");
6892 if (GET_MODE (operands
[0]) == DImode
)
6894 if ((which
== 0 && negated
)
6895 || (which
== 1 && ! negated
))
6900 strcat (buf
, ",n %0,%1,.+12\n\tb %3");
6902 strcat (buf
, ",n %0,%1,.+12\n\tb %2");
6904 /* Handle short backwards branch with an unfilled delay slot.
6905 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6906 taken and untaken branches. */
6907 else if (dbr_sequence_length () == 0
6908 && ! forward_branch_p (insn
)
6909 && INSN_ADDRESSES_SET_P ()
6910 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6911 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6913 strcpy (buf
, "bb,");
6914 if (GET_MODE (operands
[0]) == DImode
)
6916 if ((which
== 0 && negated
)
6917 || (which
== 1 && ! negated
))
6922 strcat (buf
, " %0,%1,%3%#");
6924 strcat (buf
, " %0,%1,%2%#");
6928 if (GET_MODE (operands
[0]) == DImode
)
6929 strcpy (buf
, "extrd,s,*");
6931 strcpy (buf
, "{extrs,|extrw,s,}");
6932 if ((which
== 0 && negated
)
6933 || (which
== 1 && ! negated
))
6937 if (nullify
&& negated
)
6938 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %3");
6939 else if (nullify
&& ! negated
)
6940 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %2");
6942 strcat (buf
, " %0,%1,1,%%r0\n\tb %3");
6944 strcat (buf
, " %0,%1,1,%%r0\n\tb %2");
6949 /* The reversed conditional branch must branch over one additional
6950 instruction if the delay slot is filled and needs to be extracted
6951 by pa_output_lbranch. If the delay slot is empty or this is a
6952 nullified forward branch, the instruction after the reversed
6953 condition branch must be nullified. */
6954 if (dbr_sequence_length () == 0
6955 || (nullify
&& forward_branch_p (insn
)))
6959 operands
[4] = GEN_INT (length
);
6964 operands
[4] = GEN_INT (length
+ 4);
6967 if (GET_MODE (operands
[0]) == DImode
)
6968 strcpy (buf
, "bb,*");
6970 strcpy (buf
, "bb,");
6971 if ((which
== 0 && negated
)
6972 || (which
== 1 && !negated
))
6977 strcat (buf
, ",n %0,%1,.+%4");
6979 strcat (buf
, " %0,%1,.+%4");
6980 output_asm_insn (buf
, operands
);
6981 return pa_output_lbranch (negated
? operands
[3] : operands
[2],
6987 /* This routine handles all the branch-on-variable-bit conditional branch
6988 sequences we might need to generate. It handles nullification of delay
6989 slots, varying length branches, negated branches and all combinations
6990 of the above. it returns the appropriate output template to emit the
6994 pa_output_bvb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx_insn
*insn
,
6997 static char buf
[100];
6999 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7000 int length
= get_attr_length (insn
);
7003 /* A conditional branch to the following instruction (e.g. the delay slot) is
7004 asking for a disaster. I do not think this can happen as this pattern
7005 is only used when optimizing; jump optimization should eliminate the
7006 jump. But be prepared just in case. */
7008 if (branch_to_delay_slot_p (insn
))
7011 /* If this is a long branch with its delay slot unfilled, set `nullify'
7012 as it can nullify the delay slot and save a nop. */
7013 if (length
== 8 && dbr_sequence_length () == 0)
7016 /* If this is a short forward conditional branch which did not get
7017 its delay slot filled, the delay slot can still be nullified. */
7018 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7019 nullify
= forward_branch_p (insn
);
7021 /* A forward branch over a single nullified insn can be done with a
7022 extrs instruction. This avoids a single cycle penalty due to
7023 mis-predicted branch if we fall through (branch not taken). */
7024 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
7029 /* All short conditional branches except backwards with an unfilled
7033 strcpy (buf
, "{vextrs,|extrw,s,}");
7035 strcpy (buf
, "{bvb,|bb,}");
7036 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
7037 strcpy (buf
, "extrd,s,*");
7038 else if (GET_MODE (operands
[0]) == DImode
)
7039 strcpy (buf
, "bb,*");
7040 if ((which
== 0 && negated
)
7041 || (which
== 1 && ! negated
))
7046 strcat (buf
, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7047 else if (nullify
&& negated
)
7049 if (branch_needs_nop_p (insn
))
7050 strcat (buf
, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7052 strcat (buf
, "{,n %0,%3|,n %0,%%sar,%3}");
7054 else if (nullify
&& ! negated
)
7056 if (branch_needs_nop_p (insn
))
7057 strcat (buf
, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7059 strcat (buf
, "{,n %0,%2|,n %0,%%sar,%2}");
7061 else if (! nullify
&& negated
)
7062 strcat (buf
, "{ %0,%3| %0,%%sar,%3}");
7063 else if (! nullify
&& ! negated
)
7064 strcat (buf
, "{ %0,%2| %0,%%sar,%2}");
7067 /* All long conditionals. Note a short backward branch with an
7068 unfilled delay slot is treated just like a long backward branch
7069 with an unfilled delay slot. */
7071 /* Handle weird backwards branch with a filled delay slot
7072 which is nullified. */
7073 if (dbr_sequence_length () != 0
7074 && ! forward_branch_p (insn
)
7077 strcpy (buf
, "{bvb,|bb,}");
7078 if (GET_MODE (operands
[0]) == DImode
)
7080 if ((which
== 0 && negated
)
7081 || (which
== 1 && ! negated
))
7086 strcat (buf
, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7088 strcat (buf
, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7090 /* Handle short backwards branch with an unfilled delay slot.
7091 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7092 taken and untaken branches. */
7093 else if (dbr_sequence_length () == 0
7094 && ! forward_branch_p (insn
)
7095 && INSN_ADDRESSES_SET_P ()
7096 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7097 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7099 strcpy (buf
, "{bvb,|bb,}");
7100 if (GET_MODE (operands
[0]) == DImode
)
7102 if ((which
== 0 && negated
)
7103 || (which
== 1 && ! negated
))
7108 strcat (buf
, "{ %0,%3%#| %0,%%sar,%3%#}");
7110 strcat (buf
, "{ %0,%2%#| %0,%%sar,%2%#}");
7114 strcpy (buf
, "{vextrs,|extrw,s,}");
7115 if (GET_MODE (operands
[0]) == DImode
)
7116 strcpy (buf
, "extrd,s,*");
7117 if ((which
== 0 && negated
)
7118 || (which
== 1 && ! negated
))
7122 if (nullify
&& negated
)
7123 strcat (buf
, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7124 else if (nullify
&& ! negated
)
7125 strcat (buf
, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7127 strcat (buf
, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7129 strcat (buf
, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7134 /* The reversed conditional branch must branch over one additional
7135 instruction if the delay slot is filled and needs to be extracted
7136 by pa_output_lbranch. If the delay slot is empty or this is a
7137 nullified forward branch, the instruction after the reversed
7138 condition branch must be nullified. */
7139 if (dbr_sequence_length () == 0
7140 || (nullify
&& forward_branch_p (insn
)))
7144 operands
[4] = GEN_INT (length
);
7149 operands
[4] = GEN_INT (length
+ 4);
7152 if (GET_MODE (operands
[0]) == DImode
)
7153 strcpy (buf
, "bb,*");
7155 strcpy (buf
, "{bvb,|bb,}");
7156 if ((which
== 0 && negated
)
7157 || (which
== 1 && !negated
))
7162 strcat (buf
, ",n {%0,.+%4|%0,%%sar,.+%4}");
7164 strcat (buf
, " {%0,.+%4|%0,%%sar,.+%4}");
7165 output_asm_insn (buf
, operands
);
7166 return pa_output_lbranch (negated
? operands
[3] : operands
[2],
7172 /* Return the output template for emitting a dbra type insn.
7174 Note it may perform some output operations on its own before
7175 returning the final output string. */
7177 pa_output_dbra (rtx
*operands
, rtx_insn
*insn
, int which_alternative
)
7179 int length
= get_attr_length (insn
);
7181 /* A conditional branch to the following instruction (e.g. the delay slot) is
7182 asking for a disaster. Be prepared! */
7184 if (branch_to_delay_slot_p (insn
))
7186 if (which_alternative
== 0)
7187 return "ldo %1(%0),%0";
7188 else if (which_alternative
== 1)
7190 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands
);
7191 output_asm_insn ("ldw -16(%%r30),%4", operands
);
7192 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands
);
7193 return "{fldws|fldw} -16(%%r30),%0";
7197 output_asm_insn ("ldw %0,%4", operands
);
7198 return "ldo %1(%4),%4\n\tstw %4,%0";
7202 if (which_alternative
== 0)
7204 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7207 /* If this is a long branch with its delay slot unfilled, set `nullify'
7208 as it can nullify the delay slot and save a nop. */
7209 if (length
== 8 && dbr_sequence_length () == 0)
7212 /* If this is a short forward conditional branch which did not get
7213 its delay slot filled, the delay slot can still be nullified. */
7214 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7215 nullify
= forward_branch_p (insn
);
7222 if (branch_needs_nop_p (insn
))
7223 return "addib,%C2,n %1,%0,%3%#";
7225 return "addib,%C2,n %1,%0,%3";
7228 return "addib,%C2 %1,%0,%3";
7231 /* Handle weird backwards branch with a fulled delay slot
7232 which is nullified. */
7233 if (dbr_sequence_length () != 0
7234 && ! forward_branch_p (insn
)
7236 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7237 /* Handle short backwards branch with an unfilled delay slot.
7238 Using a addb;nop rather than addi;bl saves 1 cycle for both
7239 taken and untaken branches. */
7240 else if (dbr_sequence_length () == 0
7241 && ! forward_branch_p (insn
)
7242 && INSN_ADDRESSES_SET_P ()
7243 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7244 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7245 return "addib,%C2 %1,%0,%3%#";
7247 /* Handle normal cases. */
7249 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7251 return "addi,%N2 %1,%0,%0\n\tb %3";
7254 /* The reversed conditional branch must branch over one additional
7255 instruction if the delay slot is filled and needs to be extracted
7256 by pa_output_lbranch. If the delay slot is empty or this is a
7257 nullified forward branch, the instruction after the reversed
7258 condition branch must be nullified. */
7259 if (dbr_sequence_length () == 0
7260 || (nullify
&& forward_branch_p (insn
)))
7264 operands
[4] = GEN_INT (length
);
7269 operands
[4] = GEN_INT (length
+ 4);
7273 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands
);
7275 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands
);
7277 return pa_output_lbranch (operands
[3], insn
, xdelay
);
7281 /* Deal with gross reload from FP register case. */
7282 else if (which_alternative
== 1)
7284 /* Move loop counter from FP register to MEM then into a GR,
7285 increment the GR, store the GR into MEM, and finally reload
7286 the FP register from MEM from within the branch's delay slot. */
7287 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7289 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands
);
7291 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7292 else if (length
== 28)
7293 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7296 operands
[5] = GEN_INT (length
- 16);
7297 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands
);
7298 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands
);
7299 return pa_output_lbranch (operands
[3], insn
, 0);
7302 /* Deal with gross reload from memory case. */
7305 /* Reload loop counter from memory, the store back to memory
7306 happens in the branch's delay slot. */
7307 output_asm_insn ("ldw %0,%4", operands
);
7309 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7310 else if (length
== 16)
7311 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7314 operands
[5] = GEN_INT (length
- 4);
7315 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands
);
7316 return pa_output_lbranch (operands
[3], insn
, 0);
7321 /* Return the output template for emitting a movb type insn.
7323 Note it may perform some output operations on its own before
7324 returning the final output string. */
7326 pa_output_movb (rtx
*operands
, rtx_insn
*insn
, int which_alternative
,
7327 int reverse_comparison
)
7329 int length
= get_attr_length (insn
);
7331 /* A conditional branch to the following instruction (e.g. the delay slot) is
7332 asking for a disaster. Be prepared! */
7334 if (branch_to_delay_slot_p (insn
))
7336 if (which_alternative
== 0)
7337 return "copy %1,%0";
7338 else if (which_alternative
== 1)
7340 output_asm_insn ("stw %1,-16(%%r30)", operands
);
7341 return "{fldws|fldw} -16(%%r30),%0";
7343 else if (which_alternative
== 2)
7349 /* Support the second variant. */
7350 if (reverse_comparison
)
7351 PUT_CODE (operands
[2], reverse_condition (GET_CODE (operands
[2])));
7353 if (which_alternative
== 0)
7355 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7358 /* If this is a long branch with its delay slot unfilled, set `nullify'
7359 as it can nullify the delay slot and save a nop. */
7360 if (length
== 8 && dbr_sequence_length () == 0)
7363 /* If this is a short forward conditional branch which did not get
7364 its delay slot filled, the delay slot can still be nullified. */
7365 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7366 nullify
= forward_branch_p (insn
);
7373 if (branch_needs_nop_p (insn
))
7374 return "movb,%C2,n %1,%0,%3%#";
7376 return "movb,%C2,n %1,%0,%3";
7379 return "movb,%C2 %1,%0,%3";
7382 /* Handle weird backwards branch with a filled delay slot
7383 which is nullified. */
7384 if (dbr_sequence_length () != 0
7385 && ! forward_branch_p (insn
)
7387 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7389 /* Handle short backwards branch with an unfilled delay slot.
7390 Using a movb;nop rather than or;bl saves 1 cycle for both
7391 taken and untaken branches. */
7392 else if (dbr_sequence_length () == 0
7393 && ! forward_branch_p (insn
)
7394 && INSN_ADDRESSES_SET_P ()
7395 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7396 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7397 return "movb,%C2 %1,%0,%3%#";
7398 /* Handle normal cases. */
7400 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7402 return "or,%N2 %1,%%r0,%0\n\tb %3";
7405 /* The reversed conditional branch must branch over one additional
7406 instruction if the delay slot is filled and needs to be extracted
7407 by pa_output_lbranch. If the delay slot is empty or this is a
7408 nullified forward branch, the instruction after the reversed
7409 condition branch must be nullified. */
7410 if (dbr_sequence_length () == 0
7411 || (nullify
&& forward_branch_p (insn
)))
7415 operands
[4] = GEN_INT (length
);
7420 operands
[4] = GEN_INT (length
+ 4);
7424 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands
);
7426 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands
);
7428 return pa_output_lbranch (operands
[3], insn
, xdelay
);
7431 /* Deal with gross reload for FP destination register case. */
7432 else if (which_alternative
== 1)
7434 /* Move source register to MEM, perform the branch test, then
7435 finally load the FP register from MEM from within the branch's
7437 output_asm_insn ("stw %1,-16(%%r30)", operands
);
7439 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7440 else if (length
== 16)
7441 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7444 operands
[4] = GEN_INT (length
- 4);
7445 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands
);
7446 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands
);
7447 return pa_output_lbranch (operands
[3], insn
, 0);
7450 /* Deal with gross reload from memory case. */
7451 else if (which_alternative
== 2)
7453 /* Reload loop counter from memory, the store back to memory
7454 happens in the branch's delay slot. */
7456 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7457 else if (length
== 12)
7458 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7461 operands
[4] = GEN_INT (length
);
7462 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7464 return pa_output_lbranch (operands
[3], insn
, 0);
7467 /* Handle SAR as a destination. */
7471 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7472 else if (length
== 12)
7473 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7476 operands
[4] = GEN_INT (length
);
7477 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7479 return pa_output_lbranch (operands
[3], insn
, 0);
7484 /* Copy any FP arguments in INSN into integer registers. */
7486 copy_fp_args (rtx_insn
*insn
)
7491 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7493 int arg_mode
, regno
;
7494 rtx use
= XEXP (link
, 0);
7496 if (! (GET_CODE (use
) == USE
7497 && GET_CODE (XEXP (use
, 0)) == REG
7498 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7501 arg_mode
= GET_MODE (XEXP (use
, 0));
7502 regno
= REGNO (XEXP (use
, 0));
7504 /* Is it a floating point register? */
7505 if (regno
>= 32 && regno
<= 39)
7507 /* Copy the FP register into an integer register via memory. */
7508 if (arg_mode
== SFmode
)
7510 xoperands
[0] = XEXP (use
, 0);
7511 xoperands
[1] = gen_rtx_REG (SImode
, 26 - (regno
- 32) / 2);
7512 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands
);
7513 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7517 xoperands
[0] = XEXP (use
, 0);
7518 xoperands
[1] = gen_rtx_REG (DImode
, 25 - (regno
- 34) / 2);
7519 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands
);
7520 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands
);
7521 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7527 /* Compute length of the FP argument copy sequence for INSN. */
7529 length_fp_args (rtx_insn
*insn
)
7534 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7536 int arg_mode
, regno
;
7537 rtx use
= XEXP (link
, 0);
7539 if (! (GET_CODE (use
) == USE
7540 && GET_CODE (XEXP (use
, 0)) == REG
7541 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7544 arg_mode
= GET_MODE (XEXP (use
, 0));
7545 regno
= REGNO (XEXP (use
, 0));
7547 /* Is it a floating point register? */
7548 if (regno
>= 32 && regno
<= 39)
7550 if (arg_mode
== SFmode
)
7560 /* Return the attribute length for the millicode call instruction INSN.
7561 The length must match the code generated by pa_output_millicode_call.
7562 We include the delay slot in the returned length as it is better to
7563 over estimate the length than to under estimate it. */
7566 pa_attr_length_millicode_call (rtx_insn
*insn
)
7568 unsigned long distance
= -1;
7569 unsigned long total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7571 if (INSN_ADDRESSES_SET_P ())
7573 distance
= (total
+ insn_current_reference_address (insn
));
7574 if (distance
< total
)
7580 if (!TARGET_LONG_CALLS
&& distance
< 7600000)
7585 else if (TARGET_PORTABLE_RUNTIME
)
7589 if (!TARGET_LONG_CALLS
&& distance
< MAX_PCREL17F_OFFSET
)
7599 /* INSN is a function call.
7601 CALL_DEST is the routine we are calling. */
7604 pa_output_millicode_call (rtx_insn
*insn
, rtx call_dest
)
7606 int attr_length
= get_attr_length (insn
);
7607 int seq_length
= dbr_sequence_length ();
7610 xoperands
[0] = call_dest
;
7611 xoperands
[2] = gen_rtx_REG (Pmode
, TARGET_64BIT
? 2 : 31);
7613 /* Handle the common case where we are sure that the branch will
7614 reach the beginning of the $CODE$ subspace. The within reach
7615 form of the $$sh_func_adrs call has a length of 28. Because it
7616 has an attribute type of sh_func_adrs, it never has a nonzero
7617 sequence length (i.e., the delay slot is never filled). */
7618 if (!TARGET_LONG_CALLS
7619 && (attr_length
== 8
7620 || (attr_length
== 28
7621 && get_attr_type (insn
) == TYPE_SH_FUNC_ADRS
)))
7623 output_asm_insn ("{bl|b,l} %0,%2", xoperands
);
7629 /* It might seem that one insn could be saved by accessing
7630 the millicode function using the linkage table. However,
7631 this doesn't work in shared libraries and other dynamically
7632 loaded objects. Using a pc-relative sequence also avoids
7633 problems related to the implicit use of the gp register. */
7634 output_asm_insn ("b,l .+8,%%r1", xoperands
);
7638 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
7639 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
7643 xoperands
[1] = gen_label_rtx ();
7644 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7645 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7646 CODE_LABEL_NUMBER (xoperands
[1]));
7647 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7650 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
7652 else if (TARGET_PORTABLE_RUNTIME
)
7654 /* Pure portable runtime doesn't allow be/ble; we also don't
7655 have PIC support in the assembler/linker, so this sequence
7658 /* Get the address of our target into %r1. */
7659 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7660 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
7662 /* Get our return address into %r31. */
7663 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands
);
7664 output_asm_insn ("addi 8,%%r31,%%r31", xoperands
);
7666 /* Jump to our target address in %r1. */
7667 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
7671 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7673 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands
);
7675 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands
);
7679 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7680 output_asm_insn ("addi 16,%%r1,%%r31", xoperands
);
7682 if (TARGET_SOM
|| !TARGET_GAS
)
7684 /* The HP assembler can generate relocations for the
7685 difference of two symbols. GAS can do this for a
7686 millicode symbol but not an arbitrary external
7687 symbol when generating SOM output. */
7688 xoperands
[1] = gen_label_rtx ();
7689 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7690 CODE_LABEL_NUMBER (xoperands
[1]));
7691 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7692 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7696 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands
);
7697 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7701 /* Jump to our target address in %r1. */
7702 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
7706 if (seq_length
== 0)
7707 output_asm_insn ("nop", xoperands
);
7712 /* Return the attribute length of the call instruction INSN. The SIBCALL
7713 flag indicates whether INSN is a regular call or a sibling call. The
7714 length returned must be longer than the code actually generated by
7715 pa_output_call. Since branch shortening is done before delay branch
7716 sequencing, there is no way to determine whether or not the delay
7717 slot will be filled during branch shortening. Even when the delay
7718 slot is filled, we may have to add a nop if the delay slot contains
7719 a branch that can't reach its target. Thus, we always have to include
7720 the delay slot in the length estimate. This used to be done in
7721 pa_adjust_insn_length but we do it here now as some sequences always
7722 fill the delay slot and we can save four bytes in the estimate for
7726 pa_attr_length_call (rtx_insn
*insn
, int sibcall
)
7729 rtx call
, call_dest
;
7732 rtx pat
= PATTERN (insn
);
7733 unsigned long distance
= -1;
7735 gcc_assert (CALL_P (insn
));
7737 if (INSN_ADDRESSES_SET_P ())
7739 unsigned long total
;
7741 total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7742 distance
= (total
+ insn_current_reference_address (insn
));
7743 if (distance
< total
)
7747 gcc_assert (GET_CODE (pat
) == PARALLEL
);
7749 /* Get the call rtx. */
7750 call
= XVECEXP (pat
, 0, 0);
7751 if (GET_CODE (call
) == SET
)
7752 call
= SET_SRC (call
);
7754 gcc_assert (GET_CODE (call
) == CALL
);
7756 /* Determine if this is a local call. */
7757 call_dest
= XEXP (XEXP (call
, 0), 0);
7758 call_decl
= SYMBOL_REF_DECL (call_dest
);
7759 local_call
= call_decl
&& targetm
.binds_local_p (call_decl
);
7761 /* pc-relative branch. */
7762 if (!TARGET_LONG_CALLS
7763 && ((TARGET_PA_20
&& !sibcall
&& distance
< 7600000)
7764 || distance
< MAX_PCREL17F_OFFSET
))
7767 /* 64-bit plabel sequence. */
7768 else if (TARGET_64BIT
&& !local_call
)
7769 length
+= sibcall
? 28 : 24;
7771 /* non-pic long absolute branch sequence. */
7772 else if ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7775 /* long pc-relative branch sequence. */
7776 else if (TARGET_LONG_PIC_SDIFF_CALL
7777 || (TARGET_GAS
&& !TARGET_SOM
7778 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
)))
7782 if (!TARGET_PA_20
&& !TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
7786 /* 32-bit plabel sequence. */
7792 length
+= length_fp_args (insn
);
7802 if (!TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
7810 /* INSN is a function call.
7812 CALL_DEST is the routine we are calling. */
7815 pa_output_call (rtx_insn
*insn
, rtx call_dest
, int sibcall
)
7817 int seq_length
= dbr_sequence_length ();
7818 tree call_decl
= SYMBOL_REF_DECL (call_dest
);
7819 int local_call
= call_decl
&& targetm
.binds_local_p (call_decl
);
7822 xoperands
[0] = call_dest
;
7824 /* Handle the common case where we're sure that the branch will reach
7825 the beginning of the "$CODE$" subspace. This is the beginning of
7826 the current function if we are in a named section. */
7827 if (!TARGET_LONG_CALLS
&& pa_attr_length_call (insn
, sibcall
) == 8)
7829 xoperands
[1] = gen_rtx_REG (word_mode
, sibcall
? 0 : 2);
7830 output_asm_insn ("{bl|b,l} %0,%1", xoperands
);
7834 if (TARGET_64BIT
&& !local_call
)
7836 /* ??? As far as I can tell, the HP linker doesn't support the
7837 long pc-relative sequence described in the 64-bit runtime
7838 architecture. So, we use a slightly longer indirect call. */
7839 xoperands
[0] = pa_get_deferred_plabel (call_dest
);
7840 xoperands
[1] = gen_label_rtx ();
7842 /* If this isn't a sibcall, we put the load of %r27 into the
7843 delay slot. We can't do this in a sibcall as we don't
7844 have a second call-clobbered scratch register available.
7845 We don't need to do anything when generating fast indirect
7847 if (seq_length
!= 0 && !sibcall
)
7849 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
7852 /* Now delete the delay insn. */
7853 SET_INSN_DELETED (NEXT_INSN (insn
));
7857 output_asm_insn ("addil LT'%0,%%r27", xoperands
);
7858 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands
);
7859 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands
);
7863 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands
);
7864 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands
);
7865 output_asm_insn ("bve (%%r1)", xoperands
);
7869 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands
);
7870 output_asm_insn ("bve,l (%%r2),%%r2", xoperands
);
7871 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands
);
7877 int indirect_call
= 0;
7879 /* Emit a long call. There are several different sequences
7880 of increasing length and complexity. In most cases,
7881 they don't allow an instruction in the delay slot. */
7882 if (!((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7883 && !TARGET_LONG_PIC_SDIFF_CALL
7884 && !(TARGET_GAS
&& !TARGET_SOM
7885 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
))
7893 || ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)))
7895 /* A non-jump insn in the delay slot. By definition we can
7896 emit this insn before the call (and in fact before argument
7898 final_scan_insn (NEXT_INSN (insn
), asm_out_file
, optimize
, 0,
7901 /* Now delete the delay insn. */
7902 SET_INSN_DELETED (NEXT_INSN (insn
));
7906 if ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7908 /* This is the best sequence for making long calls in
7909 non-pic code. Unfortunately, GNU ld doesn't provide
7910 the stub needed for external calls, and GAS's support
7911 for this with the SOM linker is buggy. It is safe
7912 to use this for local calls. */
7913 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7915 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands
);
7919 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7922 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands
);
7924 output_asm_insn ("copy %%r31,%%r2", xoperands
);
7930 if (TARGET_LONG_PIC_SDIFF_CALL
)
7932 /* The HP assembler and linker can handle relocations
7933 for the difference of two symbols. The HP assembler
7934 recognizes the sequence as a pc-relative call and
7935 the linker provides stubs when needed. */
7936 xoperands
[1] = gen_label_rtx ();
7937 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7938 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7939 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7940 CODE_LABEL_NUMBER (xoperands
[1]));
7941 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7943 else if (TARGET_GAS
&& !TARGET_SOM
7944 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
))
7946 /* GAS currently can't generate the relocations that
7947 are needed for the SOM linker under HP-UX using this
7948 sequence. The GNU linker doesn't generate the stubs
7949 that are needed for external calls on TARGET_ELF32
7950 with this sequence. For now, we have to use a
7951 longer plabel sequence when using GAS. */
7952 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7953 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7955 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7960 /* Emit a long plabel-based call sequence. This is
7961 essentially an inline implementation of $$dyncall.
7962 We don't actually try to call $$dyncall as this is
7963 as difficult as calling the function itself. */
7964 xoperands
[0] = pa_get_deferred_plabel (call_dest
);
7965 xoperands
[1] = gen_label_rtx ();
7967 /* Since the call is indirect, FP arguments in registers
7968 need to be copied to the general registers. Then, the
7969 argument relocation stub will copy them back. */
7971 copy_fp_args (insn
);
7975 output_asm_insn ("addil LT'%0,%%r19", xoperands
);
7976 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands
);
7977 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands
);
7981 output_asm_insn ("addil LR'%0-$global$,%%r27",
7983 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7987 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands
);
7988 output_asm_insn ("depi 0,31,2,%%r1", xoperands
);
7989 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands
);
7990 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands
);
7992 if (!sibcall
&& !TARGET_PA_20
)
7994 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands
);
7995 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
7996 output_asm_insn ("addi 8,%%r2,%%r2", xoperands
);
7998 output_asm_insn ("addi 16,%%r2,%%r2", xoperands
);
8005 output_asm_insn ("bve (%%r1)", xoperands
);
8010 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
8011 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands
);
8015 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
8020 if (!TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
8021 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8026 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
8027 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands
);
8029 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands
);
8033 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
8034 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands
);
8036 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands
);
8039 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands
);
8041 output_asm_insn ("copy %%r31,%%r2", xoperands
);
8049 if (seq_length
== 0)
8050 output_asm_insn ("nop", xoperands
);
8055 /* Return the attribute length of the indirect call instruction INSN.
8056 The length must match the code generated by output_indirect call.
8057 The returned length includes the delay slot. Currently, the delay
8058 slot of an indirect call sequence is not exposed and it is used by
8059 the sequence itself. */
8062 pa_attr_length_indirect_call (rtx_insn
*insn
)
8064 unsigned long distance
= -1;
8065 unsigned long total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
8067 if (INSN_ADDRESSES_SET_P ())
8069 distance
= (total
+ insn_current_reference_address (insn
));
8070 if (distance
< total
)
8077 if (TARGET_FAST_INDIRECT_CALLS
8078 || (!TARGET_LONG_CALLS
8079 && !TARGET_PORTABLE_RUNTIME
8080 && ((TARGET_PA_20
&& !TARGET_SOM
&& distance
< 7600000)
8081 || distance
< MAX_PCREL17F_OFFSET
)))
8087 if (TARGET_PORTABLE_RUNTIME
)
8090 /* Out of reach, can use ble. */
8095 pa_output_indirect_call (rtx_insn
*insn
, rtx call_dest
)
8101 xoperands
[0] = call_dest
;
8102 output_asm_insn ("ldd 16(%0),%%r2", xoperands
);
8103 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands
);
8107 /* First the special case for kernels, level 0 systems, etc. */
8108 if (TARGET_FAST_INDIRECT_CALLS
)
8109 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8111 /* Now the normal case -- we can reach $$dyncall directly or
8112 we're sure that we can get there via a long-branch stub.
8114 No need to check target flags as the length uniquely identifies
8115 the remaining cases. */
8116 if (pa_attr_length_indirect_call (insn
) == 8)
8118 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8119 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8120 variant of the B,L instruction can't be used on the SOM target. */
8121 if (TARGET_PA_20
&& !TARGET_SOM
)
8122 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8124 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8127 /* Long millicode call, but we are not generating PIC or portable runtime
8129 if (pa_attr_length_indirect_call (insn
) == 12)
8130 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8132 /* Long millicode call for portable runtime. */
8133 if (pa_attr_length_indirect_call (insn
) == 16)
8134 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8136 /* We need a long PIC call to $$dyncall. */
8137 xoperands
[0] = NULL_RTX
;
8138 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands
);
8139 if (TARGET_SOM
|| !TARGET_GAS
)
8141 xoperands
[0] = gen_label_rtx ();
8142 output_asm_insn ("addil L'$$dyncall-%0,%%r2", xoperands
);
8143 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8144 CODE_LABEL_NUMBER (xoperands
[0]));
8145 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands
);
8149 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r2", xoperands
);
8150 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8153 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
8154 output_asm_insn ("ldo 12(%%r2),%%r2", xoperands
);
8158 /* In HPUX 8.0's shared library scheme, special relocations are needed
8159 for function labels if they might be passed to a function
8160 in a shared library (because shared libraries don't live in code
8161 space), and special magic is needed to construct their address. */
8164 pa_encode_label (rtx sym
)
8166 const char *str
= XSTR (sym
, 0);
8167 int len
= strlen (str
) + 1;
8170 p
= newstr
= XALLOCAVEC (char, len
+ 1);
8174 XSTR (sym
, 0) = ggc_alloc_string (newstr
, len
);
8178 pa_encode_section_info (tree decl
, rtx rtl
, int first
)
8180 int old_referenced
= 0;
8182 if (!first
&& MEM_P (rtl
) && GET_CODE (XEXP (rtl
, 0)) == SYMBOL_REF
)
8184 = SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) & SYMBOL_FLAG_REFERENCED
;
8186 default_encode_section_info (decl
, rtl
, first
);
8188 if (first
&& TEXT_SPACE_P (decl
))
8190 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
8191 if (TREE_CODE (decl
) == FUNCTION_DECL
)
8192 pa_encode_label (XEXP (rtl
, 0));
8194 else if (old_referenced
)
8195 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= old_referenced
;
8198 /* This is sort of inverse to pa_encode_section_info. */
8201 pa_strip_name_encoding (const char *str
)
8203 str
+= (*str
== '@');
8204 str
+= (*str
== '*');
8208 /* Returns 1 if OP is a function label involved in a simple addition
8209 with a constant. Used to keep certain patterns from matching
8210 during instruction combination. */
8212 pa_is_function_label_plus_const (rtx op
)
8214 /* Strip off any CONST. */
8215 if (GET_CODE (op
) == CONST
)
8218 return (GET_CODE (op
) == PLUS
8219 && function_label_operand (XEXP (op
, 0), VOIDmode
)
8220 && GET_CODE (XEXP (op
, 1)) == CONST_INT
);
8223 /* Output assembly code for a thunk to FUNCTION. */
8226 pa_asm_output_mi_thunk (FILE *file
, tree thunk_fndecl
, HOST_WIDE_INT delta
,
8227 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
8230 static unsigned int current_thunk_number
;
8231 int val_14
= VAL_14_BITS_P (delta
);
8232 unsigned int old_last_address
= last_address
, nbytes
= 0;
8236 xoperands
[0] = XEXP (DECL_RTL (function
), 0);
8237 xoperands
[1] = XEXP (DECL_RTL (thunk_fndecl
), 0);
8238 xoperands
[2] = GEN_INT (delta
);
8240 final_start_function (emit_barrier (), file
, 1);
8242 /* Output the thunk. We know that the function is in the same
8243 translation unit (i.e., the same space) as the thunk, and that
8244 thunks are output after their method. Thus, we don't need an
8245 external branch to reach the function. With SOM and GAS,
8246 functions and thunks are effectively in different sections.
8247 Thus, we can always use a IA-relative branch and the linker
8248 will add a long branch stub if necessary.
8250 However, we have to be careful when generating PIC code on the
8251 SOM port to ensure that the sequence does not transfer to an
8252 import stub for the target function as this could clobber the
8253 return value saved at SP-24. This would also apply to the
8254 32-bit linux port if the multi-space model is implemented. */
8255 if ((!TARGET_LONG_CALLS
&& TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8256 && !(flag_pic
&& TREE_PUBLIC (function
))
8257 && (TARGET_GAS
|| last_address
< 262132))
8258 || (!TARGET_LONG_CALLS
&& !TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8259 && ((targetm_common
.have_named_sections
8260 && DECL_SECTION_NAME (thunk_fndecl
) != NULL
8261 /* The GNU 64-bit linker has rather poor stub management.
8262 So, we use a long branch from thunks that aren't in
8263 the same section as the target function. */
8265 && (DECL_SECTION_NAME (thunk_fndecl
)
8266 != DECL_SECTION_NAME (function
)))
8267 || ((DECL_SECTION_NAME (thunk_fndecl
)
8268 == DECL_SECTION_NAME (function
))
8269 && last_address
< 262132)))
8270 /* In this case, we need to be able to reach the start of
8271 the stub table even though the function is likely closer
8272 and can be jumped to directly. */
8273 || (targetm_common
.have_named_sections
8274 && DECL_SECTION_NAME (thunk_fndecl
) == NULL
8275 && DECL_SECTION_NAME (function
) == NULL
8276 && total_code_bytes
< MAX_PCREL17F_OFFSET
)
8278 || (!targetm_common
.have_named_sections
8279 && total_code_bytes
< MAX_PCREL17F_OFFSET
))))
8282 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8284 output_asm_insn ("b %0", xoperands
);
8288 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8293 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8297 else if (TARGET_64BIT
)
8299 /* We only have one call-clobbered scratch register, so we can't
8300 make use of the delay slot if delta doesn't fit in 14 bits. */
8303 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8304 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8307 output_asm_insn ("b,l .+8,%%r1", xoperands
);
8311 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
8312 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
8316 xoperands
[3] = GEN_INT (val_14
? 8 : 16);
8317 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands
);
8322 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
8323 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8328 output_asm_insn ("bv,n %%r0(%%r1)", xoperands
);
8332 else if (TARGET_PORTABLE_RUNTIME
)
8334 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
8335 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands
);
8338 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8340 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8344 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8349 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8353 else if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8355 /* The function is accessible from outside this module. The only
8356 way to avoid an import stub between the thunk and function is to
8357 call the function directly with an indirect sequence similar to
8358 that used by $$dyncall. This is possible because $$dyncall acts
8359 as the import stub in an indirect call. */
8360 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHN", current_thunk_number
);
8361 xoperands
[3] = gen_rtx_SYMBOL_REF (Pmode
, label
);
8362 output_asm_insn ("addil LT'%3,%%r19", xoperands
);
8363 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands
);
8364 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8365 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands
);
8366 output_asm_insn ("depi 0,31,2,%%r22", xoperands
);
8367 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands
);
8368 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8372 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8378 output_asm_insn ("bve (%%r22)", xoperands
);
8381 else if (TARGET_NO_SPACE_REGS
)
8383 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands
);
8388 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands
);
8389 output_asm_insn ("mtsp %%r21,%%sr0", xoperands
);
8390 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands
);
8395 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8397 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8401 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
8403 if (TARGET_SOM
|| !TARGET_GAS
)
8405 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands
);
8406 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands
);
8410 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
8411 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands
);
8415 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8417 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8421 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8426 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8433 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8435 output_asm_insn ("ldil L'%0,%%r22", xoperands
);
8436 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands
);
8440 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8445 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8450 final_end_function ();
8452 if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8454 switch_to_section (data_section
);
8455 output_asm_insn (".align 4", xoperands
);
8456 ASM_OUTPUT_LABEL (file
, label
);
8457 output_asm_insn (".word P'%0", xoperands
);
8460 current_thunk_number
++;
8461 nbytes
= ((nbytes
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
8462 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
8463 last_address
+= nbytes
;
8464 if (old_last_address
> last_address
)
8465 last_address
= UINT_MAX
;
8466 update_total_code_bytes (nbytes
);
8469 /* Only direct calls to static functions are allowed to be sibling (tail)
8472 This restriction is necessary because some linker generated stubs will
8473 store return pointers into rp' in some cases which might clobber a
8474 live value already in rp'.
8476 In a sibcall the current function and the target function share stack
8477 space. Thus if the path to the current function and the path to the
8478 target function save a value in rp', they save the value into the
8479 same stack slot, which has undesirable consequences.
8481 Because of the deferred binding nature of shared libraries any function
8482 with external scope could be in a different load module and thus require
8483 rp' to be saved when calling that function. So sibcall optimizations
8484 can only be safe for static function.
8486 Note that GCC never needs return value relocations, so we don't have to
8487 worry about static calls with return value relocations (which require
8490 It is safe to perform a sibcall optimization when the target function
8491 will never return. */
8493 pa_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
8495 if (TARGET_PORTABLE_RUNTIME
)
8498 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
8499 single subspace mode and the call is not indirect. As far as I know,
8500 there is no operating system support for the multiple subspace mode.
8501 It might be possible to support indirect calls if we didn't use
8502 $$dyncall (see the indirect sequence generated in pa_output_call). */
8504 return (decl
!= NULL_TREE
);
8506 /* Sibcalls are not ok because the arg pointer register is not a fixed
8507 register. This prevents the sibcall optimization from occurring. In
8508 addition, there are problems with stub placement using GNU ld. This
8509 is because a normal sibcall branch uses a 17-bit relocation while
8510 a regular call branch uses a 22-bit relocation. As a result, more
8511 care needs to be taken in the placement of long-branch stubs. */
8515 /* Sibcalls are only ok within a translation unit. */
8516 return (decl
&& !TREE_PUBLIC (decl
));
8519 /* ??? Addition is not commutative on the PA due to the weird implicit
8520 space register selection rules for memory addresses. Therefore, we
8521 don't consider a + b == b + a, as this might be inside a MEM. */
8523 pa_commutative_p (const_rtx x
, int outer_code
)
8525 return (COMMUTATIVE_P (x
)
8526 && (TARGET_NO_SPACE_REGS
8527 || (outer_code
!= UNKNOWN
&& outer_code
!= MEM
)
8528 || GET_CODE (x
) != PLUS
));
8531 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8532 use in fmpyadd instructions. */
8534 pa_fmpyaddoperands (rtx
*operands
)
8536 machine_mode mode
= GET_MODE (operands
[0]);
8538 /* Must be a floating point mode. */
8539 if (mode
!= SFmode
&& mode
!= DFmode
)
8542 /* All modes must be the same. */
8543 if (! (mode
== GET_MODE (operands
[1])
8544 && mode
== GET_MODE (operands
[2])
8545 && mode
== GET_MODE (operands
[3])
8546 && mode
== GET_MODE (operands
[4])
8547 && mode
== GET_MODE (operands
[5])))
8550 /* All operands must be registers. */
8551 if (! (GET_CODE (operands
[1]) == REG
8552 && GET_CODE (operands
[2]) == REG
8553 && GET_CODE (operands
[3]) == REG
8554 && GET_CODE (operands
[4]) == REG
8555 && GET_CODE (operands
[5]) == REG
))
8558 /* Only 2 real operands to the addition. One of the input operands must
8559 be the same as the output operand. */
8560 if (! rtx_equal_p (operands
[3], operands
[4])
8561 && ! rtx_equal_p (operands
[3], operands
[5]))
8564 /* Inout operand of add cannot conflict with any operands from multiply. */
8565 if (rtx_equal_p (operands
[3], operands
[0])
8566 || rtx_equal_p (operands
[3], operands
[1])
8567 || rtx_equal_p (operands
[3], operands
[2]))
8570 /* multiply cannot feed into addition operands. */
8571 if (rtx_equal_p (operands
[4], operands
[0])
8572 || rtx_equal_p (operands
[5], operands
[0]))
8575 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8577 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
8578 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
8579 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
8580 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
8581 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
8582 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
8585 /* Passed. Operands are suitable for fmpyadd. */
8589 #if !defined(USE_COLLECT2)
8591 pa_asm_out_constructor (rtx symbol
, int priority
)
8593 if (!function_label_operand (symbol
, VOIDmode
))
8594 pa_encode_label (symbol
);
8596 #ifdef CTORS_SECTION_ASM_OP
8597 default_ctor_section_asm_out_constructor (symbol
, priority
);
8599 # ifdef TARGET_ASM_NAMED_SECTION
8600 default_named_section_asm_out_constructor (symbol
, priority
);
8602 default_stabs_asm_out_constructor (symbol
, priority
);
8608 pa_asm_out_destructor (rtx symbol
, int priority
)
8610 if (!function_label_operand (symbol
, VOIDmode
))
8611 pa_encode_label (symbol
);
8613 #ifdef DTORS_SECTION_ASM_OP
8614 default_dtor_section_asm_out_destructor (symbol
, priority
);
8616 # ifdef TARGET_ASM_NAMED_SECTION
8617 default_named_section_asm_out_destructor (symbol
, priority
);
8619 default_stabs_asm_out_destructor (symbol
, priority
);
8625 /* This function places uninitialized global data in the bss section.
8626 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8627 function on the SOM port to prevent uninitialized global data from
8628 being placed in the data section. */
8631 pa_asm_output_aligned_bss (FILE *stream
,
8633 unsigned HOST_WIDE_INT size
,
8636 switch_to_section (bss_section
);
8637 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8639 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8640 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "object");
8643 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8644 ASM_OUTPUT_SIZE_DIRECTIVE (stream
, name
, size
);
8647 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8648 ASM_OUTPUT_LABEL (stream
, name
);
8649 fprintf (stream
, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
8652 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8653 that doesn't allow the alignment of global common storage to be directly
8654 specified. The SOM linker aligns common storage based on the rounded
8655 value of the NUM_BYTES parameter in the .comm directive. It's not
8656 possible to use the .align directive as it doesn't affect the alignment
8657 of the label associated with a .comm directive. */
8660 pa_asm_output_aligned_common (FILE *stream
,
8662 unsigned HOST_WIDE_INT size
,
8665 unsigned int max_common_align
;
8667 max_common_align
= TARGET_64BIT
? 128 : (size
>= 4096 ? 256 : 64);
8668 if (align
> max_common_align
)
8670 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8671 "for global common data. Using %u",
8672 align
/ BITS_PER_UNIT
, name
, max_common_align
/ BITS_PER_UNIT
);
8673 align
= max_common_align
;
8676 switch_to_section (bss_section
);
8678 assemble_name (stream
, name
);
8679 fprintf (stream
, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED
"\n",
8680 MAX (size
, align
/ BITS_PER_UNIT
));
8683 /* We can't use .comm for local common storage as the SOM linker effectively
8684 treats the symbol as universal and uses the same storage for local symbols
8685 with the same name in different object files. The .block directive
8686 reserves an uninitialized block of storage. However, it's not common
8687 storage. Fortunately, GCC never requests common storage with the same
8688 name in any given translation unit. */
8691 pa_asm_output_aligned_local (FILE *stream
,
8693 unsigned HOST_WIDE_INT size
,
8696 switch_to_section (bss_section
);
8697 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8700 fprintf (stream
, "%s", LOCAL_ASM_OP
);
8701 assemble_name (stream
, name
);
8702 fprintf (stream
, "\n");
8705 ASM_OUTPUT_LABEL (stream
, name
);
8706 fprintf (stream
, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
8709 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8710 use in fmpysub instructions. */
8712 pa_fmpysuboperands (rtx
*operands
)
8714 machine_mode mode
= GET_MODE (operands
[0]);
8716 /* Must be a floating point mode. */
8717 if (mode
!= SFmode
&& mode
!= DFmode
)
8720 /* All modes must be the same. */
8721 if (! (mode
== GET_MODE (operands
[1])
8722 && mode
== GET_MODE (operands
[2])
8723 && mode
== GET_MODE (operands
[3])
8724 && mode
== GET_MODE (operands
[4])
8725 && mode
== GET_MODE (operands
[5])))
8728 /* All operands must be registers. */
8729 if (! (GET_CODE (operands
[1]) == REG
8730 && GET_CODE (operands
[2]) == REG
8731 && GET_CODE (operands
[3]) == REG
8732 && GET_CODE (operands
[4]) == REG
8733 && GET_CODE (operands
[5]) == REG
))
8736 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8737 operation, so operands[4] must be the same as operand[3]. */
8738 if (! rtx_equal_p (operands
[3], operands
[4]))
8741 /* multiply cannot feed into subtraction. */
8742 if (rtx_equal_p (operands
[5], operands
[0]))
8745 /* Inout operand of sub cannot conflict with any operands from multiply. */
8746 if (rtx_equal_p (operands
[3], operands
[0])
8747 || rtx_equal_p (operands
[3], operands
[1])
8748 || rtx_equal_p (operands
[3], operands
[2]))
8751 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8753 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
8754 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
8755 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
8756 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
8757 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
8758 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
8761 /* Passed. Operands are suitable for fmpysub. */
/* Return 1 if VAL is 2, 4 or 8 -- the valid scale factors for a MULT
   embedded inside a memory address -- and 0 otherwise.  */
int
pa_mem_shadd_constant_p (int val)
{
  switch (val)
    {
    case 2:
    case 4:
    case 8:
      return 1;
    default:
      return 0;
    }
}
/* Return 1 if VAL is 1, 2 or 3 -- the valid shift counts for shadd
   instructions -- and 0 otherwise.  */
int
pa_shadd_constant_p (int val)
{
  return (val >= 1 && val <= 3) ? 1 : 0;
}
8787 /* Return TRUE if INSN branches forward. */
8790 forward_branch_p (rtx_insn
*insn
)
8792 rtx lab
= JUMP_LABEL (insn
);
8794 /* The INSN must have a jump label. */
8795 gcc_assert (lab
!= NULL_RTX
);
8797 if (INSN_ADDRESSES_SET_P ())
8798 return INSN_ADDRESSES (INSN_UID (lab
)) > INSN_ADDRESSES (INSN_UID (insn
));
8805 insn
= NEXT_INSN (insn
);
8811 /* Output an unconditional move and branch insn. */
8814 pa_output_parallel_movb (rtx
*operands
, rtx_insn
*insn
)
8816 int length
= get_attr_length (insn
);
8818 /* These are the cases in which we win. */
8820 return "mov%I1b,tr %1,%0,%2";
8822 /* None of the following cases win, but they don't lose either. */
8825 if (dbr_sequence_length () == 0)
8827 /* Nothing in the delay slot, fake it by putting the combined
8828 insn (the copy or add) in the delay slot of a bl. */
8829 if (GET_CODE (operands
[1]) == CONST_INT
)
8830 return "b %2\n\tldi %1,%0";
8832 return "b %2\n\tcopy %1,%0";
8836 /* Something in the delay slot, but we've got a long branch. */
8837 if (GET_CODE (operands
[1]) == CONST_INT
)
8838 return "ldi %1,%0\n\tb %2";
8840 return "copy %1,%0\n\tb %2";
8844 if (GET_CODE (operands
[1]) == CONST_INT
)
8845 output_asm_insn ("ldi %1,%0", operands
);
8847 output_asm_insn ("copy %1,%0", operands
);
8848 return pa_output_lbranch (operands
[2], insn
, 1);
8851 /* Output an unconditional add and branch insn. */
8854 pa_output_parallel_addb (rtx
*operands
, rtx_insn
*insn
)
8856 int length
= get_attr_length (insn
);
8858 /* To make life easy we want operand0 to be the shared input/output
8859 operand and operand1 to be the readonly operand. */
8860 if (operands
[0] == operands
[1])
8861 operands
[1] = operands
[2];
8863 /* These are the cases in which we win. */
8865 return "add%I1b,tr %1,%0,%3";
8867 /* None of the following cases win, but they don't lose either. */
8870 if (dbr_sequence_length () == 0)
8871 /* Nothing in the delay slot, fake it by putting the combined
8872 insn (the copy or add) in the delay slot of a bl. */
8873 return "b %3\n\tadd%I1 %1,%0,%0";
8875 /* Something in the delay slot, but we've got a long branch. */
8876 return "add%I1 %1,%0,%0\n\tb %3";
8879 output_asm_insn ("add%I1 %1,%0,%0", operands
);
8880 return pa_output_lbranch (operands
[3], insn
, 1);
8883 /* We use this hook to perform a PA specific optimization which is difficult
8884 to do in earlier passes. */
8889 remove_useless_addtr_insns (1);
8891 if (pa_cpu
< PROCESSOR_8000
)
8892 pa_combine_instructions ();
8895 /* The PA has a number of odd instructions which can perform multiple
8896 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8897 it may be profitable to combine two instructions into one instruction
8898 with two outputs. It's not profitable PA2.0 machines because the
8899 two outputs would take two slots in the reorder buffers.
8901 This routine finds instructions which can be combined and combines
8902 them. We only support some of the potential combinations, and we
8903 only try common ways to find suitable instructions.
8905 * addb can add two registers or a register and a small integer
8906 and jump to a nearby (+-8k) location. Normally the jump to the
8907 nearby location is conditional on the result of the add, but by
8908 using the "true" condition we can make the jump unconditional.
8909 Thus addb can perform two independent operations in one insn.
8911 * movb is similar to addb in that it can perform a reg->reg
8912 or small immediate->reg copy and jump to a nearby (+-8k location).
8914 * fmpyadd and fmpysub can perform a FP multiply and either an
8915 FP add or FP sub if the operands of the multiply and add/sub are
8916 independent (there are other minor restrictions). Note both
8917 the fmpy and fadd/fsub can in theory move to better spots according
8918 to data dependencies, but for now we require the fmpy stay at a
8921 * Many of the memory operations can perform pre & post updates
8922 of index registers. GCC's pre/post increment/decrement addressing
8923 is far too simple to take advantage of all the possibilities. This
8924 pass may not be suitable since those insns may not be independent.
8926 * comclr can compare two ints or an int and a register, nullify
8927 the following instruction and zero some other register. This
8928 is more difficult to use as it's harder to find an insn which
8929 will generate a comclr than finding something like an unconditional
8930 branch. (conditional moves & long branches create comclr insns).
8932 * Most arithmetic operations can conditionally skip the next
8933 instruction. They can be viewed as "perform this operation
8934 and conditionally jump to this nearby location" (where nearby
8935 is an insns away). These are difficult to use due to the
8936 branch length restrictions. */
8939 pa_combine_instructions (void)
8943 /* This can get expensive since the basic algorithm is on the
8944 order of O(n^2) (or worse). Only do it for -O2 or higher
8945 levels of optimization. */
8949 /* Walk down the list of insns looking for "anchor" insns which
8950 may be combined with "floating" insns. As the name implies,
8951 "anchor" instructions don't move, while "floating" insns may
8953 rtx par
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, NULL_RTX
, NULL_RTX
));
8954 rtx_insn
*new_rtx
= make_insn_raw (par
);
8956 for (anchor
= get_insns (); anchor
; anchor
= NEXT_INSN (anchor
))
8958 enum attr_pa_combine_type anchor_attr
;
8959 enum attr_pa_combine_type floater_attr
;
8961 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
8962 Also ignore any special USE insns. */
8963 if ((! NONJUMP_INSN_P (anchor
) && ! JUMP_P (anchor
) && ! CALL_P (anchor
))
8964 || GET_CODE (PATTERN (anchor
)) == USE
8965 || GET_CODE (PATTERN (anchor
)) == CLOBBER
)
8968 anchor_attr
= get_attr_pa_combine_type (anchor
);
8969 /* See if anchor is an insn suitable for combination. */
8970 if (anchor_attr
== PA_COMBINE_TYPE_FMPY
8971 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
8972 || (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
8973 && ! forward_branch_p (anchor
)))
8977 for (floater
= PREV_INSN (anchor
);
8979 floater
= PREV_INSN (floater
))
8981 if (NOTE_P (floater
)
8982 || (NONJUMP_INSN_P (floater
)
8983 && (GET_CODE (PATTERN (floater
)) == USE
8984 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
8987 /* Anything except a regular INSN will stop our search. */
8988 if (! NONJUMP_INSN_P (floater
))
8994 /* See if FLOATER is suitable for combination with the
8996 floater_attr
= get_attr_pa_combine_type (floater
);
8997 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
8998 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
8999 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9000 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9002 /* If ANCHOR and FLOATER can be combined, then we're
9003 done with this pass. */
9004 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9005 SET_DEST (PATTERN (floater
)),
9006 XEXP (SET_SRC (PATTERN (floater
)), 0),
9007 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9011 else if (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
9012 && floater_attr
== PA_COMBINE_TYPE_ADDMOVE
)
9014 if (GET_CODE (SET_SRC (PATTERN (floater
))) == PLUS
)
9016 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9017 SET_DEST (PATTERN (floater
)),
9018 XEXP (SET_SRC (PATTERN (floater
)), 0),
9019 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9024 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9025 SET_DEST (PATTERN (floater
)),
9026 SET_SRC (PATTERN (floater
)),
9027 SET_SRC (PATTERN (floater
))))
9033 /* If we didn't find anything on the backwards scan try forwards. */
9035 && (anchor_attr
== PA_COMBINE_TYPE_FMPY
9036 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
))
9038 for (floater
= anchor
; floater
; floater
= NEXT_INSN (floater
))
9040 if (NOTE_P (floater
)
9041 || (NONJUMP_INSN_P (floater
)
9042 && (GET_CODE (PATTERN (floater
)) == USE
9043 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
9047 /* Anything except a regular INSN will stop our search. */
9048 if (! NONJUMP_INSN_P (floater
))
9054 /* See if FLOATER is suitable for combination with the
9056 floater_attr
= get_attr_pa_combine_type (floater
);
9057 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9058 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9059 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9060 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9062 /* If ANCHOR and FLOATER can be combined, then we're
9063 done with this pass. */
9064 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 1,
9065 SET_DEST (PATTERN (floater
)),
9066 XEXP (SET_SRC (PATTERN (floater
)),
9068 XEXP (SET_SRC (PATTERN (floater
)),
9075 /* FLOATER will be nonzero if we found a suitable floating
9076 insn for combination with ANCHOR. */
9078 && (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9079 || anchor_attr
== PA_COMBINE_TYPE_FMPY
))
9081 /* Emit the new instruction and delete the old anchor. */
9082 emit_insn_before (gen_rtx_PARALLEL
9084 gen_rtvec (2, PATTERN (anchor
),
9085 PATTERN (floater
))),
9088 SET_INSN_DELETED (anchor
);
9090 /* Emit a special USE insn for FLOATER, then delete
9091 the floating insn. */
9092 emit_insn_before (gen_rtx_USE (VOIDmode
, floater
), floater
);
9093 delete_insn (floater
);
9098 && anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
)
9101 /* Emit the new_jump instruction and delete the old anchor. */
9103 = emit_jump_insn_before (gen_rtx_PARALLEL
9105 gen_rtvec (2, PATTERN (anchor
),
9106 PATTERN (floater
))),
9109 JUMP_LABEL (temp
) = JUMP_LABEL (anchor
);
9110 SET_INSN_DELETED (anchor
);
9112 /* Emit a special USE insn for FLOATER, then delete
9113 the floating insn. */
9114 emit_insn_before (gen_rtx_USE (VOIDmode
, floater
), floater
);
9115 delete_insn (floater
);
9123 pa_can_combine_p (rtx_insn
*new_rtx
, rtx_insn
*anchor
, rtx_insn
*floater
,
9124 int reversed
, rtx dest
,
9127 int insn_code_number
;
9128 rtx_insn
*start
, *end
;
9130 /* Create a PARALLEL with the patterns of ANCHOR and
9131 FLOATER, try to recognize it, then test constraints
9132 for the resulting pattern.
9134 If the pattern doesn't match or the constraints
9135 aren't met keep searching for a suitable floater
9137 XVECEXP (PATTERN (new_rtx
), 0, 0) = PATTERN (anchor
);
9138 XVECEXP (PATTERN (new_rtx
), 0, 1) = PATTERN (floater
);
9139 INSN_CODE (new_rtx
) = -1;
9140 insn_code_number
= recog_memoized (new_rtx
);
9141 basic_block bb
= BLOCK_FOR_INSN (anchor
);
9142 if (insn_code_number
< 0
9143 || (extract_insn (new_rtx
),
9144 !constrain_operands (1, get_preferred_alternatives (new_rtx
, bb
))))
9158 /* There's up to three operands to consider. One
9159 output and two inputs.
9161 The output must not be used between FLOATER & ANCHOR
9162 exclusive. The inputs must not be set between
9163 FLOATER and ANCHOR exclusive. */
9165 if (reg_used_between_p (dest
, start
, end
))
9168 if (reg_set_between_p (src1
, start
, end
))
9171 if (reg_set_between_p (src2
, start
, end
))
9174 /* If we get here, then everything is good. */
9178 /* Return nonzero if references for INSN are delayed.
9180 Millicode insns are actually function calls with some special
9181 constraints on arguments and register usage.
9183 Millicode calls always expect their arguments in the integer argument
9184 registers, and always return their result in %r29 (ret1). They
9185 are expected to clobber their arguments, %r1, %r29, and the return
9186 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9188 This function tells reorg that the references to arguments and
9189 millicode calls do not appear to happen until after the millicode call.
9190 This allows reorg to put insns which set the argument registers into the
9191 delay slot of the millicode call -- thus they act more like traditional
9194 Note we cannot consider side effects of the insn to be delayed because
9195 the branch and link insn will clobber the return pointer. If we happened
9196 to use the return pointer in the delay slot of the call, then we lose.
9198 get_attr_type will try to recognize the given insn, so make sure to
9199 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9202 pa_insn_refs_are_delayed (rtx_insn
*insn
)
9204 return ((NONJUMP_INSN_P (insn
)
9205 && GET_CODE (PATTERN (insn
)) != SEQUENCE
9206 && GET_CODE (PATTERN (insn
)) != USE
9207 && GET_CODE (PATTERN (insn
)) != CLOBBER
9208 && get_attr_type (insn
) == TYPE_MILLI
));
9211 /* Promote the return value, but not the arguments. */
9214 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
9216 int *punsignedp ATTRIBUTE_UNUSED
,
9217 const_tree fntype ATTRIBUTE_UNUSED
,
9220 if (for_return
== 0)
9222 return promote_mode (type
, mode
, punsignedp
);
9225 /* On the HP-PA the value is found in register(s) 28(-29), unless
9226 the mode is SF or DF. Then the value is returned in fr4 (32).
9228 This must perform the same promotions as PROMOTE_MODE, else promoting
9229 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9231 Small structures must be returned in a PARALLEL on PA64 in order
9232 to match the HP Compiler ABI. */
9235 pa_function_value (const_tree valtype
,
9236 const_tree func ATTRIBUTE_UNUSED
,
9237 bool outgoing ATTRIBUTE_UNUSED
)
9239 machine_mode valmode
;
9241 if (AGGREGATE_TYPE_P (valtype
)
9242 || TREE_CODE (valtype
) == COMPLEX_TYPE
9243 || TREE_CODE (valtype
) == VECTOR_TYPE
)
9245 HOST_WIDE_INT valsize
= int_size_in_bytes (valtype
);
9247 /* Handle aggregates that fit exactly in a word or double word. */
9248 if ((valsize
& (UNITS_PER_WORD
- 1)) == 0)
9249 return gen_rtx_REG (TYPE_MODE (valtype
), 28);
9253 /* Aggregates with a size less than or equal to 128 bits are
9254 returned in GR 28(-29). They are left justified. The pad
9255 bits are undefined. Larger aggregates are returned in
9259 int ub
= valsize
<= UNITS_PER_WORD
? 1 : 2;
9261 for (i
= 0; i
< ub
; i
++)
9263 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9264 gen_rtx_REG (DImode
, 28 + i
),
9269 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec_v (ub
, loc
));
9271 else if (valsize
> UNITS_PER_WORD
)
9273 /* Aggregates 5 to 8 bytes in size are returned in general
9274 registers r28-r29 in the same manner as other non
9275 floating-point objects. The data is right-justified and
9276 zero-extended to 64 bits. This is opposite to the normal
9277 justification used on big endian targets and requires
9278 special treatment. */
9279 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9280 gen_rtx_REG (DImode
, 28), const0_rtx
);
9281 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
9285 if ((INTEGRAL_TYPE_P (valtype
)
9286 && GET_MODE_BITSIZE (TYPE_MODE (valtype
)) < BITS_PER_WORD
)
9287 || POINTER_TYPE_P (valtype
))
9288 valmode
= word_mode
;
9290 valmode
= TYPE_MODE (valtype
);
9292 if (TREE_CODE (valtype
) == REAL_TYPE
9293 && !AGGREGATE_TYPE_P (valtype
)
9294 && TYPE_MODE (valtype
) != TFmode
9295 && !TARGET_SOFT_FLOAT
)
9296 return gen_rtx_REG (valmode
, 32);
9298 return gen_rtx_REG (valmode
, 28);
9301 /* Implement the TARGET_LIBCALL_VALUE hook. */
9304 pa_libcall_value (machine_mode mode
,
9305 const_rtx fun ATTRIBUTE_UNUSED
)
9307 if (! TARGET_SOFT_FLOAT
9308 && (mode
== SFmode
|| mode
== DFmode
))
9309 return gen_rtx_REG (mode
, 32);
9311 return gen_rtx_REG (mode
, 28);
9314 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9317 pa_function_value_regno_p (const unsigned int regno
)
9320 || (! TARGET_SOFT_FLOAT
&& regno
== 32))
9326 /* Update the data in CUM to advance over an argument
9327 of mode MODE and data type TYPE.
9328 (TYPE is null for libcalls where that information may not be available.) */
9331 pa_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
9332 const_tree type
, bool named ATTRIBUTE_UNUSED
)
9334 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9335 int arg_size
= FUNCTION_ARG_SIZE (mode
, type
);
9337 cum
->nargs_prototype
--;
9338 cum
->words
+= (arg_size
9339 + ((cum
->words
& 01)
9340 && type
!= NULL_TREE
9344 /* Return the location of a parameter that is passed in a register or NULL
9345 if the parameter has any component that is passed in memory.
9347 This is new code and will be pushed to into the net sources after
9350 ??? We might want to restructure this so that it looks more like other
9353 pa_function_arg (cumulative_args_t cum_v
, machine_mode mode
,
9354 const_tree type
, bool named ATTRIBUTE_UNUSED
)
9356 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9357 int max_arg_words
= (TARGET_64BIT
? 8 : 4);
9364 if (mode
== VOIDmode
)
9367 arg_size
= FUNCTION_ARG_SIZE (mode
, type
);
9369 /* If this arg would be passed partially or totally on the stack, then
9370 this routine should return zero. pa_arg_partial_bytes will
9371 handle arguments which are split between regs and stack slots if
9372 the ABI mandates split arguments. */
9375 /* The 32-bit ABI does not split arguments. */
9376 if (cum
->words
+ arg_size
> max_arg_words
)
9382 alignment
= cum
->words
& 1;
9383 if (cum
->words
+ alignment
>= max_arg_words
)
9387 /* The 32bit ABIs and the 64bit ABIs are rather different,
9388 particularly in their handling of FP registers. We might
9389 be able to cleverly share code between them, but I'm not
9390 going to bother in the hope that splitting them up results
9391 in code that is more easily understood. */
9395 /* Advance the base registers to their current locations.
9397 Remember, gprs grow towards smaller register numbers while
9398 fprs grow to higher register numbers. Also remember that
9399 although FP regs are 32-bit addressable, we pretend that
9400 the registers are 64-bits wide. */
9401 gpr_reg_base
= 26 - cum
->words
;
9402 fpr_reg_base
= 32 + cum
->words
;
9404 /* Arguments wider than one word and small aggregates need special
9408 || (type
&& (AGGREGATE_TYPE_P (type
)
9409 || TREE_CODE (type
) == COMPLEX_TYPE
9410 || TREE_CODE (type
) == VECTOR_TYPE
)))
9412 /* Double-extended precision (80-bit), quad-precision (128-bit)
9413 and aggregates including complex numbers are aligned on
9414 128-bit boundaries. The first eight 64-bit argument slots
9415 are associated one-to-one, with general registers r26
9416 through r19, and also with floating-point registers fr4
9417 through fr11. Arguments larger than one word are always
9418 passed in general registers.
9420 Using a PARALLEL with a word mode register results in left
9421 justified data on a big-endian target. */
9424 int i
, offset
= 0, ub
= arg_size
;
9426 /* Align the base register. */
9427 gpr_reg_base
-= alignment
;
9429 ub
= MIN (ub
, max_arg_words
- cum
->words
- alignment
);
9430 for (i
= 0; i
< ub
; i
++)
9432 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9433 gen_rtx_REG (DImode
, gpr_reg_base
),
9439 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (ub
, loc
));
9444 /* If the argument is larger than a word, then we know precisely
9445 which registers we must use. */
9459 /* Structures 5 to 8 bytes in size are passed in the general
9460 registers in the same manner as other non floating-point
9461 objects. The data is right-justified and zero-extended
9462 to 64 bits. This is opposite to the normal justification
9463 used on big endian targets and requires special treatment.
9464 We now define BLOCK_REG_PADDING to pad these objects.
9465 Aggregates, complex and vector types are passed in the same
9466 manner as structures. */
9468 || (type
&& (AGGREGATE_TYPE_P (type
)
9469 || TREE_CODE (type
) == COMPLEX_TYPE
9470 || TREE_CODE (type
) == VECTOR_TYPE
)))
9472 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9473 gen_rtx_REG (DImode
, gpr_reg_base
),
9475 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
9480 /* We have a single word (32 bits). A simple computation
9481 will get us the register #s we need. */
9482 gpr_reg_base
= 26 - cum
->words
;
9483 fpr_reg_base
= 32 + 2 * cum
->words
;
9487 /* Determine if the argument needs to be passed in both general and
9488 floating point registers. */
9489 if (((TARGET_PORTABLE_RUNTIME
|| TARGET_64BIT
|| TARGET_ELF32
)
9490 /* If we are doing soft-float with portable runtime, then there
9491 is no need to worry about FP regs. */
9492 && !TARGET_SOFT_FLOAT
9493 /* The parameter must be some kind of scalar float, else we just
9494 pass it in integer registers. */
9495 && GET_MODE_CLASS (mode
) == MODE_FLOAT
9496 /* The target function must not have a prototype. */
9497 && cum
->nargs_prototype
<= 0
9498 /* libcalls do not need to pass items in both FP and general
9500 && type
!= NULL_TREE
9501 /* All this hair applies to "outgoing" args only. This includes
9502 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9504 /* Also pass outgoing floating arguments in both registers in indirect
9505 calls with the 32 bit ABI and the HP assembler since there is no
9506 way to the specify argument locations in static functions. */
9511 && GET_MODE_CLASS (mode
) == MODE_FLOAT
))
9517 gen_rtx_EXPR_LIST (VOIDmode
,
9518 gen_rtx_REG (mode
, fpr_reg_base
),
9520 gen_rtx_EXPR_LIST (VOIDmode
,
9521 gen_rtx_REG (mode
, gpr_reg_base
),
9526 /* See if we should pass this parameter in a general register. */
9527 if (TARGET_SOFT_FLOAT
9528 /* Indirect calls in the normal 32bit ABI require all arguments
9529 to be passed in general registers. */
9530 || (!TARGET_PORTABLE_RUNTIME
9534 /* If the parameter is not a scalar floating-point parameter,
9535 then it belongs in GPRs. */
9536 || GET_MODE_CLASS (mode
) != MODE_FLOAT
9537 /* Structure with single SFmode field belongs in GPR. */
9538 || (type
&& AGGREGATE_TYPE_P (type
)))
9539 retval
= gen_rtx_REG (mode
, gpr_reg_base
);
9541 retval
= gen_rtx_REG (mode
, fpr_reg_base
);
9546 /* Arguments larger than one word are double word aligned. */
9549 pa_function_arg_boundary (machine_mode mode
, const_tree type
)
9551 bool singleword
= (type
9552 ? (integer_zerop (TYPE_SIZE (type
))
9553 || !TREE_CONSTANT (TYPE_SIZE (type
))
9554 || int_size_in_bytes (type
) <= UNITS_PER_WORD
)
9555 : GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
);
9557 return singleword
? PARM_BOUNDARY
: MAX_PARM_BOUNDARY
;
9560 /* If this arg would be passed totally in registers or totally on the stack,
9561 then this routine should return zero. */
9564 pa_arg_partial_bytes (cumulative_args_t cum_v
, machine_mode mode
,
9565 tree type
, bool named ATTRIBUTE_UNUSED
)
9567 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
9568 unsigned int max_arg_words
= 8;
9569 unsigned int offset
= 0;
9574 if (FUNCTION_ARG_SIZE (mode
, type
) > 1 && (cum
->words
& 1))
9577 if (cum
->words
+ offset
+ FUNCTION_ARG_SIZE (mode
, type
) <= max_arg_words
)
9578 /* Arg fits fully into registers. */
9580 else if (cum
->words
+ offset
>= max_arg_words
)
9581 /* Arg fully on the stack. */
9585 return (max_arg_words
- cum
->words
- offset
) * UNITS_PER_WORD
;
9589 /* A get_unnamed_section callback for switching to the text section.
9591 This function is only used with SOM. Because we don't support
9592 named subspaces, we can only create a new subspace or switch back
9593 to the default text subspace. */
9596 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
9598 gcc_assert (TARGET_SOM
);
9601 if (cfun
&& cfun
->machine
&& !cfun
->machine
->in_nsubspa
)
9603 /* We only want to emit a .nsubspa directive once at the
9604 start of the function. */
9605 cfun
->machine
->in_nsubspa
= 1;
9607 /* Create a new subspace for the text. This provides
9608 better stub placement and one-only functions. */
9610 && DECL_ONE_ONLY (cfun
->decl
)
9611 && !DECL_WEAK (cfun
->decl
))
9613 output_section_asm_op ("\t.SPACE $TEXT$\n"
9614 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9615 "ACCESS=44,SORT=24,COMDAT");
9621 /* There isn't a current function or the body of the current
9622 function has been completed. So, we are changing to the
9623 text section to output debugging information. Thus, we
9624 need to forget that we are in the text section so that
9625 varasm.c will call us when text_section is selected again. */
9626 gcc_assert (!cfun
|| !cfun
->machine
9627 || cfun
->machine
->in_nsubspa
== 2);
9630 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9633 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9636 /* A get_unnamed_section callback for switching to comdat data
9637 sections. This function is only used with SOM. */
9640 som_output_comdat_data_section_asm_op (const void *data
)
9643 output_section_asm_op (data
);
9646 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9649 pa_som_asm_init_sections (void)
9652 = get_unnamed_section (0, som_output_text_section_asm_op
, NULL
);
9654 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9655 is not being generated. */
9656 som_readonly_data_section
9657 = get_unnamed_section (0, output_section_asm_op
,
9658 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9660 /* When secondary definitions are not supported, SOM makes readonly
9661 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9663 som_one_only_readonly_data_section
9664 = get_unnamed_section (0, som_output_comdat_data_section_asm_op
,
9666 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9667 "ACCESS=0x2c,SORT=16,COMDAT");
9670 /* When secondary definitions are not supported, SOM makes data one-only
9671 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9672 som_one_only_data_section
9673 = get_unnamed_section (SECTION_WRITE
,
9674 som_output_comdat_data_section_asm_op
,
9675 "\t.SPACE $PRIVATE$\n"
9676 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9677 "ACCESS=31,SORT=24,COMDAT");
9680 som_tm_clone_table_section
9681 = get_unnamed_section (0, output_section_asm_op
,
9682 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9684 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9685 which reference data within the $TEXT$ space (for example constant
9686 strings in the $LIT$ subspace).
9688 The assemblers (GAS and HP as) both have problems with handling
9689 the difference of two symbols which is the other correct way to
9690 reference constant data during PIC code generation.
9692 So, there's no way to reference constant data which is in the
9693 $TEXT$ space during PIC generation. Instead place all constant
9694 data into the $PRIVATE$ subspace (this reduces sharing, but it
9695 works correctly). */
9696 readonly_data_section
= flag_pic
? data_section
: som_readonly_data_section
;
9698 /* We must not have a reference to an external symbol defined in a
9699 shared library in a readonly section, else the SOM linker will
9702 So, we force exception information into the data section. */
9703 exception_section
= data_section
;
9706 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9709 pa_som_tm_clone_table_section (void)
9711 return som_tm_clone_table_section
;
9714 /* On hpux10, the linker will give an error if we have a reference
9715 in the read-only data section to a symbol defined in a shared
9716 library. Therefore, expressions that might require a reloc can
9717 not be placed in the read-only data section. */
9720 pa_select_section (tree exp
, int reloc
,
9721 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
9723 if (TREE_CODE (exp
) == VAR_DECL
9724 && TREE_READONLY (exp
)
9725 && !TREE_THIS_VOLATILE (exp
)
9726 && DECL_INITIAL (exp
)
9727 && (DECL_INITIAL (exp
) == error_mark_node
9728 || TREE_CONSTANT (DECL_INITIAL (exp
)))
9732 && DECL_ONE_ONLY (exp
)
9733 && !DECL_WEAK (exp
))
9734 return som_one_only_readonly_data_section
;
9736 return readonly_data_section
;
9738 else if (CONSTANT_CLASS_P (exp
) && !reloc
)
9739 return readonly_data_section
;
9741 && TREE_CODE (exp
) == VAR_DECL
9742 && DECL_ONE_ONLY (exp
)
9743 && !DECL_WEAK (exp
))
9744 return som_one_only_data_section
;
9746 return data_section
;
/* Implement pa_reloc_rw_mask.  */

static int
pa_reloc_rw_mask (void)
{
  /* We force (const (plus (symbol) (const_int))) to memory when the
     const_int doesn't fit in a 14-bit integer.  The SOM linker can't
     handle this construct in read-only memory and we want to avoid
     this for ELF.  So, we always force an RTX needing relocation to
     the data section.  */
  return 3;
}
9763 pa_globalize_label (FILE *stream
, const char *name
)
9765 /* We only handle DATA objects here, functions are globalized in
9766 ASM_DECLARE_FUNCTION_NAME. */
9767 if (! FUNCTION_NAME_P (name
))
9769 fputs ("\t.EXPORT ", stream
);
9770 assemble_name (stream
, name
);
9771 fputs (",DATA\n", stream
);
9775 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9778 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED
,
9779 int incoming ATTRIBUTE_UNUSED
)
9781 return gen_rtx_REG (Pmode
, PA_STRUCT_VALUE_REGNUM
);
9784 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9787 pa_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
9789 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9790 PA64 ABI says that objects larger than 128 bits are returned in memory.
9791 Note, int_size_in_bytes can return -1 if the size of the object is
9792 variable or larger than the maximum value that can be expressed as
9793 a HOST_WIDE_INT. It can also return zero for an empty type. The
9794 simplest way to handle variable and empty types is to pass them in
9795 memory. This avoids problems in defining the boundaries of argument
9796 slots, allocating registers, etc. */
9797 return (int_size_in_bytes (type
) > (TARGET_64BIT
? 16 : 8)
9798 || int_size_in_bytes (type
) <= 0);
9801 /* Structure to hold declaration and name of external symbols that are
9802 emitted by GCC. We generate a vector of these symbols and output them
9803 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9804 This avoids putting out names that are never really used. */
9806 typedef struct GTY(()) extern_symbol
9812 /* Define gc'd vector type for extern_symbol. */
9814 /* Vector of extern_symbol pointers. */
9815 static GTY(()) vec
<extern_symbol
, va_gc
> *extern_symbols
;
9817 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9818 /* Mark DECL (name NAME) as an external reference (assembler output
9819 file FILE). This saves the names to output at the end of the file
9820 if actually referenced. */
9823 pa_hpux_asm_output_external (FILE *file
, tree decl
, const char *name
)
9825 gcc_assert (file
== asm_out_file
);
9826 extern_symbol p
= {decl
, name
};
9827 vec_safe_push (extern_symbols
, p
);
9830 /* Output text required at the end of an assembler file.
9831 This includes deferred plabels and .import directives for
9832 all external symbols that were actually referenced. */
9835 pa_hpux_file_end (void)
9840 if (!NO_DEFERRED_PROFILE_COUNTERS
)
9841 output_deferred_profile_counters ();
9843 output_deferred_plabels ();
9845 for (i
= 0; vec_safe_iterate (extern_symbols
, i
, &p
); i
++)
9847 tree decl
= p
->decl
;
9849 if (!TREE_ASM_WRITTEN (decl
)
9850 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl
), 0)))
9851 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file
, decl
, p
->name
);
9854 vec_free (extern_symbols
);
9858 /* Return true if a change from mode FROM to mode TO for a register
9859 in register class RCLASS is invalid. */
9862 pa_cannot_change_mode_class (machine_mode from
, machine_mode to
,
9863 enum reg_class rclass
)
9868 /* Reject changes to/from complex and vector modes. */
9869 if (COMPLEX_MODE_P (from
) || VECTOR_MODE_P (from
)
9870 || COMPLEX_MODE_P (to
) || VECTOR_MODE_P (to
))
9873 if (GET_MODE_SIZE (from
) == GET_MODE_SIZE (to
))
9876 /* There is no way to load QImode or HImode values directly from
9877 memory. SImode loads to the FP registers are not zero extended.
9878 On the 64-bit target, this conflicts with the definition of
9879 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9880 with different sizes in the floating-point registers. */
9881 if (MAYBE_FP_REG_CLASS_P (rclass
))
9884 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9885 in specific sets of registers. Thus, we cannot allow changing
9886 to a larger mode when it's larger than a word. */
9887 if (GET_MODE_SIZE (to
) > UNITS_PER_WORD
9888 && GET_MODE_SIZE (to
) > GET_MODE_SIZE (from
))
9894 /* Returns TRUE if it is a good idea to tie two pseudo registers
9895 when one has mode MODE1 and one has mode MODE2.
9896 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9897 for any hard reg, then this must be FALSE for correct output.
9899 We should return FALSE for QImode and HImode because these modes
9900 are not ok in the floating-point registers. However, this prevents
9901 tieing these modes to SImode and DImode in the general registers.
9902 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9903 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9904 in the floating-point registers. */
9907 pa_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
9909 /* Don't tie modes in different classes. */
9910 if (GET_MODE_CLASS (mode1
) != GET_MODE_CLASS (mode2
))
9917 /* Length in units of the trampoline instruction code. */
9919 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9922 /* Output assembler code for a block containing the constant parts
9923 of a trampoline, leaving space for the variable parts.\
9925 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9926 and then branches to the specified routine.
9928 This code template is copied from text segment to stack location
9929 and then patched with pa_trampoline_init to contain valid values,
9930 and then entered as a subroutine.
9932 It is best to keep this as small as possible to avoid having to
9933 flush multiple lines in the cache. */
9936 pa_asm_trampoline_template (FILE *f
)
9940 fputs ("\tldw 36(%r22),%r21\n", f
);
9941 fputs ("\tbb,>=,n %r21,30,.+16\n", f
);
9942 if (ASSEMBLER_DIALECT
== 0)
9943 fputs ("\tdepi 0,31,2,%r21\n", f
);
9945 fputs ("\tdepwi 0,31,2,%r21\n", f
);
9946 fputs ("\tldw 4(%r21),%r19\n", f
);
9947 fputs ("\tldw 0(%r21),%r21\n", f
);
9950 fputs ("\tbve (%r21)\n", f
);
9951 fputs ("\tldw 40(%r22),%r29\n", f
);
9952 fputs ("\t.word 0\n", f
);
9953 fputs ("\t.word 0\n", f
);
9957 fputs ("\tldsid (%r21),%r1\n", f
);
9958 fputs ("\tmtsp %r1,%sr0\n", f
);
9959 fputs ("\tbe 0(%sr0,%r21)\n", f
);
9960 fputs ("\tldw 40(%r22),%r29\n", f
);
9962 fputs ("\t.word 0\n", f
);
9963 fputs ("\t.word 0\n", f
);
9964 fputs ("\t.word 0\n", f
);
9965 fputs ("\t.word 0\n", f
);
9969 fputs ("\t.dword 0\n", f
);
9970 fputs ("\t.dword 0\n", f
);
9971 fputs ("\t.dword 0\n", f
);
9972 fputs ("\t.dword 0\n", f
);
9973 fputs ("\tmfia %r31\n", f
);
9974 fputs ("\tldd 24(%r31),%r1\n", f
);
9975 fputs ("\tldd 24(%r1),%r27\n", f
);
9976 fputs ("\tldd 16(%r1),%r1\n", f
);
9977 fputs ("\tbve (%r1)\n", f
);
9978 fputs ("\tldd 32(%r31),%r31\n", f
);
9979 fputs ("\t.dword 0 ; fptr\n", f
);
9980 fputs ("\t.dword 0 ; static link\n", f
);
9984 /* Emit RTL insns to initialize the variable parts of a trampoline.
9985 FNADDR is an RTX for the address of the function's pure code.
9986 CXT is an RTX for the static chain value for the function.
9988 Move the function address to the trampoline template at offset 36.
9989 Move the static chain value to trampoline template at offset 40.
9990 Move the trampoline address to trampoline template at offset 44.
9991 Move r19 to trampoline template at offset 48. The latter two
9992 words create a plabel for the indirect call to the trampoline.
9994 A similar sequence is used for the 64-bit port but the plabel is
9995 at the beginning of the trampoline.
9997 Finally, the cache entries for the trampoline code are flushed.
9998 This is necessary to ensure that the trampoline instruction sequence
9999 is written to memory prior to any attempts at prefetching the code
10003 pa_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
10005 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
10006 rtx start_addr
= gen_reg_rtx (Pmode
);
10007 rtx end_addr
= gen_reg_rtx (Pmode
);
10008 rtx line_length
= gen_reg_rtx (Pmode
);
10011 emit_block_move (m_tramp
, assemble_trampoline_template (),
10012 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
10013 r_tramp
= force_reg (Pmode
, XEXP (m_tramp
, 0));
10017 tmp
= adjust_address (m_tramp
, Pmode
, 36);
10018 emit_move_insn (tmp
, fnaddr
);
10019 tmp
= adjust_address (m_tramp
, Pmode
, 40);
10020 emit_move_insn (tmp
, chain_value
);
10022 /* Create a fat pointer for the trampoline. */
10023 tmp
= adjust_address (m_tramp
, Pmode
, 44);
10024 emit_move_insn (tmp
, r_tramp
);
10025 tmp
= adjust_address (m_tramp
, Pmode
, 48);
10026 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 19));
10028 /* fdc and fic only use registers for the address to flush,
10029 they do not accept integer displacements. We align the
10030 start and end addresses to the beginning of their respective
10031 cache lines to minimize the number of lines flushed. */
10032 emit_insn (gen_andsi3 (start_addr
, r_tramp
,
10033 GEN_INT (-MIN_CACHELINE_SIZE
)));
10034 tmp
= force_reg (Pmode
, plus_constant (Pmode
, r_tramp
,
10035 TRAMPOLINE_CODE_SIZE
-1));
10036 emit_insn (gen_andsi3 (end_addr
, tmp
,
10037 GEN_INT (-MIN_CACHELINE_SIZE
)));
10038 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10039 emit_insn (gen_dcacheflushsi (start_addr
, end_addr
, line_length
));
10040 emit_insn (gen_icacheflushsi (start_addr
, end_addr
, line_length
,
10041 gen_reg_rtx (Pmode
),
10042 gen_reg_rtx (Pmode
)));
10046 tmp
= adjust_address (m_tramp
, Pmode
, 56);
10047 emit_move_insn (tmp
, fnaddr
);
10048 tmp
= adjust_address (m_tramp
, Pmode
, 64);
10049 emit_move_insn (tmp
, chain_value
);
10051 /* Create a fat pointer for the trampoline. */
10052 tmp
= adjust_address (m_tramp
, Pmode
, 16);
10053 emit_move_insn (tmp
, force_reg (Pmode
, plus_constant (Pmode
,
10055 tmp
= adjust_address (m_tramp
, Pmode
, 24);
10056 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 27));
10058 /* fdc and fic only use registers for the address to flush,
10059 they do not accept integer displacements. We align the
10060 start and end addresses to the beginning of their respective
10061 cache lines to minimize the number of lines flushed. */
10062 tmp
= force_reg (Pmode
, plus_constant (Pmode
, r_tramp
, 32));
10063 emit_insn (gen_anddi3 (start_addr
, tmp
,
10064 GEN_INT (-MIN_CACHELINE_SIZE
)));
10065 tmp
= force_reg (Pmode
, plus_constant (Pmode
, tmp
,
10066 TRAMPOLINE_CODE_SIZE
- 1));
10067 emit_insn (gen_anddi3 (end_addr
, tmp
,
10068 GEN_INT (-MIN_CACHELINE_SIZE
)));
10069 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10070 emit_insn (gen_dcacheflushdi (start_addr
, end_addr
, line_length
));
10071 emit_insn (gen_icacheflushdi (start_addr
, end_addr
, line_length
,
10072 gen_reg_rtx (Pmode
),
10073 gen_reg_rtx (Pmode
)));
10076 #ifdef HAVE_ENABLE_EXECUTE_STACK
10077 Â
emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
10078 Â Â Â Â LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
10082 /* Perform any machine-specific adjustment in the address of the trampoline.
10083 ADDR contains the address that was passed to pa_trampoline_init.
10084 Adjust the trampoline address to point to the plabel at offset 44. */
10087 pa_trampoline_adjust_address (rtx addr
)
10090 addr
= memory_address (Pmode
, plus_constant (Pmode
, addr
, 46));
10095 pa_delegitimize_address (rtx orig_x
)
10097 rtx x
= delegitimize_mem_from_attrs (orig_x
);
10099 if (GET_CODE (x
) == LO_SUM
10100 && GET_CODE (XEXP (x
, 1)) == UNSPEC
10101 && XINT (XEXP (x
, 1), 1) == UNSPEC_DLTIND14R
)
10102 return gen_const_mem (Pmode
, XVECEXP (XEXP (x
, 1), 0, 0));
10107 pa_internal_arg_pointer (void)
10109 /* The argument pointer and the hard frame pointer are the same in
10110 the 32-bit runtime, so we don't need a copy. */
10112 return copy_to_reg (virtual_incoming_args_rtx
);
10114 return virtual_incoming_args_rtx
;
10117 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10118 Frame pointer elimination is automatically handled. */
10121 pa_can_eliminate (const int from
, const int to
)
10123 /* The argument cannot be eliminated in the 64-bit runtime. */
10124 if (TARGET_64BIT
&& from
== ARG_POINTER_REGNUM
)
10127 return (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
10128 ? ! frame_pointer_needed
10132 /* Define the offset between two registers, FROM to be eliminated and its
10133 replacement TO, at the start of a routine. */
10135 pa_initial_elimination_offset (int from
, int to
)
10137 HOST_WIDE_INT offset
;
10139 if ((from
== HARD_FRAME_POINTER_REGNUM
|| from
== FRAME_POINTER_REGNUM
)
10140 && to
== STACK_POINTER_REGNUM
)
10141 offset
= -pa_compute_frame_size (get_frame_size (), 0);
10142 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
10145 gcc_unreachable ();
10151 pa_conditional_register_usage (void)
10155 if (!TARGET_64BIT
&& !TARGET_PA_11
)
10157 for (i
= 56; i
<= FP_REG_LAST
; i
++)
10158 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10159 for (i
= 33; i
< 56; i
+= 2)
10160 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10162 if (TARGET_DISABLE_FPREGS
|| TARGET_SOFT_FLOAT
)
10164 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
10165 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10168 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
10171 /* Target hook for c_mode_for_suffix. */
10173 static machine_mode
10174 pa_c_mode_for_suffix (char suffix
)
10176 if (HPUX_LONG_DOUBLE_LIBRARY
)
10185 /* Target hook for function_section. */
10188 pa_function_section (tree decl
, enum node_frequency freq
,
10189 bool startup
, bool exit
)
10191 /* Put functions in text section if target doesn't have named sections. */
10192 if (!targetm_common
.have_named_sections
)
10193 return text_section
;
10195 /* Force nested functions into the same section as the containing
10198 && DECL_SECTION_NAME (decl
) == NULL
10199 && DECL_CONTEXT (decl
) != NULL_TREE
10200 && TREE_CODE (DECL_CONTEXT (decl
)) == FUNCTION_DECL
10201 && DECL_SECTION_NAME (DECL_CONTEXT (decl
)) == NULL
)
10202 return function_section (DECL_CONTEXT (decl
));
10204 /* Otherwise, use the default function section. */
10205 return default_function_section (decl
, freq
, startup
, exit
);
10208 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10210 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10211 that need more than three instructions to load prior to reload. This
10212 limit is somewhat arbitrary. It takes three instructions to load a
10213 CONST_INT from memory but two are memory accesses. It may be better
10214 to increase the allowed range for CONST_INTS. We may also be able
10215 to handle CONST_DOUBLES. */
10218 pa_legitimate_constant_p (machine_mode mode
, rtx x
)
10220 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& x
!= CONST0_RTX (mode
))
10223 if (!NEW_HP_ASSEMBLER
&& !TARGET_GAS
&& GET_CODE (x
) == LABEL_REF
)
10226 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10227 legitimate constants. The other variants can't be handled by
10228 the move patterns after reload starts. */
10229 if (tls_referenced_p (x
))
10232 if (TARGET_64BIT
&& GET_CODE (x
) == CONST_DOUBLE
)
10236 && HOST_BITS_PER_WIDE_INT
> 32
10237 && GET_CODE (x
) == CONST_INT
10238 && !reload_in_progress
10239 && !reload_completed
10240 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x
))
10241 && !pa_cint_ok_for_move (INTVAL (x
)))
10244 if (function_label_operand (x
, mode
))
10250 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10252 static unsigned int
10253 pa_section_type_flags (tree decl
, const char *name
, int reloc
)
10255 unsigned int flags
;
10257 flags
= default_section_type_flags (decl
, name
, reloc
);
10259 /* Function labels are placed in the constant pool. This can
10260 cause a section conflict if decls are put in ".data.rel.ro"
10261 or ".data.rel.ro.local" using the __attribute__ construct. */
10262 if (strcmp (name
, ".data.rel.ro") == 0
10263 || strcmp (name
, ".data.rel.ro.local") == 0)
10264 flags
|= SECTION_WRITE
| SECTION_RELRO
;
10269 /* pa_legitimate_address_p recognizes an RTL expression that is a
10270 valid memory address for an instruction. The MODE argument is the
10271 machine mode for the MEM expression that wants to use this address.
10273 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10274 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10275 available with floating point loads and stores, and integer loads.
10276 We get better code by allowing indexed addresses in the initial
10279 The acceptance of indexed addresses as legitimate implies that we
10280 must provide patterns for doing indexed integer stores, or the move
10281 expanders must force the address of an indexed store to a register.
10282 We have adopted the latter approach.
10284 Another function of pa_legitimate_address_p is to ensure that
10285 the base register is a valid pointer for indexed instructions.
10286 On targets that have non-equivalent space registers, we have to
10287 know at the time of assembler output which register in a REG+REG
10288 pair is the base register. The REG_POINTER flag is sometimes lost
10289 in reload and the following passes, so it can't be relied on during
10290 code generation. Thus, we either have to canonicalize the order
10291 of the registers in REG+REG indexed addresses, or treat REG+REG
10292 addresses separately and provide patterns for both permutations.
10294 The latter approach requires several hundred additional lines of
10295 code in pa.md. The downside to canonicalizing is that a PLUS
10296 in the wrong order can't combine to form to make a scaled indexed
10297 memory operand. As we won't need to canonicalize the operands if
10298 the REG_POINTER lossage can be fixed, it seems better canonicalize.
10300 We initially break out scaled indexed addresses in canonical order
10301 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10302 scaled indexed addresses during RTL generation. However, fold_rtx
10303 has its own opinion on how the operands of a PLUS should be ordered.
10304 If one of the operands is equivalent to a constant, it will make
10305 that operand the second operand. As the base register is likely to
10306 be equivalent to a SYMBOL_REF, we have made it the second operand.
10308 pa_legitimate_address_p accepts REG+REG as legitimate when the
10309 operands are in the order INDEX+BASE on targets with non-equivalent
10310 space registers, and in any order on targets with equivalent space
10311 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10313 We treat a SYMBOL_REF as legitimate if it is part of the current
10314 function's constant-pool, because such addresses can actually be
10315 output as REG+SMALLINT. */
10318 pa_legitimate_address_p (machine_mode mode
, rtx x
, bool strict
)
10321 && (strict
? STRICT_REG_OK_FOR_BASE_P (x
)
10322 : REG_OK_FOR_BASE_P (x
)))
10323 || ((GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
10324 || GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
)
10325 && REG_P (XEXP (x
, 0))
10326 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 0))
10327 : REG_OK_FOR_BASE_P (XEXP (x
, 0)))))
10330 if (GET_CODE (x
) == PLUS
)
10334 /* For REG+REG, the base register should be in XEXP (x, 1),
10335 so check it first. */
10336 if (REG_P (XEXP (x
, 1))
10337 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 1))
10338 : REG_OK_FOR_BASE_P (XEXP (x
, 1))))
10339 base
= XEXP (x
, 1), index
= XEXP (x
, 0);
10340 else if (REG_P (XEXP (x
, 0))
10341 && (strict
? STRICT_REG_OK_FOR_BASE_P (XEXP (x
, 0))
10342 : REG_OK_FOR_BASE_P (XEXP (x
, 0))))
10343 base
= XEXP (x
, 0), index
= XEXP (x
, 1);
10347 if (GET_CODE (index
) == CONST_INT
)
10349 if (INT_5_BITS (index
))
10352 /* When INT14_OK_STRICT is false, a secondary reload is needed
10353 to adjust the displacement of SImode and DImode floating point
10354 instructions but this may fail when the register also needs
10355 reloading. So, we return false when STRICT is true. We
10356 also reject long displacements for float mode addresses since
10357 the majority of accesses will use floating point instructions
10358 that don't support 14-bit offsets. */
10359 if (!INT14_OK_STRICT
10360 && (strict
|| !(reload_in_progress
|| reload_completed
))
10365 return base14_operand (index
, mode
);
10368 if (!TARGET_DISABLE_INDEXING
10369 /* Only accept the "canonical" INDEX+BASE operand order
10370 on targets with non-equivalent space registers. */
10371 && (TARGET_NO_SPACE_REGS
10373 : (base
== XEXP (x
, 1) && REG_P (index
)
10374 && (reload_completed
10375 || (reload_in_progress
&& HARD_REGISTER_P (base
))
10376 || REG_POINTER (base
))
10377 && (reload_completed
10378 || (reload_in_progress
&& HARD_REGISTER_P (index
))
10379 || !REG_POINTER (index
))))
10380 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode
)
10381 && (strict
? STRICT_REG_OK_FOR_INDEX_P (index
)
10382 : REG_OK_FOR_INDEX_P (index
))
10383 && borx_reg_operand (base
, Pmode
)
10384 && borx_reg_operand (index
, Pmode
))
10387 if (!TARGET_DISABLE_INDEXING
10388 && GET_CODE (index
) == MULT
10389 && MODE_OK_FOR_SCALED_INDEXING_P (mode
)
10390 && REG_P (XEXP (index
, 0))
10391 && GET_MODE (XEXP (index
, 0)) == Pmode
10392 && (strict
? STRICT_REG_OK_FOR_INDEX_P (XEXP (index
, 0))
10393 : REG_OK_FOR_INDEX_P (XEXP (index
, 0)))
10394 && GET_CODE (XEXP (index
, 1)) == CONST_INT
10395 && INTVAL (XEXP (index
, 1))
10396 == (HOST_WIDE_INT
) GET_MODE_SIZE (mode
)
10397 && borx_reg_operand (base
, Pmode
))
10403 if (GET_CODE (x
) == LO_SUM
)
10405 rtx y
= XEXP (x
, 0);
10407 if (GET_CODE (y
) == SUBREG
)
10408 y
= SUBREG_REG (y
);
10411 && (strict
? STRICT_REG_OK_FOR_BASE_P (y
)
10412 : REG_OK_FOR_BASE_P (y
)))
10414 /* Needed for -fPIC */
10416 && GET_CODE (XEXP (x
, 1)) == UNSPEC
)
10419 if (!INT14_OK_STRICT
10420 && (strict
|| !(reload_in_progress
|| reload_completed
))
10425 if (CONSTANT_P (XEXP (x
, 1)))
10431 if (GET_CODE (x
) == CONST_INT
&& INT_5_BITS (x
))
10437 /* Look for machine dependent ways to make the invalid address AD a
10440 For the PA, transform:
10442 memory(X + <large int>)
10446 if (<large int> & mask) >= 16
10447 Y = (<large int> & ~mask) + mask + 1 Round up.
10449 Y = (<large int> & ~mask) Round down.
10451 memory (Z + (<large int> - Y));
10453 This makes reload inheritance and reload_cse work better since Z
10456 There may be more opportunities to improve code with this hook. */
10459 pa_legitimize_reload_address (rtx ad
, machine_mode mode
,
10460 int opnum
, int type
,
10461 int ind_levels ATTRIBUTE_UNUSED
)
10463 long offset
, newoffset
, mask
;
10464 rtx new_rtx
, temp
= NULL_RTX
;
10466 mask
= (GET_MODE_CLASS (mode
) == MODE_FLOAT
10467 && !INT14_OK_STRICT
? 0x1f : 0x3fff);
10469 if (optimize
&& GET_CODE (ad
) == PLUS
)
10470 temp
= simplify_binary_operation (PLUS
, Pmode
,
10471 XEXP (ad
, 0), XEXP (ad
, 1));
10473 new_rtx
= temp
? temp
: ad
;
10476 && GET_CODE (new_rtx
) == PLUS
10477 && GET_CODE (XEXP (new_rtx
, 0)) == REG
10478 && GET_CODE (XEXP (new_rtx
, 1)) == CONST_INT
)
10480 offset
= INTVAL (XEXP ((new_rtx
), 1));
10482 /* Choose rounding direction. Round up if we are >= halfway. */
10483 if ((offset
& mask
) >= ((mask
+ 1) / 2))
10484 newoffset
= (offset
& ~mask
) + mask
+ 1;
10486 newoffset
= offset
& ~mask
;
10488 /* Ensure that long displacements are aligned. */
10490 && (GET_MODE_CLASS (mode
) == MODE_FLOAT
10491 || (TARGET_64BIT
&& (mode
) == DImode
)))
10492 newoffset
&= ~(GET_MODE_SIZE (mode
) - 1);
10494 if (newoffset
!= 0 && VAL_14_BITS_P (newoffset
))
10496 temp
= gen_rtx_PLUS (Pmode
, XEXP (new_rtx
, 0),
10497 GEN_INT (newoffset
));
10498 ad
= gen_rtx_PLUS (Pmode
, temp
, GEN_INT (offset
- newoffset
));
10499 push_reload (XEXP (ad
, 0), 0, &XEXP (ad
, 0), 0,
10500 BASE_REG_CLASS
, Pmode
, VOIDmode
, 0, 0,
10501 opnum
, (enum reload_type
) type
);
10509 /* Output address vector. */
10512 pa_output_addr_vec (rtx lab
, rtx body
)
10514 int idx
, vlen
= XVECLEN (body
, 0);
10516 targetm
.asm_out
.internal_label (asm_out_file
, "L", CODE_LABEL_NUMBER (lab
));
10518 fputs ("\t.begin_brtab\n", asm_out_file
);
10519 for (idx
= 0; idx
< vlen
; idx
++)
10521 ASM_OUTPUT_ADDR_VEC_ELT
10522 (asm_out_file
, CODE_LABEL_NUMBER (XEXP (XVECEXP (body
, 0, idx
), 0)));
10525 fputs ("\t.end_brtab\n", asm_out_file
);
10528 /* Output address difference vector. */
10531 pa_output_addr_diff_vec (rtx lab
, rtx body
)
10533 rtx base
= XEXP (XEXP (body
, 0), 0);
10534 int idx
, vlen
= XVECLEN (body
, 1);
10536 targetm
.asm_out
.internal_label (asm_out_file
, "L", CODE_LABEL_NUMBER (lab
));
10538 fputs ("\t.begin_brtab\n", asm_out_file
);
10539 for (idx
= 0; idx
< vlen
; idx
++)
10541 ASM_OUTPUT_ADDR_DIFF_ELT
10544 CODE_LABEL_NUMBER (XEXP (XVECEXP (body
, 1, idx
), 0)),
10545 CODE_LABEL_NUMBER (base
));
10548 fputs ("\t.end_brtab\n", asm_out_file
);