1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
29 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "insn-attr.h"
40 #include "integrate.h"
42 #include "diagnostic-core.h"
48 #include "common/common-target.h"
49 #include "target-def.h"
50 #include "langhooks.h"
54 /* Return nonzero if there is a bypass for the output of
55 OUT_INSN and the fp store IN_INSN. */
57 hppa_fpstore_bypass_p (rtx out_insn
, rtx in_insn
)
59 enum machine_mode store_mode
;
60 enum machine_mode other_mode
;
63 if (recog_memoized (in_insn
) < 0
64 || (get_attr_type (in_insn
) != TYPE_FPSTORE
65 && get_attr_type (in_insn
) != TYPE_FPSTORE_LOAD
)
66 || recog_memoized (out_insn
) < 0)
69 store_mode
= GET_MODE (SET_SRC (PATTERN (in_insn
)));
71 set
= single_set (out_insn
);
75 other_mode
= GET_MODE (SET_SRC (set
));
77 return (GET_MODE_SIZE (store_mode
) == GET_MODE_SIZE (other_mode
));
81 #ifndef DO_FRAME_NOTES
82 #ifdef INCOMING_RETURN_ADDR_RTX
83 #define DO_FRAME_NOTES 1
85 #define DO_FRAME_NOTES 0
89 static void pa_option_override (void);
90 static void copy_reg_pointer (rtx
, rtx
);
91 static void fix_range (const char *);
92 static int hppa_register_move_cost (enum machine_mode mode
, reg_class_t
,
94 static int hppa_address_cost (rtx
, bool);
95 static bool hppa_rtx_costs (rtx
, int, int, int *, bool);
96 static inline rtx
force_mode (enum machine_mode
, rtx
);
97 static void pa_reorg (void);
98 static void pa_combine_instructions (void);
99 static int pa_can_combine_p (rtx
, rtx
, rtx
, int, rtx
, rtx
, rtx
);
100 static bool forward_branch_p (rtx
);
101 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT
, unsigned *);
102 static int compute_movmem_length (rtx
);
103 static int compute_clrmem_length (rtx
);
104 static bool pa_assemble_integer (rtx
, unsigned int, int);
105 static void remove_useless_addtr_insns (int);
106 static void store_reg (int, HOST_WIDE_INT
, int);
107 static void store_reg_modify (int, int, HOST_WIDE_INT
);
108 static void load_reg (int, HOST_WIDE_INT
, int);
109 static void set_reg_plus_d (int, int, HOST_WIDE_INT
, int);
110 static rtx
pa_function_value (const_tree
, const_tree
, bool);
111 static rtx
pa_libcall_value (enum machine_mode
, const_rtx
);
112 static bool pa_function_value_regno_p (const unsigned int);
113 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT
);
114 static void update_total_code_bytes (unsigned int);
115 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT
);
116 static int pa_adjust_cost (rtx
, rtx
, rtx
, int);
117 static int pa_adjust_priority (rtx
, int);
118 static int pa_issue_rate (void);
119 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED
;
120 static section
*pa_select_section (tree
, int, unsigned HOST_WIDE_INT
)
122 static void pa_encode_section_info (tree
, rtx
, int);
123 static const char *pa_strip_name_encoding (const char *);
124 static bool pa_function_ok_for_sibcall (tree
, tree
);
125 static void pa_globalize_label (FILE *, const char *)
127 static void pa_asm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
128 HOST_WIDE_INT
, tree
);
129 #if !defined(USE_COLLECT2)
130 static void pa_asm_out_constructor (rtx
, int);
131 static void pa_asm_out_destructor (rtx
, int);
133 static void pa_init_builtins (void);
134 static rtx
pa_expand_builtin (tree
, rtx
, rtx
, enum machine_mode mode
, int);
135 static rtx
hppa_builtin_saveregs (void);
136 static void hppa_va_start (tree
, rtx
);
137 static tree
hppa_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
138 static bool pa_scalar_mode_supported_p (enum machine_mode
);
139 static bool pa_commutative_p (const_rtx x
, int outer_code
);
140 static void copy_fp_args (rtx
) ATTRIBUTE_UNUSED
;
141 static int length_fp_args (rtx
) ATTRIBUTE_UNUSED
;
142 static rtx
hppa_legitimize_address (rtx
, rtx
, enum machine_mode
);
143 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED
;
144 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED
;
145 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED
;
146 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED
;
147 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED
;
148 static void pa_som_file_start (void) ATTRIBUTE_UNUSED
;
149 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED
;
150 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED
;
151 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED
;
152 static void output_deferred_plabels (void);
153 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED
;
154 #ifdef ASM_OUTPUT_EXTERNAL_REAL
155 static void pa_hpux_file_end (void);
157 #if HPUX_LONG_DOUBLE_LIBRARY
158 static void pa_hpux_init_libfuncs (void);
160 static rtx
pa_struct_value_rtx (tree
, int);
161 static bool pa_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
163 static int pa_arg_partial_bytes (CUMULATIVE_ARGS
*, enum machine_mode
,
165 static void pa_function_arg_advance (CUMULATIVE_ARGS
*, enum machine_mode
,
167 static rtx
pa_function_arg (CUMULATIVE_ARGS
*, enum machine_mode
,
169 static unsigned int pa_function_arg_boundary (enum machine_mode
, const_tree
);
170 static struct machine_function
* pa_init_machine_status (void);
171 static reg_class_t
pa_secondary_reload (bool, rtx
, reg_class_t
,
173 secondary_reload_info
*);
174 static void pa_extra_live_on_entry (bitmap
);
175 static enum machine_mode
pa_promote_function_mode (const_tree
,
176 enum machine_mode
, int *,
179 static void pa_asm_trampoline_template (FILE *);
180 static void pa_trampoline_init (rtx
, tree
, rtx
);
181 static rtx
pa_trampoline_adjust_address (rtx
);
182 static rtx
pa_delegitimize_address (rtx
);
183 static bool pa_print_operand_punct_valid_p (unsigned char);
184 static rtx
pa_internal_arg_pointer (void);
185 static bool pa_can_eliminate (const int, const int);
186 static void pa_conditional_register_usage (void);
187 static enum machine_mode
pa_c_mode_for_suffix (char);
188 static section
*pa_function_section (tree
, enum node_frequency
, bool, bool);
189 static bool pa_cannot_force_const_mem (enum machine_mode
, rtx
);
190 static bool pa_legitimate_constant_p (enum machine_mode
, rtx
);
192 /* The following extra sections are only used for SOM. */
193 static GTY(()) section
*som_readonly_data_section
;
194 static GTY(()) section
*som_one_only_readonly_data_section
;
195 static GTY(()) section
*som_one_only_data_section
;
197 /* Counts for the number of callee-saved general and floating point
198 registers which were saved by the current function's prologue. */
199 static int gr_saved
, fr_saved
;
201 /* Boolean indicating whether the return pointer was saved by the
202 current function's prologue. */
203 static bool rp_saved
;
205 static rtx
find_addr_reg (rtx
);
207 /* Keep track of the number of bytes we have output in the CODE subspace
208 during this compilation so we'll know when to emit inline long-calls. */
209 unsigned long total_code_bytes
;
211 /* The last address of the previous function plus the number of bytes in
212 associated thunks that have been output. This is used to determine if
213 a thunk can use an IA-relative branch to reach its target function. */
214 static unsigned int last_address
;
216 /* Variables to handle plabels that we discover are necessary at assembly
217 output time. They are output after the current function. */
218 struct GTY(()) deferred_plabel
223 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel
*
225 static size_t n_deferred_plabels
= 0;
227 /* Initialize the GCC target structure. */
229 #undef TARGET_OPTION_OVERRIDE
230 #define TARGET_OPTION_OVERRIDE pa_option_override
232 #undef TARGET_ASM_ALIGNED_HI_OP
233 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
234 #undef TARGET_ASM_ALIGNED_SI_OP
235 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
236 #undef TARGET_ASM_ALIGNED_DI_OP
237 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
238 #undef TARGET_ASM_UNALIGNED_HI_OP
239 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
240 #undef TARGET_ASM_UNALIGNED_SI_OP
241 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
242 #undef TARGET_ASM_UNALIGNED_DI_OP
243 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
244 #undef TARGET_ASM_INTEGER
245 #define TARGET_ASM_INTEGER pa_assemble_integer
247 #undef TARGET_ASM_FUNCTION_PROLOGUE
248 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
249 #undef TARGET_ASM_FUNCTION_EPILOGUE
250 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
252 #undef TARGET_FUNCTION_VALUE
253 #define TARGET_FUNCTION_VALUE pa_function_value
254 #undef TARGET_LIBCALL_VALUE
255 #define TARGET_LIBCALL_VALUE pa_libcall_value
256 #undef TARGET_FUNCTION_VALUE_REGNO_P
257 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
259 #undef TARGET_LEGITIMIZE_ADDRESS
260 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
262 #undef TARGET_SCHED_ADJUST_COST
263 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
264 #undef TARGET_SCHED_ADJUST_PRIORITY
265 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
266 #undef TARGET_SCHED_ISSUE_RATE
267 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
269 #undef TARGET_ENCODE_SECTION_INFO
270 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
271 #undef TARGET_STRIP_NAME_ENCODING
272 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
274 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
275 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
277 #undef TARGET_COMMUTATIVE_P
278 #define TARGET_COMMUTATIVE_P pa_commutative_p
280 #undef TARGET_ASM_OUTPUT_MI_THUNK
281 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
282 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
283 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
285 #undef TARGET_ASM_FILE_END
286 #ifdef ASM_OUTPUT_EXTERNAL_REAL
287 #define TARGET_ASM_FILE_END pa_hpux_file_end
289 #define TARGET_ASM_FILE_END output_deferred_plabels
292 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
293 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
295 #if !defined(USE_COLLECT2)
296 #undef TARGET_ASM_CONSTRUCTOR
297 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
298 #undef TARGET_ASM_DESTRUCTOR
299 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
302 #undef TARGET_INIT_BUILTINS
303 #define TARGET_INIT_BUILTINS pa_init_builtins
305 #undef TARGET_EXPAND_BUILTIN
306 #define TARGET_EXPAND_BUILTIN pa_expand_builtin
308 #undef TARGET_REGISTER_MOVE_COST
309 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
310 #undef TARGET_RTX_COSTS
311 #define TARGET_RTX_COSTS hppa_rtx_costs
312 #undef TARGET_ADDRESS_COST
313 #define TARGET_ADDRESS_COST hppa_address_cost
315 #undef TARGET_MACHINE_DEPENDENT_REORG
316 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
318 #if HPUX_LONG_DOUBLE_LIBRARY
319 #undef TARGET_INIT_LIBFUNCS
320 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
323 #undef TARGET_PROMOTE_FUNCTION_MODE
324 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
325 #undef TARGET_PROMOTE_PROTOTYPES
326 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
328 #undef TARGET_STRUCT_VALUE_RTX
329 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
330 #undef TARGET_RETURN_IN_MEMORY
331 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
332 #undef TARGET_MUST_PASS_IN_STACK
333 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
334 #undef TARGET_PASS_BY_REFERENCE
335 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
336 #undef TARGET_CALLEE_COPIES
337 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
338 #undef TARGET_ARG_PARTIAL_BYTES
339 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
340 #undef TARGET_FUNCTION_ARG
341 #define TARGET_FUNCTION_ARG pa_function_arg
342 #undef TARGET_FUNCTION_ARG_ADVANCE
343 #define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
344 #undef TARGET_FUNCTION_ARG_BOUNDARY
345 #define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
347 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
348 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
349 #undef TARGET_EXPAND_BUILTIN_VA_START
350 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
351 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
352 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
354 #undef TARGET_SCALAR_MODE_SUPPORTED_P
355 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
357 #undef TARGET_CANNOT_FORCE_CONST_MEM
358 #define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
360 #undef TARGET_SECONDARY_RELOAD
361 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
363 #undef TARGET_EXTRA_LIVE_ON_ENTRY
364 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
366 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
367 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
368 #undef TARGET_TRAMPOLINE_INIT
369 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
370 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
371 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
372 #undef TARGET_DELEGITIMIZE_ADDRESS
373 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
374 #undef TARGET_INTERNAL_ARG_POINTER
375 #define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
376 #undef TARGET_CAN_ELIMINATE
377 #define TARGET_CAN_ELIMINATE pa_can_eliminate
378 #undef TARGET_CONDITIONAL_REGISTER_USAGE
379 #define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
380 #undef TARGET_C_MODE_FOR_SUFFIX
381 #define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
382 #undef TARGET_ASM_FUNCTION_SECTION
383 #define TARGET_ASM_FUNCTION_SECTION pa_function_section
385 #undef TARGET_LEGITIMATE_CONSTANT_P
386 #define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
388 struct gcc_target targetm
= TARGET_INITIALIZER
;
390 /* Parse the -mfixed-range= option string. */
393 fix_range (const char *const_str
)
396 char *str
, *dash
, *comma
;
398 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
399 REG2 are either register names or register numbers. The effect
400 of this option is to mark the registers in the range from REG1 to
401 REG2 as ``fixed'' so they won't be used by the compiler. This is
402 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
404 i
= strlen (const_str
);
405 str
= (char *) alloca (i
+ 1);
406 memcpy (str
, const_str
, i
+ 1);
410 dash
= strchr (str
, '-');
413 warning (0, "value of -mfixed-range must have form REG1-REG2");
418 comma
= strchr (dash
+ 1, ',');
422 first
= decode_reg_name (str
);
425 warning (0, "unknown register name: %s", str
);
429 last
= decode_reg_name (dash
+ 1);
432 warning (0, "unknown register name: %s", dash
+ 1);
440 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
444 for (i
= first
; i
<= last
; ++i
)
445 fixed_regs
[i
] = call_used_regs
[i
] = 1;
454 /* Check if all floating point registers have been fixed. */
455 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
460 target_flags
|= MASK_DISABLE_FPREGS
;
463 /* Implement the TARGET_OPTION_OVERRIDE hook. */
466 pa_option_override (void)
469 cl_deferred_option
*opt
;
470 VEC(cl_deferred_option
,heap
) *vec
471 = (VEC(cl_deferred_option
,heap
) *) pa_deferred_options
;
473 FOR_EACH_VEC_ELT (cl_deferred_option
, vec
, i
, opt
)
475 switch (opt
->opt_index
)
477 case OPT_mfixed_range_
:
478 fix_range (opt
->arg
);
486 /* Unconditional branches in the delay slot are not compatible with dwarf2
487 call frame information. There is no benefit in using this optimization
488 on PA8000 and later processors. */
489 if (pa_cpu
>= PROCESSOR_8000
490 || (targetm_common
.except_unwind_info (&global_options
) == UI_DWARF2
492 || flag_unwind_tables
)
493 target_flags
&= ~MASK_JUMP_IN_DELAY
;
495 if (flag_pic
&& TARGET_PORTABLE_RUNTIME
)
497 warning (0, "PIC code generation is not supported in the portable runtime model");
500 if (flag_pic
&& TARGET_FAST_INDIRECT_CALLS
)
502 warning (0, "PIC code generation is not compatible with fast indirect calls");
505 if (! TARGET_GAS
&& write_symbols
!= NO_DEBUG
)
507 warning (0, "-g is only supported when using GAS on this processor,");
508 warning (0, "-g option disabled");
509 write_symbols
= NO_DEBUG
;
512 /* We only support the "big PIC" model now. And we always generate PIC
513 code when in 64bit mode. */
514 if (flag_pic
== 1 || TARGET_64BIT
)
517 /* Disable -freorder-blocks-and-partition as we don't support hot and
518 cold partitioning. */
519 if (flag_reorder_blocks_and_partition
)
521 inform (input_location
,
522 "-freorder-blocks-and-partition does not work "
523 "on this architecture");
524 flag_reorder_blocks_and_partition
= 0;
525 flag_reorder_blocks
= 1;
528 /* We can't guarantee that .dword is available for 32-bit targets. */
529 if (UNITS_PER_WORD
== 4)
530 targetm
.asm_out
.aligned_op
.di
= NULL
;
532 /* The unaligned ops are only available when using GAS. */
535 targetm
.asm_out
.unaligned_op
.hi
= NULL
;
536 targetm
.asm_out
.unaligned_op
.si
= NULL
;
537 targetm
.asm_out
.unaligned_op
.di
= NULL
;
540 init_machine_status
= pa_init_machine_status
;
545 PA_BUILTIN_COPYSIGNQ
,
548 PA_BUILTIN_HUGE_VALQ
,
552 static GTY(()) tree pa_builtins
[(int) PA_BUILTIN_max
];
555 pa_init_builtins (void)
557 #ifdef DONT_HAVE_FPUTC_UNLOCKED
558 built_in_decls
[(int) BUILT_IN_FPUTC_UNLOCKED
] =
559 built_in_decls
[(int) BUILT_IN_PUTC_UNLOCKED
];
560 implicit_built_in_decls
[(int) BUILT_IN_FPUTC_UNLOCKED
]
561 = implicit_built_in_decls
[(int) BUILT_IN_PUTC_UNLOCKED
];
564 if (built_in_decls
[BUILT_IN_FINITE
])
565 set_user_assembler_name (built_in_decls
[BUILT_IN_FINITE
], "_Isfinite");
566 if (built_in_decls
[BUILT_IN_FINITEF
])
567 set_user_assembler_name (built_in_decls
[BUILT_IN_FINITEF
], "_Isfinitef");
570 if (HPUX_LONG_DOUBLE_LIBRARY
)
574 /* Under HPUX, the __float128 type is a synonym for "long double". */
575 (*lang_hooks
.types
.register_builtin_type
) (long_double_type_node
,
578 /* TFmode support builtins. */
579 ftype
= build_function_type_list (long_double_type_node
,
580 long_double_type_node
,
582 decl
= add_builtin_function ("__builtin_fabsq", ftype
,
583 PA_BUILTIN_FABSQ
, BUILT_IN_MD
,
584 "_U_Qfabs", NULL_TREE
);
585 TREE_READONLY (decl
) = 1;
586 pa_builtins
[PA_BUILTIN_FABSQ
] = decl
;
588 ftype
= build_function_type_list (long_double_type_node
,
589 long_double_type_node
,
590 long_double_type_node
,
592 decl
= add_builtin_function ("__builtin_copysignq", ftype
,
593 PA_BUILTIN_COPYSIGNQ
, BUILT_IN_MD
,
594 "_U_Qfcopysign", NULL_TREE
);
595 TREE_READONLY (decl
) = 1;
596 pa_builtins
[PA_BUILTIN_COPYSIGNQ
] = decl
;
598 ftype
= build_function_type_list (long_double_type_node
, NULL_TREE
);
599 decl
= add_builtin_function ("__builtin_infq", ftype
,
600 PA_BUILTIN_INFQ
, BUILT_IN_MD
,
602 pa_builtins
[PA_BUILTIN_INFQ
] = decl
;
604 decl
= add_builtin_function ("__builtin_huge_valq", ftype
,
605 PA_BUILTIN_HUGE_VALQ
, BUILT_IN_MD
,
607 pa_builtins
[PA_BUILTIN_HUGE_VALQ
] = decl
;
612 pa_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
613 enum machine_mode mode ATTRIBUTE_UNUSED
,
614 int ignore ATTRIBUTE_UNUSED
)
616 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
617 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
621 case PA_BUILTIN_FABSQ
:
622 case PA_BUILTIN_COPYSIGNQ
:
623 return expand_call (exp
, target
, ignore
);
625 case PA_BUILTIN_INFQ
:
626 case PA_BUILTIN_HUGE_VALQ
:
628 enum machine_mode target_mode
= TYPE_MODE (TREE_TYPE (exp
));
633 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, target_mode
);
635 tmp
= validize_mem (force_const_mem (target_mode
, tmp
));
638 target
= gen_reg_rtx (target_mode
);
640 emit_move_insn (target
, tmp
);
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  /* GC-allocated and zeroed, so every field starts in a known state.  */
  return ggc_alloc_cleared_machine_function ();
}
661 /* If FROM is a probable pointer register, mark TO as a probable
662 pointer register with the same pointer alignment as FROM. */
665 copy_reg_pointer (rtx to
, rtx from
)
667 if (REG_POINTER (from
))
668 mark_reg_pointer (to
, REGNO_POINTER_ALIGN (REGNO (from
)));
671 /* Return 1 if X contains a symbolic expression. We know these
672 expressions will have one of a few well defined forms, so
673 we need only check those forms. */
675 symbolic_expression_p (rtx x
)
678 /* Strip off any HIGH. */
679 if (GET_CODE (x
) == HIGH
)
682 return (symbolic_operand (x
, VOIDmode
));
685 /* Accept any constant that can be moved in one instruction into a
688 cint_ok_for_move (HOST_WIDE_INT ival
)
690 /* OK if ldo, ldil, or zdepi, can be used. */
691 return (VAL_14_BITS_P (ival
)
692 || ldil_cint_p (ival
)
693 || zdepi_cint_p (ival
));
696 /* True iff ldil can be used to load this CONST_INT. The least
697 significant 11 bits of the value must be zero and the value must
698 not change sign when extended from 32 to 64 bits. */
700 ldil_cint_p (HOST_WIDE_INT ival
)
702 HOST_WIDE_INT x
= ival
& (((HOST_WIDE_INT
) -1 << 31) | 0x7ff);
704 return x
== 0 || x
== ((HOST_WIDE_INT
) -1 << 31);
707 /* True iff zdepi can be used to generate this CONST_INT.
708 zdepi first sign extends a 5-bit signed number to a given field
709 length, then places this field anywhere in a zero. */
711 zdepi_cint_p (unsigned HOST_WIDE_INT x
)
713 unsigned HOST_WIDE_INT lsb_mask
, t
;
715 /* This might not be obvious, but it's at least fast.
716 This function is critical; we don't have the time loops would take. */
718 t
= ((x
>> 4) + lsb_mask
) & ~(lsb_mask
- 1);
719 /* Return true iff t is a power of two. */
720 return ((t
& (t
- 1)) == 0);
723 /* True iff depi or extru can be used to compute (reg & mask).
724 Accept bit pattern like these:
729 and_mask_p (unsigned HOST_WIDE_INT mask
)
732 mask
+= mask
& -mask
;
733 return (mask
& (mask
- 1)) == 0;
736 /* True iff depi can be used to compute (reg | MASK). */
738 ior_mask_p (unsigned HOST_WIDE_INT mask
)
740 mask
+= mask
& -mask
;
741 return (mask
& (mask
- 1)) == 0;
744 /* Legitimize PIC addresses. If the address is already
745 position-independent, we return ORIG. Newly generated
746 position-independent addresses go to REG. If we need more
747 than one register, we lose. */
750 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
754 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig
));
756 /* Labels need special handling. */
757 if (pic_label_operand (orig
, mode
))
761 /* We do not want to go through the movXX expanders here since that
762 would create recursion.
764 Nor do we really want to call a generator for a named pattern
765 since that requires multiple patterns if we want to support
768 So instead we just emit the raw set, which avoids the movXX
769 expanders completely. */
770 mark_reg_pointer (reg
, BITS_PER_UNIT
);
771 insn
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, orig
));
773 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
774 add_reg_note (insn
, REG_EQUAL
, orig
);
776 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
777 and update LABEL_NUSES because this is not done automatically. */
778 if (reload_in_progress
|| reload_completed
)
780 /* Extract LABEL_REF. */
781 if (GET_CODE (orig
) == CONST
)
782 orig
= XEXP (XEXP (orig
, 0), 0);
783 /* Extract CODE_LABEL. */
784 orig
= XEXP (orig
, 0);
785 add_reg_note (insn
, REG_LABEL_OPERAND
, orig
);
786 LABEL_NUSES (orig
)++;
788 crtl
->uses_pic_offset_table
= 1;
791 if (GET_CODE (orig
) == SYMBOL_REF
)
797 /* Before reload, allocate a temporary register for the intermediate
798 result. This allows the sequence to be deleted when the final
799 result is unused and the insns are trivially dead. */
800 tmp_reg
= ((reload_in_progress
|| reload_completed
)
801 ? reg
: gen_reg_rtx (Pmode
));
803 if (function_label_operand (orig
, VOIDmode
))
805 /* Force function label into memory in word mode. */
806 orig
= XEXP (force_const_mem (word_mode
, orig
), 0);
807 /* Load plabel address from DLT. */
808 emit_move_insn (tmp_reg
,
809 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
810 gen_rtx_HIGH (word_mode
, orig
)));
812 = gen_const_mem (Pmode
,
813 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
814 gen_rtx_UNSPEC (Pmode
,
817 emit_move_insn (reg
, pic_ref
);
818 /* Now load address of function descriptor. */
819 pic_ref
= gen_rtx_MEM (Pmode
, reg
);
823 /* Load symbol reference from DLT. */
824 emit_move_insn (tmp_reg
,
825 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
826 gen_rtx_HIGH (word_mode
, orig
)));
828 = gen_const_mem (Pmode
,
829 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
830 gen_rtx_UNSPEC (Pmode
,
835 crtl
->uses_pic_offset_table
= 1;
836 mark_reg_pointer (reg
, BITS_PER_UNIT
);
837 insn
= emit_move_insn (reg
, pic_ref
);
839 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
840 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
844 else if (GET_CODE (orig
) == CONST
)
848 if (GET_CODE (XEXP (orig
, 0)) == PLUS
849 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
853 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
855 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
856 orig
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
857 base
== reg
? 0 : reg
);
859 if (GET_CODE (orig
) == CONST_INT
)
861 if (INT_14_BITS (orig
))
862 return plus_constant (base
, INTVAL (orig
));
863 orig
= force_reg (Pmode
, orig
);
865 pic_ref
= gen_rtx_PLUS (Pmode
, base
, orig
);
866 /* Likewise, should we set special REG_NOTEs here? */
872 static GTY(()) rtx gen_tls_tga
;
875 gen_tls_get_addr (void)
878 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
883 hppa_tls_call (rtx arg
)
887 ret
= gen_reg_rtx (Pmode
);
888 emit_library_call_value (gen_tls_get_addr (), ret
,
889 LCT_CONST
, Pmode
, 1, arg
, Pmode
);
895 legitimize_tls_address (rtx addr
)
897 rtx ret
, insn
, tmp
, t1
, t2
, tp
;
898 enum tls_model model
= SYMBOL_REF_TLS_MODEL (addr
);
902 case TLS_MODEL_GLOBAL_DYNAMIC
:
903 tmp
= gen_reg_rtx (Pmode
);
905 emit_insn (gen_tgd_load_pic (tmp
, addr
));
907 emit_insn (gen_tgd_load (tmp
, addr
));
908 ret
= hppa_tls_call (tmp
);
911 case TLS_MODEL_LOCAL_DYNAMIC
:
912 ret
= gen_reg_rtx (Pmode
);
913 tmp
= gen_reg_rtx (Pmode
);
916 emit_insn (gen_tld_load_pic (tmp
, addr
));
918 emit_insn (gen_tld_load (tmp
, addr
));
919 t1
= hppa_tls_call (tmp
);
922 t2
= gen_reg_rtx (Pmode
);
923 emit_libcall_block (insn
, t2
, t1
,
924 gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
926 emit_insn (gen_tld_offset_load (ret
, addr
, t2
));
929 case TLS_MODEL_INITIAL_EXEC
:
930 tp
= gen_reg_rtx (Pmode
);
931 tmp
= gen_reg_rtx (Pmode
);
932 ret
= gen_reg_rtx (Pmode
);
933 emit_insn (gen_tp_load (tp
));
935 emit_insn (gen_tie_load_pic (tmp
, addr
));
937 emit_insn (gen_tie_load (tmp
, addr
));
938 emit_move_insn (ret
, gen_rtx_PLUS (Pmode
, tp
, tmp
));
941 case TLS_MODEL_LOCAL_EXEC
:
942 tp
= gen_reg_rtx (Pmode
);
943 ret
= gen_reg_rtx (Pmode
);
944 emit_insn (gen_tp_load (tp
));
945 emit_insn (gen_tle_load (ret
, addr
, tp
));
955 /* Try machine-dependent ways of modifying an illegitimate address
956 to be legitimate. If we find one, return the new, valid address.
957 This macro is used in only one place: `memory_address' in explow.c.
959 OLDX is the address as it was before break_out_memory_refs was called.
960 In some cases it is useful to look at this to decide what needs to be done.
962 It is always safe for this macro to do nothing. It exists to recognize
963 opportunities to optimize the output.
965 For the PA, transform:
967 memory(X + <large int>)
971 if (<large int> & mask) >= 16
972 Y = (<large int> & ~mask) + mask + 1 Round up.
974 Y = (<large int> & ~mask) Round down.
976 memory (Z + (<large int> - Y));
978 This is for CSE to find several similar references, and only use one Z.
980 X can either be a SYMBOL_REF or REG, but because combine cannot
981 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
982 D will not fit in 14 bits.
984 MODE_FLOAT references allow displacements which fit in 5 bits, so use
987 MODE_INT references allow displacements which fit in 14 bits, so use
990 This relies on the fact that most mode MODE_FLOAT references will use FP
991 registers and most mode MODE_INT references will use integer registers.
992 (In the rare case of an FP register used in an integer MODE, we depend
993 on secondary reloads to clean things up.)
996 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
997 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
998 addressing modes to be used).
1000 Put X and Z into registers. Then put the entire expression into
1004 hppa_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
1005 enum machine_mode mode
)
1009 /* We need to canonicalize the order of operands in unscaled indexed
1010 addresses since the code that checks if an address is valid doesn't
1011 always try both orders. */
1012 if (!TARGET_NO_SPACE_REGS
1013 && GET_CODE (x
) == PLUS
1014 && GET_MODE (x
) == Pmode
1015 && REG_P (XEXP (x
, 0))
1016 && REG_P (XEXP (x
, 1))
1017 && REG_POINTER (XEXP (x
, 0))
1018 && !REG_POINTER (XEXP (x
, 1)))
1019 return gen_rtx_PLUS (Pmode
, XEXP (x
, 1), XEXP (x
, 0));
1021 if (PA_SYMBOL_REF_TLS_P (x
))
1022 return legitimize_tls_address (x
);
1024 return legitimize_pic_address (x
, mode
, gen_reg_rtx (Pmode
));
1026 /* Strip off CONST. */
1027 if (GET_CODE (x
) == CONST
)
1030 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1031 That should always be safe. */
1032 if (GET_CODE (x
) == PLUS
1033 && GET_CODE (XEXP (x
, 0)) == REG
1034 && GET_CODE (XEXP (x
, 1)) == SYMBOL_REF
)
1036 rtx reg
= force_reg (Pmode
, XEXP (x
, 1));
1037 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg
, XEXP (x
, 0)));
1040 /* Note we must reject symbols which represent function addresses
1041 since the assembler/linker can't handle arithmetic on plabels. */
1042 if (GET_CODE (x
) == PLUS
1043 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1044 && ((GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
1045 && !FUNCTION_NAME_P (XSTR (XEXP (x
, 0), 0)))
1046 || GET_CODE (XEXP (x
, 0)) == REG
))
1048 rtx int_part
, ptr_reg
;
1050 int offset
= INTVAL (XEXP (x
, 1));
1053 mask
= (GET_MODE_CLASS (mode
) == MODE_FLOAT
1054 ? (INT14_OK_STRICT
? 0x3fff : 0x1f) : 0x3fff);
1056 /* Choose which way to round the offset. Round up if we
1057 are >= halfway to the next boundary. */
1058 if ((offset
& mask
) >= ((mask
+ 1) / 2))
1059 newoffset
= (offset
& ~ mask
) + mask
+ 1;
1061 newoffset
= (offset
& ~ mask
);
1063 /* If the newoffset will not fit in 14 bits (ldo), then
1064 handling this would take 4 or 5 instructions (2 to load
1065 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1066 add the new offset and the SYMBOL_REF.) Combine can
1067 not handle 4->2 or 5->2 combinations, so do not create
1069 if (! VAL_14_BITS_P (newoffset
)
1070 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
)
1072 rtx const_part
= plus_constant (XEXP (x
, 0), newoffset
);
1075 gen_rtx_HIGH (Pmode
, const_part
));
1078 gen_rtx_LO_SUM (Pmode
,
1079 tmp_reg
, const_part
));
1083 if (! VAL_14_BITS_P (newoffset
))
1084 int_part
= force_reg (Pmode
, GEN_INT (newoffset
));
1086 int_part
= GEN_INT (newoffset
);
1088 ptr_reg
= force_reg (Pmode
,
1089 gen_rtx_PLUS (Pmode
,
1090 force_reg (Pmode
, XEXP (x
, 0)),
1093 return plus_constant (ptr_reg
, offset
- newoffset
);
1096 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1098 if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == MULT
1099 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
1100 && shadd_constant_p (INTVAL (XEXP (XEXP (x
, 0), 1)))
1101 && (OBJECT_P (XEXP (x
, 1))
1102 || GET_CODE (XEXP (x
, 1)) == SUBREG
)
1103 && GET_CODE (XEXP (x
, 1)) != CONST
)
1105 int val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1109 if (GET_CODE (reg1
) != REG
)
1110 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1112 reg2
= XEXP (XEXP (x
, 0), 0);
1113 if (GET_CODE (reg2
) != REG
)
1114 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1116 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
,
1117 gen_rtx_MULT (Pmode
,
1123 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1125 Only do so for floating point modes since this is more speculative
1126 and we lose if it's an integer store. */
1127 if (GET_CODE (x
) == PLUS
1128 && GET_CODE (XEXP (x
, 0)) == PLUS
1129 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
1130 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
1131 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
1132 && (mode
== SFmode
|| mode
== DFmode
))
1135 /* First, try and figure out what to use as a base register. */
1136 rtx reg1
, reg2
, base
, idx
;
1138 reg1
= XEXP (XEXP (x
, 0), 1);
1143 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1144 then emit_move_sequence will turn on REG_POINTER so we'll know
1145 it's a base register below. */
1146 if (GET_CODE (reg1
) != REG
)
1147 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1149 if (GET_CODE (reg2
) != REG
)
1150 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1152 /* Figure out what the base and index are. */
1154 if (GET_CODE (reg1
) == REG
1155 && REG_POINTER (reg1
))
1158 idx
= gen_rtx_PLUS (Pmode
,
1159 gen_rtx_MULT (Pmode
,
1160 XEXP (XEXP (XEXP (x
, 0), 0), 0),
1161 XEXP (XEXP (XEXP (x
, 0), 0), 1)),
1164 else if (GET_CODE (reg2
) == REG
1165 && REG_POINTER (reg2
))
1174 /* If the index adds a large constant, try to scale the
1175 constant so that it can be loaded with only one insn. */
1176 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1177 && VAL_14_BITS_P (INTVAL (XEXP (idx
, 1))
1178 / INTVAL (XEXP (XEXP (idx
, 0), 1)))
1179 && INTVAL (XEXP (idx
, 1)) % INTVAL (XEXP (XEXP (idx
, 0), 1)) == 0)
1181 /* Divide the CONST_INT by the scale factor, then add it to A. */
1182 int val
= INTVAL (XEXP (idx
, 1));
1184 val
/= INTVAL (XEXP (XEXP (idx
, 0), 1));
1185 reg1
= XEXP (XEXP (idx
, 0), 0);
1186 if (GET_CODE (reg1
) != REG
)
1187 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1189 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg1
, GEN_INT (val
)));
1191 /* We can now generate a simple scaled indexed address. */
1194 (Pmode
, gen_rtx_PLUS (Pmode
,
1195 gen_rtx_MULT (Pmode
, reg1
,
1196 XEXP (XEXP (idx
, 0), 1)),
1200 /* If B + C is still a valid base register, then add them. */
1201 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1202 && INTVAL (XEXP (idx
, 1)) <= 4096
1203 && INTVAL (XEXP (idx
, 1)) >= -4096)
1205 int val
= INTVAL (XEXP (XEXP (idx
, 0), 1));
1208 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, XEXP (idx
, 1)));
1210 reg2
= XEXP (XEXP (idx
, 0), 0);
1211 if (GET_CODE (reg2
) != CONST_INT
)
1212 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1214 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
,
1215 gen_rtx_MULT (Pmode
,
1221 /* Get the index into a register, then add the base + index and
1222 return a register holding the result. */
1224 /* First get A into a register. */
1225 reg1
= XEXP (XEXP (idx
, 0), 0);
1226 if (GET_CODE (reg1
) != REG
)
1227 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1229 /* And get B into a register. */
1230 reg2
= XEXP (idx
, 1);
1231 if (GET_CODE (reg2
) != REG
)
1232 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1234 reg1
= force_reg (Pmode
,
1235 gen_rtx_PLUS (Pmode
,
1236 gen_rtx_MULT (Pmode
, reg1
,
1237 XEXP (XEXP (idx
, 0), 1)),
1240 /* Add the result to our base register and return. */
1241 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, reg1
));
1245 /* Uh-oh. We might have an address for x[n-100000]. This needs
1246 special handling to avoid creating an indexed memory address
1247 with x-100000 as the base.
1249 If the constant part is small enough, then it's still safe because
1250 there is a guard page at the beginning and end of the data segment.
1252 Scaled references are common enough that we want to try and rearrange the
1253 terms so that we can use indexing for these addresses too. Only
1254 do the optimization for floatint point modes. */
1256 if (GET_CODE (x
) == PLUS
1257 && symbolic_expression_p (XEXP (x
, 1)))
1259 /* Ugly. We modify things here so that the address offset specified
1260 by the index expression is computed first, then added to x to form
1261 the entire address. */
1263 rtx regx1
, regx2
, regy1
, regy2
, y
;
1265 /* Strip off any CONST. */
1267 if (GET_CODE (y
) == CONST
)
1270 if (GET_CODE (y
) == PLUS
|| GET_CODE (y
) == MINUS
)
1272 /* See if this looks like
1273 (plus (mult (reg) (shadd_const))
1274 (const (plus (symbol_ref) (const_int))))
1276 Where const_int is small. In that case the const
1277 expression is a valid pointer for indexing.
1279 If const_int is big, but can be divided evenly by shadd_const
1280 and added to (reg). This allows more scaled indexed addresses. */
1281 if (GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1282 && GET_CODE (XEXP (x
, 0)) == MULT
1283 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1284 && INTVAL (XEXP (y
, 1)) >= -4096
1285 && INTVAL (XEXP (y
, 1)) <= 4095
1286 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
1287 && shadd_constant_p (INTVAL (XEXP (XEXP (x
, 0), 1))))
1289 int val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1293 if (GET_CODE (reg1
) != REG
)
1294 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1296 reg2
= XEXP (XEXP (x
, 0), 0);
1297 if (GET_CODE (reg2
) != REG
)
1298 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1300 return force_reg (Pmode
,
1301 gen_rtx_PLUS (Pmode
,
1302 gen_rtx_MULT (Pmode
,
1307 else if ((mode
== DFmode
|| mode
== SFmode
)
1308 && GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1309 && GET_CODE (XEXP (x
, 0)) == MULT
1310 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1311 && INTVAL (XEXP (y
, 1)) % INTVAL (XEXP (XEXP (x
, 0), 1)) == 0
1312 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
1313 && shadd_constant_p (INTVAL (XEXP (XEXP (x
, 0), 1))))
1316 = force_reg (Pmode
, GEN_INT (INTVAL (XEXP (y
, 1))
1317 / INTVAL (XEXP (XEXP (x
, 0), 1))));
1318 regx2
= XEXP (XEXP (x
, 0), 0);
1319 if (GET_CODE (regx2
) != REG
)
1320 regx2
= force_reg (Pmode
, force_operand (regx2
, 0));
1321 regx2
= force_reg (Pmode
, gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1325 gen_rtx_PLUS (Pmode
,
1326 gen_rtx_MULT (Pmode
, regx2
,
1327 XEXP (XEXP (x
, 0), 1)),
1328 force_reg (Pmode
, XEXP (y
, 0))));
1330 else if (GET_CODE (XEXP (y
, 1)) == CONST_INT
1331 && INTVAL (XEXP (y
, 1)) >= -4096
1332 && INTVAL (XEXP (y
, 1)) <= 4095)
1334 /* This is safe because of the guard page at the
1335 beginning and end of the data space. Just
1336 return the original address. */
1341 /* Doesn't look like one we can optimize. */
1342 regx1
= force_reg (Pmode
, force_operand (XEXP (x
, 0), 0));
1343 regy1
= force_reg (Pmode
, force_operand (XEXP (y
, 0), 0));
1344 regy2
= force_reg (Pmode
, force_operand (XEXP (y
, 1), 0));
1345 regx1
= force_reg (Pmode
,
1346 gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1348 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, regx1
, regy1
));
1356 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1358 Compute extra cost of moving data between one register class
1361 Make moves from SAR so expensive they should never happen. We used to
1362 have 0xffff here, but that generates overflow in rare cases.
1364 Copies involving a FP register and a non-FP register are relatively
1365 expensive because they must go through memory.
1367 Other copies are reasonably cheap. */
1370 hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
1371 reg_class_t from
, reg_class_t to
)
1373 if (from
== SHIFT_REGS
)
1375 else if (to
== SHIFT_REGS
&& FP_REG_CLASS_P (from
))
1377 else if ((FP_REG_CLASS_P (from
) && ! FP_REG_CLASS_P (to
))
1378 || (FP_REG_CLASS_P (to
) && ! FP_REG_CLASS_P (from
)))
1384 /* For the HPPA, REG and REG+CONST is cost 0
1385 and addresses involving symbolic constants are cost 2.
1387 PIC addresses are very expensive.
1389 It is no coincidence that this has the same structure
1390 as GO_IF_LEGITIMATE_ADDRESS. */
1393 hppa_address_cost (rtx X
,
1394 bool speed ATTRIBUTE_UNUSED
)
1396 switch (GET_CODE (X
))
1409 /* Compute a (partial) cost for rtx X. Return true if the complete
1410 cost has been computed, and false if subexpressions should be
1411 scanned. In either case, *TOTAL contains the cost result. */
1414 hppa_rtx_costs (rtx x
, int code
, int outer_code
, int *total
,
1415 bool speed ATTRIBUTE_UNUSED
)
1420 if (INTVAL (x
) == 0)
1422 else if (INT_14_BITS (x
))
1439 if ((x
== CONST0_RTX (DFmode
) || x
== CONST0_RTX (SFmode
))
1440 && outer_code
!= SET
)
1447 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
1448 *total
= COSTS_N_INSNS (3);
1449 else if (TARGET_PA_11
&& !TARGET_DISABLE_FPREGS
&& !TARGET_SOFT_FLOAT
)
1450 *total
= COSTS_N_INSNS (8);
1452 *total
= COSTS_N_INSNS (20);
1456 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
1458 *total
= COSTS_N_INSNS (14);
1466 *total
= COSTS_N_INSNS (60);
1469 case PLUS
: /* this includes shNadd insns */
1471 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
1472 *total
= COSTS_N_INSNS (3);
1474 *total
= COSTS_N_INSNS (1);
1480 *total
= COSTS_N_INSNS (1);
1488 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1489 new rtx with the correct mode. */
1491 force_mode (enum machine_mode mode
, rtx orig
)
1493 if (mode
== GET_MODE (orig
))
1496 gcc_assert (REGNO (orig
) < FIRST_PSEUDO_REGISTER
);
1498 return gen_rtx_REG (mode
, REGNO (orig
));
1501 /* Return 1 if *X is a thread-local symbol. */
1504 pa_tls_symbol_ref_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
1506 return PA_SYMBOL_REF_TLS_P (*x
);
1509 /* Return 1 if X contains a thread-local symbol. */
1512 pa_tls_referenced_p (rtx x
)
1514 if (!TARGET_HAVE_TLS
)
1517 return for_each_rtx (&x
, &pa_tls_symbol_ref_1
, 0);
1520 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1523 pa_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
1525 return pa_tls_referenced_p (x
);
1528 /* Emit insns to move operands[1] into operands[0].
1530 Return 1 if we have written out everything that needs to be done to
1531 do the move. Otherwise, return 0 and the caller will emit the move
1534 Note SCRATCH_REG may not be in the proper mode depending on how it
1535 will be used. This routine is responsible for creating a new copy
1536 of SCRATCH_REG in the proper mode. */
1539 emit_move_sequence (rtx
*operands
, enum machine_mode mode
, rtx scratch_reg
)
1541 register rtx operand0
= operands
[0];
1542 register rtx operand1
= operands
[1];
1545 /* We can only handle indexed addresses in the destination operand
1546 of floating point stores. Thus, we need to break out indexed
1547 addresses from the destination operand. */
1548 if (GET_CODE (operand0
) == MEM
&& IS_INDEX_ADDR_P (XEXP (operand0
, 0)))
1550 gcc_assert (can_create_pseudo_p ());
1552 tem
= copy_to_mode_reg (Pmode
, XEXP (operand0
, 0));
1553 operand0
= replace_equiv_address (operand0
, tem
);
1556 /* On targets with non-equivalent space registers, break out unscaled
1557 indexed addresses from the source operand before the final CSE.
1558 We have to do this because the REG_POINTER flag is not correctly
1559 carried through various optimization passes and CSE may substitute
1560 a pseudo without the pointer set for one with the pointer set. As
1561 a result, we loose various opportunities to create insns with
1562 unscaled indexed addresses. */
1563 if (!TARGET_NO_SPACE_REGS
1564 && !cse_not_expected
1565 && GET_CODE (operand1
) == MEM
1566 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1567 && REG_P (XEXP (XEXP (operand1
, 0), 0))
1568 && REG_P (XEXP (XEXP (operand1
, 0), 1)))
1570 = replace_equiv_address (operand1
,
1571 copy_to_mode_reg (Pmode
, XEXP (operand1
, 0)));
1574 && reload_in_progress
&& GET_CODE (operand0
) == REG
1575 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
1576 operand0
= reg_equiv_mem (REGNO (operand0
));
1577 else if (scratch_reg
1578 && reload_in_progress
&& GET_CODE (operand0
) == SUBREG
1579 && GET_CODE (SUBREG_REG (operand0
)) == REG
1580 && REGNO (SUBREG_REG (operand0
)) >= FIRST_PSEUDO_REGISTER
)
1582 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1583 the code which tracks sets/uses for delete_output_reload. */
1584 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand0
),
1585 reg_equiv_mem (REGNO (SUBREG_REG (operand0
))),
1586 SUBREG_BYTE (operand0
));
1587 operand0
= alter_subreg (&temp
);
1591 && reload_in_progress
&& GET_CODE (operand1
) == REG
1592 && REGNO (operand1
) >= FIRST_PSEUDO_REGISTER
)
1593 operand1
= reg_equiv_mem (REGNO (operand1
));
1594 else if (scratch_reg
1595 && reload_in_progress
&& GET_CODE (operand1
) == SUBREG
1596 && GET_CODE (SUBREG_REG (operand1
)) == REG
1597 && REGNO (SUBREG_REG (operand1
)) >= FIRST_PSEUDO_REGISTER
)
1599 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1600 the code which tracks sets/uses for delete_output_reload. */
1601 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand1
),
1602 reg_equiv_mem (REGNO (SUBREG_REG (operand1
))),
1603 SUBREG_BYTE (operand1
));
1604 operand1
= alter_subreg (&temp
);
1607 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand0
) == MEM
1608 && ((tem
= find_replacement (&XEXP (operand0
, 0)))
1609 != XEXP (operand0
, 0)))
1610 operand0
= replace_equiv_address (operand0
, tem
);
1612 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand1
) == MEM
1613 && ((tem
= find_replacement (&XEXP (operand1
, 0)))
1614 != XEXP (operand1
, 0)))
1615 operand1
= replace_equiv_address (operand1
, tem
);
1617 /* Handle secondary reloads for loads/stores of FP registers from
1618 REG+D addresses where D does not fit in 5 or 14 bits, including
1619 (subreg (mem (addr))) cases. */
1621 && fp_reg_operand (operand0
, mode
)
1622 && ((GET_CODE (operand1
) == MEM
1623 && !memory_address_p ((GET_MODE_SIZE (mode
) == 4 ? SFmode
: DFmode
),
1624 XEXP (operand1
, 0)))
1625 || ((GET_CODE (operand1
) == SUBREG
1626 && GET_CODE (XEXP (operand1
, 0)) == MEM
1627 && !memory_address_p ((GET_MODE_SIZE (mode
) == 4
1629 XEXP (XEXP (operand1
, 0), 0))))))
1631 if (GET_CODE (operand1
) == SUBREG
)
1632 operand1
= XEXP (operand1
, 0);
1634 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1635 it in WORD_MODE regardless of what mode it was originally given
1637 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1639 /* D might not fit in 14 bits either; for such cases load D into
1641 if (!memory_address_p (Pmode
, XEXP (operand1
, 0)))
1643 emit_move_insn (scratch_reg
, XEXP (XEXP (operand1
, 0), 1));
1644 emit_move_insn (scratch_reg
,
1645 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1
, 0)),
1647 XEXP (XEXP (operand1
, 0), 0),
1651 emit_move_insn (scratch_reg
, XEXP (operand1
, 0));
1652 emit_insn (gen_rtx_SET (VOIDmode
, operand0
,
1653 replace_equiv_address (operand1
, scratch_reg
)));
1656 else if (scratch_reg
1657 && fp_reg_operand (operand1
, mode
)
1658 && ((GET_CODE (operand0
) == MEM
1659 && !memory_address_p ((GET_MODE_SIZE (mode
) == 4
1661 XEXP (operand0
, 0)))
1662 || ((GET_CODE (operand0
) == SUBREG
)
1663 && GET_CODE (XEXP (operand0
, 0)) == MEM
1664 && !memory_address_p ((GET_MODE_SIZE (mode
) == 4
1666 XEXP (XEXP (operand0
, 0), 0)))))
1668 if (GET_CODE (operand0
) == SUBREG
)
1669 operand0
= XEXP (operand0
, 0);
1671 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1672 it in WORD_MODE regardless of what mode it was originally given
1674 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1676 /* D might not fit in 14 bits either; for such cases load D into
1678 if (!memory_address_p (Pmode
, XEXP (operand0
, 0)))
1680 emit_move_insn (scratch_reg
, XEXP (XEXP (operand0
, 0), 1));
1681 emit_move_insn (scratch_reg
, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0
,
1684 XEXP (XEXP (operand0
, 0),
1689 emit_move_insn (scratch_reg
, XEXP (operand0
, 0));
1690 emit_insn (gen_rtx_SET (VOIDmode
,
1691 replace_equiv_address (operand0
, scratch_reg
),
1695 /* Handle secondary reloads for loads of FP registers from constant
1696 expressions by forcing the constant into memory.
1698 Use scratch_reg to hold the address of the memory location.
1700 The proper fix is to change TARGET_PREFERRED_RELOAD_CLASS to return
1701 NO_REGS when presented with a const_int and a register class
1702 containing only FP registers. Doing so unfortunately creates
1703 more problems than it solves. Fix this for 2.5. */
1704 else if (scratch_reg
1705 && CONSTANT_P (operand1
)
1706 && fp_reg_operand (operand0
, mode
))
1708 rtx const_mem
, xoperands
[2];
1710 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1711 it in WORD_MODE regardless of what mode it was originally given
1713 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1715 /* Force the constant into memory and put the address of the
1716 memory location into scratch_reg. */
1717 const_mem
= force_const_mem (mode
, operand1
);
1718 xoperands
[0] = scratch_reg
;
1719 xoperands
[1] = XEXP (const_mem
, 0);
1720 emit_move_sequence (xoperands
, Pmode
, 0);
1722 /* Now load the destination register. */
1723 emit_insn (gen_rtx_SET (mode
, operand0
,
1724 replace_equiv_address (const_mem
, scratch_reg
)));
1727 /* Handle secondary reloads for SAR. These occur when trying to load
1728 the SAR from memory or a constant. */
1729 else if (scratch_reg
1730 && GET_CODE (operand0
) == REG
1731 && REGNO (operand0
) < FIRST_PSEUDO_REGISTER
1732 && REGNO_REG_CLASS (REGNO (operand0
)) == SHIFT_REGS
1733 && (GET_CODE (operand1
) == MEM
|| GET_CODE (operand1
) == CONST_INT
))
1735 /* D might not fit in 14 bits either; for such cases load D into
1737 if (GET_CODE (operand1
) == MEM
1738 && !memory_address_p (GET_MODE (operand0
), XEXP (operand1
, 0)))
1740 /* We are reloading the address into the scratch register, so we
1741 want to make sure the scratch register is a full register. */
1742 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1744 emit_move_insn (scratch_reg
, XEXP (XEXP (operand1
, 0), 1));
1745 emit_move_insn (scratch_reg
, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1
,
1748 XEXP (XEXP (operand1
, 0),
1752 /* Now we are going to load the scratch register from memory,
1753 we want to load it in the same width as the original MEM,
1754 which must be the same as the width of the ultimate destination,
1756 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1758 emit_move_insn (scratch_reg
,
1759 replace_equiv_address (operand1
, scratch_reg
));
1763 /* We want to load the scratch register using the same mode as
1764 the ultimate destination. */
1765 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1767 emit_move_insn (scratch_reg
, operand1
);
1770 /* And emit the insn to set the ultimate destination. We know that
1771 the scratch register has the same mode as the destination at this
1773 emit_move_insn (operand0
, scratch_reg
);
1776 /* Handle the most common case: storing into a register. */
1777 else if (register_operand (operand0
, mode
))
1779 if (register_operand (operand1
, mode
)
1780 || (GET_CODE (operand1
) == CONST_INT
1781 && cint_ok_for_move (INTVAL (operand1
)))
1782 || (operand1
== CONST0_RTX (mode
))
1783 || (GET_CODE (operand1
) == HIGH
1784 && !symbolic_operand (XEXP (operand1
, 0), VOIDmode
))
1785 /* Only `general_operands' can come here, so MEM is ok. */
1786 || GET_CODE (operand1
) == MEM
)
1788 /* Various sets are created during RTL generation which don't
1789 have the REG_POINTER flag correctly set. After the CSE pass,
1790 instruction recognition can fail if we don't consistently
1791 set this flag when performing register copies. This should
1792 also improve the opportunities for creating insns that use
1793 unscaled indexing. */
1794 if (REG_P (operand0
) && REG_P (operand1
))
1796 if (REG_POINTER (operand1
)
1797 && !REG_POINTER (operand0
)
1798 && !HARD_REGISTER_P (operand0
))
1799 copy_reg_pointer (operand0
, operand1
);
1802 /* When MEMs are broken out, the REG_POINTER flag doesn't
1803 get set. In some cases, we can set the REG_POINTER flag
1804 from the declaration for the MEM. */
1805 if (REG_P (operand0
)
1806 && GET_CODE (operand1
) == MEM
1807 && !REG_POINTER (operand0
))
1809 tree decl
= MEM_EXPR (operand1
);
1811 /* Set the register pointer flag and register alignment
1812 if the declaration for this memory reference is a
1818 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1820 if (TREE_CODE (decl
) == COMPONENT_REF
)
1821 decl
= TREE_OPERAND (decl
, 1);
1823 type
= TREE_TYPE (decl
);
1824 type
= strip_array_types (type
);
1826 if (POINTER_TYPE_P (type
))
1830 type
= TREE_TYPE (type
);
1831 /* Using TYPE_ALIGN_OK is rather conservative as
1832 only the ada frontend actually sets it. */
1833 align
= (TYPE_ALIGN_OK (type
) ? TYPE_ALIGN (type
)
1835 mark_reg_pointer (operand0
, align
);
1840 emit_insn (gen_rtx_SET (VOIDmode
, operand0
, operand1
));
1844 else if (GET_CODE (operand0
) == MEM
)
1846 if (mode
== DFmode
&& operand1
== CONST0_RTX (mode
)
1847 && !(reload_in_progress
|| reload_completed
))
1849 rtx temp
= gen_reg_rtx (DFmode
);
1851 emit_insn (gen_rtx_SET (VOIDmode
, temp
, operand1
));
1852 emit_insn (gen_rtx_SET (VOIDmode
, operand0
, temp
));
1855 if (register_operand (operand1
, mode
) || operand1
== CONST0_RTX (mode
))
1857 /* Run this case quickly. */
1858 emit_insn (gen_rtx_SET (VOIDmode
, operand0
, operand1
));
1861 if (! (reload_in_progress
|| reload_completed
))
1863 operands
[0] = validize_mem (operand0
);
1864 operands
[1] = operand1
= force_reg (mode
, operand1
);
1868 /* Simplify the source if we need to.
1869 Note we do have to handle function labels here, even though we do
1870 not consider them legitimate constants. Loop optimizations can
1871 call the emit_move_xxx with one as a source. */
1872 if ((GET_CODE (operand1
) != HIGH
&& immediate_operand (operand1
, mode
))
1873 || function_label_operand (operand1
, VOIDmode
)
1874 || (GET_CODE (operand1
) == HIGH
1875 && symbolic_operand (XEXP (operand1
, 0), mode
)))
1879 if (GET_CODE (operand1
) == HIGH
)
1882 operand1
= XEXP (operand1
, 0);
1884 if (symbolic_operand (operand1
, mode
))
1886 /* Argh. The assembler and linker can't handle arithmetic
1889 So we force the plabel into memory, load operand0 from
1890 the memory location, then add in the constant part. */
1891 if ((GET_CODE (operand1
) == CONST
1892 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1893 && function_label_operand (XEXP (XEXP (operand1
, 0), 0),
1895 || function_label_operand (operand1
, VOIDmode
))
1897 rtx temp
, const_part
;
1899 /* Figure out what (if any) scratch register to use. */
1900 if (reload_in_progress
|| reload_completed
)
1902 scratch_reg
= scratch_reg
? scratch_reg
: operand0
;
1903 /* SCRATCH_REG will hold an address and maybe the actual
1904 data. We want it in WORD_MODE regardless of what mode it
1905 was originally given to us. */
1906 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1909 scratch_reg
= gen_reg_rtx (Pmode
);
1911 if (GET_CODE (operand1
) == CONST
)
1913 /* Save away the constant part of the expression. */
1914 const_part
= XEXP (XEXP (operand1
, 0), 1);
1915 gcc_assert (GET_CODE (const_part
) == CONST_INT
);
1917 /* Force the function label into memory. */
1918 temp
= force_const_mem (mode
, XEXP (XEXP (operand1
, 0), 0));
1922 /* No constant part. */
1923 const_part
= NULL_RTX
;
1925 /* Force the function label into memory. */
1926 temp
= force_const_mem (mode
, operand1
);
1930 /* Get the address of the memory location. PIC-ify it if
1932 temp
= XEXP (temp
, 0);
1934 temp
= legitimize_pic_address (temp
, mode
, scratch_reg
);
1936 /* Put the address of the memory location into our destination
1939 emit_move_sequence (operands
, mode
, scratch_reg
);
1941 /* Now load from the memory location into our destination
1943 operands
[1] = gen_rtx_MEM (Pmode
, operands
[0]);
1944 emit_move_sequence (operands
, mode
, scratch_reg
);
1946 /* And add back in the constant part. */
1947 if (const_part
!= NULL_RTX
)
1948 expand_inc (operand0
, const_part
);
1957 if (reload_in_progress
|| reload_completed
)
1959 temp
= scratch_reg
? scratch_reg
: operand0
;
1960 /* TEMP will hold an address and maybe the actual
1961 data. We want it in WORD_MODE regardless of what mode it
1962 was originally given to us. */
1963 temp
= force_mode (word_mode
, temp
);
1966 temp
= gen_reg_rtx (Pmode
);
1968 /* (const (plus (symbol) (const_int))) must be forced to
1969 memory during/after reload if the const_int will not fit
1971 if (GET_CODE (operand1
) == CONST
1972 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1973 && GET_CODE (XEXP (XEXP (operand1
, 0), 1)) == CONST_INT
1974 && !INT_14_BITS (XEXP (XEXP (operand1
, 0), 1))
1975 && (reload_completed
|| reload_in_progress
)
1978 rtx const_mem
= force_const_mem (mode
, operand1
);
1979 operands
[1] = legitimize_pic_address (XEXP (const_mem
, 0),
1981 operands
[1] = replace_equiv_address (const_mem
, operands
[1]);
1982 emit_move_sequence (operands
, mode
, temp
);
1986 operands
[1] = legitimize_pic_address (operand1
, mode
, temp
);
1987 if (REG_P (operand0
) && REG_P (operands
[1]))
1988 copy_reg_pointer (operand0
, operands
[1]);
1989 emit_insn (gen_rtx_SET (VOIDmode
, operand0
, operands
[1]));
1992 /* On the HPPA, references to data space are supposed to use dp,
1993 register 27, but showing it in the RTL inhibits various cse
1994 and loop optimizations. */
1999 if (reload_in_progress
|| reload_completed
)
2001 temp
= scratch_reg
? scratch_reg
: operand0
;
2002 /* TEMP will hold an address and maybe the actual
2003 data. We want it in WORD_MODE regardless of what mode it
2004 was originally given to us. */
2005 temp
= force_mode (word_mode
, temp
);
2008 temp
= gen_reg_rtx (mode
);
2010 /* Loading a SYMBOL_REF into a register makes that register
2011 safe to be used as the base in an indexed address.
2013 Don't mark hard registers though. That loses. */
2014 if (GET_CODE (operand0
) == REG
2015 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
2016 mark_reg_pointer (operand0
, BITS_PER_UNIT
);
2017 if (REGNO (temp
) >= FIRST_PSEUDO_REGISTER
)
2018 mark_reg_pointer (temp
, BITS_PER_UNIT
);
2021 set
= gen_rtx_SET (mode
, operand0
, temp
);
2023 set
= gen_rtx_SET (VOIDmode
,
2025 gen_rtx_LO_SUM (mode
, temp
, operand1
));
2027 emit_insn (gen_rtx_SET (VOIDmode
,
2029 gen_rtx_HIGH (mode
, operand1
)));
2035 else if (pa_tls_referenced_p (operand1
))
2040 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
2042 addend
= XEXP (XEXP (tmp
, 0), 1);
2043 tmp
= XEXP (XEXP (tmp
, 0), 0);
2046 gcc_assert (GET_CODE (tmp
) == SYMBOL_REF
);
2047 tmp
= legitimize_tls_address (tmp
);
2050 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
2051 tmp
= force_operand (tmp
, operands
[0]);
2055 else if (GET_CODE (operand1
) != CONST_INT
2056 || !cint_ok_for_move (INTVAL (operand1
)))
2060 HOST_WIDE_INT value
= 0;
2061 HOST_WIDE_INT insv
= 0;
2064 if (GET_CODE (operand1
) == CONST_INT
)
2065 value
= INTVAL (operand1
);
2068 && GET_CODE (operand1
) == CONST_INT
2069 && HOST_BITS_PER_WIDE_INT
> 32
2070 && GET_MODE_BITSIZE (GET_MODE (operand0
)) > 32)
2074 /* Extract the low order 32 bits of the value and sign extend.
2075 If the new value is the same as the original value, we can
2076 can use the original value as-is. If the new value is
2077 different, we use it and insert the most-significant 32-bits
2078 of the original value into the final result. */
2079 nval
= ((value
& (((HOST_WIDE_INT
) 2 << 31) - 1))
2080 ^ ((HOST_WIDE_INT
) 1 << 31)) - ((HOST_WIDE_INT
) 1 << 31);
2083 #if HOST_BITS_PER_WIDE_INT > 32
2084 insv
= value
>= 0 ? value
>> 32 : ~(~value
>> 32);
2088 operand1
= GEN_INT (nval
);
2092 if (reload_in_progress
|| reload_completed
)
2093 temp
= scratch_reg
? scratch_reg
: operand0
;
2095 temp
= gen_reg_rtx (mode
);
2097 /* We don't directly split DImode constants on 32-bit targets
2098 because PLUS uses an 11-bit immediate and the insn sequence
2099 generated is not as efficient as the one using HIGH/LO_SUM. */
2100 if (GET_CODE (operand1
) == CONST_INT
2101 && GET_MODE_BITSIZE (mode
) <= BITS_PER_WORD
2102 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
2105 /* Directly break constant into high and low parts. This
2106 provides better optimization opportunities because various
2107 passes recognize constants split with PLUS but not LO_SUM.
2108 We use a 14-bit signed low part except when the addition
2109 of 0x4000 to the high part might change the sign of the
2111 HOST_WIDE_INT low
= value
& 0x3fff;
2112 HOST_WIDE_INT high
= value
& ~ 0x3fff;
2116 if (high
== 0x7fffc000 || (mode
== HImode
&& high
== 0x4000))
2124 emit_insn (gen_rtx_SET (VOIDmode
, temp
, GEN_INT (high
)));
2125 operands
[1] = gen_rtx_PLUS (mode
, temp
, GEN_INT (low
));
2129 emit_insn (gen_rtx_SET (VOIDmode
, temp
,
2130 gen_rtx_HIGH (mode
, operand1
)));
2131 operands
[1] = gen_rtx_LO_SUM (mode
, temp
, operand1
);
2134 insn
= emit_move_insn (operands
[0], operands
[1]);
2136 /* Now insert the most significant 32 bits of the value
2137 into the register. When we don't have a second register
2138 available, it could take up to nine instructions to load
2139 a 64-bit integer constant. Prior to reload, we force
2140 constants that would take more than three instructions
2141 to load to the constant pool. During and after reload,
2142 we have to handle all possible values. */
2145 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2146 register and the value to be inserted is outside the
2147 range that can be loaded with three depdi instructions. */
2148 if (temp
!= operand0
&& (insv
>= 16384 || insv
< -16384))
2150 operand1
= GEN_INT (insv
);
2152 emit_insn (gen_rtx_SET (VOIDmode
, temp
,
2153 gen_rtx_HIGH (mode
, operand1
)));
2154 emit_move_insn (temp
, gen_rtx_LO_SUM (mode
, temp
, operand1
));
2155 emit_insn (gen_insv (operand0
, GEN_INT (32),
2160 int len
= 5, pos
= 27;
2162 /* Insert the bits using the depdi instruction. */
2165 HOST_WIDE_INT v5
= ((insv
& 31) ^ 16) - 16;
2166 HOST_WIDE_INT sign
= v5
< 0;
2168 /* Left extend the insertion. */
2169 insv
= (insv
>= 0 ? insv
>> len
: ~(~insv
>> len
));
2170 while (pos
> 0 && (insv
& 1) == sign
)
2172 insv
= (insv
>= 0 ? insv
>> 1 : ~(~insv
>> 1));
2177 emit_insn (gen_insv (operand0
, GEN_INT (len
),
2178 GEN_INT (pos
), GEN_INT (v5
)));
2180 len
= pos
> 0 && pos
< 5 ? pos
: 5;
2186 set_unique_reg_note (insn
, REG_EQUAL
, op1
);
2191 /* Now have insn-emit do whatever it normally does. */
2195 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2196 it will need a link/runtime reloc). */
2199 reloc_needed (tree exp
)
2203 switch (TREE_CODE (exp
))
2208 case POINTER_PLUS_EXPR
:
2211 reloc
= reloc_needed (TREE_OPERAND (exp
, 0));
2212 reloc
|= reloc_needed (TREE_OPERAND (exp
, 1));
2216 case NON_LVALUE_EXPR
:
2217 reloc
= reloc_needed (TREE_OPERAND (exp
, 0));
2223 unsigned HOST_WIDE_INT ix
;
2225 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp
), ix
, value
)
2227 reloc
|= reloc_needed (value
);
2241 /* Return the best assembler insn template
2242 for moving operands[1] into operands[0] as a fullword. */
2244 singlemove_string (rtx
*operands
)
2246 HOST_WIDE_INT intval
;
2248 if (GET_CODE (operands
[0]) == MEM
)
2249 return "stw %r1,%0";
2250 if (GET_CODE (operands
[1]) == MEM
)
2252 if (GET_CODE (operands
[1]) == CONST_DOUBLE
)
2257 gcc_assert (GET_MODE (operands
[1]) == SFmode
);
2259 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2261 REAL_VALUE_FROM_CONST_DOUBLE (d
, operands
[1]);
2262 REAL_VALUE_TO_TARGET_SINGLE (d
, i
);
2264 operands
[1] = GEN_INT (i
);
2265 /* Fall through to CONST_INT case. */
2267 if (GET_CODE (operands
[1]) == CONST_INT
)
2269 intval
= INTVAL (operands
[1]);
2271 if (VAL_14_BITS_P (intval
))
2273 else if ((intval
& 0x7ff) == 0)
2274 return "ldil L'%1,%0";
2275 else if (zdepi_cint_p (intval
))
2276 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2278 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2280 return "copy %1,%0";
2284 /* Compute position (in OP[1]) and width (in OP[2])
2285 useful for copying IMM to a register using the zdepi
2286 instructions. Store the immediate value to insert in OP[0]. */
2288 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2292 /* Find the least significant set bit in IMM. */
2293 for (lsb
= 0; lsb
< 32; lsb
++)
2300 /* Choose variants based on *sign* of the 5-bit field. */
2301 if ((imm
& 0x10) == 0)
2302 len
= (lsb
<= 28) ? 4 : 32 - lsb
;
2305 /* Find the width of the bitstring in IMM. */
2306 for (len
= 5; len
< 32 - lsb
; len
++)
2308 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2312 /* Sign extend IMM as a 5-bit value. */
2313 imm
= (imm
& 0xf) - 0x10;
2321 /* Compute position (in OP[1]) and width (in OP[2])
2322 useful for copying IMM to a register using the depdi,z
2323 instructions. Store the immediate value to insert in OP[0]. */
2325 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2327 int lsb
, len
, maxlen
;
2329 maxlen
= MIN (HOST_BITS_PER_WIDE_INT
, 64);
2331 /* Find the least significant set bit in IMM. */
2332 for (lsb
= 0; lsb
< maxlen
; lsb
++)
2339 /* Choose variants based on *sign* of the 5-bit field. */
2340 if ((imm
& 0x10) == 0)
2341 len
= (lsb
<= maxlen
- 4) ? 4 : maxlen
- lsb
;
2344 /* Find the width of the bitstring in IMM. */
2345 for (len
= 5; len
< maxlen
- lsb
; len
++)
2347 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2351 /* Extend length if host is narrow and IMM is negative. */
2352 if (HOST_BITS_PER_WIDE_INT
== 32 && len
== maxlen
- lsb
)
2355 /* Sign extend IMM as a 5-bit value. */
2356 imm
= (imm
& 0xf) - 0x10;
2364 /* Output assembler code to perform a doubleword move insn
2365 with operands OPERANDS. */
2368 output_move_double (rtx
*operands
)
2370 enum { REGOP
, OFFSOP
, MEMOP
, CNSTOP
, RNDOP
} optype0
, optype1
;
2372 rtx addreg0
= 0, addreg1
= 0;
2374 /* First classify both operands. */
2376 if (REG_P (operands
[0]))
2378 else if (offsettable_memref_p (operands
[0]))
2380 else if (GET_CODE (operands
[0]) == MEM
)
2385 if (REG_P (operands
[1]))
2387 else if (CONSTANT_P (operands
[1]))
2389 else if (offsettable_memref_p (operands
[1]))
2391 else if (GET_CODE (operands
[1]) == MEM
)
2396 /* Check for the cases that the operand constraints are not
2397 supposed to allow to happen. */
2398 gcc_assert (optype0
== REGOP
|| optype1
== REGOP
);
2400 /* Handle copies between general and floating registers. */
2402 if (optype0
== REGOP
&& optype1
== REGOP
2403 && FP_REG_P (operands
[0]) ^ FP_REG_P (operands
[1]))
2405 if (FP_REG_P (operands
[0]))
2407 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands
);
2408 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands
);
2409 return "{fldds|fldd} -16(%%sp),%0";
2413 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands
);
2414 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands
);
2415 return "{ldws|ldw} -12(%%sp),%R0";
2419 /* Handle auto decrementing and incrementing loads and stores
2420 specifically, since the structure of the function doesn't work
2421 for them without major modification. Do it better when we learn
2422 this port about the general inc/dec addressing of PA.
2423 (This was written by tege. Chide him if it doesn't work.) */
2425 if (optype0
== MEMOP
)
2427 /* We have to output the address syntax ourselves, since print_operand
2428 doesn't deal with the addresses we want to use. Fix this later. */
2430 rtx addr
= XEXP (operands
[0], 0);
2431 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2433 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2435 operands
[0] = XEXP (addr
, 0);
2436 gcc_assert (GET_CODE (operands
[1]) == REG
2437 && GET_CODE (operands
[0]) == REG
);
2439 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2441 /* No overlap between high target register and address
2442 register. (We do this in a non-obvious way to
2443 save a register file writeback) */
2444 if (GET_CODE (addr
) == POST_INC
)
2445 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2446 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2448 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2450 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2452 operands
[0] = XEXP (addr
, 0);
2453 gcc_assert (GET_CODE (operands
[1]) == REG
2454 && GET_CODE (operands
[0]) == REG
);
2456 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2457 /* No overlap between high target register and address
2458 register. (We do this in a non-obvious way to save a
2459 register file writeback) */
2460 if (GET_CODE (addr
) == PRE_INC
)
2461 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2462 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2465 if (optype1
== MEMOP
)
2467 /* We have to output the address syntax ourselves, since print_operand
2468 doesn't deal with the addresses we want to use. Fix this later. */
2470 rtx addr
= XEXP (operands
[1], 0);
2471 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2473 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2475 operands
[1] = XEXP (addr
, 0);
2476 gcc_assert (GET_CODE (operands
[0]) == REG
2477 && GET_CODE (operands
[1]) == REG
);
2479 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2481 /* No overlap between high target register and address
2482 register. (We do this in a non-obvious way to
2483 save a register file writeback) */
2484 if (GET_CODE (addr
) == POST_INC
)
2485 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2486 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2490 /* This is an undefined situation. We should load into the
2491 address register *and* update that register. Probably
2492 we don't need to handle this at all. */
2493 if (GET_CODE (addr
) == POST_INC
)
2494 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2495 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2498 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2500 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2502 operands
[1] = XEXP (addr
, 0);
2503 gcc_assert (GET_CODE (operands
[0]) == REG
2504 && GET_CODE (operands
[1]) == REG
);
2506 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2508 /* No overlap between high target register and address
2509 register. (We do this in a non-obvious way to
2510 save a register file writeback) */
2511 if (GET_CODE (addr
) == PRE_INC
)
2512 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2513 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2517 /* This is an undefined situation. We should load into the
2518 address register *and* update that register. Probably
2519 we don't need to handle this at all. */
2520 if (GET_CODE (addr
) == PRE_INC
)
2521 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2522 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2525 else if (GET_CODE (addr
) == PLUS
2526 && GET_CODE (XEXP (addr
, 0)) == MULT
)
2529 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2531 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2533 xoperands
[0] = high_reg
;
2534 xoperands
[1] = XEXP (addr
, 1);
2535 xoperands
[2] = XEXP (XEXP (addr
, 0), 0);
2536 xoperands
[3] = XEXP (XEXP (addr
, 0), 1);
2537 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2539 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2543 xoperands
[0] = high_reg
;
2544 xoperands
[1] = XEXP (addr
, 1);
2545 xoperands
[2] = XEXP (XEXP (addr
, 0), 0);
2546 xoperands
[3] = XEXP (XEXP (addr
, 0), 1);
2547 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2549 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2554 /* If an operand is an unoffsettable memory ref, find a register
2555 we can increment temporarily to make it refer to the second word. */
2557 if (optype0
== MEMOP
)
2558 addreg0
= find_addr_reg (XEXP (operands
[0], 0));
2560 if (optype1
== MEMOP
)
2561 addreg1
= find_addr_reg (XEXP (operands
[1], 0));
2563 /* Ok, we can do one word at a time.
2564 Normally we do the low-numbered word first.
2566 In either case, set up in LATEHALF the operands to use
2567 for the high-numbered word and in some cases alter the
2568 operands in OPERANDS to be suitable for the low-numbered word. */
2570 if (optype0
== REGOP
)
2571 latehalf
[0] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2572 else if (optype0
== OFFSOP
)
2573 latehalf
[0] = adjust_address (operands
[0], SImode
, 4);
2575 latehalf
[0] = operands
[0];
2577 if (optype1
== REGOP
)
2578 latehalf
[1] = gen_rtx_REG (SImode
, REGNO (operands
[1]) + 1);
2579 else if (optype1
== OFFSOP
)
2580 latehalf
[1] = adjust_address (operands
[1], SImode
, 4);
2581 else if (optype1
== CNSTOP
)
2582 split_double (operands
[1], &operands
[1], &latehalf
[1]);
2584 latehalf
[1] = operands
[1];
2586 /* If the first move would clobber the source of the second one,
2587 do them in the other order.
2589 This can happen in two cases:
2591 mem -> register where the first half of the destination register
2592 is the same register used in the memory's address. Reload
2593 can create such insns.
2595 mem in this case will be either register indirect or register
2596 indirect plus a valid offset.
2598 register -> register move where REGNO(dst) == REGNO(src + 1)
2599 someone (Tim/Tege?) claimed this can happen for parameter loads.
2601 Handle mem -> register case first. */
2602 if (optype0
== REGOP
2603 && (optype1
== MEMOP
|| optype1
== OFFSOP
)
2604 && refers_to_regno_p (REGNO (operands
[0]), REGNO (operands
[0]) + 1,
2607 /* Do the late half first. */
2609 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
2610 output_asm_insn (singlemove_string (latehalf
), latehalf
);
2614 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
2615 return singlemove_string (operands
);
2618 /* Now handle register -> register case. */
2619 if (optype0
== REGOP
&& optype1
== REGOP
2620 && REGNO (operands
[0]) == REGNO (operands
[1]) + 1)
2622 output_asm_insn (singlemove_string (latehalf
), latehalf
);
2623 return singlemove_string (operands
);
2626 /* Normal case: do the two words, low-numbered first. */
2628 output_asm_insn (singlemove_string (operands
), operands
);
2630 /* Make any unoffsettable addresses point at high-numbered word. */
2632 output_asm_insn ("ldo 4(%0),%0", &addreg0
);
2634 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
2637 output_asm_insn (singlemove_string (latehalf
), latehalf
);
2639 /* Undo the adds we just did. */
2641 output_asm_insn ("ldo -4(%0),%0", &addreg0
);
2643 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
2649 output_fp_move_double (rtx
*operands
)
2651 if (FP_REG_P (operands
[0]))
2653 if (FP_REG_P (operands
[1])
2654 || operands
[1] == CONST0_RTX (GET_MODE (operands
[0])))
2655 output_asm_insn ("fcpy,dbl %f1,%0", operands
);
2657 output_asm_insn ("fldd%F1 %1,%0", operands
);
2659 else if (FP_REG_P (operands
[1]))
2661 output_asm_insn ("fstd%F0 %1,%0", operands
);
2667 gcc_assert (operands
[1] == CONST0_RTX (GET_MODE (operands
[0])));
2669 /* This is a pain. You have to be prepared to deal with an
2670 arbitrary address here including pre/post increment/decrement.
2672 so avoid this in the MD. */
2673 gcc_assert (GET_CODE (operands
[0]) == REG
);
2675 xoperands
[1] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2676 xoperands
[0] = operands
[0];
2677 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands
);
2682 /* Return a REG that occurs in ADDR with coefficient 1.
2683 ADDR can be effectively incremented by incrementing REG. */
2686 find_addr_reg (rtx addr
)
2688 while (GET_CODE (addr
) == PLUS
)
2690 if (GET_CODE (XEXP (addr
, 0)) == REG
)
2691 addr
= XEXP (addr
, 0);
2692 else if (GET_CODE (XEXP (addr
, 1)) == REG
)
2693 addr
= XEXP (addr
, 1);
2694 else if (CONSTANT_P (XEXP (addr
, 0)))
2695 addr
= XEXP (addr
, 1);
2696 else if (CONSTANT_P (XEXP (addr
, 1)))
2697 addr
= XEXP (addr
, 0);
2701 gcc_assert (GET_CODE (addr
) == REG
);
2705 /* Emit code to perform a block move.
2707 OPERANDS[0] is the destination pointer as a REG, clobbered.
2708 OPERANDS[1] is the source pointer as a REG, clobbered.
2709 OPERANDS[2] is a register for temporary storage.
2710 OPERANDS[3] is a register for temporary storage.
2711 OPERANDS[4] is the size as a CONST_INT
2712 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2713 OPERANDS[6] is another temporary register. */
2716 output_block_move (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
2718 int align
= INTVAL (operands
[5]);
2719 unsigned long n_bytes
= INTVAL (operands
[4]);
2721 /* We can't move more than a word at a time because the PA
2722 has no longer integer move insns. (Could use fp mem ops?) */
2723 if (align
> (TARGET_64BIT
? 8 : 4))
2724 align
= (TARGET_64BIT
? 8 : 4);
2726 /* Note that we know each loop below will execute at least twice
2727 (else we would have open-coded the copy). */
2731 /* Pre-adjust the loop counter. */
2732 operands
[4] = GEN_INT (n_bytes
- 16);
2733 output_asm_insn ("ldi %4,%2", operands
);
2736 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
2737 output_asm_insn ("ldd,ma 8(%1),%6", operands
);
2738 output_asm_insn ("std,ma %3,8(%0)", operands
);
2739 output_asm_insn ("addib,>= -16,%2,.-12", operands
);
2740 output_asm_insn ("std,ma %6,8(%0)", operands
);
2742 /* Handle the residual. There could be up to 7 bytes of
2743 residual to copy! */
2744 if (n_bytes
% 16 != 0)
2746 operands
[4] = GEN_INT (n_bytes
% 8);
2747 if (n_bytes
% 16 >= 8)
2748 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
2749 if (n_bytes
% 8 != 0)
2750 output_asm_insn ("ldd 0(%1),%6", operands
);
2751 if (n_bytes
% 16 >= 8)
2752 output_asm_insn ("std,ma %3,8(%0)", operands
);
2753 if (n_bytes
% 8 != 0)
2754 output_asm_insn ("stdby,e %6,%4(%0)", operands
);
2759 /* Pre-adjust the loop counter. */
2760 operands
[4] = GEN_INT (n_bytes
- 8);
2761 output_asm_insn ("ldi %4,%2", operands
);
2764 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
2765 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands
);
2766 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
2767 output_asm_insn ("addib,>= -8,%2,.-12", operands
);
2768 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands
);
2770 /* Handle the residual. There could be up to 7 bytes of
2771 residual to copy! */
2772 if (n_bytes
% 8 != 0)
2774 operands
[4] = GEN_INT (n_bytes
% 4);
2775 if (n_bytes
% 8 >= 4)
2776 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
2777 if (n_bytes
% 4 != 0)
2778 output_asm_insn ("ldw 0(%1),%6", operands
);
2779 if (n_bytes
% 8 >= 4)
2780 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
2781 if (n_bytes
% 4 != 0)
2782 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands
);
2787 /* Pre-adjust the loop counter. */
2788 operands
[4] = GEN_INT (n_bytes
- 4);
2789 output_asm_insn ("ldi %4,%2", operands
);
2792 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
2793 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands
);
2794 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
2795 output_asm_insn ("addib,>= -4,%2,.-12", operands
);
2796 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands
);
2798 /* Handle the residual. */
2799 if (n_bytes
% 4 != 0)
2801 if (n_bytes
% 4 >= 2)
2802 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
2803 if (n_bytes
% 2 != 0)
2804 output_asm_insn ("ldb 0(%1),%6", operands
);
2805 if (n_bytes
% 4 >= 2)
2806 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
2807 if (n_bytes
% 2 != 0)
2808 output_asm_insn ("stb %6,0(%0)", operands
);
2813 /* Pre-adjust the loop counter. */
2814 operands
[4] = GEN_INT (n_bytes
- 2);
2815 output_asm_insn ("ldi %4,%2", operands
);
2818 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands
);
2819 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands
);
2820 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands
);
2821 output_asm_insn ("addib,>= -2,%2,.-12", operands
);
2822 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands
);
2824 /* Handle the residual. */
2825 if (n_bytes
% 2 != 0)
2827 output_asm_insn ("ldb 0(%1),%3", operands
);
2828 output_asm_insn ("stb %3,0(%0)", operands
);
2837 /* Count the number of insns necessary to handle this block move.
2839 Basic structure is the same as emit_block_move, except that we
2840 count insns rather than emit them. */
2843 compute_movmem_length (rtx insn
)
2845 rtx pat
= PATTERN (insn
);
2846 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 7), 0));
2847 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 6), 0));
2848 unsigned int n_insns
= 0;
2850 /* We can't move more than four bytes at a time because the PA
2851 has no longer integer move insns. (Could use fp mem ops?) */
2852 if (align
> (TARGET_64BIT
? 8 : 4))
2853 align
= (TARGET_64BIT
? 8 : 4);
2855 /* The basic copying loop. */
2859 if (n_bytes
% (2 * align
) != 0)
2861 if ((n_bytes
% (2 * align
)) >= align
)
2864 if ((n_bytes
% align
) != 0)
2868 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2872 /* Emit code to perform a block clear.
2874 OPERANDS[0] is the destination pointer as a REG, clobbered.
2875 OPERANDS[1] is a register for temporary storage.
2876 OPERANDS[2] is the size as a CONST_INT
2877 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2880 output_block_clear (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
2882 int align
= INTVAL (operands
[3]);
2883 unsigned long n_bytes
= INTVAL (operands
[2]);
2885 /* We can't clear more than a word at a time because the PA
2886 has no longer integer move insns. */
2887 if (align
> (TARGET_64BIT
? 8 : 4))
2888 align
= (TARGET_64BIT
? 8 : 4);
2890 /* Note that we know each loop below will execute at least twice
2891 (else we would have open-coded the copy). */
2895 /* Pre-adjust the loop counter. */
2896 operands
[2] = GEN_INT (n_bytes
- 16);
2897 output_asm_insn ("ldi %2,%1", operands
);
2900 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
2901 output_asm_insn ("addib,>= -16,%1,.-4", operands
);
2902 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
2904 /* Handle the residual. There could be up to 7 bytes of
2905 residual to copy! */
2906 if (n_bytes
% 16 != 0)
2908 operands
[2] = GEN_INT (n_bytes
% 8);
2909 if (n_bytes
% 16 >= 8)
2910 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
2911 if (n_bytes
% 8 != 0)
2912 output_asm_insn ("stdby,e %%r0,%2(%0)", operands
);
2917 /* Pre-adjust the loop counter. */
2918 operands
[2] = GEN_INT (n_bytes
- 8);
2919 output_asm_insn ("ldi %2,%1", operands
);
2922 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
2923 output_asm_insn ("addib,>= -8,%1,.-4", operands
);
2924 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
2926 /* Handle the residual. There could be up to 7 bytes of
2927 residual to copy! */
2928 if (n_bytes
% 8 != 0)
2930 operands
[2] = GEN_INT (n_bytes
% 4);
2931 if (n_bytes
% 8 >= 4)
2932 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
2933 if (n_bytes
% 4 != 0)
2934 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands
);
2939 /* Pre-adjust the loop counter. */
2940 operands
[2] = GEN_INT (n_bytes
- 4);
2941 output_asm_insn ("ldi %2,%1", operands
);
2944 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
2945 output_asm_insn ("addib,>= -4,%1,.-4", operands
);
2946 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
2948 /* Handle the residual. */
2949 if (n_bytes
% 4 != 0)
2951 if (n_bytes
% 4 >= 2)
2952 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
2953 if (n_bytes
% 2 != 0)
2954 output_asm_insn ("stb %%r0,0(%0)", operands
);
2959 /* Pre-adjust the loop counter. */
2960 operands
[2] = GEN_INT (n_bytes
- 2);
2961 output_asm_insn ("ldi %2,%1", operands
);
2964 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
2965 output_asm_insn ("addib,>= -2,%1,.-4", operands
);
2966 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
2968 /* Handle the residual. */
2969 if (n_bytes
% 2 != 0)
2970 output_asm_insn ("stb %%r0,0(%0)", operands
);
2979 /* Count the number of insns necessary to handle this block move.
2981 Basic structure is the same as emit_block_move, except that we
2982 count insns rather than emit them. */
2985 compute_clrmem_length (rtx insn
)
2987 rtx pat
= PATTERN (insn
);
2988 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 4), 0));
2989 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 3), 0));
2990 unsigned int n_insns
= 0;
2992 /* We can't clear more than a word at a time because the PA
2993 has no longer integer move insns. */
2994 if (align
> (TARGET_64BIT
? 8 : 4))
2995 align
= (TARGET_64BIT
? 8 : 4);
2997 /* The basic loop. */
3001 if (n_bytes
% (2 * align
) != 0)
3003 if ((n_bytes
% (2 * align
)) >= align
)
3006 if ((n_bytes
% align
) != 0)
3010 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3016 output_and (rtx
*operands
)
3018 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3020 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3021 int ls0
, ls1
, ms0
, p
, len
;
3023 for (ls0
= 0; ls0
< 32; ls0
++)
3024 if ((mask
& (1 << ls0
)) == 0)
3027 for (ls1
= ls0
; ls1
< 32; ls1
++)
3028 if ((mask
& (1 << ls1
)) != 0)
3031 for (ms0
= ls1
; ms0
< 32; ms0
++)
3032 if ((mask
& (1 << ms0
)) == 0)
3035 gcc_assert (ms0
== 32);
3043 operands
[2] = GEN_INT (len
);
3044 return "{extru|extrw,u} %1,31,%2,%0";
3048 /* We could use this `depi' for the case above as well, but `depi'
3049 requires one more register file access than an `extru'. */
3054 operands
[2] = GEN_INT (p
);
3055 operands
[3] = GEN_INT (len
);
3056 return "{depi|depwi} 0,%2,%3,%0";
3060 return "and %1,%2,%0";
3063 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3064 storing the result in operands[0]. */
3066 output_64bit_and (rtx
*operands
)
3068 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3070 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3071 int ls0
, ls1
, ms0
, p
, len
;
3073 for (ls0
= 0; ls0
< HOST_BITS_PER_WIDE_INT
; ls0
++)
3074 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls0
)) == 0)
3077 for (ls1
= ls0
; ls1
< HOST_BITS_PER_WIDE_INT
; ls1
++)
3078 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls1
)) != 0)
3081 for (ms0
= ls1
; ms0
< HOST_BITS_PER_WIDE_INT
; ms0
++)
3082 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ms0
)) == 0)
3085 gcc_assert (ms0
== HOST_BITS_PER_WIDE_INT
);
3087 if (ls1
== HOST_BITS_PER_WIDE_INT
)
3093 operands
[2] = GEN_INT (len
);
3094 return "extrd,u %1,63,%2,%0";
3098 /* We could use this `depi' for the case above as well, but `depi'
3099 requires one more register file access than an `extru'. */
3104 operands
[2] = GEN_INT (p
);
3105 operands
[3] = GEN_INT (len
);
3106 return "depdi 0,%2,%3,%0";
3110 return "and %1,%2,%0";
3114 output_ior (rtx
*operands
)
3116 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3117 int bs0
, bs1
, p
, len
;
3119 if (INTVAL (operands
[2]) == 0)
3120 return "copy %1,%0";
3122 for (bs0
= 0; bs0
< 32; bs0
++)
3123 if ((mask
& (1 << bs0
)) != 0)
3126 for (bs1
= bs0
; bs1
< 32; bs1
++)
3127 if ((mask
& (1 << bs1
)) == 0)
3130 gcc_assert (bs1
== 32 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3135 operands
[2] = GEN_INT (p
);
3136 operands
[3] = GEN_INT (len
);
3137 return "{depi|depwi} -1,%2,%3,%0";
3140 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3141 storing the result in operands[0]. */
3143 output_64bit_ior (rtx
*operands
)
3145 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3146 int bs0
, bs1
, p
, len
;
3148 if (INTVAL (operands
[2]) == 0)
3149 return "copy %1,%0";
3151 for (bs0
= 0; bs0
< HOST_BITS_PER_WIDE_INT
; bs0
++)
3152 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs0
)) != 0)
3155 for (bs1
= bs0
; bs1
< HOST_BITS_PER_WIDE_INT
; bs1
++)
3156 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs1
)) == 0)
3159 gcc_assert (bs1
== HOST_BITS_PER_WIDE_INT
3160 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3165 operands
[2] = GEN_INT (p
);
3166 operands
[3] = GEN_INT (len
);
3167 return "depdi -1,%2,%3,%0";
3170 /* Target hook for assembling integer objects. This code handles
3171 aligned SI and DI integers specially since function references
3172 must be preceded by P%. */
3175 pa_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
3177 if (size
== UNITS_PER_WORD
3179 && function_label_operand (x
, VOIDmode
))
3181 fputs (size
== 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file
);
3182 output_addr_const (asm_out_file
, x
);
3183 fputc ('\n', asm_out_file
);
3186 return default_assemble_integer (x
, size
, aligned_p
);
/* Output an ascii string.  */
void
output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int co = 0;
      int io = 0;
      for (io = 0, co = 0; io < MIN (4, size - i); io++)
	{
	  register unsigned int c = (unsigned char) p[i + io];

	  /* Quote " and \ for the assembler; pass printable ASCII
	     through; everything else is emitted as a \xNN escape.  */
	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      unsigned int hexd;
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd =  c  / 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd =  c % 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}
      /* 243 leaves headroom for the worst-case expansion of the next
	 group within the assembler's input line limit.  */
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
3245 /* Try to rewrite floating point comparisons & branches to avoid
3246 useless add,tr insns.
3248 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3249 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3250 first attempt to remove useless add,tr insns. It is zero
3251 for the second pass as reorg sometimes leaves bogus REG_DEAD
3254 When CHECK_NOTES is zero we can only eliminate add,tr insns
3255 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3258 remove_useless_addtr_insns (int check_notes
)
3261 static int pass
= 0;
3263 /* This is fairly cheap, so always run it when optimizing. */
3267 int fbranch_count
= 0;
3269 /* Walk all the insns in this function looking for fcmp & fbranch
3270 instructions. Keep track of how many of each we find. */
3271 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3275 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3276 if (GET_CODE (insn
) != INSN
&& GET_CODE (insn
) != JUMP_INSN
)
3279 tmp
= PATTERN (insn
);
3281 /* It must be a set. */
3282 if (GET_CODE (tmp
) != SET
)
3285 /* If the destination is CCFP, then we've found an fcmp insn. */
3286 tmp
= SET_DEST (tmp
);
3287 if (GET_CODE (tmp
) == REG
&& REGNO (tmp
) == 0)
3293 tmp
= PATTERN (insn
);
3294 /* If this is an fbranch instruction, bump the fbranch counter. */
3295 if (GET_CODE (tmp
) == SET
3296 && SET_DEST (tmp
) == pc_rtx
3297 && GET_CODE (SET_SRC (tmp
)) == IF_THEN_ELSE
3298 && GET_CODE (XEXP (SET_SRC (tmp
), 0)) == NE
3299 && GET_CODE (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == REG
3300 && REGNO (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == 0)
3308 /* Find all floating point compare + branch insns. If possible,
3309 reverse the comparison & the branch to avoid add,tr insns. */
3310 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3314 /* Ignore anything that isn't an INSN. */
3315 if (GET_CODE (insn
) != INSN
)
3318 tmp
= PATTERN (insn
);
3320 /* It must be a set. */
3321 if (GET_CODE (tmp
) != SET
)
3324 /* The destination must be CCFP, which is register zero. */
3325 tmp
= SET_DEST (tmp
);
3326 if (GET_CODE (tmp
) != REG
|| REGNO (tmp
) != 0)
3329 /* INSN should be a set of CCFP.
3331 See if the result of this insn is used in a reversed FP
3332 conditional branch. If so, reverse our condition and
3333 the branch. Doing so avoids useless add,tr insns. */
3334 next
= next_insn (insn
);
3337 /* Jumps, calls and labels stop our search. */
3338 if (GET_CODE (next
) == JUMP_INSN
3339 || GET_CODE (next
) == CALL_INSN
3340 || GET_CODE (next
) == CODE_LABEL
)
3343 /* As does another fcmp insn. */
3344 if (GET_CODE (next
) == INSN
3345 && GET_CODE (PATTERN (next
)) == SET
3346 && GET_CODE (SET_DEST (PATTERN (next
))) == REG
3347 && REGNO (SET_DEST (PATTERN (next
))) == 0)
3350 next
= next_insn (next
);
3353 /* Is NEXT_INSN a branch? */
3355 && GET_CODE (next
) == JUMP_INSN
)
3357 rtx pattern
= PATTERN (next
);
3359 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3360 and CCFP dies, then reverse our conditional and the branch
3361 to avoid the add,tr. */
3362 if (GET_CODE (pattern
) == SET
3363 && SET_DEST (pattern
) == pc_rtx
3364 && GET_CODE (SET_SRC (pattern
)) == IF_THEN_ELSE
3365 && GET_CODE (XEXP (SET_SRC (pattern
), 0)) == NE
3366 && GET_CODE (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == REG
3367 && REGNO (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == 0
3368 && GET_CODE (XEXP (SET_SRC (pattern
), 1)) == PC
3369 && (fcmp_count
== fbranch_count
3371 && find_regno_note (next
, REG_DEAD
, 0))))
3373 /* Reverse the branch. */
3374 tmp
= XEXP (SET_SRC (pattern
), 1);
3375 XEXP (SET_SRC (pattern
), 1) = XEXP (SET_SRC (pattern
), 2);
3376 XEXP (SET_SRC (pattern
), 2) = tmp
;
3377 INSN_CODE (next
) = -1;
3379 /* Reverse our condition. */
3380 tmp
= PATTERN (insn
);
3381 PUT_CODE (XEXP (tmp
, 1),
3382 (reverse_condition_maybe_unordered
3383 (GET_CODE (XEXP (tmp
, 1)))));
3393 /* You may have trouble believing this, but this is the 32 bit HP-PA
3398 Variable arguments (optional; any number may be allocated)
3400 SP-(4*(N+9)) arg word N
3405 Fixed arguments (must be allocated; may remain unused)
3414 SP-32 External Data Pointer (DP)
3416 SP-24 External/stub RP (RP')
3420 SP-8 Calling Stub RP (RP'')
3425 SP-0 Stack Pointer (points to next available address)
3429 /* This function saves registers as follows. Registers marked with ' are
3430 this function's registers (as opposed to the previous function's).
3431 If a frame_pointer isn't needed, r4 is saved as a general register;
3432 the space for the frame pointer is still allocated, though, to keep
3438 SP (FP') Previous FP
3439 SP + 4 Alignment filler (sigh)
3440 SP + 8 Space for locals reserved here.
3444 SP + n All call saved register used.
3448 SP + o All call saved fp registers used.
3452 SP + p (SP') points to next available address.
3456 /* Global variables set by output_function_prologue(). */
3457 /* Size of frame. Need to know this to emit return insns from
3459 static HOST_WIDE_INT actual_fsize
, local_fsize
;
3460 static int save_fregs
;
3462 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3463 Handle case where DISP > 8k by using the add_high_const patterns.
3465 Note in DISP > 8k case, we will leave the high part of the address
3466 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
/* NOTE(review): this region was mangled by text extraction — original source
   line numbers are fused into the statements and some lines (braces, nested
   SET operands around original lines 3511-3516) are missing; the comments
   below describe only the visible logic.  */
3469 store_reg (int reg
, HOST_WIDE_INT disp
, int base
)
3471 rtx insn
, dest
, src
, basereg
;
3473 src
= gen_rtx_REG (word_mode
, reg
);
3474 basereg
= gen_rtx_REG (Pmode
, base
);
/* Case 1: DISP fits in a 14-bit immediate — store directly to BASE+DISP.  */
3475 if (VAL_14_BITS_P (disp
))
3477 dest
= gen_rtx_MEM (word_mode
, plus_constant (basereg
, disp
));
3478 insn
= emit_move_insn (dest
, src
);
/* Case 2 (64-bit only): DISP does not even fit in 32 bits — materialize
   DISP in %r1, add BASE, then store through %r1.  */
3480 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3482 rtx delta
= GEN_INT (disp
);
3483 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3485 emit_move_insn (tmpreg
, delta
);
3486 insn
= emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
/* Record a REG_FRAME_RELATED_EXPR note so the unwinder sees the
   address computation as tmpreg = basereg + delta.  */
3489 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3490 gen_rtx_SET (VOIDmode
, tmpreg
,
3491 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
3492 RTX_FRAME_RELATED_P (insn
) = 1;
3494 dest
= gen_rtx_MEM (word_mode
, tmpreg
);
3495 insn
= emit_move_insn (dest
, src
);
/* Case 3: large (> 8k) but 32-bit-representable displacement — use a
   HIGH/LO_SUM pair through %r1; the high part is left live in %r1
   (callers in the prologue/epilogue rely on that, see header comment).  */
3499 rtx delta
= GEN_INT (disp
);
3500 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
3501 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3503 emit_move_insn (tmpreg
, high
);
3504 dest
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
3505 insn
= emit_move_insn (dest
, src
);
3507 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3508 gen_rtx_SET (VOIDmode
,
3509 gen_rtx_MEM (word_mode
,
3510 gen_rtx_PLUS (word_mode
,
3517 RTX_FRAME_RELATED_P (insn
) = 1;
3520 /* Emit RTL to store REG at the memory location specified by BASE and then
3521 add MOD to BASE. MOD must be <= 8k. */
/* NOTE(review): extraction-mangled region; original line numbers are fused
   into the code and some lines (braces, the DO_FRAME_NOTES guard around
   original line 3536) are missing.  */
3524 store_reg_modify (int base
, int reg
, HOST_WIDE_INT mod
)
3526 rtx insn
, basereg
, srcreg
, delta
;
/* MOD must be encodable as a 14-bit displacement (the <= 8k contract).  */
3528 gcc_assert (VAL_14_BITS_P (mod
));
3530 basereg
= gen_rtx_REG (Pmode
, base
);
3531 srcreg
= gen_rtx_REG (word_mode
, reg
);
3532 delta
= GEN_INT (mod
);
/* gen_post_store emits the store and the base-register update as one
   PARALLEL insn (a post-modify store).  */
3534 insn
= emit_insn (gen_post_store (basereg
, srcreg
, delta
));
3537 RTX_FRAME_RELATED_P (insn
) = 1;
3539 /* RTX_FRAME_RELATED_P must be set on each frame related set
3540 in a parallel with more than one element. */
3541 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 0)) = 1;
3542 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
3546 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3547 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3548 whether to add a frame note or not.
3550 In the DISP > 8k case, we leave the high part of the address in %r1.
3551 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
/* NOTE(review): extraction-mangled region; original line numbers are fused
   into the statements and some brace/else lines are missing.  */
3554 set_reg_plus_d (int reg
, int base
, HOST_WIDE_INT disp
, int note
)
/* Small displacement: a single add of a 14-bit immediate.  */
3558 if (VAL_14_BITS_P (disp
))
3560 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3561 plus_constant (gen_rtx_REG (Pmode
, base
), disp
));
/* 64-bit displacement wider than 32 bits: load DISP into %r1, then add.  */
3563 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3565 rtx basereg
= gen_rtx_REG (Pmode
, base
);
3566 rtx delta
= GEN_INT (disp
);
3567 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3569 emit_move_insn (tmpreg
, delta
);
3570 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3571 gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
/* Describe the two-insn computation to the unwinder as one SET.  */
3573 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3574 gen_rtx_SET (VOIDmode
, tmpreg
,
3575 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
/* Large 32-bit displacement: add the HIGH part into %r1, then LO_SUM.
   The high part stays live in %r1 (see header comment).  */
3579 rtx basereg
= gen_rtx_REG (Pmode
, base
);
3580 rtx delta
= GEN_INT (disp
);
3581 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3583 emit_move_insn (tmpreg
,
3584 gen_rtx_PLUS (Pmode
, basereg
,
3585 gen_rtx_HIGH (Pmode
, delta
)));
3586 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3587 gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
/* Only tag the insn as frame-related when the caller asked for it.  */
3590 if (DO_FRAME_NOTES
&& note
)
3591 RTX_FRAME_RELATED_P (insn
) = 1;
/* Compute the total stack frame size for the current function, given SIZE
   (the size of the local variables) and setting *FREGS_LIVE nonzero if any
   floating-point register needs saving.  Must stay in sync with
   hppa_expand_prologue/hppa_expand_epilogue (see comment below).
   NOTE(review): extraction-mangled region; original line numbers are fused
   into the statements and some lines (braces, the *fregs_live store around
   original lines 3636-3643) are missing.  */
3595 compute_frame_size (HOST_WIDE_INT size
, int *fregs_live
)
3600 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3601 be consistent with the rounding and size calculation done here.
3602 Change them at the same time. */
3604 /* We do our own stack alignment. First, round the size of the
3605 stack locals up to a word boundary. */
3606 size
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3608 /* Space for previous frame pointer + filler. If any frame is
3609 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3610 waste some space here for the sake of HP compatibility. The
3611 first slot is only used when the frame pointer is needed. */
3612 if (size
|| frame_pointer_needed
)
3613 size
+= STARTING_FRAME_OFFSET
;
3615 /* If the current function calls __builtin_eh_return, then we need
3616 to allocate stack space for registers that will hold data for
3617 the exception handler. */
3618 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3622 for (i
= 0; EH_RETURN_DATA_REGNO (i
) != INVALID_REGNUM
; ++i
)
3624 size
+= i
* UNITS_PER_WORD
;
3627 /* Account for space used by the callee general register saves. */
3628 for (i
= 18, j
= frame_pointer_needed
? 4 : 3; i
>= j
; i
--)
3629 if (df_regs_ever_live_p (i
))
3630 size
+= UNITS_PER_WORD
;
3632 /* Account for space used by the callee floating point register saves. */
3633 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
3634 if (df_regs_ever_live_p (i
)
3635 || (!TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
3639 /* We always save both halves of the FP register, so always
3640 increment the frame size by 8 bytes. */
3644 /* If any of the floating registers are saved, account for the
3645 alignment needed for the floating point register save block. */
3648 size
= (size
+ 7) & ~7;
3653 /* The various ABIs include space for the outgoing parameters in the
3654 size of the current function's stack frame. We don't need to align
3655 for the outgoing arguments as their alignment is set by the final
3656 rounding for the frame as a whole. */
3657 size
+= crtl
->outgoing_args_size
;
3659 /* Allocate space for the fixed frame marker. This space must be
3660 allocated for any function that makes calls or allocates
3662 if (!current_function_is_leaf
|| size
)
3663 size
+= TARGET_64BIT
? 48 : 32;
3665 /* Finally, round to the preferred stack boundary. */
3666 return ((size
+ PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
- 1)
3667 & ~(PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
- 1));
3670 /* Generate the assembly code for function entry. FILE is a stdio
3671 stream to output the code to. SIZE is an int: how many units of
3672 temporary storage to allocate.
3674 Refer to the array `regs_ever_live' to determine which registers to
3675 save; `regs_ever_live[I]' is nonzero if register number I is ever
3676 used in the function. This function is responsible for knowing
3677 which registers should not be saved even if used. */
3679 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3680 of memory. If any fpu reg is used in the function, we allocate
3681 such a block here, at the bottom of the frame, just in case it's needed.
3683 If this function is a leaf procedure, then we may choose not
3684 to do a "save" insn. The decision about whether or not
3685 to do this is made in regclass.c. */
/* NOTE(review): extraction-mangled region; original line numbers are fused
   into the statements and several lines (braces, the rp_saved/else branches
   around original lines 3702-3706, 3729-3732) are missing.  Only assembler
   directives are emitted here; the real RTL work is in
   hppa_expand_prologue.  */
3688 pa_output_function_prologue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
3690 /* The function's label and associated .PROC must never be
3691 separated and must be output *after* any profiling declarations
3692 to avoid changing spaces/subspaces within a procedure. */
3693 ASM_OUTPUT_LABEL (file
, XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0));
3694 fputs ("\t.PROC\n", file
);
3696 /* hppa_expand_prologue does the dirty work now. We just need
3697 to output the assembler directives which denote the start
3699 fprintf (file
, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC
, actual_fsize
);
3700 if (current_function_is_leaf
)
3701 fputs (",NO_CALLS", file
);
3703 fputs (",CALLS", file
);
3705 fputs (",SAVE_RP", file
);
3707 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3708 at the beginning of the frame and that it is used as the frame
3709 pointer for the frame. We do this because our current frame
3710 layout doesn't conform to that specified in the HP runtime
3711 documentation and we need a way to indicate to programs such as
3712 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3713 isn't used by HP compilers but is supported by the assembler.
3714 However, SAVE_SP is supposed to indicate that the previous stack
3715 pointer has been saved in the frame marker. */
3716 if (frame_pointer_needed
)
3717 fputs (",SAVE_SP", file
)
;
3719 /* Pass on information about the number of callee register saves
3720 performed in the prologue.
3722 The compiler is supposed to pass the highest register number
3723 saved, the assembler then has to adjust that number before
3724 entering it into the unwind descriptor (to account for any
3725 caller saved registers with lower register numbers than the
3726 first callee saved register). */
3728 fprintf (file
, ",ENTRY_GR=%d", gr_saved
+ 2);
3731 fprintf (file
, ",ENTRY_FR=%d", fr_saved
+ 11);
3733 fputs ("\n\t.ENTRY\n", file
);
3735 remove_useless_addtr_insns (0);
/* Expand the function prologue as RTL: compute the frame size, save the
   return pointer, allocate the frame (setting up the frame pointer when
   needed), save callee-saved general and floating-point registers, and
   store the PIC register where the ABI requires.
   NOTE(review): extraction-mangled region — original source line numbers
   are fused into the statements and many lines (braces, else arms, the
   trailing arguments of several calls, e.g. original lines 3810-3812,
   3853-3856, 3943-3945, and the gr_saved/fr_saved bookkeeping) are
   missing.  Comments below describe only the visible logic.  */
3739 hppa_expand_prologue (void)
3741 int merge_sp_adjust_with_store
= 0;
3742 HOST_WIDE_INT size
= get_frame_size ();
3743 HOST_WIDE_INT offset
;
3751 /* Compute total size for frame pointer, filler, locals and rounding to
3752 the next word boundary. Similar code appears in compute_frame_size
3753 and must be changed in tandem with this code. */
3754 local_fsize
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3755 if (local_fsize
|| frame_pointer_needed
)
3756 local_fsize
+= STARTING_FRAME_OFFSET
;
3758 actual_fsize
= compute_frame_size (size
, &save_fregs
);
3759 if (flag_stack_usage_info
)
3760 current_function_static_stack_size
= actual_fsize
;
3762 /* Compute a few things we will use often. */
3763 tmpreg
= gen_rtx_REG (word_mode
, 1);
3765 /* Save RP first. The calling conventions manual states RP will
3766 always be stored into the caller's frame at sp - 20 or sp - 16
3767 depending on which ABI is in use. */
3768 if (df_regs_ever_live_p (2) || crtl
->calls_eh_return
)
3770 store_reg (2, TARGET_64BIT
? -16 : -20, STACK_POINTER_REGNUM
);
3776 /* Allocate the local frame and set up the frame pointer if needed. */
3777 if (actual_fsize
!= 0)
3779 if (frame_pointer_needed
)
3781 /* Copy the old frame pointer temporarily into %r1. Set up the
3782 new stack pointer, then store away the saved old frame pointer
3783 into the stack at sp and at the same time update the stack
3784 pointer by actual_fsize bytes. Two versions, first
3785 handles small (<8k) frames. The second handles large (>=8k)
3787 insn
= emit_move_insn (tmpreg
, hard_frame_pointer_rtx
);
3789 RTX_FRAME_RELATED_P (insn
) = 1;
3791 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
3793 RTX_FRAME_RELATED_P (insn
) = 1;
3795 if (VAL_14_BITS_P (actual_fsize
))
3796 store_reg_modify (STACK_POINTER_REGNUM
, 1, actual_fsize
);
3799 /* It is incorrect to store the saved frame pointer at *sp,
3800 then increment sp (writes beyond the current stack boundary).
3802 So instead use stwm to store at *sp and post-increment the
3803 stack pointer as an atomic operation. Then increment sp to
3804 finish allocating the new frame. */
3805 HOST_WIDE_INT adjust1
= 8192 - 64;
3806 HOST_WIDE_INT adjust2
= actual_fsize
- adjust1
;
3808 store_reg_modify (STACK_POINTER_REGNUM
, 1, adjust1
);
3809 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3813 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3814 we need to store the previous stack pointer (frame pointer)
3815 into the frame marker on targets that use the HP unwind
3816 library. This allows the HP unwind library to be used to
3817 unwind GCC frames. However, we are not fully compatible
3818 with the HP library because our frame layout differs from
3819 that specified in the HP runtime specification.
3821 We don't want a frame note on this instruction as the frame
3822 marker moves during dynamic stack allocation.
3824 This instruction also serves as a blockage to prevent
3825 register spills from being scheduled before the stack
3826 pointer is raised. This is necessary as we store
3827 registers using the frame pointer as a base register,
3828 and the frame pointer is set before sp is raised. */
3829 if (TARGET_HPUX_UNWIND_LIBRARY
)
3831 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
,
3832 GEN_INT (TARGET_64BIT
? -8 : -4));
3834 emit_move_insn (gen_rtx_MEM (word_mode
, addr
),
3835 hard_frame_pointer_rtx
);
3838 emit_insn (gen_blockage ());
3840 /* no frame pointer needed. */
3843 /* In some cases we can perform the first callee register save
3844 and allocating the stack frame at the same time. If so, just
3845 make a note of it and defer allocating the frame until saving
3846 the callee registers. */
3847 if (VAL_14_BITS_P (actual_fsize
) && local_fsize
== 0)
3848 merge_sp_adjust_with_store
= 1;
3849 /* Can not optimize. Adjust the stack frame by actual_fsize
3852 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3857 /* Normal register save.
3859 Do not save the frame pointer in the frame_pointer_needed case. It
3860 was done earlier. */
3861 if (frame_pointer_needed
)
3863 offset
= local_fsize
;
3865 /* Saving the EH return data registers in the frame is the simplest
3866 way to get the frame unwind information emitted. We put them
3867 just before the general registers. */
3868 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3870 unsigned int i
, regno
;
3874 regno
= EH_RETURN_DATA_REGNO (i
);
3875 if (regno
== INVALID_REGNUM
)
3878 store_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
3879 offset
+= UNITS_PER_WORD
;
/* Save callee-saved GRs %r18..%r4 relative to the frame pointer.  */
3883 for (i
= 18; i
>= 4; i
--)
3884 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
3886 store_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
3887 offset
+= UNITS_PER_WORD
;
3890 /* Account for %r3 which is saved in a special place. */
3893 /* No frame pointer needed. */
3896 offset
= local_fsize
- actual_fsize
;
3898 /* Saving the EH return data registers in the frame is the simplest
3899 way to get the frame unwind information emitted. */
3900 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3902 unsigned int i
, regno
;
3906 regno
= EH_RETURN_DATA_REGNO (i
);
3907 if (regno
== INVALID_REGNUM
)
3910 /* If merge_sp_adjust_with_store is nonzero, then we can
3911 optimize the first save. */
3912 if (merge_sp_adjust_with_store
)
3914 store_reg_modify (STACK_POINTER_REGNUM
, regno
, -offset
);
3915 merge_sp_adjust_with_store
= 0;
3918 store_reg (regno
, offset
, STACK_POINTER_REGNUM
);
3919 offset
+= UNITS_PER_WORD
;
/* Save callee-saved GRs %r18..%r3 relative to the stack pointer.  */
3923 for (i
= 18; i
>= 3; i
--)
3924 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
3926 /* If merge_sp_adjust_with_store is nonzero, then we can
3927 optimize the first GR save. */
3928 if (merge_sp_adjust_with_store
)
3930 store_reg_modify (STACK_POINTER_REGNUM
, i
, -offset
);
3931 merge_sp_adjust_with_store
= 0;
3934 store_reg (i
, offset
, STACK_POINTER_REGNUM
);
3935 offset
+= UNITS_PER_WORD
;
3939 /* If we wanted to merge the SP adjustment with a GR save, but we never
3940 did any GR saves, then just emit the adjustment here. */
3941 if (merge_sp_adjust_with_store
)
3942 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3946 /* The hppa calling conventions say that %r19, the pic offset
3947 register, is saved at sp - 32 (in this function's frame)
3948 when generating PIC code. FIXME: What is the correct thing
3949 to do for functions which make no calls and allocate no
3950 frame? Do we need to allocate a frame, or can we just omit
3951 the save? For now we'll just omit the save.
3953 We don't want a note on this insn as the frame marker can
3954 move if there is a dynamic stack allocation. */
3955 if (flag_pic
&& actual_fsize
!= 0 && !TARGET_64BIT
)
3957 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
, GEN_INT (-32));
3959 emit_move_insn (gen_rtx_MEM (word_mode
, addr
), pic_offset_table_rtx
);
3963 /* Align pointer properly (doubleword boundary). */
3964 offset
= (offset
+ 7) & ~7;
3966 /* Floating point register store. */
3971 /* First get the frame or stack pointer to the start of the FP register
3973 if (frame_pointer_needed
)
3975 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM
, offset
, 0);
3976 base
= hard_frame_pointer_rtx
;
3980 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
3981 base
= stack_pointer_rtx
;
3984 /* Now actually save the FP registers. */
3985 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
3987 if (df_regs_ever_live_p (i
)
3988 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
3990 rtx addr
, insn
, reg
;
/* Post-increment store through %r1 (tmpreg) — one DFmode slot per FP reg.  */
3991 addr
= gen_rtx_MEM (DFmode
, gen_rtx_POST_INC (DFmode
, tmpreg
));
3992 reg
= gen_rtx_REG (DFmode
, i
);
3993 insn
= emit_move_insn (addr
, reg
);
3996 RTX_FRAME_RELATED_P (insn
) = 1;
/* 64-bit: describe the save as one DFmode SET for the unwinder.  */
3999 rtx mem
= gen_rtx_MEM (DFmode
,
4000 plus_constant (base
, offset
));
4001 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4002 gen_rtx_SET (VOIDmode
, mem
, reg
));
/* 32-bit: describe the save as two SFmode SETs (left/right halves).  */
4006 rtx meml
= gen_rtx_MEM (SFmode
,
4007 plus_constant (base
, offset
));
4008 rtx memr
= gen_rtx_MEM (SFmode
,
4009 plus_constant (base
, offset
+ 4));
4010 rtx regl
= gen_rtx_REG (SFmode
, i
);
4011 rtx regr
= gen_rtx_REG (SFmode
, i
+ 1);
4012 rtx setl
= gen_rtx_SET (VOIDmode
, meml
, regl
);
4013 rtx setr
= gen_rtx_SET (VOIDmode
, memr
, regr
);
4016 RTX_FRAME_RELATED_P (setl
) = 1;
4017 RTX_FRAME_RELATED_P (setr
) = 1;
4018 vec
= gen_rtvec (2, setl
, setr
);
4019 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
4020 gen_rtx_SEQUENCE (VOIDmode
, vec
));
4023 offset
+= GET_MODE_SIZE (DFmode
);
4030 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4031 Handle case where DISP > 8k by using the add_high_const patterns. */
/* NOTE(review): extraction-mangled region; original line numbers are fused
   into the statements and some brace/else lines are missing.  Mirrors
   store_reg above, with the extra TARGET_DISABLE_INDEXING case.  */
4034 load_reg (int reg
, HOST_WIDE_INT disp
, int base
)
4036 rtx dest
= gen_rtx_REG (word_mode
, reg
);
4037 rtx basereg
= gen_rtx_REG (Pmode
, base
);
/* Small displacement: load directly from BASE+DISP.  */
4040 if (VAL_14_BITS_P (disp
))
4041 src
= gen_rtx_MEM (word_mode
, plus_constant (basereg
, disp
));
/* 64-bit displacement wider than 32 bits: materialize DISP in %r1.  */
4042 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
4044 rtx delta
= GEN_INT (disp
);
4045 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4047 emit_move_insn (tmpreg
, delta
);
/* With indexing disabled, fold the add into %r1 and load through it;
   otherwise use an indexed (reg+reg) memory address directly.  */
4048 if (TARGET_DISABLE_INDEXING
)
4050 emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4051 src
= gen_rtx_MEM (word_mode
, tmpreg
);
4054 src
= gen_rtx_MEM (word_mode
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
/* Large 32-bit displacement: HIGH/LO_SUM addressing through %r1.  */
4058 rtx delta
= GEN_INT (disp
);
4059 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
4060 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4062 emit_move_insn (tmpreg
, high
);
4063 src
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
4066 emit_move_insn (dest
, src
);
4069 /* Update the total code bytes output to the text section. */
/* Adds NBYTES to the running total_code_bytes counter, saturating at
   UINT_MAX on overflow.  Only counts when the function goes into the
   default text section (SOM without GAS, or portable runtime).  */
4072 update_total_code_bytes (unsigned int nbytes
)
4074 if ((TARGET_PORTABLE_RUNTIME
|| !TARGET_GAS
|| !TARGET_SOM
)
4075 && !IN_NAMED_SECTION_P (cfun
->decl
))
4077 unsigned int old_total
= total_code_bytes
;
4079 total_code_bytes
+= nbytes
;
4081 /* Be prepared to handle overflows. */
/* Unsigned wrap-around detection: the sum is smaller than the old value
   exactly when the addition overflowed.  */
4082 if (old_total
> total_code_bytes
)
4083 total_code_bytes
= UINT_MAX
;
4087 /* This function generates the assembly code for function exit.
4088 Args are as for output_function_prologue ().
4090 The function epilogue should not depend on the current stack
4091 pointer! It should use the frame pointer only. This is mandatory
4092 because of alloca; we also take advantage of it to omit stack
4093 adjustments before returning. */
/* NOTE(review): extraction-mangled region; original line numbers are fused
   into the statements and some lines (braces, else arms around original
   lines 4146-4147) are missing.  */
4096 pa_output_function_epilogue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
4098 rtx insn
= get_last_insn ();
4102 /* hppa_expand_epilogue does the dirty work now. We just need
4103 to output the assembler directives which denote the end
4106 To make debuggers happy, emit a nop if the epilogue was completely
4107 eliminated due to a volatile call as the last insn in the
4108 current function. That way the return address (in %r2) will
4109 always point to a valid instruction in the current function. */
4111 /* Get the last real insn. */
4112 if (GET_CODE (insn
) == NOTE
)
4113 insn
= prev_real_insn (insn
);
4115 /* If it is a sequence, then look inside. */
4116 if (insn
&& GET_CODE (insn
) == INSN
&& GET_CODE (PATTERN (insn
)) == SEQUENCE
)
4117 insn
= XVECEXP (PATTERN (insn
), 0, 0);
4119 /* If insn is a CALL_INSN, then it must be a call to a volatile
4120 function (otherwise there would be epilogue insns). */
4121 if (insn
&& GET_CODE (insn
) == CALL_INSN
)
4123 fputs ("\tnop\n", file
);
4127 fputs ("\t.EXIT\n\t.PROCEND\n", file
);
4129 if (TARGET_SOM
&& TARGET_GAS
)
4131 /* We done with this subspace except possibly for some additional
4132 debug information. Forget that we are in this subspace to ensure
4133 that the next function is output in its own subspace. */
4135 cfun
->machine
->in_nsubspa
= 2;
/* Compute the address just past the function and round to the function
   alignment boundary so the size accounting matches the assembler.  */
4138 if (INSN_ADDRESSES_SET_P ())
4140 insn
= get_last_nonnote_insn ();
4141 last_address
+= INSN_ADDRESSES (INSN_UID (insn
));
4143 last_address
+= insn_default_length (insn
);
4144 last_address
= ((last_address
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
4145 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
4148 last_address
= UINT_MAX
;
4150 /* Finally, update the total number of code bytes output so far. */
4151 update_total_code_bytes (last_address
);
/* Expand the function epilogue as RTL: restore the return pointer, the
   callee-saved general and floating-point registers, deallocate the frame,
   and apply the __builtin_eh_return stack adjustment.  Mirror image of
   hppa_expand_prologue.
   NOTE(review): extraction-mangled region — original source line numbers
   are fused into the statements and many lines (braces, else arms, parts
   of conditions, e.g. original lines 4237, 4254, 4314-4315, 4318) are
   missing.  Comments below describe only the visible logic.  */
4155 hppa_expand_epilogue (void)
4158 HOST_WIDE_INT offset
;
4159 HOST_WIDE_INT ret_off
= 0;
4161 int merge_sp_adjust_with_load
= 0;
4163 /* We will use this often. */
4164 tmpreg
= gen_rtx_REG (word_mode
, 1);
4166 /* Try to restore RP early to avoid load/use interlocks when
4167 RP gets used in the return (bv) instruction. This appears to still
4168 be necessary even when we schedule the prologue and epilogue. */
4171 ret_off
= TARGET_64BIT
? -16 : -20;
4172 if (frame_pointer_needed
)
4174 load_reg (2, ret_off
, HARD_FRAME_POINTER_REGNUM
);
4179 /* No frame pointer, and stack is smaller than 8k. */
4180 if (VAL_14_BITS_P (ret_off
- actual_fsize
))
4182 load_reg (2, ret_off
- actual_fsize
, STACK_POINTER_REGNUM
);
4188 /* General register restores. */
4189 if (frame_pointer_needed
)
4191 offset
= local_fsize
;
4193 /* If the current function calls __builtin_eh_return, then we need
4194 to restore the saved EH data registers. */
4195 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4197 unsigned int i
, regno
;
4201 regno
= EH_RETURN_DATA_REGNO (i
);
4202 if (regno
== INVALID_REGNUM
)
4205 load_reg (regno
, offset
, HARD_FRAME_POINTER_REGNUM
);
4206 offset
+= UNITS_PER_WORD
;
/* Restore callee-saved GRs %r18..%r4 relative to the frame pointer.  */
4210 for (i
= 18; i
>= 4; i
--)
4211 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4213 load_reg (i
, offset
, HARD_FRAME_POINTER_REGNUM
);
4214 offset
+= UNITS_PER_WORD
;
4219 offset
= local_fsize
- actual_fsize
;
4221 /* If the current function calls __builtin_eh_return, then we need
4222 to restore the saved EH data registers. */
4223 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4225 unsigned int i
, regno
;
4229 regno
= EH_RETURN_DATA_REGNO (i
);
4230 if (regno
== INVALID_REGNUM
)
4233 /* Only for the first load.
4234 merge_sp_adjust_with_load holds the register load
4235 with which we will merge the sp adjustment. */
4236 if (merge_sp_adjust_with_load
== 0
4238 && VAL_14_BITS_P (-actual_fsize
))
4239 merge_sp_adjust_with_load
= regno
;
4241 load_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4242 offset
+= UNITS_PER_WORD
;
/* Restore callee-saved GRs %r18..%r3 relative to the stack pointer.  */
4246 for (i
= 18; i
>= 3; i
--)
4248 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4250 /* Only for the first load.
4251 merge_sp_adjust_with_load holds the register load
4252 with which we will merge the sp adjustment. */
4253 if (merge_sp_adjust_with_load
== 0
4255 && VAL_14_BITS_P (-actual_fsize
))
4256 merge_sp_adjust_with_load
= i
;
4258 load_reg (i
, offset
, STACK_POINTER_REGNUM
);
4259 offset
+= UNITS_PER_WORD
;
4264 /* Align pointer properly (doubleword boundary). */
4265 offset
= (offset
+ 7) & ~7;
4267 /* FP register restores. */
4270 /* Adjust the register to index off of. */
4271 if (frame_pointer_needed
)
4272 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM
, offset
, 0);
4274 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
4276 /* Actually do the restores now. */
4277 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4278 if (df_regs_ever_live_p (i
)
4279 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
4281 rtx src
= gen_rtx_MEM (DFmode
, gen_rtx_POST_INC (DFmode
, tmpreg
));
4282 rtx dest
= gen_rtx_REG (DFmode
, i
);
4283 emit_move_insn (dest
, src
);
4287 /* Emit a blockage insn here to keep these insns from being moved to
4288 an earlier spot in the epilogue, or into the main instruction stream.
4290 This is necessary as we must not cut the stack back before all the
4291 restores are finished. */
4292 emit_insn (gen_blockage ());
4294 /* Reset stack pointer (and possibly frame pointer). The stack
4295 pointer is initially set to fp + 64 to avoid a race condition. */
4296 if (frame_pointer_needed
)
4298 rtx delta
= GEN_INT (-64);
4300 set_reg_plus_d (STACK_POINTER_REGNUM
, HARD_FRAME_POINTER_REGNUM
, 64, 0);
/* Pre-decrement load restores the old frame pointer while moving sp back.  */
4301 emit_insn (gen_pre_load (hard_frame_pointer_rtx
,
4302 stack_pointer_rtx
, delta
));
4304 /* If we were deferring a callee register restore, do it now. */
4305 else if (merge_sp_adjust_with_load
)
4307 rtx delta
= GEN_INT (-actual_fsize
);
4308 rtx dest
= gen_rtx_REG (word_mode
, merge_sp_adjust_with_load
);
4310 emit_insn (gen_pre_load (dest
, stack_pointer_rtx
, delta
));
4312 else if (actual_fsize
!= 0)
4313 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4316 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4317 frame greater than 8k), do so now. */
4319 load_reg (2, ret_off
, STACK_POINTER_REGNUM
);
4321 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4323 rtx sa
= EH_RETURN_STACKADJ_RTX
;
4325 emit_insn (gen_blockage ());
/* Subtract the EH stack adjustment from sp (di/si depending on ABI).  */
4326 emit_insn (TARGET_64BIT
4327 ? gen_subdi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
)
4328 : gen_subsi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
));
/* Return an rtx holding the entry-time value of the PIC offset table
   register (%r19), via the hard-reg-initial-value mechanism.  */
4333 hppa_pic_save_rtx (void)
4335 return get_hard_reg_initial_val (word_mode
, PIC_OFFSET_TABLE_REGNUM
);
/* Allow a target to opt out of deferred profile counters; default is to
   emit them (0 = counters are deferred and output later).  */
4338 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4339 #define NO_DEFERRED_PROFILE_COUNTERS 0
4343 /* Vector of funcdef numbers. */
/* Filled by hppa_profile_hook and drained/freed by
   output_deferred_profile_counters.  */
4344 static VEC(int,heap
) *funcdef_nos
;
4346 /* Output deferred profile counters. */
/* Emits one zero-initialized, long-sized counter (label LPn) in the data
   section for each funcdef number recorded by hppa_profile_hook, then
   frees the vector.  NOTE(review): extraction-mangled region; original
   line numbers are fused into the statements and some brace/return lines
   are missing.  */
4348 output_deferred_profile_counters (void)
4353 if (VEC_empty (int, funcdef_nos
))
4356 switch_to_section (data_section
);
4357 align
= MIN (BIGGEST_ALIGNMENT
, LONG_TYPE_SIZE
);
4358 ASM_OUTPUT_ALIGN (asm_out_file
, floor_log2 (align
/ BITS_PER_UNIT
));
4360 for (i
= 0; VEC_iterate (int, funcdef_nos
, i
, n
); i
++)
4362 targetm
.asm_out
.internal_label (asm_out_file
, "LP", n
);
4363 assemble_integer (const0_rtx
, LONG_TYPE_SIZE
/ BITS_PER_UNIT
, align
, 1);
4366 VEC_free (int, heap
, funcdef_nos
);
/* Emit the RTL for a call to the _mcount profiling routine at function
   entry.  LABEL_NO is the funcdef number used to name the per-function
   profile counter (LPn).  NOTE(review): extraction-mangled region —
   original line numbers are fused into the statements and several lines
   (braces, trailing call arguments around original lines 4381, 4387,
   4419, 4429) are missing.  */
4370 hppa_profile_hook (int label_no
)
4372 /* We use SImode for the address of the function in both 32 and
4373 64-bit code to avoid having to provide DImode versions of the
4374 lcla2 and load_offset_label_address insn patterns. */
4375 rtx reg
= gen_reg_rtx (SImode
);
4376 rtx label_rtx
= gen_label_rtx ();
4377 rtx begin_label_rtx
, call_insn
;
4378 char begin_label_name
[16];
4380 ASM_GENERATE_INTERNAL_LABEL (begin_label_name
, FUNC_BEGIN_PROLOG_LABEL
,
4382 begin_label_rtx
= gen_rtx_SYMBOL_REF (SImode
, ggc_strdup (begin_label_name
));
4385 emit_move_insn (arg_pointer_rtx
,
4386 gen_rtx_PLUS (word_mode
, virtual_outgoing_args_rtx
,
/* _mcount expects the caller's return address in %r26.  */
4389 emit_move_insn (gen_rtx_REG (word_mode
, 26), gen_rtx_REG (word_mode
, 2));
4391 /* The address of the function is loaded into %r25 with an instruction-
4392 relative sequence that avoids the use of relocations. The sequence
4393 is split so that the load_offset_label_address instruction can
4394 occupy the delay slot of the call to _mcount. */
4396 emit_insn (gen_lcla2 (reg
, label_rtx
));
4398 emit_insn (gen_lcla1 (reg
, label_rtx
));
4400 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode
, 25),
4401 reg
, begin_label_rtx
, label_rtx
));
4403 #if !NO_DEFERRED_PROFILE_COUNTERS
4405 rtx count_label_rtx
, addr
, r24
;
4406 char count_label_name
[16];
/* Record this funcdef number so output_deferred_profile_counters can
   later emit the LPn counter, and pass its address to _mcount in %r24.  */
4408 VEC_safe_push (int, heap
, funcdef_nos
, label_no
);
4409 ASM_GENERATE_INTERNAL_LABEL (count_label_name
, "LP", label_no
);
4410 count_label_rtx
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (count_label_name
));
4412 addr
= force_reg (Pmode
, count_label_rtx
);
4413 r24
= gen_rtx_REG (Pmode
, 24);
4414 emit_move_insn (r24
, addr
);
4417 emit_call_insn (gen_call (gen_rtx_MEM (Pmode
,
4418 gen_rtx_SYMBOL_REF (Pmode
,
4420 GEN_INT (TARGET_64BIT
? 24 : 12)));
4422 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), r24
);
4427 emit_call_insn (gen_call (gen_rtx_MEM (Pmode
,
4428 gen_rtx_SYMBOL_REF (Pmode
,
4430 GEN_INT (TARGET_64BIT
? 16 : 8)));
/* %r25 and %r26 carry arguments to _mcount; mark them used by the call.  */
4434 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 25));
4435 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 26));
4437 /* Indicate the _mcount call cannot throw, nor will it execute a
4439 make_reg_eh_region_note_nothrow_nononlocal (call_insn
);
4442 /* Fetch the return address for the frame COUNT steps up from
4443 the current frame, after the prologue. FRAMEADDR is the
4444 frame pointer of the COUNT frame.
4446 We want to ignore any export stub remnants here. To handle this,
4447 we examine the code at the return address, and if it is an export
4448 stub, we return a memory rtx for the stub return address stored
4451 The value returned is used in two different ways:
4453 1. To find a function's caller.
4455 2. To change the return address for a function.
4457 This function handles most instances of case 1; however, it will
4458 fail if there are two levels of stubs to execute on the return
4459 path. The only way I believe that can happen is if the return value
4460 needs a parameter relocation, which never happens for C code.
4462 This function handles most instances of case 2; however, it will
4463 fail if we did not originally have stub code on the return path
4464 but will need stub code on the new return path. This can happen if
4465 the caller & callee are both in the main program, but the new
4466 return location is in a shared library. */
/* NOTE(review): extraction-mangled region — original line numbers are
   fused into the statements and several lines (local declarations, early
   returns, the trailing operand of the final emit_move_insn, the label
   emission and final return) are missing.  */
4469 return_addr_rtx (int count
, rtx frameaddr
)
4476 /* Instruction stream at the normal return address for the export stub:
4478 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4479 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4480 0x00011820 | stub+16: mtsp r1,sr0
4481 0xe0400002 | stub+20: be,n 0(sr0,rp)
4483 0xe0400002 must be specified as -532676606 so that it won't be
4484 rejected as an invalid immediate operand on 64-bit hosts. */
4486 HOST_WIDE_INT insns
[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4492 rp
= get_hard_reg_initial_val (Pmode
, 2);
/* 64-bit and no-space-regs configurations have no export stubs to skip.  */
4494 if (TARGET_64BIT
|| TARGET_NO_SPACE_REGS
)
4497 /* If there is no export stub then just use the value saved from
4498 the return pointer register. */
4500 saved_rp
= gen_reg_rtx (Pmode
);
4501 emit_move_insn (saved_rp
, rp
);
4503 /* Get pointer to the instruction stream. We have to mask out the
4504 privilege level from the two low order bits of the return address
4505 pointer here so that ins will point to the start of the first
4506 instruction that would have been executed if we returned. */
4507 ins
= copy_to_reg (gen_rtx_AND (Pmode
, rp
, MASK_RETURN_ADDR
));
4508 label
= gen_label_rtx ();
4510 /* Check the instruction stream at the normal return address for the
4511 export stub. If it is an export stub, than our return address is
4512 really in -24[frameaddr]. */
/* Compare the first three stub words; jump to LABEL on first mismatch.  */
4514 for (i
= 0; i
< 3; i
++)
4516 rtx op0
= gen_rtx_MEM (SImode
, plus_constant (ins
, i
* 4));
4517 rtx op1
= GEN_INT (insns
[i
]);
4518 emit_cmp_and_jump_insns (op0
, op1
, NE
, NULL
, SImode
, 0, label
);
4521 /* Here we know that our return address points to an export
4522 stub. We don't want to return the address of the export stub,
4523 but rather the return address of the export stub. That return
4524 address is stored at -24[frameaddr]. */
4526 emit_move_insn (saved_rp
,
4528 memory_address (Pmode
,
4529 plus_constant (frameaddr
,
/* Emit a floating-point conditional branch: first a compare that sets the
   CCFP flag register (%fr0 in CCFPmode), then a conditional jump to
   OPERANDS[3].  OPERANDS[0] holds the comparison code, OPERANDS[1] and
   OPERANDS[2] the comparison inputs.
   NOTE(review): extraction-mangled region; original line numbers are fused
   into the statements and some lines (the IF_THEN_ELSE condition operands
   around original lines 4550-4553 and the fall-through pc arm) are
   missing.  */
4538 emit_bcond_fp (rtx operands
[])
4540 enum rtx_code code
= GET_CODE (operands
[0]);
4541 rtx operand0
= operands
[1];
4542 rtx operand1
= operands
[2];
4543 rtx label
= operands
[3];
4545 emit_insn (gen_rtx_SET (VOIDmode
, gen_rtx_REG (CCFPmode
, 0),
4546 gen_rtx_fmt_ee (code
, CCFPmode
, operand0
, operand1
)));
4548 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
4549 gen_rtx_IF_THEN_ELSE (VOIDmode
,
4552 gen_rtx_REG (CCFPmode
, 0),
4554 gen_rtx_LABEL_REF (VOIDmode
, label
),
4559 /* Adjust the cost of a scheduling dependency. Return the new cost of
4560 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4563 pa_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
4565 enum attr_type attr_type
;
4567 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4568 true dependencies as they are described with bypasses now. */
4569 if (pa_cpu
>= PROCESSOR_8000
|| REG_NOTE_KIND (link
) == 0)
4572 if (! recog_memoized (insn
))
4575 attr_type
= get_attr_type (insn
);
4577 switch (REG_NOTE_KIND (link
))
4580 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4583 if (attr_type
== TYPE_FPLOAD
)
4585 rtx pat
= PATTERN (insn
);
4586 rtx dep_pat
= PATTERN (dep_insn
);
4587 if (GET_CODE (pat
) == PARALLEL
)
4589 /* This happens for the fldXs,mb patterns. */
4590 pat
= XVECEXP (pat
, 0, 0);
4592 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4593 /* If this happens, we have to extend this to schedule
4594 optimally. Return 0 for now. */
4597 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
4599 if (! recog_memoized (dep_insn
))
4601 switch (get_attr_type (dep_insn
))
4608 case TYPE_FPSQRTSGL
:
4609 case TYPE_FPSQRTDBL
:
4610 /* A fpload can't be issued until one cycle before a
4611 preceding arithmetic operation has finished if
4612 the target of the fpload is any of the sources
4613 (or destination) of the arithmetic operation. */
4614 return insn_default_latency (dep_insn
) - 1;
4621 else if (attr_type
== TYPE_FPALU
)
4623 rtx pat
= PATTERN (insn
);
4624 rtx dep_pat
= PATTERN (dep_insn
);
4625 if (GET_CODE (pat
) == PARALLEL
)
4627 /* This happens for the fldXs,mb patterns. */
4628 pat
= XVECEXP (pat
, 0, 0);
4630 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4631 /* If this happens, we have to extend this to schedule
4632 optimally. Return 0 for now. */
4635 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
4637 if (! recog_memoized (dep_insn
))
4639 switch (get_attr_type (dep_insn
))
4643 case TYPE_FPSQRTSGL
:
4644 case TYPE_FPSQRTDBL
:
4645 /* An ALU flop can't be issued until two cycles before a
4646 preceding divide or sqrt operation has finished if
4647 the target of the ALU flop is any of the sources
4648 (or destination) of the divide or sqrt operation. */
4649 return insn_default_latency (dep_insn
) - 2;
4657 /* For other anti dependencies, the cost is 0. */
4660 case REG_DEP_OUTPUT
:
4661 /* Output dependency; DEP_INSN writes a register that INSN writes some
4663 if (attr_type
== TYPE_FPLOAD
)
4665 rtx pat
= PATTERN (insn
);
4666 rtx dep_pat
= PATTERN (dep_insn
);
4667 if (GET_CODE (pat
) == PARALLEL
)
4669 /* This happens for the fldXs,mb patterns. */
4670 pat
= XVECEXP (pat
, 0, 0);
4672 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4673 /* If this happens, we have to extend this to schedule
4674 optimally. Return 0 for now. */
4677 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
4679 if (! recog_memoized (dep_insn
))
4681 switch (get_attr_type (dep_insn
))
4688 case TYPE_FPSQRTSGL
:
4689 case TYPE_FPSQRTDBL
:
4690 /* A fpload can't be issued until one cycle before a
4691 preceding arithmetic operation has finished if
4692 the target of the fpload is the destination of the
4693 arithmetic operation.
4695 Exception: For PA7100LC, PA7200 and PA7300, the cost
4696 is 3 cycles, unless they bundle together. We also
4697 pay the penalty if the second insn is a fpload. */
4698 return insn_default_latency (dep_insn
) - 1;
4705 else if (attr_type
== TYPE_FPALU
)
4707 rtx pat
= PATTERN (insn
);
4708 rtx dep_pat
= PATTERN (dep_insn
);
4709 if (GET_CODE (pat
) == PARALLEL
)
4711 /* This happens for the fldXs,mb patterns. */
4712 pat
= XVECEXP (pat
, 0, 0);
4714 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4715 /* If this happens, we have to extend this to schedule
4716 optimally. Return 0 for now. */
4719 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
4721 if (! recog_memoized (dep_insn
))
4723 switch (get_attr_type (dep_insn
))
4727 case TYPE_FPSQRTSGL
:
4728 case TYPE_FPSQRTDBL
:
4729 /* An ALU flop can't be issued until two cycles before a
4730 preceding divide or sqrt operation has finished if
4731 the target of the ALU flop is also the target of
4732 the divide or sqrt operation. */
4733 return insn_default_latency (dep_insn
) - 2;
4741 /* For other output dependencies, the cost is 0. */
4749 /* Adjust scheduling priorities. We use this to try and keep addil
4750 and the next use of %r1 close together. */
4752 pa_adjust_priority (rtx insn
, int priority
)
4754 rtx set
= single_set (insn
);
4758 src
= SET_SRC (set
);
4759 dest
= SET_DEST (set
);
4760 if (GET_CODE (src
) == LO_SUM
4761 && symbolic_operand (XEXP (src
, 1), VOIDmode
)
4762 && ! read_only_operand (XEXP (src
, 1), VOIDmode
))
4765 else if (GET_CODE (src
) == MEM
4766 && GET_CODE (XEXP (src
, 0)) == LO_SUM
4767 && symbolic_operand (XEXP (XEXP (src
, 0), 1), VOIDmode
)
4768 && ! read_only_operand (XEXP (XEXP (src
, 0), 1), VOIDmode
))
4771 else if (GET_CODE (dest
) == MEM
4772 && GET_CODE (XEXP (dest
, 0)) == LO_SUM
4773 && symbolic_operand (XEXP (XEXP (dest
, 0), 1), VOIDmode
)
4774 && ! read_only_operand (XEXP (XEXP (dest
, 0), 1), VOIDmode
))
4780 /* The 700 can only issue a single insn at a time.
4781 The 7XXX processors can issue two insns at a time.
4782 The 8000 can issue 4 insns at a time. */
4784 pa_issue_rate (void)
4788 case PROCESSOR_700
: return 1;
4789 case PROCESSOR_7100
: return 2;
4790 case PROCESSOR_7100LC
: return 2;
4791 case PROCESSOR_7200
: return 2;
4792 case PROCESSOR_7300
: return 2;
4793 case PROCESSOR_8000
: return 4;
4802 /* Return any length adjustment needed by INSN which already has its length
4803 computed as LENGTH. Return zero if no adjustment is necessary.
4805 For the PA: function calls, millicode calls, and backwards short
4806 conditional branches with unfilled delay slots need an adjustment by +1
4807 (to account for the NOP which will be inserted into the instruction stream).
4809 Also compute the length of an inline block move here as it is too
4810 complicated to express as a length attribute in pa.md. */
4812 pa_adjust_insn_length (rtx insn
, int length
)
4814 rtx pat
= PATTERN (insn
);
4816 /* Jumps inside switch tables which have unfilled delay slots need
4818 if (GET_CODE (insn
) == JUMP_INSN
4819 && GET_CODE (pat
) == PARALLEL
4820 && get_attr_type (insn
) == TYPE_BTABLE_BRANCH
)
4822 /* Millicode insn with an unfilled delay slot. */
4823 else if (GET_CODE (insn
) == INSN
4824 && GET_CODE (pat
) != SEQUENCE
4825 && GET_CODE (pat
) != USE
4826 && GET_CODE (pat
) != CLOBBER
4827 && get_attr_type (insn
) == TYPE_MILLI
)
4829 /* Block move pattern. */
4830 else if (GET_CODE (insn
) == INSN
4831 && GET_CODE (pat
) == PARALLEL
4832 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
4833 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
4834 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == MEM
4835 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
4836 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == BLKmode
)
4837 return compute_movmem_length (insn
) - 4;
4838 /* Block clear pattern. */
4839 else if (GET_CODE (insn
) == INSN
4840 && GET_CODE (pat
) == PARALLEL
4841 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
4842 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
4843 && XEXP (XVECEXP (pat
, 0, 0), 1) == const0_rtx
4844 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
)
4845 return compute_clrmem_length (insn
) - 4;
4846 /* Conditional branch with an unfilled delay slot. */
4847 else if (GET_CODE (insn
) == JUMP_INSN
&& ! simplejump_p (insn
))
4849 /* Adjust a short backwards conditional with an unfilled delay slot. */
4850 if (GET_CODE (pat
) == SET
4852 && JUMP_LABEL (insn
) != NULL_RTX
4853 && ! forward_branch_p (insn
))
4855 else if (GET_CODE (pat
) == PARALLEL
4856 && get_attr_type (insn
) == TYPE_PARALLEL_BRANCH
4859 /* Adjust dbra insn with short backwards conditional branch with
4860 unfilled delay slot -- only for case where counter is in a
4861 general register register. */
4862 else if (GET_CODE (pat
) == PARALLEL
4863 && GET_CODE (XVECEXP (pat
, 0, 1)) == SET
4864 && GET_CODE (XEXP (XVECEXP (pat
, 0, 1), 0)) == REG
4865 && ! FP_REG_P (XEXP (XVECEXP (pat
, 0, 1), 0))
4867 && ! forward_branch_p (insn
))
4875 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
4878 pa_print_operand_punct_valid_p (unsigned char code
)
4889 /* Print operand X (an rtx) in assembler syntax to file FILE.
4890 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4891 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4894 print_operand (FILE *file
, rtx x
, int code
)
4899 /* Output a 'nop' if there's nothing for the delay slot. */
4900 if (dbr_sequence_length () == 0)
4901 fputs ("\n\tnop", file
);
4904 /* Output a nullification completer if there's nothing for the */
4905 /* delay slot or nullification is requested. */
4906 if (dbr_sequence_length () == 0 ||
4908 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence
, 0, 0))))
4912 /* Print out the second register name of a register pair.
4913 I.e., R (6) => 7. */
4914 fputs (reg_names
[REGNO (x
) + 1], file
);
4917 /* A register or zero. */
4919 || (x
== CONST0_RTX (DFmode
))
4920 || (x
== CONST0_RTX (SFmode
)))
4922 fputs ("%r0", file
);
4928 /* A register or zero (floating point). */
4930 || (x
== CONST0_RTX (DFmode
))
4931 || (x
== CONST0_RTX (SFmode
)))
4933 fputs ("%fr0", file
);
4942 xoperands
[0] = XEXP (XEXP (x
, 0), 0);
4943 xoperands
[1] = XVECEXP (XEXP (XEXP (x
, 0), 1), 0, 0);
4944 output_global_address (file
, xoperands
[1], 0);
4945 fprintf (file
, "(%s)", reg_names
[REGNO (xoperands
[0])]);
4949 case 'C': /* Plain (C)ondition */
4951 switch (GET_CODE (x
))
4954 fputs ("=", file
); break;
4956 fputs ("<>", file
); break;
4958 fputs (">", file
); break;
4960 fputs (">=", file
); break;
4962 fputs (">>=", file
); break;
4964 fputs (">>", file
); break;
4966 fputs ("<", file
); break;
4968 fputs ("<=", file
); break;
4970 fputs ("<<=", file
); break;
4972 fputs ("<<", file
); break;
4977 case 'N': /* Condition, (N)egated */
4978 switch (GET_CODE (x
))
4981 fputs ("<>", file
); break;
4983 fputs ("=", file
); break;
4985 fputs ("<=", file
); break;
4987 fputs ("<", file
); break;
4989 fputs ("<<", file
); break;
4991 fputs ("<<=", file
); break;
4993 fputs (">=", file
); break;
4995 fputs (">", file
); break;
4997 fputs (">>", file
); break;
4999 fputs (">>=", file
); break;
5004 /* For floating point comparisons. Note that the output
5005 predicates are the complement of the desired mode. The
5006 conditions for GT, GE, LT, LE and LTGT cause an invalid
5007 operation exception if the result is unordered and this
5008 exception is enabled in the floating-point status register. */
5010 switch (GET_CODE (x
))
5013 fputs ("!=", file
); break;
5015 fputs ("=", file
); break;
5017 fputs ("!>", file
); break;
5019 fputs ("!>=", file
); break;
5021 fputs ("!<", file
); break;
5023 fputs ("!<=", file
); break;
5025 fputs ("!<>", file
); break;
5027 fputs ("!?<=", file
); break;
5029 fputs ("!?<", file
); break;
5031 fputs ("!?>=", file
); break;
5033 fputs ("!?>", file
); break;
5035 fputs ("!?=", file
); break;
5037 fputs ("!?", file
); break;
5039 fputs ("?", file
); break;
5044 case 'S': /* Condition, operands are (S)wapped. */
5045 switch (GET_CODE (x
))
5048 fputs ("=", file
); break;
5050 fputs ("<>", file
); break;
5052 fputs ("<", file
); break;
5054 fputs ("<=", file
); break;
5056 fputs ("<<=", file
); break;
5058 fputs ("<<", file
); break;
5060 fputs (">", file
); break;
5062 fputs (">=", file
); break;
5064 fputs (">>=", file
); break;
5066 fputs (">>", file
); break;
5071 case 'B': /* Condition, (B)oth swapped and negate. */
5072 switch (GET_CODE (x
))
5075 fputs ("<>", file
); break;
5077 fputs ("=", file
); break;
5079 fputs (">=", file
); break;
5081 fputs (">", file
); break;
5083 fputs (">>", file
); break;
5085 fputs (">>=", file
); break;
5087 fputs ("<=", file
); break;
5089 fputs ("<", file
); break;
5091 fputs ("<<", file
); break;
5093 fputs ("<<=", file
); break;
5099 gcc_assert (GET_CODE (x
) == CONST_INT
);
5100 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~INTVAL (x
));
5103 gcc_assert (GET_CODE (x
) == CONST_INT
);
5104 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - (INTVAL (x
) & 63));
5107 gcc_assert (GET_CODE (x
) == CONST_INT
);
5108 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - (INTVAL (x
) & 31));
5111 gcc_assert (GET_CODE (x
) == CONST_INT
&& exact_log2 (INTVAL (x
)) >= 0);
5112 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
5115 gcc_assert (GET_CODE (x
) == CONST_INT
);
5116 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 63 - (INTVAL (x
) & 63));
5119 gcc_assert (GET_CODE (x
) == CONST_INT
);
5120 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 31 - (INTVAL (x
) & 31));
5123 if (GET_CODE (x
) == CONST_INT
)
5128 switch (GET_CODE (XEXP (x
, 0)))
5132 if (ASSEMBLER_DIALECT
== 0)
5133 fputs ("s,mb", file
);
5135 fputs (",mb", file
);
5139 if (ASSEMBLER_DIALECT
== 0)
5140 fputs ("s,ma", file
);
5142 fputs (",ma", file
);
5145 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5146 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5148 if (ASSEMBLER_DIALECT
== 0)
5151 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5152 || GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5154 if (ASSEMBLER_DIALECT
== 0)
5155 fputs ("x,s", file
);
5159 else if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5163 if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5169 output_global_address (file
, x
, 0);
5172 output_global_address (file
, x
, 1);
5174 case 0: /* Don't do anything special */
5179 compute_zdepwi_operands (INTVAL (x
), op
);
5180 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5186 compute_zdepdi_operands (INTVAL (x
), op
);
5187 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5191 /* We can get here from a .vtable_inherit due to our
5192 CONSTANT_ADDRESS_P rejecting perfectly good constant
5198 if (GET_CODE (x
) == REG
)
5200 fputs (reg_names
[REGNO (x
)], file
);
5201 if (TARGET_64BIT
&& FP_REG_P (x
) && GET_MODE_SIZE (GET_MODE (x
)) <= 4)
5207 && GET_MODE_SIZE (GET_MODE (x
)) <= 4
5208 && (REGNO (x
) & 1) == 0)
5211 else if (GET_CODE (x
) == MEM
)
5213 int size
= GET_MODE_SIZE (GET_MODE (x
));
5214 rtx base
= NULL_RTX
;
5215 switch (GET_CODE (XEXP (x
, 0)))
5219 base
= XEXP (XEXP (x
, 0), 0);
5220 fprintf (file
, "-%d(%s)", size
, reg_names
[REGNO (base
)]);
5224 base
= XEXP (XEXP (x
, 0), 0);
5225 fprintf (file
, "%d(%s)", size
, reg_names
[REGNO (base
)]);
5228 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
)
5229 fprintf (file
, "%s(%s)",
5230 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 0), 0))],
5231 reg_names
[REGNO (XEXP (XEXP (x
, 0), 1))]);
5232 else if (GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5233 fprintf (file
, "%s(%s)",
5234 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 1), 0))],
5235 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
5236 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5237 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5239 /* Because the REG_POINTER flag can get lost during reload,
5240 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5241 index and base registers in the combined move patterns. */
5242 rtx base
= XEXP (XEXP (x
, 0), 1);
5243 rtx index
= XEXP (XEXP (x
, 0), 0);
5245 fprintf (file
, "%s(%s)",
5246 reg_names
[REGNO (index
)], reg_names
[REGNO (base
)]);
5249 output_address (XEXP (x
, 0));
5252 output_address (XEXP (x
, 0));
5257 output_addr_const (file
, x
);
5260 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5263 output_global_address (FILE *file
, rtx x
, int round_constant
)
5266 /* Imagine (high (const (plus ...))). */
5267 if (GET_CODE (x
) == HIGH
)
5270 if (GET_CODE (x
) == SYMBOL_REF
&& read_only_operand (x
, VOIDmode
))
5271 output_addr_const (file
, x
);
5272 else if (GET_CODE (x
) == SYMBOL_REF
&& !flag_pic
)
5274 output_addr_const (file
, x
);
5275 fputs ("-$global$", file
);
5277 else if (GET_CODE (x
) == CONST
)
5279 const char *sep
= "";
5280 int offset
= 0; /* assembler wants -$global$ at end */
5281 rtx base
= NULL_RTX
;
5283 switch (GET_CODE (XEXP (XEXP (x
, 0), 0)))
5286 base
= XEXP (XEXP (x
, 0), 0);
5287 output_addr_const (file
, base
);
5290 offset
= INTVAL (XEXP (XEXP (x
, 0), 0));
5296 switch (GET_CODE (XEXP (XEXP (x
, 0), 1)))
5299 base
= XEXP (XEXP (x
, 0), 1);
5300 output_addr_const (file
, base
);
5303 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
5309 /* How bogus. The compiler is apparently responsible for
5310 rounding the constant if it uses an LR field selector.
5312 The linker and/or assembler seem a better place since
5313 they have to do this kind of thing already.
5315 If we fail to do this, HP's optimizing linker may eliminate
5316 an addil, but not update the ldw/stw/ldo instruction that
5317 uses the result of the addil. */
5319 offset
= ((offset
+ 0x1000) & ~0x1fff);
5321 switch (GET_CODE (XEXP (x
, 0)))
5334 gcc_assert (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
);
5342 if (!read_only_operand (base
, VOIDmode
) && !flag_pic
)
5343 fputs ("-$global$", file
);
5345 fprintf (file
, "%s%d", sep
, offset
);
5348 output_addr_const (file
, x
);
5351 /* Output boilerplate text to appear at the beginning of the file.
5352 There are several possible versions. */
5353 #define aputs(x) fputs(x, asm_out_file)
5355 pa_file_start_level (void)
5358 aputs ("\t.LEVEL 2.0w\n");
5359 else if (TARGET_PA_20
)
5360 aputs ("\t.LEVEL 2.0\n");
5361 else if (TARGET_PA_11
)
5362 aputs ("\t.LEVEL 1.1\n");
5364 aputs ("\t.LEVEL 1.0\n");
/* Emit the SOM .SPACE/.SUBSPA boilerplate for the private (data/bss)
   and text spaces.  When SORTSPACE is nonzero, SORT keys are attached
   to the spaces.
   NOTE(review): the SORT=16/SORT=8 branches were lost in extraction
   and restored from canonical pa.c — verify against upstream.  */
static void
pa_file_start_space (int sortspace)
{
  aputs ("\t.SPACE $PRIVATE$");
  if (sortspace)
    aputs (",SORT=16");
  aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
         "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
         "\n\t.SPACE $TEXT$");
  if (sortspace)
    aputs (",SORT=8");
  aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
         "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
}
5383 pa_file_start_file (int want_version
)
5385 if (write_symbols
!= NO_DEBUG
)
5387 output_file_directive (asm_out_file
, main_input_filename
);
5389 aputs ("\t.version\t\"01.01\"\n");
5394 pa_file_start_mcount (const char *aswhat
)
5397 fprintf (asm_out_file
, "\t.IMPORT _mcount,%s\n", aswhat
);
/* File-start hook for the ELF targets: architecture level, _mcount
   import and .file directive (no .version).  */
static void
pa_elf_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_mcount ("ENTRY");
  pa_file_start_file (0);
}
/* File-start hook for the SOM target: level, space directives,
   imports of $global$ and the $$dyncall millicode, _mcount and
   the .file directive.  */
static void
pa_som_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (0);
  aputs ("\t.IMPORT $global$,DATA\n"
         "\t.IMPORT $$dyncall,MILLICODE\n");
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
/* File-start hook for the Linux target: .file (with .version),
   architecture level and _mcount import.  */
static void
pa_linux_file_start (void)
{
  pa_file_start_file (1);
  pa_file_start_level ();
  pa_file_start_mcount ("CODE");
}
/* File-start hook for 64-bit HP-UX using GAS: level, an optional
   .type directive for _mcount, and .file with .version.
   NOTE(review): the profile_flag guard around the type directive was
   lost in extraction and restored from canonical pa.c — verify.  */
static void
pa_hpux64_gas_file_start (void)
{
  pa_file_start_level ();
#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  if (profile_flag)
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
#endif
  pa_file_start_file (1);
}
/* File-start hook for 64-bit HP-UX using the HP assembler: level,
   sorted space directives, _mcount import and .file directive.  */
static void
pa_hpux64_hpas_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (1);
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
5448 /* Search the deferred plabel list for SYMBOL and return its internal
5449 label. If an entry for SYMBOL is not found, a new entry is created. */
5452 get_deferred_plabel (rtx symbol
)
5454 const char *fname
= XSTR (symbol
, 0);
5457 /* See if we have already put this function on the list of deferred
5458 plabels. This list is generally small, so a liner search is not
5459 too ugly. If it proves too slow replace it with something faster. */
5460 for (i
= 0; i
< n_deferred_plabels
; i
++)
5461 if (strcmp (fname
, XSTR (deferred_plabels
[i
].symbol
, 0)) == 0)
5464 /* If the deferred plabel list is empty, or this entry was not found
5465 on the list, create a new entry on the list. */
5466 if (deferred_plabels
== NULL
|| i
== n_deferred_plabels
)
5470 if (deferred_plabels
== 0)
5471 deferred_plabels
= ggc_alloc_deferred_plabel ();
5473 deferred_plabels
= GGC_RESIZEVEC (struct deferred_plabel
,
5475 n_deferred_plabels
+ 1);
5477 i
= n_deferred_plabels
++;
5478 deferred_plabels
[i
].internal_label
= gen_label_rtx ();
5479 deferred_plabels
[i
].symbol
= symbol
;
5481 /* Gross. We have just implicitly taken the address of this
5482 function. Mark it in the same manner as assemble_name. */
5483 id
= maybe_get_identifier (targetm
.strip_name_encoding (fname
));
5485 mark_referenced (id
);
5488 return deferred_plabels
[i
].internal_label
;
5492 output_deferred_plabels (void)
5496 /* If we have some deferred plabels, then we need to switch into the
5497 data or readonly data section, and align it to a 4 byte boundary
5498 before outputting the deferred plabels. */
5499 if (n_deferred_plabels
)
5501 switch_to_section (flag_pic
? data_section
: readonly_data_section
);
5502 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
5505 /* Now output the deferred plabels. */
5506 for (i
= 0; i
< n_deferred_plabels
; i
++)
5508 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5509 CODE_LABEL_NUMBER (deferred_plabels
[i
].internal_label
));
5510 assemble_integer (deferred_plabels
[i
].symbol
,
5511 TARGET_64BIT
? 8 : 4, TARGET_64BIT
? 64 : 32, 1);
5515 #if HPUX_LONG_DOUBLE_LIBRARY
5516 /* Initialize optabs to point to HPUX long double emulation routines. */
5518 pa_hpux_init_libfuncs (void)
5520 set_optab_libfunc (add_optab
, TFmode
, "_U_Qfadd");
5521 set_optab_libfunc (sub_optab
, TFmode
, "_U_Qfsub");
5522 set_optab_libfunc (smul_optab
, TFmode
, "_U_Qfmpy");
5523 set_optab_libfunc (sdiv_optab
, TFmode
, "_U_Qfdiv");
5524 set_optab_libfunc (smin_optab
, TFmode
, "_U_Qmin");
5525 set_optab_libfunc (smax_optab
, TFmode
, "_U_Qfmax");
5526 set_optab_libfunc (sqrt_optab
, TFmode
, "_U_Qfsqrt");
5527 set_optab_libfunc (abs_optab
, TFmode
, "_U_Qfabs");
5528 set_optab_libfunc (neg_optab
, TFmode
, "_U_Qfneg");
5530 set_optab_libfunc (eq_optab
, TFmode
, "_U_Qfeq");
5531 set_optab_libfunc (ne_optab
, TFmode
, "_U_Qfne");
5532 set_optab_libfunc (gt_optab
, TFmode
, "_U_Qfgt");
5533 set_optab_libfunc (ge_optab
, TFmode
, "_U_Qfge");
5534 set_optab_libfunc (lt_optab
, TFmode
, "_U_Qflt");
5535 set_optab_libfunc (le_optab
, TFmode
, "_U_Qfle");
5536 set_optab_libfunc (unord_optab
, TFmode
, "_U_Qfunord");
5538 set_conv_libfunc (sext_optab
, TFmode
, SFmode
, "_U_Qfcnvff_sgl_to_quad");
5539 set_conv_libfunc (sext_optab
, TFmode
, DFmode
, "_U_Qfcnvff_dbl_to_quad");
5540 set_conv_libfunc (trunc_optab
, SFmode
, TFmode
, "_U_Qfcnvff_quad_to_sgl");
5541 set_conv_libfunc (trunc_optab
, DFmode
, TFmode
, "_U_Qfcnvff_quad_to_dbl");
5543 set_conv_libfunc (sfix_optab
, SImode
, TFmode
, TARGET_64BIT
5544 ? "__U_Qfcnvfxt_quad_to_sgl"
5545 : "_U_Qfcnvfxt_quad_to_sgl");
5546 set_conv_libfunc (sfix_optab
, DImode
, TFmode
, "_U_Qfcnvfxt_quad_to_dbl");
5547 set_conv_libfunc (ufix_optab
, SImode
, TFmode
, "_U_Qfcnvfxt_quad_to_usgl");
5548 set_conv_libfunc (ufix_optab
, DImode
, TFmode
, "_U_Qfcnvfxt_quad_to_udbl");
5550 set_conv_libfunc (sfloat_optab
, TFmode
, SImode
, "_U_Qfcnvxf_sgl_to_quad");
5551 set_conv_libfunc (sfloat_optab
, TFmode
, DImode
, "_U_Qfcnvxf_dbl_to_quad");
5552 set_conv_libfunc (ufloat_optab
, TFmode
, SImode
, "_U_Qfcnvxf_usgl_to_quad");
5553 set_conv_libfunc (ufloat_optab
, TFmode
, DImode
, "_U_Qfcnvxf_udbl_to_quad");
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli (enum millicodes);
/* One "already imported" flag per millicode routine.  */
static char imported[(int) end1000];
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
/* Template for the .IMPORT directive; the $$.... placeholder at
   MILLI_START is overwritten with the routine name.  */
static const char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10
5568 import_milli (enum millicodes code
)
5570 char str
[sizeof (import_string
)];
5572 if (!imported
[(int) code
])
5574 imported
[(int) code
] = 1;
5575 strcpy (str
, import_string
);
5576 strncpy (str
+ MILLI_START
, milli_names
[(int) code
], 4);
5577 output_asm_insn (str
, 0);
5581 /* The register constraints have put the operands and return value in
5582 the proper registers. */
5585 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED
, rtx insn
)
5587 import_milli (mulI
);
5588 return output_millicode_call (insn
, gen_rtx_SYMBOL_REF (Pmode
, "$$mulI"));
/* Emit the rtl for doing a division by a constant.  */

/* Do magic division millicodes exist for this value? */
const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already. [n][0] is signed, [n][1] is
   unsigned.  */

static int div_milli[16][2];
5603 emit_hpdiv_const (rtx
*operands
, int unsignedp
)
5605 if (GET_CODE (operands
[2]) == CONST_INT
5606 && INTVAL (operands
[2]) > 0
5607 && INTVAL (operands
[2]) < 16
5608 && magic_milli
[INTVAL (operands
[2])])
5610 rtx ret
= gen_rtx_REG (SImode
, TARGET_64BIT
? 2 : 31);
5612 emit_move_insn (gen_rtx_REG (SImode
, 26), operands
[1]);
5616 gen_rtvec (6, gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, 29),
5617 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
5619 gen_rtx_REG (SImode
, 26),
5621 gen_rtx_CLOBBER (VOIDmode
, operands
[4]),
5622 gen_rtx_CLOBBER (VOIDmode
, operands
[3]),
5623 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 26)),
5624 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 25)),
5625 gen_rtx_CLOBBER (VOIDmode
, ret
))));
5626 emit_move_insn (operands
[0], gen_rtx_REG (SImode
, 29));
5633 output_div_insn (rtx
*operands
, int unsignedp
, rtx insn
)
5637 /* If the divisor is a constant, try to use one of the special
5639 if (GET_CODE (operands
[0]) == CONST_INT
)
5641 static char buf
[100];
5642 divisor
= INTVAL (operands
[0]);
5643 if (!div_milli
[divisor
][unsignedp
])
5645 div_milli
[divisor
][unsignedp
] = 1;
5647 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands
);
5649 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands
);
5653 sprintf (buf
, "$$divU_" HOST_WIDE_INT_PRINT_DEC
,
5654 INTVAL (operands
[0]));
5655 return output_millicode_call (insn
,
5656 gen_rtx_SYMBOL_REF (SImode
, buf
));
5660 sprintf (buf
, "$$divI_" HOST_WIDE_INT_PRINT_DEC
,
5661 INTVAL (operands
[0]));
5662 return output_millicode_call (insn
,
5663 gen_rtx_SYMBOL_REF (SImode
, buf
));
5666 /* Divisor isn't a special constant. */
5671 import_milli (divU
);
5672 return output_millicode_call (insn
,
5673 gen_rtx_SYMBOL_REF (SImode
, "$$divU"));
5677 import_milli (divI
);
5678 return output_millicode_call (insn
,
5679 gen_rtx_SYMBOL_REF (SImode
, "$$divI"));
5684 /* Output a $$rem millicode to do mod. */
5687 output_mod_insn (int unsignedp
, rtx insn
)
5691 import_milli (remU
);
5692 return output_millicode_call (insn
,
5693 gen_rtx_SYMBOL_REF (SImode
, "$$remU"));
5697 import_milli (remI
);
5698 return output_millicode_call (insn
,
5699 gen_rtx_SYMBOL_REF (SImode
, "$$remI"));
5704 output_arg_descriptor (rtx call_insn
)
5706 const char *arg_regs
[4];
5707 enum machine_mode arg_mode
;
5709 int i
, output_flag
= 0;
5712 /* We neither need nor want argument location descriptors for the
5713 64bit runtime environment or the ELF32 environment. */
5714 if (TARGET_64BIT
|| TARGET_ELF32
)
5717 for (i
= 0; i
< 4; i
++)
5720 /* Specify explicitly that no argument relocations should take place
5721 if using the portable runtime calling conventions. */
5722 if (TARGET_PORTABLE_RUNTIME
)
5724 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5729 gcc_assert (GET_CODE (call_insn
) == CALL_INSN
);
5730 for (link
= CALL_INSN_FUNCTION_USAGE (call_insn
);
5731 link
; link
= XEXP (link
, 1))
5733 rtx use
= XEXP (link
, 0);
5735 if (! (GET_CODE (use
) == USE
5736 && GET_CODE (XEXP (use
, 0)) == REG
5737 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
5740 arg_mode
= GET_MODE (XEXP (use
, 0));
5741 regno
= REGNO (XEXP (use
, 0));
5742 if (regno
>= 23 && regno
<= 26)
5744 arg_regs
[26 - regno
] = "GR";
5745 if (arg_mode
== DImode
)
5746 arg_regs
[25 - regno
] = "GR";
5748 else if (regno
>= 32 && regno
<= 39)
5750 if (arg_mode
== SFmode
)
5751 arg_regs
[(regno
- 32) / 2] = "FR";
5754 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5755 arg_regs
[(regno
- 34) / 2] = "FR";
5756 arg_regs
[(regno
- 34) / 2 + 1] = "FU";
5758 arg_regs
[(regno
- 34) / 2] = "FU";
5759 arg_regs
[(regno
- 34) / 2 + 1] = "FR";
5764 fputs ("\t.CALL ", asm_out_file
);
5765 for (i
= 0; i
< 4; i
++)
5770 fputc (',', asm_out_file
);
5771 fprintf (asm_out_file
, "ARGW%d=%s", i
, arg_regs
[i
]);
5774 fputc ('\n', asm_out_file
);
5777 /* Inform reload about cases where moving X with a mode MODE to a register in
5778 RCLASS requires an extra scratch or immediate register. Return the class
5779 needed for the immediate register. */
5782 pa_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass_i
,
5783 enum machine_mode mode
, secondary_reload_info
*sri
)
5786 enum reg_class rclass
= (enum reg_class
) rclass_i
;
5788 /* Handle the easy stuff first. */
5789 if (rclass
== R1_REGS
)
5795 if (rclass
== BASE_REG_CLASS
&& regno
< FIRST_PSEUDO_REGISTER
)
5801 /* If we have something like (mem (mem (...)), we can safely assume the
5802 inner MEM will end up in a general register after reloading, so there's
5803 no need for a secondary reload. */
5804 if (GET_CODE (x
) == MEM
&& GET_CODE (XEXP (x
, 0)) == MEM
)
5807 /* Trying to load a constant into a FP register during PIC code
5808 generation requires %r1 as a scratch register. */
5810 && (mode
== SImode
|| mode
== DImode
)
5811 && FP_REG_CLASS_P (rclass
)
5812 && (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
))
5814 sri
->icode
= (mode
== SImode
? CODE_FOR_reload_insi_r1
5815 : CODE_FOR_reload_indi_r1
);
5819 /* Secondary reloads of symbolic operands require %r1 as a scratch
5820 register when we're generating PIC code and when the operand isn't
5822 if (symbolic_expression_p (x
))
5824 if (GET_CODE (x
) == HIGH
)
5827 if (flag_pic
|| !read_only_operand (x
, VOIDmode
))
5829 gcc_assert (mode
== SImode
|| mode
== DImode
);
5830 sri
->icode
= (mode
== SImode
? CODE_FOR_reload_insi_r1
5831 : CODE_FOR_reload_indi_r1
);
5836 /* Profiling showed the PA port spends about 1.3% of its compilation
5837 time in true_regnum from calls inside pa_secondary_reload_class. */
5838 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
5839 regno
= true_regnum (x
);
5841 /* In order to allow 14-bit displacements in integer loads and stores,
5842 we need to prevent reload from generating out of range integer mode
5843 loads and stores to the floating point registers. Previously, we
5844 used to call for a secondary reload and have emit_move_sequence()
5845 fix the instruction sequence. However, reload occasionally wouldn't
5846 generate the reload and we would end up with an invalid REG+D memory
5847 address. So, now we use an intermediate general register for most
5848 memory loads and stores. */
5849 if ((regno
>= FIRST_PSEUDO_REGISTER
|| regno
== -1)
5850 && GET_MODE_CLASS (mode
) == MODE_INT
5851 && FP_REG_CLASS_P (rclass
))
5853 /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
5854 the secondary reload needed for a pseudo. It never passes a
5856 if (GET_CODE (x
) == MEM
)
5860 /* We don't need an intermediate for indexed and LO_SUM DLT
5861 memory addresses. When INT14_OK_STRICT is true, it might
5862 appear that we could directly allow register indirect
5863 memory addresses. However, this doesn't work because we
5864 don't support SUBREGs in floating-point register copies
5865 and reload doesn't tell us when it's going to use a SUBREG. */
5866 if (IS_INDEX_ADDR_P (x
)
5867 || IS_LO_SUM_DLT_ADDR_P (x
))
5870 /* Otherwise, we need an intermediate general register. */
5871 return GENERAL_REGS
;
5874 /* Request a secondary reload with a general scratch register
5875 for everthing else. ??? Could symbolic operands be handled
5876 directly when generating non-pic PA 2.0 code? */
5878 ? direct_optab_handler (reload_in_optab
, mode
)
5879 : direct_optab_handler (reload_out_optab
, mode
));
5883 /* A SAR<->FP register copy requires an intermediate general register
5884 and secondary memory. We need a secondary reload with a general
5885 scratch register for spills. */
5886 if (rclass
== SHIFT_REGS
)
5889 if (regno
>= FIRST_PSEUDO_REGISTER
|| regno
< 0)
5892 ? direct_optab_handler (reload_in_optab
, mode
)
5893 : direct_optab_handler (reload_out_optab
, mode
));
5897 /* Handle FP copy. */
5898 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno
)))
5899 return GENERAL_REGS
;
5902 if (regno
>= 0 && regno
< FIRST_PSEUDO_REGISTER
5903 && REGNO_REG_CLASS (regno
) == SHIFT_REGS
5904 && FP_REG_CLASS_P (rclass
))
5905 return GENERAL_REGS
;
5910 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5911 is only marked as live on entry by df-scan when it is a fixed
5912 register. It isn't a fixed register in the 64-bit runtime,
5913 so we need to mark it here. */
5916 pa_extra_live_on_entry (bitmap regs
)
5919 bitmap_set_bit (regs
, ARG_POINTER_REGNUM
);
5922 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5923 to prevent it from being deleted. */
5926 pa_eh_return_handler_rtx (void)
5930 tmp
= gen_rtx_PLUS (word_mode
, hard_frame_pointer_rtx
,
5931 TARGET_64BIT
? GEN_INT (-16) : GEN_INT (-20));
5932 tmp
= gen_rtx_MEM (word_mode
, tmp
);
5937 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5938 by invisible reference. As a GCC extension, we also pass anything
5939 with a zero or variable size by reference.
5941 The 64-bit runtime does not describe passing any types by invisible
5942 reference. The internals of GCC can't currently handle passing
5943 empty structures, and zero or variable length arrays when they are
5944 not passed entirely on the stack or by reference. Thus, as a GCC
5945 extension, we pass these types by reference. The HP compiler doesn't
5946 support these types, so hopefully there shouldn't be any compatibility
5947 issues. This may have to be revisited when HP releases a C99 compiler
5948 or updates the ABI. */
5951 pa_pass_by_reference (CUMULATIVE_ARGS
*ca ATTRIBUTE_UNUSED
,
5952 enum machine_mode mode
, const_tree type
,
5953 bool named ATTRIBUTE_UNUSED
)
5958 size
= int_size_in_bytes (type
);
5960 size
= GET_MODE_SIZE (mode
);
5965 return size
<= 0 || size
> 8;
5969 function_arg_padding (enum machine_mode mode
, const_tree type
)
5974 && (AGGREGATE_TYPE_P (type
)
5975 || TREE_CODE (type
) == COMPLEX_TYPE
5976 || TREE_CODE (type
) == VECTOR_TYPE
)))
5978 /* Return none if justification is not required. */
5980 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
5981 && (int_size_in_bytes (type
) * BITS_PER_UNIT
) % PARM_BOUNDARY
== 0)
5984 /* The directions set here are ignored when a BLKmode argument larger
5985 than a word is placed in a register. Different code is used for
5986 the stack and registers. This makes it difficult to have a
5987 consistent data representation for both the stack and registers.
5988 For both runtimes, the justification and padding for arguments on
5989 the stack and in registers should be identical. */
5991 /* The 64-bit runtime specifies left justification for aggregates. */
5994 /* The 32-bit runtime architecture specifies right justification.
5995 When the argument is passed on the stack, the argument is padded
5996 with garbage on the left. The HP compiler pads with zeros. */
6000 if (GET_MODE_BITSIZE (mode
) < PARM_BOUNDARY
)
6007 /* Do what is necessary for `va_start'. We look at the current function
6008 to determine if stdargs or varargs is used and fill in an initial
6009 va_list. A pointer to this constructor is returned. */
6012 hppa_builtin_saveregs (void)
6015 tree fntype
= TREE_TYPE (current_function_decl
);
6016 int argadj
= ((!stdarg_p (fntype
))
6017 ? UNITS_PER_WORD
: 0);
6020 offset
= plus_constant (crtl
->args
.arg_offset_rtx
, argadj
);
6022 offset
= crtl
->args
.arg_offset_rtx
;
6028 /* Adjust for varargs/stdarg differences. */
6030 offset
= plus_constant (crtl
->args
.arg_offset_rtx
, -argadj
);
6032 offset
= crtl
->args
.arg_offset_rtx
;
6034 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6035 from the incoming arg pointer and growing to larger addresses. */
6036 for (i
= 26, off
= -64; i
>= 19; i
--, off
+= 8)
6037 emit_move_insn (gen_rtx_MEM (word_mode
,
6038 plus_constant (arg_pointer_rtx
, off
)),
6039 gen_rtx_REG (word_mode
, i
));
6041 /* The incoming args pointer points just beyond the flushback area;
6042 normally this is not a serious concern. However, when we are doing
6043 varargs/stdargs we want to make the arg pointer point to the start
6044 of the incoming argument area. */
6045 emit_move_insn (virtual_incoming_args_rtx
,
6046 plus_constant (arg_pointer_rtx
, -64));
6048 /* Now return a pointer to the first anonymous argument. */
6049 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6050 virtual_incoming_args_rtx
,
6051 offset
, 0, 0, OPTAB_LIB_WIDEN
));
6054 /* Store general registers on the stack. */
6055 dest
= gen_rtx_MEM (BLKmode
,
6056 plus_constant (crtl
->args
.internal_arg_pointer
,
6058 set_mem_alias_set (dest
, get_varargs_alias_set ());
6059 set_mem_align (dest
, BITS_PER_WORD
);
6060 move_block_from_reg (23, dest
, 4);
6062 /* move_block_from_reg will emit code to store the argument registers
6063 individually as scalar stores.
6065 However, other insns may later load from the same addresses for
6066 a structure load (passing a struct to a varargs routine).
6068 The alias code assumes that such aliasing can never happen, so we
6069 have to keep memory referencing insns from moving up beyond the
6070 last argument register store. So we emit a blockage insn here. */
6071 emit_insn (gen_blockage ());
6073 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6074 crtl
->args
.internal_arg_pointer
,
6075 offset
, 0, 0, OPTAB_LIB_WIDEN
));
6079 hppa_va_start (tree valist
, rtx nextarg
)
6081 nextarg
= expand_builtin_saveregs ();
6082 std_expand_builtin_va_start (valist
, nextarg
);
6086 hppa_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
6091 /* Args grow upward. We can use the generic routines. */
6092 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
6094 else /* !TARGET_64BIT */
6096 tree ptr
= build_pointer_type (type
);
6099 unsigned int size
, ofs
;
6102 indirect
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, 0);
6106 ptr
= build_pointer_type (type
);
6108 size
= int_size_in_bytes (type
);
6109 valist_type
= TREE_TYPE (valist
);
6111 /* Args grow down. Not handled by generic routines. */
6113 u
= fold_convert (sizetype
, size_in_bytes (type
));
6114 u
= fold_build1 (NEGATE_EXPR
, sizetype
, u
);
6115 t
= build2 (POINTER_PLUS_EXPR
, valist_type
, valist
, u
);
6117 /* Align to 4 or 8 byte boundary depending on argument size. */
6119 u
= build_int_cst (TREE_TYPE (t
), (HOST_WIDE_INT
)(size
> 4 ? -8 : -4));
6120 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
, u
);
6121 t
= fold_convert (valist_type
, t
);
6123 t
= build2 (MODIFY_EXPR
, valist_type
, valist
, t
);
6125 ofs
= (8 - size
) % 4;
6129 t
= build2 (POINTER_PLUS_EXPR
, valist_type
, t
, u
);
6132 t
= fold_convert (ptr
, t
);
6133 t
= build_va_arg_indirect_ref (t
);
6136 t
= build_va_arg_indirect_ref (t
);
6142 /* True if MODE is valid for the target. By "valid", we mean able to
6143 be manipulated in non-trivial ways. In particular, this means all
6144 the arithmetic is supported.
6146 Currently, TImode is not valid as the HP 64-bit runtime documentation
6147 doesn't document the alignment and calling conventions for this type.
6148 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6149 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6152 pa_scalar_mode_supported_p (enum machine_mode mode
)
6154 int precision
= GET_MODE_PRECISION (mode
);
6156 switch (GET_MODE_CLASS (mode
))
6158 case MODE_PARTIAL_INT
:
6160 if (precision
== CHAR_TYPE_SIZE
)
6162 if (precision
== SHORT_TYPE_SIZE
)
6164 if (precision
== INT_TYPE_SIZE
)
6166 if (precision
== LONG_TYPE_SIZE
)
6168 if (precision
== LONG_LONG_TYPE_SIZE
)
6173 if (precision
== FLOAT_TYPE_SIZE
)
6175 if (precision
== DOUBLE_TYPE_SIZE
)
6177 if (precision
== LONG_DOUBLE_TYPE_SIZE
)
6181 case MODE_DECIMAL_FLOAT
:
6189 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6190 it branches into the delay slot. Otherwise, return FALSE. */
6193 branch_to_delay_slot_p (rtx insn
)
6197 if (dbr_sequence_length ())
6200 jump_insn
= next_active_insn (JUMP_LABEL (insn
));
6203 insn
= next_active_insn (insn
);
6204 if (jump_insn
== insn
)
6207 /* We can't rely on the length of asms. So, we return FALSE when
6208 the branch is followed by an asm. */
6210 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6211 || extract_asm_operands (PATTERN (insn
)) != NULL_RTX
6212 || get_attr_length (insn
) > 0)
6219 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6221 This occurs when INSN has an unfilled delay slot and is followed
6222 by an asm. Disaster can occur if the asm is empty and the jump
6223 branches into the delay slot. So, we add a nop in the delay slot
6224 when this occurs. */
6227 branch_needs_nop_p (rtx insn
)
6231 if (dbr_sequence_length ())
6234 jump_insn
= next_active_insn (JUMP_LABEL (insn
));
6237 insn
= next_active_insn (insn
);
6238 if (!insn
|| jump_insn
== insn
)
6241 if (!(GET_CODE (PATTERN (insn
)) == ASM_INPUT
6242 || extract_asm_operands (PATTERN (insn
)) != NULL_RTX
)
6243 && get_attr_length (insn
) > 0)
6250 /* Return TRUE if INSN, a forward jump insn, can use nullification
6251 to skip the following instruction. This avoids an extra cycle due
6252 to a mis-predicted branch when we fall through. */
6255 use_skip_p (rtx insn
)
6257 rtx jump_insn
= next_active_insn (JUMP_LABEL (insn
));
6261 insn
= next_active_insn (insn
);
6263 /* We can't rely on the length of asms, so we can't skip asms. */
6265 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
6266 || extract_asm_operands (PATTERN (insn
)) != NULL_RTX
)
6268 if (get_attr_length (insn
) == 4
6269 && jump_insn
== next_active_insn (insn
))
6271 if (get_attr_length (insn
) > 0)
6278 /* This routine handles all the normal conditional branch sequences we
6279 might need to generate. It handles compare immediate vs compare
6280 register, nullification of delay slots, varying length branches,
6281 negated branches, and all combinations of the above. It returns the
6282 output appropriate to emit the branch corresponding to all given
6286 output_cbranch (rtx
*operands
, int negated
, rtx insn
)
6288 static char buf
[100];
6290 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6291 int length
= get_attr_length (insn
);
6294 /* A conditional branch to the following instruction (e.g. the delay slot)
6295 is asking for a disaster. This can happen when not optimizing and
6296 when jump optimization fails.
6298 While it is usually safe to emit nothing, this can fail if the
6299 preceding instruction is a nullified branch with an empty delay
6300 slot and the same branch target as this branch. We could check
6301 for this but jump optimization should eliminate nop jumps. It
6302 is always safe to emit a nop. */
6303 if (branch_to_delay_slot_p (insn
))
6306 /* The doubleword form of the cmpib instruction doesn't have the LEU
6307 and GTU conditions while the cmpb instruction does. Since we accept
6308 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6309 if (GET_MODE (operands
[1]) == DImode
&& operands
[2] == const0_rtx
)
6310 operands
[2] = gen_rtx_REG (DImode
, 0);
6311 if (GET_MODE (operands
[2]) == DImode
&& operands
[1] == const0_rtx
)
6312 operands
[1] = gen_rtx_REG (DImode
, 0);
6314 /* If this is a long branch with its delay slot unfilled, set `nullify'
6315 as it can nullify the delay slot and save a nop. */
6316 if (length
== 8 && dbr_sequence_length () == 0)
6319 /* If this is a short forward conditional branch which did not get
6320 its delay slot filled, the delay slot can still be nullified. */
6321 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6322 nullify
= forward_branch_p (insn
);
6324 /* A forward branch over a single nullified insn can be done with a
6325 comclr instruction. This avoids a single cycle penalty due to
6326 mis-predicted branch if we fall through (branch not taken). */
6327 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
6331 /* All short conditional branches except backwards with an unfilled
6335 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6337 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6338 if (GET_MODE (operands
[1]) == DImode
)
6341 strcat (buf
, "%B3");
6343 strcat (buf
, "%S3");
6345 strcat (buf
, " %2,%r1,%%r0");
6348 if (branch_needs_nop_p (insn
))
6349 strcat (buf
, ",n %2,%r1,%0%#");
6351 strcat (buf
, ",n %2,%r1,%0");
6354 strcat (buf
, " %2,%r1,%0");
6357 /* All long conditionals. Note a short backward branch with an
6358 unfilled delay slot is treated just like a long backward branch
6359 with an unfilled delay slot. */
6361 /* Handle weird backwards branch with a filled delay slot
6362 which is nullified. */
6363 if (dbr_sequence_length () != 0
6364 && ! forward_branch_p (insn
)
6367 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6368 if (GET_MODE (operands
[1]) == DImode
)
6371 strcat (buf
, "%S3");
6373 strcat (buf
, "%B3");
6374 strcat (buf
, ",n %2,%r1,.+12\n\tb %0");
6376 /* Handle short backwards branch with an unfilled delay slot.
6377 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6378 taken and untaken branches. */
6379 else if (dbr_sequence_length () == 0
6380 && ! forward_branch_p (insn
)
6381 && INSN_ADDRESSES_SET_P ()
6382 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6383 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6385 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6386 if (GET_MODE (operands
[1]) == DImode
)
6389 strcat (buf
, "%B3 %2,%r1,%0%#");
6391 strcat (buf
, "%S3 %2,%r1,%0%#");
6395 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6396 if (GET_MODE (operands
[1]) == DImode
)
6399 strcat (buf
, "%S3");
6401 strcat (buf
, "%B3");
6403 strcat (buf
, " %2,%r1,%%r0\n\tb,n %0");
6405 strcat (buf
, " %2,%r1,%%r0\n\tb %0");
6410 /* The reversed conditional branch must branch over one additional
6411 instruction if the delay slot is filled and needs to be extracted
6412 by output_lbranch. If the delay slot is empty or this is a
6413 nullified forward branch, the instruction after the reversed
6414 condition branch must be nullified. */
6415 if (dbr_sequence_length () == 0
6416 || (nullify
&& forward_branch_p (insn
)))
6420 operands
[4] = GEN_INT (length
);
6425 operands
[4] = GEN_INT (length
+ 4);
6428 /* Create a reversed conditional branch which branches around
6429 the following insns. */
6430 if (GET_MODE (operands
[1]) != DImode
)
6436 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6439 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6445 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6448 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6457 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6460 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6466 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6469 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6473 output_asm_insn (buf
, operands
);
6474 return output_lbranch (operands
[0], insn
, xdelay
);
6479 /* This routine handles output of long unconditional branches that
6480 exceed the maximum range of a simple branch instruction. Since
6481 we don't have a register available for the branch, we save register
6482 %r1 in the frame marker, load the branch destination DEST into %r1,
6483 execute the branch, and restore %r1 in the delay slot of the branch.
6485 Since long branches may have an insn in the delay slot and the
6486 delay slot is used to restore %r1, we in general need to extract
6487 this insn and execute it before the branch. However, to facilitate
6488 use of this function by conditional branches, we also provide an
6489 option to not extract the delay insn so that it will be emitted
6490 after the long branch. So, if there is an insn in the delay slot,
6491 it is extracted if XDELAY is nonzero.
6493 The lengths of the various long-branch sequences are 20, 16 and 24
6494 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6497 output_lbranch (rtx dest
, rtx insn
, int xdelay
)
6501 xoperands
[0] = dest
;
6503 /* First, free up the delay slot. */
6504 if (xdelay
&& dbr_sequence_length () != 0)
6506 /* We can't handle a jump in the delay slot. */
6507 gcc_assert (GET_CODE (NEXT_INSN (insn
)) != JUMP_INSN
);
6509 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
6512 /* Now delete the delay insn. */
6513 SET_INSN_DELETED (NEXT_INSN (insn
));
6516 /* Output an insn to save %r1. The runtime documentation doesn't
6517 specify whether the "Clean Up" slot in the callers frame can
6518 be clobbered by the callee. It isn't copied by HP's builtin
6519 alloca, so this suggests that it can be clobbered if necessary.
6520 The "Static Link" location is copied by HP builtin alloca, so
6521 we avoid using it. Using the cleanup slot might be a problem
6522 if we have to interoperate with languages that pass cleanup
6523 information. However, it should be possible to handle these
6524 situations with GCC's asm feature.
6526 The "Current RP" slot is reserved for the called procedure, so
6527 we try to use it when we don't have a frame of our own. It's
6528 rather unlikely that we won't have a frame when we need to emit
6531 Really the way to go long term is a register scavenger; goto
6532 the target of the jump and find a register which we can use
6533 as a scratch to hold the value in %r1. Then, we wouldn't have
6534 to free up the delay slot or clobber a slot that may be needed
6535 for other purposes. */
6538 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6539 /* Use the return pointer slot in the frame marker. */
6540 output_asm_insn ("std %%r1,-16(%%r30)", xoperands
);
6542 /* Use the slot at -40 in the frame marker since HP builtin
6543 alloca doesn't copy it. */
6544 output_asm_insn ("std %%r1,-40(%%r30)", xoperands
);
6548 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6549 /* Use the return pointer slot in the frame marker. */
6550 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands
);
6552 /* Use the "Clean Up" slot in the frame marker. In GCC,
6553 the only other use of this location is for copying a
6554 floating point double argument from a floating-point
6555 register to two general registers. The copy is done
6556 as an "atomic" operation when outputting a call, so it
6557 won't interfere with our using the location here. */
6558 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands
);
6561 if (TARGET_PORTABLE_RUNTIME
)
6563 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
6564 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
6565 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
6569 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
6570 if (TARGET_SOM
|| !TARGET_GAS
)
6572 xoperands
[1] = gen_label_rtx ();
6573 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands
);
6574 targetm
.asm_out
.internal_label (asm_out_file
, "L",
6575 CODE_LABEL_NUMBER (xoperands
[1]));
6576 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands
);
6580 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands
);
6581 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
6583 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
6586 /* Now output a very long branch to the original target. */
6587 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands
);
6589 /* Now restore the value of %r1 in the delay slot. */
6592 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6593 return "ldd -16(%%r30),%%r1";
6595 return "ldd -40(%%r30),%%r1";
6599 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6600 return "ldw -20(%%r30),%%r1";
6602 return "ldw -12(%%r30),%%r1";
6606 /* This routine handles all the branch-on-bit conditional branch sequences we
6607 might need to generate. It handles nullification of delay slots,
6608 varying length branches, negated branches and all combinations of the
6609 above. it returns the appropriate output template to emit the branch. */
6612 output_bb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx insn
, int which
)
6614 static char buf
[100];
6616 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6617 int length
= get_attr_length (insn
);
6620 /* A conditional branch to the following instruction (e.g. the delay slot) is
6621 asking for a disaster. I do not think this can happen as this pattern
6622 is only used when optimizing; jump optimization should eliminate the
6623 jump. But be prepared just in case. */
6625 if (branch_to_delay_slot_p (insn
))
6628 /* If this is a long branch with its delay slot unfilled, set `nullify'
6629 as it can nullify the delay slot and save a nop. */
6630 if (length
== 8 && dbr_sequence_length () == 0)
6633 /* If this is a short forward conditional branch which did not get
6634 its delay slot filled, the delay slot can still be nullified. */
6635 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6636 nullify
= forward_branch_p (insn
);
6638 /* A forward branch over a single nullified insn can be done with a
6639 extrs instruction. This avoids a single cycle penalty due to
6640 mis-predicted branch if we fall through (branch not taken). */
6641 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
6646 /* All short conditional branches except backwards with an unfilled
6650 strcpy (buf
, "{extrs,|extrw,s,}");
6652 strcpy (buf
, "bb,");
6653 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
6654 strcpy (buf
, "extrd,s,*");
6655 else if (GET_MODE (operands
[0]) == DImode
)
6656 strcpy (buf
, "bb,*");
6657 if ((which
== 0 && negated
)
6658 || (which
== 1 && ! negated
))
6663 strcat (buf
, " %0,%1,1,%%r0");
6664 else if (nullify
&& negated
)
6666 if (branch_needs_nop_p (insn
))
6667 strcat (buf
, ",n %0,%1,%3%#");
6669 strcat (buf
, ",n %0,%1,%3");
6671 else if (nullify
&& ! negated
)
6673 if (branch_needs_nop_p (insn
))
6674 strcat (buf
, ",n %0,%1,%2%#");
6676 strcat (buf
, ",n %0,%1,%2");
6678 else if (! nullify
&& negated
)
6679 strcat (buf
, " %0,%1,%3");
6680 else if (! nullify
&& ! negated
)
6681 strcat (buf
, " %0,%1,%2");
6684 /* All long conditionals. Note a short backward branch with an
6685 unfilled delay slot is treated just like a long backward branch
6686 with an unfilled delay slot. */
6688 /* Handle weird backwards branch with a filled delay slot
6689 which is nullified. */
6690 if (dbr_sequence_length () != 0
6691 && ! forward_branch_p (insn
)
6694 strcpy (buf
, "bb,");
6695 if (GET_MODE (operands
[0]) == DImode
)
6697 if ((which
== 0 && negated
)
6698 || (which
== 1 && ! negated
))
6703 strcat (buf
, ",n %0,%1,.+12\n\tb %3");
6705 strcat (buf
, ",n %0,%1,.+12\n\tb %2");
6707 /* Handle short backwards branch with an unfilled delay slot.
6708 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6709 taken and untaken branches. */
6710 else if (dbr_sequence_length () == 0
6711 && ! forward_branch_p (insn
)
6712 && INSN_ADDRESSES_SET_P ()
6713 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6714 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6716 strcpy (buf
, "bb,");
6717 if (GET_MODE (operands
[0]) == DImode
)
6719 if ((which
== 0 && negated
)
6720 || (which
== 1 && ! negated
))
6725 strcat (buf
, " %0,%1,%3%#");
6727 strcat (buf
, " %0,%1,%2%#");
6731 if (GET_MODE (operands
[0]) == DImode
)
6732 strcpy (buf
, "extrd,s,*");
6734 strcpy (buf
, "{extrs,|extrw,s,}");
6735 if ((which
== 0 && negated
)
6736 || (which
== 1 && ! negated
))
6740 if (nullify
&& negated
)
6741 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %3");
6742 else if (nullify
&& ! negated
)
6743 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %2");
6745 strcat (buf
, " %0,%1,1,%%r0\n\tb %3");
6747 strcat (buf
, " %0,%1,1,%%r0\n\tb %2");
6752 /* The reversed conditional branch must branch over one additional
6753 instruction if the delay slot is filled and needs to be extracted
6754 by output_lbranch. If the delay slot is empty or this is a
6755 nullified forward branch, the instruction after the reversed
6756 condition branch must be nullified. */
6757 if (dbr_sequence_length () == 0
6758 || (nullify
&& forward_branch_p (insn
)))
6762 operands
[4] = GEN_INT (length
);
6767 operands
[4] = GEN_INT (length
+ 4);
6770 if (GET_MODE (operands
[0]) == DImode
)
6771 strcpy (buf
, "bb,*");
6773 strcpy (buf
, "bb,");
6774 if ((which
== 0 && negated
)
6775 || (which
== 1 && !negated
))
6780 strcat (buf
, ",n %0,%1,.+%4");
6782 strcat (buf
, " %0,%1,.+%4");
6783 output_asm_insn (buf
, operands
);
6784 return output_lbranch (negated
? operands
[3] : operands
[2],
6790 /* This routine handles all the branch-on-variable-bit conditional branch
6791 sequences we might need to generate. It handles nullification of delay
6792 slots, varying length branches, negated branches and all combinations
6793 of the above. it returns the appropriate output template to emit the
6797 output_bvb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx insn
, int which
)
6799 static char buf
[100];
6801 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6802 int length
= get_attr_length (insn
);
6805 /* A conditional branch to the following instruction (e.g. the delay slot) is
6806 asking for a disaster. I do not think this can happen as this pattern
6807 is only used when optimizing; jump optimization should eliminate the
6808 jump. But be prepared just in case. */
6810 if (branch_to_delay_slot_p (insn
))
6813 /* If this is a long branch with its delay slot unfilled, set `nullify'
6814 as it can nullify the delay slot and save a nop. */
6815 if (length
== 8 && dbr_sequence_length () == 0)
6818 /* If this is a short forward conditional branch which did not get
6819 its delay slot filled, the delay slot can still be nullified. */
6820 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6821 nullify
= forward_branch_p (insn
);
6823 /* A forward branch over a single nullified insn can be done with a
6824 extrs instruction. This avoids a single cycle penalty due to
6825 mis-predicted branch if we fall through (branch not taken). */
6826 useskip
= (length
== 4 && nullify
) ? use_skip_p (insn
) : FALSE
;
6831 /* All short conditional branches except backwards with an unfilled
6835 strcpy (buf
, "{vextrs,|extrw,s,}");
6837 strcpy (buf
, "{bvb,|bb,}");
6838 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
6839 strcpy (buf
, "extrd,s,*");
6840 else if (GET_MODE (operands
[0]) == DImode
)
6841 strcpy (buf
, "bb,*");
6842 if ((which
== 0 && negated
)
6843 || (which
== 1 && ! negated
))
6848 strcat (buf
, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6849 else if (nullify
&& negated
)
6851 if (branch_needs_nop_p (insn
))
6852 strcat (buf
, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6854 strcat (buf
, "{,n %0,%3|,n %0,%%sar,%3}");
6856 else if (nullify
&& ! negated
)
6858 if (branch_needs_nop_p (insn
))
6859 strcat (buf
, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6861 strcat (buf
, "{,n %0,%2|,n %0,%%sar,%2}");
6863 else if (! nullify
&& negated
)
6864 strcat (buf
, "{ %0,%3| %0,%%sar,%3}");
6865 else if (! nullify
&& ! negated
)
6866 strcat (buf
, "{ %0,%2| %0,%%sar,%2}");
6869 /* All long conditionals. Note a short backward branch with an
6870 unfilled delay slot is treated just like a long backward branch
6871 with an unfilled delay slot. */
6873 /* Handle weird backwards branch with a filled delay slot
6874 which is nullified. */
6875 if (dbr_sequence_length () != 0
6876 && ! forward_branch_p (insn
)
6879 strcpy (buf
, "{bvb,|bb,}");
6880 if (GET_MODE (operands
[0]) == DImode
)
6882 if ((which
== 0 && negated
)
6883 || (which
== 1 && ! negated
))
6888 strcat (buf
, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6890 strcat (buf
, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6892 /* Handle short backwards branch with an unfilled delay slot.
6893 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6894 taken and untaken branches. */
6895 else if (dbr_sequence_length () == 0
6896 && ! forward_branch_p (insn
)
6897 && INSN_ADDRESSES_SET_P ()
6898 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6899 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6901 strcpy (buf
, "{bvb,|bb,}");
6902 if (GET_MODE (operands
[0]) == DImode
)
6904 if ((which
== 0 && negated
)
6905 || (which
== 1 && ! negated
))
6910 strcat (buf
, "{ %0,%3%#| %0,%%sar,%3%#}");
6912 strcat (buf
, "{ %0,%2%#| %0,%%sar,%2%#}");
6916 strcpy (buf
, "{vextrs,|extrw,s,}");
6917 if (GET_MODE (operands
[0]) == DImode
)
6918 strcpy (buf
, "extrd,s,*");
6919 if ((which
== 0 && negated
)
6920 || (which
== 1 && ! negated
))
6924 if (nullify
&& negated
)
6925 strcat (buf
, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6926 else if (nullify
&& ! negated
)
6927 strcat (buf
, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6929 strcat (buf
, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6931 strcat (buf
, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6936 /* The reversed conditional branch must branch over one additional
6937 instruction if the delay slot is filled and needs to be extracted
6938 by output_lbranch. If the delay slot is empty or this is a
6939 nullified forward branch, the instruction after the reversed
6940 condition branch must be nullified. */
6941 if (dbr_sequence_length () == 0
6942 || (nullify
&& forward_branch_p (insn
)))
6946 operands
[4] = GEN_INT (length
);
6951 operands
[4] = GEN_INT (length
+ 4);
6954 if (GET_MODE (operands
[0]) == DImode
)
6955 strcpy (buf
, "bb,*");
6957 strcpy (buf
, "{bvb,|bb,}");
6958 if ((which
== 0 && negated
)
6959 || (which
== 1 && !negated
))
6964 strcat (buf
, ",n {%0,.+%4|%0,%%sar,.+%4}");
6966 strcat (buf
, " {%0,.+%4|%0,%%sar,.+%4}");
6967 output_asm_insn (buf
, operands
);
6968 return output_lbranch (negated
? operands
[3] : operands
[2],
6974 /* Return the output template for emitting a dbra type insn.
6976 Note it may perform some output operations on its own before
6977 returning the final output string. */
6979 output_dbra (rtx
*operands
, rtx insn
, int which_alternative
)
6981 int length
= get_attr_length (insn
);
6983 /* A conditional branch to the following instruction (e.g. the delay slot) is
6984 asking for a disaster. Be prepared! */
6986 if (branch_to_delay_slot_p (insn
))
6988 if (which_alternative
== 0)
6989 return "ldo %1(%0),%0";
6990 else if (which_alternative
== 1)
6992 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands
);
6993 output_asm_insn ("ldw -16(%%r30),%4", operands
);
6994 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands
);
6995 return "{fldws|fldw} -16(%%r30),%0";
6999 output_asm_insn ("ldw %0,%4", operands
);
7000 return "ldo %1(%4),%4\n\tstw %4,%0";
7004 if (which_alternative
== 0)
7006 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7009 /* If this is a long branch with its delay slot unfilled, set `nullify'
7010 as it can nullify the delay slot and save a nop. */
7011 if (length
== 8 && dbr_sequence_length () == 0)
7014 /* If this is a short forward conditional branch which did not get
7015 its delay slot filled, the delay slot can still be nullified. */
7016 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7017 nullify
= forward_branch_p (insn
);
7024 if (branch_needs_nop_p (insn
))
7025 return "addib,%C2,n %1,%0,%3%#";
7027 return "addib,%C2,n %1,%0,%3";
7030 return "addib,%C2 %1,%0,%3";
7033 /* Handle weird backwards branch with a fulled delay slot
7034 which is nullified. */
7035 if (dbr_sequence_length () != 0
7036 && ! forward_branch_p (insn
)
7038 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7039 /* Handle short backwards branch with an unfilled delay slot.
7040 Using a addb;nop rather than addi;bl saves 1 cycle for both
7041 taken and untaken branches. */
7042 else if (dbr_sequence_length () == 0
7043 && ! forward_branch_p (insn
)
7044 && INSN_ADDRESSES_SET_P ()
7045 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7046 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7047 return "addib,%C2 %1,%0,%3%#";
7049 /* Handle normal cases. */
7051 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7053 return "addi,%N2 %1,%0,%0\n\tb %3";
7056 /* The reversed conditional branch must branch over one additional
7057 instruction if the delay slot is filled and needs to be extracted
7058 by output_lbranch. If the delay slot is empty or this is a
7059 nullified forward branch, the instruction after the reversed
7060 condition branch must be nullified. */
7061 if (dbr_sequence_length () == 0
7062 || (nullify
&& forward_branch_p (insn
)))
7066 operands
[4] = GEN_INT (length
);
7071 operands
[4] = GEN_INT (length
+ 4);
7075 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands
);
7077 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands
);
7079 return output_lbranch (operands
[3], insn
, xdelay
);
7083 /* Deal with gross reload from FP register case. */
7084 else if (which_alternative
== 1)
7086 /* Move loop counter from FP register to MEM then into a GR,
7087 increment the GR, store the GR into MEM, and finally reload
7088 the FP register from MEM from within the branch's delay slot. */
7089 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7091 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands
);
7093 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7094 else if (length
== 28)
7095 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7098 operands
[5] = GEN_INT (length
- 16);
7099 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands
);
7100 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands
);
7101 return output_lbranch (operands
[3], insn
, 0);
7104 /* Deal with gross reload from memory case. */
7107 /* Reload loop counter from memory, the store back to memory
7108 happens in the branch's delay slot. */
7109 output_asm_insn ("ldw %0,%4", operands
);
7111 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7112 else if (length
== 16)
7113 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7116 operands
[5] = GEN_INT (length
- 4);
7117 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands
);
7118 return output_lbranch (operands
[3], insn
, 0);
7123 /* Return the output template for emitting a movb type insn.
7125 Note it may perform some output operations on its own before
7126 returning the final output string. */
7128 output_movb (rtx
*operands
, rtx insn
, int which_alternative
,
7129 int reverse_comparison
)
7131 int length
= get_attr_length (insn
);
7133 /* A conditional branch to the following instruction (e.g. the delay slot) is
7134 asking for a disaster. Be prepared! */
7136 if (branch_to_delay_slot_p (insn
))
7138 if (which_alternative
== 0)
7139 return "copy %1,%0";
7140 else if (which_alternative
== 1)
7142 output_asm_insn ("stw %1,-16(%%r30)", operands
);
7143 return "{fldws|fldw} -16(%%r30),%0";
7145 else if (which_alternative
== 2)
7151 /* Support the second variant. */
7152 if (reverse_comparison
)
7153 PUT_CODE (operands
[2], reverse_condition (GET_CODE (operands
[2])));
7155 if (which_alternative
== 0)
7157 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7160 /* If this is a long branch with its delay slot unfilled, set `nullify'
7161 as it can nullify the delay slot and save a nop. */
7162 if (length
== 8 && dbr_sequence_length () == 0)
7165 /* If this is a short forward conditional branch which did not get
7166 its delay slot filled, the delay slot can still be nullified. */
7167 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7168 nullify
= forward_branch_p (insn
);
7175 if (branch_needs_nop_p (insn
))
7176 return "movb,%C2,n %1,%0,%3%#";
7178 return "movb,%C2,n %1,%0,%3";
7181 return "movb,%C2 %1,%0,%3";
7184 /* Handle weird backwards branch with a filled delay slot
7185 which is nullified. */
7186 if (dbr_sequence_length () != 0
7187 && ! forward_branch_p (insn
)
7189 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7191 /* Handle short backwards branch with an unfilled delay slot.
7192 Using a movb;nop rather than or;bl saves 1 cycle for both
7193 taken and untaken branches. */
7194 else if (dbr_sequence_length () == 0
7195 && ! forward_branch_p (insn
)
7196 && INSN_ADDRESSES_SET_P ()
7197 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7198 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7199 return "movb,%C2 %1,%0,%3%#";
7200 /* Handle normal cases. */
7202 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7204 return "or,%N2 %1,%%r0,%0\n\tb %3";
7207 /* The reversed conditional branch must branch over one additional
7208 instruction if the delay slot is filled and needs to be extracted
7209 by output_lbranch. If the delay slot is empty or this is a
7210 nullified forward branch, the instruction after the reversed
7211 condition branch must be nullified. */
7212 if (dbr_sequence_length () == 0
7213 || (nullify
&& forward_branch_p (insn
)))
7217 operands
[4] = GEN_INT (length
);
7222 operands
[4] = GEN_INT (length
+ 4);
7226 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands
);
7228 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands
);
7230 return output_lbranch (operands
[3], insn
, xdelay
);
7233 /* Deal with gross reload for FP destination register case. */
7234 else if (which_alternative
== 1)
7236 /* Move source register to MEM, perform the branch test, then
7237 finally load the FP register from MEM from within the branch's
7239 output_asm_insn ("stw %1,-16(%%r30)", operands
);
7241 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7242 else if (length
== 16)
7243 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7246 operands
[4] = GEN_INT (length
- 4);
7247 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands
);
7248 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands
);
7249 return output_lbranch (operands
[3], insn
, 0);
7252 /* Deal with gross reload from memory case. */
7253 else if (which_alternative
== 2)
7255 /* Reload loop counter from memory, the store back to memory
7256 happens in the branch's delay slot. */
7258 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7259 else if (length
== 12)
7260 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7263 operands
[4] = GEN_INT (length
);
7264 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7266 return output_lbranch (operands
[3], insn
, 0);
7269 /* Handle SAR as a destination. */
7273 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7274 else if (length
== 12)
7275 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7278 operands
[4] = GEN_INT (length
);
7279 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7281 return output_lbranch (operands
[3], insn
, 0);
7286 /* Copy any FP arguments in INSN into integer registers. */
7288 copy_fp_args (rtx insn
)
7293 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7295 int arg_mode
, regno
;
7296 rtx use
= XEXP (link
, 0);
7298 if (! (GET_CODE (use
) == USE
7299 && GET_CODE (XEXP (use
, 0)) == REG
7300 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7303 arg_mode
= GET_MODE (XEXP (use
, 0));
7304 regno
= REGNO (XEXP (use
, 0));
7306 /* Is it a floating point register? */
7307 if (regno
>= 32 && regno
<= 39)
7309 /* Copy the FP register into an integer register via memory. */
7310 if (arg_mode
== SFmode
)
7312 xoperands
[0] = XEXP (use
, 0);
7313 xoperands
[1] = gen_rtx_REG (SImode
, 26 - (regno
- 32) / 2);
7314 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands
);
7315 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7319 xoperands
[0] = XEXP (use
, 0);
7320 xoperands
[1] = gen_rtx_REG (DImode
, 25 - (regno
- 34) / 2);
7321 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands
);
7322 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands
);
7323 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7329 /* Compute length of the FP argument copy sequence for INSN. */
7331 length_fp_args (rtx insn
)
7336 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7338 int arg_mode
, regno
;
7339 rtx use
= XEXP (link
, 0);
7341 if (! (GET_CODE (use
) == USE
7342 && GET_CODE (XEXP (use
, 0)) == REG
7343 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7346 arg_mode
= GET_MODE (XEXP (use
, 0));
7347 regno
= REGNO (XEXP (use
, 0));
7349 /* Is it a floating point register? */
7350 if (regno
>= 32 && regno
<= 39)
7352 if (arg_mode
== SFmode
)
7362 /* Return the attribute length for the millicode call instruction INSN.
7363 The length must match the code generated by output_millicode_call.
7364 We include the delay slot in the returned length as it is better to
7365 over estimate the length than to under estimate it. */
7368 attr_length_millicode_call (rtx insn
)
7370 unsigned long distance
= -1;
7371 unsigned long total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7373 if (INSN_ADDRESSES_SET_P ())
7375 distance
= (total
+ insn_current_reference_address (insn
));
7376 if (distance
< total
)
7382 if (!TARGET_LONG_CALLS
&& distance
< 7600000)
7387 else if (TARGET_PORTABLE_RUNTIME
)
7391 if (!TARGET_LONG_CALLS
&& distance
< 240000)
7394 if (TARGET_LONG_ABS_CALL
&& !flag_pic
)
7401 /* INSN is a function call. It may have an unconditional jump
7404 CALL_DEST is the routine we are calling. */
7407 output_millicode_call (rtx insn
, rtx call_dest
)
7409 int attr_length
= get_attr_length (insn
);
7410 int seq_length
= dbr_sequence_length ();
7415 xoperands
[0] = call_dest
;
7416 xoperands
[2] = gen_rtx_REG (Pmode
, TARGET_64BIT
? 2 : 31);
7418 /* Handle the common case where we are sure that the branch will
7419 reach the beginning of the $CODE$ subspace. The within reach
7420 form of the $$sh_func_adrs call has a length of 28. Because
7421 it has an attribute type of multi, it never has a nonzero
7422 sequence length. The length of the $$sh_func_adrs is the same
7423 as certain out of reach PIC calls to other routines. */
7424 if (!TARGET_LONG_CALLS
7425 && ((seq_length
== 0
7426 && (attr_length
== 12
7427 || (attr_length
== 28 && get_attr_type (insn
) == TYPE_MULTI
)))
7428 || (seq_length
!= 0 && attr_length
== 8)))
7430 output_asm_insn ("{bl|b,l} %0,%2", xoperands
);
7436 /* It might seem that one insn could be saved by accessing
7437 the millicode function using the linkage table. However,
7438 this doesn't work in shared libraries and other dynamically
7439 loaded objects. Using a pc-relative sequence also avoids
7440 problems related to the implicit use of the gp register. */
7441 output_asm_insn ("b,l .+8,%%r1", xoperands
);
7445 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
7446 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
7450 xoperands
[1] = gen_label_rtx ();
7451 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7452 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7453 CODE_LABEL_NUMBER (xoperands
[1]));
7454 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7457 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
7459 else if (TARGET_PORTABLE_RUNTIME
)
7461 /* Pure portable runtime doesn't allow be/ble; we also don't
7462 have PIC support in the assembler/linker, so this sequence
7465 /* Get the address of our target into %r1. */
7466 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7467 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
7469 /* Get our return address into %r31. */
7470 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands
);
7471 output_asm_insn ("addi 8,%%r31,%%r31", xoperands
);
7473 /* Jump to our target address in %r1. */
7474 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
7478 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7480 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands
);
7482 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands
);
7486 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7487 output_asm_insn ("addi 16,%%r1,%%r31", xoperands
);
7489 if (TARGET_SOM
|| !TARGET_GAS
)
7491 /* The HP assembler can generate relocations for the
7492 difference of two symbols. GAS can do this for a
7493 millicode symbol but not an arbitrary external
7494 symbol when generating SOM output. */
7495 xoperands
[1] = gen_label_rtx ();
7496 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7497 CODE_LABEL_NUMBER (xoperands
[1]));
7498 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7499 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7503 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands
);
7504 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7508 /* Jump to our target address in %r1. */
7509 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
7513 if (seq_length
== 0)
7514 output_asm_insn ("nop", xoperands
);
7516 /* We are done if there isn't a jump in the delay slot. */
7517 if (seq_length
== 0 || GET_CODE (NEXT_INSN (insn
)) != JUMP_INSN
)
7520 /* This call has an unconditional jump in its delay slot. */
7521 xoperands
[0] = XEXP (PATTERN (NEXT_INSN (insn
)), 1);
7523 /* See if the return address can be adjusted. Use the containing
7524 sequence insn's address. */
7525 if (INSN_ADDRESSES_SET_P ())
7527 seq_insn
= NEXT_INSN (PREV_INSN (XVECEXP (final_sequence
, 0, 0)));
7528 distance
= (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn
))))
7529 - INSN_ADDRESSES (INSN_UID (seq_insn
)) - 8);
7531 if (VAL_14_BITS_P (distance
))
7533 xoperands
[1] = gen_label_rtx ();
7534 output_asm_insn ("ldo %0-%1(%2),%2", xoperands
);
7535 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7536 CODE_LABEL_NUMBER (xoperands
[1]));
7539 /* ??? This branch may not reach its target. */
7540 output_asm_insn ("nop\n\tb,n %0", xoperands
);
7543 /* ??? This branch may not reach its target. */
7544 output_asm_insn ("nop\n\tb,n %0", xoperands
);
7546 /* Delete the jump. */
7547 SET_INSN_DELETED (NEXT_INSN (insn
));
7552 /* Return the attribute length of the call instruction INSN. The SIBCALL
7553 flag indicates whether INSN is a regular call or a sibling call. The
7554 length returned must be longer than the code actually generated by
7555 output_call. Since branch shortening is done before delay branch
7556 sequencing, there is no way to determine whether or not the delay
7557 slot will be filled during branch shortening. Even when the delay
7558 slot is filled, we may have to add a nop if the delay slot contains
7559 a branch that can't reach its target. Thus, we always have to include
7560 the delay slot in the length estimate. This used to be done in
7561 pa_adjust_insn_length but we do it here now as some sequences always
7562 fill the delay slot and we can save four bytes in the estimate for
7566 attr_length_call (rtx insn
, int sibcall
)
7569 rtx call
, call_dest
;
7572 rtx pat
= PATTERN (insn
);
7573 unsigned long distance
= -1;
7575 gcc_assert (GET_CODE (insn
) == CALL_INSN
);
7577 if (INSN_ADDRESSES_SET_P ())
7579 unsigned long total
;
7581 total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7582 distance
= (total
+ insn_current_reference_address (insn
));
7583 if (distance
< total
)
7587 gcc_assert (GET_CODE (pat
) == PARALLEL
);
7589 /* Get the call rtx. */
7590 call
= XVECEXP (pat
, 0, 0);
7591 if (GET_CODE (call
) == SET
)
7592 call
= SET_SRC (call
);
7594 gcc_assert (GET_CODE (call
) == CALL
);
7596 /* Determine if this is a local call. */
7597 call_dest
= XEXP (XEXP (call
, 0), 0);
7598 call_decl
= SYMBOL_REF_DECL (call_dest
);
7599 local_call
= call_decl
&& targetm
.binds_local_p (call_decl
);
7601 /* pc-relative branch. */
7602 if (!TARGET_LONG_CALLS
7603 && ((TARGET_PA_20
&& !sibcall
&& distance
< 7600000)
7604 || distance
< 240000))
7607 /* 64-bit plabel sequence. */
7608 else if (TARGET_64BIT
&& !local_call
)
7609 length
+= sibcall
? 28 : 24;
7611 /* non-pic long absolute branch sequence. */
7612 else if ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7615 /* long pc-relative branch sequence. */
7616 else if (TARGET_LONG_PIC_SDIFF_CALL
7617 || (TARGET_GAS
&& !TARGET_SOM
7618 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
)))
7622 if (!TARGET_PA_20
&& !TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
7626 /* 32-bit plabel sequence. */
7632 length
+= length_fp_args (insn
);
7642 if (!TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
7650 /* INSN is a function call. It may have an unconditional jump
7653 CALL_DEST is the routine we are calling. */
7656 output_call (rtx insn
, rtx call_dest
, int sibcall
)
7658 int delay_insn_deleted
= 0;
7659 int delay_slot_filled
= 0;
7660 int seq_length
= dbr_sequence_length ();
7661 tree call_decl
= SYMBOL_REF_DECL (call_dest
);
7662 int local_call
= call_decl
&& targetm
.binds_local_p (call_decl
);
7665 xoperands
[0] = call_dest
;
7667 /* Handle the common case where we're sure that the branch will reach
7668 the beginning of the "$CODE$" subspace. This is the beginning of
7669 the current function if we are in a named section. */
7670 if (!TARGET_LONG_CALLS
&& attr_length_call (insn
, sibcall
) == 8)
7672 xoperands
[1] = gen_rtx_REG (word_mode
, sibcall
? 0 : 2);
7673 output_asm_insn ("{bl|b,l} %0,%1", xoperands
);
7677 if (TARGET_64BIT
&& !local_call
)
7679 /* ??? As far as I can tell, the HP linker doesn't support the
7680 long pc-relative sequence described in the 64-bit runtime
7681 architecture. So, we use a slightly longer indirect call. */
7682 xoperands
[0] = get_deferred_plabel (call_dest
);
7683 xoperands
[1] = gen_label_rtx ();
7685 /* If this isn't a sibcall, we put the load of %r27 into the
7686 delay slot. We can't do this in a sibcall as we don't
7687 have a second call-clobbered scratch register available. */
7689 && GET_CODE (NEXT_INSN (insn
)) != JUMP_INSN
7692 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
7695 /* Now delete the delay insn. */
7696 SET_INSN_DELETED (NEXT_INSN (insn
));
7697 delay_insn_deleted
= 1;
7700 output_asm_insn ("addil LT'%0,%%r27", xoperands
);
7701 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands
);
7702 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands
);
7706 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands
);
7707 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands
);
7708 output_asm_insn ("bve (%%r1)", xoperands
);
7712 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands
);
7713 output_asm_insn ("bve,l (%%r2),%%r2", xoperands
);
7714 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands
);
7715 delay_slot_filled
= 1;
7720 int indirect_call
= 0;
7722 /* Emit a long call. There are several different sequences
7723 of increasing length and complexity. In most cases,
7724 they don't allow an instruction in the delay slot. */
7725 if (!((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7726 && !TARGET_LONG_PIC_SDIFF_CALL
7727 && !(TARGET_GAS
&& !TARGET_SOM
7728 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
))
7733 && GET_CODE (NEXT_INSN (insn
)) != JUMP_INSN
7737 || ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)))
7739 /* A non-jump insn in the delay slot. By definition we can
7740 emit this insn before the call (and in fact before argument
7742 final_scan_insn (NEXT_INSN (insn
), asm_out_file
, optimize
, 0,
7745 /* Now delete the delay insn. */
7746 SET_INSN_DELETED (NEXT_INSN (insn
));
7747 delay_insn_deleted
= 1;
7750 if ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7752 /* This is the best sequence for making long calls in
7753 non-pic code. Unfortunately, GNU ld doesn't provide
7754 the stub needed for external calls, and GAS's support
7755 for this with the SOM linker is buggy. It is safe
7756 to use this for local calls. */
7757 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7759 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands
);
7763 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7766 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands
);
7768 output_asm_insn ("copy %%r31,%%r2", xoperands
);
7769 delay_slot_filled
= 1;
7774 if (TARGET_LONG_PIC_SDIFF_CALL
)
7776 /* The HP assembler and linker can handle relocations
7777 for the difference of two symbols. The HP assembler
7778 recognizes the sequence as a pc-relative call and
7779 the linker provides stubs when needed. */
7780 xoperands
[1] = gen_label_rtx ();
7781 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7782 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7783 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7784 CODE_LABEL_NUMBER (xoperands
[1]));
7785 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7787 else if (TARGET_GAS
&& !TARGET_SOM
7788 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
))
7790 /* GAS currently can't generate the relocations that
7791 are needed for the SOM linker under HP-UX using this
7792 sequence. The GNU linker doesn't generate the stubs
7793 that are needed for external calls on TARGET_ELF32
7794 with this sequence. For now, we have to use a
7795 longer plabel sequence when using GAS. */
7796 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7797 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7799 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7804 /* Emit a long plabel-based call sequence. This is
7805 essentially an inline implementation of $$dyncall.
7806 We don't actually try to call $$dyncall as this is
7807 as difficult as calling the function itself. */
7808 xoperands
[0] = get_deferred_plabel (call_dest
);
7809 xoperands
[1] = gen_label_rtx ();
7811 /* Since the call is indirect, FP arguments in registers
7812 need to be copied to the general registers. Then, the
7813 argument relocation stub will copy them back. */
7815 copy_fp_args (insn
);
7819 output_asm_insn ("addil LT'%0,%%r19", xoperands
);
7820 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands
);
7821 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands
);
7825 output_asm_insn ("addil LR'%0-$global$,%%r27",
7827 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7831 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands
);
7832 output_asm_insn ("depi 0,31,2,%%r1", xoperands
);
7833 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands
);
7834 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands
);
7836 if (!sibcall
&& !TARGET_PA_20
)
7838 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands
);
7839 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
7840 output_asm_insn ("addi 8,%%r2,%%r2", xoperands
);
7842 output_asm_insn ("addi 16,%%r2,%%r2", xoperands
);
7849 output_asm_insn ("bve (%%r1)", xoperands
);
7854 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
7855 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands
);
7856 delay_slot_filled
= 1;
7859 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
7864 if (!TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
7865 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7870 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
7871 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands
);
7873 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands
);
7877 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
7878 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands
);
7880 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands
);
7883 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands
);
7885 output_asm_insn ("copy %%r31,%%r2", xoperands
);
7886 delay_slot_filled
= 1;
7893 if (!delay_slot_filled
&& (seq_length
== 0 || delay_insn_deleted
))
7894 output_asm_insn ("nop", xoperands
);
7896 /* We are done if there isn't a jump in the delay slot. */
7898 || delay_insn_deleted
7899 || GET_CODE (NEXT_INSN (insn
)) != JUMP_INSN
)
7902 /* A sibcall should never have a branch in the delay slot. */
7903 gcc_assert (!sibcall
);
7905 /* This call has an unconditional jump in its delay slot. */
7906 xoperands
[0] = XEXP (PATTERN (NEXT_INSN (insn
)), 1);
7908 if (!delay_slot_filled
&& INSN_ADDRESSES_SET_P ())
7910 /* See if the return address can be adjusted. Use the containing
7911 sequence insn's address. This would break the regular call/return@
7912 relationship assumed by the table based eh unwinder, so only do that
7913 if the call is not possibly throwing. */
7914 rtx seq_insn
= NEXT_INSN (PREV_INSN (XVECEXP (final_sequence
, 0, 0)));
7915 int distance
= (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn
))))
7916 - INSN_ADDRESSES (INSN_UID (seq_insn
)) - 8);
7918 if (VAL_14_BITS_P (distance
)
7919 && !(can_throw_internal (insn
) || can_throw_external (insn
)))
7921 xoperands
[1] = gen_label_rtx ();
7922 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands
);
7923 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7924 CODE_LABEL_NUMBER (xoperands
[1]));
7927 output_asm_insn ("nop\n\tb,n %0", xoperands
);
7930 output_asm_insn ("b,n %0", xoperands
);
7932 /* Delete the jump. */
7933 SET_INSN_DELETED (NEXT_INSN (insn
));
7938 /* Return the attribute length of the indirect call instruction INSN.
7939 The length must match the code generated by output_indirect call.
7940 The returned length includes the delay slot. Currently, the delay
7941 slot of an indirect call sequence is not exposed and it is used by
7942 the sequence itself. */
7945 attr_length_indirect_call (rtx insn
)
7947 unsigned long distance
= -1;
7948 unsigned long total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7950 if (INSN_ADDRESSES_SET_P ())
7952 distance
= (total
+ insn_current_reference_address (insn
));
7953 if (distance
< total
)
7960 if (TARGET_FAST_INDIRECT_CALLS
7961 || (!TARGET_PORTABLE_RUNTIME
7962 && ((TARGET_PA_20
&& !TARGET_SOM
&& distance
< 7600000)
7963 || distance
< 240000)))
7969 if (TARGET_PORTABLE_RUNTIME
)
7972 /* Out of reach, can use ble. */
7977 output_indirect_call (rtx insn
, rtx call_dest
)
7983 xoperands
[0] = call_dest
;
7984 output_asm_insn ("ldd 16(%0),%%r2", xoperands
);
7985 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands
);
7989 /* First the special case for kernels, level 0 systems, etc. */
7990 if (TARGET_FAST_INDIRECT_CALLS
)
7991 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7993 /* Now the normal case -- we can reach $$dyncall directly or
7994 we're sure that we can get there via a long-branch stub.
7996 No need to check target flags as the length uniquely identifies
7997 the remaining cases. */
7998 if (attr_length_indirect_call (insn
) == 8)
8000 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8001 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8002 variant of the B,L instruction can't be used on the SOM target. */
8003 if (TARGET_PA_20
&& !TARGET_SOM
)
8004 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8006 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8009 /* Long millicode call, but we are not generating PIC or portable runtime
8011 if (attr_length_indirect_call (insn
) == 12)
8012 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8014 /* Long millicode call for portable runtime. */
8015 if (attr_length_indirect_call (insn
) == 20)
8016 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
8018 /* We need a long PIC call to $$dyncall. */
8019 xoperands
[0] = NULL_RTX
;
8020 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
8021 if (TARGET_SOM
|| !TARGET_GAS
)
8023 xoperands
[0] = gen_label_rtx ();
8024 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands
);
8025 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8026 CODE_LABEL_NUMBER (xoperands
[0]));
8027 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands
);
8031 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands
);
8032 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8035 output_asm_insn ("blr %%r0,%%r2", xoperands
);
8036 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands
);
8040 /* Return the total length of the save and restore instructions needed for
8041 the data linkage table pointer (i.e., the PIC register) across the call
8042 instruction INSN. No-return calls do not require a save and restore.
8043 In addition, we may be able to avoid the save and restore for calls
8044 within the same translation unit. */
8047 attr_length_save_restore_dltp (rtx insn
)
8049 if (find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
8055 /* In HPUX 8.0's shared library scheme, special relocations are needed
8056 for function labels if they might be passed to a function
8057 in a shared library (because shared libraries don't live in code
8058 space), and special magic is needed to construct their address. */
8061 hppa_encode_label (rtx sym
)
8063 const char *str
= XSTR (sym
, 0);
8064 int len
= strlen (str
) + 1;
8067 p
= newstr
= XALLOCAVEC (char, len
+ 1);
8071 XSTR (sym
, 0) = ggc_alloc_string (newstr
, len
);
8075 pa_encode_section_info (tree decl
, rtx rtl
, int first
)
8077 int old_referenced
= 0;
8079 if (!first
&& MEM_P (rtl
) && GET_CODE (XEXP (rtl
, 0)) == SYMBOL_REF
)
8081 = SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) & SYMBOL_FLAG_REFERENCED
;
8083 default_encode_section_info (decl
, rtl
, first
);
8085 if (first
&& TEXT_SPACE_P (decl
))
8087 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
8088 if (TREE_CODE (decl
) == FUNCTION_DECL
)
8089 hppa_encode_label (XEXP (rtl
, 0));
8091 else if (old_referenced
)
8092 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= old_referenced
;
/* This is sort of inverse to pa_encode_section_info.  Strip the leading
   '@' (function label) and '*' (user label prefix) markers, if present,
   and return a pointer into the same string.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  str += (*str == '@');
  str += (*str == '*');
  return str;
}
8105 /* Returns 1 if OP is a function label involved in a simple addition
8106 with a constant. Used to keep certain patterns from matching
8107 during instruction combination. */
8109 is_function_label_plus_const (rtx op
)
8111 /* Strip off any CONST. */
8112 if (GET_CODE (op
) == CONST
)
8115 return (GET_CODE (op
) == PLUS
8116 && function_label_operand (XEXP (op
, 0), VOIDmode
)
8117 && GET_CODE (XEXP (op
, 1)) == CONST_INT
);
8120 /* Output assembly code for a thunk to FUNCTION. */
8123 pa_asm_output_mi_thunk (FILE *file
, tree thunk_fndecl
, HOST_WIDE_INT delta
,
8124 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
8127 static unsigned int current_thunk_number
;
8128 int val_14
= VAL_14_BITS_P (delta
);
8129 unsigned int old_last_address
= last_address
, nbytes
= 0;
8133 xoperands
[0] = XEXP (DECL_RTL (function
), 0);
8134 xoperands
[1] = XEXP (DECL_RTL (thunk_fndecl
), 0);
8135 xoperands
[2] = GEN_INT (delta
);
8137 ASM_OUTPUT_LABEL (file
, XSTR (xoperands
[1], 0));
8138 fprintf (file
, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8140 /* Output the thunk. We know that the function is in the same
8141 translation unit (i.e., the same space) as the thunk, and that
8142 thunks are output after their method. Thus, we don't need an
8143 external branch to reach the function. With SOM and GAS,
8144 functions and thunks are effectively in different sections.
8145 Thus, we can always use a IA-relative branch and the linker
8146 will add a long branch stub if necessary.
8148 However, we have to be careful when generating PIC code on the
8149 SOM port to ensure that the sequence does not transfer to an
8150 import stub for the target function as this could clobber the
8151 return value saved at SP-24. This would also apply to the
8152 32-bit linux port if the multi-space model is implemented. */
8153 if ((!TARGET_LONG_CALLS
&& TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8154 && !(flag_pic
&& TREE_PUBLIC (function
))
8155 && (TARGET_GAS
|| last_address
< 262132))
8156 || (!TARGET_LONG_CALLS
&& !TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8157 && ((targetm_common
.have_named_sections
8158 && DECL_SECTION_NAME (thunk_fndecl
) != NULL
8159 /* The GNU 64-bit linker has rather poor stub management.
8160 So, we use a long branch from thunks that aren't in
8161 the same section as the target function. */
8163 && (DECL_SECTION_NAME (thunk_fndecl
)
8164 != DECL_SECTION_NAME (function
)))
8165 || ((DECL_SECTION_NAME (thunk_fndecl
)
8166 == DECL_SECTION_NAME (function
))
8167 && last_address
< 262132)))
8168 || (targetm_common
.have_named_sections
8169 && DECL_SECTION_NAME (thunk_fndecl
) == NULL
8170 && DECL_SECTION_NAME (function
) == NULL
8171 && last_address
< 262132)
8172 || (!targetm_common
.have_named_sections
8173 && last_address
< 262132))))
8176 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8178 output_asm_insn ("b %0", xoperands
);
8182 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8187 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8191 else if (TARGET_64BIT
)
8193 /* We only have one call-clobbered scratch register, so we can't
8194 make use of the delay slot if delta doesn't fit in 14 bits. */
8197 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8198 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8201 output_asm_insn ("b,l .+8,%%r1", xoperands
);
8205 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
8206 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
8210 xoperands
[3] = GEN_INT (val_14
? 8 : 16);
8211 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands
);
8216 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
8217 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8222 output_asm_insn ("bv,n %%r0(%%r1)", xoperands
);
8226 else if (TARGET_PORTABLE_RUNTIME
)
8228 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
8229 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands
);
8232 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8234 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8238 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8243 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8247 else if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8249 /* The function is accessible from outside this module. The only
8250 way to avoid an import stub between the thunk and function is to
8251 call the function directly with an indirect sequence similar to
8252 that used by $$dyncall. This is possible because $$dyncall acts
8253 as the import stub in an indirect call. */
8254 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHN", current_thunk_number
);
8255 xoperands
[3] = gen_rtx_SYMBOL_REF (Pmode
, label
);
8256 output_asm_insn ("addil LT'%3,%%r19", xoperands
);
8257 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands
);
8258 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8259 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands
);
8260 output_asm_insn ("depi 0,31,2,%%r22", xoperands
);
8261 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands
);
8262 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8266 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8272 output_asm_insn ("bve (%%r22)", xoperands
);
8275 else if (TARGET_NO_SPACE_REGS
)
8277 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands
);
8282 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands
);
8283 output_asm_insn ("mtsp %%r21,%%sr0", xoperands
);
8284 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands
);
8289 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8291 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8295 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
8297 if (TARGET_SOM
|| !TARGET_GAS
)
8299 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands
);
8300 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands
);
8304 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
8305 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands
);
8309 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8311 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8315 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8320 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8327 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8329 output_asm_insn ("ldil L'%0,%%r22", xoperands
);
8330 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands
);
8334 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8339 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8344 fprintf (file
, "\t.EXIT\n\t.PROCEND\n");
8346 if (TARGET_SOM
&& TARGET_GAS
)
8348 /* We done with this subspace except possibly for some additional
8349 debug information. Forget that we are in this subspace to ensure
8350 that the next function is output in its own subspace. */
8352 cfun
->machine
->in_nsubspa
= 2;
8355 if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8357 switch_to_section (data_section
);
8358 output_asm_insn (".align 4", xoperands
);
8359 ASM_OUTPUT_LABEL (file
, label
);
8360 output_asm_insn (".word P'%0", xoperands
);
8363 current_thunk_number
++;
8364 nbytes
= ((nbytes
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
8365 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
8366 last_address
+= nbytes
;
8367 if (old_last_address
> last_address
)
8368 last_address
= UINT_MAX
;
8369 update_total_code_bytes (nbytes
);
8372 /* Only direct calls to static functions are allowed to be sibling (tail)
8375 This restriction is necessary because some linker generated stubs will
8376 store return pointers into rp' in some cases which might clobber a
8377 live value already in rp'.
8379 In a sibcall the current function and the target function share stack
8380 space. Thus if the path to the current function and the path to the
8381 target function save a value in rp', they save the value into the
8382 same stack slot, which has undesirable consequences.
8384 Because of the deferred binding nature of shared libraries any function
8385 with external scope could be in a different load module and thus require
8386 rp' to be saved when calling that function. So sibcall optimizations
8387 can only be safe for static function.
8389 Note that GCC never needs return value relocations, so we don't have to
8390 worry about static calls with return value relocations (which require
8393 It is safe to perform a sibcall optimization when the target function
8394 will never return. */
8396 pa_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
8398 if (TARGET_PORTABLE_RUNTIME
)
8401 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
8402 single subspace mode and the call is not indirect. As far as I know,
8403 there is no operating system support for the multiple subspace mode.
8404 It might be possible to support indirect calls if we didn't use
8405 $$dyncall (see the indirect sequence generated in output_call). */
8407 return (decl
!= NULL_TREE
);
8409 /* Sibcalls are not ok because the arg pointer register is not a fixed
8410 register. This prevents the sibcall optimization from occurring. In
8411 addition, there are problems with stub placement using GNU ld. This
8412 is because a normal sibcall branch uses a 17-bit relocation while
8413 a regular call branch uses a 22-bit relocation. As a result, more
8414 care needs to be taken in the placement of long-branch stubs. */
8418 /* Sibcalls are only ok within a translation unit. */
8419 return (decl
&& !TREE_PUBLIC (decl
));
8422 /* ??? Addition is not commutative on the PA due to the weird implicit
8423 space register selection rules for memory addresses. Therefore, we
8424 don't consider a + b == b + a, as this might be inside a MEM. */
8426 pa_commutative_p (const_rtx x
, int outer_code
)
8428 return (COMMUTATIVE_P (x
)
8429 && (TARGET_NO_SPACE_REGS
8430 || (outer_code
!= UNKNOWN
&& outer_code
!= MEM
)
8431 || GET_CODE (x
) != PLUS
));
8434 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8435 use in fmpyadd instructions. */
8437 fmpyaddoperands (rtx
*operands
)
8439 enum machine_mode mode
= GET_MODE (operands
[0]);
8441 /* Must be a floating point mode. */
8442 if (mode
!= SFmode
&& mode
!= DFmode
)
8445 /* All modes must be the same. */
8446 if (! (mode
== GET_MODE (operands
[1])
8447 && mode
== GET_MODE (operands
[2])
8448 && mode
== GET_MODE (operands
[3])
8449 && mode
== GET_MODE (operands
[4])
8450 && mode
== GET_MODE (operands
[5])))
8453 /* All operands must be registers. */
8454 if (! (GET_CODE (operands
[1]) == REG
8455 && GET_CODE (operands
[2]) == REG
8456 && GET_CODE (operands
[3]) == REG
8457 && GET_CODE (operands
[4]) == REG
8458 && GET_CODE (operands
[5]) == REG
))
8461 /* Only 2 real operands to the addition. One of the input operands must
8462 be the same as the output operand. */
8463 if (! rtx_equal_p (operands
[3], operands
[4])
8464 && ! rtx_equal_p (operands
[3], operands
[5]))
8467 /* Inout operand of add cannot conflict with any operands from multiply. */
8468 if (rtx_equal_p (operands
[3], operands
[0])
8469 || rtx_equal_p (operands
[3], operands
[1])
8470 || rtx_equal_p (operands
[3], operands
[2]))
8473 /* multiply cannot feed into addition operands. */
8474 if (rtx_equal_p (operands
[4], operands
[0])
8475 || rtx_equal_p (operands
[5], operands
[0]))
8478 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8480 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
8481 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
8482 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
8483 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
8484 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
8485 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
8488 /* Passed. Operands are suitable for fmpyadd. */
8492 #if !defined(USE_COLLECT2)
8494 pa_asm_out_constructor (rtx symbol
, int priority
)
8496 if (!function_label_operand (symbol
, VOIDmode
))
8497 hppa_encode_label (symbol
);
8499 #ifdef CTORS_SECTION_ASM_OP
8500 default_ctor_section_asm_out_constructor (symbol
, priority
);
8502 # ifdef TARGET_ASM_NAMED_SECTION
8503 default_named_section_asm_out_constructor (symbol
, priority
);
8505 default_stabs_asm_out_constructor (symbol
, priority
);
8511 pa_asm_out_destructor (rtx symbol
, int priority
)
8513 if (!function_label_operand (symbol
, VOIDmode
))
8514 hppa_encode_label (symbol
);
8516 #ifdef DTORS_SECTION_ASM_OP
8517 default_dtor_section_asm_out_destructor (symbol
, priority
);
8519 # ifdef TARGET_ASM_NAMED_SECTION
8520 default_named_section_asm_out_destructor (symbol
, priority
);
8522 default_stabs_asm_out_destructor (symbol
, priority
);
8528 /* This function places uninitialized global data in the bss section.
8529 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8530 function on the SOM port to prevent uninitialized global data from
8531 being placed in the data section. */
8534 pa_asm_output_aligned_bss (FILE *stream
,
8536 unsigned HOST_WIDE_INT size
,
8539 switch_to_section (bss_section
);
8540 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8542 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8543 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "object");
8546 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8547 ASM_OUTPUT_SIZE_DIRECTIVE (stream
, name
, size
);
8550 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8551 ASM_OUTPUT_LABEL (stream
, name
);
8552 fprintf (stream
, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
8555 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8556 that doesn't allow the alignment of global common storage to be directly
8557 specified. The SOM linker aligns common storage based on the rounded
8558 value of the NUM_BYTES parameter in the .comm directive. It's not
8559 possible to use the .align directive as it doesn't affect the alignment
8560 of the label associated with a .comm directive. */
8563 pa_asm_output_aligned_common (FILE *stream
,
8565 unsigned HOST_WIDE_INT size
,
8568 unsigned int max_common_align
;
8570 max_common_align
= TARGET_64BIT
? 128 : (size
>= 4096 ? 256 : 64);
8571 if (align
> max_common_align
)
8573 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8574 "for global common data. Using %u",
8575 align
/ BITS_PER_UNIT
, name
, max_common_align
/ BITS_PER_UNIT
);
8576 align
= max_common_align
;
8579 switch_to_section (bss_section
);
8581 assemble_name (stream
, name
);
8582 fprintf (stream
, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED
"\n",
8583 MAX (size
, align
/ BITS_PER_UNIT
));
8586 /* We can't use .comm for local common storage as the SOM linker effectively
8587 treats the symbol as universal and uses the same storage for local symbols
8588 with the same name in different object files. The .block directive
8589 reserves an uninitialized block of storage. However, it's not common
8590 storage. Fortunately, GCC never requests common storage with the same
8591 name in any given translation unit. */
8594 pa_asm_output_aligned_local (FILE *stream
,
8596 unsigned HOST_WIDE_INT size
,
8599 switch_to_section (bss_section
);
8600 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8603 fprintf (stream
, "%s", LOCAL_ASM_OP
);
8604 assemble_name (stream
, name
);
8605 fprintf (stream
, "\n");
8608 ASM_OUTPUT_LABEL (stream
, name
);
8609 fprintf (stream
, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
8612 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8613 use in fmpysub instructions. */
8615 fmpysuboperands (rtx
*operands
)
8617 enum machine_mode mode
= GET_MODE (operands
[0]);
8619 /* Must be a floating point mode. */
8620 if (mode
!= SFmode
&& mode
!= DFmode
)
8623 /* All modes must be the same. */
8624 if (! (mode
== GET_MODE (operands
[1])
8625 && mode
== GET_MODE (operands
[2])
8626 && mode
== GET_MODE (operands
[3])
8627 && mode
== GET_MODE (operands
[4])
8628 && mode
== GET_MODE (operands
[5])))
8631 /* All operands must be registers. */
8632 if (! (GET_CODE (operands
[1]) == REG
8633 && GET_CODE (operands
[2]) == REG
8634 && GET_CODE (operands
[3]) == REG
8635 && GET_CODE (operands
[4]) == REG
8636 && GET_CODE (operands
[5]) == REG
))
8639 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8640 operation, so operands[4] must be the same as operand[3]. */
8641 if (! rtx_equal_p (operands
[3], operands
[4]))
8644 /* multiply cannot feed into subtraction. */
8645 if (rtx_equal_p (operands
[5], operands
[0]))
8648 /* Inout operand of sub cannot conflict with any operands from multiply. */
8649 if (rtx_equal_p (operands
[3], operands
[0])
8650 || rtx_equal_p (operands
[3], operands
[1])
8651 || rtx_equal_p (operands
[3], operands
[2]))
8654 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8656 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
8657 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
8658 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
8659 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
8660 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
8661 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
8664 /* Passed. Operands are suitable for fmpysub. */
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for shadd instructions.  */
int
shadd_constant_p (int val)
{
  if (val == 2 || val == 4 || val == 8)
    return 1;
  else
    return 0;
}
8679 /* Return TRUE if INSN branches forward. */
8682 forward_branch_p (rtx insn
)
8684 rtx lab
= JUMP_LABEL (insn
);
8686 /* The INSN must have a jump label. */
8687 gcc_assert (lab
!= NULL_RTX
);
8689 if (INSN_ADDRESSES_SET_P ())
8690 return INSN_ADDRESSES (INSN_UID (lab
)) > INSN_ADDRESSES (INSN_UID (insn
));
8697 insn
= NEXT_INSN (insn
);
8703 /* Return 1 if INSN is in the delay slot of a call instruction. */
8705 jump_in_call_delay (rtx insn
)
8708 if (GET_CODE (insn
) != JUMP_INSN
)
8711 if (PREV_INSN (insn
)
8712 && PREV_INSN (PREV_INSN (insn
))
8713 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn
)))) == INSN
)
8715 rtx test_insn
= next_real_insn (PREV_INSN (PREV_INSN (insn
)));
8717 return (GET_CODE (PATTERN (test_insn
)) == SEQUENCE
8718 && XVECEXP (PATTERN (test_insn
), 0, 1) == insn
);
8725 /* Output an unconditional move and branch insn. */
8728 output_parallel_movb (rtx
*operands
, rtx insn
)
8730 int length
= get_attr_length (insn
);
8732 /* These are the cases in which we win. */
8734 return "mov%I1b,tr %1,%0,%2";
8736 /* None of the following cases win, but they don't lose either. */
8739 if (dbr_sequence_length () == 0)
8741 /* Nothing in the delay slot, fake it by putting the combined
8742 insn (the copy or add) in the delay slot of a bl. */
8743 if (GET_CODE (operands
[1]) == CONST_INT
)
8744 return "b %2\n\tldi %1,%0";
8746 return "b %2\n\tcopy %1,%0";
8750 /* Something in the delay slot, but we've got a long branch. */
8751 if (GET_CODE (operands
[1]) == CONST_INT
)
8752 return "ldi %1,%0\n\tb %2";
8754 return "copy %1,%0\n\tb %2";
8758 if (GET_CODE (operands
[1]) == CONST_INT
)
8759 output_asm_insn ("ldi %1,%0", operands
);
8761 output_asm_insn ("copy %1,%0", operands
);
8762 return output_lbranch (operands
[2], insn
, 1);
8765 /* Output an unconditional add and branch insn. */
8768 output_parallel_addb (rtx
*operands
, rtx insn
)
8770 int length
= get_attr_length (insn
);
8772 /* To make life easy we want operand0 to be the shared input/output
8773 operand and operand1 to be the readonly operand. */
8774 if (operands
[0] == operands
[1])
8775 operands
[1] = operands
[2];
8777 /* These are the cases in which we win. */
8779 return "add%I1b,tr %1,%0,%3";
8781 /* None of the following cases win, but they don't lose either. */
8784 if (dbr_sequence_length () == 0)
8785 /* Nothing in the delay slot, fake it by putting the combined
8786 insn (the copy or add) in the delay slot of a bl. */
8787 return "b %3\n\tadd%I1 %1,%0,%0";
8789 /* Something in the delay slot, but we've got a long branch. */
8790 return "add%I1 %1,%0,%0\n\tb %3";
8793 output_asm_insn ("add%I1 %1,%0,%0", operands
);
8794 return output_lbranch (operands
[3], insn
, 1);
8797 /* Return nonzero if INSN (a jump insn) immediately follows a call
8798 to a named function. This is used to avoid filling the delay slot
8799 of the jump since it can usually be eliminated by modifying RP in
8800 the delay slot of the call. */
8803 following_call (rtx insn
)
8805 if (! TARGET_JUMP_IN_DELAY
)
8808 /* Find the previous real insn, skipping NOTEs. */
8809 insn
= PREV_INSN (insn
);
8810 while (insn
&& GET_CODE (insn
) == NOTE
)
8811 insn
= PREV_INSN (insn
);
8813 /* Check for CALL_INSNs and millicode calls. */
8815 && ((GET_CODE (insn
) == CALL_INSN
8816 && get_attr_type (insn
) != TYPE_DYNCALL
)
8817 || (GET_CODE (insn
) == INSN
8818 && GET_CODE (PATTERN (insn
)) != SEQUENCE
8819 && GET_CODE (PATTERN (insn
)) != USE
8820 && GET_CODE (PATTERN (insn
)) != CLOBBER
8821 && get_attr_type (insn
) == TYPE_MILLI
)))
8827 /* We use this hook to perform a PA specific optimization which is difficult
8828 to do in earlier passes.
8830 We want the delay slots of branches within jump tables to be filled.
8831 None of the compiler passes at the moment even has the notion that a
8832 PA jump table doesn't contain addresses, but instead contains actual
8835 Because we actually jump into the table, the addresses of each entry
8836 must stay constant in relation to the beginning of the table (which
8837 itself must stay constant relative to the instruction to jump into
8838 it). I don't believe we can guarantee earlier passes of the compiler
8839 will adhere to those rules.
8841 So, late in the compilation process we find all the jump tables, and
8842 expand them into real code -- e.g. each entry in the jump table vector
8843 will get an appropriate label followed by a jump to the final target.
8845 Reorg and the final jump pass can then optimize these branches and
8846 fill their delay slots. We end up with smaller, more efficient code.
8848 The jump instructions within the table are special; we must be able
8849 to identify them during assembly output (if the jumps don't get filled
8850 we need to emit a nop rather than nullifying the delay slot)). We
8851 identify jumps in switch tables by using insns with the attribute
8852 type TYPE_BTABLE_BRANCH.
8854 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8855 insns. This serves two purposes, first it prevents jump.c from
8856 noticing that the last N entries in the table jump to the instruction
8857 immediately after the table and deleting the jumps. Second, those
8858 insns mark where we should emit .begin_brtab and .end_brtab directives
8859 when using GAS (allows for better link time optimizations). */
8866 remove_useless_addtr_insns (1);
8868 if (pa_cpu
< PROCESSOR_8000
)
8869 pa_combine_instructions ();
8872 /* This is fairly cheap, so always run it if optimizing. */
8873 if (optimize
> 0 && !TARGET_BIG_SWITCH
)
8875 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8876 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8878 rtx pattern
, tmp
, location
, label
;
8879 unsigned int length
, i
;
8881 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8882 if (GET_CODE (insn
) != JUMP_INSN
8883 || (GET_CODE (PATTERN (insn
)) != ADDR_VEC
8884 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
))
8887 /* Emit marker for the beginning of the branch table. */
8888 emit_insn_before (gen_begin_brtab (), insn
);
8890 pattern
= PATTERN (insn
);
8891 location
= PREV_INSN (insn
);
8892 length
= XVECLEN (pattern
, GET_CODE (pattern
) == ADDR_DIFF_VEC
);
8894 for (i
= 0; i
< length
; i
++)
8896 /* Emit a label before each jump to keep jump.c from
8897 removing this code. */
8898 tmp
= gen_label_rtx ();
8899 LABEL_NUSES (tmp
) = 1;
8900 emit_label_after (tmp
, location
);
8901 location
= NEXT_INSN (location
);
8903 if (GET_CODE (pattern
) == ADDR_VEC
)
8904 label
= XEXP (XVECEXP (pattern
, 0, i
), 0);
8906 label
= XEXP (XVECEXP (pattern
, 1, i
), 0);
8908 tmp
= gen_short_jump (label
);
8910 /* Emit the jump itself. */
8911 tmp
= emit_jump_insn_after (tmp
, location
);
8912 JUMP_LABEL (tmp
) = label
;
8913 LABEL_NUSES (label
)++;
8914 location
= NEXT_INSN (location
);
8916 /* Emit a BARRIER after the jump. */
8917 emit_barrier_after (location
);
8918 location
= NEXT_INSN (location
);
8921 /* Emit marker for the end of the branch table. */
8922 emit_insn_before (gen_end_brtab (), location
);
8923 location
= NEXT_INSN (location
);
8924 emit_barrier_after (location
);
8926 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8932 /* Still need brtab marker insns. FIXME: the presence of these
8933 markers disables output of the branch table to readonly memory,
8934 and any alignment directives that might be needed. Possibly,
8935 the begin_brtab insn should be output before the label for the
8936 table. This doesn't matter at the moment since the tables are
8937 always output in the text section. */
8938 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8940 /* Find an ADDR_VEC insn. */
8941 if (GET_CODE (insn
) != JUMP_INSN
8942 || (GET_CODE (PATTERN (insn
)) != ADDR_VEC
8943 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
))
8946 /* Now generate markers for the beginning and end of the
8948 emit_insn_before (gen_begin_brtab (), insn
);
8949 emit_insn_after (gen_end_brtab (), insn
);
8954 /* The PA has a number of odd instructions which can perform multiple
8955 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8956 it may be profitable to combine two instructions into one instruction
8957 with two outputs. It's not profitable PA2.0 machines because the
8958 two outputs would take two slots in the reorder buffers.
8960 This routine finds instructions which can be combined and combines
8961 them. We only support some of the potential combinations, and we
8962 only try common ways to find suitable instructions.
8964 * addb can add two registers or a register and a small integer
8965 and jump to a nearby (+-8k) location. Normally the jump to the
8966 nearby location is conditional on the result of the add, but by
8967 using the "true" condition we can make the jump unconditional.
8968 Thus addb can perform two independent operations in one insn.
8970 * movb is similar to addb in that it can perform a reg->reg
8971 or small immediate->reg copy and jump to a nearby (+-8k location).
8973 * fmpyadd and fmpysub can perform a FP multiply and either an
8974 FP add or FP sub if the operands of the multiply and add/sub are
8975 independent (there are other minor restrictions). Note both
8976 the fmpy and fadd/fsub can in theory move to better spots according
8977 to data dependencies, but for now we require the fmpy stay at a
8980 * Many of the memory operations can perform pre & post updates
8981 of index registers. GCC's pre/post increment/decrement addressing
8982 is far too simple to take advantage of all the possibilities. This
8983 pass may not be suitable since those insns may not be independent.
8985 * comclr can compare two ints or an int and a register, nullify
8986 the following instruction and zero some other register. This
8987 is more difficult to use as it's harder to find an insn which
8988 will generate a comclr than finding something like an unconditional
8989 branch. (conditional moves & long branches create comclr insns).
8991 * Most arithmetic operations can conditionally skip the next
8992 instruction. They can be viewed as "perform this operation
8993 and conditionally jump to this nearby location" (where nearby
8994 is an insns away). These are difficult to use due to the
8995 branch length restrictions. */
8998 pa_combine_instructions (void)
9000 rtx anchor
, new_rtx
;
9002 /* This can get expensive since the basic algorithm is on the
9003 order of O(n^2) (or worse). Only do it for -O2 or higher
9004 levels of optimization. */
9008 /* Walk down the list of insns looking for "anchor" insns which
9009 may be combined with "floating" insns. As the name implies,
9010 "anchor" instructions don't move, while "floating" insns may
9012 new_rtx
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, NULL_RTX
, NULL_RTX
));
9013 new_rtx
= make_insn_raw (new_rtx
);
9015 for (anchor
= get_insns (); anchor
; anchor
= NEXT_INSN (anchor
))
9017 enum attr_pa_combine_type anchor_attr
;
9018 enum attr_pa_combine_type floater_attr
;
9020 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9021 Also ignore any special USE insns. */
9022 if ((GET_CODE (anchor
) != INSN
9023 && GET_CODE (anchor
) != JUMP_INSN
9024 && GET_CODE (anchor
) != CALL_INSN
)
9025 || GET_CODE (PATTERN (anchor
)) == USE
9026 || GET_CODE (PATTERN (anchor
)) == CLOBBER
9027 || GET_CODE (PATTERN (anchor
)) == ADDR_VEC
9028 || GET_CODE (PATTERN (anchor
)) == ADDR_DIFF_VEC
)
9031 anchor_attr
= get_attr_pa_combine_type (anchor
);
9032 /* See if anchor is an insn suitable for combination. */
9033 if (anchor_attr
== PA_COMBINE_TYPE_FMPY
9034 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9035 || (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
9036 && ! forward_branch_p (anchor
)))
9040 for (floater
= PREV_INSN (anchor
);
9042 floater
= PREV_INSN (floater
))
9044 if (GET_CODE (floater
) == NOTE
9045 || (GET_CODE (floater
) == INSN
9046 && (GET_CODE (PATTERN (floater
)) == USE
9047 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
9050 /* Anything except a regular INSN will stop our search. */
9051 if (GET_CODE (floater
) != INSN
9052 || GET_CODE (PATTERN (floater
)) == ADDR_VEC
9053 || GET_CODE (PATTERN (floater
)) == ADDR_DIFF_VEC
)
9059 /* See if FLOATER is suitable for combination with the
9061 floater_attr
= get_attr_pa_combine_type (floater
);
9062 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9063 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9064 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9065 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9067 /* If ANCHOR and FLOATER can be combined, then we're
9068 done with this pass. */
9069 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9070 SET_DEST (PATTERN (floater
)),
9071 XEXP (SET_SRC (PATTERN (floater
)), 0),
9072 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9076 else if (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
9077 && floater_attr
== PA_COMBINE_TYPE_ADDMOVE
)
9079 if (GET_CODE (SET_SRC (PATTERN (floater
))) == PLUS
)
9081 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9082 SET_DEST (PATTERN (floater
)),
9083 XEXP (SET_SRC (PATTERN (floater
)), 0),
9084 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9089 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9090 SET_DEST (PATTERN (floater
)),
9091 SET_SRC (PATTERN (floater
)),
9092 SET_SRC (PATTERN (floater
))))
9098 /* If we didn't find anything on the backwards scan try forwards. */
9100 && (anchor_attr
== PA_COMBINE_TYPE_FMPY
9101 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
))
9103 for (floater
= anchor
; floater
; floater
= NEXT_INSN (floater
))
9105 if (GET_CODE (floater
) == NOTE
9106 || (GET_CODE (floater
) == INSN
9107 && (GET_CODE (PATTERN (floater
)) == USE
9108 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
9112 /* Anything except a regular INSN will stop our search. */
9113 if (GET_CODE (floater
) != INSN
9114 || GET_CODE (PATTERN (floater
)) == ADDR_VEC
9115 || GET_CODE (PATTERN (floater
)) == ADDR_DIFF_VEC
)
9121 /* See if FLOATER is suitable for combination with the
9123 floater_attr
= get_attr_pa_combine_type (floater
);
9124 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9125 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9126 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9127 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9129 /* If ANCHOR and FLOATER can be combined, then we're
9130 done with this pass. */
9131 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 1,
9132 SET_DEST (PATTERN (floater
)),
9133 XEXP (SET_SRC (PATTERN (floater
)),
9135 XEXP (SET_SRC (PATTERN (floater
)),
9142 /* FLOATER will be nonzero if we found a suitable floating
9143 insn for combination with ANCHOR. */
9145 && (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9146 || anchor_attr
== PA_COMBINE_TYPE_FMPY
))
9148 /* Emit the new instruction and delete the old anchor. */
9149 emit_insn_before (gen_rtx_PARALLEL
9151 gen_rtvec (2, PATTERN (anchor
),
9152 PATTERN (floater
))),
9155 SET_INSN_DELETED (anchor
);
9157 /* Emit a special USE insn for FLOATER, then delete
9158 the floating insn. */
9159 emit_insn_before (gen_rtx_USE (VOIDmode
, floater
), floater
);
9160 delete_insn (floater
);
9165 && anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
)
9168 /* Emit the new_jump instruction and delete the old anchor. */
9170 = emit_jump_insn_before (gen_rtx_PARALLEL
9172 gen_rtvec (2, PATTERN (anchor
),
9173 PATTERN (floater
))),
9176 JUMP_LABEL (temp
) = JUMP_LABEL (anchor
);
9177 SET_INSN_DELETED (anchor
);
9179 /* Emit a special USE insn for FLOATER, then delete
9180 the floating insn. */
9181 emit_insn_before (gen_rtx_USE (VOIDmode
, floater
), floater
);
9182 delete_insn (floater
);
9190 pa_can_combine_p (rtx new_rtx
, rtx anchor
, rtx floater
, int reversed
, rtx dest
,
9193 int insn_code_number
;
9196 /* Create a PARALLEL with the patterns of ANCHOR and
9197 FLOATER, try to recognize it, then test constraints
9198 for the resulting pattern.
9200 If the pattern doesn't match or the constraints
9201 aren't met keep searching for a suitable floater
9203 XVECEXP (PATTERN (new_rtx
), 0, 0) = PATTERN (anchor
);
9204 XVECEXP (PATTERN (new_rtx
), 0, 1) = PATTERN (floater
);
9205 INSN_CODE (new_rtx
) = -1;
9206 insn_code_number
= recog_memoized (new_rtx
);
9207 if (insn_code_number
< 0
9208 || (extract_insn (new_rtx
), ! constrain_operands (1)))
9222 /* There's up to three operands to consider. One
9223 output and two inputs.
9225 The output must not be used between FLOATER & ANCHOR
9226 exclusive. The inputs must not be set between
9227 FLOATER and ANCHOR exclusive. */
9229 if (reg_used_between_p (dest
, start
, end
))
9232 if (reg_set_between_p (src1
, start
, end
))
9235 if (reg_set_between_p (src2
, start
, end
))
9238 /* If we get here, then everything is good. */
9242 /* Return nonzero if references for INSN are delayed.
9244 Millicode insns are actually function calls with some special
9245 constraints on arguments and register usage.
9247 Millicode calls always expect their arguments in the integer argument
9248 registers, and always return their result in %r29 (ret1). They
9249 are expected to clobber their arguments, %r1, %r29, and the return
9250 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9252 This function tells reorg that the references to arguments and
9253 millicode calls do not appear to happen until after the millicode call.
9254 This allows reorg to put insns which set the argument registers into the
9255 delay slot of the millicode call -- thus they act more like traditional
9258 Note we cannot consider side effects of the insn to be delayed because
9259 the branch and link insn will clobber the return pointer. If we happened
9260 to use the return pointer in the delay slot of the call, then we lose.
9262 get_attr_type will try to recognize the given insn, so make sure to
9263 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9266 insn_refs_are_delayed (rtx insn
)
9268 return ((GET_CODE (insn
) == INSN
9269 && GET_CODE (PATTERN (insn
)) != SEQUENCE
9270 && GET_CODE (PATTERN (insn
)) != USE
9271 && GET_CODE (PATTERN (insn
)) != CLOBBER
9272 && get_attr_type (insn
) == TYPE_MILLI
));
9275 /* Promote the return value, but not the arguments. */
9277 static enum machine_mode
9278 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
9279 enum machine_mode mode
,
9280 int *punsignedp ATTRIBUTE_UNUSED
,
9281 const_tree fntype ATTRIBUTE_UNUSED
,
9284 if (for_return
== 0)
9286 return promote_mode (type
, mode
, punsignedp
);
/* NOTE(review): this region is garbled by extraction — statements are split
   mid-token and several original lines are missing (the function's return
   type, braces, parts of the PA64 aggregate handling, and the loop-local
   declarations referenced below).  The text is left byte-identical; restore
   it from the upstream sources before attempting to compile.  */
9289 /* On the HP-PA the value is found in register(s) 28(-29), unless
9290 the mode is SF or DF. Then the value is returned in fr4 (32).
9292 This must perform the same promotions as PROMOTE_MODE, else promoting
9293 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9295 Small structures must be returned in a PARALLEL on PA64 in order
9296 to match the HP Compiler ABI. */
9299 pa_function_value (const_tree valtype
,
9300 const_tree func ATTRIBUTE_UNUSED
,
9301 bool outgoing ATTRIBUTE_UNUSED
)
9303 enum machine_mode valmode
;
/* PA64 branch: aggregates, complex and vector values.  */
9305 if (AGGREGATE_TYPE_P (valtype
)
9306 || TREE_CODE (valtype
) == COMPLEX_TYPE
9307 || TREE_CODE (valtype
) == VECTOR_TYPE
)
9311 /* Aggregates with a size less than or equal to 128 bits are
9312 returned in GR 28(-29). They are left justified. The pad
9313 bits are undefined. Larger aggregates are returned in
/* NOTE(review): the declarations of `i' and `loc' used below are among
   the missing lines.  */
9317 int ub
= int_size_in_bytes (valtype
) <= UNITS_PER_WORD
? 1 : 2;
9319 for (i
= 0; i
< ub
; i
++)
9321 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9322 gen_rtx_REG (DImode
, 28 + i
),
9327 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec_v (ub
, loc
));
9329 else if (int_size_in_bytes (valtype
) > UNITS_PER_WORD
)
9331 /* Aggregates 5 to 8 bytes in size are returned in general
9332 registers r28-r29 in the same manner as other non
9333 floating-point objects. The data is right-justified and
9334 zero-extended to 64 bits. This is opposite to the normal
9335 justification used on big endian targets and requires
9336 special treatment. */
9337 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9338 gen_rtx_REG (DImode
, 28), const0_rtx
);
9339 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
/* Scalar case: sub-word integrals and pointers are widened to word_mode.  */
9343 if ((INTEGRAL_TYPE_P (valtype
)
9344 && GET_MODE_BITSIZE (TYPE_MODE (valtype
)) < BITS_PER_WORD
)
9345 || POINTER_TYPE_P (valtype
))
9346 valmode
= word_mode
;
9348 valmode
= TYPE_MODE (valtype
);
/* Hard-float scalar floats (except TFmode) come back in fr4 (reg 32);
   everything else in gr28.  */
9350 if (TREE_CODE (valtype
) == REAL_TYPE
9351 && !AGGREGATE_TYPE_P (valtype
)
9352 && TYPE_MODE (valtype
) != TFmode
9353 && !TARGET_SOFT_FLOAT
)
9354 return gen_rtx_REG (valmode
, 32);
9356 return gen_rtx_REG (valmode
, 28);
9359 /* Implement the TARGET_LIBCALL_VALUE hook. */
9362 pa_libcall_value (enum machine_mode mode
,
9363 const_rtx fun ATTRIBUTE_UNUSED
)
9365 if (! TARGET_SOFT_FLOAT
9366 && (mode
== SFmode
|| mode
== DFmode
))
9367 return gen_rtx_REG (mode
, 32);
9369 return gen_rtx_REG (mode
, 28);
9372 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9375 pa_function_value_regno_p (const unsigned int regno
)
9378 || (! TARGET_SOFT_FLOAT
&& regno
== 32))
9384 /* Update the data in CUM to advance over an argument
9385 of mode MODE and data type TYPE.
9386 (TYPE is null for libcalls where that information may not be available.) */
9389 pa_function_arg_advance (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
9390 const_tree type
, bool named ATTRIBUTE_UNUSED
)
9392 int arg_size
= FUNCTION_ARG_SIZE (mode
, type
);
9394 cum
->nargs_prototype
--;
9395 cum
->words
+= (arg_size
9396 + ((cum
->words
& 01)
9397 && type
!= NULL_TREE
/* NOTE(review): this function is garbled by extraction — statements are
   split mid-token and large runs of original lines are missing (the return
   type, braces, the early returns, the declarations of `arg_size',
   `alignment', `gpr_reg_base', `fpr_reg_base', `loc' and `retval', and the
   heads of several condition chains, which now begin with dangling "||").
   Text left byte-identical; restore from the upstream sources before
   compiling.  */
9401 /* Return the location of a parameter that is passed in a register or NULL
9402 if the parameter has any component that is passed in memory.
9404 This is new code and will be pushed to into the net sources after
9407 ??? We might want to restructure this so that it looks more like other
9410 pa_function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
9411 const_tree type
, bool named ATTRIBUTE_UNUSED
)
9413 int max_arg_words
= (TARGET_64BIT
? 8 : 4);
9420 if (mode
== VOIDmode
)
9423 arg_size
= FUNCTION_ARG_SIZE (mode
, type
);
9425 /* If this arg would be passed partially or totally on the stack, then
9426 this routine should return zero. pa_arg_partial_bytes will
9427 handle arguments which are split between regs and stack slots if
9428 the ABI mandates split arguments. */
9431 /* The 32-bit ABI does not split arguments. */
9432 if (cum
->words
+ arg_size
> max_arg_words
)
9438 alignment
= cum
->words
& 1;
9439 if (cum
->words
+ alignment
>= max_arg_words
)
9443 /* The 32bit ABIs and the 64bit ABIs are rather different,
9444 particularly in their handling of FP registers. We might
9445 be able to cleverly share code between them, but I'm not
9446 going to bother in the hope that splitting them up results
9447 in code that is more easily understood. */
9451 /* Advance the base registers to their current locations.
9453 Remember, gprs grow towards smaller register numbers while
9454 fprs grow to higher register numbers. Also remember that
9455 although FP regs are 32-bit addressable, we pretend that
9456 the registers are 64-bits wide. */
9457 gpr_reg_base
= 26 - cum
->words
;
9458 fpr_reg_base
= 32 + cum
->words
;
9460 /* Arguments wider than one word and small aggregates need special
9464 || (type
&& (AGGREGATE_TYPE_P (type
)
9465 || TREE_CODE (type
) == COMPLEX_TYPE
9466 || TREE_CODE (type
) == VECTOR_TYPE
)))
9468 /* Double-extended precision (80-bit), quad-precision (128-bit)
9469 and aggregates including complex numbers are aligned on
9470 128-bit boundaries. The first eight 64-bit argument slots
9471 are associated one-to-one, with general registers r26
9472 through r19, and also with floating-point registers fr4
9473 through fr11. Arguments larger than one word are always
9474 passed in general registers.
9476 Using a PARALLEL with a word mode register results in left
9477 justified data on a big-endian target. */
9480 int i
, offset
= 0, ub
= arg_size
;
9482 /* Align the base register. */
9483 gpr_reg_base
-= alignment
;
9485 ub
= MIN (ub
, max_arg_words
- cum
->words
- alignment
);
9486 for (i
= 0; i
< ub
; i
++)
9488 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9489 gen_rtx_REG (DImode
, gpr_reg_base
),
9495 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (ub
, loc
));
9500 /* If the argument is larger than a word, then we know precisely
9501 which registers we must use. */
9515 /* Structures 5 to 8 bytes in size are passed in the general
9516 registers in the same manner as other non floating-point
9517 objects. The data is right-justified and zero-extended
9518 to 64 bits. This is opposite to the normal justification
9519 used on big endian targets and requires special treatment.
9520 We now define BLOCK_REG_PADDING to pad these objects.
9521 Aggregates, complex and vector types are passed in the same
9522 manner as structures. */
9524 || (type
&& (AGGREGATE_TYPE_P (type
)
9525 || TREE_CODE (type
) == COMPLEX_TYPE
9526 || TREE_CODE (type
) == VECTOR_TYPE
)))
9528 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9529 gen_rtx_REG (DImode
, gpr_reg_base
),
9531 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
9536 /* We have a single word (32 bits). A simple computation
9537 will get us the register #s we need. */
9538 gpr_reg_base
= 26 - cum
->words
;
9539 fpr_reg_base
= 32 + 2 * cum
->words
;
9543 /* Determine if the argument needs to be passed in both general and
9544 floating point registers. */
9545 if (((TARGET_PORTABLE_RUNTIME
|| TARGET_64BIT
|| TARGET_ELF32
)
9546 /* If we are doing soft-float with portable runtime, then there
9547 is no need to worry about FP regs. */
9548 && !TARGET_SOFT_FLOAT
9549 /* The parameter must be some kind of scalar float, else we just
9550 pass it in integer registers. */
9551 && GET_MODE_CLASS (mode
) == MODE_FLOAT
9552 /* The target function must not have a prototype. */
9553 && cum
->nargs_prototype
<= 0
9554 /* libcalls do not need to pass items in both FP and general
9556 && type
!= NULL_TREE
9557 /* All this hair applies to "outgoing" args only. This includes
9558 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9560 /* Also pass outgoing floating arguments in both registers in indirect
9561 calls with the 32 bit ABI and the HP assembler since there is no
9562 way to the specify argument locations in static functions. */
9567 && GET_MODE_CLASS (mode
) == MODE_FLOAT
))
9573 gen_rtx_EXPR_LIST (VOIDmode
,
9574 gen_rtx_REG (mode
, fpr_reg_base
),
9576 gen_rtx_EXPR_LIST (VOIDmode
,
9577 gen_rtx_REG (mode
, gpr_reg_base
),
9582 /* See if we should pass this parameter in a general register. */
9583 if (TARGET_SOFT_FLOAT
9584 /* Indirect calls in the normal 32bit ABI require all arguments
9585 to be passed in general registers. */
9586 || (!TARGET_PORTABLE_RUNTIME
9590 /* If the parameter is not a scalar floating-point parameter,
9591 then it belongs in GPRs. */
9592 || GET_MODE_CLASS (mode
) != MODE_FLOAT
9593 /* Structure with single SFmode field belongs in GPR. */
9594 || (type
&& AGGREGATE_TYPE_P (type
)))
9595 retval
= gen_rtx_REG (mode
, gpr_reg_base
);
9597 retval
= gen_rtx_REG (mode
, fpr_reg_base
);
9602 /* Arguments larger than one word are double word aligned. */
9605 pa_function_arg_boundary (enum machine_mode mode
, const_tree type
)
9607 bool singleword
= (type
9608 ? (integer_zerop (TYPE_SIZE (type
))
9609 || !TREE_CONSTANT (TYPE_SIZE (type
))
9610 || int_size_in_bytes (type
) <= UNITS_PER_WORD
)
9611 : GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
);
9613 return singleword
? PARM_BOUNDARY
: MAX_PARM_BOUNDARY
;
9616 /* If this arg would be passed totally in registers or totally on the stack,
9617 then this routine should return zero. */
9620 pa_arg_partial_bytes (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
9621 tree type
, bool named ATTRIBUTE_UNUSED
)
9623 unsigned int max_arg_words
= 8;
9624 unsigned int offset
= 0;
9629 if (FUNCTION_ARG_SIZE (mode
, type
) > 1 && (cum
->words
& 1))
9632 if (cum
->words
+ offset
+ FUNCTION_ARG_SIZE (mode
, type
) <= max_arg_words
)
9633 /* Arg fits fully into registers. */
9635 else if (cum
->words
+ offset
>= max_arg_words
)
9636 /* Arg fully on the stack. */
9640 return (max_arg_words
- cum
->words
- offset
) * UNITS_PER_WORD
;
/* NOTE(review): this function is garbled by extraction — the return type,
   braces, the one-only/weak guard head and the early returns are missing.
   Text left byte-identical; restore from the upstream sources before
   compiling.  */
9644 /* A get_unnamed_section callback for switching to the text section.
9646 This function is only used with SOM. Because we don't support
9647 named subspaces, we can only create a new subspace or switch back
9648 to the default text subspace. */
9651 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
9653 gcc_assert (TARGET_SOM
);
9656 if (cfun
&& cfun
->machine
&& !cfun
->machine
->in_nsubspa
)
9658 /* We only want to emit a .nsubspa directive once at the
9659 start of the function. */
9660 cfun
->machine
->in_nsubspa
= 1;
9662 /* Create a new subspace for the text. This provides
9663 better stub placement and one-only functions. */
9665 && DECL_ONE_ONLY (cfun
->decl
)
9666 && !DECL_WEAK (cfun
->decl
))
9668 output_section_asm_op ("\t.SPACE $TEXT$\n"
9669 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9670 "ACCESS=44,SORT=24,COMDAT");
9676 /* There isn't a current function or the body of the current
9677 function has been completed. So, we are changing to the
9678 text section to output debugging information. Thus, we
9679 need to forget that we are in the text section so that
9680 varasm.c will call us when text_section is selected again. */
9681 gcc_assert (!cfun
|| !cfun
->machine
9682 || cfun
->machine
->in_nsubspa
== 2);
9685 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9688 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */

static void
som_output_comdat_data_section_asm_op (const void *data)
{
  /* DATA is the assembler directive string registered for the section.  */
  output_section_asm_op (data);
}
/* NOTE(review): this function is garbled by extraction — the return type,
   braces, the lvalue of the first assignment (presumably text_section) and
   fragments of the subspace directive strings are missing.  Text left
   byte-identical; restore from the upstream sources before compiling.  */
9701 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9704 pa_som_asm_init_sections (void)
9707 = get_unnamed_section (0, som_output_text_section_asm_op
, NULL
);
9709 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9710 is not being generated. */
9711 som_readonly_data_section
9712 = get_unnamed_section (0, output_section_asm_op
,
9713 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9715 /* When secondary definitions are not supported, SOM makes readonly
9716 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9718 som_one_only_readonly_data_section
9719 = get_unnamed_section (0, som_output_comdat_data_section_asm_op
,
9721 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9722 "ACCESS=0x2c,SORT=16,COMDAT");
9725 /* When secondary definitions are not supported, SOM makes data one-only
9726 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9727 som_one_only_data_section
9728 = get_unnamed_section (SECTION_WRITE
,
9729 som_output_comdat_data_section_asm_op
,
9730 "\t.SPACE $PRIVATE$\n"
9731 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9732 "ACCESS=31,SORT=24,COMDAT");
9734 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9735 which reference data within the $TEXT$ space (for example constant
9736 strings in the $LIT$ subspace).
9738 The assemblers (GAS and HP as) both have problems with handling
9739 the difference of two symbols which is the other correct way to
9740 reference constant data during PIC code generation.
9742 So, there's no way to reference constant data which is in the
9743 $TEXT$ space during PIC generation. Instead place all constant
9744 data into the $PRIVATE$ subspace (this reduces sharing, but it
9745 works correctly). */
9746 readonly_data_section
= flag_pic
? data_section
: som_readonly_data_section
;
9748 /* We must not have a reference to an external symbol defined in a
9749 shared library in a readonly section, else the SOM linker will
9752 So, we force exception information into the data section. */
9753 exception_section
= data_section
;
/* NOTE(review): this function is garbled by extraction — the return type,
   braces, and the heads of two condition chains (the "&& DECL_ONE_ONLY"
   clauses below begin with dangling operators) are missing.  Text left
   byte-identical; restore from the upstream sources before compiling.  */
9756 /* On hpux10, the linker will give an error if we have a reference
9757 in the read-only data section to a symbol defined in a shared
9758 library. Therefore, expressions that might require a reloc can
9759 not be placed in the read-only data section. */
9762 pa_select_section (tree exp
, int reloc
,
9763 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
9765 if (TREE_CODE (exp
) == VAR_DECL
9766 && TREE_READONLY (exp
)
9767 && !TREE_THIS_VOLATILE (exp
)
9768 && DECL_INITIAL (exp
)
9769 && (DECL_INITIAL (exp
) == error_mark_node
9770 || TREE_CONSTANT (DECL_INITIAL (exp
)))
9774 && DECL_ONE_ONLY (exp
)
9775 && !DECL_WEAK (exp
))
9776 return som_one_only_readonly_data_section
;
9778 return readonly_data_section
;
9780 else if (CONSTANT_CLASS_P (exp
) && !reloc
)
9781 return readonly_data_section
;
9783 && TREE_CODE (exp
) == VAR_DECL
9784 && DECL_ONE_ONLY (exp
)
9785 && !DECL_WEAK (exp
))
9786 return som_one_only_data_section
;
9788 return data_section
;
void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here, functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (FUNCTION_NAME_P (name))
    return;

  fputs ("\t.EXPORT ", stream);
  assemble_name (stream, name);
  fputs (",DATA\n", stream);
}
9804 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9807 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED
,
9808 int incoming ATTRIBUTE_UNUSED
)
9810 return gen_rtx_REG (Pmode
, PA_STRUCT_VALUE_REGNUM
);
9813 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9816 pa_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
9818 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9819 PA64 ABI says that objects larger than 128 bits are returned in memory.
9820 Note, int_size_in_bytes can return -1 if the size of the object is
9821 variable or larger than the maximum value that can be expressed as
9822 a HOST_WIDE_INT. It can also return zero for an empty type. The
9823 simplest way to handle variable and empty types is to pass them in
9824 memory. This avoids problems in defining the boundaries of argument
9825 slots, allocating registers, etc. */
9826 return (int_size_in_bytes (type
) > (TARGET_64BIT
? 16 : 8)
9827 || int_size_in_bytes (type
) <= 0);
/* NOTE(review): the member declarations of this struct (and its closing
   brace/typedef name) are missing from the garbled source — the fields
   referenced later as p->decl and p->name are not visible here.  Text left
   byte-identical; restore from the upstream sources before compiling.  */
9830 /* Structure to hold declaration and name of external symbols that are
9831 emitted by GCC. We generate a vector of these symbols and output them
9832 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9833 This avoids putting out names that are never really used. */
9835 typedef struct GTY(()) extern_symbol
9841 /* Define gc'd vector type for extern_symbol. */
9842 DEF_VEC_O(extern_symbol
);
9843 DEF_VEC_ALLOC_O(extern_symbol
,gc
);
9845 /* Vector of extern_symbol pointers. */
9846 static GTY(()) VEC(extern_symbol
,gc
) *extern_symbols
;
/* NOTE(review): this function is garbled by extraction — the return type,
   braces, the assignments that store DECL and NAME into the pushed vector
   element, and the matching #endif are missing.  Text left byte-identical;
   restore from the upstream sources before compiling.  */
9848 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9849 /* Mark DECL (name NAME) as an external reference (assembler output
9850 file FILE). This saves the names to output at the end of the file
9851 if actually referenced. */
9854 pa_hpux_asm_output_external (FILE *file
, tree decl
, const char *name
)
9856 extern_symbol
* p
= VEC_safe_push (extern_symbol
, gc
, extern_symbols
, NULL
);
9858 gcc_assert (file
== asm_out_file
);
/* NOTE(review): this function is garbled by extraction — the return type,
   braces and the declarations of the loop variables `i' and `p' are
   missing.  Text left byte-identical; restore from the upstream sources
   before compiling.  */
9863 /* Output text required at the end of an assembler file.
9864 This includes deferred plabels and .import directives for
9865 all external symbols that were actually referenced. */
9868 pa_hpux_file_end (void)
9873 if (!NO_DEFERRED_PROFILE_COUNTERS
)
9874 output_deferred_profile_counters ();
9876 output_deferred_plabels ();
/* Emit an .import for every recorded external that was actually
   referenced and never defined in this file.  */
9878 for (i
= 0; VEC_iterate (extern_symbol
, extern_symbols
, i
, p
); i
++)
9880 tree decl
= p
->decl
;
9882 if (!TREE_ASM_WRITTEN (decl
)
9883 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl
), 0)))
9884 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file
, decl
, p
->name
);
9887 VEC_free (extern_symbol
, gc
, extern_symbols
);
/* NOTE(review): this function is garbled by extraction — the return type,
   braces, an initial from==to check (presumably) and every return
   statement for the conditions below are missing.  Text left
   byte-identical; restore from the upstream sources before compiling.  */
9891 /* Return true if a change from mode FROM to mode TO for a register
9892 in register class RCLASS is invalid. */
9895 pa_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
9896 enum reg_class rclass
)
9901 /* Reject changes to/from complex and vector modes. */
9902 if (COMPLEX_MODE_P (from
) || VECTOR_MODE_P (from
)
9903 || COMPLEX_MODE_P (to
) || VECTOR_MODE_P (to
))
9906 if (GET_MODE_SIZE (from
) == GET_MODE_SIZE (to
))
9909 /* There is no way to load QImode or HImode values directly from
9910 memory. SImode loads to the FP registers are not zero extended.
9911 On the 64-bit target, this conflicts with the definition of
9912 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9913 with different sizes in the floating-point registers. */
9914 if (MAYBE_FP_REG_CLASS_P (rclass
))
9917 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9918 in specific sets of registers. Thus, we cannot allow changing
9919 to a larger mode when it's larger than a word. */
9920 if (GET_MODE_SIZE (to
) > UNITS_PER_WORD
9921 && GET_MODE_SIZE (to
) > GET_MODE_SIZE (from
))
9927 /* Returns TRUE if it is a good idea to tie two pseudo registers
9928 when one has mode MODE1 and one has mode MODE2.
9929 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9930 for any hard reg, then this must be FALSE for correct output.
9932 We should return FALSE for QImode and HImode because these modes
9933 are not ok in the floating-point registers. However, this prevents
9934 tieing these modes to SImode and DImode in the general registers.
9935 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9936 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9937 in the floating-point registers. */
9940 pa_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
9942 /* Don't tie modes in different classes. */
9943 if (GET_MODE_CLASS (mode1
) != GET_MODE_CLASS (mode2
))
/* NOTE(review): this region is garbled by extraction — the function's
   return type, braces, and the if/else structure selecting between the
   32-bit (ldw/.word) and 64-bit (ldd/.dword) trampoline templates are
   missing, so the fputs calls below appear as one flat list.  Text left
   byte-identical; restore from the upstream sources before compiling.  */
9950 /* Length in units of the trampoline instruction code. */
9952 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9955 /* Output assembler code for a block containing the constant parts
9956 of a trampoline, leaving space for the variable parts.\
9958 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9959 and then branches to the specified routine.
9961 This code template is copied from text segment to stack location
9962 and then patched with pa_trampoline_init to contain valid values,
9963 and then entered as a subroutine.
9965 It is best to keep this as small as possible to avoid having to
9966 flush multiple lines in the cache. */
9969 pa_asm_trampoline_template (FILE *f
)
9973 fputs ("\tldw 36(%r22),%r21\n", f
);
9974 fputs ("\tbb,>=,n %r21,30,.+16\n", f
);
9975 if (ASSEMBLER_DIALECT
== 0)
9976 fputs ("\tdepi 0,31,2,%r21\n", f
);
9978 fputs ("\tdepwi 0,31,2,%r21\n", f
);
9979 fputs ("\tldw 4(%r21),%r19\n", f
);
9980 fputs ("\tldw 0(%r21),%r21\n", f
);
9983 fputs ("\tbve (%r21)\n", f
);
9984 fputs ("\tldw 40(%r22),%r29\n", f
);
9985 fputs ("\t.word 0\n", f
);
9986 fputs ("\t.word 0\n", f
);
9990 fputs ("\tldsid (%r21),%r1\n", f
);
9991 fputs ("\tmtsp %r1,%sr0\n", f
);
9992 fputs ("\tbe 0(%sr0,%r21)\n", f
);
9993 fputs ("\tldw 40(%r22),%r29\n", f
);
9995 fputs ("\t.word 0\n", f
);
9996 fputs ("\t.word 0\n", f
);
9997 fputs ("\t.word 0\n", f
);
9998 fputs ("\t.word 0\n", f
);
10002 fputs ("\t.dword 0\n", f
);
10003 fputs ("\t.dword 0\n", f
);
10004 fputs ("\t.dword 0\n", f
);
10005 fputs ("\t.dword 0\n", f
);
10006 fputs ("\tmfia %r31\n", f
);
10007 fputs ("\tldd 24(%r31),%r1\n", f
);
10008 fputs ("\tldd 24(%r1),%r27\n", f
);
10009 fputs ("\tldd 16(%r1),%r1\n", f
);
10010 fputs ("\tbve (%r1)\n", f
);
10011 fputs ("\tldd 32(%r31),%r31\n", f
);
10012 fputs ("\t.dword 0 ; fptr\n", f
);
10013 fputs ("\t.dword 0 ; static link\n", f
);
/* NOTE(review): this function is garbled by extraction — the return type,
   braces, the declarations of `r_tramp' and `tmp', and the if/else that
   separates the 32-bit sequence (offsets 36/40/44/48, *si cache flushes)
   from the 64-bit sequence (offsets 56/64/16/24, *di cache flushes) are
   missing.  Text left byte-identical; restore from the upstream sources
   before compiling.  */
10017 /* Emit RTL insns to initialize the variable parts of a trampoline.
10018 FNADDR is an RTX for the address of the function's pure code.
10019 CXT is an RTX for the static chain value for the function.
10021 Move the function address to the trampoline template at offset 36.
10022 Move the static chain value to trampoline template at offset 40.
10023 Move the trampoline address to trampoline template at offset 44.
10024 Move r19 to trampoline template at offset 48. The latter two
10025 words create a plabel for the indirect call to the trampoline.
10027 A similar sequence is used for the 64-bit port but the plabel is
10028 at the beginning of the trampoline.
10030 Finally, the cache entries for the trampoline code are flushed.
10031 This is necessary to ensure that the trampoline instruction sequence
10032 is written to memory prior to any attempts at prefetching the code
10036 pa_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
10038 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
10039 rtx start_addr
= gen_reg_rtx (Pmode
);
10040 rtx end_addr
= gen_reg_rtx (Pmode
);
10041 rtx line_length
= gen_reg_rtx (Pmode
);
10044 emit_block_move (m_tramp
, assemble_trampoline_template (),
10045 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
10046 r_tramp
= force_reg (Pmode
, XEXP (m_tramp
, 0));
/* 32-bit sequence follows.  */
10050 tmp
= adjust_address (m_tramp
, Pmode
, 36);
10051 emit_move_insn (tmp
, fnaddr
);
10052 tmp
= adjust_address (m_tramp
, Pmode
, 40);
10053 emit_move_insn (tmp
, chain_value
);
10055 /* Create a fat pointer for the trampoline. */
10056 tmp
= adjust_address (m_tramp
, Pmode
, 44);
10057 emit_move_insn (tmp
, r_tramp
);
10058 tmp
= adjust_address (m_tramp
, Pmode
, 48);
10059 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 19));
10061 /* fdc and fic only use registers for the address to flush,
10062 they do not accept integer displacements. We align the
10063 start and end addresses to the beginning of their respective
10064 cache lines to minimize the number of lines flushed. */
10065 emit_insn (gen_andsi3 (start_addr
, r_tramp
,
10066 GEN_INT (-MIN_CACHELINE_SIZE
)));
10067 tmp
= force_reg (Pmode
, plus_constant (r_tramp
, TRAMPOLINE_CODE_SIZE
-1));
10068 emit_insn (gen_andsi3 (end_addr
, tmp
,
10069 GEN_INT (-MIN_CACHELINE_SIZE
)));
10070 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10071 emit_insn (gen_dcacheflushsi (start_addr
, end_addr
, line_length
));
10072 emit_insn (gen_icacheflushsi (start_addr
, end_addr
, line_length
,
10073 gen_reg_rtx (Pmode
),
10074 gen_reg_rtx (Pmode
)));
/* 64-bit sequence follows.  */
10078 tmp
= adjust_address (m_tramp
, Pmode
, 56);
10079 emit_move_insn (tmp
, fnaddr
);
10080 tmp
= adjust_address (m_tramp
, Pmode
, 64);
10081 emit_move_insn (tmp
, chain_value
);
10083 /* Create a fat pointer for the trampoline. */
10084 tmp
= adjust_address (m_tramp
, Pmode
, 16);
10085 emit_move_insn (tmp
, force_reg (Pmode
, plus_constant (r_tramp
, 32)));
10086 tmp
= adjust_address (m_tramp
, Pmode
, 24);
10087 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 27));
10089 /* fdc and fic only use registers for the address to flush,
10090 they do not accept integer displacements. We align the
10091 start and end addresses to the beginning of their respective
10092 cache lines to minimize the number of lines flushed. */
10093 tmp
= force_reg (Pmode
, plus_constant (r_tramp
, 32));
10094 emit_insn (gen_anddi3 (start_addr
, tmp
,
10095 GEN_INT (-MIN_CACHELINE_SIZE
)));
10096 tmp
= force_reg (Pmode
, plus_constant (tmp
, TRAMPOLINE_CODE_SIZE
- 1));
10097 emit_insn (gen_anddi3 (end_addr
, tmp
,
10098 GEN_INT (-MIN_CACHELINE_SIZE
)));
10099 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10100 emit_insn (gen_dcacheflushdi (start_addr
, end_addr
, line_length
));
10101 emit_insn (gen_icacheflushdi (start_addr
, end_addr
, line_length
,
10102 gen_reg_rtx (Pmode
),
10103 gen_reg_rtx (Pmode
)));
10107 /* Perform any machine-specific adjustment in the address of the trampoline.
10108 ADDR contains the address that was passed to pa_trampoline_init.
10109 Adjust the trampoline address to point to the plabel at offset 44. */
10112 pa_trampoline_adjust_address (rtx addr
)
10115 addr
= memory_address (Pmode
, plus_constant (addr
, 46));
10120 pa_delegitimize_address (rtx orig_x
)
10122 rtx x
= delegitimize_mem_from_attrs (orig_x
);
10124 if (GET_CODE (x
) == LO_SUM
10125 && GET_CODE (XEXP (x
, 1)) == UNSPEC
10126 && XINT (XEXP (x
, 1), 1) == UNSPEC_DLTIND14R
)
10127 return gen_const_mem (Pmode
, XVECEXP (XEXP (x
, 1), 0, 0));
10132 pa_internal_arg_pointer (void)
10134 /* The argument pointer and the hard frame pointer are the same in
10135 the 32-bit runtime, so we don't need a copy. */
10137 return copy_to_reg (virtual_incoming_args_rtx
);
10139 return virtual_incoming_args_rtx
;
10142 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10143 Frame pointer elimination is automatically handled. */
10146 pa_can_eliminate (const int from
, const int to
)
10148 /* The argument cannot be eliminated in the 64-bit runtime. */
10149 if (TARGET_64BIT
&& from
== ARG_POINTER_REGNUM
)
10152 return (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
10153 ? ! frame_pointer_needed
10157 /* Define the offset between two registers, FROM to be eliminated and its
10158 replacement TO, at the start of a routine. */
10160 pa_initial_elimination_offset (int from
, int to
)
10162 HOST_WIDE_INT offset
;
10164 if ((from
== HARD_FRAME_POINTER_REGNUM
|| from
== FRAME_POINTER_REGNUM
)
10165 && to
== STACK_POINTER_REGNUM
)
10166 offset
= -compute_frame_size (get_frame_size (), 0);
10167 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
10170 gcc_unreachable ();
10176 pa_conditional_register_usage (void)
10180 if (!TARGET_64BIT
&& !TARGET_PA_11
)
10182 for (i
= 56; i
<= FP_REG_LAST
; i
++)
10183 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10184 for (i
= 33; i
< 56; i
+= 2)
10185 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10187 if (TARGET_DISABLE_FPREGS
|| TARGET_SOFT_FLOAT
)
10189 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
10190 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10193 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
/* NOTE(review): the body of this function (the braces, the suffix check
   inside the HPUX_LONG_DOUBLE_LIBRARY branch, and the return statements)
   was lost in the garbled source.  Text left byte-identical; restore from
   the upstream sources before compiling.  */
10196 /* Target hook for c_mode_for_suffix. */
10198 static enum machine_mode
10199 pa_c_mode_for_suffix (char suffix
)
10201 if (HPUX_LONG_DOUBLE_LIBRARY
)
10210 /* Target hook for function_section. */
10213 pa_function_section (tree decl
, enum node_frequency freq
,
10214 bool startup
, bool exit
)
10216 /* Put functions in text section if target doesn't have named sections. */
10217 if (!targetm_common
.have_named_sections
)
10218 return text_section
;
10220 /* Force nested functions into the same section as the containing
10223 && DECL_SECTION_NAME (decl
) == NULL_TREE
10224 && DECL_CONTEXT (decl
) != NULL_TREE
10225 && TREE_CODE (DECL_CONTEXT (decl
)) == FUNCTION_DECL
10226 && DECL_SECTION_NAME (DECL_CONTEXT (decl
)) == NULL_TREE
)
10227 return function_section (DECL_CONTEXT (decl
));
10229 /* Otherwise, use the default function section. */
10230 return default_function_section (decl
, freq
, startup
, exit
);
10233 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10235 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10236 that need more than three instructions to load prior to reload. This
10237 limit is somewhat arbitrary. It takes three instructions to load a
10238 CONST_INT from memory but two are memory accesses. It may be better
10239 to increase the allowed range for CONST_INTS. We may also be able
10240 to handle CONST_DOUBLES. */
10243 pa_legitimate_constant_p (enum machine_mode mode
, rtx x
)
10245 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
&& x
!= CONST0_RTX (mode
))
10248 if (!NEW_HP_ASSEMBLER
&& !TARGET_GAS
&& GET_CODE (x
) == LABEL_REF
)
10251 if (TARGET_64BIT
&& GET_CODE (x
) == CONST_DOUBLE
)
10255 && HOST_BITS_PER_WIDE_INT
> 32
10256 && GET_CODE (x
) == CONST_INT
10257 && !reload_in_progress
10258 && !reload_completed
10259 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x
))
10260 && !cint_ok_for_move (INTVAL (x
)))
10263 if (function_label_operand (x
, mode
))