]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/pa/pa.c
target-def.h (TARGET_HAVE_NAMED_SECTIONS): Move to common/common-target-def.h.
[thirdparty/gcc.git] / gcc / config / pa / pa.c
CommitLineData
188538df 1/* Subroutines for insn-output.c for HPPA.
8f949e7e 2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
96e45421 3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
66647d44 4 Free Software Foundation, Inc.
188538df
TG
5 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
6
b7849684 7This file is part of GCC.
188538df 8
b7849684 9GCC is free software; you can redistribute it and/or modify
188538df 10it under the terms of the GNU General Public License as published by
2f83c7d6 11the Free Software Foundation; either version 3, or (at your option)
188538df
TG
12any later version.
13
b7849684 14GCC is distributed in the hope that it will be useful,
188538df
TG
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
2f83c7d6
NC
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
188538df 22
188538df 23#include "config.h"
0b17dd98 24#include "system.h"
4977bab6
ZW
25#include "coretypes.h"
26#include "tm.h"
188538df
TG
27#include "rtl.h"
28#include "regs.h"
29#include "hard-reg-set.h"
188538df
TG
30#include "insn-config.h"
31#include "conditions.h"
188538df
TG
32#include "insn-attr.h"
33#include "flags.h"
34#include "tree.h"
d499455b 35#include "output.h"
823fbbce 36#include "except.h"
becf1647 37#include "expr.h"
e78d8e51 38#include "optabs.h"
e78d8e51 39#include "reload.h"
d777856d 40#include "integrate.h"
49ad7cfa 41#include "function.h"
718f9c0f 42#include "diagnostic-core.h"
d07d525a 43#include "ggc.h"
519104fe 44#include "recog.h"
823fbbce 45#include "predict.h"
519104fe 46#include "tm_p.h"
672a6f42 47#include "target.h"
677f3fa8 48#include "common/common-target.h"
672a6f42 49#include "target-def.h"
41a1208a 50#include "langhooks.h"
62a53968 51#include "df.h"
96e45421 52#include "opts.h"
188538df 53
5d50fab3
JL
54/* Return nonzero if there is a bypass for the output of
55 OUT_INSN and the fp store IN_INSN. */
56int
b7849684 57hppa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
5d50fab3
JL
58{
59 enum machine_mode store_mode;
60 enum machine_mode other_mode;
61 rtx set;
62
63 if (recog_memoized (in_insn) < 0
d4f2728a
JDA
64 || (get_attr_type (in_insn) != TYPE_FPSTORE
65 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
5d50fab3
JL
66 || recog_memoized (out_insn) < 0)
67 return 0;
68
69 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
70
71 set = single_set (out_insn);
72 if (!set)
73 return 0;
74
75 other_mode = GET_MODE (SET_SRC (set));
76
77 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
78}
79
80
/* Unless already defined, DO_FRAME_NOTES defaults to 1 when
   INCOMING_RETURN_ADDR_RTX is defined and to 0 otherwise.  */
#if !defined (DO_FRAME_NOTES)
#if defined (INCOMING_RETURN_ADDR_RTX)
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
88
8a5b8538 89static void pa_option_override (void);
d8f95bed 90static void copy_reg_pointer (rtx, rtx);
a2017852 91static void fix_range (const char *);
8a5b8538
AS
92static int hppa_register_move_cost (enum machine_mode mode, reg_class_t,
93 reg_class_t);
f40751dd
JH
94static int hppa_address_cost (rtx, bool);
95static bool hppa_rtx_costs (rtx, int, int, int *, bool);
b7849684
JE
96static inline rtx force_mode (enum machine_mode, rtx);
97static void pa_reorg (void);
98static void pa_combine_instructions (void);
99static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
3232e9d8 100static bool forward_branch_p (rtx);
b7849684 101static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
70128ad9
AO
102static int compute_movmem_length (rtx);
103static int compute_clrmem_length (rtx);
b7849684
JE
104static bool pa_assemble_integer (rtx, unsigned int, int);
105static void remove_useless_addtr_insns (int);
a4295210
JDA
106static void store_reg (int, HOST_WIDE_INT, int);
107static void store_reg_modify (int, int, HOST_WIDE_INT);
108static void load_reg (int, HOST_WIDE_INT, int);
109static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
9193312a 110static rtx pa_function_value (const_tree, const_tree, bool);
8a5b8538
AS
111static rtx pa_libcall_value (enum machine_mode, const_rtx);
112static bool pa_function_value_regno_p (const unsigned int);
b7849684 113static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
67b846fa 114static void update_total_code_bytes (unsigned int);
b7849684
JE
115static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
116static int pa_adjust_cost (rtx, rtx, rtx, int);
117static int pa_adjust_priority (rtx, int);
118static int pa_issue_rate (void);
d6b5193b
RS
119static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
120static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
ae46c4e0 121 ATTRIBUTE_UNUSED;
b7849684
JE
122static void pa_encode_section_info (tree, rtx, int);
123static const char *pa_strip_name_encoding (const char *);
124static bool pa_function_ok_for_sibcall (tree, tree);
125static void pa_globalize_label (FILE *, const char *)
a5f3f0ab 126 ATTRIBUTE_UNUSED;
b7849684
JE
127static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
128 HOST_WIDE_INT, tree);
35d434ed 129#if !defined(USE_COLLECT2)
b7849684
JE
130static void pa_asm_out_constructor (rtx, int);
131static void pa_asm_out_destructor (rtx, int);
35d434ed 132#endif
b7849684 133static void pa_init_builtins (void);
41a1208a 134static rtx pa_expand_builtin (tree, rtx, rtx, enum machine_mode mode, int);
3f12cd9b 135static rtx hppa_builtin_saveregs (void);
d7bd8aeb 136static void hppa_va_start (tree, rtx);
726a989a 137static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
83c32f2e 138static bool pa_scalar_mode_supported_p (enum machine_mode);
3101faab 139static bool pa_commutative_p (const_rtx x, int outer_code);
b7849684
JE
140static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
141static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
506d7b68 142static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
b7849684
JE
143static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
144static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
145static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
146static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
147static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
148static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
149static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
150static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
151static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
152static void output_deferred_plabels (void);
3674b34d 153static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
744b2d61
JDA
154#ifdef ASM_OUTPUT_EXTERNAL_REAL
155static void pa_hpux_file_end (void);
156#endif
41a1208a 157#if HPUX_LONG_DOUBLE_LIBRARY
c15c90bb
ZW
158static void pa_hpux_init_libfuncs (void);
159#endif
3f12cd9b 160static rtx pa_struct_value_rtx (tree, int);
78a52f11 161static bool pa_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
586de218 162 const_tree, bool);
78a52f11
RH
163static int pa_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
164 tree, bool);
fd29bdaf
NF
165static void pa_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
166 const_tree, bool);
167static rtx pa_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
168 const_tree, bool);
c2ed6cf8 169static unsigned int pa_function_arg_boundary (enum machine_mode, const_tree);
9a55eab3 170static struct machine_function * pa_init_machine_status (void);
a87cf97e
JR
171static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
172 enum machine_mode,
173 secondary_reload_info *);
16c16a24 174static void pa_extra_live_on_entry (bitmap);
4ce3042d
JDA
175static enum machine_mode pa_promote_function_mode (const_tree,
176 enum machine_mode, int *,
177 const_tree, int);
2eddfed1 178
859c146c
RH
179static void pa_asm_trampoline_template (FILE *);
180static void pa_trampoline_init (rtx, tree, rtx);
181static rtx pa_trampoline_adjust_address (rtx);
1f65437d 182static rtx pa_delegitimize_address (rtx);
8a5b8538 183static bool pa_print_operand_punct_valid_p (unsigned char);
bc707992
JDA
184static rtx pa_internal_arg_pointer (void);
185static bool pa_can_eliminate (const int, const int);
5efd84c5 186static void pa_conditional_register_usage (void);
41a1208a 187static enum machine_mode pa_c_mode_for_suffix (char);
7550cb35 188static section *pa_function_section (tree, enum node_frequency, bool, bool);
fbbf66e7 189static bool pa_cannot_force_const_mem (enum machine_mode, rtx);
1a627b35 190static bool pa_legitimate_constant_p (enum machine_mode, rtx);
859c146c 191
d6b5193b
RS
192/* The following extra sections are only used for SOM. */
193static GTY(()) section *som_readonly_data_section;
194static GTY(()) section *som_one_only_readonly_data_section;
195static GTY(()) section *som_one_only_data_section;
196
68386e1e
JL
197/* Counts for the number of callee-saved general and floating point
198 registers which were saved by the current function's prologue. */
199static int gr_saved, fr_saved;
200
16c16a24
JDA
201/* Boolean indicating whether the return pointer was saved by the
202 current function's prologue. */
203static bool rp_saved;
204
b7849684 205static rtx find_addr_reg (rtx);
188538df 206
5fad1c24 207/* Keep track of the number of bytes we have output in the CODE subspace
279c9bde 208 during this compilation so we'll know when to emit inline long-calls. */
a02aa5b0 209unsigned long total_code_bytes;
279c9bde 210
5fad1c24
JDA
211/* The last address of the previous function plus the number of bytes in
212 associated thunks that have been output. This is used to determine if
213 a thunk can use an IA-relative branch to reach its target function. */
67b846fa 214static unsigned int last_address;
5fad1c24 215
93ae92c1 216/* Variables to handle plabels that we discover are necessary at assembly
ddd5a7c1 217 output time. They are output after the current function. */
d1b38208 218struct GTY(()) deferred_plabel
93ae92c1
JL
219{
220 rtx internal_label;
744b2d61 221 rtx symbol;
e2500fed
GK
222};
223static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
224 deferred_plabels;
0f8e3849 225static size_t n_deferred_plabels = 0;
672a6f42
NB
226\f
227/* Initialize the GCC target structure. */
301d03af 228
8a5b8538
AS
229#undef TARGET_OPTION_OVERRIDE
230#define TARGET_OPTION_OVERRIDE pa_option_override
231
301d03af
RS
232#undef TARGET_ASM_ALIGNED_HI_OP
233#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
234#undef TARGET_ASM_ALIGNED_SI_OP
235#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
236#undef TARGET_ASM_ALIGNED_DI_OP
237#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
238#undef TARGET_ASM_UNALIGNED_HI_OP
239#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
240#undef TARGET_ASM_UNALIGNED_SI_OP
241#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
242#undef TARGET_ASM_UNALIGNED_DI_OP
243#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
244#undef TARGET_ASM_INTEGER
245#define TARGET_ASM_INTEGER pa_assemble_integer
246
08c148a8
NB
247#undef TARGET_ASM_FUNCTION_PROLOGUE
248#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
249#undef TARGET_ASM_FUNCTION_EPILOGUE
250#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
93ae92c1 251
9193312a
AS
252#undef TARGET_FUNCTION_VALUE
253#define TARGET_FUNCTION_VALUE pa_function_value
8a5b8538
AS
254#undef TARGET_LIBCALL_VALUE
255#define TARGET_LIBCALL_VALUE pa_libcall_value
256#undef TARGET_FUNCTION_VALUE_REGNO_P
257#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
9193312a 258
506d7b68
PB
259#undef TARGET_LEGITIMIZE_ADDRESS
260#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
261
c237e94a
ZW
262#undef TARGET_SCHED_ADJUST_COST
263#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
264#undef TARGET_SCHED_ADJUST_PRIORITY
265#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
266#undef TARGET_SCHED_ISSUE_RATE
267#define TARGET_SCHED_ISSUE_RATE pa_issue_rate
268
fb49053f
RH
269#undef TARGET_ENCODE_SECTION_INFO
270#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
772c5265
RH
271#undef TARGET_STRIP_NAME_ENCODING
272#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
fb49053f 273
4977bab6
ZW
274#undef TARGET_FUNCTION_OK_FOR_SIBCALL
275#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
276
8ddf681a
R
277#undef TARGET_COMMUTATIVE_P
278#define TARGET_COMMUTATIVE_P pa_commutative_p
279
c590b625
RH
280#undef TARGET_ASM_OUTPUT_MI_THUNK
281#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
3961e8fe
RH
282#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
283#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
c590b625 284
a5fe455b 285#undef TARGET_ASM_FILE_END
744b2d61
JDA
286#ifdef ASM_OUTPUT_EXTERNAL_REAL
287#define TARGET_ASM_FILE_END pa_hpux_file_end
288#else
a5fe455b 289#define TARGET_ASM_FILE_END output_deferred_plabels
744b2d61 290#endif
a5fe455b 291
8a5b8538
AS
292#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
293#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
294
35d434ed
JDA
295#if !defined(USE_COLLECT2)
296#undef TARGET_ASM_CONSTRUCTOR
297#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
298#undef TARGET_ASM_DESTRUCTOR
299#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
300#endif
301
4677862a
JDA
302#undef TARGET_INIT_BUILTINS
303#define TARGET_INIT_BUILTINS pa_init_builtins
304
41a1208a
JDA
305#undef TARGET_EXPAND_BUILTIN
306#define TARGET_EXPAND_BUILTIN pa_expand_builtin
307
8a5b8538
AS
308#undef TARGET_REGISTER_MOVE_COST
309#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
3c50106f
RH
310#undef TARGET_RTX_COSTS
311#define TARGET_RTX_COSTS hppa_rtx_costs
dcefdf67
RH
312#undef TARGET_ADDRESS_COST
313#define TARGET_ADDRESS_COST hppa_address_cost
3c50106f 314
18dbd950
RS
315#undef TARGET_MACHINE_DEPENDENT_REORG
316#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
317
41a1208a 318#if HPUX_LONG_DOUBLE_LIBRARY
c15c90bb
ZW
319#undef TARGET_INIT_LIBFUNCS
320#define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
321#endif
322
cde0f3fd
PB
323#undef TARGET_PROMOTE_FUNCTION_MODE
324#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
3f12cd9b 325#undef TARGET_PROMOTE_PROTOTYPES
586de218 326#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
3f12cd9b
KH
327
328#undef TARGET_STRUCT_VALUE_RTX
329#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
330#undef TARGET_RETURN_IN_MEMORY
331#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
fe984136
RH
332#undef TARGET_MUST_PASS_IN_STACK
333#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8cd5a4e0
RH
334#undef TARGET_PASS_BY_REFERENCE
335#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
6cdd5672
RH
336#undef TARGET_CALLEE_COPIES
337#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
78a52f11
RH
338#undef TARGET_ARG_PARTIAL_BYTES
339#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
fd29bdaf
NF
340#undef TARGET_FUNCTION_ARG
341#define TARGET_FUNCTION_ARG pa_function_arg
342#undef TARGET_FUNCTION_ARG_ADVANCE
343#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
c2ed6cf8
NF
344#undef TARGET_FUNCTION_ARG_BOUNDARY
345#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
3f12cd9b
KH
346
347#undef TARGET_EXPAND_BUILTIN_SAVEREGS
348#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
d7bd8aeb
JJ
349#undef TARGET_EXPAND_BUILTIN_VA_START
350#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
8101c928
RH
351#undef TARGET_GIMPLIFY_VA_ARG_EXPR
352#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
3f12cd9b 353
83c32f2e
JDA
354#undef TARGET_SCALAR_MODE_SUPPORTED_P
355#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
356
51076f96 357#undef TARGET_CANNOT_FORCE_CONST_MEM
fbbf66e7 358#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
51076f96 359
ec963611
JDA
360#undef TARGET_SECONDARY_RELOAD
361#define TARGET_SECONDARY_RELOAD pa_secondary_reload
362
16c16a24
JDA
363#undef TARGET_EXTRA_LIVE_ON_ENTRY
364#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
365
859c146c
RH
366#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
367#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
368#undef TARGET_TRAMPOLINE_INIT
369#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
370#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
371#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
1f65437d
SE
372#undef TARGET_DELEGITIMIZE_ADDRESS
373#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
bc707992
JDA
374#undef TARGET_INTERNAL_ARG_POINTER
375#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
376#undef TARGET_CAN_ELIMINATE
377#define TARGET_CAN_ELIMINATE pa_can_eliminate
5efd84c5
NF
378#undef TARGET_CONDITIONAL_REGISTER_USAGE
379#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
41a1208a
JDA
380#undef TARGET_C_MODE_FOR_SUFFIX
381#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
7550cb35
JDA
382#undef TARGET_ASM_FUNCTION_SECTION
383#define TARGET_ASM_FUNCTION_SECTION pa_function_section
859c146c 384
1a627b35
RS
385#undef TARGET_LEGITIMATE_CONSTANT_P
386#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
387
f6897b10 388struct gcc_target targetm = TARGET_INITIALIZER;
672a6f42 389\f
a2017852
JDA
390/* Parse the -mfixed-range= option string. */
391
392static void
393fix_range (const char *const_str)
394{
395 int i, first, last;
396 char *str, *dash, *comma;
397
398 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
399 REG2 are either register names or register numbers. The effect
400 of this option is to mark the registers in the range from REG1 to
401 REG2 as ``fixed'' so they won't be used by the compiler. This is
419df6a2 402 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
a2017852
JDA
403
404 i = strlen (const_str);
405 str = (char *) alloca (i + 1);
406 memcpy (str, const_str, i + 1);
407
408 while (1)
409 {
410 dash = strchr (str, '-');
411 if (!dash)
412 {
d4ee4d25 413 warning (0, "value of -mfixed-range must have form REG1-REG2");
a2017852
JDA
414 return;
415 }
416 *dash = '\0';
417
418 comma = strchr (dash + 1, ',');
419 if (comma)
420 *comma = '\0';
421
422 first = decode_reg_name (str);
423 if (first < 0)
424 {
d4ee4d25 425 warning (0, "unknown register name: %s", str);
a2017852
JDA
426 return;
427 }
428
429 last = decode_reg_name (dash + 1);
430 if (last < 0)
431 {
d4ee4d25 432 warning (0, "unknown register name: %s", dash + 1);
a2017852
JDA
433 return;
434 }
435
436 *dash = '-';
437
438 if (first > last)
439 {
d4ee4d25 440 warning (0, "%s-%s is an empty range", str, dash + 1);
a2017852
JDA
441 return;
442 }
443
444 for (i = first; i <= last; ++i)
445 fixed_regs[i] = call_used_regs[i] = 1;
446
447 if (!comma)
448 break;
449
450 *comma = ',';
451 str = comma + 1;
452 }
453
454 /* Check if all floating point registers have been fixed. */
455 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
456 if (!fixed_regs[i])
457 break;
458
459 if (i > FP_REG_LAST)
460 target_flags |= MASK_DISABLE_FPREGS;
461}
462
8a5b8538
AS
463/* Implement the TARGET_OPTION_OVERRIDE hook. */
464
465static void
466pa_option_override (void)
66617831 467{
1bb721dc
JM
468 unsigned int i;
469 cl_deferred_option *opt;
470 VEC(cl_deferred_option,heap) *vec
471 = (VEC(cl_deferred_option,heap) *) pa_deferred_options;
472
473 FOR_EACH_VEC_ELT (cl_deferred_option, vec, i, opt)
474 {
475 switch (opt->opt_index)
476 {
477 case OPT_mfixed_range_:
478 fix_range (opt->arg);
479 break;
480
481 default:
482 gcc_unreachable ();
483 }
484 }
485
1c31ecf6
JDA
486 /* Unconditional branches in the delay slot are not compatible with dwarf2
487 call frame information. There is no benefit in using this optimization
488 on PA8000 and later processors. */
489 if (pa_cpu >= PROCESSOR_8000
677f3fa8 490 || (targetm_common.except_unwind_info (&global_options) == UI_DWARF2
d5fabb58 491 && flag_exceptions)
1c31ecf6
JDA
492 || flag_unwind_tables)
493 target_flags &= ~MASK_JUMP_IN_DELAY;
494
6a73009d
JL
495 if (flag_pic && TARGET_PORTABLE_RUNTIME)
496 {
ab532386 497 warning (0, "PIC code generation is not supported in the portable runtime model");
6a73009d
JL
498 }
499
a7721dc0 500 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
6a73009d 501 {
ab532386 502 warning (0, "PIC code generation is not compatible with fast indirect calls");
6a73009d 503 }
0eba3d30 504
54eef932
JL
505 if (! TARGET_GAS && write_symbols != NO_DEBUG)
506 {
d4ee4d25
DD
507 warning (0, "-g is only supported when using GAS on this processor,");
508 warning (0, "-g option disabled");
54eef932
JL
509 write_symbols = NO_DEBUG;
510 }
d07d525a 511
7ee72796
JL
512 /* We only support the "big PIC" model now. And we always generate PIC
513 code when in 64bit mode. */
514 if (flag_pic == 1 || TARGET_64BIT)
520babc7
JL
515 flag_pic = 2;
516
e92abd50
JDA
517 /* Disable -freorder-blocks-and-partition as we don't support hot and
518 cold partitioning. */
519 if (flag_reorder_blocks_and_partition)
520 {
521 inform (input_location,
522 "-freorder-blocks-and-partition does not work "
523 "on this architecture");
524 flag_reorder_blocks_and_partition = 0;
525 flag_reorder_blocks = 1;
526 }
527
301d03af
RS
528 /* We can't guarantee that .dword is available for 32-bit targets. */
529 if (UNITS_PER_WORD == 4)
530 targetm.asm_out.aligned_op.di = NULL;
531
532 /* The unaligned ops are only available when using GAS. */
533 if (!TARGET_GAS)
534 {
535 targetm.asm_out.unaligned_op.hi = NULL;
536 targetm.asm_out.unaligned_op.si = NULL;
537 targetm.asm_out.unaligned_op.di = NULL;
538 }
9a55eab3
JDA
539
540 init_machine_status = pa_init_machine_status;
c47decad
JL
541}
542
41a1208a
JDA
543enum pa_builtins
544{
545 PA_BUILTIN_COPYSIGNQ,
546 PA_BUILTIN_FABSQ,
547 PA_BUILTIN_INFQ,
548 PA_BUILTIN_HUGE_VALQ,
549 PA_BUILTIN_max
550};
551
552static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
553
eab9e742 554static void
b7849684 555pa_init_builtins (void)
4677862a
JDA
556{
557#ifdef DONT_HAVE_FPUTC_UNLOCKED
b53b5aa5
KG
558 built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
559 built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
560 implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
561 = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
4677862a 562#endif
dfcb2b51 563#if TARGET_HPUX_11
7d522000
SE
564 if (built_in_decls [BUILT_IN_FINITE])
565 set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
566 if (built_in_decls [BUILT_IN_FINITEF])
567 set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
568#endif
41a1208a
JDA
569
570 if (HPUX_LONG_DOUBLE_LIBRARY)
571 {
572 tree decl, ftype;
573
574 /* Under HPUX, the __float128 type is a synonym for "long double". */
575 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
576 "__float128");
577
578 /* TFmode support builtins. */
579 ftype = build_function_type_list (long_double_type_node,
580 long_double_type_node,
581 NULL_TREE);
582 decl = add_builtin_function ("__builtin_fabsq", ftype,
583 PA_BUILTIN_FABSQ, BUILT_IN_MD,
584 "_U_Qfabs", NULL_TREE);
585 TREE_READONLY (decl) = 1;
586 pa_builtins[PA_BUILTIN_FABSQ] = decl;
587
588 ftype = build_function_type_list (long_double_type_node,
589 long_double_type_node,
590 long_double_type_node,
591 NULL_TREE);
592 decl = add_builtin_function ("__builtin_copysignq", ftype,
593 PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
594 "_U_Qfcopysign", NULL_TREE);
595 TREE_READONLY (decl) = 1;
596 pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;
597
12526412 598 ftype = build_function_type_list (long_double_type_node, NULL_TREE);
41a1208a
JDA
599 decl = add_builtin_function ("__builtin_infq", ftype,
600 PA_BUILTIN_INFQ, BUILT_IN_MD,
601 NULL, NULL_TREE);
602 pa_builtins[PA_BUILTIN_INFQ] = decl;
603
604 decl = add_builtin_function ("__builtin_huge_valq", ftype,
605 PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
606 NULL, NULL_TREE);
607 pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
608 }
609}
610
611static rtx
612pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
613 enum machine_mode mode ATTRIBUTE_UNUSED,
614 int ignore ATTRIBUTE_UNUSED)
615{
616 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
617 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
618
619 switch (fcode)
620 {
621 case PA_BUILTIN_FABSQ:
622 case PA_BUILTIN_COPYSIGNQ:
623 return expand_call (exp, target, ignore);
624
625 case PA_BUILTIN_INFQ:
626 case PA_BUILTIN_HUGE_VALQ:
627 {
628 enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
629 REAL_VALUE_TYPE inf;
630 rtx tmp;
631
632 real_inf (&inf);
633 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
634
635 tmp = validize_mem (force_const_mem (target_mode, tmp));
636
637 if (target == 0)
638 target = gen_reg_rtx (target_mode);
639
640 emit_move_insn (target, tmp);
641 return target;
642 }
643
644 default:
645 gcc_unreachable ();
646 }
647
648 return NULL_RTX;
4677862a
JDA
649}
650
/* Function to init struct machine_function.
   pa_option_override stores this function in init_machine_status, so it
   is called through that pointer variable (from push_function_context,
   according to the original note).  Returns whatever
   ggc_alloc_cleared_machine_function allocates.  */

static struct machine_function *
pa_init_machine_status (void)
{
  struct machine_function *machine = ggc_alloc_cleared_machine_function ();

  return machine;
}
660
d8f95bed
JDA
661/* If FROM is a probable pointer register, mark TO as a probable
662 pointer register with the same pointer alignment as FROM. */
663
664static void
665copy_reg_pointer (rtx to, rtx from)
666{
667 if (REG_POINTER (from))
668 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
669}
670
23f6f34f
TG
671/* Return 1 if X contains a symbolic expression. We know these
672 expressions will have one of a few well defined forms, so
c1d1b3f0
JL
673 we need only check those forms. */
674int
b7849684 675symbolic_expression_p (rtx x)
c1d1b3f0
JL
676{
677
fe19a83d 678 /* Strip off any HIGH. */
c1d1b3f0
JL
679 if (GET_CODE (x) == HIGH)
680 x = XEXP (x, 0);
681
682 return (symbolic_operand (x, VOIDmode));
683}
684
47abc309 685/* Accept any constant that can be moved in one instruction into a
6746a52e 686 general register. */
23f6f34f 687int
5b281141 688cint_ok_for_move (HOST_WIDE_INT ival)
6746a52e
JL
689{
690 /* OK if ldo, ldil, or zdepi, can be used. */
5b281141
JDA
691 return (VAL_14_BITS_P (ival)
692 || ldil_cint_p (ival)
693 || zdepi_cint_p (ival));
6746a52e 694}
188538df 695\f
/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.

   IVAL is the constant to test.  Returns 1 when bits 0-10 are clear and
   bits 31 and above are either all clear or all set, otherwise 0.  */
int
ldil_cint_p (HOST_WIDE_INT ival)
{
  /* Build the mask in an unsigned type: left-shifting a negative signed
     value is undefined behavior in C.  */
  const unsigned HOST_WIDE_INT high_bits
    = ~(unsigned HOST_WIDE_INT) 0 << 31;
  const unsigned HOST_WIDE_INT x
    = (unsigned HOST_WIDE_INT) ival & (high_bits | 0x7ff);

  return x == 0 || x == high_bits;
}
706
/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
int
zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */

  /* Isolate the lowest set bit of X (zero when X is zero).  */
  const unsigned HOST_WIDE_INT lowest_set = x & -x;
  const unsigned HOST_WIDE_INT below_lowest = lowest_set - 1;
  const unsigned HOST_WIDE_INT folded
    = ((x >> 4) + lowest_set) & ~below_lowest;

  /* Return true iff FOLDED is zero or a power of two.  */
  return (folded & (folded - 1)) == 0;
}
722
/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit pattern like these:
   0....01....1
   1....10....0
   1..10..01..1  */
int
and_mask_p (unsigned HOST_WIDE_INT mask)
{
  const unsigned HOST_WIDE_INT inverted = ~mask;
  /* Adding the lowest set bit carries through the lowest run of ones.  */
  const unsigned HOST_WIDE_INT carried = inverted + (inverted & -inverted);

  /* At most one bit may remain set.  */
  return (carried & (carried - 1)) == 0;
}
735
/* True iff depi can be used to compute (reg | MASK).  */
int
ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  /* Adding the lowest set bit carries through the lowest run of ones.  */
  const unsigned HOST_WIDE_INT carried = mask + (mask & -mask);

  /* At most one bit may remain set.  */
  return (carried & (carried - 1)) == 0;
}
188538df
TG
743\f
744/* Legitimize PIC addresses. If the address is already
745 position-independent, we return ORIG. Newly generated
746 position-independent addresses go to REG. If we need more
747 than one register, we lose. */
748
749rtx
b7849684 750legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
188538df
TG
751{
752 rtx pic_ref = orig;
753
06ae7eb1 754 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
51076f96 755
abc95ed3 756 /* Labels need special handling. */
519104fe 757 if (pic_label_operand (orig, mode))
6bb36601 758 {
4d811a05
JDA
759 rtx insn;
760
b3d9ecf0
JL
761 /* We do not want to go through the movXX expanders here since that
762 would create recursion.
763
764 Nor do we really want to call a generator for a named pattern
765 since that requires multiple patterns if we want to support
766 multiple word sizes.
767
768 So instead we just emit the raw set, which avoids the movXX
769 expanders completely. */
d8f95bed 770 mark_reg_pointer (reg, BITS_PER_UNIT);
4d811a05
JDA
771 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
772
773 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
bbbbb16a 774 add_reg_note (insn, REG_EQUAL, orig);
4d811a05
JDA
775
776 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
777 and update LABEL_NUSES because this is not done automatically. */
778 if (reload_in_progress || reload_completed)
779 {
780 /* Extract LABEL_REF. */
781 if (GET_CODE (orig) == CONST)
782 orig = XEXP (XEXP (orig, 0), 0);
783 /* Extract CODE_LABEL. */
784 orig = XEXP (orig, 0);
65c5f2a6 785 add_reg_note (insn, REG_LABEL_OPERAND, orig);
4d811a05
JDA
786 LABEL_NUSES (orig)++;
787 }
e3b5732b 788 crtl->uses_pic_offset_table = 1;
6bb36601
JL
789 return reg;
790 }
188538df
TG
791 if (GET_CODE (orig) == SYMBOL_REF)
792 {
9ab81df2
JDA
793 rtx insn, tmp_reg;
794
144d51f9 795 gcc_assert (reg);
188538df 796
9ab81df2
JDA
797 /* Before reload, allocate a temporary register for the intermediate
798 result. This allows the sequence to be deleted when the final
799 result is unused and the insns are trivially dead. */
800 tmp_reg = ((reload_in_progress || reload_completed)
801 ? reg : gen_reg_rtx (Pmode));
802
9c575e20 803 if (function_label_operand (orig, VOIDmode))
7813231b 804 {
0b076fea
JDA
805 /* Force function label into memory in word mode. */
806 orig = XEXP (force_const_mem (word_mode, orig), 0);
7813231b
JDA
807 /* Load plabel address from DLT. */
808 emit_move_insn (tmp_reg,
809 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
810 gen_rtx_HIGH (word_mode, orig)));
811 pic_ref
812 = gen_const_mem (Pmode,
813 gen_rtx_LO_SUM (Pmode, tmp_reg,
814 gen_rtx_UNSPEC (Pmode,
542a8afa
RH
815 gen_rtvec (1, orig),
816 UNSPEC_DLTIND14R)));
7813231b
JDA
817 emit_move_insn (reg, pic_ref);
818 /* Now load address of function descriptor. */
819 pic_ref = gen_rtx_MEM (Pmode, reg);
820 }
821 else
822 {
823 /* Load symbol reference from DLT. */
824 emit_move_insn (tmp_reg,
825 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
826 gen_rtx_HIGH (word_mode, orig)));
827 pic_ref
828 = gen_const_mem (Pmode,
829 gen_rtx_LO_SUM (Pmode, tmp_reg,
830 gen_rtx_UNSPEC (Pmode,
831 gen_rtvec (1, orig),
832 UNSPEC_DLTIND14R)));
833 }
c5c76735 834
e3b5732b 835 crtl->uses_pic_offset_table = 1;
d8f95bed 836 mark_reg_pointer (reg, BITS_PER_UNIT);
9ab81df2
JDA
837 insn = emit_move_insn (reg, pic_ref);
838
839 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
bd94cb6e 840 set_unique_reg_note (insn, REG_EQUAL, orig);
9ab81df2 841
188538df
TG
842 return reg;
843 }
844 else if (GET_CODE (orig) == CONST)
845 {
f1c7ce82 846 rtx base;
188538df
TG
847
848 if (GET_CODE (XEXP (orig, 0)) == PLUS
849 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
850 return orig;
851
144d51f9
NS
852 gcc_assert (reg);
853 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
854
855 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
856 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
857 base == reg ? 0 : reg);
d8f95bed 858
188538df
TG
859 if (GET_CODE (orig) == CONST_INT)
860 {
a1747d2c 861 if (INT_14_BITS (orig))
ed8908e7 862 return plus_constant (base, INTVAL (orig));
188538df
TG
863 orig = force_reg (Pmode, orig);
864 }
ad2c71b7 865 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
188538df
TG
866 /* Likewise, should we set special REG_NOTEs here? */
867 }
d8f95bed 868
188538df
TG
869 return pic_ref;
870}
871
51076f96
RC
872static GTY(()) rtx gen_tls_tga;
873
874static rtx
875gen_tls_get_addr (void)
876{
877 if (!gen_tls_tga)
878 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
879 return gen_tls_tga;
880}
881
882static rtx
883hppa_tls_call (rtx arg)
884{
885 rtx ret;
886
887 ret = gen_reg_rtx (Pmode);
888 emit_library_call_value (gen_tls_get_addr (), ret,
889 LCT_CONST, Pmode, 1, arg, Pmode);
890
891 return ret;
892}
893
894static rtx
895legitimize_tls_address (rtx addr)
896{
897 rtx ret, insn, tmp, t1, t2, tp;
898 enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);
899
900 switch (model)
901 {
902 case TLS_MODEL_GLOBAL_DYNAMIC:
903 tmp = gen_reg_rtx (Pmode);
a758fa89
AJ
904 if (flag_pic)
905 emit_insn (gen_tgd_load_pic (tmp, addr));
906 else
907 emit_insn (gen_tgd_load (tmp, addr));
51076f96
RC
908 ret = hppa_tls_call (tmp);
909 break;
910
911 case TLS_MODEL_LOCAL_DYNAMIC:
912 ret = gen_reg_rtx (Pmode);
913 tmp = gen_reg_rtx (Pmode);
914 start_sequence ();
a758fa89
AJ
915 if (flag_pic)
916 emit_insn (gen_tld_load_pic (tmp, addr));
917 else
918 emit_insn (gen_tld_load (tmp, addr));
51076f96
RC
919 t1 = hppa_tls_call (tmp);
920 insn = get_insns ();
921 end_sequence ();
922 t2 = gen_reg_rtx (Pmode);
923 emit_libcall_block (insn, t2, t1,
924 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
925 UNSPEC_TLSLDBASE));
926 emit_insn (gen_tld_offset_load (ret, addr, t2));
927 break;
928
929 case TLS_MODEL_INITIAL_EXEC:
930 tp = gen_reg_rtx (Pmode);
931 tmp = gen_reg_rtx (Pmode);
932 ret = gen_reg_rtx (Pmode);
933 emit_insn (gen_tp_load (tp));
a758fa89
AJ
934 if (flag_pic)
935 emit_insn (gen_tie_load_pic (tmp, addr));
936 else
937 emit_insn (gen_tie_load (tmp, addr));
51076f96
RC
938 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
939 break;
940
941 case TLS_MODEL_LOCAL_EXEC:
942 tp = gen_reg_rtx (Pmode);
943 ret = gen_reg_rtx (Pmode);
944 emit_insn (gen_tp_load (tp));
945 emit_insn (gen_tle_load (ret, addr, tp));
946 break;
947
948 default:
06ae7eb1 949 gcc_unreachable ();
51076f96
RC
950 }
951
952 return ret;
953}
954
c1d1b3f0
JL
955/* Try machine-dependent ways of modifying an illegitimate address
956 to be legitimate. If we find one, return the new, valid address.
957 This macro is used in only one place: `memory_address' in explow.c.
958
959 OLDX is the address as it was before break_out_memory_refs was called.
960 In some cases it is useful to look at this to decide what needs to be done.
961
c1d1b3f0 962 It is always safe for this macro to do nothing. It exists to recognize
23f6f34f 963 opportunities to optimize the output.
c1d1b3f0
JL
964
965 For the PA, transform:
966
967 memory(X + <large int>)
968
969 into:
970
971 if (<large int> & mask) >= 16
972 Y = (<large int> & ~mask) + mask + 1 Round up.
973 else
974 Y = (<large int> & ~mask) Round down.
975 Z = X + Y
976 memory (Z + (<large int> - Y));
977
23f6f34f 978 This is for CSE to find several similar references, and only use one Z.
c1d1b3f0 979
1e5f1716 980 X can either be a SYMBOL_REF or REG, but because combine cannot
c1d1b3f0
JL
981 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
982 D will not fit in 14 bits.
983
984 MODE_FLOAT references allow displacements which fit in 5 bits, so use
23f6f34f 985 0x1f as the mask.
c1d1b3f0
JL
986
987 MODE_INT references allow displacements which fit in 14 bits, so use
23f6f34f 988 0x3fff as the mask.
c1d1b3f0
JL
989
990 This relies on the fact that most mode MODE_FLOAT references will use FP
991 registers and most mode MODE_INT references will use integer registers.
992 (In the rare case of an FP register used in an integer MODE, we depend
993 on secondary reloads to clean things up.)
994
995
996 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
997 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
ddd5a7c1 998 addressing modes to be used).
c1d1b3f0
JL
999
1000 Put X and Z into registers. Then put the entire expression into
1001 a register. */
1002
1003rtx
b7849684
JE
1004hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1005 enum machine_mode mode)
c1d1b3f0 1006{
c1d1b3f0
JL
1007 rtx orig = x;
1008
d8f95bed
JDA
1009 /* We need to canonicalize the order of operands in unscaled indexed
1010 addresses since the code that checks if an address is valid doesn't
1011 always try both orders. */
1012 if (!TARGET_NO_SPACE_REGS
1013 && GET_CODE (x) == PLUS
1014 && GET_MODE (x) == Pmode
1015 && REG_P (XEXP (x, 0))
1016 && REG_P (XEXP (x, 1))
1017 && REG_POINTER (XEXP (x, 0))
1018 && !REG_POINTER (XEXP (x, 1)))
1019 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
1020
51076f96
RC
1021 if (PA_SYMBOL_REF_TLS_P (x))
1022 return legitimize_tls_address (x);
1023 else if (flag_pic)
6bb36601
JL
1024 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1025
fe19a83d 1026 /* Strip off CONST. */
c1d1b3f0
JL
1027 if (GET_CODE (x) == CONST)
1028 x = XEXP (x, 0);
1029
68944452
JL
1030 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1031 That should always be safe. */
1032 if (GET_CODE (x) == PLUS
1033 && GET_CODE (XEXP (x, 0)) == REG
1034 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
1035 {
690d4228
JL
1036 rtx reg = force_reg (Pmode, XEXP (x, 1));
1037 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
68944452
JL
1038 }
1039
326bc2de
JL
1040 /* Note we must reject symbols which represent function addresses
1041 since the assembler/linker can't handle arithmetic on plabels. */
c1d1b3f0
JL
1042 if (GET_CODE (x) == PLUS
1043 && GET_CODE (XEXP (x, 1)) == CONST_INT
326bc2de
JL
1044 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1045 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
c1d1b3f0
JL
1046 || GET_CODE (XEXP (x, 0)) == REG))
1047 {
1048 rtx int_part, ptr_reg;
1049 int newoffset;
1050 int offset = INTVAL (XEXP (x, 1));
f9bd8d8e
JL
1051 int mask;
1052
1053 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
94919bd2 1054 ? (INT14_OK_STRICT ? 0x3fff : 0x1f) : 0x3fff);
c1d1b3f0 1055
23f6f34f 1056 /* Choose which way to round the offset. Round up if we
c1d1b3f0
JL
1057 are >= halfway to the next boundary. */
1058 if ((offset & mask) >= ((mask + 1) / 2))
1059 newoffset = (offset & ~ mask) + mask + 1;
1060 else
1061 newoffset = (offset & ~ mask);
1062
1063 /* If the newoffset will not fit in 14 bits (ldo), then
1064 handling this would take 4 or 5 instructions (2 to load
1065 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1066 add the new offset and the SYMBOL_REF.) Combine can
1067 not handle 4->2 or 5->2 combinations, so do not create
1068 them. */
1069 if (! VAL_14_BITS_P (newoffset)
1070 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1071 {
c5c76735 1072 rtx const_part = plus_constant (XEXP (x, 0), newoffset);
c1d1b3f0 1073 rtx tmp_reg
e5e28962 1074 = force_reg (Pmode,
ad2c71b7 1075 gen_rtx_HIGH (Pmode, const_part));
c1d1b3f0 1076 ptr_reg
e5e28962 1077 = force_reg (Pmode,
c5c76735
JL
1078 gen_rtx_LO_SUM (Pmode,
1079 tmp_reg, const_part));
c1d1b3f0
JL
1080 }
1081 else
1082 {
1083 if (! VAL_14_BITS_P (newoffset))
e5e28962 1084 int_part = force_reg (Pmode, GEN_INT (newoffset));
c1d1b3f0
JL
1085 else
1086 int_part = GEN_INT (newoffset);
1087
e5e28962 1088 ptr_reg = force_reg (Pmode,
ad2c71b7
JL
1089 gen_rtx_PLUS (Pmode,
1090 force_reg (Pmode, XEXP (x, 0)),
1091 int_part));
c1d1b3f0
JL
1092 }
1093 return plus_constant (ptr_reg, offset - newoffset);
1094 }
7426c959 1095
78c0acfd 1096 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
7426c959 1097
c1d1b3f0
JL
1098 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1099 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7426c959 1100 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
ec8e098d 1101 && (OBJECT_P (XEXP (x, 1))
7426c959
JL
1102 || GET_CODE (XEXP (x, 1)) == SUBREG)
1103 && GET_CODE (XEXP (x, 1)) != CONST)
c1d1b3f0
JL
1104 {
1105 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1106 rtx reg1, reg2;
78c0acfd
JL
1107
1108 reg1 = XEXP (x, 1);
1109 if (GET_CODE (reg1) != REG)
1110 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1111
1112 reg2 = XEXP (XEXP (x, 0), 0);
1113 if (GET_CODE (reg2) != REG)
1114 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1115
ad2c71b7 1116 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
c5c76735
JL
1117 gen_rtx_MULT (Pmode,
1118 reg2,
1119 GEN_INT (val)),
ad2c71b7 1120 reg1));
c1d1b3f0 1121 }
7426c959 1122
305123ba
JL
1123 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1124
1125 Only do so for floating point modes since this is more speculative
1126 and we lose if it's an integer store. */
78c0acfd 1127 if (GET_CODE (x) == PLUS
305123ba
JL
1128 && GET_CODE (XEXP (x, 0)) == PLUS
1129 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1130 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
78c0acfd
JL
1131 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
1132 && (mode == SFmode || mode == DFmode))
305123ba 1133 {
78c0acfd
JL
1134
1135 /* First, try and figure out what to use as a base register. */
b38bccca 1136 rtx reg1, reg2, base, idx;
78c0acfd
JL
1137
1138 reg1 = XEXP (XEXP (x, 0), 1);
1139 reg2 = XEXP (x, 1);
1140 base = NULL_RTX;
1141 idx = NULL_RTX;
1142
1143 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
3502dc9c
JDA
1144 then emit_move_sequence will turn on REG_POINTER so we'll know
1145 it's a base register below. */
78c0acfd
JL
1146 if (GET_CODE (reg1) != REG)
1147 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1148
1149 if (GET_CODE (reg2) != REG)
1150 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1151
1152 /* Figure out what the base and index are. */
6619e96c 1153
78c0acfd 1154 if (GET_CODE (reg1) == REG
3502dc9c 1155 && REG_POINTER (reg1))
78c0acfd
JL
1156 {
1157 base = reg1;
ad2c71b7
JL
1158 idx = gen_rtx_PLUS (Pmode,
1159 gen_rtx_MULT (Pmode,
1160 XEXP (XEXP (XEXP (x, 0), 0), 0),
1161 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1162 XEXP (x, 1));
78c0acfd
JL
1163 }
1164 else if (GET_CODE (reg2) == REG
3502dc9c 1165 && REG_POINTER (reg2))
78c0acfd
JL
1166 {
1167 base = reg2;
78c0acfd
JL
1168 idx = XEXP (x, 0);
1169 }
1170
1171 if (base == 0)
31d4f31f 1172 return orig;
78c0acfd
JL
1173
1174 /* If the index adds a large constant, try to scale the
1175 constant so that it can be loaded with only one insn. */
1176 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1177 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1178 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1179 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1180 {
1181 /* Divide the CONST_INT by the scale factor, then add it to A. */
1182 int val = INTVAL (XEXP (idx, 1));
1183
1184 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1185 reg1 = XEXP (XEXP (idx, 0), 0);
1186 if (GET_CODE (reg1) != REG)
1187 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1188
ad2c71b7 1189 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
78c0acfd
JL
1190
1191 /* We can now generate a simple scaled indexed address. */
c5c76735
JL
1192 return
1193 force_reg
1194 (Pmode, gen_rtx_PLUS (Pmode,
1195 gen_rtx_MULT (Pmode, reg1,
1196 XEXP (XEXP (idx, 0), 1)),
1197 base));
78c0acfd
JL
1198 }
1199
1200 /* If B + C is still a valid base register, then add them. */
1201 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1202 && INTVAL (XEXP (idx, 1)) <= 4096
1203 && INTVAL (XEXP (idx, 1)) >= -4096)
1204 {
1205 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1206 rtx reg1, reg2;
1207
ad2c71b7 1208 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
78c0acfd
JL
1209
1210 reg2 = XEXP (XEXP (idx, 0), 0);
1211 if (GET_CODE (reg2) != CONST_INT)
1212 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1213
ad2c71b7 1214 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
c5c76735
JL
1215 gen_rtx_MULT (Pmode,
1216 reg2,
ad2c71b7
JL
1217 GEN_INT (val)),
1218 reg1));
78c0acfd
JL
1219 }
1220
1221 /* Get the index into a register, then add the base + index and
1222 return a register holding the result. */
1223
1224 /* First get A into a register. */
1225 reg1 = XEXP (XEXP (idx, 0), 0);
1226 if (GET_CODE (reg1) != REG)
1227 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1228
1229 /* And get B into a register. */
1230 reg2 = XEXP (idx, 1);
1231 if (GET_CODE (reg2) != REG)
1232 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1233
ad2c71b7
JL
1234 reg1 = force_reg (Pmode,
1235 gen_rtx_PLUS (Pmode,
1236 gen_rtx_MULT (Pmode, reg1,
1237 XEXP (XEXP (idx, 0), 1)),
1238 reg2));
78c0acfd
JL
1239
1240 /* Add the result to our base register and return. */
ad2c71b7 1241 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
6619e96c 1242
305123ba
JL
1243 }
1244
23f6f34f 1245 /* Uh-oh. We might have an address for x[n-100000]. This needs
c2827c50
JL
1246 special handling to avoid creating an indexed memory address
1247 with x-100000 as the base.
6619e96c 1248
c2827c50
JL
1249 If the constant part is small enough, then it's still safe because
1250 there is a guard page at the beginning and end of the data segment.
1251
1252 Scaled references are common enough that we want to try and rearrange the
1253 terms so that we can use indexing for these addresses too. Only
305123ba 1254 do the optimization for floatint point modes. */
7426c959 1255
c2827c50
JL
1256 if (GET_CODE (x) == PLUS
1257 && symbolic_expression_p (XEXP (x, 1)))
7426c959
JL
1258 {
1259 /* Ugly. We modify things here so that the address offset specified
1260 by the index expression is computed first, then added to x to form
c2827c50 1261 the entire address. */
7426c959 1262
305123ba 1263 rtx regx1, regx2, regy1, regy2, y;
7426c959
JL
1264
1265 /* Strip off any CONST. */
1266 y = XEXP (x, 1);
1267 if (GET_CODE (y) == CONST)
1268 y = XEXP (y, 0);
1269
77fc9313
RK
1270 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1271 {
305123ba
JL
1272 /* See if this looks like
1273 (plus (mult (reg) (shadd_const))
1274 (const (plus (symbol_ref) (const_int))))
1275
78c0acfd 1276 Where const_int is small. In that case the const
6619e96c 1277 expression is a valid pointer for indexing.
78c0acfd
JL
1278
1279 If const_int is big, but can be divided evenly by shadd_const
1280 and added to (reg). This allows more scaled indexed addresses. */
1281 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1282 && GET_CODE (XEXP (x, 0)) == MULT
305123ba 1283 && GET_CODE (XEXP (y, 1)) == CONST_INT
78c0acfd
JL
1284 && INTVAL (XEXP (y, 1)) >= -4096
1285 && INTVAL (XEXP (y, 1)) <= 4095
1286 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1287 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
1288 {
1289 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1290 rtx reg1, reg2;
1291
1292 reg1 = XEXP (x, 1);
1293 if (GET_CODE (reg1) != REG)
1294 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1295
1296 reg2 = XEXP (XEXP (x, 0), 0);
1297 if (GET_CODE (reg2) != REG)
1298 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1299
ad2c71b7
JL
1300 return force_reg (Pmode,
1301 gen_rtx_PLUS (Pmode,
c5c76735
JL
1302 gen_rtx_MULT (Pmode,
1303 reg2,
ad2c71b7 1304 GEN_INT (val)),
c5c76735 1305 reg1));
78c0acfd
JL
1306 }
1307 else if ((mode == DFmode || mode == SFmode)
1308 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1309 && GET_CODE (XEXP (x, 0)) == MULT
1310 && GET_CODE (XEXP (y, 1)) == CONST_INT
1311 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1312 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
1313 && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
305123ba
JL
1314 {
1315 regx1
1316 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1317 / INTVAL (XEXP (XEXP (x, 0), 1))));
1318 regx2 = XEXP (XEXP (x, 0), 0);
1319 if (GET_CODE (regx2) != REG)
1320 regx2 = force_reg (Pmode, force_operand (regx2, 0));
ad2c71b7
JL
1321 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1322 regx2, regx1));
c5c76735
JL
1323 return
1324 force_reg (Pmode,
1325 gen_rtx_PLUS (Pmode,
1326 gen_rtx_MULT (Pmode, regx2,
1327 XEXP (XEXP (x, 0), 1)),
1328 force_reg (Pmode, XEXP (y, 0))));
305123ba 1329 }
c2827c50
JL
1330 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1331 && INTVAL (XEXP (y, 1)) >= -4096
1332 && INTVAL (XEXP (y, 1)) <= 4095)
1333 {
1334 /* This is safe because of the guard page at the
1335 beginning and end of the data space. Just
1336 return the original address. */
1337 return orig;
1338 }
305123ba
JL
1339 else
1340 {
1341 /* Doesn't look like one we can optimize. */
1342 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1343 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1344 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1345 regx1 = force_reg (Pmode,
ad2c71b7
JL
1346 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1347 regx1, regy2));
1348 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
305123ba 1349 }
77fc9313 1350 }
7426c959
JL
1351 }
1352
c1d1b3f0
JL
1353 return orig;
1354}
1355
8a5b8538
AS
1356/* Implement the TARGET_REGISTER_MOVE_COST hook.
1357
1358 Compute extra cost of moving data between one register class
1359 and another.
1360
1361 Make moves from SAR so expensive they should never happen. We used to
1362 have 0xffff here, but that generates overflow in rare cases.
1363
1364 Copies involving a FP register and a non-FP register are relatively
1365 expensive because they must go through memory.
1366
1367 Other copies are reasonably cheap. */
1368
1369static int
1370hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
1371 reg_class_t from, reg_class_t to)
1372{
1373 if (from == SHIFT_REGS)
1374 return 0x100;
483d7ad3
JDA
1375 else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
1376 return 18;
8a5b8538
AS
1377 else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
1378 || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
1379 return 16;
1380 else
1381 return 2;
1382}
1383
188538df
TG
1384/* For the HPPA, REG and REG+CONST is cost 0
1385 and addresses involving symbolic constants are cost 2.
1386
1387 PIC addresses are very expensive.
1388
1389 It is no coincidence that this has the same structure
1390 as GO_IF_LEGITIMATE_ADDRESS. */
dcefdf67
RH
1391
1392static int
f40751dd
JH
1393hppa_address_cost (rtx X,
1394 bool speed ATTRIBUTE_UNUSED)
188538df 1395{
dcefdf67
RH
1396 switch (GET_CODE (X))
1397 {
1398 case REG:
1399 case PLUS:
1400 case LO_SUM:
188538df 1401 return 1;
dcefdf67
RH
1402 case HIGH:
1403 return 2;
1404 default:
1405 return 4;
1406 }
188538df
TG
1407}
1408
3c50106f
RH
1409/* Compute a (partial) cost for rtx X. Return true if the complete
1410 cost has been computed, and false if subexpressions should be
1411 scanned. In either case, *TOTAL contains the cost result. */
1412
1413static bool
f40751dd
JH
1414hppa_rtx_costs (rtx x, int code, int outer_code, int *total,
1415 bool speed ATTRIBUTE_UNUSED)
3c50106f
RH
1416{
1417 switch (code)
1418 {
1419 case CONST_INT:
1420 if (INTVAL (x) == 0)
1421 *total = 0;
1422 else if (INT_14_BITS (x))
1423 *total = 1;
1424 else
1425 *total = 2;
1426 return true;
1427
1428 case HIGH:
1429 *total = 2;
1430 return true;
1431
1432 case CONST:
1433 case LABEL_REF:
1434 case SYMBOL_REF:
1435 *total = 4;
1436 return true;
1437
1438 case CONST_DOUBLE:
1439 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1440 && outer_code != SET)
1441 *total = 0;
1442 else
1443 *total = 8;
1444 return true;
1445
1446 case MULT:
1447 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1448 *total = COSTS_N_INSNS (3);
1449 else if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1450 *total = COSTS_N_INSNS (8);
1451 else
1452 *total = COSTS_N_INSNS (20);
1453 return true;
1454
1455 case DIV:
1456 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1457 {
1458 *total = COSTS_N_INSNS (14);
1459 return true;
1460 }
5efb1046 1461 /* FALLTHRU */
3c50106f
RH
1462
1463 case UDIV:
1464 case MOD:
1465 case UMOD:
1466 *total = COSTS_N_INSNS (60);
1467 return true;
1468
1469 case PLUS: /* this includes shNadd insns */
1470 case MINUS:
1471 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1472 *total = COSTS_N_INSNS (3);
1473 else
1474 *total = COSTS_N_INSNS (1);
1475 return true;
1476
1477 case ASHIFT:
1478 case ASHIFTRT:
1479 case LSHIFTRT:
1480 *total = COSTS_N_INSNS (1);
1481 return true;
1482
1483 default:
1484 return false;
1485 }
1486}
1487
6619e96c
AM
1488/* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1489 new rtx with the correct mode. */
1490static inline rtx
b7849684 1491force_mode (enum machine_mode mode, rtx orig)
6619e96c
AM
1492{
1493 if (mode == GET_MODE (orig))
1494 return orig;
1495
144d51f9 1496 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
6619e96c
AM
1497
1498 return gen_rtx_REG (mode, REGNO (orig));
1499}
1500
51076f96
RC
1501/* Return 1 if *X is a thread-local symbol. */
1502
1503static int
1504pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
1505{
1506 return PA_SYMBOL_REF_TLS_P (*x);
1507}
1508
1509/* Return 1 if X contains a thread-local symbol. */
1510
1511bool
1512pa_tls_referenced_p (rtx x)
1513{
1514 if (!TARGET_HAVE_TLS)
1515 return false;
1516
1517 return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
1518}
1519
fbbf66e7
RS
1520/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1521
1522static bool
1523pa_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1524{
1525 return pa_tls_referenced_p (x);
1526}
1527
188538df
TG
1528/* Emit insns to move operands[1] into operands[0].
1529
1530 Return 1 if we have written out everything that needs to be done to
1531 do the move. Otherwise, return 0 and the caller will emit the move
6619e96c 1532 normally.
1b8ad134
JL
1533
1534 Note SCRATCH_REG may not be in the proper mode depending on how it
c1207243 1535 will be used. This routine is responsible for creating a new copy
1b8ad134 1536 of SCRATCH_REG in the proper mode. */
188538df
TG
1537
1538int
b7849684 1539emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
188538df
TG
1540{
1541 register rtx operand0 = operands[0];
1542 register rtx operand1 = operands[1];
428be702 1543 register rtx tem;
188538df 1544
d8f95bed
JDA
1545 /* We can only handle indexed addresses in the destination operand
1546 of floating point stores. Thus, we need to break out indexed
1547 addresses from the destination operand. */
1548 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1549 {
b3a13419 1550 gcc_assert (can_create_pseudo_p ());
d8f95bed
JDA
1551
1552 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1553 operand0 = replace_equiv_address (operand0, tem);
1554 }
1555
1556 /* On targets with non-equivalent space registers, break out unscaled
1557 indexed addresses from the source operand before the final CSE.
1558 We have to do this because the REG_POINTER flag is not correctly
1559 carried through various optimization passes and CSE may substitute
1560 a pseudo without the pointer set for one with the pointer set. As
71cc389b 1561 a result, we loose various opportunities to create insns with
d8f95bed
JDA
1562 unscaled indexed addresses. */
1563 if (!TARGET_NO_SPACE_REGS
1564 && !cse_not_expected
1565 && GET_CODE (operand1) == MEM
1566 && GET_CODE (XEXP (operand1, 0)) == PLUS
1567 && REG_P (XEXP (XEXP (operand1, 0), 0))
1568 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1569 operand1
1570 = replace_equiv_address (operand1,
1571 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1572
54d65918
JL
1573 if (scratch_reg
1574 && reload_in_progress && GET_CODE (operand0) == REG
8a642d97 1575 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
f2034d06 1576 operand0 = reg_equiv_mem (REGNO (operand0));
54d65918
JL
1577 else if (scratch_reg
1578 && reload_in_progress && GET_CODE (operand0) == SUBREG
8a642d97
RK
1579 && GET_CODE (SUBREG_REG (operand0)) == REG
1580 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
27a2c2b5 1581 {
ddef6bc7 1582 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
71443006
JL
1583 the code which tracks sets/uses for delete_output_reload. */
1584 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
f2034d06 1585 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
ddef6bc7 1586 SUBREG_BYTE (operand0));
847898f6 1587 operand0 = alter_subreg (&temp);
27a2c2b5 1588 }
8a642d97 1589
54d65918
JL
1590 if (scratch_reg
1591 && reload_in_progress && GET_CODE (operand1) == REG
8a642d97 1592 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
f2034d06 1593 operand1 = reg_equiv_mem (REGNO (operand1));
54d65918
JL
1594 else if (scratch_reg
1595 && reload_in_progress && GET_CODE (operand1) == SUBREG
8a642d97
RK
1596 && GET_CODE (SUBREG_REG (operand1)) == REG
1597 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
27a2c2b5 1598 {
ddef6bc7 1599 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
71443006
JL
1600 the code which tracks sets/uses for delete_output_reload. */
1601 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
f2034d06 1602 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
ddef6bc7 1603 SUBREG_BYTE (operand1));
847898f6 1604 operand1 = alter_subreg (&temp);
27a2c2b5 1605 }
8a642d97 1606
54d65918 1607 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
428be702
RK
1608 && ((tem = find_replacement (&XEXP (operand0, 0)))
1609 != XEXP (operand0, 0)))
7c95bbfb 1610 operand0 = replace_equiv_address (operand0, tem);
d8f95bed 1611
54d65918 1612 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
428be702
RK
1613 && ((tem = find_replacement (&XEXP (operand1, 0)))
1614 != XEXP (operand1, 0)))
7c95bbfb 1615 operand1 = replace_equiv_address (operand1, tem);
428be702 1616
4d3cea21 1617 /* Handle secondary reloads for loads/stores of FP registers from
cae80939 1618 REG+D addresses where D does not fit in 5 or 14 bits, including
68944452 1619 (subreg (mem (addr))) cases. */
a4295210
JDA
1620 if (scratch_reg
1621 && fp_reg_operand (operand0, mode)
42fbe27f 1622 && ((GET_CODE (operand1) == MEM
cae80939
JDA
1623 && !memory_address_p ((GET_MODE_SIZE (mode) == 4 ? SFmode : DFmode),
1624 XEXP (operand1, 0)))
42fbe27f
JL
1625 || ((GET_CODE (operand1) == SUBREG
1626 && GET_CODE (XEXP (operand1, 0)) == MEM
cae80939
JDA
1627 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1628 ? SFmode : DFmode),
1629 XEXP (XEXP (operand1, 0), 0))))))
d2a94ec0 1630 {
42fbe27f
JL
1631 if (GET_CODE (operand1) == SUBREG)
1632 operand1 = XEXP (operand1, 0);
1633
1b8ad134
JL
1634 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1635 it in WORD_MODE regardless of what mode it was originally given
1636 to us. */
6619e96c 1637 scratch_reg = force_mode (word_mode, scratch_reg);
2d7b2c36
JL
1638
1639 /* D might not fit in 14 bits either; for such cases load D into
1640 scratch reg. */
690d4228 1641 if (!memory_address_p (Pmode, XEXP (operand1, 0)))
2d7b2c36
JL
1642 {
1643 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
d8f95bed
JDA
1644 emit_move_insn (scratch_reg,
1645 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1646 Pmode,
1647 XEXP (XEXP (operand1, 0), 0),
1648 scratch_reg));
2d7b2c36
JL
1649 }
1650 else
1651 emit_move_insn (scratch_reg, XEXP (operand1, 0));
c5c76735 1652 emit_insn (gen_rtx_SET (VOIDmode, operand0,
7c95bbfb 1653 replace_equiv_address (operand1, scratch_reg)));
d2a94ec0
TM
1654 return 1;
1655 }
a4295210
JDA
1656 else if (scratch_reg
1657 && fp_reg_operand (operand1, mode)
42fbe27f 1658 && ((GET_CODE (operand0) == MEM
cae80939
JDA
1659 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1660 ? SFmode : DFmode),
1661 XEXP (operand0, 0)))
42fbe27f
JL
1662 || ((GET_CODE (operand0) == SUBREG)
1663 && GET_CODE (XEXP (operand0, 0)) == MEM
cae80939
JDA
1664 && !memory_address_p ((GET_MODE_SIZE (mode) == 4
1665 ? SFmode : DFmode),
a4295210 1666 XEXP (XEXP (operand0, 0), 0)))))
d2a94ec0 1667 {
42fbe27f
JL
1668 if (GET_CODE (operand0) == SUBREG)
1669 operand0 = XEXP (operand0, 0);
1670
1b8ad134
JL
1671 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1672 it in WORD_MODE regardless of what mode it was originally given
1673 to us. */
6619e96c 1674 scratch_reg = force_mode (word_mode, scratch_reg);
1b8ad134 1675
2d7b2c36
JL
1676 /* D might not fit in 14 bits either; for such cases load D into
1677 scratch reg. */
690d4228 1678 if (!memory_address_p (Pmode, XEXP (operand0, 0)))
2d7b2c36
JL
1679 {
1680 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
ad2c71b7
JL
1681 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1682 0)),
690d4228 1683 Pmode,
ad2c71b7
JL
1684 XEXP (XEXP (operand0, 0),
1685 0),
1686 scratch_reg));
2d7b2c36
JL
1687 }
1688 else
1689 emit_move_insn (scratch_reg, XEXP (operand0, 0));
7c95bbfb
RH
1690 emit_insn (gen_rtx_SET (VOIDmode,
1691 replace_equiv_address (operand0, scratch_reg),
ad2c71b7 1692 operand1));
d2a94ec0
TM
1693 return 1;
1694 }
c063ad75
JL
1695 /* Handle secondary reloads for loads of FP registers from constant
1696 expressions by forcing the constant into memory.
1697
a4295210 1698 Use scratch_reg to hold the address of the memory location.
c063ad75 1699
4b0b4ab0 1700 The proper fix is to change TARGET_PREFERRED_RELOAD_CLASS to return
5bdc5878 1701 NO_REGS when presented with a const_int and a register class
c063ad75
JL
1702 containing only FP registers. Doing so unfortunately creates
1703 more problems than it solves. Fix this for 2.5. */
a4295210 1704 else if (scratch_reg
c063ad75 1705 && CONSTANT_P (operand1)
a4295210 1706 && fp_reg_operand (operand0, mode))
c063ad75 1707 {
7c95bbfb 1708 rtx const_mem, xoperands[2];
c063ad75 1709
1b8ad134
JL
1710 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1711 it in WORD_MODE regardless of what mode it was originally given
1712 to us. */
6619e96c 1713 scratch_reg = force_mode (word_mode, scratch_reg);
1b8ad134 1714
c063ad75
JL
1715 /* Force the constant into memory and put the address of the
1716 memory location into scratch_reg. */
7c95bbfb 1717 const_mem = force_const_mem (mode, operand1);
c063ad75 1718 xoperands[0] = scratch_reg;
7c95bbfb 1719 xoperands[1] = XEXP (const_mem, 0);
669054c1 1720 emit_move_sequence (xoperands, Pmode, 0);
c063ad75
JL
1721
1722 /* Now load the destination register. */
c5c76735 1723 emit_insn (gen_rtx_SET (mode, operand0,
7c95bbfb 1724 replace_equiv_address (const_mem, scratch_reg)));
c063ad75
JL
1725 return 1;
1726 }
4d3cea21 1727 /* Handle secondary reloads for SAR. These occur when trying to load
483d7ad3 1728 the SAR from memory or a constant. */
a4295210
JDA
1729 else if (scratch_reg
1730 && GET_CODE (operand0) == REG
9c1eed37 1731 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
4d3cea21 1732 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
483d7ad3 1733 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
4d3cea21 1734 {
09ece7b5
JL
1735 /* D might not fit in 14 bits either; for such cases load D into
1736 scratch reg. */
1737 if (GET_CODE (operand1) == MEM
2fd74bff 1738 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
09ece7b5 1739 {
dd8c13e3
JL
1740 /* We are reloading the address into the scratch register, so we
1741 want to make sure the scratch register is a full register. */
6619e96c 1742 scratch_reg = force_mode (word_mode, scratch_reg);
dd8c13e3 1743
6619e96c 1744 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
ad2c71b7
JL
1745 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1746 0)),
690d4228 1747 Pmode,
ad2c71b7
JL
1748 XEXP (XEXP (operand1, 0),
1749 0),
1750 scratch_reg));
dd8c13e3
JL
1751
1752 /* Now we are going to load the scratch register from memory,
1753 we want to load it in the same width as the original MEM,
1754 which must be the same as the width of the ultimate destination,
1755 OPERAND0. */
6619e96c
AM
1756 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1757
7c95bbfb
RH
1758 emit_move_insn (scratch_reg,
1759 replace_equiv_address (operand1, scratch_reg));
09ece7b5
JL
1760 }
1761 else
dd8c13e3
JL
1762 {
1763 /* We want to load the scratch register using the same mode as
1764 the ultimate destination. */
6619e96c
AM
1765 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1766
dd8c13e3
JL
1767 emit_move_insn (scratch_reg, operand1);
1768 }
1769
1770 /* And emit the insn to set the ultimate destination. We know that
1771 the scratch register has the same mode as the destination at this
1772 point. */
4d3cea21
JL
1773 emit_move_insn (operand0, scratch_reg);
1774 return 1;
1775 }
d8f95bed 1776 /* Handle the most common case: storing into a register. */
d2a94ec0 1777 else if (register_operand (operand0, mode))
188538df
TG
1778 {
1779 if (register_operand (operand1, mode)
b8e42321
JDA
1780 || (GET_CODE (operand1) == CONST_INT
1781 && cint_ok_for_move (INTVAL (operand1)))
f048ca47 1782 || (operand1 == CONST0_RTX (mode))
188538df 1783 || (GET_CODE (operand1) == HIGH
80225b66 1784 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
188538df
TG
1785 /* Only `general_operands' can come here, so MEM is ok. */
1786 || GET_CODE (operand1) == MEM)
1787 {
d8f95bed
JDA
1788 /* Various sets are created during RTL generation which don't
1789 have the REG_POINTER flag correctly set. After the CSE pass,
1790 instruction recognition can fail if we don't consistently
1791 set this flag when performing register copies. This should
1792 also improve the opportunities for creating insns that use
1793 unscaled indexing. */
1794 if (REG_P (operand0) && REG_P (operand1))
1795 {
1796 if (REG_POINTER (operand1)
1797 && !REG_POINTER (operand0)
1798 && !HARD_REGISTER_P (operand0))
1799 copy_reg_pointer (operand0, operand1);
d8f95bed
JDA
1800 }
1801
1802 /* When MEMs are broken out, the REG_POINTER flag doesn't
1803 get set. In some cases, we can set the REG_POINTER flag
1804 from the declaration for the MEM. */
1805 if (REG_P (operand0)
1806 && GET_CODE (operand1) == MEM
1807 && !REG_POINTER (operand0))
1808 {
1809 tree decl = MEM_EXPR (operand1);
1810
1811 /* Set the register pointer flag and register alignment
1812 if the declaration for this memory reference is a
077c8ada
SE
1813 pointer type. */
1814 if (decl)
d8f95bed
JDA
1815 {
1816 tree type;
1817
1818 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1819 tree operand 1. */
1820 if (TREE_CODE (decl) == COMPONENT_REF)
1821 decl = TREE_OPERAND (decl, 1);
1822
1823 type = TREE_TYPE (decl);
dd25a747 1824 type = strip_array_types (type);
d8f95bed
JDA
1825
1826 if (POINTER_TYPE_P (type))
1827 {
1828 int align;
1829
1830 type = TREE_TYPE (type);
1831 /* Using TYPE_ALIGN_OK is rather conservative as
1832 only the ada frontend actually sets it. */
1833 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1834 : BITS_PER_UNIT);
1835 mark_reg_pointer (operand0, align);
1836 }
1837 }
1838 }
1839
ad2c71b7 1840 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
188538df
TG
1841 return 1;
1842 }
1843 }
1844 else if (GET_CODE (operand0) == MEM)
1845 {
d66dec28
JL
1846 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1847 && !(reload_in_progress || reload_completed))
1848 {
1849 rtx temp = gen_reg_rtx (DFmode);
1850
ad2c71b7
JL
1851 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1852 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
d66dec28
JL
1853 return 1;
1854 }
f048ca47 1855 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
188538df
TG
1856 {
1857 /* Run this case quickly. */
ad2c71b7 1858 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
188538df
TG
1859 return 1;
1860 }
1bc695cd 1861 if (! (reload_in_progress || reload_completed))
188538df
TG
1862 {
1863 operands[0] = validize_mem (operand0);
1864 operands[1] = operand1 = force_reg (mode, operand1);
1865 }
1866 }
1867
44201dba
JL
1868 /* Simplify the source if we need to.
1869 Note we do have to handle function labels here, even though we do
1870 not consider them legitimate constants. Loop optimizations can
06387d7c 1871 call the emit_move_xxx with one as a source. */
f1c7ce82 1872 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
9c575e20 1873 || function_label_operand (operand1, VOIDmode)
43940f6b 1874 || (GET_CODE (operand1) == HIGH
ba365a19 1875 && symbolic_operand (XEXP (operand1, 0), mode)))
188538df 1876 {
43940f6b
JL
1877 int ishighonly = 0;
1878
1879 if (GET_CODE (operand1) == HIGH)
1880 {
1881 ishighonly = 1;
1882 operand1 = XEXP (operand1, 0);
1883 }
188538df
TG
1884 if (symbolic_operand (operand1, mode))
1885 {
5eceed92 1886 /* Argh. The assembler and linker can't handle arithmetic
b0fabad3 1887 involving plabels.
5eceed92 1888
b0fabad3
JL
1889 So we force the plabel into memory, load operand0 from
1890 the memory location, then add in the constant part. */
44201dba
JL
1891 if ((GET_CODE (operand1) == CONST
1892 && GET_CODE (XEXP (operand1, 0)) == PLUS
9c575e20
JDA
1893 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1894 VOIDmode))
1895 || function_label_operand (operand1, VOIDmode))
5eceed92 1896 {
8e64b41a 1897 rtx temp, const_part;
b0fabad3
JL
1898
1899 /* Figure out what (if any) scratch register to use. */
1900 if (reload_in_progress || reload_completed)
1b8ad134
JL
1901 {
1902 scratch_reg = scratch_reg ? scratch_reg : operand0;
1903 /* SCRATCH_REG will hold an address and maybe the actual
1904 data. We want it in WORD_MODE regardless of what mode it
1905 was originally given to us. */
6619e96c 1906 scratch_reg = force_mode (word_mode, scratch_reg);
1b8ad134 1907 }
b0fabad3
JL
1908 else if (flag_pic)
1909 scratch_reg = gen_reg_rtx (Pmode);
1910
44201dba
JL
1911 if (GET_CODE (operand1) == CONST)
1912 {
1913 /* Save away the constant part of the expression. */
1914 const_part = XEXP (XEXP (operand1, 0), 1);
144d51f9 1915 gcc_assert (GET_CODE (const_part) == CONST_INT);
44201dba
JL
1916
1917 /* Force the function label into memory. */
1918 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1919 }
1920 else
1921 {
1922 /* No constant part. */
1923 const_part = NULL_RTX;
5eceed92 1924
44201dba
JL
1925 /* Force the function label into memory. */
1926 temp = force_const_mem (mode, operand1);
1927 }
6619e96c 1928
b0fabad3
JL
1929
1930 /* Get the address of the memory location. PIC-ify it if
1931 necessary. */
1932 temp = XEXP (temp, 0);
1933 if (flag_pic)
1934 temp = legitimize_pic_address (temp, mode, scratch_reg);
1935
1936 /* Put the address of the memory location into our destination
1937 register. */
1938 operands[1] = temp;
1939 emit_move_sequence (operands, mode, scratch_reg);
1940
1941 /* Now load from the memory location into our destination
1942 register. */
ad2c71b7 1943 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
b0fabad3
JL
1944 emit_move_sequence (operands, mode, scratch_reg);
1945
1946 /* And add back in the constant part. */
44201dba
JL
1947 if (const_part != NULL_RTX)
1948 expand_inc (operand0, const_part);
b0fabad3
JL
1949
1950 return 1;
5eceed92
JL
1951 }
1952
188538df
TG
1953 if (flag_pic)
1954 {
1bc695cd
JL
1955 rtx temp;
1956
1957 if (reload_in_progress || reload_completed)
1b8ad134
JL
1958 {
1959 temp = scratch_reg ? scratch_reg : operand0;
1960 /* TEMP will hold an address and maybe the actual
1961 data. We want it in WORD_MODE regardless of what mode it
1962 was originally given to us. */
6619e96c 1963 temp = force_mode (word_mode, temp);
1b8ad134 1964 }
1bc695cd
JL
1965 else
1966 temp = gen_reg_rtx (Pmode);
23f6f34f 1967
b0fabad3
JL
1968 /* (const (plus (symbol) (const_int))) must be forced to
1969 memory during/after reload if the const_int will not fit
1970 in 14 bits. */
1971 if (GET_CODE (operand1) == CONST
bc4a9f17
JL
1972 && GET_CODE (XEXP (operand1, 0)) == PLUS
1973 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
1974 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
1975 && (reload_completed || reload_in_progress)
1976 && flag_pic)
1977 {
7c95bbfb 1978 rtx const_mem = force_const_mem (mode, operand1);
1c9ef36d 1979 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
bc4a9f17 1980 mode, temp);
7c95bbfb 1981 operands[1] = replace_equiv_address (const_mem, operands[1]);
bc4a9f17
JL
1982 emit_move_sequence (operands, mode, temp);
1983 }
5eceed92
JL
1984 else
1985 {
1986 operands[1] = legitimize_pic_address (operand1, mode, temp);
d8f95bed
JDA
1987 if (REG_P (operand0) && REG_P (operands[1]))
1988 copy_reg_pointer (operand0, operands[1]);
ad2c71b7 1989 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
5eceed92 1990 }
188538df 1991 }
6bb36601
JL
1992 /* On the HPPA, references to data space are supposed to use dp,
1993 register 27, but showing it in the RTL inhibits various cse
1994 and loop optimizations. */
23f6f34f 1995 else
188538df 1996 {
5eceed92 1997 rtx temp, set;
43940f6b 1998
23f6f34f 1999 if (reload_in_progress || reload_completed)
1b8ad134
JL
2000 {
2001 temp = scratch_reg ? scratch_reg : operand0;
2002 /* TEMP will hold an address and maybe the actual
2003 data. We want it in WORD_MODE regardless of what mode it
2004 was originally given to us. */
6619e96c 2005 temp = force_mode (word_mode, temp);
1b8ad134 2006 }
43940f6b
JL
2007 else
2008 temp = gen_reg_rtx (mode);
2009
68944452 2010 /* Loading a SYMBOL_REF into a register makes that register
6619e96c 2011 safe to be used as the base in an indexed address.
68944452
JL
2012
2013 Don't mark hard registers though. That loses. */
c34d858f
RK
2014 if (GET_CODE (operand0) == REG
2015 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
d8f95bed 2016 mark_reg_pointer (operand0, BITS_PER_UNIT);
68944452 2017 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
d8f95bed
JDA
2018 mark_reg_pointer (temp, BITS_PER_UNIT);
2019
43940f6b 2020 if (ishighonly)
ad2c71b7 2021 set = gen_rtx_SET (mode, operand0, temp);
43940f6b 2022 else
c5c76735
JL
2023 set = gen_rtx_SET (VOIDmode,
2024 operand0,
ad2c71b7 2025 gen_rtx_LO_SUM (mode, temp, operand1));
23f6f34f 2026
ad2c71b7
JL
2027 emit_insn (gen_rtx_SET (VOIDmode,
2028 temp,
2029 gen_rtx_HIGH (mode, operand1)));
b0ce651a 2030 emit_insn (set);
326bc2de 2031
188538df 2032 }
43940f6b 2033 return 1;
188538df 2034 }
51076f96
RC
2035 else if (pa_tls_referenced_p (operand1))
2036 {
2037 rtx tmp = operand1;
2038 rtx addend = NULL;
2039
2040 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2041 {
2042 addend = XEXP (XEXP (tmp, 0), 1);
2043 tmp = XEXP (XEXP (tmp, 0), 0);
2044 }
2045
2046 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2047 tmp = legitimize_tls_address (tmp);
2048 if (addend)
2049 {
2050 tmp = gen_rtx_PLUS (mode, tmp, addend);
2051 tmp = force_operand (tmp, operands[0]);
2052 }
2053 operands[1] = tmp;
2054 }
a1747d2c 2055 else if (GET_CODE (operand1) != CONST_INT
a4295210 2056 || !cint_ok_for_move (INTVAL (operand1)))
188538df 2057 {
a4295210
JDA
2058 rtx insn, temp;
2059 rtx op1 = operand1;
4cce9dd8 2060 HOST_WIDE_INT value = 0;
a4295210
JDA
2061 HOST_WIDE_INT insv = 0;
2062 int insert = 0;
2063
4cce9dd8
RS
2064 if (GET_CODE (operand1) == CONST_INT)
2065 value = INTVAL (operand1);
2066
a4295210
JDA
2067 if (TARGET_64BIT
2068 && GET_CODE (operand1) == CONST_INT
e0c556d3 2069 && HOST_BITS_PER_WIDE_INT > 32
520babc7
JL
2070 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2071 {
e0c556d3 2072 HOST_WIDE_INT nval;
520babc7 2073
b8e42321
JDA
2074 /* Extract the low order 32 bits of the value and sign extend.
2075 If the new value is the same as the original value, we can
2076 use the original value as-is. If the new value is
2077 different, we use it and insert the most-significant 32-bits
2078 of the original value into the final result. */
a4295210 2079 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
e0c556d3 2080 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
a4295210 2081 if (value != nval)
520babc7 2082 {
b8e42321 2083#if HOST_BITS_PER_WIDE_INT > 32
a4295210 2084 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
b8e42321 2085#endif
a4295210
JDA
2086 insert = 1;
2087 value = nval;
520babc7
JL
2088 operand1 = GEN_INT (nval);
2089 }
2090 }
1bc695cd
JL
2091
2092 if (reload_in_progress || reload_completed)
a4295210 2093 temp = scratch_reg ? scratch_reg : operand0;
1bc695cd
JL
2094 else
2095 temp = gen_reg_rtx (mode);
2096
47abc309
JDA
2097 /* We don't directly split DImode constants on 32-bit targets
2098 because PLUS uses an 11-bit immediate and the insn sequence
2099 generated is not as efficient as the one using HIGH/LO_SUM. */
2100 if (GET_CODE (operand1) == CONST_INT
0eab7815 2101 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
a4295210
JDA
2102 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2103 && !insert)
b8e42321 2104 {
47abc309 2105 /* Directly break constant into high and low parts. This
b8e42321
JDA
2106 provides better optimization opportunities because various
2107 passes recognize constants split with PLUS but not LO_SUM.
2108 We use a 14-bit signed low part except when the addition
2109 of 0x4000 to the high part might change the sign of the
2110 high part. */
b8e42321
JDA
2111 HOST_WIDE_INT low = value & 0x3fff;
2112 HOST_WIDE_INT high = value & ~ 0x3fff;
2113
2114 if (low >= 0x2000)
2115 {
2116 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2117 high += 0x2000;
2118 else
2119 high += 0x4000;
2120 }
2121
2122 low = value - high;
520babc7 2123
b8e42321
JDA
2124 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2125 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2126 }
2127 else
520babc7 2128 {
b8e42321
JDA
2129 emit_insn (gen_rtx_SET (VOIDmode, temp,
2130 gen_rtx_HIGH (mode, operand1)));
2131 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
520babc7 2132 }
6619e96c 2133
a4295210
JDA
2134 insn = emit_move_insn (operands[0], operands[1]);
2135
2136 /* Now insert the most significant 32 bits of the value
2137 into the register. When we don't have a second register
2138 available, it could take up to nine instructions to load
2139 a 64-bit integer constant. Prior to reload, we force
2140 constants that would take more than three instructions
2141 to load to the constant pool. During and after reload,
2142 we have to handle all possible values. */
2143 if (insert)
2144 {
2145 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2146 register and the value to be inserted is outside the
2147 range that can be loaded with three depdi instructions. */
2148 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2149 {
2150 operand1 = GEN_INT (insv);
2151
2152 emit_insn (gen_rtx_SET (VOIDmode, temp,
2153 gen_rtx_HIGH (mode, operand1)));
2154 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2155 emit_insn (gen_insv (operand0, GEN_INT (32),
2156 const0_rtx, temp));
2157 }
2158 else
2159 {
2160 int len = 5, pos = 27;
2161
2162 /* Insert the bits using the depdi instruction. */
2163 while (pos >= 0)
2164 {
2165 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2166 HOST_WIDE_INT sign = v5 < 0;
2167
2168 /* Left extend the insertion. */
2169 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2170 while (pos > 0 && (insv & 1) == sign)
2171 {
2172 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2173 len += 1;
2174 pos -= 1;
2175 }
2176
2177 emit_insn (gen_insv (operand0, GEN_INT (len),
2178 GEN_INT (pos), GEN_INT (v5)));
2179
2180 len = pos > 0 && pos < 5 ? pos : 5;
2181 pos -= len;
2182 }
2183 }
2184 }
b8e42321 2185
bd94cb6e 2186 set_unique_reg_note (insn, REG_EQUAL, op1);
b8e42321 2187
520babc7 2188 return 1;
188538df
TG
2189 }
2190 }
2191 /* Now have insn-emit do whatever it normally does. */
2192 return 0;
2193}
2194
c77c286a 2195/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
c4bb6b38 2196 it will need a link/runtime reloc). */
c77c286a
JL
2197
2198int
b7849684 2199reloc_needed (tree exp)
c77c286a
JL
2200{
2201 int reloc = 0;
2202
2203 switch (TREE_CODE (exp))
2204 {
2205 case ADDR_EXPR:
2206 return 1;
2207
5be014d5 2208 case POINTER_PLUS_EXPR:
c77c286a
JL
2209 case PLUS_EXPR:
2210 case MINUS_EXPR:
2211 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2212 reloc |= reloc_needed (TREE_OPERAND (exp, 1));
2213 break;
2214
1043771b 2215 CASE_CONVERT:
c77c286a
JL
2216 case NON_LVALUE_EXPR:
2217 reloc = reloc_needed (TREE_OPERAND (exp, 0));
2218 break;
2219
2220 case CONSTRUCTOR:
2221 {
28f155be
GB
2222 tree value;
2223 unsigned HOST_WIDE_INT ix;
2224
2225 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2226 if (value)
2227 reloc |= reloc_needed (value);
c77c286a
JL
2228 }
2229 break;
2230
2231 case ERROR_MARK:
2232 break;
51723711
KG
2233
2234 default:
2235 break;
c77c286a
JL
2236 }
2237 return reloc;
2238}
2239
188538df
TG
2240\f
2241/* Return the best assembler insn template
71cc389b 2242 for moving operands[1] into operands[0] as a fullword. */
519104fe 2243const char *
b7849684 2244singlemove_string (rtx *operands)
188538df 2245{
0c235d7e
TG
2246 HOST_WIDE_INT intval;
2247
188538df
TG
2248 if (GET_CODE (operands[0]) == MEM)
2249 return "stw %r1,%0";
0c235d7e 2250 if (GET_CODE (operands[1]) == MEM)
188538df 2251 return "ldw %1,%0";
0c235d7e 2252 if (GET_CODE (operands[1]) == CONST_DOUBLE)
e5c2baa1 2253 {
0c235d7e
TG
2254 long i;
2255 REAL_VALUE_TYPE d;
e5c2baa1 2256
144d51f9 2257 gcc_assert (GET_MODE (operands[1]) == SFmode);
e5c2baa1 2258
0c235d7e
TG
2259 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2260 bit pattern. */
2261 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2262 REAL_VALUE_TO_TARGET_SINGLE (d, i);
e5c2baa1 2263
0c235d7e
TG
2264 operands[1] = GEN_INT (i);
2265 /* Fall through to CONST_INT case. */
2266 }
2267 if (GET_CODE (operands[1]) == CONST_INT)
e5c2baa1 2268 {
0c235d7e
TG
2269 intval = INTVAL (operands[1]);
2270
2271 if (VAL_14_BITS_P (intval))
2272 return "ldi %1,%0";
2273 else if ((intval & 0x7ff) == 0)
2274 return "ldil L'%1,%0";
2275 else if (zdepi_cint_p (intval))
f38b27c7 2276 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
e5c2baa1
RS
2277 else
2278 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2279 }
188538df
TG
2280 return "copy %1,%0";
2281}
2282\f
2283
f133af4c
TG
2284/* Compute position (in OP[1]) and width (in OP[2])
2285 useful for copying IMM to a register using the zdepi
2286 instructions. Store the immediate value to insert in OP[0]. */
519104fe 2287static void
b7849684 2288compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
c819adf2 2289{
0e7f4c19 2290 int lsb, len;
c819adf2 2291
0e7f4c19
TG
2292 /* Find the least significant set bit in IMM. */
2293 for (lsb = 0; lsb < 32; lsb++)
c819adf2 2294 {
0e7f4c19 2295 if ((imm & 1) != 0)
c819adf2 2296 break;
0e7f4c19 2297 imm >>= 1;
c819adf2
TG
2298 }
2299
0e7f4c19
TG
2300 /* Choose variants based on *sign* of the 5-bit field. */
2301 if ((imm & 0x10) == 0)
2302 len = (lsb <= 28) ? 4 : 32 - lsb;
c819adf2
TG
2303 else
2304 {
0e7f4c19 2305 /* Find the width of the bitstring in IMM. */
ef8d9a0e 2306 for (len = 5; len < 32 - lsb; len++)
c819adf2 2307 {
ef8d9a0e 2308 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
c819adf2 2309 break;
c819adf2
TG
2310 }
2311
0e7f4c19
TG
2312 /* Sign extend IMM as a 5-bit value. */
2313 imm = (imm & 0xf) - 0x10;
c819adf2
TG
2314 }
2315
a1747d2c
TG
2316 op[0] = imm;
2317 op[1] = 31 - lsb;
2318 op[2] = len;
c819adf2
TG
2319}
2320
520babc7
JL
2321/* Compute position (in OP[1]) and width (in OP[2])
2322 useful for copying IMM to a register using the depdi,z
2323 instructions. Store the immediate value to insert in OP[0]. */
2324void
b7849684 2325compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
520babc7 2326{
ef8d9a0e
JDA
2327 int lsb, len, maxlen;
2328
2329 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
520babc7
JL
2330
2331 /* Find the least significant set bit in IMM. */
ef8d9a0e 2332 for (lsb = 0; lsb < maxlen; lsb++)
520babc7
JL
2333 {
2334 if ((imm & 1) != 0)
2335 break;
2336 imm >>= 1;
2337 }
2338
2339 /* Choose variants based on *sign* of the 5-bit field. */
2340 if ((imm & 0x10) == 0)
ef8d9a0e 2341 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
520babc7
JL
2342 else
2343 {
2344 /* Find the width of the bitstring in IMM. */
ef8d9a0e 2345 for (len = 5; len < maxlen - lsb; len++)
520babc7 2346 {
831c1763 2347 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
520babc7
JL
2348 break;
2349 }
2350
ef8d9a0e
JDA
2351 /* Extend length if host is narrow and IMM is negative. */
2352 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2353 len += 32;
2354
520babc7
JL
2355 /* Sign extend IMM as a 5-bit value. */
2356 imm = (imm & 0xf) - 0x10;
2357 }
2358
2359 op[0] = imm;
2360 op[1] = 63 - lsb;
2361 op[2] = len;
2362}
2363
188538df
TG
2364/* Output assembler code to perform a doubleword move insn
2365 with operands OPERANDS. */
2366
519104fe 2367const char *
b7849684 2368output_move_double (rtx *operands)
188538df
TG
2369{
2370 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2371 rtx latehalf[2];
2372 rtx addreg0 = 0, addreg1 = 0;
2373
2374 /* First classify both operands. */
2375
2376 if (REG_P (operands[0]))
2377 optype0 = REGOP;
2378 else if (offsettable_memref_p (operands[0]))
2379 optype0 = OFFSOP;
2380 else if (GET_CODE (operands[0]) == MEM)
2381 optype0 = MEMOP;
2382 else
2383 optype0 = RNDOP;
2384
2385 if (REG_P (operands[1]))
2386 optype1 = REGOP;
2387 else if (CONSTANT_P (operands[1]))
2388 optype1 = CNSTOP;
2389 else if (offsettable_memref_p (operands[1]))
2390 optype1 = OFFSOP;
2391 else if (GET_CODE (operands[1]) == MEM)
2392 optype1 = MEMOP;
2393 else
2394 optype1 = RNDOP;
2395
2396 /* Check for the cases that the operand constraints are not
144d51f9
NS
2397 supposed to allow to happen. */
2398 gcc_assert (optype0 == REGOP || optype1 == REGOP);
188538df 2399
5401050b
JDA
2400 /* Handle copies between general and floating registers. */
2401
2402 if (optype0 == REGOP && optype1 == REGOP
2403 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2404 {
2405 if (FP_REG_P (operands[0]))
2406 {
2407 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2408 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2409 return "{fldds|fldd} -16(%%sp),%0";
2410 }
2411 else
2412 {
2413 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2414 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2415 return "{ldws|ldw} -12(%%sp),%R0";
2416 }
2417 }
2418
188538df
TG
2419 /* Handle auto decrementing and incrementing loads and stores
2420 specifically, since the structure of the function doesn't work
2421 for them without major modification. Do it better when we learn
2422 this port about the general inc/dec addressing of PA.
2423 (This was written by tege. Chide him if it doesn't work.) */
2424
2425 if (optype0 == MEMOP)
2426 {
e37ce5f6
JL
2427 /* We have to output the address syntax ourselves, since print_operand
2428 doesn't deal with the addresses we want to use. Fix this later. */
2429
188538df 2430 rtx addr = XEXP (operands[0], 0);
e37ce5f6 2431 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
188538df 2432 {
ad2c71b7 2433 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
e37ce5f6
JL
2434
2435 operands[0] = XEXP (addr, 0);
144d51f9
NS
2436 gcc_assert (GET_CODE (operands[1]) == REG
2437 && GET_CODE (operands[0]) == REG);
e37ce5f6 2438
144d51f9
NS
2439 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2440
2441 /* No overlap between high target register and address
2442 register. (We do this in a non-obvious way to
2443 save a register file writeback) */
2444 if (GET_CODE (addr) == POST_INC)
2445 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2446 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
9682683d 2447 }
e37ce5f6 2448 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
9682683d 2449 {
ad2c71b7 2450 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
e37ce5f6
JL
2451
2452 operands[0] = XEXP (addr, 0);
144d51f9
NS
2453 gcc_assert (GET_CODE (operands[1]) == REG
2454 && GET_CODE (operands[0]) == REG);
2455
2456 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2457 /* No overlap between high target register and address
2458 register. (We do this in a non-obvious way to save a
2459 register file writeback) */
2460 if (GET_CODE (addr) == PRE_INC)
2461 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2462 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
188538df
TG
2463 }
2464 }
2465 if (optype1 == MEMOP)
2466 {
2467 /* We have to output the address syntax ourselves, since print_operand
2468 doesn't deal with the addresses we want to use. Fix this later. */
2469
2470 rtx addr = XEXP (operands[1], 0);
2471 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2472 {
ad2c71b7 2473 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
188538df
TG
2474
2475 operands[1] = XEXP (addr, 0);
144d51f9
NS
2476 gcc_assert (GET_CODE (operands[0]) == REG
2477 && GET_CODE (operands[1]) == REG);
188538df
TG
2478
2479 if (!reg_overlap_mentioned_p (high_reg, addr))
2480 {
2481 /* No overlap between high target register and address
dd605bb4 2482 register. (We do this in a non-obvious way to
188538df
TG
2483 save a register file writeback) */
2484 if (GET_CODE (addr) == POST_INC)
f38b27c7 2485 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
6126a380 2486 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
188538df
TG
2487 }
2488 else
2489 {
2490 /* This is an undefined situation. We should load into the
2491 address register *and* update that register. Probably
2492 we don't need to handle this at all. */
2493 if (GET_CODE (addr) == POST_INC)
f38b27c7
JL
2494 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2495 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
188538df
TG
2496 }
2497 }
2498 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2499 {
ad2c71b7 2500 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
188538df
TG
2501
2502 operands[1] = XEXP (addr, 0);
144d51f9
NS
2503 gcc_assert (GET_CODE (operands[0]) == REG
2504 && GET_CODE (operands[1]) == REG);
188538df
TG
2505
2506 if (!reg_overlap_mentioned_p (high_reg, addr))
2507 {
2508 /* No overlap between high target register and address
dd605bb4 2509 register. (We do this in a non-obvious way to
188538df
TG
2510 save a register file writeback) */
2511 if (GET_CODE (addr) == PRE_INC)
f38b27c7
JL
2512 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2513 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
188538df
TG
2514 }
2515 else
2516 {
2517 /* This is an undefined situation. We should load into the
2518 address register *and* update that register. Probably
2519 we don't need to handle this at all. */
2520 if (GET_CODE (addr) == PRE_INC)
f38b27c7
JL
2521 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2522 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
188538df
TG
2523 }
2524 }
a89974a2
JL
2525 else if (GET_CODE (addr) == PLUS
2526 && GET_CODE (XEXP (addr, 0)) == MULT)
2527 {
4c6d8726 2528 rtx xoperands[4];
ad2c71b7 2529 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
a89974a2
JL
2530
2531 if (!reg_overlap_mentioned_p (high_reg, addr))
2532 {
a89974a2
JL
2533 xoperands[0] = high_reg;
2534 xoperands[1] = XEXP (addr, 1);
2535 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2536 xoperands[3] = XEXP (XEXP (addr, 0), 1);
f38b27c7
JL
2537 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2538 xoperands);
d2d28085 2539 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
a89974a2
JL
2540 }
2541 else
2542 {
a89974a2
JL
2543 xoperands[0] = high_reg;
2544 xoperands[1] = XEXP (addr, 1);
2545 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2546 xoperands[3] = XEXP (XEXP (addr, 0), 1);
f38b27c7
JL
2547 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2548 xoperands);
d2d28085 2549 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
a89974a2 2550 }
a89974a2 2551 }
188538df
TG
2552 }
2553
2554 /* If an operand is an unoffsettable memory ref, find a register
2555 we can increment temporarily to make it refer to the second word. */
2556
2557 if (optype0 == MEMOP)
2558 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2559
2560 if (optype1 == MEMOP)
2561 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2562
2563 /* Ok, we can do one word at a time.
2564 Normally we do the low-numbered word first.
2565
2566 In either case, set up in LATEHALF the operands to use
2567 for the high-numbered word and in some cases alter the
2568 operands in OPERANDS to be suitable for the low-numbered word. */
2569
2570 if (optype0 == REGOP)
ad2c71b7 2571 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
188538df 2572 else if (optype0 == OFFSOP)
b72f00af 2573 latehalf[0] = adjust_address (operands[0], SImode, 4);
188538df
TG
2574 else
2575 latehalf[0] = operands[0];
2576
2577 if (optype1 == REGOP)
ad2c71b7 2578 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
188538df 2579 else if (optype1 == OFFSOP)
b72f00af 2580 latehalf[1] = adjust_address (operands[1], SImode, 4);
188538df
TG
2581 else if (optype1 == CNSTOP)
2582 split_double (operands[1], &operands[1], &latehalf[1]);
2583 else
2584 latehalf[1] = operands[1];
2585
2586 /* If the first move would clobber the source of the second one,
2587 do them in the other order.
2588
bad883f8 2589 This can happen in two cases:
188538df 2590
bad883f8
JL
2591 mem -> register where the first half of the destination register
2592 is the same register used in the memory's address. Reload
2593 can create such insns.
188538df 2594
bad883f8 2595 mem in this case will be either register indirect or register
6619e96c 2596 indirect plus a valid offset.
bad883f8
JL
2597
2598 register -> register move where REGNO(dst) == REGNO(src + 1)
6619e96c 2599 someone (Tim/Tege?) claimed this can happen for parameter loads.
bad883f8
JL
2600
2601 Handle mem -> register case first. */
2602 if (optype0 == REGOP
2603 && (optype1 == MEMOP || optype1 == OFFSOP)
2604 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2605 operands[1], 0))
188538df 2606 {
188538df
TG
2607 /* Do the late half first. */
2608 if (addreg1)
498ee10c 2609 output_asm_insn ("ldo 4(%0),%0", &addreg1);
188538df 2610 output_asm_insn (singlemove_string (latehalf), latehalf);
bad883f8
JL
2611
2612 /* Then clobber. */
188538df 2613 if (addreg1)
498ee10c 2614 output_asm_insn ("ldo -4(%0),%0", &addreg1);
188538df
TG
2615 return singlemove_string (operands);
2616 }
2617
bad883f8 2618 /* Now handle register -> register case. */
63a1f834
TG
2619 if (optype0 == REGOP && optype1 == REGOP
2620 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2621 {
2622 output_asm_insn (singlemove_string (latehalf), latehalf);
2623 return singlemove_string (operands);
2624 }
2625
188538df
TG
2626 /* Normal case: do the two words, low-numbered first. */
2627
2628 output_asm_insn (singlemove_string (operands), operands);
2629
2630 /* Make any unoffsettable addresses point at high-numbered word. */
2631 if (addreg0)
498ee10c 2632 output_asm_insn ("ldo 4(%0),%0", &addreg0);
188538df 2633 if (addreg1)
498ee10c 2634 output_asm_insn ("ldo 4(%0),%0", &addreg1);
188538df
TG
2635
2636 /* Do that word. */
2637 output_asm_insn (singlemove_string (latehalf), latehalf);
2638
2639 /* Undo the adds we just did. */
2640 if (addreg0)
498ee10c 2641 output_asm_insn ("ldo -4(%0),%0", &addreg0);
188538df 2642 if (addreg1)
498ee10c 2643 output_asm_insn ("ldo -4(%0),%0", &addreg1);
188538df
TG
2644
2645 return "";
2646}
2647\f
519104fe 2648const char *
b7849684 2649output_fp_move_double (rtx *operands)
188538df
TG
2650{
2651 if (FP_REG_P (operands[0]))
2652 {
23f6f34f 2653 if (FP_REG_P (operands[1])
f048ca47 2654 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
55abf18a 2655 output_asm_insn ("fcpy,dbl %f1,%0", operands);
23f6f34f 2656 else
2414e0e2 2657 output_asm_insn ("fldd%F1 %1,%0", operands);
188538df
TG
2658 }
2659 else if (FP_REG_P (operands[1]))
2660 {
2414e0e2 2661 output_asm_insn ("fstd%F0 %1,%0", operands);
188538df 2662 }
144d51f9 2663 else
f048ca47 2664 {
144d51f9
NS
2665 rtx xoperands[2];
2666
2667 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2668
23f6f34f 2669 /* This is a pain. You have to be prepared to deal with an
ddd5a7c1 2670 arbitrary address here including pre/post increment/decrement.
f048ca47
JL
2671
2672 so avoid this in the MD. */
144d51f9
NS
2673 gcc_assert (GET_CODE (operands[0]) == REG);
2674
2675 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2676 xoperands[0] = operands[0];
2677 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
f048ca47 2678 }
188538df
TG
2679 return "";
2680}
2681\f
2682/* Return a REG that occurs in ADDR with coefficient 1.
2683 ADDR can be effectively incremented by incrementing REG. */
2684
2685static rtx
b7849684 2686find_addr_reg (rtx addr)
188538df
TG
2687{
2688 while (GET_CODE (addr) == PLUS)
2689 {
2690 if (GET_CODE (XEXP (addr, 0)) == REG)
2691 addr = XEXP (addr, 0);
2692 else if (GET_CODE (XEXP (addr, 1)) == REG)
2693 addr = XEXP (addr, 1);
2694 else if (CONSTANT_P (XEXP (addr, 0)))
2695 addr = XEXP (addr, 1);
2696 else if (CONSTANT_P (XEXP (addr, 1)))
2697 addr = XEXP (addr, 0);
2698 else
144d51f9 2699 gcc_unreachable ();
188538df 2700 }
144d51f9
NS
2701 gcc_assert (GET_CODE (addr) == REG);
2702 return addr;
188538df
TG
2703}
2704
188538df
TG
2705/* Emit code to perform a block move.
2706
188538df
TG
2707 OPERANDS[0] is the destination pointer as a REG, clobbered.
2708 OPERANDS[1] is the source pointer as a REG, clobbered.
68944452 2709 OPERANDS[2] is a register for temporary storage.
188538df 2710 OPERANDS[3] is a register for temporary storage.
cdc9103c 2711 OPERANDS[4] is the size as a CONST_INT
6619e96c 2712 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
71cc389b 2713 OPERANDS[6] is another temporary register. */
188538df 2714
519104fe 2715const char *
b7849684 2716output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
188538df
TG
2717{
2718 int align = INTVAL (operands[5]);
68944452 2719 unsigned long n_bytes = INTVAL (operands[4]);
188538df 2720
cdc9103c 2721 /* We can't move more than a word at a time because the PA
188538df 2722 has no longer integer move insns. (Could use fp mem ops?) */
cdc9103c
JDA
2723 if (align > (TARGET_64BIT ? 8 : 4))
2724 align = (TARGET_64BIT ? 8 : 4);
188538df 2725
68944452
JL
2726 /* Note that we know each loop below will execute at least twice
2727 (else we would have open-coded the copy). */
2728 switch (align)
188538df 2729 {
cdc9103c
JDA
2730 case 8:
2731 /* Pre-adjust the loop counter. */
2732 operands[4] = GEN_INT (n_bytes - 16);
2733 output_asm_insn ("ldi %4,%2", operands);
2734
2735 /* Copying loop. */
2736 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2737 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2738 output_asm_insn ("std,ma %3,8(%0)", operands);
2739 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2740 output_asm_insn ("std,ma %6,8(%0)", operands);
2741
2742 /* Handle the residual. There could be up to 7 bytes of
2743 residual to copy! */
2744 if (n_bytes % 16 != 0)
2745 {
2746 operands[4] = GEN_INT (n_bytes % 8);
2747 if (n_bytes % 16 >= 8)
2748 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2749 if (n_bytes % 8 != 0)
2750 output_asm_insn ("ldd 0(%1),%6", operands);
2751 if (n_bytes % 16 >= 8)
2752 output_asm_insn ("std,ma %3,8(%0)", operands);
2753 if (n_bytes % 8 != 0)
2754 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2755 }
2756 return "";
2757
68944452
JL
2758 case 4:
2759 /* Pre-adjust the loop counter. */
2760 operands[4] = GEN_INT (n_bytes - 8);
2761 output_asm_insn ("ldi %4,%2", operands);
2762
2763 /* Copying loop. */
f38b27c7
JL
2764 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2765 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2766 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
68944452 2767 output_asm_insn ("addib,>= -8,%2,.-12", operands);
f38b27c7 2768 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
68944452
JL
2769
2770 /* Handle the residual. There could be up to 7 bytes of
2771 residual to copy! */
2772 if (n_bytes % 8 != 0)
2773 {
2774 operands[4] = GEN_INT (n_bytes % 4);
2775 if (n_bytes % 8 >= 4)
f38b27c7 2776 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
68944452 2777 if (n_bytes % 4 != 0)
d2d28085 2778 output_asm_insn ("ldw 0(%1),%6", operands);
68944452 2779 if (n_bytes % 8 >= 4)
f38b27c7 2780 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
68944452 2781 if (n_bytes % 4 != 0)
f38b27c7 2782 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
68944452
JL
2783 }
2784 return "";
188538df 2785
68944452
JL
2786 case 2:
2787 /* Pre-adjust the loop counter. */
2788 operands[4] = GEN_INT (n_bytes - 4);
2789 output_asm_insn ("ldi %4,%2", operands);
188538df 2790
68944452 2791 /* Copying loop. */
f38b27c7
JL
2792 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2793 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2794 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
68944452 2795 output_asm_insn ("addib,>= -4,%2,.-12", operands);
f38b27c7 2796 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
188538df 2797
68944452
JL
2798 /* Handle the residual. */
2799 if (n_bytes % 4 != 0)
2800 {
2801 if (n_bytes % 4 >= 2)
f38b27c7 2802 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
68944452 2803 if (n_bytes % 2 != 0)
d2d28085 2804 output_asm_insn ("ldb 0(%1),%6", operands);
68944452 2805 if (n_bytes % 4 >= 2)
f38b27c7 2806 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
68944452 2807 if (n_bytes % 2 != 0)
d2d28085 2808 output_asm_insn ("stb %6,0(%0)", operands);
68944452
JL
2809 }
2810 return "";
188538df 2811
68944452
JL
2812 case 1:
2813 /* Pre-adjust the loop counter. */
2814 operands[4] = GEN_INT (n_bytes - 2);
2815 output_asm_insn ("ldi %4,%2", operands);
188538df 2816
68944452 2817 /* Copying loop. */
f38b27c7
JL
2818 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2819 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2820 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
68944452 2821 output_asm_insn ("addib,>= -2,%2,.-12", operands);
f38b27c7 2822 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
188538df 2823
68944452
JL
2824 /* Handle the residual. */
2825 if (n_bytes % 2 != 0)
2826 {
d2d28085
JL
2827 output_asm_insn ("ldb 0(%1),%3", operands);
2828 output_asm_insn ("stb %3,0(%0)", operands);
68944452
JL
2829 }
2830 return "";
188538df 2831
68944452 2832 default:
144d51f9 2833 gcc_unreachable ();
188538df 2834 }
188538df 2835}
3673e996
RS
2836
2837/* Count the number of insns necessary to handle this block move.
2838
2839 Basic structure is the same as emit_block_move, except that we
2840 count insns rather than emit them. */
2841
519104fe 2842static int
70128ad9 2843compute_movmem_length (rtx insn)
3673e996
RS
2844{
2845 rtx pat = PATTERN (insn);
a36a47ad
GS
2846 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2847 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
68944452 2848 unsigned int n_insns = 0;
3673e996
RS
2849
2850 /* We can't move more than four bytes at a time because the PA
2851 has no longer integer move insns. (Could use fp mem ops?) */
cdc9103c
JDA
2852 if (align > (TARGET_64BIT ? 8 : 4))
2853 align = (TARGET_64BIT ? 8 : 4);
3673e996 2854
90304f64 2855 /* The basic copying loop. */
68944452 2856 n_insns = 6;
3673e996 2857
68944452
JL
2858 /* Residuals. */
2859 if (n_bytes % (2 * align) != 0)
3673e996 2860 {
90304f64
JL
2861 if ((n_bytes % (2 * align)) >= align)
2862 n_insns += 2;
2863
2864 if ((n_bytes % align) != 0)
2865 n_insns += 2;
3673e996 2866 }
68944452
JL
2867
2868 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2869 return n_insns * 4;
3673e996 2870}
cdc9103c
JDA
2871
2872/* Emit code to perform a block clear.
2873
2874 OPERANDS[0] is the destination pointer as a REG, clobbered.
2875 OPERANDS[1] is a register for temporary storage.
2876 OPERANDS[2] is the size as a CONST_INT
2877 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2878
2879const char *
2880output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2881{
2882 int align = INTVAL (operands[3]);
2883 unsigned long n_bytes = INTVAL (operands[2]);
2884
2885 /* We can't clear more than a word at a time because the PA
2886 has no longer integer move insns. */
2887 if (align > (TARGET_64BIT ? 8 : 4))
2888 align = (TARGET_64BIT ? 8 : 4);
2889
2890 /* Note that we know each loop below will execute at least twice
2891 (else we would have open-coded the copy). */
2892 switch (align)
2893 {
2894 case 8:
2895 /* Pre-adjust the loop counter. */
2896 operands[2] = GEN_INT (n_bytes - 16);
2897 output_asm_insn ("ldi %2,%1", operands);
2898
2899 /* Loop. */
2900 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2901 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2902 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2903
2904 /* Handle the residual. There could be up to 7 bytes of
2905 residual to copy! */
2906 if (n_bytes % 16 != 0)
2907 {
2908 operands[2] = GEN_INT (n_bytes % 8);
2909 if (n_bytes % 16 >= 8)
2910 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2911 if (n_bytes % 8 != 0)
2912 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2913 }
2914 return "";
2915
2916 case 4:
2917 /* Pre-adjust the loop counter. */
2918 operands[2] = GEN_INT (n_bytes - 8);
2919 output_asm_insn ("ldi %2,%1", operands);
2920
2921 /* Loop. */
2922 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2923 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2924 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2925
2926 /* Handle the residual. There could be up to 7 bytes of
2927 residual to copy! */
2928 if (n_bytes % 8 != 0)
2929 {
2930 operands[2] = GEN_INT (n_bytes % 4);
2931 if (n_bytes % 8 >= 4)
2932 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2933 if (n_bytes % 4 != 0)
2934 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2935 }
2936 return "";
2937
2938 case 2:
2939 /* Pre-adjust the loop counter. */
2940 operands[2] = GEN_INT (n_bytes - 4);
2941 output_asm_insn ("ldi %2,%1", operands);
2942
2943 /* Loop. */
2944 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2945 output_asm_insn ("addib,>= -4,%1,.-4", operands);
2946 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2947
2948 /* Handle the residual. */
2949 if (n_bytes % 4 != 0)
2950 {
2951 if (n_bytes % 4 >= 2)
2952 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
2953 if (n_bytes % 2 != 0)
2954 output_asm_insn ("stb %%r0,0(%0)", operands);
2955 }
2956 return "";
2957
2958 case 1:
2959 /* Pre-adjust the loop counter. */
2960 operands[2] = GEN_INT (n_bytes - 2);
2961 output_asm_insn ("ldi %2,%1", operands);
2962
2963 /* Loop. */
2964 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2965 output_asm_insn ("addib,>= -2,%1,.-4", operands);
2966 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
2967
2968 /* Handle the residual. */
2969 if (n_bytes % 2 != 0)
2970 output_asm_insn ("stb %%r0,0(%0)", operands);
2971
2972 return "";
2973
2974 default:
144d51f9 2975 gcc_unreachable ();
cdc9103c
JDA
2976 }
2977}
2978
2979/* Count the number of insns necessary to handle this block move.
2980
2981 Basic structure is the same as emit_block_move, except that we
2982 count insns rather than emit them. */
2983
2984static int
70128ad9 2985compute_clrmem_length (rtx insn)
cdc9103c
JDA
2986{
2987 rtx pat = PATTERN (insn);
2988 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
2989 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
2990 unsigned int n_insns = 0;
2991
2992 /* We can't clear more than a word at a time because the PA
2993 has no longer integer move insns. */
2994 if (align > (TARGET_64BIT ? 8 : 4))
2995 align = (TARGET_64BIT ? 8 : 4);
2996
2997 /* The basic loop. */
2998 n_insns = 4;
2999
3000 /* Residuals. */
3001 if (n_bytes % (2 * align) != 0)
3002 {
3003 if ((n_bytes % (2 * align)) >= align)
3004 n_insns++;
3005
3006 if ((n_bytes % align) != 0)
3007 n_insns++;
3008 }
3009
3010 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3011 return n_insns * 4;
3012}
188538df
TG
3013\f
3014
519104fe 3015const char *
b7849684 3016output_and (rtx *operands)
0e7f4c19 3017{
d2a94ec0 3018 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
0e7f4c19 3019 {
0c235d7e 3020 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
0e7f4c19
TG
3021 int ls0, ls1, ms0, p, len;
3022
3023 for (ls0 = 0; ls0 < 32; ls0++)
3024 if ((mask & (1 << ls0)) == 0)
3025 break;
3026
3027 for (ls1 = ls0; ls1 < 32; ls1++)
3028 if ((mask & (1 << ls1)) != 0)
3029 break;
3030
3031 for (ms0 = ls1; ms0 < 32; ms0++)
3032 if ((mask & (1 << ms0)) == 0)
3033 break;
3034
144d51f9 3035 gcc_assert (ms0 == 32);
0e7f4c19
TG
3036
3037 if (ls1 == 32)
3038 {
3039 len = ls0;
3040
144d51f9 3041 gcc_assert (len);
0e7f4c19 3042
8919037c 3043 operands[2] = GEN_INT (len);
f38b27c7 3044 return "{extru|extrw,u} %1,31,%2,%0";
0e7f4c19
TG
3045 }
3046 else
3047 {
3048 /* We could use this `depi' for the case above as well, but `depi'
3049 requires one more register file access than an `extru'. */
3050
3051 p = 31 - ls0;
3052 len = ls1 - ls0;
3053
8919037c
TG
3054 operands[2] = GEN_INT (p);
3055 operands[3] = GEN_INT (len);
f38b27c7 3056 return "{depi|depwi} 0,%2,%3,%0";
0e7f4c19
TG
3057 }
3058 }
3059 else
3060 return "and %1,%2,%0";
3061}
3062
520babc7
JL
3063/* Return a string to perform a bitwise-and of operands[1] with operands[2]
3064 storing the result in operands[0]. */
0952f89b 3065const char *
b7849684 3066output_64bit_and (rtx *operands)
520babc7
JL
3067{
3068 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3069 {
3070 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
e0c556d3 3071 int ls0, ls1, ms0, p, len;
520babc7
JL
3072
3073 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
e0c556d3 3074 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
520babc7
JL
3075 break;
3076
3077 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
e0c556d3 3078 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
520babc7
JL
3079 break;
3080
3081 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
e0c556d3 3082 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
520babc7
JL
3083 break;
3084
144d51f9 3085 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
520babc7
JL
3086
3087 if (ls1 == HOST_BITS_PER_WIDE_INT)
3088 {
3089 len = ls0;
3090
144d51f9 3091 gcc_assert (len);
520babc7
JL
3092
3093 operands[2] = GEN_INT (len);
3094 return "extrd,u %1,63,%2,%0";
3095 }
3096 else
3097 {
3098 /* We could use this `depi' for the case above as well, but `depi'
3099 requires one more register file access than an `extru'. */
3100
3101 p = 63 - ls0;
3102 len = ls1 - ls0;
3103
3104 operands[2] = GEN_INT (p);
3105 operands[3] = GEN_INT (len);
3106 return "depdi 0,%2,%3,%0";
3107 }
3108 }
3109 else
3110 return "and %1,%2,%0";
3111}
3112
519104fe 3113const char *
b7849684 3114output_ior (rtx *operands)
0e7f4c19 3115{
0c235d7e 3116 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
f1c7ce82 3117 int bs0, bs1, p, len;
23f6f34f 3118
8365d59b
TG
3119 if (INTVAL (operands[2]) == 0)
3120 return "copy %1,%0";
0e7f4c19 3121
8365d59b
TG
3122 for (bs0 = 0; bs0 < 32; bs0++)
3123 if ((mask & (1 << bs0)) != 0)
3124 break;
0e7f4c19 3125
8365d59b
TG
3126 for (bs1 = bs0; bs1 < 32; bs1++)
3127 if ((mask & (1 << bs1)) == 0)
3128 break;
0e7f4c19 3129
144d51f9 3130 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
0e7f4c19 3131
8365d59b
TG
3132 p = 31 - bs0;
3133 len = bs1 - bs0;
0e7f4c19 3134
8919037c
TG
3135 operands[2] = GEN_INT (p);
3136 operands[3] = GEN_INT (len);
f38b27c7 3137 return "{depi|depwi} -1,%2,%3,%0";
0e7f4c19 3138}
520babc7
JL
3139
3140/* Return a string to perform a bitwise-and of operands[1] with operands[2]
3141 storing the result in operands[0]. */
0952f89b 3142const char *
b7849684 3143output_64bit_ior (rtx *operands)
520babc7
JL
3144{
3145 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
e0c556d3 3146 int bs0, bs1, p, len;
520babc7
JL
3147
3148 if (INTVAL (operands[2]) == 0)
3149 return "copy %1,%0";
3150
3151 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
e0c556d3 3152 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
520babc7
JL
3153 break;
3154
3155 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
e0c556d3 3156 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
520babc7
JL
3157 break;
3158
144d51f9
NS
3159 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3160 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
520babc7
JL
3161
3162 p = 63 - bs0;
3163 len = bs1 - bs0;
3164
3165 operands[2] = GEN_INT (p);
3166 operands[3] = GEN_INT (len);
3167 return "depdi -1,%2,%3,%0";
3168}
0e7f4c19 3169\f
301d03af 3170/* Target hook for assembling integer objects. This code handles
cdcb88d7
JDA
3171 aligned SI and DI integers specially since function references
3172 must be preceded by P%. */
301d03af
RS
3173
3174static bool
b7849684 3175pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
301d03af 3176{
cdcb88d7
JDA
3177 if (size == UNITS_PER_WORD
3178 && aligned_p
301d03af
RS
3179 && function_label_operand (x, VOIDmode))
3180 {
3181 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3182 output_addr_const (asm_out_file, x);
3183 fputc ('\n', asm_out_file);
3184 return true;
3185 }
3186 return default_assemble_integer (x, size, aligned_p);
3187}
3188\f
/* Output an ascii string.

   Write the SIZE bytes starting at P to FILE as one or more .STRING
   directives.  A double quote or backslash is preceded by a
   backslash; any byte outside the printable range ' ' .. '~' is
   written as \x followed by two lower-case hex digits.  */
void
output_ascii (FILE *file, const char *p, int size)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */
  int chars_output = 0;
  int i;

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  /* Encode the input four bytes at a time so that an escape sequence
     is never split across two directives.  */
  for (i = 0; i < size; i += 4)
    {
      int chunk = size - i < 4 ? size - i : 4;
      int co = 0;
      int io;

      for (io = 0; io < chunk; io++)
        {
          unsigned int c = (unsigned char) p[i + io];

          if (c == '\"' || c == '\\')
            partial_output[co++] = '\\';
          if (c >= ' ' && c < 0177)
            partial_output[co++] = c;
          else
            {
              partial_output[co++] = '\\';
              partial_output[co++] = 'x';
              partial_output[co++] = hex_digits[c / 16];
              partial_output[co++] = hex_digits[c % 16];
            }
        }

      /* Start a fresh directive before the line grows too long; 243
         presumably leaves room under the 256-character limit for the
         directive text and quotes.  */
      if (chars_output + co > 243)
        {
          fputs ("\"\n\t.STRING \"", file);
          chars_output = 0;
        }
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
    }
  fputs ("\"\n", file);
}
5621d717
JL
3244
3245/* Try to rewrite floating point comparisons & branches to avoid
3246 useless add,tr insns.
3247
3248 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3249 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3250 first attempt to remove useless add,tr insns. It is zero
3251 for the second pass as reorg sometimes leaves bogus REG_DEAD
3252 notes lying around.
3253
3254 When CHECK_NOTES is zero we can only eliminate add,tr insns
3255 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3256 instructions. */
519104fe 3257static void
b7849684 3258remove_useless_addtr_insns (int check_notes)
5621d717
JL
3259{
3260 rtx insn;
5621d717
JL
3261 static int pass = 0;
3262
3263 /* This is fairly cheap, so always run it when optimizing. */
3264 if (optimize > 0)
3265 {
3266 int fcmp_count = 0;
3267 int fbranch_count = 0;
3268
3269 /* Walk all the insns in this function looking for fcmp & fbranch
3270 instructions. Keep track of how many of each we find. */
18dbd950 3271 for (insn = get_insns (); insn; insn = next_insn (insn))
5621d717
JL
3272 {
3273 rtx tmp;
3274
3275 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3276 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
3277 continue;
3278
3279 tmp = PATTERN (insn);
3280
3281 /* It must be a set. */
3282 if (GET_CODE (tmp) != SET)
3283 continue;
3284
3285 /* If the destination is CCFP, then we've found an fcmp insn. */
3286 tmp = SET_DEST (tmp);
3287 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3288 {
3289 fcmp_count++;
3290 continue;
3291 }
6619e96c 3292
5621d717
JL
3293 tmp = PATTERN (insn);
3294 /* If this is an fbranch instruction, bump the fbranch counter. */
3295 if (GET_CODE (tmp) == SET
3296 && SET_DEST (tmp) == pc_rtx
3297 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3298 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3299 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3300 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3301 {
3302 fbranch_count++;
3303 continue;
3304 }
3305 }
3306
3307
3308 /* Find all floating point compare + branch insns. If possible,
3309 reverse the comparison & the branch to avoid add,tr insns. */
18dbd950 3310 for (insn = get_insns (); insn; insn = next_insn (insn))
5621d717
JL
3311 {
3312 rtx tmp, next;
3313
3314 /* Ignore anything that isn't an INSN. */
3315 if (GET_CODE (insn) != INSN)
3316 continue;
3317
3318 tmp = PATTERN (insn);
3319
3320 /* It must be a set. */
3321 if (GET_CODE (tmp) != SET)
3322 continue;
3323
3324 /* The destination must be CCFP, which is register zero. */
3325 tmp = SET_DEST (tmp);
3326 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3327 continue;
3328
3329 /* INSN should be a set of CCFP.
3330
3331 See if the result of this insn is used in a reversed FP
3332 conditional branch. If so, reverse our condition and
3333 the branch. Doing so avoids useless add,tr insns. */
3334 next = next_insn (insn);
3335 while (next)
3336 {
3337 /* Jumps, calls and labels stop our search. */
3338 if (GET_CODE (next) == JUMP_INSN
3339 || GET_CODE (next) == CALL_INSN
3340 || GET_CODE (next) == CODE_LABEL)
3341 break;
3342
3343 /* As does another fcmp insn. */
3344 if (GET_CODE (next) == INSN
3345 && GET_CODE (PATTERN (next)) == SET
3346 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3347 && REGNO (SET_DEST (PATTERN (next))) == 0)
3348 break;
3349
3350 next = next_insn (next);
3351 }
3352
3353 /* Is NEXT_INSN a branch? */
3354 if (next
3355 && GET_CODE (next) == JUMP_INSN)
3356 {
3357 rtx pattern = PATTERN (next);
3358
112cdef5 3359 /* If it a reversed fp conditional branch (e.g. uses add,tr)
5621d717
JL
3360 and CCFP dies, then reverse our conditional and the branch
3361 to avoid the add,tr. */
3362 if (GET_CODE (pattern) == SET
3363 && SET_DEST (pattern) == pc_rtx
3364 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3365 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3366 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3367 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3368 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3369 && (fcmp_count == fbranch_count
3370 || (check_notes
3371 && find_regno_note (next, REG_DEAD, 0))))
3372 {
3373 /* Reverse the branch. */
3374 tmp = XEXP (SET_SRC (pattern), 1);
3375 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3376 XEXP (SET_SRC (pattern), 2) = tmp;
3377 INSN_CODE (next) = -1;
3378
3379 /* Reverse our condition. */
3380 tmp = PATTERN (insn);
3381 PUT_CODE (XEXP (tmp, 1),
831c1763
AM
3382 (reverse_condition_maybe_unordered
3383 (GET_CODE (XEXP (tmp, 1)))));
5621d717
JL
3384 }
3385 }
3386 }
3387 }
3388
3389 pass = !pass;
3390
3391}
188538df 3392\f
831c1763
AM
3393/* You may have trouble believing this, but this is the 32 bit HP-PA
3394 stack layout. Wow.
188538df
TG
3395
3396 Offset Contents
3397
3398 Variable arguments (optional; any number may be allocated)
3399
3400 SP-(4*(N+9)) arg word N
3401 : :
3402 SP-56 arg word 5
3403 SP-52 arg word 4
3404
3405 Fixed arguments (must be allocated; may remain unused)
3406
3407 SP-48 arg word 3
3408 SP-44 arg word 2
3409 SP-40 arg word 1
3410 SP-36 arg word 0
3411
3412 Frame Marker
3413
3414 SP-32 External Data Pointer (DP)
3415 SP-28 External sr4
3416 SP-24 External/stub RP (RP')
3417 SP-20 Current RP
3418 SP-16 Static Link
3419 SP-12 Clean up
3420 SP-8 Calling Stub RP (RP'')
3421 SP-4 Previous SP
3422
3423 Top of Frame
3424
3425 SP-0 Stack Pointer (points to next available address)
3426
3427*/
3428
3429/* This function saves registers as follows. Registers marked with ' are
3430 this function's registers (as opposed to the previous function's).
3431 If a frame_pointer isn't needed, r4 is saved as a general register;
3432 the space for the frame pointer is still allocated, though, to keep
3433 things simple.
3434
3435
3436 Top of Frame
3437
3438 SP (FP') Previous FP
3439 SP + 4 Alignment filler (sigh)
3440 SP + 8 Space for locals reserved here.
3441 .
3442 .
3443 .
3444 SP + n All call saved register used.
3445 .
3446 .
3447 .
3448 SP + o All call saved fp registers used.
3449 .
3450 .
3451 .
3452 SP + p (SP') points to next available address.
23f6f34f 3453
188538df
TG
3454*/
3455
08c148a8 3456/* Global variables set by output_function_prologue(). */
19ec6a36
AM
3457/* Size of frame. Need to know this to emit return insns from
3458 leaf procedures. */
a4295210
JDA
3459static HOST_WIDE_INT actual_fsize, local_fsize;
3460static int save_fregs;
19ec6a36 3461
aadcdb45 3462/* Emit RTL to store REG at the memory location specified by BASE+DISP.
fc82f2f1 3463 Handle case where DISP > 8k by using the add_high_const patterns.
aadcdb45
JL
3464
3465 Note in DISP > 8k case, we will leave the high part of the address
3466 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
c5c76735 3467
f6bcf44c 3468static void
a4295210 3469store_reg (int reg, HOST_WIDE_INT disp, int base)
188538df 3470{
f6bcf44c 3471 rtx insn, dest, src, basereg;
19ec6a36
AM
3472
3473 src = gen_rtx_REG (word_mode, reg);
3474 basereg = gen_rtx_REG (Pmode, base);
188538df 3475 if (VAL_14_BITS_P (disp))
aadcdb45 3476 {
19ec6a36 3477 dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
f6bcf44c 3478 insn = emit_move_insn (dest, src);
aadcdb45 3479 }
a4295210
JDA
3480 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3481 {
3482 rtx delta = GEN_INT (disp);
3483 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3484
3485 emit_move_insn (tmpreg, delta);
5dcc9605 3486 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
a4295210
JDA
3487 if (DO_FRAME_NOTES)
3488 {
bbbbb16a
ILT
3489 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3490 gen_rtx_SET (VOIDmode, tmpreg,
3491 gen_rtx_PLUS (Pmode, basereg, delta)));
5dcc9605 3492 RTX_FRAME_RELATED_P (insn) = 1;
a4295210 3493 }
5dcc9605
JDA
3494 dest = gen_rtx_MEM (word_mode, tmpreg);
3495 insn = emit_move_insn (dest, src);
a4295210 3496 }
aadcdb45
JL
3497 else
3498 {
19ec6a36
AM
3499 rtx delta = GEN_INT (disp);
3500 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3501 rtx tmpreg = gen_rtx_REG (Pmode, 1);
a4295210 3502
19ec6a36
AM
3503 emit_move_insn (tmpreg, high);
3504 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
f6bcf44c
JDA
3505 insn = emit_move_insn (dest, src);
3506 if (DO_FRAME_NOTES)
bbbbb16a
ILT
3507 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3508 gen_rtx_SET (VOIDmode,
3509 gen_rtx_MEM (word_mode,
3510 gen_rtx_PLUS (word_mode,
3511 basereg,
3512 delta)),
3513 src));
aadcdb45 3514 }
f6bcf44c
JDA
3515
3516 if (DO_FRAME_NOTES)
3517 RTX_FRAME_RELATED_P (insn) = 1;
aadcdb45
JL
3518}
3519
823fbbce
JDA
3520/* Emit RTL to store REG at the memory location specified by BASE and then
3521 add MOD to BASE. MOD must be <= 8k. */
aadcdb45 3522
823fbbce 3523static void
a4295210 3524store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
823fbbce
JDA
3525{
3526 rtx insn, basereg, srcreg, delta;
3527
144d51f9 3528 gcc_assert (VAL_14_BITS_P (mod));
823fbbce
JDA
3529
3530 basereg = gen_rtx_REG (Pmode, base);
3531 srcreg = gen_rtx_REG (word_mode, reg);
3532 delta = GEN_INT (mod);
3533
3534 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3535 if (DO_FRAME_NOTES)
3536 {
3537 RTX_FRAME_RELATED_P (insn) = 1;
3538
3539 /* RTX_FRAME_RELATED_P must be set on each frame related set
77c4f044
RH
3540 in a parallel with more than one element. */
3541 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3542 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
823fbbce
JDA
3543 }
3544}
3545
3546/* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3547 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3548 whether to add a frame note or not.
3549
3550 In the DISP > 8k case, we leave the high part of the address in %r1.
3551 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
c5c76735 3552
f6bcf44c 3553static void
a4295210 3554set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
188538df 3555{
f6bcf44c 3556 rtx insn;
19ec6a36 3557
188538df 3558 if (VAL_14_BITS_P (disp))
19ec6a36 3559 {
f6bcf44c
JDA
3560 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3561 plus_constant (gen_rtx_REG (Pmode, base), disp));
19ec6a36 3562 }
a4295210
JDA
3563 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3564 {
3565 rtx basereg = gen_rtx_REG (Pmode, base);
3566 rtx delta = GEN_INT (disp);
3567 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3568
3569 emit_move_insn (tmpreg, delta);
3570 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3571 gen_rtx_PLUS (Pmode, tmpreg, basereg));
5dcc9605 3572 if (DO_FRAME_NOTES)
bbbbb16a
ILT
3573 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3574 gen_rtx_SET (VOIDmode, tmpreg,
3575 gen_rtx_PLUS (Pmode, basereg, delta)));
a4295210 3576 }
188538df 3577 else
aadcdb45 3578 {
f6bcf44c 3579 rtx basereg = gen_rtx_REG (Pmode, base);
19ec6a36 3580 rtx delta = GEN_INT (disp);
a4295210 3581 rtx tmpreg = gen_rtx_REG (Pmode, 1);
f6bcf44c 3582
a4295210 3583 emit_move_insn (tmpreg,
f6bcf44c 3584 gen_rtx_PLUS (Pmode, basereg,
19ec6a36 3585 gen_rtx_HIGH (Pmode, delta)));
f6bcf44c 3586 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
a4295210 3587 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
aadcdb45 3588 }
f6bcf44c 3589
823fbbce 3590 if (DO_FRAME_NOTES && note)
f6bcf44c 3591 RTX_FRAME_RELATED_P (insn) = 1;
188538df
TG
3592}
3593
a4295210
JDA
3594HOST_WIDE_INT
3595compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
188538df 3596{
95f3f59e
JDA
3597 int freg_saved = 0;
3598 int i, j;
3599
3600 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3601 be consistent with the rounding and size calculation done here.
3602 Change them at the same time. */
3603
3604 /* We do our own stack alignment. First, round the size of the
3605 stack locals up to a word boundary. */
3606 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3607
3608 /* Space for previous frame pointer + filler. If any frame is
3609 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3610 waste some space here for the sake of HP compatibility. The
3611 first slot is only used when the frame pointer is needed. */
3612 if (size || frame_pointer_needed)
3613 size += STARTING_FRAME_OFFSET;
3614
823fbbce
JDA
3615 /* If the current function calls __builtin_eh_return, then we need
3616 to allocate stack space for registers that will hold data for
3617 the exception handler. */
e3b5732b 3618 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
3619 {
3620 unsigned int i;
3621
3622 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3623 continue;
95f3f59e 3624 size += i * UNITS_PER_WORD;
823fbbce
JDA
3625 }
3626
6261ede7 3627 /* Account for space used by the callee general register saves. */
95f3f59e 3628 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
6fb5fa3c 3629 if (df_regs_ever_live_p (i))
95f3f59e 3630 size += UNITS_PER_WORD;
80225b66 3631
6261ede7 3632 /* Account for space used by the callee floating point register saves. */
88624c0e 3633 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
6fb5fa3c
DB
3634 if (df_regs_ever_live_p (i)
3635 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
80225b66 3636 {
95f3f59e 3637 freg_saved = 1;
9e18f575 3638
6261ede7
JL
3639 /* We always save both halves of the FP register, so always
3640 increment the frame size by 8 bytes. */
95f3f59e 3641 size += 8;
80225b66
TG
3642 }
3643
95f3f59e
JDA
3644 /* If any of the floating registers are saved, account for the
3645 alignment needed for the floating point register save block. */
3646 if (freg_saved)
3647 {
3648 size = (size + 7) & ~7;
3649 if (fregs_live)
3650 *fregs_live = 1;
3651 }
3652
6261ede7 3653 /* The various ABIs include space for the outgoing parameters in the
95f3f59e
JDA
3654 size of the current function's stack frame. We don't need to align
3655 for the outgoing arguments as their alignment is set by the final
3656 rounding for the frame as a whole. */
38173d38 3657 size += crtl->outgoing_args_size;
6261ede7
JL
3658
3659 /* Allocate space for the fixed frame marker. This space must be
685d0e07 3660 allocated for any function that makes calls or allocates
6261ede7 3661 stack space. */
95f3f59e 3662 if (!current_function_is_leaf || size)
685d0e07 3663 size += TARGET_64BIT ? 48 : 32;
520babc7 3664
95f3f59e 3665 /* Finally, round to the preferred stack boundary. */
5fad1c24
JDA
3666 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3667 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
188538df 3668}
23f6f34f 3669
08c148a8
NB
3670/* Generate the assembly code for function entry. FILE is a stdio
3671 stream to output the code to. SIZE is an int: how many units of
3672 temporary storage to allocate.
3673
3674 Refer to the array `regs_ever_live' to determine which registers to
3675 save; `regs_ever_live[I]' is nonzero if register number I is ever
3676 used in the function. This function is responsible for knowing
3677 which registers should not be saved even if used. */
3678
3679/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3680 of memory. If any fpu reg is used in the function, we allocate
3681 such a block here, at the bottom of the frame, just in case it's needed.
3682
3683 If this function is a leaf procedure, then we may choose not
3684 to do a "save" insn. The decision about whether or not
3685 to do this is made in regclass.c. */
3686
c590b625 3687static void
b7849684 3688pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
188538df 3689{
ba0bfdac
JL
3690 /* The function's label and associated .PROC must never be
3691 separated and must be output *after* any profiling declarations
3692 to avoid changing spaces/subspaces within a procedure. */
3693 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3694 fputs ("\t.PROC\n", file);
3695
aadcdb45
JL
3696 /* hppa_expand_prologue does the dirty work now. We just need
3697 to output the assembler directives which denote the start
3698 of a function. */
a4295210 3699 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
16c16a24 3700 if (current_function_is_leaf)
e236a9ff 3701 fputs (",NO_CALLS", file);
16c16a24
JDA
3702 else
3703 fputs (",CALLS", file);
3704 if (rp_saved)
3705 fputs (",SAVE_RP", file);
da3c3336 3706
685d0e07
JDA
3707 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3708 at the beginning of the frame and that it is used as the frame
3709 pointer for the frame. We do this because our current frame
a4d05547 3710 layout doesn't conform to that specified in the HP runtime
685d0e07
JDA
3711 documentation and we need a way to indicate to programs such as
3712 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3713 isn't used by HP compilers but is supported by the assembler.
3714 However, SAVE_SP is supposed to indicate that the previous stack
3715 pointer has been saved in the frame marker. */
da3c3336 3716 if (frame_pointer_needed)
e236a9ff 3717 fputs (",SAVE_SP", file);
da3c3336 3718
68386e1e 3719 /* Pass on information about the number of callee register saves
e8cfae5c
JL
3720 performed in the prologue.
3721
3722 The compiler is supposed to pass the highest register number
23f6f34f 3723 saved, the assembler then has to adjust that number before
e8cfae5c 3724 entering it into the unwind descriptor (to account for any
23f6f34f 3725 caller saved registers with lower register numbers than the
e8cfae5c
JL
3726 first callee saved register). */
3727 if (gr_saved)
3728 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3729
3730 if (fr_saved)
3731 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
68386e1e 3732
e236a9ff 3733 fputs ("\n\t.ENTRY\n", file);
aadcdb45 3734
18dbd950 3735 remove_useless_addtr_insns (0);
aadcdb45
JL
3736}
3737
f1c7ce82 3738void
b7849684 3739hppa_expand_prologue (void)
aadcdb45 3740{
4971c587 3741 int merge_sp_adjust_with_store = 0;
a4295210
JDA
3742 HOST_WIDE_INT size = get_frame_size ();
3743 HOST_WIDE_INT offset;
3744 int i;
823fbbce 3745 rtx insn, tmpreg;
aadcdb45 3746
68386e1e
JL
3747 gr_saved = 0;
3748 fr_saved = 0;
8a9c76f3 3749 save_fregs = 0;
6261ede7 3750
95f3f59e
JDA
3751 /* Compute total size for frame pointer, filler, locals and rounding to
3752 the next word boundary. Similar code appears in compute_frame_size
3753 and must be changed in tandem with this code. */
3754 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3755 if (local_fsize || frame_pointer_needed)
3756 local_fsize += STARTING_FRAME_OFFSET;
6261ede7 3757
2b41935c 3758 actual_fsize = compute_frame_size (size, &save_fregs);
a11e0df4 3759 if (flag_stack_usage_info)
d3c12306 3760 current_function_static_stack_size = actual_fsize;
188538df 3761
aadcdb45 3762 /* Compute a few things we will use often. */
690d4228 3763 tmpreg = gen_rtx_REG (word_mode, 1);
188538df 3764
23f6f34f 3765 /* Save RP first. The calling conventions manual states RP will
19ec6a36 3766 always be stored into the caller's frame at sp - 20 or sp - 16
520babc7 3767 depending on which ABI is in use. */
e3b5732b 3768 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
16c16a24
JDA
3769 {
3770 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3771 rp_saved = true;
3772 }
3773 else
3774 rp_saved = false;
23f6f34f 3775
aadcdb45 3776 /* Allocate the local frame and set up the frame pointer if needed. */
31d68947
AM
3777 if (actual_fsize != 0)
3778 {
3779 if (frame_pointer_needed)
3780 {
3781 /* Copy the old frame pointer temporarily into %r1. Set up the
3782 new stack pointer, then store away the saved old frame pointer
823fbbce
JDA
3783 into the stack at sp and at the same time update the stack
3784 pointer by actual_fsize bytes. Two versions, first
31d68947
AM
3785 handles small (<8k) frames. The second handles large (>=8k)
3786 frames. */
bc707992 3787 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
823fbbce 3788 if (DO_FRAME_NOTES)
77c4f044 3789 RTX_FRAME_RELATED_P (insn) = 1;
823fbbce 3790
bc707992 3791 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
823fbbce
JDA
3792 if (DO_FRAME_NOTES)
3793 RTX_FRAME_RELATED_P (insn) = 1;
3794
3795 if (VAL_14_BITS_P (actual_fsize))
3796 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
31d68947
AM
3797 else
3798 {
3799 /* It is incorrect to store the saved frame pointer at *sp,
3800 then increment sp (writes beyond the current stack boundary).
3801
3802 So instead use stwm to store at *sp and post-increment the
3803 stack pointer as an atomic operation. Then increment sp to
3804 finish allocating the new frame. */
a4295210
JDA
3805 HOST_WIDE_INT adjust1 = 8192 - 64;
3806 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
19ec6a36 3807
823fbbce 3808 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
f6bcf44c 3809 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
823fbbce 3810 adjust2, 1);
31d68947 3811 }
823fbbce 3812
685d0e07
JDA
3813 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3814 we need to store the previous stack pointer (frame pointer)
3815 into the frame marker on targets that use the HP unwind
3816 library. This allows the HP unwind library to be used to
3817 unwind GCC frames. However, we are not fully compatible
3818 with the HP library because our frame layout differs from
3819 that specified in the HP runtime specification.
3820
3821 We don't want a frame note on this instruction as the frame
3822 marker moves during dynamic stack allocation.
3823
3824 This instruction also serves as a blockage to prevent
3825 register spills from being scheduled before the stack
3826 pointer is raised. This is necessary as we store
3827 registers using the frame pointer as a base register,
3828 and the frame pointer is set before sp is raised. */
3829 if (TARGET_HPUX_UNWIND_LIBRARY)
3830 {
3831 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3832 GEN_INT (TARGET_64BIT ? -8 : -4));
3833
3834 emit_move_insn (gen_rtx_MEM (word_mode, addr),
bc707992 3835 hard_frame_pointer_rtx);
685d0e07
JDA
3836 }
3837 else
3838 emit_insn (gen_blockage ());
31d68947
AM
3839 }
3840 /* no frame pointer needed. */
3841 else
3842 {
3843 /* In some cases we can perform the first callee register save
3844 and allocating the stack frame at the same time. If so, just
3845 make a note of it and defer allocating the frame until saving
3846 the callee registers. */
1c7a8112 3847 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
31d68947
AM
3848 merge_sp_adjust_with_store = 1;
3849 /* Can not optimize. Adjust the stack frame by actual_fsize
3850 bytes. */
3851 else
f6bcf44c 3852 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
823fbbce 3853 actual_fsize, 1);
31d68947 3854 }
a9d91d6f
RS
3855 }
3856
23f6f34f 3857 /* Normal register save.
aadcdb45
JL
3858
3859 Do not save the frame pointer in the frame_pointer_needed case. It
3860 was done earlier. */
188538df
TG
3861 if (frame_pointer_needed)
3862 {
823fbbce
JDA
3863 offset = local_fsize;
3864
3865 /* Saving the EH return data registers in the frame is the simplest
3866 way to get the frame unwind information emitted. We put them
3867 just before the general registers. */
e3b5732b 3868 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
3869 {
3870 unsigned int i, regno;
3871
3872 for (i = 0; ; ++i)
3873 {
3874 regno = EH_RETURN_DATA_REGNO (i);
3875 if (regno == INVALID_REGNUM)
3876 break;
3877
bc707992 3878 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
823fbbce
JDA
3879 offset += UNITS_PER_WORD;
3880 }
3881 }
3882
3883 for (i = 18; i >= 4; i--)
6fb5fa3c 3884 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
188538df 3885 {
bc707992 3886 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
d7735a07 3887 offset += UNITS_PER_WORD;
68386e1e 3888 gr_saved++;
188538df 3889 }
e63ffc38 3890 /* Account for %r3 which is saved in a special place. */
e8cfae5c 3891 gr_saved++;
188538df 3892 }
aadcdb45 3893 /* No frame pointer needed. */
188538df
TG
3894 else
3895 {
823fbbce
JDA
3896 offset = local_fsize - actual_fsize;
3897
3898 /* Saving the EH return data registers in the frame is the simplest
3899 way to get the frame unwind information emitted. */
e3b5732b 3900 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
3901 {
3902 unsigned int i, regno;
3903
3904 for (i = 0; ; ++i)
3905 {
3906 regno = EH_RETURN_DATA_REGNO (i);
3907 if (regno == INVALID_REGNUM)
3908 break;
3909
3910 /* If merge_sp_adjust_with_store is nonzero, then we can
3911 optimize the first save. */
3912 if (merge_sp_adjust_with_store)
3913 {
3914 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3915 merge_sp_adjust_with_store = 0;
3916 }
3917 else
3918 store_reg (regno, offset, STACK_POINTER_REGNUM);
3919 offset += UNITS_PER_WORD;
3920 }
3921 }
3922
3923 for (i = 18; i >= 3; i--)
6fb5fa3c 3924 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
188538df 3925 {
23f6f34f 3926 /* If merge_sp_adjust_with_store is nonzero, then we can
4971c587 3927 optimize the first GR save. */
f133af4c 3928 if (merge_sp_adjust_with_store)
4971c587 3929 {
823fbbce 3930 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4971c587 3931 merge_sp_adjust_with_store = 0;
4971c587
JL
3932 }
3933 else
f6bcf44c 3934 store_reg (i, offset, STACK_POINTER_REGNUM);
d7735a07 3935 offset += UNITS_PER_WORD;
68386e1e 3936 gr_saved++;
188538df 3937 }
aadcdb45 3938
4971c587 3939 /* If we wanted to merge the SP adjustment with a GR save, but we never
aadcdb45 3940 did any GR saves, then just emit the adjustment here. */
f133af4c 3941 if (merge_sp_adjust_with_store)
f6bcf44c 3942 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
823fbbce 3943 actual_fsize, 1);
188538df 3944 }
23f6f34f 3945
1c7a8112
AM
3946 /* The hppa calling conventions say that %r19, the pic offset
3947 register, is saved at sp - 32 (in this function's frame)
3948 when generating PIC code. FIXME: What is the correct thing
3949 to do for functions which make no calls and allocate no
3950 frame? Do we need to allocate a frame, or can we just omit
3ffa9dc1
JDA
3951 the save? For now we'll just omit the save.
3952
3953 We don't want a note on this insn as the frame marker can
3954 move if there is a dynamic stack allocation. */
1c7a8112 3955 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3ffa9dc1
JDA
3956 {
3957 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
3958
3959 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
3960
3961 }
1c7a8112 3962
188538df
TG
3963 /* Align pointer properly (doubleword boundary). */
3964 offset = (offset + 7) & ~7;
3965
3966 /* Floating point register store. */
3967 if (save_fregs)
188538df 3968 {
823fbbce
JDA
3969 rtx base;
3970
aadcdb45
JL
3971 /* First get the frame or stack pointer to the start of the FP register
3972 save area. */
2b41935c 3973 if (frame_pointer_needed)
823fbbce 3974 {
bc707992
JDA
3975 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
3976 base = hard_frame_pointer_rtx;
823fbbce 3977 }
2b41935c 3978 else
823fbbce
JDA
3979 {
3980 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
3981 base = stack_pointer_rtx;
3982 }
aadcdb45
JL
3983
3984 /* Now actually save the FP registers. */
88624c0e 3985 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
e63ffc38 3986 {
6fb5fa3c
DB
3987 if (df_regs_ever_live_p (i)
3988 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
e63ffc38 3989 {
f6bcf44c 3990 rtx addr, insn, reg;
19ec6a36
AM
3991 addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
3992 reg = gen_rtx_REG (DFmode, i);
f6bcf44c
JDA
3993 insn = emit_move_insn (addr, reg);
3994 if (DO_FRAME_NOTES)
3995 {
3996 RTX_FRAME_RELATED_P (insn) = 1;
823fbbce
JDA
3997 if (TARGET_64BIT)
3998 {
3999 rtx mem = gen_rtx_MEM (DFmode,
4000 plus_constant (base, offset));
bbbbb16a
ILT
4001 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4002 gen_rtx_SET (VOIDmode, mem, reg));
823fbbce
JDA
4003 }
4004 else
4005 {
4006 rtx meml = gen_rtx_MEM (SFmode,
4007 plus_constant (base, offset));
4008 rtx memr = gen_rtx_MEM (SFmode,
4009 plus_constant (base, offset + 4));
4010 rtx regl = gen_rtx_REG (SFmode, i);
4011 rtx regr = gen_rtx_REG (SFmode, i + 1);
4012 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
4013 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
4014 rtvec vec;
4015
4016 RTX_FRAME_RELATED_P (setl) = 1;
4017 RTX_FRAME_RELATED_P (setr) = 1;
4018 vec = gen_rtvec (2, setl, setr);
bbbbb16a
ILT
4019 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4020 gen_rtx_SEQUENCE (VOIDmode, vec));
823fbbce 4021 }
f6bcf44c
JDA
4022 }
4023 offset += GET_MODE_SIZE (DFmode);
e63ffc38
JL
4024 fr_saved++;
4025 }
4026 }
188538df
TG
4027 }
4028}
4029
19ec6a36
AM
4030/* Emit RTL to load REG from the memory location specified by BASE+DISP.
4031 Handle case where DISP > 8k by using the add_high_const patterns. */
4032
f6bcf44c 4033static void
a4295210 4034load_reg (int reg, HOST_WIDE_INT disp, int base)
19ec6a36 4035{
a4295210
JDA
4036 rtx dest = gen_rtx_REG (word_mode, reg);
4037 rtx basereg = gen_rtx_REG (Pmode, base);
4038 rtx src;
19ec6a36 4039
19ec6a36 4040 if (VAL_14_BITS_P (disp))
a4295210
JDA
4041 src = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
4042 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
19ec6a36 4043 {
a4295210
JDA
4044 rtx delta = GEN_INT (disp);
4045 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4046
4047 emit_move_insn (tmpreg, delta);
4048 if (TARGET_DISABLE_INDEXING)
4049 {
4050 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4051 src = gen_rtx_MEM (word_mode, tmpreg);
4052 }
4053 else
4054 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
19ec6a36
AM
4055 }
4056 else
4057 {
4058 rtx delta = GEN_INT (disp);
4059 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4060 rtx tmpreg = gen_rtx_REG (Pmode, 1);
a4295210 4061
19ec6a36
AM
4062 emit_move_insn (tmpreg, high);
4063 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
19ec6a36 4064 }
a4295210
JDA
4065
4066 emit_move_insn (dest, src);
19ec6a36 4067}
aadcdb45 4068
5fad1c24
JDA
4069/* Update the total code bytes output to the text section. */
4070
4071static void
67b846fa 4072update_total_code_bytes (unsigned int nbytes)
5fad1c24
JDA
4073{
4074 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
62910663 4075 && !IN_NAMED_SECTION_P (cfun->decl))
5fad1c24 4076 {
67b846fa 4077 unsigned int old_total = total_code_bytes;
5fad1c24 4078
67b846fa 4079 total_code_bytes += nbytes;
5fad1c24 4080
67b846fa
JDA
4081 /* Be prepared to handle overflows. */
4082 if (old_total > total_code_bytes)
4083 total_code_bytes = UINT_MAX;
5fad1c24
JDA
4084 }
4085}
4086
08c148a8
NB
4087/* This function generates the assembly code for function exit.
4088 Args are as for output_function_prologue ().
4089
4090 The function epilogue should not depend on the current stack
4091 pointer! It should use the frame pointer only. This is mandatory
4092 because of alloca; we also take advantage of it to omit stack
fe19a83d 4093 adjustments before returning. */
08c148a8
NB
4094
4095static void
b7849684 4096pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
188538df 4097{
08a2b118
RS
4098 rtx insn = get_last_insn ();
4099
5fad1c24
JDA
4100 last_address = 0;
4101
aadcdb45
JL
4102 /* hppa_expand_epilogue does the dirty work now. We just need
4103 to output the assembler directives which denote the end
08a2b118
RS
4104 of a function.
4105
4106 To make debuggers happy, emit a nop if the epilogue was completely
4107 eliminated due to a volatile call as the last insn in the
23f6f34f 4108 current function. That way the return address (in %r2) will
08a2b118
RS
4109 always point to a valid instruction in the current function. */
4110
4111 /* Get the last real insn. */
4112 if (GET_CODE (insn) == NOTE)
4113 insn = prev_real_insn (insn);
4114
4115 /* If it is a sequence, then look inside. */
4116 if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4117 insn = XVECEXP (PATTERN (insn), 0, 0);
4118
23f6f34f 4119 /* If insn is a CALL_INSN, then it must be a call to a volatile
08a2b118
RS
4120 function (otherwise there would be epilogue insns). */
4121 if (insn && GET_CODE (insn) == CALL_INSN)
17e6098e
JDA
4122 {
4123 fputs ("\tnop\n", file);
4124 last_address += 4;
4125 }
23f6f34f 4126
e236a9ff 4127 fputs ("\t.EXIT\n\t.PROCEND\n", file);
17e6098e 4128
9a55eab3
JDA
4129 if (TARGET_SOM && TARGET_GAS)
4130 {
4131 /* We done with this subspace except possibly for some additional
4132 debug information. Forget that we are in this subspace to ensure
4133 that the next function is output in its own subspace. */
d6b5193b 4134 in_section = NULL;
1a83bfc3 4135 cfun->machine->in_nsubspa = 2;
9a55eab3
JDA
4136 }
4137
5fad1c24 4138 if (INSN_ADDRESSES_SET_P ())
17e6098e 4139 {
5fad1c24
JDA
4140 insn = get_last_nonnote_insn ();
4141 last_address += INSN_ADDRESSES (INSN_UID (insn));
4142 if (INSN_P (insn))
4143 last_address += insn_default_length (insn);
4144 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4145 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
17e6098e 4146 }
67b846fa
JDA
4147 else
4148 last_address = UINT_MAX;
5fad1c24
JDA
4149
4150 /* Finally, update the total number of code bytes output so far. */
4151 update_total_code_bytes (last_address);
aadcdb45 4152}
4971c587 4153
aadcdb45 4154void
b7849684 4155hppa_expand_epilogue (void)
aadcdb45 4156{
23f6f34f 4157 rtx tmpreg;
a4295210
JDA
4158 HOST_WIDE_INT offset;
4159 HOST_WIDE_INT ret_off = 0;
4160 int i;
31d68947 4161 int merge_sp_adjust_with_load = 0;
aadcdb45
JL
4162
4163 /* We will use this often. */
690d4228 4164 tmpreg = gen_rtx_REG (word_mode, 1);
aadcdb45
JL
4165
4166 /* Try to restore RP early to avoid load/use interlocks when
4167 RP gets used in the return (bv) instruction. This appears to still
fe19a83d 4168 be necessary even when we schedule the prologue and epilogue. */
16c16a24 4169 if (rp_saved)
31d68947
AM
4170 {
4171 ret_off = TARGET_64BIT ? -16 : -20;
4172 if (frame_pointer_needed)
4173 {
bc707992 4174 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
31d68947
AM
4175 ret_off = 0;
4176 }
4177 else
4178 {
4179 /* No frame pointer, and stack is smaller than 8k. */
4180 if (VAL_14_BITS_P (ret_off - actual_fsize))
4181 {
f6bcf44c 4182 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
31d68947
AM
4183 ret_off = 0;
4184 }
4185 }
4186 }
aadcdb45
JL
4187
4188 /* General register restores. */
188538df
TG
4189 if (frame_pointer_needed)
4190 {
823fbbce
JDA
4191 offset = local_fsize;
4192
4193 /* If the current function calls __builtin_eh_return, then we need
4194 to restore the saved EH data registers. */
e3b5732b 4195 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4196 {
4197 unsigned int i, regno;
4198
4199 for (i = 0; ; ++i)
4200 {
4201 regno = EH_RETURN_DATA_REGNO (i);
4202 if (regno == INVALID_REGNUM)
4203 break;
4204
bc707992 4205 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
823fbbce
JDA
4206 offset += UNITS_PER_WORD;
4207 }
4208 }
4209
4210 for (i = 18; i >= 4; i--)
6fb5fa3c 4211 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
188538df 4212 {
bc707992 4213 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
d7735a07 4214 offset += UNITS_PER_WORD;
188538df 4215 }
188538df
TG
4216 }
4217 else
4218 {
823fbbce
JDA
4219 offset = local_fsize - actual_fsize;
4220
4221 /* If the current function calls __builtin_eh_return, then we need
4222 to restore the saved EH data registers. */
e3b5732b 4223 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4224 {
4225 unsigned int i, regno;
4226
4227 for (i = 0; ; ++i)
4228 {
4229 regno = EH_RETURN_DATA_REGNO (i);
4230 if (regno == INVALID_REGNUM)
4231 break;
4232
4233 /* Only for the first load.
4234 merge_sp_adjust_with_load holds the register load
4235 with which we will merge the sp adjustment. */
4236 if (merge_sp_adjust_with_load == 0
4237 && local_fsize == 0
4238 && VAL_14_BITS_P (-actual_fsize))
4239 merge_sp_adjust_with_load = regno;
4240 else
4241 load_reg (regno, offset, STACK_POINTER_REGNUM);
4242 offset += UNITS_PER_WORD;
4243 }
4244 }
4245
4246 for (i = 18; i >= 3; i--)
e63ffc38 4247 {
6fb5fa3c 4248 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
e63ffc38 4249 {
e63ffc38
JL
4250 /* Only for the first load.
4251 merge_sp_adjust_with_load holds the register load
4252 with which we will merge the sp adjustment. */
31d68947 4253 if (merge_sp_adjust_with_load == 0
e63ffc38 4254 && local_fsize == 0
31d68947 4255 && VAL_14_BITS_P (-actual_fsize))
e63ffc38
JL
4256 merge_sp_adjust_with_load = i;
4257 else
f6bcf44c 4258 load_reg (i, offset, STACK_POINTER_REGNUM);
d7735a07 4259 offset += UNITS_PER_WORD;
e63ffc38
JL
4260 }
4261 }
188538df 4262 }
aadcdb45 4263
188538df
TG
4264 /* Align pointer properly (doubleword boundary). */
4265 offset = (offset + 7) & ~7;
4266
aadcdb45 4267 /* FP register restores. */
188538df 4268 if (save_fregs)
188538df 4269 {
aadcdb45 4270 /* Adjust the register to index off of. */
2b41935c 4271 if (frame_pointer_needed)
bc707992 4272 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
2b41935c 4273 else
823fbbce 4274 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
aadcdb45
JL
4275
4276 /* Actually do the restores now. */
88624c0e 4277 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
6fb5fa3c
DB
4278 if (df_regs_ever_live_p (i)
4279 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
19ec6a36
AM
4280 {
4281 rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
4282 rtx dest = gen_rtx_REG (DFmode, i);
f6bcf44c 4283 emit_move_insn (dest, src);
19ec6a36 4284 }
188538df 4285 }
aadcdb45 4286
1144563f
JL
4287 /* Emit a blockage insn here to keep these insns from being moved to
4288 an earlier spot in the epilogue, or into the main instruction stream.
4289
4290 This is necessary as we must not cut the stack back before all the
4291 restores are finished. */
4292 emit_insn (gen_blockage ());
aadcdb45 4293
6619e96c 4294 /* Reset stack pointer (and possibly frame pointer). The stack
68944452 4295 pointer is initially set to fp + 64 to avoid a race condition. */
31d68947 4296 if (frame_pointer_needed)
188538df 4297 {
19ec6a36 4298 rtx delta = GEN_INT (-64);
823fbbce 4299
bc707992
JDA
4300 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4301 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4302 stack_pointer_rtx, delta));
188538df 4303 }
aadcdb45 4304 /* If we were deferring a callee register restore, do it now. */
31d68947
AM
4305 else if (merge_sp_adjust_with_load)
4306 {
4307 rtx delta = GEN_INT (-actual_fsize);
19ec6a36 4308 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
823fbbce
JDA
4309
4310 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
31d68947 4311 }
aadcdb45 4312 else if (actual_fsize != 0)
823fbbce
JDA
4313 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4314 - actual_fsize, 0);
31d68947
AM
4315
4316 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4317 frame greater than 8k), do so now. */
4318 if (ret_off != 0)
f6bcf44c 4319 load_reg (2, ret_off, STACK_POINTER_REGNUM);
823fbbce 4320
e3b5732b 4321 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4322 {
4323 rtx sa = EH_RETURN_STACKADJ_RTX;
4324
4325 emit_insn (gen_blockage ());
4326 emit_insn (TARGET_64BIT
4327 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4328 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4329 }
188538df
TG
4330}
4331
d777856d 4332rtx
b7849684 4333hppa_pic_save_rtx (void)
824e7605 4334{
d777856d 4335 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
1c7a8112
AM
4336}
4337
3674b34d
JDA
4338#ifndef NO_DEFERRED_PROFILE_COUNTERS
4339#define NO_DEFERRED_PROFILE_COUNTERS 0
4340#endif
4341
3674b34d
JDA
4342
4343/* Vector of funcdef numbers. */
4344static VEC(int,heap) *funcdef_nos;
4345
4346/* Output deferred profile counters. */
4347static void
4348output_deferred_profile_counters (void)
4349{
4350 unsigned int i;
4351 int align, n;
4352
4353 if (VEC_empty (int, funcdef_nos))
4354 return;
4355
d6b5193b 4356 switch_to_section (data_section);
3674b34d
JDA
4357 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4358 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4359
4360 for (i = 0; VEC_iterate (int, funcdef_nos, i, n); i++)
4361 {
4362 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4363 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4364 }
4365
4366 VEC_free (int, heap, funcdef_nos);
4367}
4368
1c7a8112 4369void
b7849684 4370hppa_profile_hook (int label_no)
1c7a8112 4371{
a3d4c92f
RC
4372 /* We use SImode for the address of the function in both 32 and
4373 64-bit code to avoid having to provide DImode versions of the
4374 lcla2 and load_offset_label_address insn patterns. */
4375 rtx reg = gen_reg_rtx (SImode);
4376 rtx label_rtx = gen_label_rtx ();
8f949e7e
JDA
4377 rtx begin_label_rtx, call_insn;
4378 char begin_label_name[16];
1c7a8112 4379
8f949e7e 4380 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
f6f315fe 4381 label_no);
a3d4c92f 4382 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
1c7a8112
AM
4383
4384 if (TARGET_64BIT)
4385 emit_move_insn (arg_pointer_rtx,
4386 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4387 GEN_INT (64)));
4388
1c7a8112
AM
4389 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4390
110abdbc 4391 /* The address of the function is loaded into %r25 with an instruction-
a3d4c92f
RC
4392 relative sequence that avoids the use of relocations. The sequence
4393 is split so that the load_offset_label_address instruction can
4394 occupy the delay slot of the call to _mcount. */
4395 if (TARGET_PA_20)
4396 emit_insn (gen_lcla2 (reg, label_rtx));
4397 else
4398 emit_insn (gen_lcla1 (reg, label_rtx));
4399
4400 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4401 reg, begin_label_rtx, label_rtx));
4402
3674b34d 4403#if !NO_DEFERRED_PROFILE_COUNTERS
1c7a8112
AM
4404 {
4405 rtx count_label_rtx, addr, r24;
8f949e7e 4406 char count_label_name[16];
1c7a8112 4407
3674b34d 4408 VEC_safe_push (int, heap, funcdef_nos, label_no);
8f949e7e
JDA
4409 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4410 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
1c7a8112 4411
bdad4be5 4412 addr = force_reg (Pmode, count_label_rtx);
1c7a8112
AM
4413 r24 = gen_rtx_REG (Pmode, 24);
4414 emit_move_insn (r24, addr);
4415
1c7a8112 4416 call_insn =
a3d4c92f
RC
4417 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4418 gen_rtx_SYMBOL_REF (Pmode,
4419 "_mcount")),
4420 GEN_INT (TARGET_64BIT ? 24 : 12)));
1c7a8112
AM
4421
4422 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4423 }
4424#else
a3d4c92f 4425
1c7a8112 4426 call_insn =
a3d4c92f
RC
4427 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4428 gen_rtx_SYMBOL_REF (Pmode,
4429 "_mcount")),
4430 GEN_INT (TARGET_64BIT ? 16 : 8)));
4431
1c7a8112
AM
4432#endif
4433
a3d4c92f
RC
4434 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4435 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4436
1c7a8112
AM
4437 /* Indicate the _mcount call cannot throw, nor will it execute a
4438 non-local goto. */
062a5fd1 4439 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
824e7605
AM
4440}
4441
e99d6592
MS
4442/* Fetch the return address for the frame COUNT steps up from
4443 the current frame, after the prologue. FRAMEADDR is the
4444 frame pointer of the COUNT frame.
4445
cf3735b8
JDA
4446 We want to ignore any export stub remnants here. To handle this,
4447 we examine the code at the return address, and if it is an export
4448 stub, we return a memory rtx for the stub return address stored
4449 at frame-24.
c28eb6c2
JL
4450
4451 The value returned is used in two different ways:
4452
4453 1. To find a function's caller.
4454
4455 2. To change the return address for a function.
4456
4457 This function handles most instances of case 1; however, it will
4458 fail if there are two levels of stubs to execute on the return
4459 path. The only way I believe that can happen is if the return value
4460 needs a parameter relocation, which never happens for C code.
4461
4462 This function handles most instances of case 2; however, it will
4463 fail if we did not originally have stub code on the return path
cf3735b8 4464 but will need stub code on the new return path. This can happen if
c28eb6c2 4465 the caller & callee are both in the main program, but the new
cf3735b8 4466 return location is in a shared library. */
e99d6592
MS
4467
4468rtx
b7849684 4469return_addr_rtx (int count, rtx frameaddr)
e99d6592
MS
4470{
4471 rtx label;
cf3735b8 4472 rtx rp;
e99d6592
MS
4473 rtx saved_rp;
4474 rtx ins;
4475
f90b7a5a
PB
4476 /* Instruction stream at the normal return address for the export stub:
4477
4478 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4479 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4480 0x00011820 | stub+16: mtsp r1,sr0
4481 0xe0400002 | stub+20: be,n 0(sr0,rp)
4482
4483 0xe0400002 must be specified as -532676606 so that it won't be
4484 rejected as an invalid immediate operand on 64-bit hosts. */
4485
4486 HOST_WIDE_INT insns[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4487 int i;
4488
cf3735b8
JDA
4489 if (count != 0)
4490 return NULL_RTX;
a7721dc0 4491
cf3735b8 4492 rp = get_hard_reg_initial_val (Pmode, 2);
e99d6592 4493
cf3735b8
JDA
4494 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4495 return rp;
e99d6592 4496
f90b7a5a
PB
4497 /* If there is no export stub then just use the value saved from
4498 the return pointer register. */
4499
a7721dc0 4500 saved_rp = gen_reg_rtx (Pmode);
cf3735b8 4501 emit_move_insn (saved_rp, rp);
e99d6592
MS
4502
4503 /* Get pointer to the instruction stream. We have to mask out the
4504 privilege level from the two low order bits of the return address
4505 pointer here so that ins will point to the start of the first
4506 instruction that would have been executed if we returned. */
cf3735b8 4507 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
e99d6592
MS
4508 label = gen_label_rtx ();
4509
4510 /* Check the instruction stream at the normal return address for the
f90b7a5a
PB
4511 export stub. If it is an export stub, than our return address is
4512 really in -24[frameaddr]. */
e99d6592 4513
f90b7a5a
PB
4514 for (i = 0; i < 3; i++)
4515 {
4516 rtx op0 = gen_rtx_MEM (SImode, plus_constant (ins, i * 4));
4517 rtx op1 = GEN_INT (insns[i]);
4518 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4519 }
e99d6592 4520
cf3735b8 4521 /* Here we know that our return address points to an export
e99d6592 4522 stub. We don't want to return the address of the export stub,
cf3735b8
JDA
4523 but rather the return address of the export stub. That return
4524 address is stored at -24[frameaddr]. */
e99d6592 4525
cf3735b8
JDA
4526 emit_move_insn (saved_rp,
4527 gen_rtx_MEM (Pmode,
4528 memory_address (Pmode,
4529 plus_constant (frameaddr,
4530 -24))));
e99d6592
MS
4531
4532 emit_label (label);
f90b7a5a 4533
cf3735b8 4534 return saved_rp;
e99d6592
MS
4535}
4536
188538df 4537void
f90b7a5a 4538emit_bcond_fp (rtx operands[])
188538df 4539{
f90b7a5a
PB
4540 enum rtx_code code = GET_CODE (operands[0]);
4541 rtx operand0 = operands[1];
4542 rtx operand1 = operands[2];
4543 rtx label = operands[3];
4544
4545 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4546 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4547
ad2c71b7
JL
4548 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4549 gen_rtx_IF_THEN_ELSE (VOIDmode,
f90b7a5a 4550 gen_rtx_fmt_ee (NE,
ad2c71b7
JL
4551 VOIDmode,
4552 gen_rtx_REG (CCFPmode, 0),
4553 const0_rtx),
f90b7a5a 4554 gen_rtx_LABEL_REF (VOIDmode, label),
ad2c71b7 4555 pc_rtx)));
188538df
TG
4556
4557}
4558
780f491f
TG
4559/* Adjust the cost of a scheduling dependency. Return the new cost of
4560 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4561
c237e94a 4562static int
b7849684 4563pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
780f491f 4564{
b09fa787
JL
4565 enum attr_type attr_type;
4566
5d50fab3
JL
4567 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4568 true dependencies as they are described with bypasses now. */
4569 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
86001391
JQ
4570 return cost;
4571
e150ae4f
TG
4572 if (! recog_memoized (insn))
4573 return 0;
780f491f 4574
b09fa787
JL
4575 attr_type = get_attr_type (insn);
4576
144d51f9 4577 switch (REG_NOTE_KIND (link))
780f491f 4578 {
144d51f9 4579 case REG_DEP_ANTI:
780f491f
TG
4580 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4581 cycles later. */
4582
b09fa787 4583 if (attr_type == TYPE_FPLOAD)
780f491f 4584 {
e150ae4f
TG
4585 rtx pat = PATTERN (insn);
4586 rtx dep_pat = PATTERN (dep_insn);
4587 if (GET_CODE (pat) == PARALLEL)
4588 {
4589 /* This happens for the fldXs,mb patterns. */
4590 pat = XVECEXP (pat, 0, 0);
4591 }
4592 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
780f491f 4593 /* If this happens, we have to extend this to schedule
e150ae4f
TG
4594 optimally. Return 0 for now. */
4595 return 0;
780f491f 4596
e150ae4f 4597 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
780f491f 4598 {
e150ae4f
TG
4599 if (! recog_memoized (dep_insn))
4600 return 0;
780f491f
TG
4601 switch (get_attr_type (dep_insn))
4602 {
4603 case TYPE_FPALU:
c47decad
JL
4604 case TYPE_FPMULSGL:
4605 case TYPE_FPMULDBL:
780f491f
TG
4606 case TYPE_FPDIVSGL:
4607 case TYPE_FPDIVDBL:
4608 case TYPE_FPSQRTSGL:
4609 case TYPE_FPSQRTDBL:
e150ae4f 4610 /* A fpload can't be issued until one cycle before a
ddd5a7c1 4611 preceding arithmetic operation has finished if
e150ae4f
TG
4612 the target of the fpload is any of the sources
4613 (or destination) of the arithmetic operation. */
5d50fab3 4614 return insn_default_latency (dep_insn) - 1;
c47decad
JL
4615
4616 default:
4617 return 0;
4618 }
4619 }
4620 }
b09fa787 4621 else if (attr_type == TYPE_FPALU)
c47decad
JL
4622 {
4623 rtx pat = PATTERN (insn);
4624 rtx dep_pat = PATTERN (dep_insn);
4625 if (GET_CODE (pat) == PARALLEL)
4626 {
4627 /* This happens for the fldXs,mb patterns. */
4628 pat = XVECEXP (pat, 0, 0);
4629 }
4630 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4631 /* If this happens, we have to extend this to schedule
4632 optimally. Return 0 for now. */
4633 return 0;
4634
4635 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4636 {
4637 if (! recog_memoized (dep_insn))
4638 return 0;
4639 switch (get_attr_type (dep_insn))
4640 {
4641 case TYPE_FPDIVSGL:
4642 case TYPE_FPDIVDBL:
4643 case TYPE_FPSQRTSGL:
4644 case TYPE_FPSQRTDBL:
4645 /* An ALU flop can't be issued until two cycles before a
ddd5a7c1 4646 preceding divide or sqrt operation has finished if
c47decad
JL
4647 the target of the ALU flop is any of the sources
4648 (or destination) of the divide or sqrt operation. */
5d50fab3 4649 return insn_default_latency (dep_insn) - 2;
780f491f
TG
4650
4651 default:
4652 return 0;
4653 }
4654 }
4655 }
4656
4657 /* For other anti dependencies, the cost is 0. */
4658 return 0;
144d51f9
NS
4659
4660 case REG_DEP_OUTPUT:
c47decad
JL
4661 /* Output dependency; DEP_INSN writes a register that INSN writes some
4662 cycles later. */
b09fa787 4663 if (attr_type == TYPE_FPLOAD)
c47decad
JL
4664 {
4665 rtx pat = PATTERN (insn);
4666 rtx dep_pat = PATTERN (dep_insn);
4667 if (GET_CODE (pat) == PARALLEL)
4668 {
4669 /* This happens for the fldXs,mb patterns. */
4670 pat = XVECEXP (pat, 0, 0);
4671 }
4672 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4673 /* If this happens, we have to extend this to schedule
4674 optimally. Return 0 for now. */
4675 return 0;
4676
4677 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4678 {
4679 if (! recog_memoized (dep_insn))
4680 return 0;
4681 switch (get_attr_type (dep_insn))
4682 {
4683 case TYPE_FPALU:
4684 case TYPE_FPMULSGL:
4685 case TYPE_FPMULDBL:
4686 case TYPE_FPDIVSGL:
4687 case TYPE_FPDIVDBL:
4688 case TYPE_FPSQRTSGL:
4689 case TYPE_FPSQRTDBL:
4690 /* A fpload can't be issued until one cycle before a
ddd5a7c1 4691 preceding arithmetic operation has finished if
c47decad 4692 the target of the fpload is the destination of the
fae15c93
VM
4693 arithmetic operation.
4694
4695 Exception: For PA7100LC, PA7200 and PA7300, the cost
4696 is 3 cycles, unless they bundle together. We also
4697 pay the penalty if the second insn is a fpload. */
5d50fab3 4698 return insn_default_latency (dep_insn) - 1;
780f491f 4699
c47decad
JL
4700 default:
4701 return 0;
4702 }
4703 }
4704 }
b09fa787 4705 else if (attr_type == TYPE_FPALU)
c47decad
JL
4706 {
4707 rtx pat = PATTERN (insn);
4708 rtx dep_pat = PATTERN (dep_insn);
4709 if (GET_CODE (pat) == PARALLEL)
4710 {
4711 /* This happens for the fldXs,mb patterns. */
4712 pat = XVECEXP (pat, 0, 0);
4713 }
4714 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4715 /* If this happens, we have to extend this to schedule
4716 optimally. Return 0 for now. */
4717 return 0;
4718
4719 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4720 {
4721 if (! recog_memoized (dep_insn))
4722 return 0;
4723 switch (get_attr_type (dep_insn))
4724 {
4725 case TYPE_FPDIVSGL:
4726 case TYPE_FPDIVDBL:
4727 case TYPE_FPSQRTSGL:
4728 case TYPE_FPSQRTDBL:
4729 /* An ALU flop can't be issued until two cycles before a
ddd5a7c1 4730 preceding divide or sqrt operation has finished if
c47decad 4731 the target of the ALU flop is also the target of
38e01259 4732 the divide or sqrt operation. */
5d50fab3 4733 return insn_default_latency (dep_insn) - 2;
c47decad
JL
4734
4735 default:
4736 return 0;
4737 }
4738 }
4739 }
4740
4741 /* For other output dependencies, the cost is 0. */
4742 return 0;
144d51f9
NS
4743
4744 default:
4745 gcc_unreachable ();
c47decad 4746 }
780f491f 4747}
188538df 4748
c237e94a
ZW
4749/* Adjust scheduling priorities. We use this to try and keep addil
4750 and the next use of %r1 close together. */
4751static int
b7849684 4752pa_adjust_priority (rtx insn, int priority)
c237e94a
ZW
4753{
4754 rtx set = single_set (insn);
4755 rtx src, dest;
4756 if (set)
4757 {
4758 src = SET_SRC (set);
4759 dest = SET_DEST (set);
4760 if (GET_CODE (src) == LO_SUM
4761 && symbolic_operand (XEXP (src, 1), VOIDmode)
4762 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4763 priority >>= 3;
4764
4765 else if (GET_CODE (src) == MEM
4766 && GET_CODE (XEXP (src, 0)) == LO_SUM
4767 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4768 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4769 priority >>= 1;
4770
4771 else if (GET_CODE (dest) == MEM
4772 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4773 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4774 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4775 priority >>= 3;
4776 }
4777 return priority;
4778}
4779
4780/* The 700 can only issue a single insn at a time.
4781 The 7XXX processors can issue two insns at a time.
4782 The 8000 can issue 4 insns at a time. */
4783static int
b7849684 4784pa_issue_rate (void)
c237e94a
ZW
4785{
4786 switch (pa_cpu)
4787 {
4788 case PROCESSOR_700: return 1;
4789 case PROCESSOR_7100: return 2;
4790 case PROCESSOR_7100LC: return 2;
4791 case PROCESSOR_7200: return 2;
fae15c93 4792 case PROCESSOR_7300: return 2;
c237e94a
ZW
4793 case PROCESSOR_8000: return 4;
4794
4795 default:
144d51f9 4796 gcc_unreachable ();
c237e94a
ZW
4797 }
4798}
4799
4800
4801
3673e996 4802/* Return any length adjustment needed by INSN which already has its length
23f6f34f 4803 computed as LENGTH. Return zero if no adjustment is necessary.
3673e996 4804
b9821af8 4805 For the PA: function calls, millicode calls, and backwards short
23f6f34f 4806 conditional branches with unfilled delay slots need an adjustment by +1
b9821af8 4807 (to account for the NOP which will be inserted into the instruction stream).
3673e996
RS
4808
4809 Also compute the length of an inline block move here as it is too
b9821af8 4810 complicated to express as a length attribute in pa.md. */
3673e996 4811int
b7849684 4812pa_adjust_insn_length (rtx insn, int length)
3673e996
RS
4813{
4814 rtx pat = PATTERN (insn);
4815
32562302
JDA
4816 /* Jumps inside switch tables which have unfilled delay slots need
4817 adjustment. */
4818 if (GET_CODE (insn) == JUMP_INSN
cb4d476c
JDA
4819 && GET_CODE (pat) == PARALLEL
4820 && get_attr_type (insn) == TYPE_BTABLE_BRANCH)
746a9efa 4821 return 4;
3673e996
RS
4822 /* Millicode insn with an unfilled delay slot. */
4823 else if (GET_CODE (insn) == INSN
4824 && GET_CODE (pat) != SEQUENCE
4825 && GET_CODE (pat) != USE
4826 && GET_CODE (pat) != CLOBBER
4827 && get_attr_type (insn) == TYPE_MILLI)
a1b36964 4828 return 4;
3673e996
RS
4829 /* Block move pattern. */
4830 else if (GET_CODE (insn) == INSN
4831 && GET_CODE (pat) == PARALLEL
4096479e 4832 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
3673e996
RS
4833 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4834 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4835 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4836 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
70128ad9 4837 return compute_movmem_length (insn) - 4;
cdc9103c
JDA
4838 /* Block clear pattern. */
4839 else if (GET_CODE (insn) == INSN
4840 && GET_CODE (pat) == PARALLEL
4841 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4842 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4843 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4844 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
70128ad9 4845 return compute_clrmem_length (insn) - 4;
3673e996 4846 /* Conditional branch with an unfilled delay slot. */
b9821af8
JL
4847 else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn))
4848 {
4849 /* Adjust a short backwards conditional with an unfilled delay slot. */
4850 if (GET_CODE (pat) == SET
a1b36964 4851 && length == 4
3232e9d8 4852 && JUMP_LABEL (insn) != NULL_RTX
b9821af8 4853 && ! forward_branch_p (insn))
a1b36964 4854 return 4;
b1092901
JL
4855 else if (GET_CODE (pat) == PARALLEL
4856 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4857 && length == 4)
4858 return 4;
b9821af8 4859 /* Adjust dbra insn with short backwards conditional branch with
23f6f34f 4860 unfilled delay slot -- only for case where counter is in a
fe19a83d 4861 general register register. */
b9821af8
JL
4862 else if (GET_CODE (pat) == PARALLEL
4863 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4864 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
23f6f34f 4865 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
a1b36964 4866 && length == 4
b9821af8 4867 && ! forward_branch_p (insn))
a1b36964 4868 return 4;
b9821af8
JL
4869 else
4870 return 0;
4871 }
b1092901 4872 return 0;
3673e996
RS
4873}
4874
8a5b8538
AS
/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  Return true
   exactly when CODE is one of the punctuation characters '@', '#',
   '*' or '^'.  */

static bool
pa_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '@':
    case '#':
    case '*':
    case '^':
      return true;

    default:
      return false;
    }
}
4888
188538df
TG
4889/* Print operand X (an rtx) in assembler syntax to file FILE.
4890 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4891 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4892
4893void
b7849684 4894print_operand (FILE *file, rtx x, int code)
188538df
TG
4895{
4896 switch (code)
4897 {
4898 case '#':
4899 /* Output a 'nop' if there's nothing for the delay slot. */
4900 if (dbr_sequence_length () == 0)
4901 fputs ("\n\tnop", file);
4902 return;
4903 case '*':
5bdc5878 4904 /* Output a nullification completer if there's nothing for the */
23f6f34f 4905 /* delay slot or nullification is requested. */
188538df
TG
4906 if (dbr_sequence_length () == 0 ||
4907 (final_sequence &&
4908 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
4909 fputs (",n", file);
4910 return;
4911 case 'R':
4912 /* Print out the second register name of a register pair.
4913 I.e., R (6) => 7. */
831c1763 4914 fputs (reg_names[REGNO (x) + 1], file);
188538df
TG
4915 return;
4916 case 'r':
fe19a83d 4917 /* A register or zero. */
f048ca47
JL
4918 if (x == const0_rtx
4919 || (x == CONST0_RTX (DFmode))
4920 || (x == CONST0_RTX (SFmode)))
188538df 4921 {
55abf18a
JL
4922 fputs ("%r0", file);
4923 return;
4924 }
4925 else
4926 break;
4927 case 'f':
fe19a83d 4928 /* A register or zero (floating point). */
55abf18a
JL
4929 if (x == const0_rtx
4930 || (x == CONST0_RTX (DFmode))
4931 || (x == CONST0_RTX (SFmode)))
4932 {
4933 fputs ("%fr0", file);
188538df
TG
4934 return;
4935 }
4936 else
4937 break;
f8eb41cc
JL
4938 case 'A':
4939 {
4940 rtx xoperands[2];
4941
4942 xoperands[0] = XEXP (XEXP (x, 0), 0);
4943 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
4944 output_global_address (file, xoperands[1], 0);
4945 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
4946 return;
4947 }
4948
c85b8963 4949 case 'C': /* Plain (C)ondition */
188538df
TG
4950 case 'X':
4951 switch (GET_CODE (x))
23f6f34f 4952 {
188538df 4953 case EQ:
e236a9ff 4954 fputs ("=", file); break;
188538df 4955 case NE:
e236a9ff 4956 fputs ("<>", file); break;
188538df 4957 case GT:
e236a9ff 4958 fputs (">", file); break;
188538df 4959 case GE:
e236a9ff 4960 fputs (">=", file); break;
188538df 4961 case GEU:
e236a9ff 4962 fputs (">>=", file); break;
188538df 4963 case GTU:
e236a9ff 4964 fputs (">>", file); break;
188538df 4965 case LT:
e236a9ff 4966 fputs ("<", file); break;
188538df 4967 case LE:
e236a9ff 4968 fputs ("<=", file); break;
188538df 4969 case LEU:
e236a9ff 4970 fputs ("<<=", file); break;
188538df 4971 case LTU:
e236a9ff 4972 fputs ("<<", file); break;
188538df 4973 default:
144d51f9 4974 gcc_unreachable ();
188538df
TG
4975 }
4976 return;
c85b8963 4977 case 'N': /* Condition, (N)egated */
188538df
TG
4978 switch (GET_CODE (x))
4979 {
4980 case EQ:
e236a9ff 4981 fputs ("<>", file); break;
188538df 4982 case NE:
e236a9ff 4983 fputs ("=", file); break;
188538df 4984 case GT:
e236a9ff 4985 fputs ("<=", file); break;
188538df 4986 case GE:
e236a9ff 4987 fputs ("<", file); break;
188538df 4988 case GEU:
e236a9ff 4989 fputs ("<<", file); break;
188538df 4990 case GTU:
e236a9ff 4991 fputs ("<<=", file); break;
188538df 4992 case LT:
e236a9ff 4993 fputs (">=", file); break;
188538df 4994 case LE:
e236a9ff 4995 fputs (">", file); break;
188538df 4996 case LEU:
e236a9ff 4997 fputs (">>", file); break;
188538df 4998 case LTU:
e236a9ff 4999 fputs (">>=", file); break;
188538df 5000 default:
144d51f9 5001 gcc_unreachable ();
188538df
TG
5002 }
5003 return;
831c1763 5004 /* For floating point comparisons. Note that the output
69049ba0
JDA
5005 predicates are the complement of the desired mode. The
5006 conditions for GT, GE, LT, LE and LTGT cause an invalid
5007 operation exception if the result is unordered and this
5008 exception is enabled in the floating-point status register. */
d6c0d377
JL
5009 case 'Y':
5010 switch (GET_CODE (x))
5011 {
5012 case EQ:
e236a9ff 5013 fputs ("!=", file); break;
d6c0d377 5014 case NE:
e236a9ff 5015 fputs ("=", file); break;
d6c0d377 5016 case GT:
becf1647 5017 fputs ("!>", file); break;
d6c0d377 5018 case GE:
becf1647 5019 fputs ("!>=", file); break;
d6c0d377 5020 case LT:
becf1647 5021 fputs ("!<", file); break;
d6c0d377 5022 case LE:
becf1647
DA
5023 fputs ("!<=", file); break;
5024 case LTGT:
5025 fputs ("!<>", file); break;
5026 case UNLE:
69049ba0 5027 fputs ("!?<=", file); break;
becf1647 5028 case UNLT:
69049ba0 5029 fputs ("!?<", file); break;
becf1647 5030 case UNGE:
69049ba0 5031 fputs ("!?>=", file); break;
becf1647 5032 case UNGT:
69049ba0 5033 fputs ("!?>", file); break;
becf1647 5034 case UNEQ:
69049ba0 5035 fputs ("!?=", file); break;
becf1647 5036 case UNORDERED:
69049ba0 5037 fputs ("!?", file); break;
becf1647 5038 case ORDERED:
69049ba0 5039 fputs ("?", file); break;
d6c0d377 5040 default:
144d51f9 5041 gcc_unreachable ();
d6c0d377
JL
5042 }
5043 return;
c85b8963
TG
5044 case 'S': /* Condition, operands are (S)wapped. */
5045 switch (GET_CODE (x))
5046 {
5047 case EQ:
e236a9ff 5048 fputs ("=", file); break;
c85b8963 5049 case NE:
e236a9ff 5050 fputs ("<>", file); break;
c85b8963 5051 case GT:
e236a9ff 5052 fputs ("<", file); break;
c85b8963 5053 case GE:
e236a9ff 5054 fputs ("<=", file); break;
c85b8963 5055 case GEU:
e236a9ff 5056 fputs ("<<=", file); break;
c85b8963 5057 case GTU:
e236a9ff 5058 fputs ("<<", file); break;
c85b8963 5059 case LT:
e236a9ff 5060 fputs (">", file); break;
c85b8963 5061 case LE:
e236a9ff 5062 fputs (">=", file); break;
c85b8963 5063 case LEU:
e236a9ff 5064 fputs (">>=", file); break;
c85b8963 5065 case LTU:
e236a9ff 5066 fputs (">>", file); break;
c85b8963 5067 default:
144d51f9 5068 gcc_unreachable ();
23f6f34f 5069 }
c85b8963
TG
5070 return;
5071 case 'B': /* Condition, (B)oth swapped and negate. */
5072 switch (GET_CODE (x))
5073 {
5074 case EQ:
e236a9ff 5075 fputs ("<>", file); break;
c85b8963 5076 case NE:
e236a9ff 5077 fputs ("=", file); break;
c85b8963 5078 case GT:
e236a9ff 5079 fputs (">=", file); break;
c85b8963 5080 case GE:
e236a9ff 5081 fputs (">", file); break;
c85b8963 5082 case GEU:
e236a9ff 5083 fputs (">>", file); break;
c85b8963 5084 case GTU:
e236a9ff 5085 fputs (">>=", file); break;
c85b8963 5086 case LT:
e236a9ff 5087 fputs ("<=", file); break;
c85b8963 5088 case LE:
e236a9ff 5089 fputs ("<", file); break;
c85b8963 5090 case LEU:
e236a9ff 5091 fputs ("<<", file); break;
c85b8963 5092 case LTU:
e236a9ff 5093 fputs ("<<=", file); break;
c85b8963 5094 default:
144d51f9 5095 gcc_unreachable ();
23f6f34f 5096 }
c85b8963
TG
5097 return;
5098 case 'k':
144d51f9
NS
5099 gcc_assert (GET_CODE (x) == CONST_INT);
5100 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5101 return;
520babc7 5102 case 'Q':
144d51f9
NS
5103 gcc_assert (GET_CODE (x) == CONST_INT);
5104 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5105 return;
c8d6697c 5106 case 'L':
144d51f9
NS
5107 gcc_assert (GET_CODE (x) == CONST_INT);
5108 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5109 return;
4802a0d6 5110 case 'O':
144d51f9
NS
5111 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5112 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5113 return;
520babc7 5114 case 'p':
144d51f9
NS
5115 gcc_assert (GET_CODE (x) == CONST_INT);
5116 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5117 return;
c8d6697c 5118 case 'P':
144d51f9
NS
5119 gcc_assert (GET_CODE (x) == CONST_INT);
5120 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5121 return;
c85b8963
TG
5122 case 'I':
5123 if (GET_CODE (x) == CONST_INT)
5124 fputs ("i", file);
5125 return;
188538df 5126 case 'M':
2414e0e2 5127 case 'F':
188538df
TG
5128 switch (GET_CODE (XEXP (x, 0)))
5129 {
5130 case PRE_DEC:
5131 case PRE_INC:
f38b27c7
JL
5132 if (ASSEMBLER_DIALECT == 0)
5133 fputs ("s,mb", file);
5134 else
5135 fputs (",mb", file);
188538df
TG
5136 break;
5137 case POST_DEC:
5138 case POST_INC:
f38b27c7
JL
5139 if (ASSEMBLER_DIALECT == 0)
5140 fputs ("s,ma", file);
5141 else
5142 fputs (",ma", file);
188538df 5143 break;
2414e0e2 5144 case PLUS:
d8f95bed
JDA
5145 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5146 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5147 {
5148 if (ASSEMBLER_DIALECT == 0)
5149 fputs ("x", file);
5150 }
5151 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5152 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
f38b27c7
JL
5153 {
5154 if (ASSEMBLER_DIALECT == 0)
5155 fputs ("x,s", file);
5156 else
5157 fputs (",s", file);
5158 }
5159 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
2414e0e2 5160 fputs ("s", file);
188538df
TG
5161 break;
5162 default:
f38b27c7 5163 if (code == 'F' && ASSEMBLER_DIALECT == 0)
2414e0e2 5164 fputs ("s", file);
188538df
TG
5165 break;
5166 }
5167 return;
5168 case 'G':
ad238e4b
JL
5169 output_global_address (file, x, 0);
5170 return;
5171 case 'H':
5172 output_global_address (file, x, 1);
188538df
TG
5173 return;
5174 case 0: /* Don't do anything special */
5175 break;
a1747d2c
TG
5176 case 'Z':
5177 {
5178 unsigned op[3];
6fda0f5b 5179 compute_zdepwi_operands (INTVAL (x), op);
a1747d2c
TG
5180 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5181 return;
5182 }
520babc7
JL
5183 case 'z':
5184 {
5185 unsigned op[3];
5186 compute_zdepdi_operands (INTVAL (x), op);
5187 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5188 return;
5189 }
11881f37
AM
5190 case 'c':
5191 /* We can get here from a .vtable_inherit due to our
5192 CONSTANT_ADDRESS_P rejecting perfectly good constant
5193 addresses. */
5194 break;
188538df 5195 default:
144d51f9 5196 gcc_unreachable ();
188538df
TG
5197 }
5198 if (GET_CODE (x) == REG)
80225b66 5199 {
3ba1236f 5200 fputs (reg_names [REGNO (x)], file);
520babc7
JL
5201 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5202 {
5203 fputs ("R", file);
5204 return;
5205 }
5206 if (FP_REG_P (x)
5207 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5208 && (REGNO (x) & 1) == 0)
3ba1236f 5209 fputs ("L", file);
80225b66 5210 }
188538df
TG
5211 else if (GET_CODE (x) == MEM)
5212 {
5213 int size = GET_MODE_SIZE (GET_MODE (x));
478a4495 5214 rtx base = NULL_RTX;
188538df
TG
5215 switch (GET_CODE (XEXP (x, 0)))
5216 {
5217 case PRE_DEC:
5218 case POST_DEC:
520babc7 5219 base = XEXP (XEXP (x, 0), 0);
d2d28085 5220 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
188538df
TG
5221 break;
5222 case PRE_INC:
5223 case POST_INC:
520babc7 5224 base = XEXP (XEXP (x, 0), 0);
d2d28085 5225 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
188538df 5226 break;
d8f95bed
JDA
5227 case PLUS:
5228 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
d2d28085 5229 fprintf (file, "%s(%s)",
2414e0e2
JL
5230 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5231 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
d8f95bed 5232 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
d2d28085 5233 fprintf (file, "%s(%s)",
2414e0e2
JL
5234 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5235 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
d8f95bed
JDA
5236 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5237 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5238 {
5239 /* Because the REG_POINTER flag can get lost during reload,
5240 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5241 index and base registers in the combined move patterns. */
5242 rtx base = XEXP (XEXP (x, 0), 1);
5243 rtx index = XEXP (XEXP (x, 0), 0);
5244
5245 fprintf (file, "%s(%s)",
5246 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5247 }
2414e0e2
JL
5248 else
5249 output_address (XEXP (x, 0));
188538df 5250 break;
d8f95bed
JDA
5251 default:
5252 output_address (XEXP (x, 0));
5253 break;
188538df
TG
5254 }
5255 }
188538df
TG
5256 else
5257 output_addr_const (file, x);
5258}
5259
fe19a83d 5260/* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
188538df
TG
5261
5262void
b7849684 5263output_global_address (FILE *file, rtx x, int round_constant)
188538df 5264{
43940f6b
JL
5265
5266 /* Imagine (high (const (plus ...))). */
5267 if (GET_CODE (x) == HIGH)
5268 x = XEXP (x, 0);
5269
519104fe 5270 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
744b2d61 5271 output_addr_const (file, x);
6bb36601 5272 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
188538df 5273 {
744b2d61 5274 output_addr_const (file, x);
e236a9ff 5275 fputs ("-$global$", file);
188538df
TG
5276 }
5277 else if (GET_CODE (x) == CONST)
5278 {
519104fe 5279 const char *sep = "";
188538df 5280 int offset = 0; /* assembler wants -$global$ at end */
516c2342 5281 rtx base = NULL_RTX;
23f6f34f 5282
144d51f9 5283 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
188538df 5284 {
144d51f9 5285 case SYMBOL_REF:
188538df
TG
5286 base = XEXP (XEXP (x, 0), 0);
5287 output_addr_const (file, base);
144d51f9
NS
5288 break;
5289 case CONST_INT:
5290 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5291 break;
5292 default:
5293 gcc_unreachable ();
188538df 5294 }
188538df 5295
144d51f9 5296 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
188538df 5297 {
144d51f9 5298 case SYMBOL_REF:
188538df
TG
5299 base = XEXP (XEXP (x, 0), 1);
5300 output_addr_const (file, base);
144d51f9
NS
5301 break;
5302 case CONST_INT:
5303 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5304 break;
5305 default:
5306 gcc_unreachable ();
188538df 5307 }
188538df 5308
ad238e4b
JL
5309 /* How bogus. The compiler is apparently responsible for
5310 rounding the constant if it uses an LR field selector.
5311
5312 The linker and/or assembler seem a better place since
5313 they have to do this kind of thing already.
5314
5315 If we fail to do this, HP's optimizing linker may eliminate
5316 an addil, but not update the ldw/stw/ldo instruction that
5317 uses the result of the addil. */
5318 if (round_constant)
5319 offset = ((offset + 0x1000) & ~0x1fff);
5320
144d51f9 5321 switch (GET_CODE (XEXP (x, 0)))
188538df 5322 {
144d51f9 5323 case PLUS:
188538df
TG
5324 if (offset < 0)
5325 {
5326 offset = -offset;
5327 sep = "-";
5328 }
5329 else
5330 sep = "+";
144d51f9
NS
5331 break;
5332
5333 case MINUS:
5334 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5335 sep = "-";
5336 break;
188538df 5337
144d51f9
NS
5338 default:
5339 gcc_unreachable ();
5340 }
5341
519104fe 5342 if (!read_only_operand (base, VOIDmode) && !flag_pic)
e236a9ff 5343 fputs ("-$global$", file);
ad238e4b 5344 if (offset)
831c1763 5345 fprintf (file, "%s%d", sep, offset);
188538df
TG
5346 }
5347 else
5348 output_addr_const (file, x);
5349}
5350
1bc7c5b6
ZW
5351/* Output boilerplate text to appear at the beginning of the file.
5352 There are several possible versions. */
5353#define aputs(x) fputs(x, asm_out_file)
5354static inline void
b7849684 5355pa_file_start_level (void)
1bc7c5b6
ZW
5356{
5357 if (TARGET_64BIT)
5358 aputs ("\t.LEVEL 2.0w\n");
5359 else if (TARGET_PA_20)
5360 aputs ("\t.LEVEL 2.0\n");
5361 else if (TARGET_PA_11)
5362 aputs ("\t.LEVEL 1.1\n");
5363 else
5364 aputs ("\t.LEVEL 1.0\n");
5365}
5366
5367static inline void
b7849684 5368pa_file_start_space (int sortspace)
1bc7c5b6
ZW
5369{
5370 aputs ("\t.SPACE $PRIVATE$");
5371 if (sortspace)
5372 aputs (",SORT=16");
5373 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
5374 "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5375 "\n\t.SPACE $TEXT$");
5376 if (sortspace)
5377 aputs (",SORT=8");
5378 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5379 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5380}
5381
5382static inline void
b7849684 5383pa_file_start_file (int want_version)
1bc7c5b6
ZW
5384{
5385 if (write_symbols != NO_DEBUG)
5386 {
5387 output_file_directive (asm_out_file, main_input_filename);
5388 if (want_version)
5389 aputs ("\t.version\t\"01.01\"\n");
5390 }
5391}
5392
5393static inline void
b7849684 5394pa_file_start_mcount (const char *aswhat)
1bc7c5b6
ZW
5395{
5396 if (profile_flag)
5397 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5398}
5399
/* File prologue, ELF flavor: architecture level, _mcount imported as
   ENTRY when profiling, then the file directive without a version.  */
static void
pa_elf_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_mcount ("ENTRY");
  pa_file_start_file (0);
}

/* File prologue, SOM flavor: architecture level, unsorted space
   declarations, imports of $global$ and $$dyncall, _mcount imported
   as CODE when profiling, then the file directive without a
   version.  */
static void
pa_som_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (0);
  aputs ("\t.IMPORT $global$,DATA\n"
         "\t.IMPORT $$dyncall,MILLICODE\n");
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}

/* File prologue, Linux flavor: the file directive with a version comes
   first, followed by the architecture level and the _mcount import
   as CODE when profiling.  */
static void
pa_linux_file_start (void)
{
  pa_file_start_file (1);
  pa_file_start_level ();
  pa_file_start_mcount ("CODE");
}

/* File prologue, 64-bit HP-UX with GAS: architecture level, a type
   directive marking _mcount as a function when profiling (only if
   ASM_OUTPUT_TYPE_DIRECTIVE is available), then the file directive
   with a version.  */
static void
pa_hpux64_gas_file_start (void)
{
  pa_file_start_level ();
#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  if (profile_flag)
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
#endif
  pa_file_start_file (1);
}

/* File prologue, 64-bit HP-UX with the HP assembler: architecture
   level, sorted space declarations, _mcount imported as CODE when
   profiling, then the file directive without a version.  */
static void
pa_hpux64_hpas_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (1);
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
#undef aputs
5447
7aaf280e
JDA
5448/* Search the deferred plabel list for SYMBOL and return its internal
5449 label. If an entry for SYMBOL is not found, a new entry is created. */
5450
5451rtx
5452get_deferred_plabel (rtx symbol)
a02aa5b0 5453{
744b2d61 5454 const char *fname = XSTR (symbol, 0);
a02aa5b0
JDA
5455 size_t i;
5456
5457 /* See if we have already put this function on the list of deferred
5458 plabels. This list is generally small, so a liner search is not
5459 too ugly. If it proves too slow replace it with something faster. */
5460 for (i = 0; i < n_deferred_plabels; i++)
744b2d61 5461 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
a02aa5b0
JDA
5462 break;
5463
5464 /* If the deferred plabel list is empty, or this entry was not found
5465 on the list, create a new entry on the list. */
5466 if (deferred_plabels == NULL || i == n_deferred_plabels)
5467 {
744b2d61
JDA
5468 tree id;
5469
a02aa5b0 5470 if (deferred_plabels == 0)
a9429e29 5471 deferred_plabels = ggc_alloc_deferred_plabel ();
a02aa5b0 5472 else
a9429e29
LB
5473 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5474 deferred_plabels,
5475 n_deferred_plabels + 1);
a02aa5b0
JDA
5476
5477 i = n_deferred_plabels++;
5478 deferred_plabels[i].internal_label = gen_label_rtx ();
744b2d61 5479 deferred_plabels[i].symbol = symbol;
a02aa5b0 5480
744b2d61
JDA
5481 /* Gross. We have just implicitly taken the address of this
5482 function. Mark it in the same manner as assemble_name. */
5483 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5484 if (id)
5485 mark_referenced (id);
a02aa5b0
JDA
5486 }
5487
7aaf280e 5488 return deferred_plabels[i].internal_label;
a02aa5b0
JDA
5489}
5490
a5fe455b 5491static void
b7849684 5492output_deferred_plabels (void)
359255a9 5493{
0f8e3849 5494 size_t i;
1a83bfc3
JDA
5495
5496 /* If we have some deferred plabels, then we need to switch into the
5497 data or readonly data section, and align it to a 4 byte boundary
6416ae7f 5498 before outputting the deferred plabels. */
359255a9
JL
5499 if (n_deferred_plabels)
5500 {
1a83bfc3 5501 switch_to_section (flag_pic ? data_section : readonly_data_section);
a5fe455b 5502 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
359255a9
JL
5503 }
5504
5505 /* Now output the deferred plabels. */
5506 for (i = 0; i < n_deferred_plabels; i++)
5507 {
ecc418c4 5508 targetm.asm_out.internal_label (asm_out_file, "L",
a5fe455b 5509 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
744b2d61 5510 assemble_integer (deferred_plabels[i].symbol,
3d9268b6 5511 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
359255a9
JL
5512 }
5513}
5514
#if HPUX_LONG_DOUBLE_LIBRARY
/* Initialize optabs to point to HPUX long double emulation routines.  */
static void
pa_hpux_init_libfuncs (void)
{
  /* The signed TFmode -> SImode conversion routine is registered under
     a name with an extra leading underscore when TARGET_64BIT.  */
  const char *const fix_si_name = (TARGET_64BIT
                                   ? "__U_Qfcnvfxt_quad_to_sgl"
                                   : "_U_Qfcnvfxt_quad_to_sgl");

  /* Arithmetic.  */
  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  /* Comparisons.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
  set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");

  /* Conversions between TFmode and SFmode/DFmode.  */
  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");

  /* TFmode to SImode/DImode.  */
  set_conv_libfunc (sfix_optab, SImode, TFmode, fix_si_name);
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");

  /* SImode/DImode to TFmode.  */
  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
}
#endif
5556
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli (enum millicodes);
static char imported[(int) end1000];
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
static const char import_string[] = ".IMPORT $$....,MILLICODE";
/* Offset of the "...." placeholder within import_string.  */
#define MILLI_START 10

/* Emit an .IMPORT directive for millicode routine CODE the first time
   it is requested; later requests for the same CODE emit nothing.  */

static void
import_milli (enum millicodes code)
{
  char directive[sizeof (import_string)];
  int which = (int) code;

  if (imported[which])
    return;

  imported[which] = 1;

  /* Copy the template, terminator included, then overwrite the four
     placeholder characters with the four-character routine name.  */
  memcpy (directive, import_string, sizeof (import_string));
  memcpy (directive + MILLI_START, milli_names[which], 4);
  output_asm_insn (directive, 0);
}
5580
23f6f34f 5581/* The register constraints have put the operands and return value in
fe19a83d 5582 the proper registers. */
188538df 5583
519104fe 5584const char *
b7849684 5585output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
188538df 5586{
9b38c2fa 5587 import_milli (mulI);
690d4228 5588 return output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
188538df
TG
5589}
5590
fe19a83d 5591/* Emit the rtl for doing a division by a constant. */
188538df 5592
9b38c2fa 5593/* Do magic division millicodes exist for this value? */
c9a88190 5594const int magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
188538df 5595
23f6f34f 5596/* We'll use an array to keep track of the magic millicodes and
188538df 5597 whether or not we've used them already. [n][0] is signed, [n][1] is
fe19a83d 5598 unsigned. */
188538df 5599
188538df
TG
5600static int div_milli[16][2];
5601
188538df 5602int
b7849684 5603emit_hpdiv_const (rtx *operands, int unsignedp)
188538df
TG
5604{
5605 if (GET_CODE (operands[2]) == CONST_INT
5606 && INTVAL (operands[2]) > 0
5607 && INTVAL (operands[2]) < 16
5608 && magic_milli[INTVAL (operands[2])])
5609 {
7d8b1412
AM
5610 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5611
ad2c71b7 5612 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
188538df 5613 emit
92fd5e41
KH
5614 (gen_rtx_PARALLEL
5615 (VOIDmode,
bd83f9a5 5616 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
ad2c71b7
JL
5617 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5618 SImode,
5619 gen_rtx_REG (SImode, 26),
5620 operands[2])),
bd83f9a5 5621 gen_rtx_CLOBBER (VOIDmode, operands[4]),
ad2c71b7
JL
5622 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5623 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5624 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
7d8b1412 5625 gen_rtx_CLOBBER (VOIDmode, ret))));
ad2c71b7 5626 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
188538df
TG
5627 return 1;
5628 }
5629 return 0;
5630}
5631
519104fe 5632const char *
b7849684 5633output_div_insn (rtx *operands, int unsignedp, rtx insn)
188538df
TG
5634{
5635 int divisor;
23f6f34f
TG
5636
5637 /* If the divisor is a constant, try to use one of the special
188538df
TG
5638 opcodes .*/
5639 if (GET_CODE (operands[0]) == CONST_INT)
5640 {
2c4ff308 5641 static char buf[100];
188538df
TG
5642 divisor = INTVAL (operands[0]);
5643 if (!div_milli[divisor][unsignedp])
5644 {
2c4ff308 5645 div_milli[divisor][unsignedp] = 1;
188538df
TG
5646 if (unsignedp)
5647 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5648 else
5649 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
188538df
TG
5650 }
5651 if (unsignedp)
2c4ff308 5652 {
4a0a75dd
KG
5653 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5654 INTVAL (operands[0]));
6a73009d 5655 return output_millicode_call (insn,
ad2c71b7 5656 gen_rtx_SYMBOL_REF (SImode, buf));
2c4ff308
JL
5657 }
5658 else
5659 {
4a0a75dd
KG
5660 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5661 INTVAL (operands[0]));
6a73009d 5662 return output_millicode_call (insn,
ad2c71b7 5663 gen_rtx_SYMBOL_REF (SImode, buf));
2c4ff308 5664 }
188538df 5665 }
fe19a83d 5666 /* Divisor isn't a special constant. */
188538df
TG
5667 else
5668 {
5669 if (unsignedp)
5670 {
5671 import_milli (divU);
6a73009d 5672 return output_millicode_call (insn,
ad2c71b7 5673 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
188538df
TG
5674 }
5675 else
5676 {
5677 import_milli (divI);
6a73009d 5678 return output_millicode_call (insn,
ad2c71b7 5679 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
188538df
TG
5680 }
5681 }
5682}
5683
fe19a83d 5684/* Output a $$rem millicode to do mod. */
188538df 5685
519104fe 5686const char *
b7849684 5687output_mod_insn (int unsignedp, rtx insn)
188538df
TG
5688{
5689 if (unsignedp)
5690 {
5691 import_milli (remU);
6a73009d 5692 return output_millicode_call (insn,
ad2c71b7 5693 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
188538df
TG
5694 }
5695 else
5696 {
5697 import_milli (remI);
6a73009d 5698 return output_millicode_call (insn,
ad2c71b7 5699 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
188538df
TG
5700 }
5701}
5702
5703void
b7849684 5704output_arg_descriptor (rtx call_insn)
188538df 5705{
519104fe 5706 const char *arg_regs[4];
188538df 5707 enum machine_mode arg_mode;
80225b66 5708 rtx link;
188538df
TG
5709 int i, output_flag = 0;
5710 int regno;
23f6f34f 5711
520babc7 5712 /* We neither need nor want argument location descriptors for the
e25724d8
AM
5713 64bit runtime environment or the ELF32 environment. */
5714 if (TARGET_64BIT || TARGET_ELF32)
520babc7
JL
5715 return;
5716
188538df
TG
5717 for (i = 0; i < 4; i++)
5718 arg_regs[i] = 0;
5719
2822d96e
JL
5720 /* Specify explicitly that no argument relocations should take place
5721 if using the portable runtime calling conventions. */
5722 if (TARGET_PORTABLE_RUNTIME)
5723 {
e236a9ff
JL
5724 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5725 asm_out_file);
2822d96e
JL
5726 return;
5727 }
5728
144d51f9
NS
5729 gcc_assert (GET_CODE (call_insn) == CALL_INSN);
5730 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5731 link; link = XEXP (link, 1))
188538df 5732 {
80225b66 5733 rtx use = XEXP (link, 0);
3529be83 5734
80225b66
TG
5735 if (! (GET_CODE (use) == USE
5736 && GET_CODE (XEXP (use, 0)) == REG
5737 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
3529be83
RS
5738 continue;
5739
80225b66
TG
5740 arg_mode = GET_MODE (XEXP (use, 0));
5741 regno = REGNO (XEXP (use, 0));
188538df 5742 if (regno >= 23 && regno <= 26)
a9d91d6f
RS
5743 {
5744 arg_regs[26 - regno] = "GR";
5745 if (arg_mode == DImode)
5746 arg_regs[25 - regno] = "GR";
5747 }
80225b66 5748 else if (regno >= 32 && regno <= 39)
188538df
TG
5749 {
5750 if (arg_mode == SFmode)
80225b66 5751 arg_regs[(regno - 32) / 2] = "FR";
d0616842 5752 else
188538df 5753 {
22d6e660 5754#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
80225b66
TG
5755 arg_regs[(regno - 34) / 2] = "FR";
5756 arg_regs[(regno - 34) / 2 + 1] = "FU";
188538df 5757#else
80225b66
TG
5758 arg_regs[(regno - 34) / 2] = "FU";
5759 arg_regs[(regno - 34) / 2 + 1] = "FR";
188538df
TG
5760#endif
5761 }
188538df
TG
5762 }
5763 }
5764 fputs ("\t.CALL ", asm_out_file);
5765 for (i = 0; i < 4; i++)
5766 {
5767 if (arg_regs[i])
5768 {
5769 if (output_flag++)
5770 fputc (',', asm_out_file);
5771 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5772 }
5773 }
5774 fputc ('\n', asm_out_file);
5775}
5776\f
483d7ad3
JDA
5777/* Inform reload about cases where moving X with a mode MODE to a register in
5778 RCLASS requires an extra scratch or immediate register. Return the class
5779 needed for the immediate register. */
5780
a87cf97e
JR
5781static reg_class_t
5782pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
ec963611
JDA
5783 enum machine_mode mode, secondary_reload_info *sri)
5784{
715a567d 5785 int regno;
a87cf97e 5786 enum reg_class rclass = (enum reg_class) rclass_i;
e236a9ff 5787
ec963611 5788 /* Handle the easy stuff first. */
0a2aaacc 5789 if (rclass == R1_REGS)
ec963611 5790 return NO_REGS;
e236a9ff 5791
ec963611
JDA
5792 if (REG_P (x))
5793 {
5794 regno = REGNO (x);
0a2aaacc 5795 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
ec963611
JDA
5796 return NO_REGS;
5797 }
69f8a2d6
JDA
5798 else
5799 regno = -1;
188538df 5800
ec963611
JDA
5801 /* If we have something like (mem (mem (...)), we can safely assume the
5802 inner MEM will end up in a general register after reloading, so there's
5803 no need for a secondary reload. */
5804 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5805 return NO_REGS;
188538df 5806
6bb36601 5807 /* Trying to load a constant into a FP register during PIC code
ec963611 5808 generation requires %r1 as a scratch register. */
7ee72796 5809 if (flag_pic
7e646101 5810 && (mode == SImode || mode == DImode)
0a2aaacc 5811 && FP_REG_CLASS_P (rclass)
ec963611 5812 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
e236a9ff 5813 {
ec963611
JDA
5814 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5815 : CODE_FOR_reload_indi_r1);
5816 return NO_REGS;
e236a9ff 5817 }
e236a9ff 5818
715a567d
JDA
5819 /* Secondary reloads of symbolic operands require %r1 as a scratch
5820 register when we're generating PIC code and when the operand isn't
5821 readonly. */
5822 if (symbolic_expression_p (x))
5823 {
5824 if (GET_CODE (x) == HIGH)
5825 x = XEXP (x, 0);
5826
5827 if (flag_pic || !read_only_operand (x, VOIDmode))
5828 {
5829 gcc_assert (mode == SImode || mode == DImode);
5830 sri->icode = (mode == SImode ? CODE_FOR_reload_insi_r1
5831 : CODE_FOR_reload_indi_r1);
5832 return NO_REGS;
5833 }
5834 }
5835
ec963611
JDA
5836 /* Profiling showed the PA port spends about 1.3% of its compilation
5837 time in true_regnum from calls inside pa_secondary_reload_class. */
5838 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5839 regno = true_regnum (x);
39dfb55a 5840
6982c5d4
JDA
5841 /* In order to allow 14-bit displacements in integer loads and stores,
5842 we need to prevent reload from generating out of range integer mode
5843 loads and stores to the floating point registers. Previously, we
5844 used to call for a secondary reload and have emit_move_sequence()
5845 fix the instruction sequence. However, reload occasionally wouldn't
5846 generate the reload and we would end up with an invalid REG+D memory
5847 address. So, now we use an intermediate general register for most
5848 memory loads and stores. */
5849 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
5850 && GET_MODE_CLASS (mode) == MODE_INT
0a2aaacc 5851 && FP_REG_CLASS_P (rclass))
6982c5d4
JDA
5852 {
5853 /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
5854 the secondary reload needed for a pseudo. It never passes a
5855 REG+D address. */
5856 if (GET_CODE (x) == MEM)
5857 {
5858 x = XEXP (x, 0);
5859
5860 /* We don't need an intermediate for indexed and LO_SUM DLT
5861 memory addresses. When INT14_OK_STRICT is true, it might
5862 appear that we could directly allow register indirect
5863 memory addresses. However, this doesn't work because we
5864 don't support SUBREGs in floating-point register copies
5865 and reload doesn't tell us when it's going to use a SUBREG. */
5866 if (IS_INDEX_ADDR_P (x)
5867 || IS_LO_SUM_DLT_ADDR_P (x))
5868 return NO_REGS;
5869
5870 /* Otherwise, we need an intermediate general register. */
5871 return GENERAL_REGS;
5872 }
5873
5874 /* Request a secondary reload with a general scratch register
5875 for everthing else. ??? Could symbolic operands be handled
5876 directly when generating non-pic PA 2.0 code? */
f9621cc4
RS
5877 sri->icode = (in_p
5878 ? direct_optab_handler (reload_in_optab, mode)
5879 : direct_optab_handler (reload_out_optab, mode));
6982c5d4
JDA
5880 return NO_REGS;
5881 }
5882
483d7ad3
JDA
5883 /* A SAR<->FP register copy requires an intermediate general register
5884 and secondary memory. We need a secondary reload with a general
5885 scratch register for spills. */
5886 if (rclass == SHIFT_REGS)
ec963611 5887 {
483d7ad3
JDA
5888 /* Handle spill. */
5889 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
5890 {
5891 sri->icode = (in_p
5892 ? direct_optab_handler (reload_in_optab, mode)
5893 : direct_optab_handler (reload_out_optab, mode));
5894 return NO_REGS;
5895 }
5896
5897 /* Handle FP copy. */
5898 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
5899 return GENERAL_REGS;
ec963611 5900 }
fa5e5c1e 5901
26ee120d 5902 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
483d7ad3
JDA
5903 && REGNO_REG_CLASS (regno) == SHIFT_REGS
5904 && FP_REG_CLASS_P (rclass))
5905 return GENERAL_REGS;
43940f6b 5906
fa5e5c1e 5907 return NO_REGS;
188538df
TG
5908}
5909
16c16a24
JDA
5910/* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5911 is only marked as live on entry by df-scan when it is a fixed
5912 register. It isn't a fixed register in the 64-bit runtime,
5913 so we need to mark it here. */
5914
5915static void
5916pa_extra_live_on_entry (bitmap regs)
5917{
5918 if (TARGET_64BIT)
5919 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
5920}
5921
5922/* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5923 to prevent it from being deleted. */
5924
5925rtx
5926pa_eh_return_handler_rtx (void)
5927{
5928 rtx tmp;
5929
bc707992 5930 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
16c16a24
JDA
5931 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
5932 tmp = gen_rtx_MEM (word_mode, tmp);
5933 tmp->volatil = 1;
5934 return tmp;
5935}
5936
8cd5a4e0
RH
5937/* In the 32-bit runtime, arguments larger than eight bytes are passed
5938 by invisible reference. As a GCC extension, we also pass anything
5939 with a zero or variable size by reference.
5940
5941 The 64-bit runtime does not describe passing any types by invisible
5942 reference. The internals of GCC can't currently handle passing
5943 empty structures, and zero or variable length arrays when they are
5944 not passed entirely on the stack or by reference. Thus, as a GCC
5945 extension, we pass these types by reference. The HP compiler doesn't
5946 support these types, so hopefully there shouldn't be any compatibility
5947 issues. This may have to be revisited when HP releases a C99 compiler
5948 or updates the ABI. */
5949
5950static bool
5951pa_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
586de218 5952 enum machine_mode mode, const_tree type,
8cd5a4e0
RH
5953 bool named ATTRIBUTE_UNUSED)
5954{
5955 HOST_WIDE_INT size;
5956
5957 if (type)
5958 size = int_size_in_bytes (type);
5959 else
5960 size = GET_MODE_SIZE (mode);
5961
5962 if (TARGET_64BIT)
5963 return size <= 0;
5964 else
5965 return size <= 0 || size > 8;
5966}
5967
188538df 5968enum direction
586de218 5969function_arg_padding (enum machine_mode mode, const_tree type)
188538df 5970{
9dff28ab 5971 if (mode == BLKmode
c3e39a47
JDA
5972 || (TARGET_64BIT
5973 && type
5974 && (AGGREGATE_TYPE_P (type)
5975 || TREE_CODE (type) == COMPLEX_TYPE
5976 || TREE_CODE (type) == VECTOR_TYPE)))
9dff28ab
JDA
5977 {
5978 /* Return none if justification is not required. */
5979 if (type
5980 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
5981 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
5982 return none;
5983
5984 /* The directions set here are ignored when a BLKmode argument larger
5985 than a word is placed in a register. Different code is used for
5986 the stack and registers. This makes it difficult to have a
5987 consistent data representation for both the stack and registers.
5988 For both runtimes, the justification and padding for arguments on
5989 the stack and in registers should be identical. */
5990 if (TARGET_64BIT)
5991 /* The 64-bit runtime specifies left justification for aggregates. */
5992 return upward;
188538df 5993 else
9dff28ab
JDA
5994 /* The 32-bit runtime architecture specifies right justification.
5995 When the argument is passed on the stack, the argument is padded
5996 with garbage on the left. The HP compiler pads with zeros. */
5997 return downward;
188538df 5998 }
9dff28ab
JDA
5999
6000 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
188538df 6001 return downward;
188538df
TG
6002 else
6003 return none;
6004}
6005
188538df 6006\f
648d2ffc
RH
6007/* Do what is necessary for `va_start'. We look at the current function
6008 to determine if stdargs or varargs is used and fill in an initial
6009 va_list. A pointer to this constructor is returned. */
188538df 6010
3f12cd9b 6011static rtx
b7849684 6012hppa_builtin_saveregs (void)
188538df 6013{
5e32727c 6014 rtx offset, dest;
188538df 6015 tree fntype = TREE_TYPE (current_function_decl);
f38958e8 6016 int argadj = ((!stdarg_p (fntype))
188538df
TG
6017 ? UNITS_PER_WORD : 0);
6018
6019 if (argadj)
38173d38 6020 offset = plus_constant (crtl->args.arg_offset_rtx, argadj);
188538df 6021 else
38173d38 6022 offset = crtl->args.arg_offset_rtx;
17e1dfa2 6023
520babc7
JL
6024 if (TARGET_64BIT)
6025 {
6026 int i, off;
6619e96c 6027
520babc7
JL
6028 /* Adjust for varargs/stdarg differences. */
6029 if (argadj)
38173d38 6030 offset = plus_constant (crtl->args.arg_offset_rtx, -argadj);
520babc7 6031 else
38173d38 6032 offset = crtl->args.arg_offset_rtx;
520babc7
JL
6033
6034 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6035 from the incoming arg pointer and growing to larger addresses. */
6036 for (i = 26, off = -64; i >= 19; i--, off += 8)
6037 emit_move_insn (gen_rtx_MEM (word_mode,
6038 plus_constant (arg_pointer_rtx, off)),
6039 gen_rtx_REG (word_mode, i));
6040
6041 /* The incoming args pointer points just beyond the flushback area;
f710504c 6042 normally this is not a serious concern. However, when we are doing
520babc7
JL
6043 varargs/stdargs we want to make the arg pointer point to the start
6044 of the incoming argument area. */
6045 emit_move_insn (virtual_incoming_args_rtx,
6046 plus_constant (arg_pointer_rtx, -64));
6047
6048 /* Now return a pointer to the first anonymous argument. */
6049 return copy_to_reg (expand_binop (Pmode, add_optab,
6050 virtual_incoming_args_rtx,
6051 offset, 0, 0, OPTAB_LIB_WIDEN));
6052 }
6053
fe19a83d 6054 /* Store general registers on the stack. */
ad2c71b7 6055 dest = gen_rtx_MEM (BLKmode,
38173d38 6056 plus_constant (crtl->args.internal_arg_pointer,
ad2c71b7 6057 -16));
ba4828e0 6058 set_mem_alias_set (dest, get_varargs_alias_set ());
8ac61af7 6059 set_mem_align (dest, BITS_PER_WORD);
c6b97fac 6060 move_block_from_reg (23, dest, 4);
5e32727c 6061
39dfb55a
JL
6062 /* move_block_from_reg will emit code to store the argument registers
6063 individually as scalar stores.
6064
6065 However, other insns may later load from the same addresses for
956d6950 6066 a structure load (passing a struct to a varargs routine).
39dfb55a
JL
6067
6068 The alias code assumes that such aliasing can never happen, so we
6069 have to keep memory referencing insns from moving up beyond the
6070 last argument register store. So we emit a blockage insn here. */
6071 emit_insn (gen_blockage ());
6072
17e1dfa2 6073 return copy_to_reg (expand_binop (Pmode, add_optab,
38173d38 6074 crtl->args.internal_arg_pointer,
17e1dfa2 6075 offset, 0, 0, OPTAB_LIB_WIDEN));
188538df 6076}
d2a94ec0 6077
d7bd8aeb 6078static void
b7849684 6079hppa_va_start (tree valist, rtx nextarg)
ca5f4364
RH
6080{
6081 nextarg = expand_builtin_saveregs ();
e5faf155 6082 std_expand_builtin_va_start (valist, nextarg);
ca5f4364
RH
6083}
6084
8101c928 6085static tree
726a989a
RB
6086hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6087 gimple_seq *post_p)
ca5f4364 6088{
520babc7
JL
6089 if (TARGET_64BIT)
6090 {
8101c928 6091 /* Args grow upward. We can use the generic routines. */
af064de5 6092 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
ca5f4364 6093 }
9dff28ab 6094 else /* !TARGET_64BIT */
ca5f4364 6095 {
8101c928
RH
6096 tree ptr = build_pointer_type (type);
6097 tree valist_type;
6098 tree t, u;
6099 unsigned int size, ofs;
af064de5 6100 bool indirect;
ca5f4364 6101
af064de5 6102 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
8101c928 6103 if (indirect)
9dff28ab 6104 {
8101c928
RH
6105 type = ptr;
6106 ptr = build_pointer_type (type);
ca5f4364 6107 }
8101c928
RH
6108 size = int_size_in_bytes (type);
6109 valist_type = TREE_TYPE (valist);
9dff28ab 6110
8101c928 6111 /* Args grow down. Not handled by generic routines. */
9dff28ab 6112
5be014d5
AP
6113 u = fold_convert (sizetype, size_in_bytes (type));
6114 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6115 t = build2 (POINTER_PLUS_EXPR, valist_type, valist, u);
9dff28ab 6116
e4f1aef1
RG
6117 /* Align to 4 or 8 byte boundary depending on argument size. */
6118
6119 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6120 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
5be014d5 6121 t = fold_convert (valist_type, t);
8101c928 6122
66863d89 6123 t = build2 (MODIFY_EXPR, valist_type, valist, t);
ca5f4364 6124
8101c928
RH
6125 ofs = (8 - size) % 4;
6126 if (ofs != 0)
6127 {
5be014d5
AP
6128 u = size_int (ofs);
6129 t = build2 (POINTER_PLUS_EXPR, valist_type, t, u);
9dff28ab 6130 }
ca5f4364 6131
8101c928 6132 t = fold_convert (ptr, t);
d6e9821f 6133 t = build_va_arg_indirect_ref (t);
ca5f4364 6134
8101c928 6135 if (indirect)
d6e9821f 6136 t = build_va_arg_indirect_ref (t);
ca5f4364 6137
8101c928
RH
6138 return t;
6139 }
6140}
ca5f4364 6141
83c32f2e
JDA
6142/* True if MODE is valid for the target. By "valid", we mean able to
6143 be manipulated in non-trivial ways. In particular, this means all
6144 the arithmetic is supported.
6145
6146 Currently, TImode is not valid as the HP 64-bit runtime documentation
6147 doesn't document the alignment and calling conventions for this type.
6148 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6149 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6150
6151static bool
6152pa_scalar_mode_supported_p (enum machine_mode mode)
6153{
6154 int precision = GET_MODE_PRECISION (mode);
6155
6156 switch (GET_MODE_CLASS (mode))
6157 {
6158 case MODE_PARTIAL_INT:
6159 case MODE_INT:
6160 if (precision == CHAR_TYPE_SIZE)
6161 return true;
6162 if (precision == SHORT_TYPE_SIZE)
6163 return true;
6164 if (precision == INT_TYPE_SIZE)
6165 return true;
6166 if (precision == LONG_TYPE_SIZE)
6167 return true;
6168 if (precision == LONG_LONG_TYPE_SIZE)
6169 return true;
6170 return false;
6171
6172 case MODE_FLOAT:
6173 if (precision == FLOAT_TYPE_SIZE)
6174 return true;
6175 if (precision == DOUBLE_TYPE_SIZE)
6176 return true;
6177 if (precision == LONG_DOUBLE_TYPE_SIZE)
6178 return true;
6179 return false;
6180
70c1d012
JDA
6181 case MODE_DECIMAL_FLOAT:
6182 return false;
6183
83c32f2e
JDA
6184 default:
6185 gcc_unreachable ();
6186 }
6187}
6188
f5e66865 6189/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
16923e7b 6190 it branches into the delay slot. Otherwise, return FALSE. */
f5e66865
JDA
6191
6192static bool
6193branch_to_delay_slot_p (rtx insn)
6194{
16923e7b
JDA
6195 rtx jump_insn;
6196
f5e66865
JDA
6197 if (dbr_sequence_length ())
6198 return FALSE;
6199
16923e7b
JDA
6200 jump_insn = next_active_insn (JUMP_LABEL (insn));
6201 while (insn)
6202 {
6203 insn = next_active_insn (insn);
6204 if (jump_insn == insn)
6205 return TRUE;
6206
6207 /* We can't rely on the length of asms. So, we return FALSE when
6208 the branch is followed by an asm. */
6209 if (!insn
6210 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6211 || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6212 || get_attr_length (insn) > 0)
6213 break;
6214 }
6215
6216 return FALSE;
f5e66865
JDA
6217}
6218
16923e7b 6219/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
f5e66865
JDA
6220
6221 This occurs when INSN has an unfilled delay slot and is followed
16923e7b
JDA
6222 by an asm. Disaster can occur if the asm is empty and the jump
6223 branches into the delay slot. So, we add a nop in the delay slot
6224 when this occurs. */
f5e66865
JDA
6225
6226static bool
6227branch_needs_nop_p (rtx insn)
6228{
16923e7b 6229 rtx jump_insn;
f5e66865
JDA
6230
6231 if (dbr_sequence_length ())
6232 return FALSE;
6233
16923e7b
JDA
6234 jump_insn = next_active_insn (JUMP_LABEL (insn));
6235 while (insn)
6236 {
6237 insn = next_active_insn (insn);
6238 if (!insn || jump_insn == insn)
6239 return TRUE;
6240
6241 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6242 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6243 && get_attr_length (insn) > 0)
6244 break;
6245 }
6246
6247 return FALSE;
6248}
6249
6250/* Return TRUE if INSN, a forward jump insn, can use nullification
6251 to skip the following instruction. This avoids an extra cycle due
6252 to a mis-predicted branch when we fall through. */
6253
6254static bool
6255use_skip_p (rtx insn)
6256{
6257 rtx jump_insn = next_active_insn (JUMP_LABEL (insn));
6258
6259 while (insn)
6260 {
6261 insn = next_active_insn (insn);
6262
6263 /* We can't rely on the length of asms, so we can't skip asms. */
6264 if (!insn
6265 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6266 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6267 break;
6268 if (get_attr_length (insn) == 4
6269 && jump_insn == next_active_insn (insn))
6270 return TRUE;
6271 if (get_attr_length (insn) > 0)
6272 break;
6273 }
6274
6275 return FALSE;
f5e66865
JDA
6276}
6277
23f6f34f
TG
6278/* This routine handles all the normal conditional branch sequences we
6279 might need to generate. It handles compare immediate vs compare
6280 register, nullification of delay slots, varying length branches,
d2364a74 6281 negated branches, and all combinations of the above. It returns the
23f6f34f 6282 output appropriate to emit the branch corresponding to all given
d2364a74
JL
6283 parameters. */
6284
519104fe 6285const char *
16d74a3c 6286output_cbranch (rtx *operands, int negated, rtx insn)
b1a275e1 6287{
d2364a74 6288 static char buf[100];
16923e7b 6289 bool useskip;
16d74a3c
JDA
6290 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6291 int length = get_attr_length (insn);
6292 int xdelay;
d2364a74 6293
112cdef5 6294 /* A conditional branch to the following instruction (e.g. the delay slot)
02a57c73
JDA
6295 is asking for a disaster. This can happen when not optimizing and
6296 when jump optimization fails.
b1a275e1 6297
7772f0a9
JDA
6298 While it is usually safe to emit nothing, this can fail if the
6299 preceding instruction is a nullified branch with an empty delay
6300 slot and the same branch target as this branch. We could check
6301 for this but jump optimization should eliminate nop jumps. It
6302 is always safe to emit a nop. */
f5e66865 6303 if (branch_to_delay_slot_p (insn))
02a57c73 6304 return "nop";
23f6f34f 6305
ae2ea719
JDA
6306 /* The doubleword form of the cmpib instruction doesn't have the LEU
6307 and GTU conditions while the cmpb instruction does. Since we accept
6308 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6309 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6310 operands[2] = gen_rtx_REG (DImode, 0);
9972f30d
SE
6311 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6312 operands[1] = gen_rtx_REG (DImode, 0);
ae2ea719 6313
b9821af8
JL
6314 /* If this is a long branch with its delay slot unfilled, set `nullify'
6315 as it can nullify the delay slot and save a nop. */
a1b36964 6316 if (length == 8 && dbr_sequence_length () == 0)
b9821af8
JL
6317 nullify = 1;
6318
6319 /* If this is a short forward conditional branch which did not get
6320 its delay slot filled, the delay slot can still be nullified. */
a1b36964 6321 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b9821af8
JL
6322 nullify = forward_branch_p (insn);
6323
23f6f34f 6324 /* A forward branch over a single nullified insn can be done with a
d2364a74
JL
6325 comclr instruction. This avoids a single cycle penalty due to
6326 mis-predicted branch if we fall through (branch not taken). */
16923e7b 6327 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
d2364a74
JL
6328
6329 switch (length)
6330 {
b9821af8
JL
6331 /* All short conditional branches except backwards with an unfilled
6332 delay slot. */
a1b36964 6333 case 4:
d2364a74 6334 if (useskip)
f38b27c7 6335 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
d2364a74 6336 else
f38b27c7 6337 strcpy (buf, "{com%I2b,|cmp%I2b,}");
520babc7
JL
6338 if (GET_MODE (operands[1]) == DImode)
6339 strcat (buf, "*");
d2364a74
JL
6340 if (negated)
6341 strcat (buf, "%B3");
6342 else
6343 strcat (buf, "%S3");
6344 if (useskip)
3b5e5fb3 6345 strcat (buf, " %2,%r1,%%r0");
d2364a74 6346 else if (nullify)
f5e66865
JDA
6347 {
6348 if (branch_needs_nop_p (insn))
6349 strcat (buf, ",n %2,%r1,%0%#");
6350 else
6351 strcat (buf, ",n %2,%r1,%0");
6352 }
23f6f34f 6353 else
dcaeffef 6354 strcat (buf, " %2,%r1,%0");
d2364a74
JL
6355 break;
6356
5bdc5878 6357 /* All long conditionals. Note a short backward branch with an
b9821af8
JL
6358 unfilled delay slot is treated just like a long backward branch
6359 with an unfilled delay slot. */
a1b36964 6360 case 8:
b9821af8 6361 /* Handle weird backwards branch with a filled delay slot
16d74a3c 6362 which is nullified. */
b9821af8
JL
6363 if (dbr_sequence_length () != 0
6364 && ! forward_branch_p (insn)
6365 && nullify)
6366 {
f38b27c7 6367 strcpy (buf, "{com%I2b,|cmp%I2b,}");
520babc7
JL
6368 if (GET_MODE (operands[1]) == DImode)
6369 strcat (buf, "*");
b9821af8
JL
6370 if (negated)
6371 strcat (buf, "%S3");
6372 else
6373 strcat (buf, "%B3");
3b5e5fb3 6374 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
b9821af8 6375 }
923f781d
JL
6376 /* Handle short backwards branch with an unfilled delay slot.
6377 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6378 taken and untaken branches. */
6379 else if (dbr_sequence_length () == 0
6380 && ! forward_branch_p (insn)
9d98a694
AO
6381 && INSN_ADDRESSES_SET_P ()
6382 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6383 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d 6384 {
f38b27c7 6385 strcpy (buf, "{com%I2b,|cmp%I2b,}");
520babc7
JL
6386 if (GET_MODE (operands[1]) == DImode)
6387 strcat (buf, "*");
923f781d 6388 if (negated)
dcaeffef 6389 strcat (buf, "%B3 %2,%r1,%0%#");
923f781d 6390 else
dcaeffef 6391 strcat (buf, "%S3 %2,%r1,%0%#");
923f781d 6392 }
d2364a74 6393 else
b9821af8 6394 {
f38b27c7 6395 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
520babc7
JL
6396 if (GET_MODE (operands[1]) == DImode)
6397 strcat (buf, "*");
b9821af8
JL
6398 if (negated)
6399 strcat (buf, "%S3");
6400 else
6401 strcat (buf, "%B3");
6402 if (nullify)
3b5e5fb3 6403 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
b9821af8 6404 else
3b5e5fb3 6405 strcat (buf, " %2,%r1,%%r0\n\tb %0");
b9821af8 6406 }
d2364a74
JL
6407 break;
6408
16d74a3c 6409 default:
685d0e07 6410 /* The reversed conditional branch must branch over one additional
16d74a3c
JDA
6411 instruction if the delay slot is filled and needs to be extracted
6412 by output_lbranch. If the delay slot is empty or this is a
6413 nullified forward branch, the instruction after the reversed
6414 condition branch must be nullified. */
6415 if (dbr_sequence_length () == 0
6416 || (nullify && forward_branch_p (insn)))
6417 {
6418 nullify = 1;
6419 xdelay = 0;
6420 operands[4] = GEN_INT (length);
6421 }
6422 else
6423 {
6424 xdelay = 1;
6425 operands[4] = GEN_INT (length + 4);
6426 }
4bcb9e3f
JL
6427
6428 /* Create a reversed conditional branch which branches around
6429 the following insns. */
685d0e07
JDA
6430 if (GET_MODE (operands[1]) != DImode)
6431 {
6432 if (nullify)
6433 {
6434 if (negated)
6435 strcpy (buf,
6436 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6437 else
6438 strcpy (buf,
6439 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6440 }
6441 else
6442 {
6443 if (negated)
6444 strcpy (buf,
6445 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6446 else
6447 strcpy (buf,
6448 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6449 }
6450 }
4bcb9e3f 6451 else
520babc7 6452 {
685d0e07
JDA
6453 if (nullify)
6454 {
6455 if (negated)
6456 strcpy (buf,
6457 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6458 else
6459 strcpy (buf,
6460 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6461 }
520babc7 6462 else
685d0e07
JDA
6463 {
6464 if (negated)
6465 strcpy (buf,
6466 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6467 else
6468 strcpy (buf,
6469 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6470 }
520babc7 6471 }
4bcb9e3f 6472
16d74a3c
JDA
6473 output_asm_insn (buf, operands);
6474 return output_lbranch (operands[0], insn, xdelay);
685d0e07
JDA
6475 }
6476 return buf;
6477}
4bcb9e3f 6478
16d74a3c
JDA
6479/* This routine handles output of long unconditional branches that
6480 exceed the maximum range of a simple branch instruction. Since
6481 we don't have a register available for the branch, we save register
6482 %r1 in the frame marker, load the branch destination DEST into %r1,
6483 execute the branch, and restore %r1 in the delay slot of the branch.
6484
6485 Since long branches may have an insn in the delay slot and the
6486 delay slot is used to restore %r1, we in general need to extract
6487 this insn and execute it before the branch. However, to facilitate
6488 use of this function by conditional branches, we also provide an
6489 option to not extract the delay insn so that it will be emitted
6490 after the long branch. So, if there is an insn in the delay slot,
6491 it is extracted if XDELAY is nonzero.
6492
6493 The lengths of the various long-branch sequences are 20, 16 and 24
6494 bytes for the portable runtime, non-PIC and PIC cases, respectively.

 The save of %r1 and the branch itself are emitted here with
 output_asm_insn. The value returned is the template for the insn
 that restores %r1; it reads the same frame marker slot that was
 used for the save. */
4bcb9e3f 6495
685d0e07 6496const char *
16d74a3c 6497output_lbranch (rtx dest, rtx insn, int xdelay)
685d0e07
JDA
6498{
6499 rtx xoperands[2];
6500
6501 xoperands[0] = dest;
4bcb9e3f 6502
685d0e07 6503 /* First, free up the delay slot. */
16d74a3c 6504 if (xdelay && dbr_sequence_length () != 0)
685d0e07
JDA
6505 {
6506 /* We can't handle a jump in the delay slot. */
144d51f9 6507 gcc_assert (GET_CODE (NEXT_INSN (insn)) != JUMP_INSN);
4bcb9e3f 6508
685d0e07 6509 final_scan_insn (NEXT_INSN (insn), asm_out_file,
c9d691e9 6510 optimize, 0, NULL);
4bcb9e3f 6511
685d0e07 6512 /* Now delete the delay insn. */
a38e7aa5 6513 SET_INSN_DELETED (NEXT_INSN (insn));
685d0e07 6514 }
4bcb9e3f 6515
685d0e07
JDA
6516 /* Output an insn to save %r1. The runtime documentation doesn't
6517 specify whether the "Clean Up" slot in the callers frame can
6518 be clobbered by the callee. It isn't copied by HP's builtin
6519 alloca, so this suggests that it can be clobbered if necessary.
6520 The "Static Link" location is copied by HP builtin alloca, so
6521 we avoid using it. Using the cleanup slot might be a problem
6522 if we have to interoperate with languages that pass cleanup
6523 information. However, it should be possible to handle these
6524 situations with GCC's asm feature.
6525
6526 The "Current RP" slot is reserved for the called procedure, so
6527 we try to use it when we don't have a frame of our own. It's
6528 rather unlikely that we won't have a frame when we need to emit
6529 a very long branch.
6530
6531 Really the way to go long term is a register scavenger; goto
6532 the target of the jump and find a register which we can use
6533 as a scratch to hold the value in %r1. Then, we wouldn't have
6534 to free up the delay slot or clobber a slot that may be needed
6535 for other purposes. */
6536 if (TARGET_64BIT)
6537 {
6fb5fa3c 6538 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6539 /* Use the return pointer slot in the frame marker. */
6540 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6541 else
6542 /* Use the slot at -40 in the frame marker since HP builtin
6543 alloca doesn't copy it. */
6544 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6545 }
6546 else
6547 {
6fb5fa3c 6548 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6549 /* Use the return pointer slot in the frame marker. */
6550 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6551 else
6552 /* Use the "Clean Up" slot in the frame marker. In GCC,
6553 the only other use of this location is for copying a
6554 floating point double argument from a floating-point
6555 register to two general registers. The copy is done
aa7f1eb1 6556 as an "atomic" operation when outputting a call, so it
685d0e07
JDA
6557 won't interfere with our using the location here. */
6558 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6559 }
3d9268b6 6560
 /* Load the address of DEST into %r1 and branch through it, using the
 portable runtime, PIC or non-PIC sequence as appropriate. */
5fad1c24
JDA
6561 if (TARGET_PORTABLE_RUNTIME)
6562 {
6563 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6564 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6565 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6566 }
6567 else if (flag_pic)
685d0e07
JDA
6568 {
6569 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6570 if (TARGET_SOM || !TARGET_GAS)
6571 {
 /* A fresh local label is emitted between the addil and the ldo
 so that both can express DEST as an offset from that label. */
6572 xoperands[1] = gen_label_rtx ();
6573 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
ecc418c4
JDA
6574 targetm.asm_out.internal_label (asm_out_file, "L",
6575 CODE_LABEL_NUMBER (xoperands[1]));
685d0e07 6576 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
4bcb9e3f 6577 }
685d0e07
JDA
6578 else
6579 {
6580 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6581 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6582 }
6583 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6584 }
6585 else
6586 /* Now output a very long branch to the original target. */
6587 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
4bcb9e3f 6588
685d0e07
JDA
6589 /* Now restore the value of %r1 in the delay slot. The slot tests
 below mirror the ones used for the save above, so the load reads
 back the location that was written. */
6590 if (TARGET_64BIT)
6591 {
6fb5fa3c 6592 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6593 return "ldd -16(%%r30),%%r1";
6594 else
6595 return "ldd -40(%%r30),%%r1";
6596 }
6597 else
6598 {
6fb5fa3c 6599 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6600 return "ldw -20(%%r30),%%r1";
6601 else
6602 return "ldw -12(%%r30),%%r1";
b9821af8 6603 }
d2364a74
JL
6604}
6605
23f6f34f 6606/* This routine handles all the branch-on-bit conditional branch sequences we
d2364a74
JL
6607 might need to generate. It handles nullification of delay slots,
6608 varying length branches, negated branches and all combinations of the
6609 above. It returns the appropriate output template to emit the branch.

 NEGATED selects operand 3 rather than operand 2 as the branch target
 and, together with WHICH, determines whether the `<' or the `>='
 completer is used. For lengths other than 4 and 8, the reversed
 conditional branch is output here and the long branch itself is
 produced by output_lbranch.

 When the template is built in the static buffer BUF, it is only
 valid until the next call. Although OPERANDS is marked
 ATTRIBUTE_UNUSED, it is read below and operand 4 is overwritten in
 the very long branch case. */
6610
519104fe 6611const char *
16d74a3c 6612output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
b1a275e1 6613{
d2364a74 6614 static char buf[100];
16923e7b 6615 bool useskip;
16d74a3c
JDA
6616 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6617 int length = get_attr_length (insn);
6618 int xdelay;
d2364a74 6619
112cdef5 6620 /* A conditional branch to the following instruction (e.g. the delay slot) is
b1a275e1 6621 asking for a disaster. I do not think this can happen as this pattern
23f6f34f 6622 is only used when optimizing; jump optimization should eliminate the
b1a275e1 6623 jump. But be prepared just in case. */
23f6f34f 6624
f5e66865 6625 if (branch_to_delay_slot_p (insn))
02a57c73 6626 return "nop";
23f6f34f 6627
b9821af8
JL
6628 /* If this is a long branch with its delay slot unfilled, set `nullify'
6629 as it can nullify the delay slot and save a nop. */
a1b36964 6630 if (length == 8 && dbr_sequence_length () == 0)
b9821af8
JL
6631 nullify = 1;
6632
6633 /* If this is a short forward conditional branch which did not get
6634 its delay slot filled, the delay slot can still be nullified. */
a1b36964 6635 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b9821af8
JL
6636 nullify = forward_branch_p (insn);
6637
23f6f34f 6638 /* A forward branch over a single nullified insn can be done with an
d2364a74
JL
6639 extrs instruction. This avoids a single cycle penalty due to
6640 mis-predicted branch if we fall through (branch not taken). */
16923e7b 6641 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
d2364a74
JL
6642
6643 switch (length)
6644 {
6645
b9821af8
JL
6646 /* All short conditional branches except backwards with an unfilled
6647 delay slot. */
a1b36964 6648 case 4:
d2364a74 6649 if (useskip)
f38b27c7 6650 strcpy (buf, "{extrs,|extrw,s,}");
23f6f34f 6651 else
d2364a74 6652 strcpy (buf, "bb,");
 /* For a DImode operand 0, replace the mnemonic chosen above with
 its doubleword form. */
520babc7
JL
6653 if (useskip && GET_MODE (operands[0]) == DImode)
6654 strcpy (buf, "extrd,s,*");
6655 else if (GET_MODE (operands[0]) == DImode)
6656 strcpy (buf, "bb,*");
d2364a74
JL
6657 if ((which == 0 && negated)
6658 || (which == 1 && ! negated))
6659 strcat (buf, ">=");
6660 else
6661 strcat (buf, "<");
6662 if (useskip)
3b5e5fb3 6663 strcat (buf, " %0,%1,1,%%r0");
d2364a74 6664 else if (nullify && negated)
f5e66865
JDA
6665 {
6666 if (branch_needs_nop_p (insn))
6667 strcat (buf, ",n %0,%1,%3%#");
6668 else
6669 strcat (buf, ",n %0,%1,%3");
6670 }
d2364a74 6671 else if (nullify && ! negated)
f5e66865
JDA
6672 {
6673 if (branch_needs_nop_p (insn))
6674 strcat (buf, ",n %0,%1,%2%#");
6675 else
6676 strcat (buf, ",n %0,%1,%2");
6677 }
d2364a74 6678 else if (! nullify && negated)
f5e66865 6679 strcat (buf, " %0,%1,%3");
d2364a74 6680 else if (! nullify && ! negated)
b9821af8 6681 strcat (buf, " %0,%1,%2");
d2364a74
JL
6682 break;
6683
5bdc5878 6684 /* All long conditionals. Note a short backward branch with an
b9821af8
JL
6685 unfilled delay slot is treated just like a long backward branch
6686 with an unfilled delay slot. */
a1b36964 6687 case 8:
b9821af8 6688 /* Handle weird backwards branch with a filled delay slot
16d74a3c 6689 which is nullified. */
b9821af8
JL
6690 if (dbr_sequence_length () != 0
6691 && ! forward_branch_p (insn)
6692 && nullify)
6693 {
6694 strcpy (buf, "bb,");
520babc7
JL
6695 if (GET_MODE (operands[0]) == DImode)
6696 strcat (buf, "*");
 /* The condition is the opposite of the one used in the short
 case: this bb skips over the unconditional branch that
 follows it. */
b9821af8
JL
6697 if ((which == 0 && negated)
6698 || (which == 1 && ! negated))
6699 strcat (buf, "<");
6700 else
6701 strcat (buf, ">=");
6702 if (negated)
3b5e5fb3 6703 strcat (buf, ",n %0,%1,.+12\n\tb %3");
b9821af8 6704 else
3b5e5fb3 6705 strcat (buf, ",n %0,%1,.+12\n\tb %2");
b9821af8 6706 }
923f781d
JL
6707 /* Handle short backwards branch with an unfilled delay slot.
6708 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6709 taken and untaken branches. */
6710 else if (dbr_sequence_length () == 0
6711 && ! forward_branch_p (insn)
9d98a694
AO
6712 && INSN_ADDRESSES_SET_P ()
6713 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6714 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d
JL
6715 {
6716 strcpy (buf, "bb,");
520babc7
JL
6717 if (GET_MODE (operands[0]) == DImode)
6718 strcat (buf, "*");
923f781d
JL
6719 if ((which == 0 && negated)
6720 || (which == 1 && ! negated))
6721 strcat (buf, ">=");
6722 else
6723 strcat (buf, "<");
6724 if (negated)
6725 strcat (buf, " %0,%1,%3%#");
6726 else
6727 strcat (buf, " %0,%1,%2%#");
6728 }
d2364a74 6729 else
b9821af8 6730 {
520babc7
JL
6731 if (GET_MODE (operands[0]) == DImode)
6732 strcpy (buf, "extrd,s,*");
16d74a3c
JDA
6733 else
6734 strcpy (buf, "{extrs,|extrw,s,}");
b9821af8
JL
6735 if ((which == 0 && negated)
6736 || (which == 1 && ! negated))
6737 strcat (buf, "<");
6738 else
6739 strcat (buf, ">=");
6740 if (nullify && negated)
55abf18a 6741 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
b9821af8 6742 else if (nullify && ! negated)
55abf18a 6743 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
b9821af8 6744 else if (negated)
3b5e5fb3 6745 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
23f6f34f 6746 else
3b5e5fb3 6747 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
b9821af8 6748 }
d2364a74
JL
6749 break;
6750
6751 default:
16d74a3c
JDA
6752 /* The reversed conditional branch must branch over one additional
6753 instruction if the delay slot is filled and needs to be extracted
6754 by output_lbranch. If the delay slot is empty or this is a
6755 nullified forward branch, the instruction after the reversed
6756 condition branch must be nullified. */
6757 if (dbr_sequence_length () == 0
6758 || (nullify && forward_branch_p (insn)))
6759 {
6760 nullify = 1;
6761 xdelay = 0;
8370f6fa 6762 operands[4] = GEN_INT (length);
16d74a3c
JDA
6763 }
6764 else
6765 {
6766 xdelay = 1;
8370f6fa 6767 operands[4] = GEN_INT (length + 4);
16d74a3c
JDA
6768 }
6769
6770 if (GET_MODE (operands[0]) == DImode)
8370f6fa 6771 strcpy (buf, "bb,*");
16d74a3c 6772 else
8370f6fa 6773 strcpy (buf, "bb,");
16d74a3c
JDA
6774 if ((which == 0 && negated)
6775 || (which == 1 && !negated))
8370f6fa 6776 strcat (buf, "<");
16d74a3c 6777 else
8370f6fa 6778 strcat (buf, ">=");
16d74a3c 6779 if (nullify)
8370f6fa 6780 strcat (buf, ",n %0,%1,.+%4");
16d74a3c 6781 else
8370f6fa 6782 strcat (buf, " %0,%1,.+%4");
 /* Emit the reversed branch now; output_lbranch supplies the
 long branch to the real target and the final template. */
16d74a3c
JDA
6783 output_asm_insn (buf, operands);
6784 return output_lbranch (negated ? operands[3] : operands[2],
6785 insn, xdelay);
b9821af8 6786 }
d2364a74
JL
6787 return buf;
6788}
6789
6a73009d
JL
6790/* This routine handles all the branch-on-variable-bit conditional branch
6791 sequences we might need to generate. It handles nullification of delay
6792 slots, varying length branches, negated branches and all combinations
6793 of the above. It returns the appropriate output template to emit the
6794 branch.

 NEGATED selects operand 3 rather than operand 2 as the branch target
 and, together with WHICH, determines whether the `<' or the `>='
 completer is used. For lengths other than 4 and 8, the reversed
 conditional branch is output here and the long branch itself is
 produced by output_lbranch.

 When the template is built in the static buffer BUF, it is only
 valid until the next call. Although OPERANDS is marked
 ATTRIBUTE_UNUSED, it is read below and operand 4 is overwritten in
 the very long branch case. */
6795
519104fe 6796const char *
16d74a3c 6797output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
6a73009d
JL
6798{
6799 static char buf[100];
16923e7b 6800 bool useskip;
16d74a3c
JDA
6801 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6802 int length = get_attr_length (insn);
6803 int xdelay;
6a73009d 6804
112cdef5 6805 /* A conditional branch to the following instruction (e.g. the delay slot) is
6a73009d
JL
6806 asking for a disaster. I do not think this can happen as this pattern
6807 is only used when optimizing; jump optimization should eliminate the
6808 jump. But be prepared just in case. */
6809
f5e66865 6810 if (branch_to_delay_slot_p (insn))
02a57c73 6811 return "nop";
6a73009d
JL
6812
6813 /* If this is a long branch with its delay slot unfilled, set `nullify'
6814 as it can nullify the delay slot and save a nop. */
6815 if (length == 8 && dbr_sequence_length () == 0)
6816 nullify = 1;
6817
6818 /* If this is a short forward conditional branch which did not get
6819 its delay slot filled, the delay slot can still be nullified. */
6820 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6821 nullify = forward_branch_p (insn);
6822
6823 /* A forward branch over a single nullified insn can be done with an
6824 extrs instruction. This avoids a single cycle penalty due to
6825 mis-predicted branch if we fall through (branch not taken). */
16923e7b 6826 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6a73009d
JL
6827
6828 switch (length)
6829 {
6830
6831 /* All short conditional branches except backwards with an unfilled
6832 delay slot. */
6833 case 4:
6834 if (useskip)
f38b27c7 6835 strcpy (buf, "{vextrs,|extrw,s,}");
6a73009d 6836 else
f38b27c7 6837 strcpy (buf, "{bvb,|bb,}");
 /* For a DImode operand 0, replace the mnemonic chosen above with
 its doubleword form. */
520babc7 6838 if (useskip && GET_MODE (operands[0]) == DImode)
e72ed000 6839 strcpy (buf, "extrd,s,*");
520babc7
JL
6840 else if (GET_MODE (operands[0]) == DImode)
6841 strcpy (buf, "bb,*");
6a73009d
JL
6842 if ((which == 0 && negated)
6843 || (which == 1 && ! negated))
6844 strcat (buf, ">=");
6845 else
6846 strcat (buf, "<");
6847 if (useskip)
f38b27c7 6848 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6a73009d 6849 else if (nullify && negated)
f5e66865
JDA
6850 {
6851 if (branch_needs_nop_p (insn))
6852 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6853 else
6854 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6855 }
6a73009d 6856 else if (nullify && ! negated)
f5e66865
JDA
6857 {
6858 if (branch_needs_nop_p (insn))
6859 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6860 else
6861 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
6862 }
6a73009d 6863 else if (! nullify && negated)
f5e66865 6864 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6a73009d 6865 else if (! nullify && ! negated)
f38b27c7 6866 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6a73009d
JL
6867 break;
6868
5bdc5878 6869 /* All long conditionals. Note a short backward branch with an
6a73009d
JL
6870 unfilled delay slot is treated just like a long backward branch
6871 with an unfilled delay slot. */
6872 case 8:
6873 /* Handle weird backwards branch with a filled delay slot
16d74a3c 6874 which is nullified. */
6a73009d
JL
6875 if (dbr_sequence_length () != 0
6876 && ! forward_branch_p (insn)
6877 && nullify)
6878 {
f38b27c7 6879 strcpy (buf, "{bvb,|bb,}");
520babc7
JL
6880 if (GET_MODE (operands[0]) == DImode)
6881 strcat (buf, "*");
 /* The condition is the opposite of the one used in the short
 case: this branch skips over the unconditional branch that
 follows it. */
6a73009d
JL
6882 if ((which == 0 && negated)
6883 || (which == 1 && ! negated))
6884 strcat (buf, "<");
6885 else
6886 strcat (buf, ">=");
6887 if (negated)
f38b27c7 6888 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6a73009d 6889 else
f38b27c7 6890 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6a73009d
JL
6891 }
6892 /* Handle short backwards branch with an unfilled delay slot.
6893 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6894 taken and untaken branches. */
6895 else if (dbr_sequence_length () == 0
6896 && ! forward_branch_p (insn)
9d98a694
AO
6897 && INSN_ADDRESSES_SET_P ()
6898 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6899 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6a73009d 6900 {
f38b27c7 6901 strcpy (buf, "{bvb,|bb,}");
520babc7
JL
6902 if (GET_MODE (operands[0]) == DImode)
6903 strcat (buf, "*");
6a73009d
JL
6904 if ((which == 0 && negated)
6905 || (which == 1 && ! negated))
6906 strcat (buf, ">=");
6907 else
6908 strcat (buf, "<");
6909 if (negated)
f38b27c7 6910 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6a73009d 6911 else
f38b27c7 6912 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6a73009d
JL
6913 }
6914 else
6915 {
f38b27c7 6916 strcpy (buf, "{vextrs,|extrw,s,}");
520babc7
JL
6917 if (GET_MODE (operands[0]) == DImode)
6918 strcpy (buf, "extrd,s,*");
6a73009d
JL
6919 if ((which == 0 && negated)
6920 || (which == 1 && ! negated))
6921 strcat (buf, "<");
6922 else
6923 strcat (buf, ">=");
6924 if (nullify && negated)
f38b27c7 6925 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6a73009d 6926 else if (nullify && ! negated)
f38b27c7 6927 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6a73009d 6928 else if (negated)
f38b27c7 6929 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6a73009d 6930 else
f38b27c7 6931 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6a73009d
JL
6932 }
6933 break;
6934
6935 default:
16d74a3c
JDA
6936 /* The reversed conditional branch must branch over one additional
6937 instruction if the delay slot is filled and needs to be extracted
6938 by output_lbranch. If the delay slot is empty or this is a
6939 nullified forward branch, the instruction after the reversed
6940 condition branch must be nullified. */
6941 if (dbr_sequence_length () == 0
6942 || (nullify && forward_branch_p (insn)))
6943 {
6944 nullify = 1;
6945 xdelay = 0;
8370f6fa 6946 operands[4] = GEN_INT (length);
16d74a3c
JDA
6947 }
6948 else
6949 {
6950 xdelay = 1;
8370f6fa 6951 operands[4] = GEN_INT (length + 4);
16d74a3c
JDA
6952 }
6953
6954 if (GET_MODE (operands[0]) == DImode)
8370f6fa 6955 strcpy (buf, "bb,*");
16d74a3c 6956 else
8370f6fa 6957 strcpy (buf, "{bvb,|bb,}");
16d74a3c
JDA
6958 if ((which == 0 && negated)
6959 || (which == 1 && !negated))
8370f6fa 6960 strcat (buf, "<");
16d74a3c 6961 else
8370f6fa 6962 strcat (buf, ">=");
16d74a3c 6963 if (nullify)
8370f6fa 6964 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
16d74a3c 6965 else
8370f6fa 6966 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
 /* Emit the reversed branch now; output_lbranch supplies the
 long branch to the real target and the final template. */
16d74a3c
JDA
6967 output_asm_insn (buf, operands);
6968 return output_lbranch (negated ? operands[3] : operands[2],
6969 insn, xdelay);
6a73009d
JL
6970 }
6971 return buf;
6972}
6973
b1a275e1
JL
6974/* Return the output template for emitting a dbra type insn.
6975
6976 Note it may perform some output operations on its own before
6977 returning the final output string.

 WHICH_ALTERNATIVE selects how the loop counter (operand 0) is
 updated: alternative 0 adds operand 1 to it directly with
 addib/addi, alternative 1 is the reload from an FP register case,
 and any other value is the reload from memory case. In the two
 reload cases operand 4 is used as the temporary that holds the
 counter while it is incremented. Operand 3 is the branch target. */
519104fe 6978const char *
b7849684 6979output_dbra (rtx *operands, rtx insn, int which_alternative)
b1a275e1 6980{
16d74a3c 6981 int length = get_attr_length (insn);
b1a275e1 6982
112cdef5 6983 /* A conditional branch to the following instruction (e.g. the delay slot) is
b1a275e1
JL
6984 asking for a disaster. Be prepared! */
6985
 /* In that case no branch is emitted at all; only the update of the
 loop counter is output. */
f5e66865 6986 if (branch_to_delay_slot_p (insn))
b1a275e1
JL
6987 {
6988 if (which_alternative == 0)
6989 return "ldo %1(%0),%0";
6990 else if (which_alternative == 1)
6991 {
831c1763
AM
6992 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
6993 output_asm_insn ("ldw -16(%%r30),%4", operands);
d2d28085 6994 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
f38b27c7 6995 return "{fldws|fldw} -16(%%r30),%0";
b1a275e1
JL
6996 }
6997 else
6998 {
6999 output_asm_insn ("ldw %0,%4", operands);
7000 return "ldo %1(%4),%4\n\tstw %4,%0";
7001 }
7002 }
7003
7004 if (which_alternative == 0)
7005 {
7006 int nullify = INSN_ANNULLED_BRANCH_P (insn);
16d74a3c 7007 int xdelay;
b1a275e1
JL
7008
7009 /* If this is a long branch with its delay slot unfilled, set `nullify'
7010 as it can nullify the delay slot and save a nop. */
a1b36964 7011 if (length == 8 && dbr_sequence_length () == 0)
b1a275e1
JL
7012 nullify = 1;
7013
7014 /* If this is a short forward conditional branch which did not get
7015 its delay slot filled, the delay slot can still be nullified. */
a1b36964 7016 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b1a275e1
JL
7017 nullify = forward_branch_p (insn);
7018
144d51f9 7019 switch (length)
b1a275e1 7020 {
144d51f9
NS
7021 case 4:
7022 if (nullify)
f5e66865
JDA
7023 {
7024 if (branch_needs_nop_p (insn))
7025 return "addib,%C2,n %1,%0,%3%#";
7026 else
7027 return "addib,%C2,n %1,%0,%3";
7028 }
144d51f9
NS
7029 else
7030 return "addib,%C2 %1,%0,%3";
7031
7032 case 8:
23f6f34f 7033 /* Handle weird backwards branch with a filled delay slot
b1a275e1
JL
7034 which is nullified. */
7035 if (dbr_sequence_length () != 0
7036 && ! forward_branch_p (insn)
7037 && nullify)
3b5e5fb3 7038 return "addib,%N2,n %1,%0,.+12\n\tb %3";
923f781d
JL
7039 /* Handle short backwards branch with an unfilled delay slot.
7040 Using an addib;nop rather than addi;bl saves 1 cycle for both
7041 taken and untaken branches. */
7042 else if (dbr_sequence_length () == 0
7043 && ! forward_branch_p (insn)
9d98a694
AO
7044 && INSN_ADDRESSES_SET_P ()
7045 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7046 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d 7047 return "addib,%C2 %1,%0,%3%#";
23f6f34f
TG
7048
7049 /* Handle normal cases. */
b1a275e1 7050 if (nullify)
3b5e5fb3 7051 return "addi,%N2 %1,%0,%0\n\tb,n %3";
b1a275e1 7052 else
3b5e5fb3 7053 return "addi,%N2 %1,%0,%0\n\tb %3";
144d51f9
NS
7054
7055 default:
16d74a3c
JDA
7056 /* The reversed conditional branch must branch over one additional
7057 instruction if the delay slot is filled and needs to be extracted
7058 by output_lbranch. If the delay slot is empty or this is a
7059 nullified forward branch, the instruction after the reversed
7060 condition branch must be nullified. */
7061 if (dbr_sequence_length () == 0
7062 || (nullify && forward_branch_p (insn)))
7063 {
7064 nullify = 1;
7065 xdelay = 0;
7066 operands[4] = GEN_INT (length);
7067 }
7068 else
7069 {
7070 xdelay = 1;
7071 operands[4] = GEN_INT (length + 4);
7072 }
7073
7074 if (nullify)
7075 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7076 else
7077 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7078
7079 return output_lbranch (operands[3], insn, xdelay);
b1a275e1 7080 }
144d51f9 7081
b1a275e1
JL
7082 }
7083 /* Deal with gross reload from FP register case. */
7084 else if (which_alternative == 1)
7085 {
7086 /* Move loop counter from FP register to MEM then into a GR,
7087 increment the GR, store the GR into MEM, and finally reload
23f6f34f 7088 the FP register from MEM from within the branch's delay slot. */
831c1763
AM
7089 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7090 operands);
d2d28085 7091 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
 /* Lengths 24 and 28 select the two in-range branch forms; any
 other length goes through output_lbranch. */
16d74a3c 7092 if (length == 24)
f38b27c7 7093 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c 7094 else if (length == 28)
f38b27c7 7095 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c
JDA
7096 else
7097 {
8370f6fa
JDA
7098 operands[5] = GEN_INT (length - 16);
7099 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
16d74a3c
JDA
7100 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7101 return output_lbranch (operands[3], insn, 0);
7102 }
b1a275e1
JL
7103 }
7104 /* Deal with gross reload from memory case. */
7105 else
7106 {
7107 /* Reload loop counter from memory, the store back to memory
71cc389b 7108 happens in the branch's delay slot. */
b1a275e1 7109 output_asm_insn ("ldw %0,%4", operands);
 /* Lengths 12 and 16 select the two in-range branch forms; any
 other length goes through output_lbranch. */
16d74a3c 7110 if (length == 12)
b1a275e1 7111 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
16d74a3c 7112 else if (length == 16)
3b5e5fb3 7113 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
16d74a3c
JDA
7114 else
7115 {
8370f6fa
JDA
7116 operands[5] = GEN_INT (length - 4);
7117 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
16d74a3c
JDA
7118 return output_lbranch (operands[3], insn, 0);
7119 }
b1a275e1
JL
7120 }
7121}
7122
16d74a3c 7123/* Return the output template for emitting a movb type insn.
b1a275e1
JL
7124
7125 Note it may perform some output operations on its own before
7126 returning the final output string. */
519104fe 7127const char *
b7849684
JE
7128output_movb (rtx *operands, rtx insn, int which_alternative,
7129 int reverse_comparison)
b1a275e1 7130{
16d74a3c 7131 int length = get_attr_length (insn);
b1a275e1 7132
112cdef5 7133 /* A conditional branch to the following instruction (e.g. the delay slot) is
b1a275e1
JL
7134 asking for a disaster. Be prepared! */
7135
f5e66865 7136 if (branch_to_delay_slot_p (insn))
b1a275e1
JL
7137 {
7138 if (which_alternative == 0)
7139 return "copy %1,%0";
7140 else if (which_alternative == 1)
7141 {
831c1763 7142 output_asm_insn ("stw %1,-16(%%r30)", operands);
f38b27c7 7143 return "{fldws|fldw} -16(%%r30),%0";
b1a275e1 7144 }
b1092901 7145 else if (which_alternative == 2)
b1a275e1 7146 return "stw %1,%0";
b1092901
JL
7147 else
7148 return "mtsar %r1";
b1a275e1
JL
7149 }
7150
7151 /* Support the second variant. */
7152 if (reverse_comparison)
7153 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7154
7155 if (which_alternative == 0)
7156 {
7157 int nullify = INSN_ANNULLED_BRANCH_P (insn);
16d74a3c 7158 int xdelay;
b1a275e1
JL
7159
7160 /* If this is a long branch with its delay slot unfilled, set `nullify'
7161 as it can nullify the delay slot and save a nop. */
a1b36964 7162 if (length == 8 && dbr_sequence_length () == 0)
b1a275e1
JL
7163 nullify = 1;
7164
7165 /* If this is a short forward conditional branch which did not get
7166 its delay slot filled, the delay slot can still be nullified. */
a1b36964 7167 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b1a275e1
JL
7168 nullify = forward_branch_p (insn);
7169
144d51f9 7170 switch (length)
b1a275e1 7171 {
144d51f9
NS
7172 case 4:
7173 if (nullify)
f5e66865
JDA
7174 {
7175 if (branch_needs_nop_p (insn))
7176 return "movb,%C2,n %1,%0,%3%#";
7177 else
7178 return "movb,%C2,n %1,%0,%3";
7179 }
144d51f9
NS
7180 else
7181 return "movb,%C2 %1,%0,%3";
7182
7183 case 8:
23f6f34f 7184 /* Handle weird backwards branch with a filled delay slot
b1a275e1
JL
7185 which is nullified. */
7186 if (dbr_sequence_length () != 0
7187 && ! forward_branch_p (insn)
7188 && nullify)
3b5e5fb3 7189 return "movb,%N2,n %1,%0,.+12\n\tb %3";
23f6f34f 7190
923f781d
JL
7191 /* Handle short backwards branch with an unfilled delay slot.
7192 Using a movb;nop rather than or;bl saves 1 cycle for both
7193 taken and untaken branches. */
7194 else if (dbr_sequence_length () == 0
7195 && ! forward_branch_p (insn)
9d98a694
AO
7196 && INSN_ADDRESSES_SET_P ()
7197 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7198 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d 7199 return "movb,%C2 %1,%0,%3%#";
23f6f34f 7200 /* Handle normal cases. */
b1a275e1 7201 if (nullify)
3b5e5fb3 7202 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
b1a275e1 7203 else
3b5e5fb3 7204 return "or,%N2 %1,%%r0,%0\n\tb %3";
144d51f9
NS
7205
7206 default:
16d74a3c
JDA
7207 /* The reversed conditional branch must branch over one additional
7208 instruction if the delay slot is filled and needs to be extracted
7209 by output_lbranch. If the delay slot is empty or this is a
7210 nullified forward branch, the instruction after the reversed
7211 condition branch must be nullified. */
7212 if (dbr_sequence_length () == 0
7213 || (nullify && forward_branch_p (insn)))
7214 {
7215 nullify = 1;
7216 xdelay = 0;
7217 operands[4] = GEN_INT (length);
7218 }
7219 else
7220 {
7221 xdelay = 1;
7222 operands[4] = GEN_INT (length + 4);
7223 }
7224
7225 if (nullify)
7226 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7227 else
7228 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7229
7230 return output_lbranch (operands[3], insn, xdelay);
b1a275e1 7231 }
b1a275e1 7232 }
16d74a3c 7233 /* Deal with gross reload for FP destination register case. */
b1a275e1
JL
7234 else if (which_alternative == 1)
7235 {
16d74a3c
JDA
7236 /* Move source register to MEM, perform the branch test, then
7237 finally load the FP register from MEM from within the branch's
7238 delay slot. */
831c1763 7239 output_asm_insn ("stw %1,-16(%%r30)", operands);
16d74a3c 7240 if (length == 12)
f38b27c7 7241 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c 7242 else if (length == 16)
f38b27c7 7243 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c
JDA
7244 else
7245 {
8370f6fa
JDA
7246 operands[4] = GEN_INT (length - 4);
7247 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
16d74a3c
JDA
7248 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7249 return output_lbranch (operands[3], insn, 0);
7250 }
b1a275e1
JL
7251 }
7252 /* Deal with gross reload from memory case. */
b1092901 7253 else if (which_alternative == 2)
b1a275e1
JL
7254 {
7255 /* Reload loop counter from memory, the store back to memory
71cc389b 7256 happens in the branch's delay slot. */
16d74a3c 7257 if (length == 8)
f38b27c7 7258 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
16d74a3c 7259 else if (length == 12)
f38b27c7 7260 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
16d74a3c
JDA
7261 else
7262 {
8370f6fa
JDA
7263 operands[4] = GEN_INT (length);
7264 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7265 operands);
16d74a3c
JDA
7266 return output_lbranch (operands[3], insn, 0);
7267 }
b1a275e1 7268 }
b1092901
JL
7269 /* Handle SAR as a destination. */
7270 else
7271 {
16d74a3c 7272 if (length == 8)
f38b27c7 7273 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
16d74a3c 7274 else if (length == 12)
715ab8c3 7275 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
16d74a3c
JDA
7276 else
7277 {
8370f6fa
JDA
7278 operands[4] = GEN_INT (length);
7279 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7280 operands);
16d74a3c
JDA
7281 return output_lbranch (operands[3], insn, 0);
7282 }
b1092901 7283 }
b1a275e1
JL
7284}
7285
a02aa5b0
JDA
7286/* Copy any FP arguments in INSN into integer registers. */
7287static void
b7849684 7288copy_fp_args (rtx insn)
a02aa5b0
JDA
7289{
7290 rtx link;
7291 rtx xoperands[2];
b1a275e1 7292
a02aa5b0
JDA
7293 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7294 {
7295 int arg_mode, regno;
7296 rtx use = XEXP (link, 0);
f726ea7d 7297
a02aa5b0
JDA
7298 if (! (GET_CODE (use) == USE
7299 && GET_CODE (XEXP (use, 0)) == REG
7300 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7301 continue;
2c4ff308 7302
a02aa5b0
JDA
7303 arg_mode = GET_MODE (XEXP (use, 0));
7304 regno = REGNO (XEXP (use, 0));
520babc7 7305
a02aa5b0
JDA
7306 /* Is it a floating point register? */
7307 if (regno >= 32 && regno <= 39)
7308 {
7309 /* Copy the FP register into an integer register via memory. */
7310 if (arg_mode == SFmode)
7311 {
7312 xoperands[0] = XEXP (use, 0);
7313 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7314 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7315 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7316 }
7317 else
7318 {
7319 xoperands[0] = XEXP (use, 0);
7320 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7321 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7322 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7323 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7324 }
7325 }
279c9bde 7326 }
a02aa5b0
JDA
7327}
7328
7329/* Compute length of the FP argument copy sequence for INSN. */
7330static int
b7849684 7331length_fp_args (rtx insn)
a02aa5b0
JDA
7332{
7333 int length = 0;
7334 rtx link;
279c9bde 7335
a02aa5b0 7336 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6a73009d 7337 {
a02aa5b0
JDA
7338 int arg_mode, regno;
7339 rtx use = XEXP (link, 0);
7340
7341 if (! (GET_CODE (use) == USE
7342 && GET_CODE (XEXP (use, 0)) == REG
7343 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7344 continue;
6a73009d 7345
a02aa5b0
JDA
7346 arg_mode = GET_MODE (XEXP (use, 0));
7347 regno = REGNO (XEXP (use, 0));
7348
7349 /* Is it a floating point register? */
7350 if (regno >= 32 && regno <= 39)
6a73009d 7351 {
a02aa5b0
JDA
7352 if (arg_mode == SFmode)
7353 length += 8;
7354 else
7355 length += 12;
6a73009d 7356 }
a02aa5b0 7357 }
6a73009d 7358
a02aa5b0
JDA
7359 return length;
7360}
3d9268b6 7361
611ad29e
JDA
7362/* Return the attribute length for the millicode call instruction INSN.
7363 The length must match the code generated by output_millicode_call.
7364 We include the delay slot in the returned length as it is better to
a02aa5b0 7365 over estimate the length than to under estimate it. */
a7721dc0 7366
a02aa5b0 7367int
b7849684 7368attr_length_millicode_call (rtx insn)
a02aa5b0 7369{
611ad29e 7370 unsigned long distance = -1;
62910663 7371 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
a02aa5b0 7372
611ad29e
JDA
7373 if (INSN_ADDRESSES_SET_P ())
7374 {
5fad1c24
JDA
7375 distance = (total + insn_current_reference_address (insn));
7376 if (distance < total)
611ad29e
JDA
7377 distance = -1;
7378 }
a02aa5b0
JDA
7379
7380 if (TARGET_64BIT)
7381 {
7382 if (!TARGET_LONG_CALLS && distance < 7600000)
611ad29e 7383 return 8;
a02aa5b0 7384
611ad29e 7385 return 20;
a02aa5b0
JDA
7386 }
7387 else if (TARGET_PORTABLE_RUNTIME)
611ad29e 7388 return 24;
a02aa5b0
JDA
7389 else
7390 {
7391 if (!TARGET_LONG_CALLS && distance < 240000)
611ad29e 7392 return 8;
a02aa5b0
JDA
7393
7394 if (TARGET_LONG_ABS_CALL && !flag_pic)
611ad29e 7395 return 12;
a02aa5b0 7396
611ad29e 7397 return 24;
a02aa5b0
JDA
7398 }
7399}
7400
/* Emit the assembly for a millicode call.  The sequence is selected
   from the insn's length attribute, the delay sequence length and the
   target flags.  A nop is added when nothing occupies the delay slot.
   An unconditional jump found in the delay slot is either folded into
   the return address held in the link register or emitted as a
   separate branch, and is then deleted.  All output goes through
   output_asm_insn, so the returned template is always the empty
   string.  */
7401/* INSN is a function call. It may have an unconditional jump
7402 in its delay slot.
a7721dc0 7403
a02aa5b0 7404 CALL_DEST is the routine we are calling. */
a7721dc0 7405
a02aa5b0 7406const char *
b7849684 7407output_millicode_call (rtx insn, rtx call_dest)
a02aa5b0
JDA
7408{
7409 int attr_length = get_attr_length (insn);
7410 int seq_length = dbr_sequence_length ();
7411 int distance;
7412 rtx seq_insn;
7413 rtx xoperands[3];
a7721dc0 7414
a02aa5b0
JDA
/* Operand 0 is the call target.  Operand 2 is the link register used
   by the short branch and by the return address adjustment below:
   register 2 when TARGET_64BIT, register 31 otherwise.  Operand 1 is
   filled in later with a local label where one is needed.  */
7415 xoperands[0] = call_dest;
7416 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7417
7418 /* Handle the common case where we are sure that the branch will
7419 reach the beginning of the $CODE$ subspace. The within reach
7420 form of the $$sh_func_adrs call has a length of 28. Because
272d0bee 7421 it has an attribute type of multi, it never has a nonzero
a02aa5b0
JDA
7422 sequence length. The length of the $$sh_func_adrs is the same
7423 as certain out of reach PIC calls to other routines. */
7424 if (!TARGET_LONG_CALLS
7425 && ((seq_length == 0
7426 && (attr_length == 12
7427 || (attr_length == 28 && get_attr_type (insn) == TYPE_MULTI)))
7428 || (seq_length != 0 && attr_length == 8)))
7429 {
7430 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7431 }
7432 else
7433 {
7434 if (TARGET_64BIT)
7435 {
7436 /* It might seem that one insn could be saved by accessing
7437 the millicode function using the linkage table. However,
7438 this doesn't work in shared libraries and other dynamically
7439 loaded objects. Using a pc-relative sequence also avoids
7440 problems related to the implicit use of the gp register. */
7441 output_asm_insn ("b,l .+8,%%r1", xoperands);
581d9404
JDA
7442
7443 if (TARGET_GAS)
7444 {
7445 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7446 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7447 }
7448 else
7449 {
7450 xoperands[1] = gen_label_rtx ();
7451 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
ecc418c4 7452 targetm.asm_out.internal_label (asm_out_file, "L",
581d9404
JDA
7453 CODE_LABEL_NUMBER (xoperands[1]));
7454 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7455 }
7456
a02aa5b0 7457 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6a73009d 7458 }
6a73009d
JL
7459 else if (TARGET_PORTABLE_RUNTIME)
7460 {
a02aa5b0
JDA
7461 /* Pure portable runtime doesn't allow be/ble; we also don't
7462 have PIC support in the assembler/linker, so this sequence
7463 is needed. */
6a73009d 7464
a02aa5b0
JDA
7465 /* Get the address of our target into %r1. */
7466 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7467 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6a73009d 7468
a02aa5b0
JDA
7469 /* Get our return address into %r31. */
7470 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7471 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
6a73009d 7472
a02aa5b0
JDA
7473 /* Jump to our target address in %r1. */
7474 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6a73009d 7475 }
a02aa5b0 7476 else if (!flag_pic)
6a73009d 7477 {
a02aa5b0 7478 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6248c4dd 7479 if (TARGET_PA_20)
a02aa5b0 7480 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
6248c4dd 7481 else
a02aa5b0 7482 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6a73009d 7483 }
a02aa5b0 7484 else
6a73009d 7485 {
581d9404
JDA
7486 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7487 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7488
a02aa5b0
JDA
7489 if (TARGET_SOM || !TARGET_GAS)
7490 {
7491 /* The HP assembler can generate relocations for the
7492 difference of two symbols. GAS can do this for a
7493 millicode symbol but not an arbitrary external
7494 symbol when generating SOM output. */
7495 xoperands[1] = gen_label_rtx ();
ecc418c4 7496 targetm.asm_out.internal_label (asm_out_file, "L",
a02aa5b0
JDA
7497 CODE_LABEL_NUMBER (xoperands[1]));
7498 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7499 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7500 }
7501 else
7502 {
a02aa5b0
JDA
7503 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7504 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7505 xoperands);
7506 }
6a73009d 7507
a02aa5b0
JDA
7508 /* Jump to our target address in %r1. */
7509 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6a73009d 7510 }
6a73009d
JL
7511 }
7512
a02aa5b0
JDA
/* Nothing was scheduled into the delay slot, so fill it with a nop. */
7513 if (seq_length == 0)
7514 output_asm_insn ("nop", xoperands);
6a73009d 7515
a02aa5b0
JDA
7516 /* We are done if there isn't a jump in the delay slot. */
7517 if (seq_length == 0 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7518 return "";
6a73009d 7519
a02aa5b0
JDA
7520 /* This call has an unconditional jump in its delay slot. */
7521 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6a73009d 7522
a02aa5b0
JDA
7523 /* See if the return address can be adjusted. Use the containing
7524 sequence insn's address. */
611ad29e 7525 if (INSN_ADDRESSES_SET_P ())
6a73009d 7526 {
611ad29e
JDA
7527 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7528 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7529 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7530
7531 if (VAL_14_BITS_P (distance))
7532 {
7533 xoperands[1] = gen_label_rtx ();
7534 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
ecc418c4
JDA
7535 targetm.asm_out.internal_label (asm_out_file, "L",
7536 CODE_LABEL_NUMBER (xoperands[1]));
611ad29e
JDA
7537 }
7538 else
7539 /* ??? This branch may not reach its target. */
7540 output_asm_insn ("nop\n\tb,n %0", xoperands);
6a73009d 7541 }
a02aa5b0
JDA
7542 else
7543 /* ??? This branch may not reach its target. */
7544 output_asm_insn ("nop\n\tb,n %0", xoperands);
6a73009d
JL
7545
7546 /* Delete the jump. */
a38e7aa5 7547 SET_INSN_DELETED (NEXT_INSN (insn));
a02aa5b0 7548
6a73009d
JL
7549 return "";
7550}
7551
611ad29e
JDA
7552/* Return the attribute length of the call instruction INSN. The SIBCALL
7553 flag indicates whether INSN is a regular call or a sibling call. The
32562302
JDA
7554 length returned must be longer than the code actually generated by
7555 output_call. Since branch shortening is done before delay branch
7556 sequencing, there is no way to determine whether or not the delay
7557 slot will be filled during branch shortening. Even when the delay
7558 slot is filled, we may have to add a nop if the delay slot contains
7559 a branch that can't reach its target. Thus, we always have to include
7560 the delay slot in the length estimate. This used to be done in
7561 pa_adjust_insn_length but we do it here now as some sequences always
7562 fill the delay slot and we can save four bytes in the estimate for
7563 these sequences. */
a02aa5b0
JDA
7564
7565int
b7849684 7566attr_length_call (rtx insn, int sibcall)
a02aa5b0 7567{
32562302 7568 int local_call;
e40375e0 7569 rtx call, call_dest;
32562302
JDA
7570 tree call_decl;
7571 int length = 0;
7572 rtx pat = PATTERN (insn);
611ad29e 7573 unsigned long distance = -1;
a02aa5b0 7574
e40375e0
JDA
7575 gcc_assert (GET_CODE (insn) == CALL_INSN);
7576
611ad29e
JDA
7577 if (INSN_ADDRESSES_SET_P ())
7578 {
32562302
JDA
7579 unsigned long total;
7580
7581 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
5fad1c24
JDA
7582 distance = (total + insn_current_reference_address (insn));
7583 if (distance < total)
611ad29e
JDA
7584 distance = -1;
7585 }
a02aa5b0 7586
e40375e0 7587 gcc_assert (GET_CODE (pat) == PARALLEL);
a02aa5b0 7588
e40375e0
JDA
7589 /* Get the call rtx. */
7590 call = XVECEXP (pat, 0, 0);
7591 if (GET_CODE (call) == SET)
7592 call = SET_SRC (call);
7593
7594 gcc_assert (GET_CODE (call) == CALL);
7595
7596 /* Determine if this is a local call. */
7597 call_dest = XEXP (XEXP (call, 0), 0);
32562302 7598 call_decl = SYMBOL_REF_DECL (call_dest);
ecc418c4 7599 local_call = call_decl && targetm.binds_local_p (call_decl);
a02aa5b0 7600
32562302
JDA
7601 /* pc-relative branch. */
7602 if (!TARGET_LONG_CALLS
7603 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7604 || distance < 240000))
7605 length += 8;
a02aa5b0 7606
32562302
JDA
7607 /* 64-bit plabel sequence. */
7608 else if (TARGET_64BIT && !local_call)
7609 length += sibcall ? 28 : 24;
a02aa5b0 7610
32562302
JDA
7611 /* non-pic long absolute branch sequence. */
7612 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7613 length += 12;
a02aa5b0 7614
32562302 7615 /* long pc-relative branch sequence. */
9dbd54be 7616 else if (TARGET_LONG_PIC_SDIFF_CALL
751d9855
JDA
7617 || (TARGET_GAS && !TARGET_SOM
7618 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
32562302
JDA
7619 {
7620 length += 20;
a02aa5b0 7621
0831e1d1 7622 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
32562302
JDA
7623 length += 8;
7624 }
62910663 7625
32562302
JDA
7626 /* 32-bit plabel sequence. */
7627 else
7628 {
7629 length += 32;
a02aa5b0 7630
32562302
JDA
7631 if (TARGET_SOM)
7632 length += length_fp_args (insn);
7633
7634 if (flag_pic)
7635 length += 4;
90330d31 7636
32562302
JDA
7637 if (!TARGET_PA_20)
7638 {
a02aa5b0
JDA
7639 if (!sibcall)
7640 length += 8;
7641
0831e1d1 7642 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
32562302 7643 length += 8;
a02aa5b0
JDA
7644 }
7645 }
32562302
JDA
7646
7647 return length;
a02aa5b0
JDA
7648}
7649
/* Emit the assembly for the call or sibling call INSN to CALL_DEST.
   SIBCALL is nonzero for a sibling call.  When TARGET_LONG_CALLS is
   off and attr_length_call reports 8, a single {bl|b,l} is emitted,
   with register 0 as the link operand for a sibcall and register 2
   otherwise.  In every other case one of the long call sequences below
   is used.  A non-jump insn in the delay slot may be emitted ahead of
   the call and deleted.  An unconditional jump in the delay slot is
   either folded into the return address or emitted as a separate
   branch, and is then deleted.  All output goes through
   output_asm_insn, so the returned template is always the empty
   string.  */
7650/* INSN is a function call. It may have an unconditional jump
6a73009d
JL
7651 in its delay slot.
7652
7653 CALL_DEST is the routine we are calling. */
7654
519104fe 7655const char *
b7849684 7656output_call (rtx insn, rtx call_dest, int sibcall)
6a73009d 7657{
a02aa5b0
JDA
/* delay_insn_deleted is set once the insn from the delay slot has been
   output ahead of the call and marked deleted.  delay_slot_filled is
   set when the emitted sequence itself places an instruction after the
   branch, so that no nop has to be added.  */
7658 int delay_insn_deleted = 0;
7659 int delay_slot_filled = 0;
3d9268b6 7660 int seq_length = dbr_sequence_length ();
5fad1c24 7661 tree call_decl = SYMBOL_REF_DECL (call_dest);
ecc418c4 7662 int local_call = call_decl && targetm.binds_local_p (call_decl);
a02aa5b0
JDA
7663 rtx xoperands[2];
7664
7665 xoperands[0] = call_dest;
6a73009d 7666
a02aa5b0 7667 /* Handle the common case where we're sure that the branch will reach
5fad1c24
JDA
7668 the beginning of the "$CODE$" subspace. This is the beginning of
7669 the current function if we are in a named section. */
611ad29e 7670 if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
2c4ff308 7671 {
520babc7 7672 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
a02aa5b0 7673 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
279c9bde 7674 }
a02aa5b0 7675 else
279c9bde 7676 {
5fad1c24 7677 if (TARGET_64BIT && !local_call)
f726ea7d 7678 {
a02aa5b0
JDA
7679 /* ??? As far as I can tell, the HP linker doesn't support the
7680 long pc-relative sequence described in the 64-bit runtime
7681 architecture. So, we use a slightly longer indirect call. */
7aaf280e 7682 xoperands[0] = get_deferred_plabel (call_dest);
a02aa5b0
JDA
7683 xoperands[1] = gen_label_rtx ();
7684
7685 /* If this isn't a sibcall, we put the load of %r27 into the
7686 delay slot. We can't do this in a sibcall as we don't
7687 have a second call-clobbered scratch register available. */
7688 if (seq_length != 0
7689 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7690 && !sibcall)
7691 {
7692 final_scan_insn (NEXT_INSN (insn), asm_out_file,
c9d691e9 7693 optimize, 0, NULL);
a02aa5b0
JDA
7694
7695 /* Now delete the delay insn. */
a38e7aa5 7696 SET_INSN_DELETED (NEXT_INSN (insn));
a02aa5b0
JDA
7697 delay_insn_deleted = 1;
7698 }
279c9bde 7699
a02aa5b0
JDA
7700 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7701 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7702 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
279c9bde 7703
a02aa5b0 7704 if (sibcall)
279c9bde 7705 {
a02aa5b0
JDA
7706 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7707 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7708 output_asm_insn ("bve (%%r1)", xoperands);
7709 }
7710 else
7711 {
7712 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7713 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7714 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7715 delay_slot_filled = 1;
279c9bde
JL
7716 }
7717 }
a02aa5b0 7718 else
93ae92c1 7719 {
a02aa5b0
JDA
7720 int indirect_call = 0;
7721
7722 /* Emit a long call. There are several different sequences
7723 of increasing length and complexity. In most cases,
7724 they don't allow an instruction in the delay slot. */
5fad1c24 7725 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
9dbd54be 7726 && !TARGET_LONG_PIC_SDIFF_CALL
751d9855
JDA
7727 && !(TARGET_GAS && !TARGET_SOM
7728 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
5fad1c24 7729 && !TARGET_64BIT)
a02aa5b0
JDA
7730 indirect_call = 1;
7731
7732 if (seq_length != 0
7733 && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
7734 && !sibcall
44b86471
JDA
7735 && (!TARGET_PA_20
7736 || indirect_call
7737 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
359255a9 7738 {
a02aa5b0
JDA
7739 /* A non-jump insn in the delay slot. By definition we can
7740 emit this insn before the call (and in fact before argument
7741 relocating). */
c9d691e9 7742 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
5cfc5f84 7743 NULL);
a02aa5b0
JDA
7744
7745 /* Now delete the delay insn. */
a38e7aa5 7746 SET_INSN_DELETED (NEXT_INSN (insn));
a02aa5b0 7747 delay_insn_deleted = 1;
359255a9 7748 }
93ae92c1 7749
5fad1c24 7750 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
359255a9 7751 {
a02aa5b0
JDA
7752 /* This is the best sequence for making long calls in
7753 non-pic code. Unfortunately, GNU ld doesn't provide
7754 the stub needed for external calls, and GAS's support
5fad1c24
JDA
7755 for this with the SOM linker is buggy. It is safe
7756 to use this for local calls. */
a02aa5b0
JDA
7757 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7758 if (sibcall)
7759 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7760 else
7761 {
7762 if (TARGET_PA_20)
7763 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7764 xoperands);
7765 else
7766 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6a73009d 7767
a02aa5b0
JDA
7768 output_asm_insn ("copy %%r31,%%r2", xoperands);
7769 delay_slot_filled = 1;
7770 }
7771 }
7772 else
7773 {
9dbd54be 7774 if (TARGET_LONG_PIC_SDIFF_CALL)
3d9268b6 7775 {
a02aa5b0 7776 /* The HP assembler and linker can handle relocations
9dbd54be
JDA
7777 for the difference of two symbols. The HP assembler
7778 recognizes the sequence as a pc-relative call and
7779 the linker provides stubs when needed. */
a02aa5b0
JDA
7780 xoperands[1] = gen_label_rtx ();
7781 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7782 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
ecc418c4 7783 targetm.asm_out.internal_label (asm_out_file, "L",
3d9268b6 7784 CODE_LABEL_NUMBER (xoperands[1]));
a02aa5b0
JDA
7785 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7786 }
751d9855
JDA
7787 else if (TARGET_GAS && !TARGET_SOM
7788 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
3d9268b6 7789 {
a02aa5b0
JDA
7790 /* GAS currently can't generate the relocations that
7791 are needed for the SOM linker under HP-UX using this
7792 sequence. The GNU linker doesn't generate the stubs
7793 that are needed for external calls on TARGET_ELF32
7794 with this sequence. For now, we have to use a
7795 longer plabel sequence when using GAS. */
7796 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7797 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
3d9268b6 7798 xoperands);
a02aa5b0 7799 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
3d9268b6
JDA
7800 xoperands);
7801 }
520babc7
JL
7802 else
7803 {
a02aa5b0
JDA
7804 /* Emit a long plabel-based call sequence. This is
7805 essentially an inline implementation of $$dyncall.
7806 We don't actually try to call $$dyncall as this is
7807 as difficult as calling the function itself. */
7aaf280e 7808 xoperands[0] = get_deferred_plabel (call_dest);
a02aa5b0
JDA
7809 xoperands[1] = gen_label_rtx ();
7810
7811 /* Since the call is indirect, FP arguments in registers
7812 need to be copied to the general registers. Then, the
7813 argument relocation stub will copy them back. */
7814 if (TARGET_SOM)
7815 copy_fp_args (insn);
7816
7817 if (flag_pic)
7818 {
7819 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7820 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7821 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7822 }
7823 else
7824 {
7825 output_asm_insn ("addil LR'%0-$global$,%%r27",
7826 xoperands);
7827 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7828 xoperands);
7829 }
279c9bde 7830
a02aa5b0
JDA
7831 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7832 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7833 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7834 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
6a73009d 7835
a02aa5b0
JDA
7836 if (!sibcall && !TARGET_PA_20)
7837 {
7838 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
0831e1d1 7839 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
90330d31
JDA
7840 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7841 else
7842 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
a02aa5b0
JDA
7843 }
7844 }
6a73009d 7845
a02aa5b0 7846 if (TARGET_PA_20)
520babc7 7847 {
a02aa5b0
JDA
7848 if (sibcall)
7849 output_asm_insn ("bve (%%r1)", xoperands);
7850 else
7851 {
7852 if (indirect_call)
7853 {
7854 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7855 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7856 delay_slot_filled = 1;
7857 }
7858 else
7859 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7860 }
520babc7
JL
7861 }
7862 else
7863 {
0831e1d1 7864 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
90330d31
JDA
7865 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7866 xoperands);
279c9bde 7867
a02aa5b0 7868 if (sibcall)
90330d31 7869 {
0831e1d1 7870 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
90330d31
JDA
7871 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
7872 else
7873 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
7874 }
a02aa5b0
JDA
7875 else
7876 {
0831e1d1 7877 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
90330d31
JDA
7878 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
7879 else
7880 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
279c9bde 7881
a02aa5b0
JDA
7882 if (indirect_call)
7883 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
7884 else
7885 output_asm_insn ("copy %%r31,%%r2", xoperands);
7886 delay_slot_filled = 1;
7887 }
7888 }
7889 }
279c9bde 7890 }
2c4ff308 7891 }
23f6f34f 7892
/* Add a nop unless the sequence above already placed an instruction
   after the branch or an undeleted insn still sits in the delay slot. */
62910663 7893 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
a02aa5b0 7894 output_asm_insn ("nop", xoperands);
2c4ff308 7895
a02aa5b0
JDA
7896 /* We are done if there isn't a jump in the delay slot. */
7897 if (seq_length == 0
7898 || delay_insn_deleted
7899 || GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
7900 return "";
2c4ff308 7901
a02aa5b0 7902 /* A sibcall should never have a branch in the delay slot. */
144d51f9 7903 gcc_assert (!sibcall);
2c4ff308 7904
a02aa5b0
JDA
7905 /* This call has an unconditional jump in its delay slot. */
7906 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
2c4ff308 7907
611ad29e 7908 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
2c4ff308 7909 {
a02aa5b0 7910 /* See if the return address can be adjusted. Use the containing
28326880
OH
7911 sequence insn's address. This would break the regular call/return
7912 relationship assumed by the table based eh unwinder, so only do that
7913 if the call is not possibly throwing. */
a02aa5b0
JDA
7914 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7915 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7916 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7917
28326880
OH
7918 if (VAL_14_BITS_P (distance)
7919 && !(can_throw_internal (insn) || can_throw_external (insn)))
a02aa5b0
JDA
7920 {
7921 xoperands[1] = gen_label_rtx ();
7922 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
ecc418c4
JDA
7923 targetm.asm_out.internal_label (asm_out_file, "L",
7924 CODE_LABEL_NUMBER (xoperands[1]));
a02aa5b0
JDA
7925 }
7926 else
a02aa5b0 7927 output_asm_insn ("nop\n\tb,n %0", xoperands);
2c4ff308 7928 }
a02aa5b0 7929 else
a02aa5b0 7930 output_asm_insn ("b,n %0", xoperands);
2c4ff308
JL
7931
7932 /* Delete the jump. */
a38e7aa5 7933 SET_INSN_DELETED (NEXT_INSN (insn));
a02aa5b0 7934
2c4ff308
JL
7935 return "";
7936}
7937
611ad29e
JDA
7938/* Return the attribute length of the indirect call instruction INSN.
7939 The length must match the code generated by output_indirect call.
7940 The returned length includes the delay slot. Currently, the delay
7941 slot of an indirect call sequence is not exposed and it is used by
7942 the sequence itself. */
7943
7944int
b7849684 7945attr_length_indirect_call (rtx insn)
611ad29e
JDA
7946{
7947 unsigned long distance = -1;
62910663 7948 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
611ad29e
JDA
7949
7950 if (INSN_ADDRESSES_SET_P ())
7951 {
5fad1c24
JDA
7952 distance = (total + insn_current_reference_address (insn));
7953 if (distance < total)
611ad29e
JDA
7954 distance = -1;
7955 }
7956
7957 if (TARGET_64BIT)
7958 return 12;
7959
7960 if (TARGET_FAST_INDIRECT_CALLS
7961 || (!TARGET_PORTABLE_RUNTIME
40fc2e0b
JDA
7962 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
7963 || distance < 240000)))
611ad29e
JDA
7964 return 8;
7965
7966 if (flag_pic)
7967 return 24;
7968
7969 if (TARGET_PORTABLE_RUNTIME)
7970 return 20;
7971
7972 /* Out of reach, can use ble. */
7973 return 12;
7974}
7975
7976const char *
b7849684 7977output_indirect_call (rtx insn, rtx call_dest)
611ad29e
JDA
7978{
7979 rtx xoperands[1];
7980
7981 if (TARGET_64BIT)
7982 {
7983 xoperands[0] = call_dest;
7984 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
7985 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
7986 return "";
7987 }
7988
7989 /* First the special case for kernels, level 0 systems, etc. */
7990 if (TARGET_FAST_INDIRECT_CALLS)
7991 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7992
7993 /* Now the normal case -- we can reach $$dyncall directly or
7994 we're sure that we can get there via a long-branch stub.
7995
7996 No need to check target flags as the length uniquely identifies
7997 the remaining cases. */
7998 if (attr_length_indirect_call (insn) == 8)
2c774817 7999 {
40fc2e0b
JDA
8000 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8001 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8002 variant of the B,L instruction can't be used on the SOM target. */
8003 if (TARGET_PA_20 && !TARGET_SOM)
2c774817
JDA
8004 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8005 else
8006 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8007 }
611ad29e
JDA
8008
8009 /* Long millicode call, but we are not generating PIC or portable runtime
8010 code. */
8011 if (attr_length_indirect_call (insn) == 12)
8012 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8013
8014 /* Long millicode call for portable runtime. */
8015 if (attr_length_indirect_call (insn) == 20)
8016 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
8017
8018 /* We need a long PIC call to $$dyncall. */
8019 xoperands[0] = NULL_RTX;
8020 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8021 if (TARGET_SOM || !TARGET_GAS)
8022 {
8023 xoperands[0] = gen_label_rtx ();
8024 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
ecc418c4
JDA
8025 targetm.asm_out.internal_label (asm_out_file, "L",
8026 CODE_LABEL_NUMBER (xoperands[0]));
611ad29e
JDA
8027 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8028 }
8029 else
8030 {
8031 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
8032 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8033 xoperands);
8034 }
8035 output_asm_insn ("blr %%r0,%%r2", xoperands);
8036 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
8037 return "";
8038}
8039
8040/* Return the total length of the save and restore instructions needed for
8041 the data linkage table pointer (i.e., the PIC register) across the call
8042 instruction INSN. No-return calls do not require a save and restore.
8043 In addition, we may be able to avoid the save and restore for calls
8044 within the same translation unit. */
8045
8046int
b7849684 8047attr_length_save_restore_dltp (rtx insn)
611ad29e
JDA
8048{
8049 if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
8050 return 0;
8051
8052 return 8;
8053}
8054
d2a94ec0 8055/* In HPUX 8.0's shared library scheme, special relocations are needed
23f6f34f 8056 for function labels if they might be passed to a function
d2a94ec0 8057 in a shared library (because shared libraries don't live in code
520a57c8 8058 space), and special magic is needed to construct their address. */
d2a94ec0
TM
8059
8060void
b7849684 8061hppa_encode_label (rtx sym)
d2a94ec0 8062{
519104fe 8063 const char *str = XSTR (sym, 0);
10d17cb7
AM
8064 int len = strlen (str) + 1;
8065 char *newstr, *p;
d2a94ec0 8066
5ead67f6 8067 p = newstr = XALLOCAVEC (char, len + 1);
10d17cb7
AM
8068 *p++ = '@';
8069 strcpy (p, str);
67d6f2fc 8070
831c1763 8071 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
d2a94ec0 8072}
23f6f34f 8073
fb49053f 8074static void
b7849684 8075pa_encode_section_info (tree decl, rtx rtl, int first)
fb49053f 8076{
9a60b229
JJ
8077 int old_referenced = 0;
8078
8079 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8080 old_referenced
8081 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8082
51076f96
RC
8083 default_encode_section_info (decl, rtl, first);
8084
fb49053f
RH
8085 if (first && TEXT_SPACE_P (decl))
8086 {
fb49053f
RH
8087 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8088 if (TREE_CODE (decl) == FUNCTION_DECL)
c6a2438a 8089 hppa_encode_label (XEXP (rtl, 0));
fb49053f 8090 }
9a60b229
JJ
8091 else if (old_referenced)
8092 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
fb49053f
RH
8093}
8094
772c5265
RH
/* Roughly the inverse of pa_encode_section_info: return a pointer into
   STR that skips the encoding prefix.

   At most one leading '@' is skipped, followed by at most one '*'.
   The order matters: "*@name" loses only the '*'.  STR must be a valid
   NUL-terminated string; the empty string is returned unchanged.  No
   storage is allocated -- the result points into STR.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  if (*str == '@')
    str++;
  if (*str == '*')
    str++;
  return str;
}
8104
326bc2de
JL
8105/* Returns 1 if OP is a function label involved in a simple addition
8106 with a constant. Used to keep certain patterns from matching
8107 during instruction combination. */
8108int
b7849684 8109is_function_label_plus_const (rtx op)
326bc2de
JL
8110{
8111 /* Strip off any CONST. */
8112 if (GET_CODE (op) == CONST)
8113 op = XEXP (op, 0);
8114
8115 return (GET_CODE (op) == PLUS
9c575e20 8116 && function_label_operand (XEXP (op, 0), VOIDmode)
326bc2de
JL
8117 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8118}
8119
54374491
JL
8120/* Output assembly code for a thunk to FUNCTION. */
8121
c590b625 8122static void
b7849684
JE
8123pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8124 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8125 tree function)
54374491 8126{
cdcb88d7 8127 static unsigned int current_thunk_number;
5fad1c24 8128 int val_14 = VAL_14_BITS_P (delta);
67b846fa 8129 unsigned int old_last_address = last_address, nbytes = 0;
54374491 8130 char label[16];
cdcb88d7 8131 rtx xoperands[4];
5fad1c24 8132
cdcb88d7
JDA
8133 xoperands[0] = XEXP (DECL_RTL (function), 0);
8134 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8135 xoperands[2] = GEN_INT (delta);
5fad1c24 8136
cdcb88d7
JDA
8137 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8138 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
5fad1c24
JDA
8139
8140 /* Output the thunk. We know that the function is in the same
8141 translation unit (i.e., the same space) as the thunk, and that
8142 thunks are output after their method. Thus, we don't need an
8143 external branch to reach the function. With SOM and GAS,
8144 functions and thunks are effectively in different sections.
8145 Thus, we can always use a IA-relative branch and the linker
8146 will add a long branch stub if necessary.
8147
8148 However, we have to be careful when generating PIC code on the
8149 SOM port to ensure that the sequence does not transfer to an
8150 import stub for the target function as this could clobber the
8151 return value saved at SP-24. This would also apply to the
8152 32-bit linux port if the multi-space model is implemented. */
8153 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8154 && !(flag_pic && TREE_PUBLIC (function))
8155 && (TARGET_GAS || last_address < 262132))
8156 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
677f3fa8 8157 && ((targetm_common.have_named_sections
5fad1c24
JDA
8158 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8159 /* The GNU 64-bit linker has rather poor stub management.
8160 So, we use a long branch from thunks that aren't in
8161 the same section as the target function. */
8162 && ((!TARGET_64BIT
8163 && (DECL_SECTION_NAME (thunk_fndecl)
8164 != DECL_SECTION_NAME (function)))
8165 || ((DECL_SECTION_NAME (thunk_fndecl)
8166 == DECL_SECTION_NAME (function))
8167 && last_address < 262132)))
677f3fa8 8168 || (targetm_common.have_named_sections
2842bb86
JDA
8169 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8170 && DECL_SECTION_NAME (function) == NULL
8171 && last_address < 262132)
677f3fa8
JM
8172 || (!targetm_common.have_named_sections
8173 && last_address < 262132))))
5fad1c24 8174 {
cdcb88d7
JDA
8175 if (!val_14)
8176 output_asm_insn ("addil L'%2,%%r26", xoperands);
8177
8178 output_asm_insn ("b %0", xoperands);
8179
5fad1c24
JDA
8180 if (val_14)
8181 {
cdcb88d7 8182 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24
JDA
8183 nbytes += 8;
8184 }
8185 else
8186 {
cdcb88d7 8187 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8188 nbytes += 12;
8189 }
8190 }
8191 else if (TARGET_64BIT)
8192 {
8193 /* We only have one call-clobbered scratch register, so we can't
8194 make use of the delay slot if delta doesn't fit in 14 bits. */
8195 if (!val_14)
cdcb88d7
JDA
8196 {
8197 output_asm_insn ("addil L'%2,%%r26", xoperands);
8198 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8199 }
5fad1c24 8200
cdcb88d7 8201 output_asm_insn ("b,l .+8,%%r1", xoperands);
5fad1c24
JDA
8202
8203 if (TARGET_GAS)
8204 {
cdcb88d7
JDA
8205 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8206 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
5fad1c24
JDA
8207 }
8208 else
8209 {
cdcb88d7
JDA
8210 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8211 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
5fad1c24
JDA
8212 }
8213
8214 if (val_14)
8215 {
cdcb88d7
JDA
8216 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8217 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24
JDA
8218 nbytes += 20;
8219 }
8220 else
8221 {
cdcb88d7 8222 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
5fad1c24
JDA
8223 nbytes += 24;
8224 }
8225 }
8226 else if (TARGET_PORTABLE_RUNTIME)
8227 {
cdcb88d7
JDA
8228 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8229 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8230
8231 if (!val_14)
8232 output_asm_insn ("addil L'%2,%%r26", xoperands);
8233
8234 output_asm_insn ("bv %%r0(%%r22)", xoperands);
5fad1c24
JDA
8235
8236 if (val_14)
8237 {
cdcb88d7 8238 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24
JDA
8239 nbytes += 16;
8240 }
8241 else
8242 {
cdcb88d7 8243 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8244 nbytes += 20;
8245 }
8246 }
8247 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8248 {
8249 /* The function is accessible from outside this module. The only
8250 way to avoid an import stub between the thunk and function is to
8251 call the function directly with an indirect sequence similar to
8252 that used by $$dyncall. This is possible because $$dyncall acts
8253 as the import stub in an indirect call. */
5fad1c24 8254 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
cdcb88d7
JDA
8255 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8256 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8257 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8258 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8259 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8260 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8261 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8262 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8263
5fad1c24
JDA
8264 if (!val_14)
8265 {
cdcb88d7 8266 output_asm_insn ("addil L'%2,%%r26", xoperands);
5fad1c24
JDA
8267 nbytes += 4;
8268 }
cdcb88d7 8269
5fad1c24
JDA
8270 if (TARGET_PA_20)
8271 {
cdcb88d7
JDA
8272 output_asm_insn ("bve (%%r22)", xoperands);
8273 nbytes += 36;
8274 }
8275 else if (TARGET_NO_SPACE_REGS)
8276 {
8277 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
5fad1c24
JDA
8278 nbytes += 36;
8279 }
8280 else
54374491 8281 {
cdcb88d7
JDA
8282 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8283 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8284 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8285 nbytes += 44;
5fad1c24
JDA
8286 }
8287
8288 if (val_14)
cdcb88d7 8289 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24 8290 else
cdcb88d7 8291 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8292 }
8293 else if (flag_pic)
8294 {
cdcb88d7 8295 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
5fad1c24
JDA
8296
8297 if (TARGET_SOM || !TARGET_GAS)
8298 {
cdcb88d7
JDA
8299 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8300 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
5fad1c24
JDA
8301 }
8302 else
8303 {
cdcb88d7
JDA
8304 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8305 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
5fad1c24
JDA
8306 }
8307
cdcb88d7
JDA
8308 if (!val_14)
8309 output_asm_insn ("addil L'%2,%%r26", xoperands);
8310
8311 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8312
5fad1c24
JDA
8313 if (val_14)
8314 {
cdcb88d7 8315 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24 8316 nbytes += 20;
54374491
JL
8317 }
8318 else
5fad1c24 8319 {
cdcb88d7 8320 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8321 nbytes += 24;
8322 }
54374491
JL
8323 }
8324 else
8325 {
5fad1c24 8326 if (!val_14)
cdcb88d7 8327 output_asm_insn ("addil L'%2,%%r26", xoperands);
5fad1c24 8328
cdcb88d7
JDA
8329 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8330 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
5fad1c24
JDA
8331
8332 if (val_14)
54374491 8333 {
cdcb88d7 8334 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24 8335 nbytes += 12;
54374491
JL
8336 }
8337 else
5fad1c24 8338 {
cdcb88d7 8339 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8340 nbytes += 16;
8341 }
54374491 8342 }
5fad1c24 8343
54374491 8344 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
5fad1c24 8345
1a83bfc3
JDA
8346 if (TARGET_SOM && TARGET_GAS)
8347 {
8348 /* We done with this subspace except possibly for some additional
8349 debug information. Forget that we are in this subspace to ensure
8350 that the next function is output in its own subspace. */
8351 in_section = NULL;
8352 cfun->machine->in_nsubspa = 2;
8353 }
8354
5fad1c24 8355 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
54374491 8356 {
d6b5193b 8357 switch_to_section (data_section);
cdcb88d7 8358 output_asm_insn (".align 4", xoperands);
5fad1c24 8359 ASM_OUTPUT_LABEL (file, label);
cdcb88d7 8360 output_asm_insn (".word P'%0", xoperands);
54374491 8361 }
5fad1c24 8362
54374491 8363 current_thunk_number++;
5fad1c24
JDA
8364 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8365 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8366 last_address += nbytes;
67b846fa
JDA
8367 if (old_last_address > last_address)
8368 last_address = UINT_MAX;
5fad1c24 8369 update_total_code_bytes (nbytes);
54374491
JL
8370}
8371
4977bab6
ZW
8372/* Only direct calls to static functions are allowed to be sibling (tail)
8373 call optimized.
8374
8375 This restriction is necessary because some linker generated stubs will
8376 store return pointers into rp' in some cases which might clobber a
8377 live value already in rp'.
8378
8379 In a sibcall the current function and the target function share stack
8380 space. Thus if the path to the current function and the path to the
8381 target function save a value in rp', they save the value into the
8382 same stack slot, which has undesirable consequences.
8383
8384 Because of the deferred binding nature of shared libraries any function
8385 with external scope could be in a different load module and thus require
8386 rp' to be saved when calling that function. So sibcall optimizations
8387 can only be safe for static function.
8388
8389 Note that GCC never needs return value relocations, so we don't have to
8390 worry about static calls with return value relocations (which require
8391 saving rp').
8392
8393 It is safe to perform a sibcall optimization when the target function
8394 will never return. */
8395static bool
b7849684 8396pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4977bab6 8397{
73096ba9
JDA
8398 if (TARGET_PORTABLE_RUNTIME)
8399 return false;
8400
11f43127
JDA
8401 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
8402 single subspace mode and the call is not indirect. As far as I know,
8403 there is no operating system support for the multiple subspace mode.
8404 It might be possible to support indirect calls if we didn't use
8405 $$dyncall (see the indirect sequence generated in output_call). */
8406 if (TARGET_ELF32)
8407 return (decl != NULL_TREE);
8408
8409 /* Sibcalls are not ok because the arg pointer register is not a fixed
c1207243 8410 register. This prevents the sibcall optimization from occurring. In
11f43127
JDA
8411 addition, there are problems with stub placement using GNU ld. This
8412 is because a normal sibcall branch uses a 17-bit relocation while
8413 a regular call branch uses a 22-bit relocation. As a result, more
8414 care needs to be taken in the placement of long-branch stubs. */
8415 if (TARGET_64BIT)
8416 return false;
8417
73096ba9
JDA
8418 /* Sibcalls are only ok within a translation unit. */
8419 return (decl && !TREE_PUBLIC (decl));
4977bab6
ZW
8420}
8421
8ddf681a
R
8422/* ??? Addition is not commutative on the PA due to the weird implicit
8423 space register selection rules for memory addresses. Therefore, we
8424 don't consider a + b == b + a, as this might be inside a MEM. */
8425static bool
3101faab 8426pa_commutative_p (const_rtx x, int outer_code)
8ddf681a
R
8427{
8428 return (COMMUTATIVE_P (x)
bd7d5043
JDA
8429 && (TARGET_NO_SPACE_REGS
8430 || (outer_code != UNKNOWN && outer_code != MEM)
8ddf681a
R
8431 || GET_CODE (x) != PLUS));
8432}
8433
88e5c029
JL
8434/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8435 use in fmpyadd instructions. */
2fe24884 8436int
b7849684 8437fmpyaddoperands (rtx *operands)
2fe24884 8438{
f133af4c 8439 enum machine_mode mode = GET_MODE (operands[0]);
2fe24884 8440
d85ab966
JL
8441 /* Must be a floating point mode. */
8442 if (mode != SFmode && mode != DFmode)
8443 return 0;
8444
2fe24884 8445 /* All modes must be the same. */
f133af4c
TG
8446 if (! (mode == GET_MODE (operands[1])
8447 && mode == GET_MODE (operands[2])
8448 && mode == GET_MODE (operands[3])
8449 && mode == GET_MODE (operands[4])
8450 && mode == GET_MODE (operands[5])))
2fe24884
JL
8451 return 0;
8452
d85ab966
JL
8453 /* All operands must be registers. */
8454 if (! (GET_CODE (operands[1]) == REG
8455 && GET_CODE (operands[2]) == REG
8456 && GET_CODE (operands[3]) == REG
8457 && GET_CODE (operands[4]) == REG
8458 && GET_CODE (operands[5]) == REG))
2fe24884
JL
8459 return 0;
8460
88e5c029
JL
8461 /* Only 2 real operands to the addition. One of the input operands must
8462 be the same as the output operand. */
2fe24884
JL
8463 if (! rtx_equal_p (operands[3], operands[4])
8464 && ! rtx_equal_p (operands[3], operands[5]))
8465 return 0;
8466
1e5f1716 8467 /* Inout operand of add cannot conflict with any operands from multiply. */
2fe24884
JL
8468 if (rtx_equal_p (operands[3], operands[0])
8469 || rtx_equal_p (operands[3], operands[1])
8470 || rtx_equal_p (operands[3], operands[2]))
8471 return 0;
8472
1e5f1716 8473 /* multiply cannot feed into addition operands. */
2fe24884
JL
8474 if (rtx_equal_p (operands[4], operands[0])
8475 || rtx_equal_p (operands[5], operands[0]))
8476 return 0;
8477
d85ab966
JL
8478 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8479 if (mode == SFmode
88624c0e
JL
8480 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8481 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8482 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8483 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8484 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8485 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
d85ab966
JL
8486 return 0;
8487
2fe24884
JL
8488 /* Passed. Operands are suitable for fmpyadd. */
8489 return 1;
8490}
8491
35d434ed
JDA
8492#if !defined(USE_COLLECT2)
8493static void
b7849684 8494pa_asm_out_constructor (rtx symbol, int priority)
35d434ed
JDA
8495{
8496 if (!function_label_operand (symbol, VOIDmode))
8497 hppa_encode_label (symbol);
8498
8499#ifdef CTORS_SECTION_ASM_OP
8500 default_ctor_section_asm_out_constructor (symbol, priority);
8501#else
8502# ifdef TARGET_ASM_NAMED_SECTION
8503 default_named_section_asm_out_constructor (symbol, priority);
8504# else
8505 default_stabs_asm_out_constructor (symbol, priority);
8506# endif
8507#endif
8508}
8509
8510static void
b7849684 8511pa_asm_out_destructor (rtx symbol, int priority)
35d434ed
JDA
8512{
8513 if (!function_label_operand (symbol, VOIDmode))
8514 hppa_encode_label (symbol);
8515
8516#ifdef DTORS_SECTION_ASM_OP
8517 default_dtor_section_asm_out_destructor (symbol, priority);
8518#else
8519# ifdef TARGET_ASM_NAMED_SECTION
8520 default_named_section_asm_out_destructor (symbol, priority);
8521# else
8522 default_stabs_asm_out_destructor (symbol, priority);
8523# endif
8524#endif
8525}
8526#endif
8527
d4482715
JDA
8528/* This function places uninitialized global data in the bss section.
8529 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8530 function on the SOM port to prevent uninitialized global data from
8531 being placed in the data section. */
8532
8533void
8534pa_asm_output_aligned_bss (FILE *stream,
8535 const char *name,
8536 unsigned HOST_WIDE_INT size,
8537 unsigned int align)
8538{
d6b5193b 8539 switch_to_section (bss_section);
d4482715
JDA
8540 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8541
8542#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8543 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8544#endif
8545
8546#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8547 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8548#endif
8549
8550 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8551 ASM_OUTPUT_LABEL (stream, name);
8552 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8553}
8554
8555/* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8556 that doesn't allow the alignment of global common storage to be directly
8557 specified. The SOM linker aligns common storage based on the rounded
8558 value of the NUM_BYTES parameter in the .comm directive. It's not
8559 possible to use the .align directive as it doesn't affect the alignment
8560 of the label associated with a .comm directive. */
8561
8562void
8563pa_asm_output_aligned_common (FILE *stream,
8564 const char *name,
8565 unsigned HOST_WIDE_INT size,
8566 unsigned int align)
8567{
22f549fd
JDA
8568 unsigned int max_common_align;
8569
8570 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8571 if (align > max_common_align)
8572 {
d4ee4d25 8573 warning (0, "alignment (%u) for %s exceeds maximum alignment "
22f549fd
JDA
8574 "for global common data. Using %u",
8575 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8576 align = max_common_align;
8577 }
8578
d6b5193b 8579 switch_to_section (bss_section);
d4482715
JDA
8580
8581 assemble_name (stream, name);
8582 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8583 MAX (size, align / BITS_PER_UNIT));
8584}
8585
8586/* We can't use .comm for local common storage as the SOM linker effectively
8587 treats the symbol as universal and uses the same storage for local symbols
8588 with the same name in different object files. The .block directive
8589 reserves an uninitialized block of storage. However, it's not common
8590 storage. Fortunately, GCC never requests common storage with the same
8591 name in any given translation unit. */
8592
8593void
8594pa_asm_output_aligned_local (FILE *stream,
8595 const char *name,
8596 unsigned HOST_WIDE_INT size,
8597 unsigned int align)
8598{
d6b5193b 8599 switch_to_section (bss_section);
d4482715
JDA
8600 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8601
8602#ifdef LOCAL_ASM_OP
8603 fprintf (stream, "%s", LOCAL_ASM_OP);
8604 assemble_name (stream, name);
8605 fprintf (stream, "\n");
8606#endif
8607
8608 ASM_OUTPUT_LABEL (stream, name);
8609 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8610}
8611
88e5c029
JL
8612/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8613 use in fmpysub instructions. */
2fe24884 8614int
b7849684 8615fmpysuboperands (rtx *operands)
2fe24884 8616{
f133af4c 8617 enum machine_mode mode = GET_MODE (operands[0]);
2fe24884 8618
d85ab966
JL
8619 /* Must be a floating point mode. */
8620 if (mode != SFmode && mode != DFmode)
8621 return 0;
8622
2fe24884 8623 /* All modes must be the same. */
f133af4c
TG
8624 if (! (mode == GET_MODE (operands[1])
8625 && mode == GET_MODE (operands[2])
8626 && mode == GET_MODE (operands[3])
8627 && mode == GET_MODE (operands[4])
8628 && mode == GET_MODE (operands[5])))
2fe24884
JL
8629 return 0;
8630
d85ab966
JL
8631 /* All operands must be registers. */
8632 if (! (GET_CODE (operands[1]) == REG
8633 && GET_CODE (operands[2]) == REG
8634 && GET_CODE (operands[3]) == REG
8635 && GET_CODE (operands[4]) == REG
8636 && GET_CODE (operands[5]) == REG))
2fe24884
JL
8637 return 0;
8638
88e5c029
JL
8639 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8640 operation, so operands[4] must be the same as operand[3]. */
2fe24884
JL
8641 if (! rtx_equal_p (operands[3], operands[4]))
8642 return 0;
8643
1e5f1716 8644 /* multiply cannot feed into subtraction. */
88e5c029 8645 if (rtx_equal_p (operands[5], operands[0]))
2fe24884
JL
8646 return 0;
8647
1e5f1716 8648 /* Inout operand of sub cannot conflict with any operands from multiply. */
2fe24884
JL
8649 if (rtx_equal_p (operands[3], operands[0])
8650 || rtx_equal_p (operands[3], operands[1])
8651 || rtx_equal_p (operands[3], operands[2]))
8652 return 0;
8653
d85ab966
JL
8654 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8655 if (mode == SFmode
88624c0e
JL
8656 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8657 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8658 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8659 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8660 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8661 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
d85ab966
JL
8662 return 0;
8663
2fe24884
JL
8664 /* Passed. Operands are suitable for fmpysub. */
8665 return 1;
8666}
8667
c2264220
JL
/* Return 1 if VAL is 2, 4, or 8 -- the only constants valid in shadd
   instructions -- and 0 for every other value.  */
int
shadd_constant_p (int val)
{
  return val == 2 || val == 4 || val == 8;
}
4802a0d6 8678
3232e9d8
JDA
8679/* Return TRUE if INSN branches forward. */
8680
8681static bool
b7849684 8682forward_branch_p (rtx insn)
b9821af8 8683{
3232e9d8
JDA
8684 rtx lab = JUMP_LABEL (insn);
8685
8686 /* The INSN must have a jump label. */
8687 gcc_assert (lab != NULL_RTX);
8688
8689 if (INSN_ADDRESSES_SET_P ())
8690 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
b9821af8
JL
8691
8692 while (insn)
8693 {
3232e9d8
JDA
8694 if (insn == lab)
8695 return true;
b9821af8
JL
8696 else
8697 insn = NEXT_INSN (insn);
8698 }
8699
3232e9d8 8700 return false;
b9821af8
JL
8701}
8702
2c4ff308
JL
8703/* Return 1 if INSN is in the delay slot of a call instruction. */
8704int
b7849684 8705jump_in_call_delay (rtx insn)
2c4ff308
JL
8706{
8707
8708 if (GET_CODE (insn) != JUMP_INSN)
8709 return 0;
8710
8711 if (PREV_INSN (insn)
8712 && PREV_INSN (PREV_INSN (insn))
02a57c73 8713 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
2c4ff308 8714 {
02a57c73 8715 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
2c4ff308
JL
8716
8717 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8718 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8719
8720 }
8721 else
8722 return 0;
8723}
746a9efa 8724
b1092901
JL
8725/* Output an unconditional move and branch insn. */
8726
519104fe 8727const char *
16d74a3c 8728output_parallel_movb (rtx *operands, rtx insn)
b1092901 8729{
16d74a3c
JDA
8730 int length = get_attr_length (insn);
8731
b1092901
JL
8732 /* These are the cases in which we win. */
8733 if (length == 4)
8734 return "mov%I1b,tr %1,%0,%2";
8735
16d74a3c
JDA
8736 /* None of the following cases win, but they don't lose either. */
8737 if (length == 8)
b1092901 8738 {
16d74a3c
JDA
8739 if (dbr_sequence_length () == 0)
8740 {
8741 /* Nothing in the delay slot, fake it by putting the combined
8742 insn (the copy or add) in the delay slot of a bl. */
8743 if (GET_CODE (operands[1]) == CONST_INT)
8744 return "b %2\n\tldi %1,%0";
8745 else
8746 return "b %2\n\tcopy %1,%0";
8747 }
b1092901 8748 else
16d74a3c
JDA
8749 {
8750 /* Something in the delay slot, but we've got a long branch. */
8751 if (GET_CODE (operands[1]) == CONST_INT)
8752 return "ldi %1,%0\n\tb %2";
8753 else
8754 return "copy %1,%0\n\tb %2";
8755 }
b1092901 8756 }
16d74a3c
JDA
8757
8758 if (GET_CODE (operands[1]) == CONST_INT)
8759 output_asm_insn ("ldi %1,%0", operands);
b1092901 8760 else
16d74a3c
JDA
8761 output_asm_insn ("copy %1,%0", operands);
8762 return output_lbranch (operands[2], insn, 1);
b1092901
JL
8763}
8764
8765/* Output an unconditional add and branch insn. */
8766
519104fe 8767const char *
16d74a3c 8768output_parallel_addb (rtx *operands, rtx insn)
b1092901 8769{
16d74a3c
JDA
8770 int length = get_attr_length (insn);
8771
b1092901
JL
8772 /* To make life easy we want operand0 to be the shared input/output
8773 operand and operand1 to be the readonly operand. */
8774 if (operands[0] == operands[1])
8775 operands[1] = operands[2];
8776
8777 /* These are the cases in which we win. */
8778 if (length == 4)
8779 return "add%I1b,tr %1,%0,%3";
8780
16d74a3c
JDA
8781 /* None of the following cases win, but they don't lose either. */
8782 if (length == 8)
b1092901 8783 {
16d74a3c
JDA
8784 if (dbr_sequence_length () == 0)
8785 /* Nothing in the delay slot, fake it by putting the combined
8786 insn (the copy or add) in the delay slot of a bl. */
8787 return "b %3\n\tadd%I1 %1,%0,%0";
8788 else
8789 /* Something in the delay slot, but we've got a long branch. */
8790 return "add%I1 %1,%0,%0\n\tb %3";
b1092901 8791 }
16d74a3c
JDA
8792
8793 output_asm_insn ("add%I1 %1,%0,%0", operands);
8794 return output_lbranch (operands[3], insn, 1);
b1092901
JL
8795}
8796
1c31ecf6
JDA
8797/* Return nonzero if INSN (a jump insn) immediately follows a call
8798 to a named function. This is used to avoid filling the delay slot
8799 of the jump since it can usually be eliminated by modifying RP in
8800 the delay slot of the call. */
6619e96c 8801
51723711 8802int
b7849684 8803following_call (rtx insn)
b1092901 8804{
6d8d2bbc 8805 if (! TARGET_JUMP_IN_DELAY)
f9bd8d8e
JL
8806 return 0;
8807
b1092901
JL
8808 /* Find the previous real insn, skipping NOTEs. */
8809 insn = PREV_INSN (insn);
8810 while (insn && GET_CODE (insn) == NOTE)
8811 insn = PREV_INSN (insn);
8812
8813 /* Check for CALL_INSNs and millicode calls. */
8814 if (insn
cdc0de30
JL
8815 && ((GET_CODE (insn) == CALL_INSN
8816 && get_attr_type (insn) != TYPE_DYNCALL)
b1092901
JL
8817 || (GET_CODE (insn) == INSN
8818 && GET_CODE (PATTERN (insn)) != SEQUENCE
8819 && GET_CODE (PATTERN (insn)) != USE
8820 && GET_CODE (PATTERN (insn)) != CLOBBER
8821 && get_attr_type (insn) == TYPE_MILLI)))
8822 return 1;
8823
8824 return 0;
8825}
8826
746a9efa
JL
8827/* We use this hook to perform a PA specific optimization which is difficult
8828 to do in earlier passes.
8829
8830 We want the delay slots of branches within jump tables to be filled.
8831 None of the compiler passes at the moment even has the notion that a
8832 PA jump table doesn't contain addresses, but instead contains actual
8833 instructions!
8834
8835 Because we actually jump into the table, the addresses of each entry
ddd5a7c1 8836 must stay constant in relation to the beginning of the table (which
746a9efa
JL
8837 itself must stay constant relative to the instruction to jump into
8838 it). I don't believe we can guarantee earlier passes of the compiler
8839 will adhere to those rules.
8840
8841 So, late in the compilation process we find all the jump tables, and
112cdef5 8842 expand them into real code -- e.g. each entry in the jump table vector
746a9efa
JL
8843 will get an appropriate label followed by a jump to the final target.
8844
8845 Reorg and the final jump pass can then optimize these branches and
8846 fill their delay slots. We end up with smaller, more efficient code.
8847
6619e96c 8848 The jump instructions within the table are special; we must be able
746a9efa
JL
8849 to identify them during assembly output (if the jumps don't get filled
8850 we need to emit a nop rather than nullifying the delay slot)). We
cb4d476c
JDA
8851 identify jumps in switch tables by using insns with the attribute
8852 type TYPE_BTABLE_BRANCH.
251ffdee
JL
8853
8854 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8855 insns. This serves two purposes, first it prevents jump.c from
8856 noticing that the last N entries in the table jump to the instruction
8857 immediately after the table and deleting the jumps. Second, those
8858 insns mark where we should emit .begin_brtab and .end_brtab directives
8859 when using GAS (allows for better link time optimizations). */
746a9efa 8860
18dbd950 8861static void
b7849684 8862pa_reorg (void)
746a9efa
JL
8863{
8864 rtx insn;
8865
18dbd950 8866 remove_useless_addtr_insns (1);
d8b79470 8867
86001391 8868 if (pa_cpu < PROCESSOR_8000)
18dbd950 8869 pa_combine_instructions ();
86001391 8870
c4bb6b38 8871
d8b79470 8872 /* This is fairly cheap, so always run it if optimizing. */
3e056efc 8873 if (optimize > 0 && !TARGET_BIG_SWITCH)
746a9efa 8874 {
29763968 8875 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
18dbd950 8876 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
746a9efa 8877 {
cb4d476c 8878 rtx pattern, tmp, location, label;
746a9efa
JL
8879 unsigned int length, i;
8880
29763968 8881 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
746a9efa 8882 if (GET_CODE (insn) != JUMP_INSN
29763968
JL
8883 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8884 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
746a9efa
JL
8885 continue;
8886
251ffdee
JL
8887 /* Emit marker for the beginning of the branch table. */
8888 emit_insn_before (gen_begin_brtab (), insn);
ad238e4b 8889
746a9efa
JL
8890 pattern = PATTERN (insn);
8891 location = PREV_INSN (insn);
29763968 8892 length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC);
ad238e4b 8893
746a9efa
JL
8894 for (i = 0; i < length; i++)
8895 {
3e056efc
JL
8896 /* Emit a label before each jump to keep jump.c from
8897 removing this code. */
8898 tmp = gen_label_rtx ();
8899 LABEL_NUSES (tmp) = 1;
8900 emit_label_after (tmp, location);
8901 location = NEXT_INSN (location);
8902
29763968 8903 if (GET_CODE (pattern) == ADDR_VEC)
cb4d476c 8904 label = XEXP (XVECEXP (pattern, 0, i), 0);
29763968 8905 else
cb4d476c
JDA
8906 label = XEXP (XVECEXP (pattern, 1, i), 0);
8907
8908 tmp = gen_short_jump (label);
8909
8910 /* Emit the jump itself. */
8911 tmp = emit_jump_insn_after (tmp, location);
8912 JUMP_LABEL (tmp) = label;
8913 LABEL_NUSES (label)++;
8914 location = NEXT_INSN (location);
746a9efa
JL
8915
8916 /* Emit a BARRIER after the jump. */
746a9efa 8917 emit_barrier_after (location);
746a9efa
JL
8918 location = NEXT_INSN (location);
8919 }
ad238e4b 8920
251ffdee
JL
8921 /* Emit marker for the end of the branch table. */
8922 emit_insn_before (gen_end_brtab (), location);
8923 location = NEXT_INSN (location);
8924 emit_barrier_after (location);
3e056efc 8925
29763968 8926 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
746a9efa
JL
8927 delete_insn (insn);
8928 }
8929 }
251ffdee 8930 else
ad238e4b 8931 {
cb4d476c
JDA
8932 /* Still need brtab marker insns. FIXME: the presence of these
8933 markers disables output of the branch table to readonly memory,
8934 and any alignment directives that might be needed. Possibly,
8935 the begin_brtab insn should be output before the label for the
1ae58c30 8936 table. This doesn't matter at the moment since the tables are
cb4d476c 8937 always output in the text section. */
18dbd950 8938 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
ad238e4b
JL
8939 {
8940 /* Find an ADDR_VEC insn. */
8941 if (GET_CODE (insn) != JUMP_INSN
29763968
JL
8942 || (GET_CODE (PATTERN (insn)) != ADDR_VEC
8943 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC))
ad238e4b
JL
8944 continue;
8945
8946 /* Now generate markers for the beginning and end of the
956d6950 8947 branch table. */
ad238e4b
JL
8948 emit_insn_before (gen_begin_brtab (), insn);
8949 emit_insn_after (gen_end_brtab (), insn);
8950 }
8951 }
aba892c4 8952}
c4bb6b38
JL
8953
8954/* The PA has a number of odd instructions which can perform multiple
8955 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8956 it may be profitable to combine two instructions into one instruction
8957 with two outputs. It's not profitable on PA2.0 machines because the
8958 two outputs would take two slots in the reorder buffers.
8959
8960 This routine finds instructions which can be combined and combines
8961 them. We only support some of the potential combinations, and we
8962 only try common ways to find suitable instructions.
8963
8964 * addb can add two registers or a register and a small integer
8965 and jump to a nearby (+-8k) location. Normally the jump to the
8966 nearby location is conditional on the result of the add, but by
8967 using the "true" condition we can make the jump unconditional.
8968 Thus addb can perform two independent operations in one insn.
8969
8970 * movb is similar to addb in that it can perform a reg->reg
8971 or small immediate->reg copy and jump to a nearby (+-8k) location.
8972
8973 * fmpyadd and fmpysub can perform a FP multiply and either an
8974 FP add or FP sub if the operands of the multiply and add/sub are
8975 independent (there are other minor restrictions). Note both
8976 the fmpy and fadd/fsub can in theory move to better spots according
8977 to data dependencies, but for now we require the fmpy stay at a
8978 fixed location.
8979
8980 * Many of the memory operations can perform pre & post updates
8981 of index registers. GCC's pre/post increment/decrement addressing
8982 is far too simple to take advantage of all the possibilities. This
8983 pass may not be suitable since those insns may not be independent.
8984
8985 * comclr can compare two ints or an int and a register, nullify
8986 the following instruction and zero some other register. This
8987 is more difficult to use as it's harder to find an insn which
8988 will generate a comclr than finding something like an unconditional
8989 branch. (conditional moves & long branches create comclr insns).
8990
8991 * Most arithmetic operations can conditionally skip the next
8992 instruction. They can be viewed as "perform this operation
8993 and conditionally jump to this nearby location" (where nearby
8994 is an insn away). These are difficult to use due to the
8995 branch length restrictions. */
8996
51723711 8997static void
b7849684 8998pa_combine_instructions (void)
c4bb6b38 8999{
0a2aaacc 9000 rtx anchor, new_rtx;
c4bb6b38
JL
9001
9002 /* This can get expensive since the basic algorithm is on the
9003 order of O(n^2) (or worse). Only do it for -O2 or higher
956d6950 9004 levels of optimization. */
c4bb6b38
JL
9005 if (optimize < 2)
9006 return;
9007
9008 /* Walk down the list of insns looking for "anchor" insns which
9009 may be combined with "floating" insns. As the name implies,
9010 "anchor" instructions don't move, while "floating" insns may
9011 move around. */
0a2aaacc
KG
9012 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9013 new_rtx = make_insn_raw (new_rtx);
c4bb6b38
JL
9014
9015 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9016 {
9017 enum attr_pa_combine_type anchor_attr;
9018 enum attr_pa_combine_type floater_attr;
9019
9020 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9021 Also ignore any special USE insns. */
51723711 9022 if ((GET_CODE (anchor) != INSN
c4bb6b38 9023 && GET_CODE (anchor) != JUMP_INSN
51723711 9024 && GET_CODE (anchor) != CALL_INSN)
c4bb6b38
JL
9025 || GET_CODE (PATTERN (anchor)) == USE
9026 || GET_CODE (PATTERN (anchor)) == CLOBBER
9027 || GET_CODE (PATTERN (anchor)) == ADDR_VEC
9028 || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
9029 continue;
9030
9031 anchor_attr = get_attr_pa_combine_type (anchor);
9032 /* See if anchor is an insn suitable for combination. */
9033 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9034 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9035 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9036 && ! forward_branch_p (anchor)))
9037 {
9038 rtx floater;
9039
9040 for (floater = PREV_INSN (anchor);
9041 floater;
9042 floater = PREV_INSN (floater))
9043 {
9044 if (GET_CODE (floater) == NOTE
9045 || (GET_CODE (floater) == INSN
9046 && (GET_CODE (PATTERN (floater)) == USE
9047 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9048 continue;
9049
9050 /* Anything except a regular INSN will stop our search. */
9051 if (GET_CODE (floater) != INSN
9052 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9053 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9054 {
9055 floater = NULL_RTX;
9056 break;
9057 }
9058
9059 /* See if FLOATER is suitable for combination with the
9060 anchor. */
9061 floater_attr = get_attr_pa_combine_type (floater);
9062 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9063 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9064 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9065 && floater_attr == PA_COMBINE_TYPE_FMPY))
9066 {
9067 /* If ANCHOR and FLOATER can be combined, then we're
9068 done with this pass. */
0a2aaacc 9069 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
c4bb6b38
JL
9070 SET_DEST (PATTERN (floater)),
9071 XEXP (SET_SRC (PATTERN (floater)), 0),
9072 XEXP (SET_SRC (PATTERN (floater)), 1)))
9073 break;
9074 }
9075
9076 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9077 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9078 {
9079 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9080 {
0a2aaacc 9081 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
c4bb6b38
JL
9082 SET_DEST (PATTERN (floater)),
9083 XEXP (SET_SRC (PATTERN (floater)), 0),
9084 XEXP (SET_SRC (PATTERN (floater)), 1)))
9085 break;
9086 }
9087 else
9088 {
0a2aaacc 9089 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
c4bb6b38
JL
9090 SET_DEST (PATTERN (floater)),
9091 SET_SRC (PATTERN (floater)),
9092 SET_SRC (PATTERN (floater))))
9093 break;
9094 }
9095 }
9096 }
9097
9098 /* If we didn't find anything on the backwards scan try forwards. */
9099 if (!floater
9100 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9101 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9102 {
9103 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9104 {
9105 if (GET_CODE (floater) == NOTE
9106 || (GET_CODE (floater) == INSN
9107 && (GET_CODE (PATTERN (floater)) == USE
9108 || GET_CODE (PATTERN (floater)) == CLOBBER)))
6619e96c 9109
c4bb6b38
JL
9110 continue;
9111
9112 /* Anything except a regular INSN will stop our search. */
9113 if (GET_CODE (floater) != INSN
9114 || GET_CODE (PATTERN (floater)) == ADDR_VEC
9115 || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
9116 {
9117 floater = NULL_RTX;
9118 break;
9119 }
9120
9121 /* See if FLOATER is suitable for combination with the
9122 anchor. */
9123 floater_attr = get_attr_pa_combine_type (floater);
9124 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9125 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9126 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9127 && floater_attr == PA_COMBINE_TYPE_FMPY))
9128 {
9129 /* If ANCHOR and FLOATER can be combined, then we're
9130 done with this pass. */
0a2aaacc 9131 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
c4bb6b38 9132 SET_DEST (PATTERN (floater)),
831c1763
AM
9133 XEXP (SET_SRC (PATTERN (floater)),
9134 0),
9135 XEXP (SET_SRC (PATTERN (floater)),
9136 1)))
c4bb6b38
JL
9137 break;
9138 }
9139 }
9140 }
9141
9142 /* FLOATER will be nonzero if we found a suitable floating
9143 insn for combination with ANCHOR. */
9144 if (floater
9145 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9146 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9147 {
9148 /* Emit the new instruction and delete the old anchor. */
c5c76735
JL
9149 emit_insn_before (gen_rtx_PARALLEL
9150 (VOIDmode,
9151 gen_rtvec (2, PATTERN (anchor),
9152 PATTERN (floater))),
9153 anchor);
9154
a38e7aa5 9155 SET_INSN_DELETED (anchor);
c4bb6b38
JL
9156
9157 /* Emit a special USE insn for FLOATER, then delete
9158 the floating insn. */
ad2c71b7 9159 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
c4bb6b38
JL
9160 delete_insn (floater);
9161
9162 continue;
9163 }
9164 else if (floater
9165 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9166 {
9167 rtx temp;
9168 /* Emit the new_jump instruction and delete the old anchor. */
c5c76735
JL
9169 temp
9170 = emit_jump_insn_before (gen_rtx_PARALLEL
9171 (VOIDmode,
9172 gen_rtvec (2, PATTERN (anchor),
9173 PATTERN (floater))),
9174 anchor);
9175
c4bb6b38 9176 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
a38e7aa5 9177 SET_INSN_DELETED (anchor);
c4bb6b38
JL
9178
9179 /* Emit a special USE insn for FLOATER, then delete
9180 the floating insn. */
ad2c71b7 9181 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
c4bb6b38
JL
9182 delete_insn (floater);
9183 continue;
9184 }
9185 }
9186 }
9187}
9188
0952f89b 9189static int
0a2aaacc 9190pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
b7849684 9191 rtx src1, rtx src2)
c4bb6b38
JL
9192{
9193 int insn_code_number;
9194 rtx start, end;
9195
9196 /* Create a PARALLEL with the patterns of ANCHOR and
9197 FLOATER, try to recognize it, then test constraints
9198 for the resulting pattern.
9199
9200 If the pattern doesn't match or the constraints
9201 aren't met keep searching for a suitable floater
9202 insn. */
0a2aaacc
KG
9203 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9204 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9205 INSN_CODE (new_rtx) = -1;
9206 insn_code_number = recog_memoized (new_rtx);
c4bb6b38 9207 if (insn_code_number < 0
0a2aaacc 9208 || (extract_insn (new_rtx), ! constrain_operands (1)))
c4bb6b38
JL
9209 return 0;
9210
9211 if (reversed)
9212 {
9213 start = anchor;
9214 end = floater;
9215 }
9216 else
9217 {
9218 start = floater;
9219 end = anchor;
9220 }
9221
9222 /* There's up to three operands to consider. One
9223 output and two inputs.
9224
9225 The output must not be used between FLOATER & ANCHOR
9226 exclusive. The inputs must not be set between
9227 FLOATER and ANCHOR exclusive. */
9228
9229 if (reg_used_between_p (dest, start, end))
9230 return 0;
9231
9232 if (reg_set_between_p (src1, start, end))
9233 return 0;
9234
9235 if (reg_set_between_p (src2, start, end))
9236 return 0;
9237
9238 /* If we get here, then everything is good. */
9239 return 1;
9240}
b9cd54d2 9241
2561a923 9242/* Return nonzero if references for INSN are delayed.
b9cd54d2
JL
9243
9244 Millicode insns are actually function calls with some special
9245 constraints on arguments and register usage.
9246
9247 Millicode calls always expect their arguments in the integer argument
9248 registers, and always return their result in %r29 (ret1). They
7d8b1412
AM
9249 are expected to clobber their arguments, %r1, %r29, and the return
9250 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9251
9252 This function tells reorg that the references to arguments and
9253 millicode calls do not appear to happen until after the millicode call.
9254 This allows reorg to put insns which set the argument registers into the
9255 delay slot of the millicode call -- thus they act more like traditional
9256 CALL_INSNs.
9257
1e5f1716 9258 Note we cannot consider side effects of the insn to be delayed because
7d8b1412
AM
9259 the branch and link insn will clobber the return pointer. If we happened
9260 to use the return pointer in the delay slot of the call, then we lose.
b9cd54d2
JL
9261
9262 get_attr_type will try to recognize the given insn, so make sure to
9263 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9264 in particular. */
9265int
b7849684 9266insn_refs_are_delayed (rtx insn)
b9cd54d2 9267{
6619e96c 9268 return ((GET_CODE (insn) == INSN
b9cd54d2
JL
9269 && GET_CODE (PATTERN (insn)) != SEQUENCE
9270 && GET_CODE (PATTERN (insn)) != USE
9271 && GET_CODE (PATTERN (insn)) != CLOBBER
9272 && get_attr_type (insn) == TYPE_MILLI));
9273}
d07d525a 9274
cde0f3fd
PB
9275/* Promote the return value, but not the arguments. */
9276
02095c50 9277static enum machine_mode
cde0f3fd
PB
9278pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9279 enum machine_mode mode,
9280 int *punsignedp ATTRIBUTE_UNUSED,
9281 const_tree fntype ATTRIBUTE_UNUSED,
9282 int for_return)
9283{
666e3ceb 9284 if (for_return == 0)
cde0f3fd 9285 return mode;
02095c50 9286 return promote_mode (type, mode, punsignedp);
cde0f3fd
PB
9287}
9288
44571d6e
JDA
9289/* On the HP-PA the value is found in register(s) 28(-29), unless
9290 the mode is SF or DF. Then the value is returned in fr4 (32).
9291
cde0f3fd
PB
9292 This must perform the same promotions as PROMOTE_MODE, else promoting
9293 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
44571d6e
JDA
9294
9295 Small structures must be returned in a PARALLEL on PA64 in order
9296 to match the HP Compiler ABI. */
9297
8a5b8538 9298static rtx
9193312a
AS
9299pa_function_value (const_tree valtype,
9300 const_tree func ATTRIBUTE_UNUSED,
9301 bool outgoing ATTRIBUTE_UNUSED)
44571d6e
JDA
9302{
9303 enum machine_mode valmode;
9304
4720d5ca
JDA
9305 if (AGGREGATE_TYPE_P (valtype)
9306 || TREE_CODE (valtype) == COMPLEX_TYPE
9307 || TREE_CODE (valtype) == VECTOR_TYPE)
44571d6e 9308 {
2a04824b
JDA
9309 if (TARGET_64BIT)
9310 {
9311 /* Aggregates with a size less than or equal to 128 bits are
9312 returned in GR 28(-29). They are left justified. The pad
9313 bits are undefined. Larger aggregates are returned in
9314 memory. */
9315 rtx loc[2];
9316 int i, offset = 0;
9317 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9318
9319 for (i = 0; i < ub; i++)
9320 {
9321 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9322 gen_rtx_REG (DImode, 28 + i),
9323 GEN_INT (offset));
9324 offset += 8;
9325 }
44571d6e 9326
2a04824b
JDA
9327 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9328 }
9329 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
44571d6e 9330 {
2a04824b
JDA
9331 /* Aggregates 5 to 8 bytes in size are returned in general
9332 registers r28-r29 in the same manner as other non
9333 floating-point objects. The data is right-justified and
9334 zero-extended to 64 bits. This is opposite to the normal
9335 justification used on big endian targets and requires
9336 special treatment. */
9337 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9338 gen_rtx_REG (DImode, 28), const0_rtx);
9339 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
44571d6e 9340 }
44571d6e
JDA
9341 }
9342
9343 if ((INTEGRAL_TYPE_P (valtype)
2ae88ecd 9344 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
44571d6e
JDA
9345 || POINTER_TYPE_P (valtype))
9346 valmode = word_mode;
9347 else
9348 valmode = TYPE_MODE (valtype);
9349
9350 if (TREE_CODE (valtype) == REAL_TYPE
2a04824b 9351 && !AGGREGATE_TYPE_P (valtype)
44571d6e
JDA
9352 && TYPE_MODE (valtype) != TFmode
9353 && !TARGET_SOFT_FLOAT)
9354 return gen_rtx_REG (valmode, 32);
9355
9356 return gen_rtx_REG (valmode, 28);
9357}
9358
8a5b8538
AS
9359/* Implement the TARGET_LIBCALL_VALUE hook. */
9360
9361static rtx
9362pa_libcall_value (enum machine_mode mode,
9363 const_rtx fun ATTRIBUTE_UNUSED)
9364{
9365 if (! TARGET_SOFT_FLOAT
9366 && (mode == SFmode || mode == DFmode))
9367 return gen_rtx_REG (mode, 32);
9368 else
9369 return gen_rtx_REG (mode, 28);
9370}
9371
9372/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9373
9374static bool
9375pa_function_value_regno_p (const unsigned int regno)
9376{
9377 if (regno == 28
9378 || (! TARGET_SOFT_FLOAT && regno == 32))
9379 return true;
9380
9381 return false;
9382}
9383
fd29bdaf
NF
9384/* Update the data in CUM to advance over an argument
9385 of mode MODE and data type TYPE.
9386 (TYPE is null for libcalls where that information may not be available.) */
9387
9388static void
9389pa_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9390 const_tree type, bool named ATTRIBUTE_UNUSED)
9391{
9392 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9393
9394 cum->nargs_prototype--;
9395 cum->words += (arg_size
9396 + ((cum->words & 01)
9397 && type != NULL_TREE
9398 && arg_size > 1));
9399}
9400
520babc7
JL
9401/* Return the location of a parameter that is passed in a register or NULL
9402 if the parameter has any component that is passed in memory.
9403
9404 This is new code and will be pushed into the net sources after
6619e96c 9405 further testing.
520babc7
JL
9406
9407 ??? We might want to restructure this so that it looks more like other
9408 ports. */
fd29bdaf
NF
9409static rtx
9410pa_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9411 const_tree type, bool named ATTRIBUTE_UNUSED)
520babc7
JL
9412{
9413 int max_arg_words = (TARGET_64BIT ? 8 : 4);
015b1ad1 9414 int alignment = 0;
9dff28ab 9415 int arg_size;
520babc7
JL
9416 int fpr_reg_base;
9417 int gpr_reg_base;
9418 rtx retval;
9419
9dff28ab
JDA
9420 if (mode == VOIDmode)
9421 return NULL_RTX;
9422
9423 arg_size = FUNCTION_ARG_SIZE (mode, type);
9424
9425 /* If this arg would be passed partially or totally on the stack, then
78a52f11 9426 this routine should return zero. pa_arg_partial_bytes will
9dff28ab
JDA
9427 handle arguments which are split between regs and stack slots if
9428 the ABI mandates split arguments. */
4720d5ca 9429 if (!TARGET_64BIT)
520babc7 9430 {
9dff28ab
JDA
9431 /* The 32-bit ABI does not split arguments. */
9432 if (cum->words + arg_size > max_arg_words)
520babc7
JL
9433 return NULL_RTX;
9434 }
9435 else
9436 {
015b1ad1
JDA
9437 if (arg_size > 1)
9438 alignment = cum->words & 1;
9dff28ab 9439 if (cum->words + alignment >= max_arg_words)
520babc7
JL
9440 return NULL_RTX;
9441 }
9442
9443 /* The 32bit ABIs and the 64bit ABIs are rather different,
9444 particularly in their handling of FP registers. We might
9445 be able to cleverly share code between them, but I'm not
0952f89b 9446 going to bother in the hope that splitting them up results
015b1ad1 9447 in code that is more easily understood. */
520babc7 9448
520babc7
JL
9449 if (TARGET_64BIT)
9450 {
9451 /* Advance the base registers to their current locations.
9452
9453 Remember, gprs grow towards smaller register numbers while
015b1ad1
JDA
9454 fprs grow to higher register numbers. Also remember that
9455 although FP regs are 32-bit addressable, we pretend that
9456 the registers are 64-bits wide. */
520babc7
JL
9457 gpr_reg_base = 26 - cum->words;
9458 fpr_reg_base = 32 + cum->words;
6619e96c 9459
9dff28ab
JDA
9460 /* Arguments wider than one word and small aggregates need special
9461 treatment. */
9462 if (arg_size > 1
9463 || mode == BLKmode
4720d5ca
JDA
9464 || (type && (AGGREGATE_TYPE_P (type)
9465 || TREE_CODE (type) == COMPLEX_TYPE
9466 || TREE_CODE (type) == VECTOR_TYPE)))
520babc7 9467 {
015b1ad1
JDA
9468 /* Double-extended precision (80-bit), quad-precision (128-bit)
9469 and aggregates including complex numbers are aligned on
9470 128-bit boundaries. The first eight 64-bit argument slots
9471 are associated one-to-one, with general registers r26
9472 through r19, and also with floating-point registers fr4
9473 through fr11. Arguments larger than one word are always
9dff28ab
JDA
9474 passed in general registers.
9475
9476 Using a PARALLEL with a word mode register results in left
9477 justified data on a big-endian target. */
015b1ad1
JDA
9478
9479 rtx loc[8];
9480 int i, offset = 0, ub = arg_size;
9481
9482 /* Align the base register. */
9483 gpr_reg_base -= alignment;
9484
9485 ub = MIN (ub, max_arg_words - cum->words - alignment);
9486 for (i = 0; i < ub; i++)
520babc7 9487 {
015b1ad1
JDA
9488 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9489 gen_rtx_REG (DImode, gpr_reg_base),
9490 GEN_INT (offset));
9491 gpr_reg_base -= 1;
9492 offset += 8;
520babc7 9493 }
015b1ad1 9494
e4b95210 9495 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
520babc7 9496 }
9dff28ab 9497 }
520babc7
JL
9498 else
9499 {
9500 /* If the argument is larger than a word, then we know precisely
9501 which registers we must use. */
015b1ad1 9502 if (arg_size > 1)
520babc7
JL
9503 {
9504 if (cum->words)
9505 {
9506 gpr_reg_base = 23;
9507 fpr_reg_base = 38;
9508 }
9509 else
9510 {
9511 gpr_reg_base = 25;
9512 fpr_reg_base = 34;
9513 }
9dff28ab
JDA
9514
9515 /* Structures 5 to 8 bytes in size are passed in the general
9516 registers in the same manner as other non floating-point
9517 objects. The data is right-justified and zero-extended
7ea18c08
JDA
9518 to 64 bits. This is opposite to the normal justification
9519 used on big endian targets and requires special treatment.
4720d5ca
JDA
9520 We now define BLOCK_REG_PADDING to pad these objects.
9521 Aggregates, complex and vector types are passed in the same
9522 manner as structures. */
9523 if (mode == BLKmode
9524 || (type && (AGGREGATE_TYPE_P (type)
9525 || TREE_CODE (type) == COMPLEX_TYPE
9526 || TREE_CODE (type) == VECTOR_TYPE)))
9dff28ab 9527 {
44571d6e
JDA
9528 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9529 gen_rtx_REG (DImode, gpr_reg_base),
9530 const0_rtx);
2a04824b 9531 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9dff28ab 9532 }
520babc7
JL
9533 }
9534 else
9535 {
9536 /* We have a single word (32 bits). A simple computation
9537 will get us the register #s we need. */
9538 gpr_reg_base = 26 - cum->words;
9539 fpr_reg_base = 32 + 2 * cum->words;
9540 }
9541 }
9542
b848dc65 9543 /* Determine if the argument needs to be passed in both general and
520babc7 9544 floating point registers. */
b848dc65
JDA
9545 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9546 /* If we are doing soft-float with portable runtime, then there
9547 is no need to worry about FP regs. */
c328adfa 9548 && !TARGET_SOFT_FLOAT
4720d5ca 9549 /* The parameter must be some kind of scalar float, else we just
b848dc65 9550 pass it in integer registers. */
4720d5ca 9551 && GET_MODE_CLASS (mode) == MODE_FLOAT
b848dc65
JDA
9552 /* The target function must not have a prototype. */
9553 && cum->nargs_prototype <= 0
9554 /* libcalls do not need to pass items in both FP and general
9555 registers. */
9556 && type != NULL_TREE
c328adfa
JDA
9557 /* All this hair applies to "outgoing" args only. This includes
9558 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9559 && !cum->incoming)
b848dc65
JDA
9560 /* Also pass outgoing floating arguments in both registers in indirect
9561 calls with the 32 bit ABI and the HP assembler since there is no
9562 way to the specify argument locations in static functions. */
c328adfa
JDA
9563 || (!TARGET_64BIT
9564 && !TARGET_GAS
9565 && !cum->incoming
b848dc65 9566 && cum->indirect
4720d5ca 9567 && GET_MODE_CLASS (mode) == MODE_FLOAT))
520babc7
JL
9568 {
9569 retval
9570 = gen_rtx_PARALLEL
9571 (mode,
9572 gen_rtvec (2,
9573 gen_rtx_EXPR_LIST (VOIDmode,
9574 gen_rtx_REG (mode, fpr_reg_base),
9575 const0_rtx),
9576 gen_rtx_EXPR_LIST (VOIDmode,
9577 gen_rtx_REG (mode, gpr_reg_base),
9578 const0_rtx)));
9579 }
9580 else
9581 {
9582 /* See if we should pass this parameter in a general register. */
9583 if (TARGET_SOFT_FLOAT
9584 /* Indirect calls in the normal 32bit ABI require all arguments
9585 to be passed in general registers. */
9586 || (!TARGET_PORTABLE_RUNTIME
9587 && !TARGET_64BIT
50b424a9 9588 && !TARGET_ELF32
520babc7 9589 && cum->indirect)
4720d5ca
JDA
9590 /* If the parameter is not a scalar floating-point parameter,
9591 then it belongs in GPRs. */
9592 || GET_MODE_CLASS (mode) != MODE_FLOAT
2a04824b
JDA
9593 /* Structure with single SFmode field belongs in GPR. */
9594 || (type && AGGREGATE_TYPE_P (type)))
520babc7
JL
9595 retval = gen_rtx_REG (mode, gpr_reg_base);
9596 else
9597 retval = gen_rtx_REG (mode, fpr_reg_base);
9598 }
9599 return retval;
9600}
9601
c2ed6cf8
NF
9602/* Arguments larger than one word are double word aligned. */
9603
9604static unsigned int
9605pa_function_arg_boundary (enum machine_mode mode, const_tree type)
9606{
c2ed6cf8 9607 bool singleword = (type
b58a864d
NF
9608 ? (integer_zerop (TYPE_SIZE (type))
9609 || !TREE_CONSTANT (TYPE_SIZE (type))
c2ed6cf8 9610 || int_size_in_bytes (type) <= UNITS_PER_WORD)
19cf9bde 9611 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
c2ed6cf8
NF
9612
9613 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9614}
520babc7
JL
9615
9616/* If this arg would be passed totally in registers or totally on the stack,
78a52f11
RH
9617 then this routine should return zero. */
9618
9619static int
9620pa_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9621 tree type, bool named ATTRIBUTE_UNUSED)
520babc7 9622{
e0c556d3
AM
9623 unsigned int max_arg_words = 8;
9624 unsigned int offset = 0;
520babc7 9625
78a52f11
RH
9626 if (!TARGET_64BIT)
9627 return 0;
9628
e0c556d3 9629 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
520babc7
JL
9630 offset = 1;
9631
e0c556d3 9632 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
fe19a83d 9633 /* Arg fits fully into registers. */
520babc7 9634 return 0;
6619e96c 9635 else if (cum->words + offset >= max_arg_words)
fe19a83d 9636 /* Arg fully on the stack. */
520babc7
JL
9637 return 0;
9638 else
fe19a83d 9639 /* Arg is split. */
78a52f11 9640 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
520babc7
JL
9641}
9642
9643
d6b5193b 9644/* A get_unnamed_section callback for switching to the text section.
9a55eab3
JDA
9645
9646 This function is only used with SOM. Because we don't support
9647 named subspaces, we can only create a new subspace or switch back
774acadf 9648 to the default text subspace. */
774acadf 9649
d6b5193b
RS
9650static void
9651som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9652{
9653 gcc_assert (TARGET_SOM);
774acadf 9654 if (TARGET_GAS)
9a55eab3 9655 {
8c5e065b 9656 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9a55eab3
JDA
9657 {
9658 /* We only want to emit a .nsubspa directive once at the
9659 start of the function. */
9660 cfun->machine->in_nsubspa = 1;
9661
9662 /* Create a new subspace for the text. This provides
9663 better stub placement and one-only functions. */
9664 if (cfun->decl
9665 && DECL_ONE_ONLY (cfun->decl)
9666 && !DECL_WEAK (cfun->decl))
1a83bfc3
JDA
9667 {
9668 output_section_asm_op ("\t.SPACE $TEXT$\n"
9669 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9670 "ACCESS=44,SORT=24,COMDAT");
9671 return;
9672 }
9a55eab3
JDA
9673 }
9674 else
9675 {
9676 /* There isn't a current function or the body of the current
9677 function has been completed. So, we are changing to the
1a83bfc3
JDA
9678 text section to output debugging information. Thus, we
9679 need to forget that we are in the text section so that
9680 varasm.c will call us when text_section is selected again. */
8c5e065b
JDA
9681 gcc_assert (!cfun || !cfun->machine
9682 || cfun->machine->in_nsubspa == 2);
d6b5193b 9683 in_section = NULL;
9a55eab3 9684 }
1a83bfc3
JDA
9685 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9686 return;
9a55eab3 9687 }
d6b5193b
RS
9688 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9689}
9690
1a83bfc3
JDA
9691/* A get_unnamed_section callback for switching to comdat data
9692 sections. This function is only used with SOM. */
9693
9694static void
9695som_output_comdat_data_section_asm_op (const void *data)
9696{
9697 in_section = NULL;
9698 output_section_asm_op (data);
9699}
9700
d6b5193b 9701/* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9a55eab3 9702
d6b5193b
RS
9703static void
9704pa_som_asm_init_sections (void)
9705{
9706 text_section
9707 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9708
9709 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9710 is not being generated. */
9711 som_readonly_data_section
9712 = get_unnamed_section (0, output_section_asm_op,
9713 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9714
9715 /* When secondary definitions are not supported, SOM makes readonly
9716 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9717 the comdat flag. */
9718 som_one_only_readonly_data_section
1a83bfc3 9719 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
d6b5193b
RS
9720 "\t.SPACE $TEXT$\n"
9721 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9722 "ACCESS=0x2c,SORT=16,COMDAT");
9723
9724
9725 /* When secondary definitions are not supported, SOM makes data one-only
9726 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9727 som_one_only_data_section
1a83bfc3
JDA
9728 = get_unnamed_section (SECTION_WRITE,
9729 som_output_comdat_data_section_asm_op,
d6b5193b
RS
9730 "\t.SPACE $PRIVATE$\n"
9731 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9732 "ACCESS=31,SORT=24,COMDAT");
9733
9734 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9735 which reference data within the $TEXT$ space (for example constant
9736 strings in the $LIT$ subspace).
9737
9738 The assemblers (GAS and HP as) both have problems with handling
9739 the difference of two symbols which is the other correct way to
9740 reference constant data during PIC code generation.
9741
9742 So, there's no way to reference constant data which is in the
9743 $TEXT$ space during PIC generation. Instead place all constant
9744 data into the $PRIVATE$ subspace (this reduces sharing, but it
9745 works correctly). */
9746 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9747
9748 /* We must not have a reference to an external symbol defined in a
9749 shared library in a readonly section, else the SOM linker will
9750 complain.
9751
9752 So, we force exception information into the data section. */
9753 exception_section = data_section;
9a55eab3
JDA
9754}
9755
ae46c4e0
RH
9756/* On hpux10, the linker will give an error if we have a reference
9757 in the read-only data section to a symbol defined in a shared
9758 library. Therefore, expressions that might require a reloc can
9759 not be placed in the read-only data section. */
9760
d6b5193b 9761static section *
24a52160
JDA
9762pa_select_section (tree exp, int reloc,
9763 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
ae46c4e0
RH
9764{
9765 if (TREE_CODE (exp) == VAR_DECL
9766 && TREE_READONLY (exp)
9767 && !TREE_THIS_VOLATILE (exp)
9768 && DECL_INITIAL (exp)
9769 && (DECL_INITIAL (exp) == error_mark_node
9770 || TREE_CONSTANT (DECL_INITIAL (exp)))
9771 && !reloc)
9a55eab3
JDA
9772 {
9773 if (TARGET_SOM
9774 && DECL_ONE_ONLY (exp)
9775 && !DECL_WEAK (exp))
d6b5193b 9776 return som_one_only_readonly_data_section;
9a55eab3 9777 else
d6b5193b 9778 return readonly_data_section;
9a55eab3 9779 }
6615c446 9780 else if (CONSTANT_CLASS_P (exp) && !reloc)
d6b5193b 9781 return readonly_data_section;
9a55eab3
JDA
9782 else if (TARGET_SOM
9783 && TREE_CODE (exp) == VAR_DECL
9784 && DECL_ONE_ONLY (exp)
e41f3691 9785 && !DECL_WEAK (exp))
d6b5193b 9786 return som_one_only_data_section;
ae46c4e0 9787 else
d6b5193b 9788 return data_section;
ae46c4e0 9789}
e2500fed 9790
/* Write to STREAM the directive that exports the data object NAME.  */

static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* Only DATA objects are handled here; functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (FUNCTION_NAME_P (name))
    return;

  fputs ("\t.EXPORT ", stream);
  assemble_name (stream, name);
  fputs (",DATA\n", stream);
}
3f12cd9b 9803
bd5bd7ac
KH
9804/* Worker function for TARGET_STRUCT_VALUE_RTX. */
9805
3f12cd9b
KH
9806static rtx
9807pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9808 int incoming ATTRIBUTE_UNUSED)
9809{
9810 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9811}
9812
bd5bd7ac
KH
9813/* Worker function for TARGET_RETURN_IN_MEMORY. */
9814
3f12cd9b 9815bool
586de218 9816pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
3f12cd9b
KH
9817{
9818 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9819 PA64 ABI says that objects larger than 128 bits are returned in memory.
9820 Note, int_size_in_bytes can return -1 if the size of the object is
9821 variable or larger than the maximum value that can be expressed as
9822 a HOST_WIDE_INT. It can also return zero for an empty type. The
9823 simplest way to handle variable and empty types is to pass them in
9824 memory. This avoids problems in defining the boundaries of argument
9825 slots, allocating registers, etc. */
9826 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9827 || int_size_in_bytes (type) <= 0);
9828}
9829
744b2d61
JDA
9830/* Structure to hold declaration and name of external symbols that are
9831 emitted by GCC. We generate a vector of these symbols and output them
9832 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9833 This avoids putting out names that are never really used. */
9834
d1b38208 9835typedef struct GTY(()) extern_symbol
744b2d61
JDA
9836{
/* Declaration of the external symbol. */
9837 tree decl;
/* Name of the external symbol, as handed to pa_hpux_asm_output_external. */
9838 const char *name;
d4e6fecb 9839} extern_symbol;
744b2d61
JDA
9840
9841/* Define gc'd vector type for extern_symbol. */
d4e6fecb
NS
9842DEF_VEC_O(extern_symbol);
9843DEF_VEC_ALLOC_O(extern_symbol,gc);
744b2d61
JDA
9844
9845/* Vector of extern_symbol objects. */
d4e6fecb 9846static GTY(()) VEC(extern_symbol,gc) *extern_symbols;
744b2d61
JDA
9847
9848#ifdef ASM_OUTPUT_EXTERNAL_REAL
9849/* Mark DECL (name NAME) as an external reference (assembler output
9850 file FILE). This saves the names to output at the end of the file
9851 if actually referenced. */
9852
9853void
9854pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9855{
d4e6fecb 9856 extern_symbol * p = VEC_safe_push (extern_symbol, gc, extern_symbols, NULL);
744b2d61
JDA
9857
9858 gcc_assert (file == asm_out_file);
9859 p->decl = decl;
9860 p->name = name;
744b2d61
JDA
9861}
9862
9863/* Output text required at the end of an assembler file.
9864 This includes deferred plabels and .import directives for
9865 all external symbols that were actually referenced. */
9866
9867static void
9868pa_hpux_file_end (void)
9869{
9870 unsigned int i;
d4e6fecb 9871 extern_symbol *p;
744b2d61 9872
3674b34d
JDA
9873 if (!NO_DEFERRED_PROFILE_COUNTERS)
9874 output_deferred_profile_counters ();
9875
744b2d61
JDA
9876 output_deferred_plabels ();
9877
9878 for (i = 0; VEC_iterate (extern_symbol, extern_symbols, i, p); i++)
9879 {
9880 tree decl = p->decl;
9881
9882 if (!TREE_ASM_WRITTEN (decl)
9883 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9884 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9885 }
9886
d4e6fecb 9887 VEC_free (extern_symbol, gc, extern_symbols);
744b2d61
JDA
9888}
9889#endif
9890
6982c5d4 9891/* Return true if a change from mode FROM to mode TO for a register
0a2aaacc 9892 in register class RCLASS is invalid. */
6982c5d4
JDA
9893
9894bool
9895pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
0a2aaacc 9896 enum reg_class rclass)
6982c5d4
JDA
9897{
9898 if (from == to)
9899 return false;
9900
9901 /* Reject changes to/from complex and vector modes. */
9902 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9903 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9904 return true;
9905
9906 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9907 return false;
9908
9909 /* There is no way to load QImode or HImode values directly from
9910 memory. SImode loads to the FP registers are not zero extended.
9911 On the 64-bit target, this conflicts with the definition of
9912 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9913 with different sizes in the floating-point registers. */
0a2aaacc 9914 if (MAYBE_FP_REG_CLASS_P (rclass))
6982c5d4
JDA
9915 return true;
9916
9917 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9918 in specific sets of registers. Thus, we cannot allow changing
9919 to a larger mode when it's larger than a word. */
9920 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9921 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9922 return true;
9923
9924 return false;
9925}
9926
9927/* Returns TRUE if it is a good idea to tie two pseudo registers
9928 when one has mode MODE1 and one has mode MODE2.
9929 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9930 for any hard reg, then this must be FALSE for correct output.
9931
9932 We should return FALSE for QImode and HImode because these modes
9933 are not ok in the floating-point registers. However, this prevents
9934 tying these modes to SImode and DImode in the general registers.
9935 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9936 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9937 in the floating-point registers. */
9938
9939bool
9940pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9941{
9942 /* Don't tie modes in different classes. */
9943 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9944 return false;
9945
9946 return true;
9947}
9948
859c146c
RH
9949\f
9950/* Length in units of the trampoline instruction code. */
9951
9952#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9953
9954
9955/* Output assembler code for a block containing the constant parts
9956 of a trampoline, leaving space for the variable parts.
9957
9958 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9959 and then branches to the specified routine.
9960
9961 This code template is copied from text segment to stack location
9962 and then patched with pa_trampoline_init to contain valid values,
9963 and then entered as a subroutine.
9964
9965 It is best to keep this as small as possible to avoid having to
9966 flush multiple lines in the cache. */
9967
9968static void
9969pa_asm_trampoline_template (FILE *f)
9970{
9971 if (!TARGET_64BIT)
9972 {
9973 fputs ("\tldw 36(%r22),%r21\n", f);
9974 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
9975 if (ASSEMBLER_DIALECT == 0)
9976 fputs ("\tdepi 0,31,2,%r21\n", f);
9977 else
9978 fputs ("\tdepwi 0,31,2,%r21\n", f);
9979 fputs ("\tldw 4(%r21),%r19\n", f);
9980 fputs ("\tldw 0(%r21),%r21\n", f);
9981 if (TARGET_PA_20)
9982 {
9983 fputs ("\tbve (%r21)\n", f);
9984 fputs ("\tldw 40(%r22),%r29\n", f);
9985 fputs ("\t.word 0\n", f);
9986 fputs ("\t.word 0\n", f);
9987 }
9988 else
9989 {
9990 fputs ("\tldsid (%r21),%r1\n", f);
9991 fputs ("\tmtsp %r1,%sr0\n", f);
9992 fputs ("\tbe 0(%sr0,%r21)\n", f);
9993 fputs ("\tldw 40(%r22),%r29\n", f);
9994 }
9995 fputs ("\t.word 0\n", f);
9996 fputs ("\t.word 0\n", f);
9997 fputs ("\t.word 0\n", f);
9998 fputs ("\t.word 0\n", f);
9999 }
10000 else
10001 {
10002 fputs ("\t.dword 0\n", f);
10003 fputs ("\t.dword 0\n", f);
10004 fputs ("\t.dword 0\n", f);
10005 fputs ("\t.dword 0\n", f);
10006 fputs ("\tmfia %r31\n", f);
10007 fputs ("\tldd 24(%r31),%r1\n", f);
10008 fputs ("\tldd 24(%r1),%r27\n", f);
10009 fputs ("\tldd 16(%r1),%r1\n", f);
10010 fputs ("\tbve (%r1)\n", f);
10011 fputs ("\tldd 32(%r31),%r31\n", f);
10012 fputs ("\t.dword 0 ; fptr\n", f);
10013 fputs ("\t.dword 0 ; static link\n", f);
10014 }
10015}
10016
10017/* Emit RTL insns to initialize the variable parts of a trampoline.
10018 FNADDR is an RTX for the address of the function's pure code.
10019 CXT is an RTX for the static chain value for the function.
10020
10021 Move the function address to the trampoline template at offset 36.
10022 Move the static chain value to trampoline template at offset 40.
10023 Move the trampoline address to trampoline template at offset 44.
10024 Move r19 to trampoline template at offset 48. The latter two
10025 words create a plabel for the indirect call to the trampoline.
10026
10027 A similar sequence is used for the 64-bit port but the plabel is
10028 at the beginning of the trampoline.
10029
10030 Finally, the cache entries for the trampoline code are flushed.
10031 This is necessary to ensure that the trampoline instruction sequence
10032 is written to memory prior to any attempts at prefetching the code
10033 sequence. */
10034
10035static void
10036pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10037{
10038 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10039 rtx start_addr = gen_reg_rtx (Pmode);
10040 rtx end_addr = gen_reg_rtx (Pmode);
10041 rtx line_length = gen_reg_rtx (Pmode);
10042 rtx r_tramp, tmp;
10043
10044 emit_block_move (m_tramp, assemble_trampoline_template (),
10045 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10046 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10047
10048 if (!TARGET_64BIT)
10049 {
10050 tmp = adjust_address (m_tramp, Pmode, 36);
10051 emit_move_insn (tmp, fnaddr);
10052 tmp = adjust_address (m_tramp, Pmode, 40);
10053 emit_move_insn (tmp, chain_value);
10054
10055 /* Create a fat pointer for the trampoline. */
10056 tmp = adjust_address (m_tramp, Pmode, 44);
10057 emit_move_insn (tmp, r_tramp);
10058 tmp = adjust_address (m_tramp, Pmode, 48);
10059 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10060
10061 /* fdc and fic only use registers for the address to flush,
10062 they do not accept integer displacements. We align the
10063 start and end addresses to the beginning of their respective
10064 cache lines to minimize the number of lines flushed. */
10065 emit_insn (gen_andsi3 (start_addr, r_tramp,
10066 GEN_INT (-MIN_CACHELINE_SIZE)));
10067 tmp = force_reg (Pmode, plus_constant (r_tramp, TRAMPOLINE_CODE_SIZE-1));
10068 emit_insn (gen_andsi3 (end_addr, tmp,
10069 GEN_INT (-MIN_CACHELINE_SIZE)));
10070 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10071 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10072 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10073 gen_reg_rtx (Pmode),
10074 gen_reg_rtx (Pmode)));
10075 }
10076 else
10077 {
10078 tmp = adjust_address (m_tramp, Pmode, 56);
10079 emit_move_insn (tmp, fnaddr);
10080 tmp = adjust_address (m_tramp, Pmode, 64);
10081 emit_move_insn (tmp, chain_value);
10082
10083 /* Create a fat pointer for the trampoline. */
10084 tmp = adjust_address (m_tramp, Pmode, 16);
10085 emit_move_insn (tmp, force_reg (Pmode, plus_constant (r_tramp, 32)));
10086 tmp = adjust_address (m_tramp, Pmode, 24);
10087 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10088
10089 /* fdc and fic only use registers for the address to flush,
10090 they do not accept integer displacements. We align the
10091 start and end addresses to the beginning of their respective
10092 cache lines to minimize the number of lines flushed. */
10093 tmp = force_reg (Pmode, plus_constant (r_tramp, 32));
10094 emit_insn (gen_anddi3 (start_addr, tmp,
10095 GEN_INT (-MIN_CACHELINE_SIZE)));
10096 tmp = force_reg (Pmode, plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1));
10097 emit_insn (gen_anddi3 (end_addr, tmp,
10098 GEN_INT (-MIN_CACHELINE_SIZE)));
10099 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10100 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10101 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10102 gen_reg_rtx (Pmode),
10103 gen_reg_rtx (Pmode)));
10104 }
10105}
10106
10107/* Perform any machine-specific adjustment in the address of the trampoline.
10108 ADDR contains the address that was passed to pa_trampoline_init.
10109 Adjust the trampoline address to point to the plabel at offset 44. */
10110
10111static rtx
10112pa_trampoline_adjust_address (rtx addr)
10113{
10114 if (!TARGET_64BIT)
10115 addr = memory_address (Pmode, plus_constant (addr, 46));
10116 return addr;
10117}
1f65437d
SE
10118
10119static rtx
10120pa_delegitimize_address (rtx orig_x)
10121{
10122 rtx x = delegitimize_mem_from_attrs (orig_x);
10123
10124 if (GET_CODE (x) == LO_SUM
10125 && GET_CODE (XEXP (x, 1)) == UNSPEC
10126 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10127 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10128 return x;
10129}
859c146c 10130\f
bc707992
JDA
10131static rtx
10132pa_internal_arg_pointer (void)
10133{
10134 /* The argument pointer and the hard frame pointer are the same in
10135 the 32-bit runtime, so we don't need a copy. */
10136 if (TARGET_64BIT)
10137 return copy_to_reg (virtual_incoming_args_rtx);
10138 else
10139 return virtual_incoming_args_rtx;
10140}
10141
10142/* Given FROM and TO register numbers, say whether this elimination is allowed.
10143 Frame pointer elimination is automatically handled. */
10144
10145static bool
10146pa_can_eliminate (const int from, const int to)
10147{
10148 /* The argument cannot be eliminated in the 64-bit runtime. */
10149 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10150 return false;
10151
10152 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10153 ? ! frame_pointer_needed
10154 : true);
10155}
10156
10157/* Define the offset between two registers, FROM to be eliminated and its
10158 replacement TO, at the start of a routine. */
10159HOST_WIDE_INT
10160pa_initial_elimination_offset (int from, int to)
10161{
10162 HOST_WIDE_INT offset;
10163
10164 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10165 && to == STACK_POINTER_REGNUM)
10166 offset = -compute_frame_size (get_frame_size (), 0);
10167 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10168 offset = 0;
10169 else
10170 gcc_unreachable ();
10171
10172 return offset;
10173}
10174
5efd84c5
NF
10175static void
10176pa_conditional_register_usage (void)
10177{
10178 int i;
10179
10180 if (!TARGET_64BIT && !TARGET_PA_11)
10181 {
10182 for (i = 56; i <= FP_REG_LAST; i++)
10183 fixed_regs[i] = call_used_regs[i] = 1;
10184 for (i = 33; i < 56; i += 2)
10185 fixed_regs[i] = call_used_regs[i] = 1;
10186 }
10187 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10188 {
10189 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10190 fixed_regs[i] = call_used_regs[i] = 1;
10191 }
10192 if (flag_pic)
10193 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10194}
10195
41a1208a
JDA
10196/* Target hook for c_mode_for_suffix. */
10197
10198static enum machine_mode
10199pa_c_mode_for_suffix (char suffix)
10200{
10201 if (HPUX_LONG_DOUBLE_LIBRARY)
10202 {
10203 if (suffix == 'q')
10204 return TFmode;
10205 }
10206
10207 return VOIDmode;
10208}
10209
7550cb35
JDA
10210/* Target hook for function_section. */
10211
10212static section *
10213pa_function_section (tree decl, enum node_frequency freq,
10214 bool startup, bool exit)
10215{
10216 /* Put functions in text section if target doesn't have named sections. */
677f3fa8 10217 if (!targetm_common.have_named_sections)
7550cb35
JDA
10218 return text_section;
10219
10220 /* Force nested functions into the same section as the containing
10221 function. */
10222 if (decl
10223 && DECL_SECTION_NAME (decl) == NULL_TREE
10224 && DECL_CONTEXT (decl) != NULL_TREE
10225 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10226 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL_TREE)
10227 return function_section (DECL_CONTEXT (decl));
10228
10229 /* Otherwise, use the default function section. */
10230 return default_function_section (decl, freq, startup, exit);
10231}
10232
1a627b35
RS
10233/* Implement TARGET_LEGITIMATE_CONSTANT_P.
10234
10235 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10236 that need more than three instructions to load prior to reload. This
10237 limit is somewhat arbitrary. It takes three instructions to load a
10238 CONST_INT from memory but two are memory accesses. It may be better
10239 to increase the allowed range for CONST_INTS. We may also be able
10240 to handle CONST_DOUBLES. */
10241
10242static bool
10243pa_legitimate_constant_p (enum machine_mode mode, rtx x)
10244{
10245 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10246 return false;
10247
10248 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10249 return false;
10250
10251 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10252 return false;
10253
10254 if (TARGET_64BIT
10255 && HOST_BITS_PER_WIDE_INT > 32
10256 && GET_CODE (x) == CONST_INT
10257 && !reload_in_progress
10258 && !reload_completed
10259 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10260 && !cint_ok_for_move (INTVAL (x)))
10261 return false;
10262
10263 if (function_label_operand (x, mode))
10264 return false;
10265
10266 return true;
10267}
10268
e2500fed 10269#include "gt-pa.h"