]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/pa/pa.c
Factor unrelated declarations out of tree.h.
[thirdparty/gcc.git] / gcc / config / pa / pa.c
CommitLineData
188538df 1/* Subroutines for insn-output.c for HPPA.
d1e082c2 2 Copyright (C) 1992-2013 Free Software Foundation, Inc.
188538df
TG
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
4
b7849684 5This file is part of GCC.
188538df 6
b7849684 7GCC is free software; you can redistribute it and/or modify
188538df 8it under the terms of the GNU General Public License as published by
2f83c7d6 9the Free Software Foundation; either version 3, or (at your option)
188538df
TG
10any later version.
11
b7849684 12GCC is distributed in the hope that it will be useful,
188538df
TG
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
2f83c7d6
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
188538df 20
188538df 21#include "config.h"
0b17dd98 22#include "system.h"
4977bab6
ZW
23#include "coretypes.h"
24#include "tm.h"
188538df
TG
25#include "rtl.h"
26#include "regs.h"
27#include "hard-reg-set.h"
188538df
TG
28#include "insn-config.h"
29#include "conditions.h"
188538df
TG
30#include "insn-attr.h"
31#include "flags.h"
32#include "tree.h"
d8a2d370
DN
33#include "stor-layout.h"
34#include "stringpool.h"
35#include "varasm.h"
36#include "calls.h"
d499455b 37#include "output.h"
be7a421e 38#include "dbxout.h"
823fbbce 39#include "except.h"
becf1647 40#include "expr.h"
e78d8e51 41#include "optabs.h"
e78d8e51 42#include "reload.h"
49ad7cfa 43#include "function.h"
718f9c0f 44#include "diagnostic-core.h"
d07d525a 45#include "ggc.h"
519104fe 46#include "recog.h"
823fbbce 47#include "predict.h"
519104fe 48#include "tm_p.h"
672a6f42 49#include "target.h"
677f3fa8 50#include "common/common-target.h"
672a6f42 51#include "target-def.h"
41a1208a 52#include "langhooks.h"
62a53968 53#include "df.h"
96e45421 54#include "opts.h"
188538df 55
5d50fab3
JL
56/* Return nonzero if there is a bypass for the output of
57 OUT_INSN and the fp store IN_INSN. */
58int
ae9d61ab 59pa_fpstore_bypass_p (rtx out_insn, rtx in_insn)
5d50fab3
JL
60{
61 enum machine_mode store_mode;
62 enum machine_mode other_mode;
63 rtx set;
64
65 if (recog_memoized (in_insn) < 0
d4f2728a
JDA
66 || (get_attr_type (in_insn) != TYPE_FPSTORE
67 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
5d50fab3
JL
68 || recog_memoized (out_insn) < 0)
69 return 0;
70
71 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
72
73 set = single_set (out_insn);
74 if (!set)
75 return 0;
76
77 other_mode = GET_MODE (SET_SRC (set));
78
79 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
80}
81
82
19ec6a36
AM
/* Unless already configured, DO_FRAME_NOTES defaults to 1 when
   INCOMING_RETURN_ADDR_RTX is defined and to 0 otherwise.  */
#ifndef DO_FRAME_NOTES
# if defined (INCOMING_RETURN_ADDR_RTX)
#  define DO_FRAME_NOTES 1
# else
#  define DO_FRAME_NOTES 0
# endif
#endif
90
8a5b8538 91static void pa_option_override (void);
d8f95bed 92static void copy_reg_pointer (rtx, rtx);
a2017852 93static void fix_range (const char *);
8a5b8538
AS
94static int hppa_register_move_cost (enum machine_mode mode, reg_class_t,
95 reg_class_t);
b413068c 96static int hppa_address_cost (rtx, enum machine_mode mode, addr_space_t, bool);
68f932c4 97static bool hppa_rtx_costs (rtx, int, int, int, int *, bool);
b7849684
JE
98static inline rtx force_mode (enum machine_mode, rtx);
99static void pa_reorg (void);
100static void pa_combine_instructions (void);
101static int pa_can_combine_p (rtx, rtx, rtx, int, rtx, rtx, rtx);
3232e9d8 102static bool forward_branch_p (rtx);
b7849684 103static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
ae9d61ab 104static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
70128ad9
AO
105static int compute_movmem_length (rtx);
106static int compute_clrmem_length (rtx);
b7849684
JE
107static bool pa_assemble_integer (rtx, unsigned int, int);
108static void remove_useless_addtr_insns (int);
a4295210
JDA
109static void store_reg (int, HOST_WIDE_INT, int);
110static void store_reg_modify (int, int, HOST_WIDE_INT);
111static void load_reg (int, HOST_WIDE_INT, int);
112static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
9193312a 113static rtx pa_function_value (const_tree, const_tree, bool);
8a5b8538
AS
114static rtx pa_libcall_value (enum machine_mode, const_rtx);
115static bool pa_function_value_regno_p (const unsigned int);
b7849684 116static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
67b846fa 117static void update_total_code_bytes (unsigned int);
b7849684
JE
118static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
119static int pa_adjust_cost (rtx, rtx, rtx, int);
120static int pa_adjust_priority (rtx, int);
121static int pa_issue_rate (void);
d6b5193b 122static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
57d138a9 123static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
d6b5193b 124static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
ae46c4e0 125 ATTRIBUTE_UNUSED;
b7849684
JE
126static void pa_encode_section_info (tree, rtx, int);
127static const char *pa_strip_name_encoding (const char *);
128static bool pa_function_ok_for_sibcall (tree, tree);
129static void pa_globalize_label (FILE *, const char *)
a5f3f0ab 130 ATTRIBUTE_UNUSED;
b7849684
JE
131static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
132 HOST_WIDE_INT, tree);
35d434ed 133#if !defined(USE_COLLECT2)
b7849684
JE
134static void pa_asm_out_constructor (rtx, int);
135static void pa_asm_out_destructor (rtx, int);
35d434ed 136#endif
b7849684 137static void pa_init_builtins (void);
41a1208a 138static rtx pa_expand_builtin (tree, rtx, rtx, enum machine_mode mode, int);
3f12cd9b 139static rtx hppa_builtin_saveregs (void);
d7bd8aeb 140static void hppa_va_start (tree, rtx);
726a989a 141static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
83c32f2e 142static bool pa_scalar_mode_supported_p (enum machine_mode);
3101faab 143static bool pa_commutative_p (const_rtx x, int outer_code);
b7849684
JE
144static void copy_fp_args (rtx) ATTRIBUTE_UNUSED;
145static int length_fp_args (rtx) ATTRIBUTE_UNUSED;
506d7b68 146static rtx hppa_legitimize_address (rtx, rtx, enum machine_mode);
b7849684
JE
147static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
148static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
149static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
150static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
151static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
152static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
153static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
154static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
155static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
156static void output_deferred_plabels (void);
3674b34d 157static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
744b2d61
JDA
158#ifdef ASM_OUTPUT_EXTERNAL_REAL
159static void pa_hpux_file_end (void);
160#endif
50bbeefb 161static void pa_init_libfuncs (void);
3f12cd9b 162static rtx pa_struct_value_rtx (tree, int);
d5cc9181 163static bool pa_pass_by_reference (cumulative_args_t, enum machine_mode,
586de218 164 const_tree, bool);
d5cc9181 165static int pa_arg_partial_bytes (cumulative_args_t, enum machine_mode,
78a52f11 166 tree, bool);
d5cc9181 167static void pa_function_arg_advance (cumulative_args_t, enum machine_mode,
fd29bdaf 168 const_tree, bool);
d5cc9181 169static rtx pa_function_arg (cumulative_args_t, enum machine_mode,
fd29bdaf 170 const_tree, bool);
c2ed6cf8 171static unsigned int pa_function_arg_boundary (enum machine_mode, const_tree);
9a55eab3 172static struct machine_function * pa_init_machine_status (void);
a87cf97e
JR
173static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
174 enum machine_mode,
175 secondary_reload_info *);
16c16a24 176static void pa_extra_live_on_entry (bitmap);
4ce3042d
JDA
177static enum machine_mode pa_promote_function_mode (const_tree,
178 enum machine_mode, int *,
179 const_tree, int);
2eddfed1 180
859c146c
RH
181static void pa_asm_trampoline_template (FILE *);
182static void pa_trampoline_init (rtx, tree, rtx);
183static rtx pa_trampoline_adjust_address (rtx);
1f65437d 184static rtx pa_delegitimize_address (rtx);
8a5b8538 185static bool pa_print_operand_punct_valid_p (unsigned char);
bc707992
JDA
186static rtx pa_internal_arg_pointer (void);
187static bool pa_can_eliminate (const int, const int);
5efd84c5 188static void pa_conditional_register_usage (void);
41a1208a 189static enum machine_mode pa_c_mode_for_suffix (char);
7550cb35 190static section *pa_function_section (tree, enum node_frequency, bool, bool);
fbbf66e7 191static bool pa_cannot_force_const_mem (enum machine_mode, rtx);
1a627b35 192static bool pa_legitimate_constant_p (enum machine_mode, rtx);
fda33f15 193static unsigned int pa_section_type_flags (tree, const char *, int);
1a04ac2b 194static bool pa_legitimate_address_p (enum machine_mode, rtx, bool);
859c146c 195
d6b5193b
RS
196/* The following extra sections are only used for SOM. */
197static GTY(()) section *som_readonly_data_section;
198static GTY(()) section *som_one_only_readonly_data_section;
199static GTY(()) section *som_one_only_data_section;
57d138a9 200static GTY(()) section *som_tm_clone_table_section;
d6b5193b 201
68386e1e
JL
202/* Counts for the number of callee-saved general and floating point
203 registers which were saved by the current function's prologue. */
204static int gr_saved, fr_saved;
205
16c16a24
JDA
206/* Boolean indicating whether the return pointer was saved by the
207 current function's prologue. */
208static bool rp_saved;
209
b7849684 210static rtx find_addr_reg (rtx);
188538df 211
5fad1c24 212/* Keep track of the number of bytes we have output in the CODE subspace
279c9bde 213 during this compilation so we'll know when to emit inline long-calls. */
a02aa5b0 214unsigned long total_code_bytes;
279c9bde 215
5fad1c24
JDA
216/* The last address of the previous function plus the number of bytes in
217 associated thunks that have been output. This is used to determine if
218 a thunk can use an IA-relative branch to reach its target function. */
67b846fa 219static unsigned int last_address;
5fad1c24 220
93ae92c1 221/* Variables to handle plabels that we discover are necessary at assembly
ddd5a7c1 222 output time. They are output after the current function. */
d1b38208 223struct GTY(()) deferred_plabel
93ae92c1
JL
224{
225 rtx internal_label;
744b2d61 226 rtx symbol;
e2500fed
GK
227};
228static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
229 deferred_plabels;
0f8e3849 230static size_t n_deferred_plabels = 0;
672a6f42
NB
231\f
232/* Initialize the GCC target structure. */
301d03af 233
8a5b8538
AS
234#undef TARGET_OPTION_OVERRIDE
235#define TARGET_OPTION_OVERRIDE pa_option_override
236
301d03af
RS
237#undef TARGET_ASM_ALIGNED_HI_OP
238#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
239#undef TARGET_ASM_ALIGNED_SI_OP
240#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
241#undef TARGET_ASM_ALIGNED_DI_OP
242#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
243#undef TARGET_ASM_UNALIGNED_HI_OP
244#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
245#undef TARGET_ASM_UNALIGNED_SI_OP
246#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
247#undef TARGET_ASM_UNALIGNED_DI_OP
248#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
249#undef TARGET_ASM_INTEGER
250#define TARGET_ASM_INTEGER pa_assemble_integer
251
08c148a8
NB
252#undef TARGET_ASM_FUNCTION_PROLOGUE
253#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
254#undef TARGET_ASM_FUNCTION_EPILOGUE
255#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
93ae92c1 256
9193312a
AS
257#undef TARGET_FUNCTION_VALUE
258#define TARGET_FUNCTION_VALUE pa_function_value
8a5b8538
AS
259#undef TARGET_LIBCALL_VALUE
260#define TARGET_LIBCALL_VALUE pa_libcall_value
261#undef TARGET_FUNCTION_VALUE_REGNO_P
262#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
9193312a 263
506d7b68
PB
264#undef TARGET_LEGITIMIZE_ADDRESS
265#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
266
c237e94a
ZW
267#undef TARGET_SCHED_ADJUST_COST
268#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
269#undef TARGET_SCHED_ADJUST_PRIORITY
270#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
271#undef TARGET_SCHED_ISSUE_RATE
272#define TARGET_SCHED_ISSUE_RATE pa_issue_rate
273
fb49053f
RH
274#undef TARGET_ENCODE_SECTION_INFO
275#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
772c5265
RH
276#undef TARGET_STRIP_NAME_ENCODING
277#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
fb49053f 278
4977bab6
ZW
279#undef TARGET_FUNCTION_OK_FOR_SIBCALL
280#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
281
8ddf681a
R
282#undef TARGET_COMMUTATIVE_P
283#define TARGET_COMMUTATIVE_P pa_commutative_p
284
c590b625
RH
285#undef TARGET_ASM_OUTPUT_MI_THUNK
286#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
3961e8fe
RH
287#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
288#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
c590b625 289
a5fe455b 290#undef TARGET_ASM_FILE_END
744b2d61
JDA
291#ifdef ASM_OUTPUT_EXTERNAL_REAL
292#define TARGET_ASM_FILE_END pa_hpux_file_end
293#else
a5fe455b 294#define TARGET_ASM_FILE_END output_deferred_plabels
744b2d61 295#endif
a5fe455b 296
8a5b8538
AS
297#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
298#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
299
35d434ed
JDA
300#if !defined(USE_COLLECT2)
301#undef TARGET_ASM_CONSTRUCTOR
302#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
303#undef TARGET_ASM_DESTRUCTOR
304#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
305#endif
306
4677862a
JDA
307#undef TARGET_INIT_BUILTINS
308#define TARGET_INIT_BUILTINS pa_init_builtins
309
41a1208a
JDA
310#undef TARGET_EXPAND_BUILTIN
311#define TARGET_EXPAND_BUILTIN pa_expand_builtin
312
8a5b8538
AS
313#undef TARGET_REGISTER_MOVE_COST
314#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
3c50106f
RH
315#undef TARGET_RTX_COSTS
316#define TARGET_RTX_COSTS hppa_rtx_costs
dcefdf67
RH
317#undef TARGET_ADDRESS_COST
318#define TARGET_ADDRESS_COST hppa_address_cost
3c50106f 319
18dbd950
RS
320#undef TARGET_MACHINE_DEPENDENT_REORG
321#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
322
c15c90bb 323#undef TARGET_INIT_LIBFUNCS
50bbeefb 324#define TARGET_INIT_LIBFUNCS pa_init_libfuncs
c15c90bb 325
cde0f3fd
PB
326#undef TARGET_PROMOTE_FUNCTION_MODE
327#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
3f12cd9b 328#undef TARGET_PROMOTE_PROTOTYPES
586de218 329#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
3f12cd9b
KH
330
331#undef TARGET_STRUCT_VALUE_RTX
332#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
333#undef TARGET_RETURN_IN_MEMORY
334#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
fe984136
RH
335#undef TARGET_MUST_PASS_IN_STACK
336#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8cd5a4e0
RH
337#undef TARGET_PASS_BY_REFERENCE
338#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
6cdd5672
RH
339#undef TARGET_CALLEE_COPIES
340#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
78a52f11
RH
341#undef TARGET_ARG_PARTIAL_BYTES
342#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
fd29bdaf
NF
343#undef TARGET_FUNCTION_ARG
344#define TARGET_FUNCTION_ARG pa_function_arg
345#undef TARGET_FUNCTION_ARG_ADVANCE
346#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
c2ed6cf8
NF
347#undef TARGET_FUNCTION_ARG_BOUNDARY
348#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
3f12cd9b
KH
349
350#undef TARGET_EXPAND_BUILTIN_SAVEREGS
351#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
d7bd8aeb
JJ
352#undef TARGET_EXPAND_BUILTIN_VA_START
353#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
8101c928
RH
354#undef TARGET_GIMPLIFY_VA_ARG_EXPR
355#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
3f12cd9b 356
83c32f2e
JDA
357#undef TARGET_SCALAR_MODE_SUPPORTED_P
358#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
359
51076f96 360#undef TARGET_CANNOT_FORCE_CONST_MEM
fbbf66e7 361#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
51076f96 362
ec963611
JDA
363#undef TARGET_SECONDARY_RELOAD
364#define TARGET_SECONDARY_RELOAD pa_secondary_reload
365
16c16a24
JDA
366#undef TARGET_EXTRA_LIVE_ON_ENTRY
367#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
368
859c146c
RH
369#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
370#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
371#undef TARGET_TRAMPOLINE_INIT
372#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
373#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
374#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
1f65437d
SE
375#undef TARGET_DELEGITIMIZE_ADDRESS
376#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
bc707992
JDA
377#undef TARGET_INTERNAL_ARG_POINTER
378#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
379#undef TARGET_CAN_ELIMINATE
380#define TARGET_CAN_ELIMINATE pa_can_eliminate
5efd84c5
NF
381#undef TARGET_CONDITIONAL_REGISTER_USAGE
382#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
41a1208a
JDA
383#undef TARGET_C_MODE_FOR_SUFFIX
384#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
7550cb35
JDA
385#undef TARGET_ASM_FUNCTION_SECTION
386#define TARGET_ASM_FUNCTION_SECTION pa_function_section
859c146c 387
1a627b35
RS
388#undef TARGET_LEGITIMATE_CONSTANT_P
389#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
fda33f15
JDA
390#undef TARGET_SECTION_TYPE_FLAGS
391#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
1a04ac2b
JDA
392#undef TARGET_LEGITIMATE_ADDRESS_P
393#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
1a627b35 394
f6897b10 395struct gcc_target targetm = TARGET_INITIALIZER;
672a6f42 396\f
a2017852
JDA
397/* Parse the -mfixed-range= option string. */
398
399static void
400fix_range (const char *const_str)
401{
402 int i, first, last;
403 char *str, *dash, *comma;
404
405 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
406 REG2 are either register names or register numbers. The effect
407 of this option is to mark the registers in the range from REG1 to
408 REG2 as ``fixed'' so they won't be used by the compiler. This is
419df6a2 409 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
a2017852
JDA
410
411 i = strlen (const_str);
412 str = (char *) alloca (i + 1);
413 memcpy (str, const_str, i + 1);
414
415 while (1)
416 {
417 dash = strchr (str, '-');
418 if (!dash)
419 {
d4ee4d25 420 warning (0, "value of -mfixed-range must have form REG1-REG2");
a2017852
JDA
421 return;
422 }
423 *dash = '\0';
424
425 comma = strchr (dash + 1, ',');
426 if (comma)
427 *comma = '\0';
428
429 first = decode_reg_name (str);
430 if (first < 0)
431 {
d4ee4d25 432 warning (0, "unknown register name: %s", str);
a2017852
JDA
433 return;
434 }
435
436 last = decode_reg_name (dash + 1);
437 if (last < 0)
438 {
d4ee4d25 439 warning (0, "unknown register name: %s", dash + 1);
a2017852
JDA
440 return;
441 }
442
443 *dash = '-';
444
445 if (first > last)
446 {
d4ee4d25 447 warning (0, "%s-%s is an empty range", str, dash + 1);
a2017852
JDA
448 return;
449 }
450
451 for (i = first; i <= last; ++i)
452 fixed_regs[i] = call_used_regs[i] = 1;
453
454 if (!comma)
455 break;
456
457 *comma = ',';
458 str = comma + 1;
459 }
460
461 /* Check if all floating point registers have been fixed. */
462 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
463 if (!fixed_regs[i])
464 break;
465
466 if (i > FP_REG_LAST)
467 target_flags |= MASK_DISABLE_FPREGS;
468}
469
8a5b8538
AS
470/* Implement the TARGET_OPTION_OVERRIDE hook. */
471
472static void
473pa_option_override (void)
66617831 474{
1bb721dc
JM
475 unsigned int i;
476 cl_deferred_option *opt;
9771b263
DN
477 vec<cl_deferred_option> *v
478 = (vec<cl_deferred_option> *) pa_deferred_options;
1bb721dc 479
9771b263
DN
480 if (v)
481 FOR_EACH_VEC_ELT (*v, i, opt)
482 {
483 switch (opt->opt_index)
484 {
485 case OPT_mfixed_range_:
486 fix_range (opt->arg);
487 break;
1bb721dc 488
9771b263
DN
489 default:
490 gcc_unreachable ();
491 }
492 }
1bb721dc 493
1c31ecf6
JDA
494 /* Unconditional branches in the delay slot are not compatible with dwarf2
495 call frame information. There is no benefit in using this optimization
496 on PA8000 and later processors. */
497 if (pa_cpu >= PROCESSOR_8000
677f3fa8 498 || (targetm_common.except_unwind_info (&global_options) == UI_DWARF2
d5fabb58 499 && flag_exceptions)
1c31ecf6
JDA
500 || flag_unwind_tables)
501 target_flags &= ~MASK_JUMP_IN_DELAY;
502
6a73009d
JL
503 if (flag_pic && TARGET_PORTABLE_RUNTIME)
504 {
ab532386 505 warning (0, "PIC code generation is not supported in the portable runtime model");
6a73009d
JL
506 }
507
a7721dc0 508 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
6a73009d 509 {
ab532386 510 warning (0, "PIC code generation is not compatible with fast indirect calls");
6a73009d 511 }
0eba3d30 512
54eef932
JL
513 if (! TARGET_GAS && write_symbols != NO_DEBUG)
514 {
d4ee4d25
DD
515 warning (0, "-g is only supported when using GAS on this processor,");
516 warning (0, "-g option disabled");
54eef932
JL
517 write_symbols = NO_DEBUG;
518 }
d07d525a 519
96ce28a3
JDA
520#ifdef AUTO_INC_DEC
521 /* FIXME: Disable auto increment and decrement processing until reload
522 is completed. See PR middle-end 56791. */
523 flag_auto_inc_dec = reload_completed;
524#endif
525
7ee72796
JL
526 /* We only support the "big PIC" model now. And we always generate PIC
527 code when in 64bit mode. */
528 if (flag_pic == 1 || TARGET_64BIT)
520babc7
JL
529 flag_pic = 2;
530
e92abd50
JDA
531 /* Disable -freorder-blocks-and-partition as we don't support hot and
532 cold partitioning. */
533 if (flag_reorder_blocks_and_partition)
534 {
535 inform (input_location,
536 "-freorder-blocks-and-partition does not work "
537 "on this architecture");
538 flag_reorder_blocks_and_partition = 0;
539 flag_reorder_blocks = 1;
540 }
541
301d03af
RS
542 /* We can't guarantee that .dword is available for 32-bit targets. */
543 if (UNITS_PER_WORD == 4)
544 targetm.asm_out.aligned_op.di = NULL;
545
546 /* The unaligned ops are only available when using GAS. */
547 if (!TARGET_GAS)
548 {
549 targetm.asm_out.unaligned_op.hi = NULL;
550 targetm.asm_out.unaligned_op.si = NULL;
551 targetm.asm_out.unaligned_op.di = NULL;
552 }
9a55eab3
JDA
553
554 init_machine_status = pa_init_machine_status;
c47decad
JL
555}
556
41a1208a
JDA
557enum pa_builtins
558{
559 PA_BUILTIN_COPYSIGNQ,
560 PA_BUILTIN_FABSQ,
561 PA_BUILTIN_INFQ,
562 PA_BUILTIN_HUGE_VALQ,
563 PA_BUILTIN_max
564};
565
566static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
567
eab9e742 568static void
b7849684 569pa_init_builtins (void)
4677862a
JDA
570{
571#ifdef DONT_HAVE_FPUTC_UNLOCKED
e79983f4
MM
572 {
573 tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
574 set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
575 builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
576 }
4677862a 577#endif
dfcb2b51 578#if TARGET_HPUX_11
e79983f4
MM
579 {
580 tree decl;
581
582 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
583 set_user_assembler_name (decl, "_Isfinite");
584 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
585 set_user_assembler_name (decl, "_Isfinitef");
586 }
7d522000 587#endif
41a1208a
JDA
588
589 if (HPUX_LONG_DOUBLE_LIBRARY)
590 {
591 tree decl, ftype;
592
593 /* Under HPUX, the __float128 type is a synonym for "long double". */
594 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
595 "__float128");
596
597 /* TFmode support builtins. */
598 ftype = build_function_type_list (long_double_type_node,
599 long_double_type_node,
600 NULL_TREE);
601 decl = add_builtin_function ("__builtin_fabsq", ftype,
602 PA_BUILTIN_FABSQ, BUILT_IN_MD,
603 "_U_Qfabs", NULL_TREE);
604 TREE_READONLY (decl) = 1;
605 pa_builtins[PA_BUILTIN_FABSQ] = decl;
606
607 ftype = build_function_type_list (long_double_type_node,
608 long_double_type_node,
609 long_double_type_node,
610 NULL_TREE);
611 decl = add_builtin_function ("__builtin_copysignq", ftype,
612 PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
613 "_U_Qfcopysign", NULL_TREE);
614 TREE_READONLY (decl) = 1;
615 pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;
616
12526412 617 ftype = build_function_type_list (long_double_type_node, NULL_TREE);
41a1208a
JDA
618 decl = add_builtin_function ("__builtin_infq", ftype,
619 PA_BUILTIN_INFQ, BUILT_IN_MD,
620 NULL, NULL_TREE);
621 pa_builtins[PA_BUILTIN_INFQ] = decl;
622
623 decl = add_builtin_function ("__builtin_huge_valq", ftype,
624 PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
625 NULL, NULL_TREE);
626 pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
627 }
628}
629
630static rtx
631pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
632 enum machine_mode mode ATTRIBUTE_UNUSED,
633 int ignore ATTRIBUTE_UNUSED)
634{
635 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
636 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
637
638 switch (fcode)
639 {
640 case PA_BUILTIN_FABSQ:
641 case PA_BUILTIN_COPYSIGNQ:
642 return expand_call (exp, target, ignore);
643
644 case PA_BUILTIN_INFQ:
645 case PA_BUILTIN_HUGE_VALQ:
646 {
647 enum machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
648 REAL_VALUE_TYPE inf;
649 rtx tmp;
650
651 real_inf (&inf);
652 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
653
654 tmp = validize_mem (force_const_mem (target_mode, tmp));
655
656 if (target == 0)
657 target = gen_reg_rtx (target_mode);
658
659 emit_move_insn (target, tmp);
660 return target;
661 }
662
663 default:
664 gcc_unreachable ();
665 }
666
667 return NULL_RTX;
4677862a
JDA
668}
669
9a55eab3
JDA
/* Allocate a zero-initialized, garbage-collected struct
   machine_function and return it.  This is called through the
   init_machine_status pointer variable, from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  struct machine_function *status = ggc_alloc_cleared_machine_function ();

  return status;
}
679
d8f95bed
JDA
680/* If FROM is a probable pointer register, mark TO as a probable
681 pointer register with the same pointer alignment as FROM. */
682
683static void
684copy_reg_pointer (rtx to, rtx from)
685{
686 if (REG_POINTER (from))
687 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
688}
689
23f6f34f
TG
690/* Return 1 if X contains a symbolic expression. We know these
691 expressions will have one of a few well defined forms, so
c1d1b3f0
JL
692 we need only check those forms. */
693int
ae9d61ab 694pa_symbolic_expression_p (rtx x)
c1d1b3f0
JL
695{
696
fe19a83d 697 /* Strip off any HIGH. */
c1d1b3f0
JL
698 if (GET_CODE (x) == HIGH)
699 x = XEXP (x, 0);
700
1a04ac2b 701 return symbolic_operand (x, VOIDmode);
c1d1b3f0
JL
702}
703
47abc309 704/* Accept any constant that can be moved in one instruction into a
6746a52e 705 general register. */
23f6f34f 706int
ae9d61ab 707pa_cint_ok_for_move (HOST_WIDE_INT ival)
6746a52e
JL
708{
709 /* OK if ldo, ldil, or zdepi, can be used. */
5b281141 710 return (VAL_14_BITS_P (ival)
ae9d61ab
JDA
711 || pa_ldil_cint_p (ival)
712 || pa_zdepi_cint_p (ival));
6746a52e 713}
188538df 714\f
5b281141
JDA
715/* True iff ldil can be used to load this CONST_INT. The least
716 significant 11 bits of the value must be zero and the value must
717 not change sign when extended from 32 to 64 bits. */
718int
ae9d61ab 719pa_ldil_cint_p (HOST_WIDE_INT ival)
5b281141
JDA
720{
721 HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);
722
723 return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
724}
725
831c1763 726/* True iff zdepi can be used to generate this CONST_INT.
a7b376ee 727 zdepi first sign extends a 5-bit signed number to a given field
831c1763 728 length, then places this field anywhere in a zero. */
0e7f4c19 729int
ae9d61ab 730pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
3a5babac 731{
0c235d7e 732 unsigned HOST_WIDE_INT lsb_mask, t;
3a5babac
TG
733
734 /* This might not be obvious, but it's at least fast.
ddd5a7c1 735 This function is critical; we don't have the time loops would take. */
a1747d2c
TG
736 lsb_mask = x & -x;
737 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
738 /* Return true iff t is a power of two. */
3a5babac
TG
739 return ((t & (t - 1)) == 0);
740}
741
23f6f34f
TG
742/* True iff depi or extru can be used to compute (reg & mask).
743 Accept bit pattern like these:
744 0....01....1
745 1....10....0
746 1..10..01..1 */
0e7f4c19 747int
ae9d61ab 748pa_and_mask_p (unsigned HOST_WIDE_INT mask)
0e7f4c19
TG
749{
750 mask = ~mask;
751 mask += mask & -mask;
752 return (mask & (mask - 1)) == 0;
753}
754
0e7f4c19
TG
755/* True iff depi can be used to compute (reg | MASK). */
756int
ae9d61ab 757pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
0e7f4c19
TG
758{
759 mask += mask & -mask;
760 return (mask & (mask - 1)) == 0;
761}
188538df
TG
762\f
763/* Legitimize PIC addresses. If the address is already
764 position-independent, we return ORIG. Newly generated
765 position-independent addresses go to REG. If we need more
766 than one register, we lose. */
767
ae9d61ab 768static rtx
b7849684 769legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
188538df
TG
770{
771 rtx pic_ref = orig;
772
06ae7eb1 773 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
51076f96 774
abc95ed3 775 /* Labels need special handling. */
519104fe 776 if (pic_label_operand (orig, mode))
6bb36601 777 {
4d811a05
JDA
778 rtx insn;
779
b3d9ecf0
JL
780 /* We do not want to go through the movXX expanders here since that
781 would create recursion.
782
783 Nor do we really want to call a generator for a named pattern
784 since that requires multiple patterns if we want to support
785 multiple word sizes.
786
787 So instead we just emit the raw set, which avoids the movXX
788 expanders completely. */
d8f95bed 789 mark_reg_pointer (reg, BITS_PER_UNIT);
4d811a05
JDA
790 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
791
792 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
bbbbb16a 793 add_reg_note (insn, REG_EQUAL, orig);
4d811a05
JDA
794
795 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
796 and update LABEL_NUSES because this is not done automatically. */
797 if (reload_in_progress || reload_completed)
798 {
799 /* Extract LABEL_REF. */
800 if (GET_CODE (orig) == CONST)
801 orig = XEXP (XEXP (orig, 0), 0);
802 /* Extract CODE_LABEL. */
803 orig = XEXP (orig, 0);
65c5f2a6 804 add_reg_note (insn, REG_LABEL_OPERAND, orig);
27e430a2
JDA
805 /* Make sure we have label and not a note. */
806 if (LABEL_P (orig))
807 LABEL_NUSES (orig)++;
4d811a05 808 }
e3b5732b 809 crtl->uses_pic_offset_table = 1;
6bb36601
JL
810 return reg;
811 }
188538df
TG
812 if (GET_CODE (orig) == SYMBOL_REF)
813 {
9ab81df2
JDA
814 rtx insn, tmp_reg;
815
144d51f9 816 gcc_assert (reg);
188538df 817
9ab81df2
JDA
818 /* Before reload, allocate a temporary register for the intermediate
819 result. This allows the sequence to be deleted when the final
820 result is unused and the insns are trivially dead. */
821 tmp_reg = ((reload_in_progress || reload_completed)
822 ? reg : gen_reg_rtx (Pmode));
823
9c575e20 824 if (function_label_operand (orig, VOIDmode))
7813231b 825 {
0b076fea
JDA
826 /* Force function label into memory in word mode. */
827 orig = XEXP (force_const_mem (word_mode, orig), 0);
7813231b
JDA
828 /* Load plabel address from DLT. */
829 emit_move_insn (tmp_reg,
830 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
831 gen_rtx_HIGH (word_mode, orig)));
832 pic_ref
833 = gen_const_mem (Pmode,
834 gen_rtx_LO_SUM (Pmode, tmp_reg,
835 gen_rtx_UNSPEC (Pmode,
542a8afa
RH
836 gen_rtvec (1, orig),
837 UNSPEC_DLTIND14R)));
7813231b
JDA
838 emit_move_insn (reg, pic_ref);
839 /* Now load address of function descriptor. */
840 pic_ref = gen_rtx_MEM (Pmode, reg);
841 }
842 else
843 {
844 /* Load symbol reference from DLT. */
845 emit_move_insn (tmp_reg,
846 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
847 gen_rtx_HIGH (word_mode, orig)));
848 pic_ref
849 = gen_const_mem (Pmode,
850 gen_rtx_LO_SUM (Pmode, tmp_reg,
851 gen_rtx_UNSPEC (Pmode,
852 gen_rtvec (1, orig),
853 UNSPEC_DLTIND14R)));
854 }
c5c76735 855
e3b5732b 856 crtl->uses_pic_offset_table = 1;
d8f95bed 857 mark_reg_pointer (reg, BITS_PER_UNIT);
9ab81df2
JDA
858 insn = emit_move_insn (reg, pic_ref);
859
860 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
bd94cb6e 861 set_unique_reg_note (insn, REG_EQUAL, orig);
9ab81df2 862
188538df
TG
863 return reg;
864 }
865 else if (GET_CODE (orig) == CONST)
866 {
f1c7ce82 867 rtx base;
188538df
TG
868
869 if (GET_CODE (XEXP (orig, 0)) == PLUS
870 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
871 return orig;
872
144d51f9
NS
873 gcc_assert (reg);
874 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
875
876 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
877 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
878 base == reg ? 0 : reg);
d8f95bed 879
188538df
TG
880 if (GET_CODE (orig) == CONST_INT)
881 {
a1747d2c 882 if (INT_14_BITS (orig))
0a81f074 883 return plus_constant (Pmode, base, INTVAL (orig));
188538df
TG
884 orig = force_reg (Pmode, orig);
885 }
ad2c71b7 886 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
188538df
TG
887 /* Likewise, should we set special REG_NOTEs here? */
888 }
d8f95bed 889
188538df
TG
890 return pic_ref;
891}
892
51076f96
RC
893static GTY(()) rtx gen_tls_tga;
894
895static rtx
896gen_tls_get_addr (void)
897{
898 if (!gen_tls_tga)
899 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
900 return gen_tls_tga;
901}
902
903static rtx
904hppa_tls_call (rtx arg)
905{
906 rtx ret;
907
908 ret = gen_reg_rtx (Pmode);
909 emit_library_call_value (gen_tls_get_addr (), ret,
910 LCT_CONST, Pmode, 1, arg, Pmode);
911
912 return ret;
913}
914
915static rtx
916legitimize_tls_address (rtx addr)
917{
918 rtx ret, insn, tmp, t1, t2, tp;
919 enum tls_model model = SYMBOL_REF_TLS_MODEL (addr);
920
921 switch (model)
922 {
923 case TLS_MODEL_GLOBAL_DYNAMIC:
924 tmp = gen_reg_rtx (Pmode);
a758fa89
AJ
925 if (flag_pic)
926 emit_insn (gen_tgd_load_pic (tmp, addr));
927 else
928 emit_insn (gen_tgd_load (tmp, addr));
51076f96
RC
929 ret = hppa_tls_call (tmp);
930 break;
931
932 case TLS_MODEL_LOCAL_DYNAMIC:
933 ret = gen_reg_rtx (Pmode);
934 tmp = gen_reg_rtx (Pmode);
935 start_sequence ();
a758fa89
AJ
936 if (flag_pic)
937 emit_insn (gen_tld_load_pic (tmp, addr));
938 else
939 emit_insn (gen_tld_load (tmp, addr));
51076f96
RC
940 t1 = hppa_tls_call (tmp);
941 insn = get_insns ();
942 end_sequence ();
943 t2 = gen_reg_rtx (Pmode);
944 emit_libcall_block (insn, t2, t1,
945 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
946 UNSPEC_TLSLDBASE));
947 emit_insn (gen_tld_offset_load (ret, addr, t2));
948 break;
949
950 case TLS_MODEL_INITIAL_EXEC:
951 tp = gen_reg_rtx (Pmode);
952 tmp = gen_reg_rtx (Pmode);
953 ret = gen_reg_rtx (Pmode);
954 emit_insn (gen_tp_load (tp));
a758fa89
AJ
955 if (flag_pic)
956 emit_insn (gen_tie_load_pic (tmp, addr));
957 else
958 emit_insn (gen_tie_load (tmp, addr));
51076f96
RC
959 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
960 break;
961
962 case TLS_MODEL_LOCAL_EXEC:
963 tp = gen_reg_rtx (Pmode);
964 ret = gen_reg_rtx (Pmode);
965 emit_insn (gen_tp_load (tp));
966 emit_insn (gen_tle_load (ret, addr, tp));
967 break;
968
969 default:
06ae7eb1 970 gcc_unreachable ();
51076f96
RC
971 }
972
973 return ret;
974}
975
c1d1b3f0
JL
976/* Try machine-dependent ways of modifying an illegitimate address
977 to be legitimate. If we find one, return the new, valid address.
978 This macro is used in only one place: `memory_address' in explow.c.
979
980 OLDX is the address as it was before break_out_memory_refs was called.
981 In some cases it is useful to look at this to decide what needs to be done.
982
c1d1b3f0 983 It is always safe for this macro to do nothing. It exists to recognize
23f6f34f 984 opportunities to optimize the output.
c1d1b3f0
JL
985
986 For the PA, transform:
987
988 memory(X + <large int>)
989
990 into:
991
992 if (<large int> & mask) >= 16
993 Y = (<large int> & ~mask) + mask + 1 Round up.
994 else
995 Y = (<large int> & ~mask) Round down.
996 Z = X + Y
997 memory (Z + (<large int> - Y));
998
23f6f34f 999 This is for CSE to find several similar references, and only use one Z.
c1d1b3f0 1000
1e5f1716 1001 X can either be a SYMBOL_REF or REG, but because combine cannot
c1d1b3f0
JL
1002 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1003 D will not fit in 14 bits.
1004
1005 MODE_FLOAT references allow displacements which fit in 5 bits, so use
23f6f34f 1006 0x1f as the mask.
c1d1b3f0
JL
1007
1008 MODE_INT references allow displacements which fit in 14 bits, so use
23f6f34f 1009 0x3fff as the mask.
c1d1b3f0
JL
1010
1011 This relies on the fact that most mode MODE_FLOAT references will use FP
1012 registers and most mode MODE_INT references will use integer registers.
1013 (In the rare case of an FP register used in an integer MODE, we depend
1014 on secondary reloads to clean things up.)
1015
1016
1017 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1018 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
ddd5a7c1 1019 addressing modes to be used).
c1d1b3f0
JL
1020
1021 Put X and Z into registers. Then put the entire expression into
1022 a register. */
1023
1024rtx
b7849684
JE
1025hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1026 enum machine_mode mode)
c1d1b3f0 1027{
c1d1b3f0
JL
1028 rtx orig = x;
1029
d8f95bed
JDA
1030 /* We need to canonicalize the order of operands in unscaled indexed
1031 addresses since the code that checks if an address is valid doesn't
1032 always try both orders. */
1033 if (!TARGET_NO_SPACE_REGS
1034 && GET_CODE (x) == PLUS
1035 && GET_MODE (x) == Pmode
1036 && REG_P (XEXP (x, 0))
1037 && REG_P (XEXP (x, 1))
1038 && REG_POINTER (XEXP (x, 0))
1039 && !REG_POINTER (XEXP (x, 1)))
1040 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
1041
51076f96
RC
1042 if (PA_SYMBOL_REF_TLS_P (x))
1043 return legitimize_tls_address (x);
1044 else if (flag_pic)
6bb36601
JL
1045 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1046
fe19a83d 1047 /* Strip off CONST. */
c1d1b3f0
JL
1048 if (GET_CODE (x) == CONST)
1049 x = XEXP (x, 0);
1050
68944452
JL
1051 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1052 That should always be safe. */
1053 if (GET_CODE (x) == PLUS
1054 && GET_CODE (XEXP (x, 0)) == REG
1055 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
1056 {
690d4228
JL
1057 rtx reg = force_reg (Pmode, XEXP (x, 1));
1058 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
68944452
JL
1059 }
1060
326bc2de
JL
1061 /* Note we must reject symbols which represent function addresses
1062 since the assembler/linker can't handle arithmetic on plabels. */
c1d1b3f0
JL
1063 if (GET_CODE (x) == PLUS
1064 && GET_CODE (XEXP (x, 1)) == CONST_INT
326bc2de
JL
1065 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1066 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
c1d1b3f0
JL
1067 || GET_CODE (XEXP (x, 0)) == REG))
1068 {
1069 rtx int_part, ptr_reg;
1070 int newoffset;
1071 int offset = INTVAL (XEXP (x, 1));
f9bd8d8e
JL
1072 int mask;
1073
1074 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1a04ac2b 1075 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
c1d1b3f0 1076
23f6f34f 1077 /* Choose which way to round the offset. Round up if we
c1d1b3f0
JL
1078 are >= halfway to the next boundary. */
1079 if ((offset & mask) >= ((mask + 1) / 2))
1080 newoffset = (offset & ~ mask) + mask + 1;
1081 else
1082 newoffset = (offset & ~ mask);
1083
1084 /* If the newoffset will not fit in 14 bits (ldo), then
1085 handling this would take 4 or 5 instructions (2 to load
1086 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1087 add the new offset and the SYMBOL_REF.) Combine can
1088 not handle 4->2 or 5->2 combinations, so do not create
1089 them. */
1090 if (! VAL_14_BITS_P (newoffset)
1091 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1092 {
0a81f074 1093 rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
c1d1b3f0 1094 rtx tmp_reg
e5e28962 1095 = force_reg (Pmode,
ad2c71b7 1096 gen_rtx_HIGH (Pmode, const_part));
c1d1b3f0 1097 ptr_reg
e5e28962 1098 = force_reg (Pmode,
c5c76735
JL
1099 gen_rtx_LO_SUM (Pmode,
1100 tmp_reg, const_part));
c1d1b3f0
JL
1101 }
1102 else
1103 {
1104 if (! VAL_14_BITS_P (newoffset))
e5e28962 1105 int_part = force_reg (Pmode, GEN_INT (newoffset));
c1d1b3f0
JL
1106 else
1107 int_part = GEN_INT (newoffset);
1108
e5e28962 1109 ptr_reg = force_reg (Pmode,
ad2c71b7
JL
1110 gen_rtx_PLUS (Pmode,
1111 force_reg (Pmode, XEXP (x, 0)),
1112 int_part));
c1d1b3f0 1113 }
0a81f074 1114 return plus_constant (Pmode, ptr_reg, offset - newoffset);
c1d1b3f0 1115 }
7426c959 1116
78c0acfd 1117 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
7426c959 1118
c1d1b3f0
JL
1119 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
1120 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
ae9d61ab 1121 && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
ec8e098d 1122 && (OBJECT_P (XEXP (x, 1))
7426c959
JL
1123 || GET_CODE (XEXP (x, 1)) == SUBREG)
1124 && GET_CODE (XEXP (x, 1)) != CONST)
c1d1b3f0
JL
1125 {
1126 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1127 rtx reg1, reg2;
78c0acfd
JL
1128
1129 reg1 = XEXP (x, 1);
1130 if (GET_CODE (reg1) != REG)
1131 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1132
1133 reg2 = XEXP (XEXP (x, 0), 0);
1134 if (GET_CODE (reg2) != REG)
1135 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1136
ad2c71b7 1137 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
c5c76735
JL
1138 gen_rtx_MULT (Pmode,
1139 reg2,
1140 GEN_INT (val)),
ad2c71b7 1141 reg1));
c1d1b3f0 1142 }
7426c959 1143
305123ba
JL
1144 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1145
1146 Only do so for floating point modes since this is more speculative
1147 and we lose if it's an integer store. */
78c0acfd 1148 if (GET_CODE (x) == PLUS
305123ba
JL
1149 && GET_CODE (XEXP (x, 0)) == PLUS
1150 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
1151 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
ae9d61ab 1152 && pa_shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
78c0acfd 1153 && (mode == SFmode || mode == DFmode))
305123ba 1154 {
78c0acfd
JL
1155
1156 /* First, try and figure out what to use as a base register. */
b38bccca 1157 rtx reg1, reg2, base, idx;
78c0acfd
JL
1158
1159 reg1 = XEXP (XEXP (x, 0), 1);
1160 reg2 = XEXP (x, 1);
1161 base = NULL_RTX;
1162 idx = NULL_RTX;
1163
1164 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
ae9d61ab 1165 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
3502dc9c 1166 it's a base register below. */
78c0acfd
JL
1167 if (GET_CODE (reg1) != REG)
1168 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1169
1170 if (GET_CODE (reg2) != REG)
1171 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1172
1173 /* Figure out what the base and index are. */
6619e96c 1174
78c0acfd 1175 if (GET_CODE (reg1) == REG
3502dc9c 1176 && REG_POINTER (reg1))
78c0acfd
JL
1177 {
1178 base = reg1;
ad2c71b7
JL
1179 idx = gen_rtx_PLUS (Pmode,
1180 gen_rtx_MULT (Pmode,
1181 XEXP (XEXP (XEXP (x, 0), 0), 0),
1182 XEXP (XEXP (XEXP (x, 0), 0), 1)),
1183 XEXP (x, 1));
78c0acfd
JL
1184 }
1185 else if (GET_CODE (reg2) == REG
3502dc9c 1186 && REG_POINTER (reg2))
78c0acfd
JL
1187 {
1188 base = reg2;
78c0acfd
JL
1189 idx = XEXP (x, 0);
1190 }
1191
1192 if (base == 0)
31d4f31f 1193 return orig;
78c0acfd
JL
1194
1195 /* If the index adds a large constant, try to scale the
1196 constant so that it can be loaded with only one insn. */
1197 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1198 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1199 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1200 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1201 {
1202 /* Divide the CONST_INT by the scale factor, then add it to A. */
1203 int val = INTVAL (XEXP (idx, 1));
1204
1205 val /= INTVAL (XEXP (XEXP (idx, 0), 1));
1206 reg1 = XEXP (XEXP (idx, 0), 0);
1207 if (GET_CODE (reg1) != REG)
1208 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1209
ad2c71b7 1210 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
78c0acfd
JL
1211
1212 /* We can now generate a simple scaled indexed address. */
c5c76735
JL
1213 return
1214 force_reg
1215 (Pmode, gen_rtx_PLUS (Pmode,
1216 gen_rtx_MULT (Pmode, reg1,
1217 XEXP (XEXP (idx, 0), 1)),
1218 base));
78c0acfd
JL
1219 }
1220
1221 /* If B + C is still a valid base register, then add them. */
1222 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1223 && INTVAL (XEXP (idx, 1)) <= 4096
1224 && INTVAL (XEXP (idx, 1)) >= -4096)
1225 {
1226 int val = INTVAL (XEXP (XEXP (idx, 0), 1));
1227 rtx reg1, reg2;
1228
ad2c71b7 1229 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
78c0acfd
JL
1230
1231 reg2 = XEXP (XEXP (idx, 0), 0);
1232 if (GET_CODE (reg2) != CONST_INT)
1233 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1234
ad2c71b7 1235 return force_reg (Pmode, gen_rtx_PLUS (Pmode,
c5c76735
JL
1236 gen_rtx_MULT (Pmode,
1237 reg2,
ad2c71b7
JL
1238 GEN_INT (val)),
1239 reg1));
78c0acfd
JL
1240 }
1241
1242 /* Get the index into a register, then add the base + index and
1243 return a register holding the result. */
1244
1245 /* First get A into a register. */
1246 reg1 = XEXP (XEXP (idx, 0), 0);
1247 if (GET_CODE (reg1) != REG)
1248 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1249
1250 /* And get B into a register. */
1251 reg2 = XEXP (idx, 1);
1252 if (GET_CODE (reg2) != REG)
1253 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1254
ad2c71b7
JL
1255 reg1 = force_reg (Pmode,
1256 gen_rtx_PLUS (Pmode,
1257 gen_rtx_MULT (Pmode, reg1,
1258 XEXP (XEXP (idx, 0), 1)),
1259 reg2));
78c0acfd
JL
1260
1261 /* Add the result to our base register and return. */
ad2c71b7 1262 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
6619e96c 1263
305123ba
JL
1264 }
1265
23f6f34f 1266 /* Uh-oh. We might have an address for x[n-100000]. This needs
c2827c50
JL
1267 special handling to avoid creating an indexed memory address
1268 with x-100000 as the base.
6619e96c 1269
c2827c50
JL
1270 If the constant part is small enough, then it's still safe because
1271 there is a guard page at the beginning and end of the data segment.
1272
1273 Scaled references are common enough that we want to try and rearrange the
1274 terms so that we can use indexing for these addresses too. Only
305123ba 1275 do the optimization for floatint point modes. */
7426c959 1276
c2827c50 1277 if (GET_CODE (x) == PLUS
ae9d61ab 1278 && pa_symbolic_expression_p (XEXP (x, 1)))
7426c959
JL
1279 {
1280 /* Ugly. We modify things here so that the address offset specified
1281 by the index expression is computed first, then added to x to form
c2827c50 1282 the entire address. */
7426c959 1283
305123ba 1284 rtx regx1, regx2, regy1, regy2, y;
7426c959
JL
1285
1286 /* Strip off any CONST. */
1287 y = XEXP (x, 1);
1288 if (GET_CODE (y) == CONST)
1289 y = XEXP (y, 0);
1290
77fc9313
RK
1291 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1292 {
305123ba
JL
1293 /* See if this looks like
1294 (plus (mult (reg) (shadd_const))
1295 (const (plus (symbol_ref) (const_int))))
1296
78c0acfd 1297 Where const_int is small. In that case the const
6619e96c 1298 expression is a valid pointer for indexing.
78c0acfd
JL
1299
1300 If const_int is big, but can be divided evenly by shadd_const
1301 and added to (reg). This allows more scaled indexed addresses. */
1302 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1303 && GET_CODE (XEXP (x, 0)) == MULT
305123ba 1304 && GET_CODE (XEXP (y, 1)) == CONST_INT
78c0acfd
JL
1305 && INTVAL (XEXP (y, 1)) >= -4096
1306 && INTVAL (XEXP (y, 1)) <= 4095
1307 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
ae9d61ab 1308 && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
78c0acfd
JL
1309 {
1310 int val = INTVAL (XEXP (XEXP (x, 0), 1));
1311 rtx reg1, reg2;
1312
1313 reg1 = XEXP (x, 1);
1314 if (GET_CODE (reg1) != REG)
1315 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1316
1317 reg2 = XEXP (XEXP (x, 0), 0);
1318 if (GET_CODE (reg2) != REG)
1319 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1320
ad2c71b7
JL
1321 return force_reg (Pmode,
1322 gen_rtx_PLUS (Pmode,
c5c76735
JL
1323 gen_rtx_MULT (Pmode,
1324 reg2,
ad2c71b7 1325 GEN_INT (val)),
c5c76735 1326 reg1));
78c0acfd
JL
1327 }
1328 else if ((mode == DFmode || mode == SFmode)
1329 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
1330 && GET_CODE (XEXP (x, 0)) == MULT
1331 && GET_CODE (XEXP (y, 1)) == CONST_INT
1332 && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
1333 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
ae9d61ab 1334 && pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
305123ba
JL
1335 {
1336 regx1
1337 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1338 / INTVAL (XEXP (XEXP (x, 0), 1))));
1339 regx2 = XEXP (XEXP (x, 0), 0);
1340 if (GET_CODE (regx2) != REG)
1341 regx2 = force_reg (Pmode, force_operand (regx2, 0));
ad2c71b7
JL
1342 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1343 regx2, regx1));
c5c76735
JL
1344 return
1345 force_reg (Pmode,
1346 gen_rtx_PLUS (Pmode,
1347 gen_rtx_MULT (Pmode, regx2,
1348 XEXP (XEXP (x, 0), 1)),
1349 force_reg (Pmode, XEXP (y, 0))));
305123ba 1350 }
c2827c50
JL
1351 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1352 && INTVAL (XEXP (y, 1)) >= -4096
1353 && INTVAL (XEXP (y, 1)) <= 4095)
1354 {
1355 /* This is safe because of the guard page at the
1356 beginning and end of the data space. Just
1357 return the original address. */
1358 return orig;
1359 }
305123ba
JL
1360 else
1361 {
1362 /* Doesn't look like one we can optimize. */
1363 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1364 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1365 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1366 regx1 = force_reg (Pmode,
ad2c71b7
JL
1367 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1368 regx1, regy2));
1369 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
305123ba 1370 }
77fc9313 1371 }
7426c959
JL
1372 }
1373
c1d1b3f0
JL
1374 return orig;
1375}
1376
8a5b8538
AS
1377/* Implement the TARGET_REGISTER_MOVE_COST hook.
1378
1379 Compute extra cost of moving data between one register class
1380 and another.
1381
1382 Make moves from SAR so expensive they should never happen. We used to
1383 have 0xffff here, but that generates overflow in rare cases.
1384
1385 Copies involving a FP register and a non-FP register are relatively
1386 expensive because they must go through memory.
1387
1388 Other copies are reasonably cheap. */
1389
1390static int
1391hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
1392 reg_class_t from, reg_class_t to)
1393{
1394 if (from == SHIFT_REGS)
1395 return 0x100;
483d7ad3
JDA
1396 else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
1397 return 18;
8a5b8538
AS
1398 else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
1399 || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
1400 return 16;
1401 else
1402 return 2;
1403}
1404
188538df
TG
1405/* For the HPPA, REG and REG+CONST is cost 0
1406 and addresses involving symbolic constants are cost 2.
1407
1408 PIC addresses are very expensive.
1409
1410 It is no coincidence that this has the same structure
1a04ac2b 1411 as pa_legitimate_address_p. */
dcefdf67
RH
1412
1413static int
b413068c
OE
1414hppa_address_cost (rtx X, enum machine_mode mode ATTRIBUTE_UNUSED,
1415 addr_space_t as ATTRIBUTE_UNUSED,
f40751dd 1416 bool speed ATTRIBUTE_UNUSED)
188538df 1417{
dcefdf67
RH
1418 switch (GET_CODE (X))
1419 {
1420 case REG:
1421 case PLUS:
1422 case LO_SUM:
188538df 1423 return 1;
dcefdf67
RH
1424 case HIGH:
1425 return 2;
1426 default:
1427 return 4;
1428 }
188538df
TG
1429}
1430
3c50106f
RH
1431/* Compute a (partial) cost for rtx X. Return true if the complete
1432 cost has been computed, and false if subexpressions should be
1433 scanned. In either case, *TOTAL contains the cost result. */
1434
1435static bool
68f932c4
RS
1436hppa_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
1437 int *total, bool speed ATTRIBUTE_UNUSED)
3c50106f 1438{
259febfe
JDA
1439 int factor;
1440
3c50106f
RH
1441 switch (code)
1442 {
1443 case CONST_INT:
1444 if (INTVAL (x) == 0)
1445 *total = 0;
1446 else if (INT_14_BITS (x))
1447 *total = 1;
1448 else
1449 *total = 2;
1450 return true;
1451
1452 case HIGH:
1453 *total = 2;
1454 return true;
1455
1456 case CONST:
1457 case LABEL_REF:
1458 case SYMBOL_REF:
1459 *total = 4;
1460 return true;
1461
1462 case CONST_DOUBLE:
1463 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1464 && outer_code != SET)
1465 *total = 0;
1466 else
1467 *total = 8;
1468 return true;
1469
1470 case MULT:
1471 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
259febfe
JDA
1472 {
1473 *total = COSTS_N_INSNS (3);
1474 return true;
1475 }
1476
1477 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1478 factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
1479 if (factor == 0)
1480 factor = 1;
1481
1482 if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1483 *total = factor * factor * COSTS_N_INSNS (8);
3c50106f 1484 else
259febfe 1485 *total = factor * factor * COSTS_N_INSNS (20);
3c50106f
RH
1486 return true;
1487
1488 case DIV:
1489 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
1490 {
1491 *total = COSTS_N_INSNS (14);
1492 return true;
1493 }
5efb1046 1494 /* FALLTHRU */
3c50106f
RH
1495
1496 case UDIV:
1497 case MOD:
1498 case UMOD:
259febfe
JDA
1499 /* A mode size N times larger than SImode needs O(N*N) more insns. */
1500 factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
1501 if (factor == 0)
1502 factor = 1;
1503
1504 *total = factor * factor * COSTS_N_INSNS (60);
3c50106f
RH
1505 return true;
1506
1507 case PLUS: /* this includes shNadd insns */
1508 case MINUS:
1509 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
259febfe
JDA
1510 {
1511 *total = COSTS_N_INSNS (3);
1512 return true;
1513 }
1514
1515 /* A size N times larger than UNITS_PER_WORD needs N times as
1516 many insns, taking N times as long. */
1517 factor = GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD;
1518 if (factor == 0)
1519 factor = 1;
1520 *total = factor * COSTS_N_INSNS (1);
3c50106f
RH
1521 return true;
1522
1523 case ASHIFT:
1524 case ASHIFTRT:
1525 case LSHIFTRT:
1526 *total = COSTS_N_INSNS (1);
1527 return true;
1528
1529 default:
1530 return false;
1531 }
1532}
1533
6619e96c
AM
1534/* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1535 new rtx with the correct mode. */
1536static inline rtx
b7849684 1537force_mode (enum machine_mode mode, rtx orig)
6619e96c
AM
1538{
1539 if (mode == GET_MODE (orig))
1540 return orig;
1541
144d51f9 1542 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
6619e96c
AM
1543
1544 return gen_rtx_REG (mode, REGNO (orig));
1545}
1546
51076f96
RC
1547/* Return 1 if *X is a thread-local symbol. */
1548
1549static int
1550pa_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
1551{
1552 return PA_SYMBOL_REF_TLS_P (*x);
1553}
1554
1555/* Return 1 if X contains a thread-local symbol. */
1556
1557bool
1558pa_tls_referenced_p (rtx x)
1559{
1560 if (!TARGET_HAVE_TLS)
1561 return false;
1562
1563 return for_each_rtx (&x, &pa_tls_symbol_ref_1, 0);
1564}
1565
fbbf66e7
RS
1566/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1567
1568static bool
1569pa_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1570{
1571 return pa_tls_referenced_p (x);
1572}
1573
188538df
TG
1574/* Emit insns to move operands[1] into operands[0].
1575
1576 Return 1 if we have written out everything that needs to be done to
1577 do the move. Otherwise, return 0 and the caller will emit the move
6619e96c 1578 normally.
1b8ad134
JL
1579
1580 Note SCRATCH_REG may not be in the proper mode depending on how it
c1207243 1581 will be used. This routine is responsible for creating a new copy
1b8ad134 1582 of SCRATCH_REG in the proper mode. */
188538df
TG
1583
1584int
ae9d61ab 1585pa_emit_move_sequence (rtx *operands, enum machine_mode mode, rtx scratch_reg)
188538df
TG
1586{
1587 register rtx operand0 = operands[0];
1588 register rtx operand1 = operands[1];
428be702 1589 register rtx tem;
188538df 1590
d8f95bed
JDA
1591 /* We can only handle indexed addresses in the destination operand
1592 of floating point stores. Thus, we need to break out indexed
1593 addresses from the destination operand. */
1594 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1595 {
b3a13419 1596 gcc_assert (can_create_pseudo_p ());
d8f95bed
JDA
1597
1598 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1599 operand0 = replace_equiv_address (operand0, tem);
1600 }
1601
1602 /* On targets with non-equivalent space registers, break out unscaled
1603 indexed addresses from the source operand before the final CSE.
1604 We have to do this because the REG_POINTER flag is not correctly
1605 carried through various optimization passes and CSE may substitute
1606 a pseudo without the pointer set for one with the pointer set. As
71cc389b 1607 a result, we lose various opportunities to create insns with
d8f95bed
JDA
1608 unscaled indexed addresses. */
1609 if (!TARGET_NO_SPACE_REGS
1610 && !cse_not_expected
1611 && GET_CODE (operand1) == MEM
1612 && GET_CODE (XEXP (operand1, 0)) == PLUS
1613 && REG_P (XEXP (XEXP (operand1, 0), 0))
1614 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1615 operand1
1616 = replace_equiv_address (operand1,
1617 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1618
54d65918
JL
1619 if (scratch_reg
1620 && reload_in_progress && GET_CODE (operand0) == REG
8a642d97 1621 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
f2034d06 1622 operand0 = reg_equiv_mem (REGNO (operand0));
54d65918
JL
1623 else if (scratch_reg
1624 && reload_in_progress && GET_CODE (operand0) == SUBREG
8a642d97
RK
1625 && GET_CODE (SUBREG_REG (operand0)) == REG
1626 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
27a2c2b5 1627 {
ddef6bc7 1628 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
71443006
JL
1629 the code which tracks sets/uses for delete_output_reload. */
1630 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
f2034d06 1631 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
ddef6bc7 1632 SUBREG_BYTE (operand0));
55a2c322 1633 operand0 = alter_subreg (&temp, true);
27a2c2b5 1634 }
8a642d97 1635
54d65918
JL
1636 if (scratch_reg
1637 && reload_in_progress && GET_CODE (operand1) == REG
8a642d97 1638 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
f2034d06 1639 operand1 = reg_equiv_mem (REGNO (operand1));
54d65918
JL
1640 else if (scratch_reg
1641 && reload_in_progress && GET_CODE (operand1) == SUBREG
8a642d97
RK
1642 && GET_CODE (SUBREG_REG (operand1)) == REG
1643 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
27a2c2b5 1644 {
ddef6bc7 1645 /* We must not alter SUBREG_BYTE (operand1) since that would confuse
71443006
JL
1646 the code which tracks sets/uses for delete_output_reload. */
1647 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
f2034d06 1648 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
ddef6bc7 1649 SUBREG_BYTE (operand1));
55a2c322 1650 operand1 = alter_subreg (&temp, true);
27a2c2b5 1651 }
8a642d97 1652
54d65918 1653 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
428be702
RK
1654 && ((tem = find_replacement (&XEXP (operand0, 0)))
1655 != XEXP (operand0, 0)))
7c95bbfb 1656 operand0 = replace_equiv_address (operand0, tem);
d8f95bed 1657
54d65918 1658 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
428be702
RK
1659 && ((tem = find_replacement (&XEXP (operand1, 0)))
1660 != XEXP (operand1, 0)))
7c95bbfb 1661 operand1 = replace_equiv_address (operand1, tem);
428be702 1662
4d3cea21 1663 /* Handle secondary reloads for loads/stores of FP registers from
cae80939 1664 REG+D addresses where D does not fit in 5 or 14 bits, including
68944452 1665 (subreg (mem (addr))) cases. */
a4295210
JDA
1666 if (scratch_reg
1667 && fp_reg_operand (operand0, mode)
1a04ac2b
JDA
1668 && (MEM_P (operand1)
1669 || (GET_CODE (operand1) == SUBREG
1670 && MEM_P (XEXP (operand1, 0))))
1671 && !floating_point_store_memory_operand (operand1, mode))
d2a94ec0 1672 {
42fbe27f
JL
1673 if (GET_CODE (operand1) == SUBREG)
1674 operand1 = XEXP (operand1, 0);
1675
1b8ad134
JL
1676 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1677 it in WORD_MODE regardless of what mode it was originally given
1678 to us. */
6619e96c 1679 scratch_reg = force_mode (word_mode, scratch_reg);
2d7b2c36
JL
1680
1681 /* D might not fit in 14 bits either; for such cases load D into
1682 scratch reg. */
1a04ac2b
JDA
1683 if (reg_plus_base_memory_operand (operand1, mode)
1684 && !(TARGET_PA_20
1685 && !TARGET_ELF32
1686 && INT_14_BITS (XEXP (XEXP (operand1, 0), 1))))
2d7b2c36
JL
1687 {
1688 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
d8f95bed
JDA
1689 emit_move_insn (scratch_reg,
1690 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
1691 Pmode,
1692 XEXP (XEXP (operand1, 0), 0),
1693 scratch_reg));
2d7b2c36
JL
1694 }
1695 else
1696 emit_move_insn (scratch_reg, XEXP (operand1, 0));
c5c76735 1697 emit_insn (gen_rtx_SET (VOIDmode, operand0,
7c95bbfb 1698 replace_equiv_address (operand1, scratch_reg)));
d2a94ec0
TM
1699 return 1;
1700 }
a4295210
JDA
1701 else if (scratch_reg
1702 && fp_reg_operand (operand1, mode)
1a04ac2b
JDA
1703 && (MEM_P (operand0)
1704 || (GET_CODE (operand0) == SUBREG
1705 && MEM_P (XEXP (operand0, 0))))
1706 && !floating_point_store_memory_operand (operand0, mode))
d2a94ec0 1707 {
42fbe27f
JL
1708 if (GET_CODE (operand0) == SUBREG)
1709 operand0 = XEXP (operand0, 0);
1710
1b8ad134
JL
1711 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1712 it in WORD_MODE regardless of what mode it was originally given
1713 to us. */
6619e96c 1714 scratch_reg = force_mode (word_mode, scratch_reg);
1b8ad134 1715
2d7b2c36
JL
1716 /* D might not fit in 14 bits either; for such cases load D into
1717 scratch reg. */
1a04ac2b
JDA
1718 if (reg_plus_base_memory_operand (operand0, mode)
1719 && !(TARGET_PA_20
1720 && !TARGET_ELF32
1721 && INT_14_BITS (XEXP (XEXP (operand0, 0), 1))))
2d7b2c36
JL
1722 {
1723 emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
ad2c71b7
JL
1724 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0,
1725 0)),
690d4228 1726 Pmode,
ad2c71b7
JL
1727 XEXP (XEXP (operand0, 0),
1728 0),
1729 scratch_reg));
2d7b2c36
JL
1730 }
1731 else
1732 emit_move_insn (scratch_reg, XEXP (operand0, 0));
7c95bbfb
RH
1733 emit_insn (gen_rtx_SET (VOIDmode,
1734 replace_equiv_address (operand0, scratch_reg),
ad2c71b7 1735 operand1));
d2a94ec0
TM
1736 return 1;
1737 }
c063ad75 1738 /* Handle secondary reloads for loads of FP registers from constant
1a04ac2b
JDA
1739 expressions by forcing the constant into memory. For the most part,
1740 this is only necessary for SImode and DImode.
c063ad75 1741
1a04ac2b 1742 Use scratch_reg to hold the address of the memory location. */
a4295210 1743 else if (scratch_reg
c063ad75 1744 && CONSTANT_P (operand1)
a4295210 1745 && fp_reg_operand (operand0, mode))
c063ad75 1746 {
7c95bbfb 1747 rtx const_mem, xoperands[2];
c063ad75 1748
1a04ac2b
JDA
1749 if (operand1 == CONST0_RTX (mode))
1750 {
1751 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
1752 return 1;
1753 }
1754
1b8ad134
JL
1755 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1756 it in WORD_MODE regardless of what mode it was originally given
1757 to us. */
6619e96c 1758 scratch_reg = force_mode (word_mode, scratch_reg);
1b8ad134 1759
c063ad75
JL
1760 /* Force the constant into memory and put the address of the
1761 memory location into scratch_reg. */
7c95bbfb 1762 const_mem = force_const_mem (mode, operand1);
c063ad75 1763 xoperands[0] = scratch_reg;
7c95bbfb 1764 xoperands[1] = XEXP (const_mem, 0);
ae9d61ab 1765 pa_emit_move_sequence (xoperands, Pmode, 0);
c063ad75
JL
1766
1767 /* Now load the destination register. */
c5c76735 1768 emit_insn (gen_rtx_SET (mode, operand0,
7c95bbfb 1769 replace_equiv_address (const_mem, scratch_reg)));
c063ad75
JL
1770 return 1;
1771 }
4d3cea21 1772 /* Handle secondary reloads for SAR. These occur when trying to load
483d7ad3 1773 the SAR from memory or a constant. */
a4295210
JDA
1774 else if (scratch_reg
1775 && GET_CODE (operand0) == REG
9c1eed37 1776 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
4d3cea21 1777 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
483d7ad3 1778 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
4d3cea21 1779 {
09ece7b5
JL
1780 /* D might not fit in 14 bits either; for such cases load D into
1781 scratch reg. */
1782 if (GET_CODE (operand1) == MEM
2fd74bff 1783 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
09ece7b5 1784 {
dd8c13e3
JL
1785 /* We are reloading the address into the scratch register, so we
1786 want to make sure the scratch register is a full register. */
6619e96c 1787 scratch_reg = force_mode (word_mode, scratch_reg);
dd8c13e3 1788
6619e96c 1789 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
ad2c71b7
JL
1790 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1791 0)),
690d4228 1792 Pmode,
ad2c71b7
JL
1793 XEXP (XEXP (operand1, 0),
1794 0),
1795 scratch_reg));
dd8c13e3
JL
1796
1797 /* Now we are going to load the scratch register from memory,
1798 we want to load it in the same width as the original MEM,
1799 which must be the same as the width of the ultimate destination,
1800 OPERAND0. */
6619e96c
AM
1801 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1802
7c95bbfb
RH
1803 emit_move_insn (scratch_reg,
1804 replace_equiv_address (operand1, scratch_reg));
09ece7b5
JL
1805 }
1806 else
dd8c13e3
JL
1807 {
1808 /* We want to load the scratch register using the same mode as
1809 the ultimate destination. */
6619e96c
AM
1810 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1811
dd8c13e3
JL
1812 emit_move_insn (scratch_reg, operand1);
1813 }
1814
1815 /* And emit the insn to set the ultimate destination. We know that
1816 the scratch register has the same mode as the destination at this
1817 point. */
4d3cea21
JL
1818 emit_move_insn (operand0, scratch_reg);
1819 return 1;
1820 }
d8f95bed 1821 /* Handle the most common case: storing into a register. */
d2a94ec0 1822 else if (register_operand (operand0, mode))
188538df 1823 {
9a201645
JDA
1824 /* Legitimize TLS symbol references. This happens for references
1825 that aren't a legitimate constant. */
1826 if (PA_SYMBOL_REF_TLS_P (operand1))
1827 operand1 = legitimize_tls_address (operand1);
1828
188538df 1829 if (register_operand (operand1, mode)
b8e42321 1830 || (GET_CODE (operand1) == CONST_INT
ae9d61ab 1831 && pa_cint_ok_for_move (INTVAL (operand1)))
f048ca47 1832 || (operand1 == CONST0_RTX (mode))
188538df 1833 || (GET_CODE (operand1) == HIGH
80225b66 1834 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
188538df
TG
1835 /* Only `general_operands' can come here, so MEM is ok. */
1836 || GET_CODE (operand1) == MEM)
1837 {
d8f95bed
JDA
1838 /* Various sets are created during RTL generation which don't
1839 have the REG_POINTER flag correctly set. After the CSE pass,
1840 instruction recognition can fail if we don't consistently
1841 set this flag when performing register copies. This should
1842 also improve the opportunities for creating insns that use
1843 unscaled indexing. */
1844 if (REG_P (operand0) && REG_P (operand1))
1845 {
1846 if (REG_POINTER (operand1)
1847 && !REG_POINTER (operand0)
1848 && !HARD_REGISTER_P (operand0))
1849 copy_reg_pointer (operand0, operand1);
d8f95bed
JDA
1850 }
1851
1852 /* When MEMs are broken out, the REG_POINTER flag doesn't
1853 get set. In some cases, we can set the REG_POINTER flag
1854 from the declaration for the MEM. */
1855 if (REG_P (operand0)
1856 && GET_CODE (operand1) == MEM
1857 && !REG_POINTER (operand0))
1858 {
1859 tree decl = MEM_EXPR (operand1);
1860
1861 /* Set the register pointer flag and register alignment
1862 if the declaration for this memory reference is a
077c8ada
SE
1863 pointer type. */
1864 if (decl)
d8f95bed
JDA
1865 {
1866 tree type;
1867
1868 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1869 tree operand 1. */
1870 if (TREE_CODE (decl) == COMPONENT_REF)
1871 decl = TREE_OPERAND (decl, 1);
1872
1873 type = TREE_TYPE (decl);
dd25a747 1874 type = strip_array_types (type);
d8f95bed
JDA
1875
1876 if (POINTER_TYPE_P (type))
1877 {
1878 int align;
1879
1880 type = TREE_TYPE (type);
1881 /* Using TYPE_ALIGN_OK is rather conservative as
1882 only the ada frontend actually sets it. */
1883 align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
1884 : BITS_PER_UNIT);
1885 mark_reg_pointer (operand0, align);
1886 }
1887 }
1888 }
1889
ad2c71b7 1890 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
188538df
TG
1891 return 1;
1892 }
1893 }
1894 else if (GET_CODE (operand0) == MEM)
1895 {
d66dec28
JL
1896 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1897 && !(reload_in_progress || reload_completed))
1898 {
1899 rtx temp = gen_reg_rtx (DFmode);
1900
ad2c71b7
JL
1901 emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
1902 emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
d66dec28
JL
1903 return 1;
1904 }
f048ca47 1905 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
188538df
TG
1906 {
1907 /* Run this case quickly. */
ad2c71b7 1908 emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
188538df
TG
1909 return 1;
1910 }
1bc695cd 1911 if (! (reload_in_progress || reload_completed))
188538df
TG
1912 {
1913 operands[0] = validize_mem (operand0);
1914 operands[1] = operand1 = force_reg (mode, operand1);
1915 }
1916 }
1917
44201dba
JL
1918 /* Simplify the source if we need to.
1919 Note we do have to handle function labels here, even though we do
1920 not consider them legitimate constants. Loop optimizations can
06387d7c 1921 call the emit_move_xxx with one as a source. */
f1c7ce82 1922 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
9c575e20 1923 || function_label_operand (operand1, VOIDmode)
43940f6b 1924 || (GET_CODE (operand1) == HIGH
ba365a19 1925 && symbolic_operand (XEXP (operand1, 0), mode)))
188538df 1926 {
43940f6b
JL
1927 int ishighonly = 0;
1928
1929 if (GET_CODE (operand1) == HIGH)
1930 {
1931 ishighonly = 1;
1932 operand1 = XEXP (operand1, 0);
1933 }
188538df
TG
1934 if (symbolic_operand (operand1, mode))
1935 {
5eceed92 1936 /* Argh. The assembler and linker can't handle arithmetic
b0fabad3 1937 involving plabels.
5eceed92 1938
b0fabad3
JL
1939 So we force the plabel into memory, load operand0 from
1940 the memory location, then add in the constant part. */
44201dba
JL
1941 if ((GET_CODE (operand1) == CONST
1942 && GET_CODE (XEXP (operand1, 0)) == PLUS
9c575e20
JDA
1943 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
1944 VOIDmode))
1945 || function_label_operand (operand1, VOIDmode))
5eceed92 1946 {
8e64b41a 1947 rtx temp, const_part;
b0fabad3
JL
1948
1949 /* Figure out what (if any) scratch register to use. */
1950 if (reload_in_progress || reload_completed)
1b8ad134
JL
1951 {
1952 scratch_reg = scratch_reg ? scratch_reg : operand0;
1953 /* SCRATCH_REG will hold an address and maybe the actual
1954 data. We want it in WORD_MODE regardless of what mode it
1955 was originally given to us. */
6619e96c 1956 scratch_reg = force_mode (word_mode, scratch_reg);
1b8ad134 1957 }
b0fabad3
JL
1958 else if (flag_pic)
1959 scratch_reg = gen_reg_rtx (Pmode);
1960
44201dba
JL
1961 if (GET_CODE (operand1) == CONST)
1962 {
1963 /* Save away the constant part of the expression. */
1964 const_part = XEXP (XEXP (operand1, 0), 1);
144d51f9 1965 gcc_assert (GET_CODE (const_part) == CONST_INT);
44201dba
JL
1966
1967 /* Force the function label into memory. */
1968 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
1969 }
1970 else
1971 {
1972 /* No constant part. */
1973 const_part = NULL_RTX;
5eceed92 1974
44201dba
JL
1975 /* Force the function label into memory. */
1976 temp = force_const_mem (mode, operand1);
1977 }
6619e96c 1978
b0fabad3
JL
1979
1980 /* Get the address of the memory location. PIC-ify it if
1981 necessary. */
1982 temp = XEXP (temp, 0);
1983 if (flag_pic)
1984 temp = legitimize_pic_address (temp, mode, scratch_reg);
1985
1986 /* Put the address of the memory location into our destination
1987 register. */
1988 operands[1] = temp;
ae9d61ab 1989 pa_emit_move_sequence (operands, mode, scratch_reg);
b0fabad3
JL
1990
1991 /* Now load from the memory location into our destination
1992 register. */
ad2c71b7 1993 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
ae9d61ab 1994 pa_emit_move_sequence (operands, mode, scratch_reg);
b0fabad3
JL
1995
1996 /* And add back in the constant part. */
44201dba
JL
1997 if (const_part != NULL_RTX)
1998 expand_inc (operand0, const_part);
b0fabad3
JL
1999
2000 return 1;
5eceed92
JL
2001 }
2002
188538df
TG
2003 if (flag_pic)
2004 {
1bc695cd
JL
2005 rtx temp;
2006
2007 if (reload_in_progress || reload_completed)
1b8ad134
JL
2008 {
2009 temp = scratch_reg ? scratch_reg : operand0;
2010 /* TEMP will hold an address and maybe the actual
2011 data. We want it in WORD_MODE regardless of what mode it
2012 was originally given to us. */
6619e96c 2013 temp = force_mode (word_mode, temp);
1b8ad134 2014 }
1bc695cd
JL
2015 else
2016 temp = gen_reg_rtx (Pmode);
23f6f34f 2017
b0fabad3
JL
2018 /* (const (plus (symbol) (const_int))) must be forced to
2019 memory during/after reload if the const_int will not fit
2020 in 14 bits. */
2021 if (GET_CODE (operand1) == CONST
bc4a9f17
JL
2022 && GET_CODE (XEXP (operand1, 0)) == PLUS
2023 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2024 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
2025 && (reload_completed || reload_in_progress)
2026 && flag_pic)
2027 {
7c95bbfb 2028 rtx const_mem = force_const_mem (mode, operand1);
1c9ef36d 2029 operands[1] = legitimize_pic_address (XEXP (const_mem, 0),
bc4a9f17 2030 mode, temp);
7c95bbfb 2031 operands[1] = replace_equiv_address (const_mem, operands[1]);
ae9d61ab 2032 pa_emit_move_sequence (operands, mode, temp);
bc4a9f17 2033 }
5eceed92
JL
2034 else
2035 {
2036 operands[1] = legitimize_pic_address (operand1, mode, temp);
d8f95bed
JDA
2037 if (REG_P (operand0) && REG_P (operands[1]))
2038 copy_reg_pointer (operand0, operands[1]);
ad2c71b7 2039 emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1]));
5eceed92 2040 }
188538df 2041 }
6bb36601
JL
2042 /* On the HPPA, references to data space are supposed to use dp,
2043 register 27, but showing it in the RTL inhibits various cse
2044 and loop optimizations. */
23f6f34f 2045 else
188538df 2046 {
5eceed92 2047 rtx temp, set;
43940f6b 2048
23f6f34f 2049 if (reload_in_progress || reload_completed)
1b8ad134
JL
2050 {
2051 temp = scratch_reg ? scratch_reg : operand0;
2052 /* TEMP will hold an address and maybe the actual
2053 data. We want it in WORD_MODE regardless of what mode it
2054 was originally given to us. */
6619e96c 2055 temp = force_mode (word_mode, temp);
1b8ad134 2056 }
43940f6b
JL
2057 else
2058 temp = gen_reg_rtx (mode);
2059
68944452 2060 /* Loading a SYMBOL_REF into a register makes that register
6619e96c 2061 safe to be used as the base in an indexed address.
68944452
JL
2062
2063 Don't mark hard registers though. That loses. */
c34d858f
RK
2064 if (GET_CODE (operand0) == REG
2065 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
d8f95bed 2066 mark_reg_pointer (operand0, BITS_PER_UNIT);
68944452 2067 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
d8f95bed
JDA
2068 mark_reg_pointer (temp, BITS_PER_UNIT);
2069
43940f6b 2070 if (ishighonly)
ad2c71b7 2071 set = gen_rtx_SET (mode, operand0, temp);
43940f6b 2072 else
c5c76735
JL
2073 set = gen_rtx_SET (VOIDmode,
2074 operand0,
ad2c71b7 2075 gen_rtx_LO_SUM (mode, temp, operand1));
23f6f34f 2076
ad2c71b7
JL
2077 emit_insn (gen_rtx_SET (VOIDmode,
2078 temp,
2079 gen_rtx_HIGH (mode, operand1)));
b0ce651a 2080 emit_insn (set);
326bc2de 2081
188538df 2082 }
43940f6b 2083 return 1;
188538df 2084 }
51076f96
RC
2085 else if (pa_tls_referenced_p (operand1))
2086 {
2087 rtx tmp = operand1;
2088 rtx addend = NULL;
2089
2090 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2091 {
2092 addend = XEXP (XEXP (tmp, 0), 1);
2093 tmp = XEXP (XEXP (tmp, 0), 0);
2094 }
2095
2096 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2097 tmp = legitimize_tls_address (tmp);
2098 if (addend)
2099 {
2100 tmp = gen_rtx_PLUS (mode, tmp, addend);
2101 tmp = force_operand (tmp, operands[0]);
2102 }
2103 operands[1] = tmp;
2104 }
a1747d2c 2105 else if (GET_CODE (operand1) != CONST_INT
ae9d61ab 2106 || !pa_cint_ok_for_move (INTVAL (operand1)))
188538df 2107 {
a4295210
JDA
2108 rtx insn, temp;
2109 rtx op1 = operand1;
4cce9dd8 2110 HOST_WIDE_INT value = 0;
a4295210
JDA
2111 HOST_WIDE_INT insv = 0;
2112 int insert = 0;
2113
4cce9dd8
RS
2114 if (GET_CODE (operand1) == CONST_INT)
2115 value = INTVAL (operand1);
2116
a4295210
JDA
2117 if (TARGET_64BIT
2118 && GET_CODE (operand1) == CONST_INT
e0c556d3 2119 && HOST_BITS_PER_WIDE_INT > 32
520babc7
JL
2120 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2121 {
e0c556d3 2122 HOST_WIDE_INT nval;
520babc7 2123
b8e42321
JDA
2124 /* Extract the low order 32 bits of the value and sign extend.
2125 If the new value is the same as the original value, we can
2126 use the original value as-is. If the new value is
2127 different, we use it and insert the most-significant 32-bits
2128 of the original value into the final result. */
a4295210 2129 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
e0c556d3 2130 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
a4295210 2131 if (value != nval)
520babc7 2132 {
b8e42321 2133#if HOST_BITS_PER_WIDE_INT > 32
a4295210 2134 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
b8e42321 2135#endif
a4295210
JDA
2136 insert = 1;
2137 value = nval;
520babc7
JL
2138 operand1 = GEN_INT (nval);
2139 }
2140 }
1bc695cd
JL
2141
2142 if (reload_in_progress || reload_completed)
a4295210 2143 temp = scratch_reg ? scratch_reg : operand0;
1bc695cd
JL
2144 else
2145 temp = gen_reg_rtx (mode);
2146
47abc309
JDA
2147 /* We don't directly split DImode constants on 32-bit targets
2148 because PLUS uses an 11-bit immediate and the insn sequence
2149 generated is not as efficient as the one using HIGH/LO_SUM. */
2150 if (GET_CODE (operand1) == CONST_INT
0eab7815 2151 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
a4295210
JDA
2152 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2153 && !insert)
b8e42321 2154 {
47abc309 2155 /* Directly break constant into high and low parts. This
b8e42321
JDA
2156 provides better optimization opportunities because various
2157 passes recognize constants split with PLUS but not LO_SUM.
2158 We use a 14-bit signed low part except when the addition
2159 of 0x4000 to the high part might change the sign of the
2160 high part. */
b8e42321
JDA
2161 HOST_WIDE_INT low = value & 0x3fff;
2162 HOST_WIDE_INT high = value & ~ 0x3fff;
2163
2164 if (low >= 0x2000)
2165 {
2166 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2167 high += 0x2000;
2168 else
2169 high += 0x4000;
2170 }
2171
2172 low = value - high;
520babc7 2173
b8e42321
JDA
2174 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
2175 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2176 }
2177 else
520babc7 2178 {
b8e42321
JDA
2179 emit_insn (gen_rtx_SET (VOIDmode, temp,
2180 gen_rtx_HIGH (mode, operand1)));
2181 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
520babc7 2182 }
6619e96c 2183
a4295210
JDA
2184 insn = emit_move_insn (operands[0], operands[1]);
2185
2186 /* Now insert the most significant 32 bits of the value
2187 into the register. When we don't have a second register
2188 available, it could take up to nine instructions to load
2189 a 64-bit integer constant. Prior to reload, we force
2190 constants that would take more than three instructions
2191 to load to the constant pool. During and after reload,
2192 we have to handle all possible values. */
2193 if (insert)
2194 {
2195 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2196 register and the value to be inserted is outside the
2197 range that can be loaded with three depdi instructions. */
2198 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2199 {
2200 operand1 = GEN_INT (insv);
2201
2202 emit_insn (gen_rtx_SET (VOIDmode, temp,
2203 gen_rtx_HIGH (mode, operand1)));
2204 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
f0d54148
JDA
2205 if (mode == DImode)
2206 emit_insn (gen_insvdi (operand0, GEN_INT (32),
2207 const0_rtx, temp));
2208 else
2209 emit_insn (gen_insvsi (operand0, GEN_INT (32),
2210 const0_rtx, temp));
a4295210
JDA
2211 }
2212 else
2213 {
2214 int len = 5, pos = 27;
2215
2216 /* Insert the bits using the depdi instruction. */
2217 while (pos >= 0)
2218 {
2219 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2220 HOST_WIDE_INT sign = v5 < 0;
2221
2222 /* Left extend the insertion. */
2223 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2224 while (pos > 0 && (insv & 1) == sign)
2225 {
2226 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2227 len += 1;
2228 pos -= 1;
2229 }
2230
f0d54148
JDA
2231 if (mode == DImode)
2232 emit_insn (gen_insvdi (operand0, GEN_INT (len),
2233 GEN_INT (pos), GEN_INT (v5)));
2234 else
2235 emit_insn (gen_insvsi (operand0, GEN_INT (len),
2236 GEN_INT (pos), GEN_INT (v5)));
a4295210
JDA
2237
2238 len = pos > 0 && pos < 5 ? pos : 5;
2239 pos -= len;
2240 }
2241 }
2242 }
b8e42321 2243
bd94cb6e 2244 set_unique_reg_note (insn, REG_EQUAL, op1);
b8e42321 2245
520babc7 2246 return 1;
188538df
TG
2247 }
2248 }
2249 /* Now have insn-emit do whatever it normally does. */
2250 return 0;
2251}
2252
c77c286a 2253/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
c4bb6b38 2254 it will need a link/runtime reloc). */
c77c286a
JL
2255
2256int
ae9d61ab 2257pa_reloc_needed (tree exp)
c77c286a
JL
2258{
2259 int reloc = 0;
2260
2261 switch (TREE_CODE (exp))
2262 {
2263 case ADDR_EXPR:
2264 return 1;
2265
5be014d5 2266 case POINTER_PLUS_EXPR:
c77c286a
JL
2267 case PLUS_EXPR:
2268 case MINUS_EXPR:
ae9d61ab
JDA
2269 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2270 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
c77c286a
JL
2271 break;
2272
1043771b 2273 CASE_CONVERT:
c77c286a 2274 case NON_LVALUE_EXPR:
ae9d61ab 2275 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
c77c286a
JL
2276 break;
2277
2278 case CONSTRUCTOR:
2279 {
28f155be
GB
2280 tree value;
2281 unsigned HOST_WIDE_INT ix;
2282
2283 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2284 if (value)
ae9d61ab 2285 reloc |= pa_reloc_needed (value);
c77c286a
JL
2286 }
2287 break;
2288
2289 case ERROR_MARK:
2290 break;
51723711
KG
2291
2292 default:
2293 break;
c77c286a
JL
2294 }
2295 return reloc;
2296}
2297
188538df
TG
2298\f
2299/* Return the best assembler insn template
71cc389b 2300 for moving operands[1] into operands[0] as a fullword. */
519104fe 2301const char *
ae9d61ab 2302pa_singlemove_string (rtx *operands)
188538df 2303{
0c235d7e
TG
2304 HOST_WIDE_INT intval;
2305
188538df
TG
2306 if (GET_CODE (operands[0]) == MEM)
2307 return "stw %r1,%0";
0c235d7e 2308 if (GET_CODE (operands[1]) == MEM)
188538df 2309 return "ldw %1,%0";
0c235d7e 2310 if (GET_CODE (operands[1]) == CONST_DOUBLE)
e5c2baa1 2311 {
0c235d7e
TG
2312 long i;
2313 REAL_VALUE_TYPE d;
e5c2baa1 2314
144d51f9 2315 gcc_assert (GET_MODE (operands[1]) == SFmode);
e5c2baa1 2316
0c235d7e
TG
2317 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2318 bit pattern. */
2319 REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
2320 REAL_VALUE_TO_TARGET_SINGLE (d, i);
e5c2baa1 2321
0c235d7e
TG
2322 operands[1] = GEN_INT (i);
2323 /* Fall through to CONST_INT case. */
2324 }
2325 if (GET_CODE (operands[1]) == CONST_INT)
e5c2baa1 2326 {
0c235d7e
TG
2327 intval = INTVAL (operands[1]);
2328
2329 if (VAL_14_BITS_P (intval))
2330 return "ldi %1,%0";
2331 else if ((intval & 0x7ff) == 0)
2332 return "ldil L'%1,%0";
ae9d61ab 2333 else if (pa_zdepi_cint_p (intval))
f38b27c7 2334 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
e5c2baa1
RS
2335 else
2336 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2337 }
188538df
TG
2338 return "copy %1,%0";
2339}
2340\f
2341
/* Compute the operands useful for copying IMM to a register using
   the zdepi instructions: the immediate value to insert goes in OP[0],
   the position in OP[1] and the width in OP[2].  */
static void
compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
{
  int shift = 0;
  int width;

  /* Shift IMM right until its least significant set bit reaches bit 0,
     giving up after 32 positions.  */
  while (shift < 32 && (imm & 1) == 0)
    {
      imm >>= 1;
      shift++;
    }

  /* Choose variants based on the *sign* of the 5-bit field.  */
  if ((imm & 0x10) != 0)
    {
      /* Find the width of the bitstring in IMM.  */
      width = 5;
      while (width < 32 - shift
             && (imm & ((unsigned HOST_WIDE_INT) 1 << width)) != 0)
        width++;

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }
  else
    width = (shift <= 28) ? 4 : 32 - shift;

  op[0] = imm;
  op[1] = 31 - shift;
  op[2] = width;
}
2378
/* Compute the operands useful for copying IMM to a register using
   the depdi,z instructions: the immediate value to insert goes in
   OP[0], the position in OP[1] and the width in OP[2].  */

static void
compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
{
  /* Number of bits of IMM that can be examined on this host.  */
  const int limit = MIN (HOST_BITS_PER_WIDE_INT, 64);
  int shift = 0;
  int width;

  /* Shift IMM right until its least significant set bit reaches bit 0,
     giving up after LIMIT positions.  */
  while (shift < limit && (imm & 1) == 0)
    {
      imm >>= 1;
      shift++;
    }

  /* Choose variants based on the *sign* of the 5-bit field.  */
  if ((imm & 0x10) != 0)
    {
      /* Find the width of the bitstring in IMM.  */
      width = 5;
      while (width < limit - shift
             && (imm & ((unsigned HOST_WIDE_INT) 1 << width)) != 0)
        width++;

      /* Extend length if host is narrow and IMM is negative.  */
      if (HOST_BITS_PER_WIDE_INT == 32 && width == limit - shift)
        width += 32;

      /* Sign extend IMM as a 5-bit value.  */
      imm = (imm & 0xf) - 0x10;
    }
  else
    width = (shift <= limit - 4) ? 4 : limit - shift;

  op[0] = imm;
  op[1] = 63 - shift;
  op[2] = width;
}
2422
188538df
TG
2423/* Output assembler code to perform a doubleword move insn
2424 with operands OPERANDS. */
2425
519104fe 2426const char *
ae9d61ab 2427pa_output_move_double (rtx *operands)
188538df
TG
2428{
2429 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2430 rtx latehalf[2];
2431 rtx addreg0 = 0, addreg1 = 0;
2432
2433 /* First classify both operands. */
2434
2435 if (REG_P (operands[0]))
2436 optype0 = REGOP;
2437 else if (offsettable_memref_p (operands[0]))
2438 optype0 = OFFSOP;
2439 else if (GET_CODE (operands[0]) == MEM)
2440 optype0 = MEMOP;
2441 else
2442 optype0 = RNDOP;
2443
2444 if (REG_P (operands[1]))
2445 optype1 = REGOP;
2446 else if (CONSTANT_P (operands[1]))
2447 optype1 = CNSTOP;
2448 else if (offsettable_memref_p (operands[1]))
2449 optype1 = OFFSOP;
2450 else if (GET_CODE (operands[1]) == MEM)
2451 optype1 = MEMOP;
2452 else
2453 optype1 = RNDOP;
2454
2455 /* Check for the cases that the operand constraints are not
144d51f9
NS
2456 supposed to allow to happen. */
2457 gcc_assert (optype0 == REGOP || optype1 == REGOP);
188538df 2458
5401050b
JDA
2459 /* Handle copies between general and floating registers. */
2460
2461 if (optype0 == REGOP && optype1 == REGOP
2462 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2463 {
2464 if (FP_REG_P (operands[0]))
2465 {
2466 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2467 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2468 return "{fldds|fldd} -16(%%sp),%0";
2469 }
2470 else
2471 {
2472 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2473 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2474 return "{ldws|ldw} -12(%%sp),%R0";
2475 }
2476 }
2477
188538df
TG
2478 /* Handle auto decrementing and incrementing loads and stores
2479 specifically, since the structure of the function doesn't work
2480 for them without major modification. Do it better when we learn
2481 this port about the general inc/dec addressing of PA.
2482 (This was written by tege. Chide him if it doesn't work.) */
2483
2484 if (optype0 == MEMOP)
2485 {
e37ce5f6
JL
2486 /* We have to output the address syntax ourselves, since print_operand
2487 doesn't deal with the addresses we want to use. Fix this later. */
2488
188538df 2489 rtx addr = XEXP (operands[0], 0);
e37ce5f6 2490 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
188538df 2491 {
ad2c71b7 2492 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
e37ce5f6
JL
2493
2494 operands[0] = XEXP (addr, 0);
144d51f9
NS
2495 gcc_assert (GET_CODE (operands[1]) == REG
2496 && GET_CODE (operands[0]) == REG);
e37ce5f6 2497
144d51f9
NS
2498 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2499
2500 /* No overlap between high target register and address
2501 register. (We do this in a non-obvious way to
2502 save a register file writeback) */
2503 if (GET_CODE (addr) == POST_INC)
2504 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2505 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
9682683d 2506 }
e37ce5f6 2507 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
9682683d 2508 {
ad2c71b7 2509 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
e37ce5f6
JL
2510
2511 operands[0] = XEXP (addr, 0);
144d51f9
NS
2512 gcc_assert (GET_CODE (operands[1]) == REG
2513 && GET_CODE (operands[0]) == REG);
2514
2515 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2516 /* No overlap between high target register and address
2517 register. (We do this in a non-obvious way to save a
2518 register file writeback) */
2519 if (GET_CODE (addr) == PRE_INC)
2520 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2521 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
188538df
TG
2522 }
2523 }
2524 if (optype1 == MEMOP)
2525 {
2526 /* We have to output the address syntax ourselves, since print_operand
2527 doesn't deal with the addresses we want to use. Fix this later. */
2528
2529 rtx addr = XEXP (operands[1], 0);
2530 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2531 {
ad2c71b7 2532 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
188538df
TG
2533
2534 operands[1] = XEXP (addr, 0);
144d51f9
NS
2535 gcc_assert (GET_CODE (operands[0]) == REG
2536 && GET_CODE (operands[1]) == REG);
188538df
TG
2537
2538 if (!reg_overlap_mentioned_p (high_reg, addr))
2539 {
2540 /* No overlap between high target register and address
dd605bb4 2541 register. (We do this in a non-obvious way to
188538df
TG
2542 save a register file writeback) */
2543 if (GET_CODE (addr) == POST_INC)
f38b27c7 2544 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
6126a380 2545 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
188538df
TG
2546 }
2547 else
2548 {
2549 /* This is an undefined situation. We should load into the
2550 address register *and* update that register. Probably
2551 we don't need to handle this at all. */
2552 if (GET_CODE (addr) == POST_INC)
f38b27c7
JL
2553 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2554 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
188538df
TG
2555 }
2556 }
2557 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2558 {
ad2c71b7 2559 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
188538df
TG
2560
2561 operands[1] = XEXP (addr, 0);
144d51f9
NS
2562 gcc_assert (GET_CODE (operands[0]) == REG
2563 && GET_CODE (operands[1]) == REG);
188538df
TG
2564
2565 if (!reg_overlap_mentioned_p (high_reg, addr))
2566 {
2567 /* No overlap between high target register and address
dd605bb4 2568 register. (We do this in a non-obvious way to
188538df
TG
2569 save a register file writeback) */
2570 if (GET_CODE (addr) == PRE_INC)
f38b27c7
JL
2571 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2572 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
188538df
TG
2573 }
2574 else
2575 {
2576 /* This is an undefined situation. We should load into the
2577 address register *and* update that register. Probably
2578 we don't need to handle this at all. */
2579 if (GET_CODE (addr) == PRE_INC)
f38b27c7
JL
2580 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2581 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
188538df
TG
2582 }
2583 }
a89974a2
JL
2584 else if (GET_CODE (addr) == PLUS
2585 && GET_CODE (XEXP (addr, 0)) == MULT)
2586 {
4c6d8726 2587 rtx xoperands[4];
ad2c71b7 2588 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
a89974a2
JL
2589
2590 if (!reg_overlap_mentioned_p (high_reg, addr))
2591 {
a89974a2
JL
2592 xoperands[0] = high_reg;
2593 xoperands[1] = XEXP (addr, 1);
2594 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2595 xoperands[3] = XEXP (XEXP (addr, 0), 1);
f38b27c7
JL
2596 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2597 xoperands);
d2d28085 2598 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
a89974a2
JL
2599 }
2600 else
2601 {
a89974a2
JL
2602 xoperands[0] = high_reg;
2603 xoperands[1] = XEXP (addr, 1);
2604 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2605 xoperands[3] = XEXP (XEXP (addr, 0), 1);
f38b27c7
JL
2606 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2607 xoperands);
d2d28085 2608 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
a89974a2 2609 }
a89974a2 2610 }
188538df
TG
2611 }
2612
2613 /* If an operand is an unoffsettable memory ref, find a register
2614 we can increment temporarily to make it refer to the second word. */
2615
2616 if (optype0 == MEMOP)
2617 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2618
2619 if (optype1 == MEMOP)
2620 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2621
2622 /* Ok, we can do one word at a time.
2623 Normally we do the low-numbered word first.
2624
2625 In either case, set up in LATEHALF the operands to use
2626 for the high-numbered word and in some cases alter the
2627 operands in OPERANDS to be suitable for the low-numbered word. */
2628
2629 if (optype0 == REGOP)
ad2c71b7 2630 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
188538df 2631 else if (optype0 == OFFSOP)
b72f00af 2632 latehalf[0] = adjust_address (operands[0], SImode, 4);
188538df
TG
2633 else
2634 latehalf[0] = operands[0];
2635
2636 if (optype1 == REGOP)
ad2c71b7 2637 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
188538df 2638 else if (optype1 == OFFSOP)
b72f00af 2639 latehalf[1] = adjust_address (operands[1], SImode, 4);
188538df
TG
2640 else if (optype1 == CNSTOP)
2641 split_double (operands[1], &operands[1], &latehalf[1]);
2642 else
2643 latehalf[1] = operands[1];
2644
2645 /* If the first move would clobber the source of the second one,
2646 do them in the other order.
2647
bad883f8 2648 This can happen in two cases:
188538df 2649
bad883f8
JL
2650 mem -> register where the first half of the destination register
2651 is the same register used in the memory's address. Reload
2652 can create such insns.
188538df 2653
bad883f8 2654 mem in this case will be either register indirect or register
6619e96c 2655 indirect plus a valid offset.
bad883f8
JL
2656
2657 register -> register move where REGNO(dst) == REGNO(src + 1)
6619e96c 2658 someone (Tim/Tege?) claimed this can happen for parameter loads.
bad883f8
JL
2659
2660 Handle mem -> register case first. */
2661 if (optype0 == REGOP
2662 && (optype1 == MEMOP || optype1 == OFFSOP)
2663 && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
2664 operands[1], 0))
188538df 2665 {
188538df
TG
2666 /* Do the late half first. */
2667 if (addreg1)
498ee10c 2668 output_asm_insn ("ldo 4(%0),%0", &addreg1);
ae9d61ab 2669 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
bad883f8
JL
2670
2671 /* Then clobber. */
188538df 2672 if (addreg1)
498ee10c 2673 output_asm_insn ("ldo -4(%0),%0", &addreg1);
ae9d61ab 2674 return pa_singlemove_string (operands);
188538df
TG
2675 }
2676
bad883f8 2677 /* Now handle register -> register case. */
63a1f834
TG
2678 if (optype0 == REGOP && optype1 == REGOP
2679 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2680 {
ae9d61ab
JDA
2681 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2682 return pa_singlemove_string (operands);
63a1f834
TG
2683 }
2684
188538df
TG
2685 /* Normal case: do the two words, low-numbered first. */
2686
ae9d61ab 2687 output_asm_insn (pa_singlemove_string (operands), operands);
188538df
TG
2688
2689 /* Make any unoffsettable addresses point at high-numbered word. */
2690 if (addreg0)
498ee10c 2691 output_asm_insn ("ldo 4(%0),%0", &addreg0);
188538df 2692 if (addreg1)
498ee10c 2693 output_asm_insn ("ldo 4(%0),%0", &addreg1);
188538df
TG
2694
2695 /* Do that word. */
ae9d61ab 2696 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
188538df
TG
2697
2698 /* Undo the adds we just did. */
2699 if (addreg0)
498ee10c 2700 output_asm_insn ("ldo -4(%0),%0", &addreg0);
188538df 2701 if (addreg1)
498ee10c 2702 output_asm_insn ("ldo -4(%0),%0", &addreg1);
188538df
TG
2703
2704 return "";
2705}
2706\f
519104fe 2707const char *
ae9d61ab 2708pa_output_fp_move_double (rtx *operands)
188538df
TG
2709{
2710 if (FP_REG_P (operands[0]))
2711 {
23f6f34f 2712 if (FP_REG_P (operands[1])
f048ca47 2713 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
55abf18a 2714 output_asm_insn ("fcpy,dbl %f1,%0", operands);
23f6f34f 2715 else
2414e0e2 2716 output_asm_insn ("fldd%F1 %1,%0", operands);
188538df
TG
2717 }
2718 else if (FP_REG_P (operands[1]))
2719 {
2414e0e2 2720 output_asm_insn ("fstd%F0 %1,%0", operands);
188538df 2721 }
144d51f9 2722 else
f048ca47 2723 {
144d51f9
NS
2724 rtx xoperands[2];
2725
2726 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2727
23f6f34f 2728 /* This is a pain. You have to be prepared to deal with an
ddd5a7c1 2729 arbitrary address here including pre/post increment/decrement.
f048ca47
JL
2730
2731 so avoid this in the MD. */
144d51f9
NS
2732 gcc_assert (GET_CODE (operands[0]) == REG);
2733
2734 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2735 xoperands[0] = operands[0];
2736 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
f048ca47 2737 }
188538df
TG
2738 return "";
2739}
2740\f
2741/* Return a REG that occurs in ADDR with coefficient 1.
2742 ADDR can be effectively incremented by incrementing REG. */
2743
2744static rtx
b7849684 2745find_addr_reg (rtx addr)
188538df
TG
2746{
2747 while (GET_CODE (addr) == PLUS)
2748 {
2749 if (GET_CODE (XEXP (addr, 0)) == REG)
2750 addr = XEXP (addr, 0);
2751 else if (GET_CODE (XEXP (addr, 1)) == REG)
2752 addr = XEXP (addr, 1);
2753 else if (CONSTANT_P (XEXP (addr, 0)))
2754 addr = XEXP (addr, 1);
2755 else if (CONSTANT_P (XEXP (addr, 1)))
2756 addr = XEXP (addr, 0);
2757 else
144d51f9 2758 gcc_unreachable ();
188538df 2759 }
144d51f9
NS
2760 gcc_assert (GET_CODE (addr) == REG);
2761 return addr;
188538df
TG
2762}
2763
188538df
TG
2764/* Emit code to perform a block move.
2765
188538df
TG
2766 OPERANDS[0] is the destination pointer as a REG, clobbered.
2767 OPERANDS[1] is the source pointer as a REG, clobbered.
68944452 2768 OPERANDS[2] is a register for temporary storage.
188538df 2769 OPERANDS[3] is a register for temporary storage.
cdc9103c 2770 OPERANDS[4] is the size as a CONST_INT
6619e96c 2771 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
71cc389b 2772 OPERANDS[6] is another temporary register. */
188538df 2773
519104fe 2774const char *
ae9d61ab 2775pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
188538df
TG
2776{
2777 int align = INTVAL (operands[5]);
68944452 2778 unsigned long n_bytes = INTVAL (operands[4]);
188538df 2779
cdc9103c 2780 /* We can't move more than a word at a time because the PA
188538df 2781 has no longer integer move insns. (Could use fp mem ops?) */
cdc9103c
JDA
2782 if (align > (TARGET_64BIT ? 8 : 4))
2783 align = (TARGET_64BIT ? 8 : 4);
188538df 2784
68944452
JL
2785 /* Note that we know each loop below will execute at least twice
2786 (else we would have open-coded the copy). */
2787 switch (align)
188538df 2788 {
cdc9103c
JDA
2789 case 8:
2790 /* Pre-adjust the loop counter. */
2791 operands[4] = GEN_INT (n_bytes - 16);
2792 output_asm_insn ("ldi %4,%2", operands);
2793
2794 /* Copying loop. */
2795 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2796 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2797 output_asm_insn ("std,ma %3,8(%0)", operands);
2798 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2799 output_asm_insn ("std,ma %6,8(%0)", operands);
2800
2801 /* Handle the residual. There could be up to 7 bytes of
2802 residual to copy! */
2803 if (n_bytes % 16 != 0)
2804 {
2805 operands[4] = GEN_INT (n_bytes % 8);
2806 if (n_bytes % 16 >= 8)
2807 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2808 if (n_bytes % 8 != 0)
2809 output_asm_insn ("ldd 0(%1),%6", operands);
2810 if (n_bytes % 16 >= 8)
2811 output_asm_insn ("std,ma %3,8(%0)", operands);
2812 if (n_bytes % 8 != 0)
2813 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2814 }
2815 return "";
2816
68944452
JL
2817 case 4:
2818 /* Pre-adjust the loop counter. */
2819 operands[4] = GEN_INT (n_bytes - 8);
2820 output_asm_insn ("ldi %4,%2", operands);
2821
2822 /* Copying loop. */
f38b27c7
JL
2823 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2824 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2825 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
68944452 2826 output_asm_insn ("addib,>= -8,%2,.-12", operands);
f38b27c7 2827 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
68944452
JL
2828
2829 /* Handle the residual. There could be up to 7 bytes of
2830 residual to copy! */
2831 if (n_bytes % 8 != 0)
2832 {
2833 operands[4] = GEN_INT (n_bytes % 4);
2834 if (n_bytes % 8 >= 4)
f38b27c7 2835 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
68944452 2836 if (n_bytes % 4 != 0)
d2d28085 2837 output_asm_insn ("ldw 0(%1),%6", operands);
68944452 2838 if (n_bytes % 8 >= 4)
f38b27c7 2839 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
68944452 2840 if (n_bytes % 4 != 0)
f38b27c7 2841 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
68944452
JL
2842 }
2843 return "";
188538df 2844
68944452
JL
2845 case 2:
2846 /* Pre-adjust the loop counter. */
2847 operands[4] = GEN_INT (n_bytes - 4);
2848 output_asm_insn ("ldi %4,%2", operands);
188538df 2849
68944452 2850 /* Copying loop. */
f38b27c7
JL
2851 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2852 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2853 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
68944452 2854 output_asm_insn ("addib,>= -4,%2,.-12", operands);
f38b27c7 2855 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
188538df 2856
68944452
JL
2857 /* Handle the residual. */
2858 if (n_bytes % 4 != 0)
2859 {
2860 if (n_bytes % 4 >= 2)
f38b27c7 2861 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
68944452 2862 if (n_bytes % 2 != 0)
d2d28085 2863 output_asm_insn ("ldb 0(%1),%6", operands);
68944452 2864 if (n_bytes % 4 >= 2)
f38b27c7 2865 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
68944452 2866 if (n_bytes % 2 != 0)
d2d28085 2867 output_asm_insn ("stb %6,0(%0)", operands);
68944452
JL
2868 }
2869 return "";
188538df 2870
68944452
JL
2871 case 1:
2872 /* Pre-adjust the loop counter. */
2873 operands[4] = GEN_INT (n_bytes - 2);
2874 output_asm_insn ("ldi %4,%2", operands);
188538df 2875
68944452 2876 /* Copying loop. */
f38b27c7
JL
2877 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2878 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2879 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
68944452 2880 output_asm_insn ("addib,>= -2,%2,.-12", operands);
f38b27c7 2881 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
188538df 2882
68944452
JL
2883 /* Handle the residual. */
2884 if (n_bytes % 2 != 0)
2885 {
d2d28085
JL
2886 output_asm_insn ("ldb 0(%1),%3", operands);
2887 output_asm_insn ("stb %3,0(%0)", operands);
68944452
JL
2888 }
2889 return "";
188538df 2890
68944452 2891 default:
144d51f9 2892 gcc_unreachable ();
188538df 2893 }
188538df 2894}
3673e996
RS
2895
2896/* Count the number of insns necessary to handle this block move.
2897
2898 Basic structure is the same as emit_block_move, except that we
2899 count insns rather than emit them. */
2900
519104fe 2901static int
70128ad9 2902compute_movmem_length (rtx insn)
3673e996
RS
2903{
2904 rtx pat = PATTERN (insn);
a36a47ad
GS
2905 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2906 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
68944452 2907 unsigned int n_insns = 0;
3673e996
RS
2908
2909 /* We can't move more than four bytes at a time because the PA
2910 has no longer integer move insns. (Could use fp mem ops?) */
cdc9103c
JDA
2911 if (align > (TARGET_64BIT ? 8 : 4))
2912 align = (TARGET_64BIT ? 8 : 4);
3673e996 2913
90304f64 2914 /* The basic copying loop. */
68944452 2915 n_insns = 6;
3673e996 2916
68944452
JL
2917 /* Residuals. */
2918 if (n_bytes % (2 * align) != 0)
3673e996 2919 {
90304f64
JL
2920 if ((n_bytes % (2 * align)) >= align)
2921 n_insns += 2;
2922
2923 if ((n_bytes % align) != 0)
2924 n_insns += 2;
3673e996 2925 }
68944452
JL
2926
2927 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2928 return n_insns * 4;
3673e996 2929}
cdc9103c
JDA
2930
2931/* Emit code to perform a block clear.
2932
2933 OPERANDS[0] is the destination pointer as a REG, clobbered.
2934 OPERANDS[1] is a register for temporary storage.
2935 OPERANDS[2] is the size as a CONST_INT
2936 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2937
2938const char *
ae9d61ab 2939pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
cdc9103c
JDA
2940{
2941 int align = INTVAL (operands[3]);
2942 unsigned long n_bytes = INTVAL (operands[2]);
2943
2944 /* We can't clear more than a word at a time because the PA
2945 has no longer integer move insns. */
2946 if (align > (TARGET_64BIT ? 8 : 4))
2947 align = (TARGET_64BIT ? 8 : 4);
2948
2949 /* Note that we know each loop below will execute at least twice
2950 (else we would have open-coded the copy). */
2951 switch (align)
2952 {
2953 case 8:
2954 /* Pre-adjust the loop counter. */
2955 operands[2] = GEN_INT (n_bytes - 16);
2956 output_asm_insn ("ldi %2,%1", operands);
2957
2958 /* Loop. */
2959 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2960 output_asm_insn ("addib,>= -16,%1,.-4", operands);
2961 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2962
2963 /* Handle the residual. There could be up to 7 bytes of
2964 residual to copy! */
2965 if (n_bytes % 16 != 0)
2966 {
2967 operands[2] = GEN_INT (n_bytes % 8);
2968 if (n_bytes % 16 >= 8)
2969 output_asm_insn ("std,ma %%r0,8(%0)", operands);
2970 if (n_bytes % 8 != 0)
2971 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
2972 }
2973 return "";
2974
2975 case 4:
2976 /* Pre-adjust the loop counter. */
2977 operands[2] = GEN_INT (n_bytes - 8);
2978 output_asm_insn ("ldi %2,%1", operands);
2979
2980 /* Loop. */
2981 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2982 output_asm_insn ("addib,>= -8,%1,.-4", operands);
2983 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2984
2985 /* Handle the residual. There could be up to 7 bytes of
2986 residual to copy! */
2987 if (n_bytes % 8 != 0)
2988 {
2989 operands[2] = GEN_INT (n_bytes % 4);
2990 if (n_bytes % 8 >= 4)
2991 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
2992 if (n_bytes % 4 != 0)
2993 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
2994 }
2995 return "";
2996
2997 case 2:
2998 /* Pre-adjust the loop counter. */
2999 operands[2] = GEN_INT (n_bytes - 4);
3000 output_asm_insn ("ldi %2,%1", operands);
3001
3002 /* Loop. */
3003 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3004 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3005 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3006
3007 /* Handle the residual. */
3008 if (n_bytes % 4 != 0)
3009 {
3010 if (n_bytes % 4 >= 2)
3011 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3012 if (n_bytes % 2 != 0)
3013 output_asm_insn ("stb %%r0,0(%0)", operands);
3014 }
3015 return "";
3016
3017 case 1:
3018 /* Pre-adjust the loop counter. */
3019 operands[2] = GEN_INT (n_bytes - 2);
3020 output_asm_insn ("ldi %2,%1", operands);
3021
3022 /* Loop. */
3023 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3024 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3025 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3026
3027 /* Handle the residual. */
3028 if (n_bytes % 2 != 0)
3029 output_asm_insn ("stb %%r0,0(%0)", operands);
3030
3031 return "";
3032
3033 default:
144d51f9 3034 gcc_unreachable ();
cdc9103c
JDA
3035 }
3036}
3037
3038/* Count the number of insns necessary to handle this block move.
3039
3040 Basic structure is the same as emit_block_move, except that we
3041 count insns rather than emit them. */
3042
3043static int
70128ad9 3044compute_clrmem_length (rtx insn)
cdc9103c
JDA
3045{
3046 rtx pat = PATTERN (insn);
3047 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3048 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3049 unsigned int n_insns = 0;
3050
3051 /* We can't clear more than a word at a time because the PA
3052 has no longer integer move insns. */
3053 if (align > (TARGET_64BIT ? 8 : 4))
3054 align = (TARGET_64BIT ? 8 : 4);
3055
3056 /* The basic loop. */
3057 n_insns = 4;
3058
3059 /* Residuals. */
3060 if (n_bytes % (2 * align) != 0)
3061 {
3062 if ((n_bytes % (2 * align)) >= align)
3063 n_insns++;
3064
3065 if ((n_bytes % align) != 0)
3066 n_insns++;
3067 }
3068
3069 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3070 return n_insns * 4;
3071}
188538df
TG
3072\f
3073
519104fe 3074const char *
ae9d61ab 3075pa_output_and (rtx *operands)
0e7f4c19 3076{
d2a94ec0 3077 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
0e7f4c19 3078 {
0c235d7e 3079 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
0e7f4c19
TG
3080 int ls0, ls1, ms0, p, len;
3081
3082 for (ls0 = 0; ls0 < 32; ls0++)
3083 if ((mask & (1 << ls0)) == 0)
3084 break;
3085
3086 for (ls1 = ls0; ls1 < 32; ls1++)
3087 if ((mask & (1 << ls1)) != 0)
3088 break;
3089
3090 for (ms0 = ls1; ms0 < 32; ms0++)
3091 if ((mask & (1 << ms0)) == 0)
3092 break;
3093
144d51f9 3094 gcc_assert (ms0 == 32);
0e7f4c19
TG
3095
3096 if (ls1 == 32)
3097 {
3098 len = ls0;
3099
144d51f9 3100 gcc_assert (len);
0e7f4c19 3101
8919037c 3102 operands[2] = GEN_INT (len);
f38b27c7 3103 return "{extru|extrw,u} %1,31,%2,%0";
0e7f4c19
TG
3104 }
3105 else
3106 {
3107 /* We could use this `depi' for the case above as well, but `depi'
3108 requires one more register file access than an `extru'. */
3109
3110 p = 31 - ls0;
3111 len = ls1 - ls0;
3112
8919037c
TG
3113 operands[2] = GEN_INT (p);
3114 operands[3] = GEN_INT (len);
f38b27c7 3115 return "{depi|depwi} 0,%2,%3,%0";
0e7f4c19
TG
3116 }
3117 }
3118 else
3119 return "and %1,%2,%0";
3120}
3121
520babc7
JL
3122/* Return a string to perform a bitwise-and of operands[1] with operands[2]
3123 storing the result in operands[0]. */
0952f89b 3124const char *
ae9d61ab 3125pa_output_64bit_and (rtx *operands)
520babc7
JL
3126{
3127 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3128 {
3129 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
e0c556d3 3130 int ls0, ls1, ms0, p, len;
520babc7
JL
3131
3132 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
e0c556d3 3133 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
520babc7
JL
3134 break;
3135
3136 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
e0c556d3 3137 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
520babc7
JL
3138 break;
3139
3140 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
e0c556d3 3141 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
520babc7
JL
3142 break;
3143
144d51f9 3144 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
520babc7
JL
3145
3146 if (ls1 == HOST_BITS_PER_WIDE_INT)
3147 {
3148 len = ls0;
3149
144d51f9 3150 gcc_assert (len);
520babc7
JL
3151
3152 operands[2] = GEN_INT (len);
3153 return "extrd,u %1,63,%2,%0";
3154 }
3155 else
3156 {
3157 /* We could use this `depi' for the case above as well, but `depi'
3158 requires one more register file access than an `extru'. */
3159
3160 p = 63 - ls0;
3161 len = ls1 - ls0;
3162
3163 operands[2] = GEN_INT (p);
3164 operands[3] = GEN_INT (len);
3165 return "depdi 0,%2,%3,%0";
3166 }
3167 }
3168 else
3169 return "and %1,%2,%0";
3170}
3171
519104fe 3172const char *
ae9d61ab 3173pa_output_ior (rtx *operands)
0e7f4c19 3174{
0c235d7e 3175 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
f1c7ce82 3176 int bs0, bs1, p, len;
23f6f34f 3177
8365d59b
TG
3178 if (INTVAL (operands[2]) == 0)
3179 return "copy %1,%0";
0e7f4c19 3180
8365d59b
TG
3181 for (bs0 = 0; bs0 < 32; bs0++)
3182 if ((mask & (1 << bs0)) != 0)
3183 break;
0e7f4c19 3184
8365d59b
TG
3185 for (bs1 = bs0; bs1 < 32; bs1++)
3186 if ((mask & (1 << bs1)) == 0)
3187 break;
0e7f4c19 3188
144d51f9 3189 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
0e7f4c19 3190
8365d59b
TG
3191 p = 31 - bs0;
3192 len = bs1 - bs0;
0e7f4c19 3193
8919037c
TG
3194 operands[2] = GEN_INT (p);
3195 operands[3] = GEN_INT (len);
f38b27c7 3196 return "{depi|depwi} -1,%2,%3,%0";
0e7f4c19 3197}
520babc7
JL
3198
3199/* Return a string to perform a bitwise-and of operands[1] with operands[2]
3200 storing the result in operands[0]. */
0952f89b 3201const char *
ae9d61ab 3202pa_output_64bit_ior (rtx *operands)
520babc7
JL
3203{
3204 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
e0c556d3 3205 int bs0, bs1, p, len;
520babc7
JL
3206
3207 if (INTVAL (operands[2]) == 0)
3208 return "copy %1,%0";
3209
3210 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
e0c556d3 3211 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
520babc7
JL
3212 break;
3213
3214 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
e0c556d3 3215 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
520babc7
JL
3216 break;
3217
144d51f9
NS
3218 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3219 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
520babc7
JL
3220
3221 p = 63 - bs0;
3222 len = bs1 - bs0;
3223
3224 operands[2] = GEN_INT (p);
3225 operands[3] = GEN_INT (len);
3226 return "depdi -1,%2,%3,%0";
3227}
0e7f4c19 3228\f
301d03af 3229/* Target hook for assembling integer objects. This code handles
cdcb88d7
JDA
3230 aligned SI and DI integers specially since function references
3231 must be preceded by P%. */
301d03af
RS
3232
3233static bool
b7849684 3234pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
301d03af 3235{
cdcb88d7
JDA
3236 if (size == UNITS_PER_WORD
3237 && aligned_p
301d03af
RS
3238 && function_label_operand (x, VOIDmode))
3239 {
3240 fputs (size == 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file);
3241 output_addr_const (asm_out_file, x);
3242 fputc ('\n', asm_out_file);
3243 return true;
3244 }
3245 return default_assemble_integer (x, size, aligned_p);
3246}
3247\f
/* Output an ascii string.

   Writes the SIZE bytes at P to FILE as one or more .STRING directives.
   A double quote or backslash is preceded by a backslash, and any byte
   outside the printable range ' ' .. 0176 is written as \xNN with
   lowercase hex digits.  */
void
pa_output_ascii (FILE *file, const char *p, int size)
{
  static const char hex_digits[] = "0123456789abcdef";
  int line_len = 0;
  int base;

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  /* Work in groups of up to four input bytes so that an escape
     sequence is never split across two directives.  */
  for (base = 0; base < size; base += 4)
    {
      unsigned char chunk[16];	/* Max space 4 chars can occupy.  */
      int chunk_len = 0;
      int remaining = size - base;
      int count = remaining < 4 ? remaining : 4;
      int k;

      for (k = 0; k < count; k++)
        {
          unsigned int c = (unsigned char) p[base + k];

          if (c == '\"' || c == '\\')
            chunk[chunk_len++] = '\\';

          if (c >= ' ' && c < 0177)
            chunk[chunk_len++] = c;
          else
            {
              chunk[chunk_len++] = '\\';
              chunk[chunk_len++] = 'x';
              chunk[chunk_len++] = hex_digits[c / 16];
              chunk[chunk_len++] = hex_digits[c % 16];
            }
        }

      /* Start a new directive before the current one gets too long.  */
      if (line_len + chunk_len > 243)
        {
          fputs ("\"\n\t.STRING \"", file);
          line_len = 0;
        }

      fwrite (chunk, 1, (size_t) chunk_len, file);
      line_len += chunk_len;
    }

  fputs ("\"\n", file);
}
5621d717
JL
3303
3304/* Try to rewrite floating point comparisons & branches to avoid
3305 useless add,tr insns.
3306
3307 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3308 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3309 first attempt to remove useless add,tr insns. It is zero
3310 for the second pass as reorg sometimes leaves bogus REG_DEAD
3311 notes lying around.
3312
3313 When CHECK_NOTES is zero we can only eliminate add,tr insns
3314 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3315 instructions. */
519104fe 3316static void
b7849684 3317remove_useless_addtr_insns (int check_notes)
5621d717
JL
3318{
3319 rtx insn;
5621d717
JL
3320 static int pass = 0;
3321
3322 /* This is fairly cheap, so always run it when optimizing. */
3323 if (optimize > 0)
3324 {
3325 int fcmp_count = 0;
3326 int fbranch_count = 0;
3327
3328 /* Walk all the insns in this function looking for fcmp & fbranch
3329 instructions. Keep track of how many of each we find. */
18dbd950 3330 for (insn = get_insns (); insn; insn = next_insn (insn))
5621d717
JL
3331 {
3332 rtx tmp;
3333
3334 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
b64925dc 3335 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
5621d717
JL
3336 continue;
3337
3338 tmp = PATTERN (insn);
3339
3340 /* It must be a set. */
3341 if (GET_CODE (tmp) != SET)
3342 continue;
3343
3344 /* If the destination is CCFP, then we've found an fcmp insn. */
3345 tmp = SET_DEST (tmp);
3346 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3347 {
3348 fcmp_count++;
3349 continue;
3350 }
6619e96c 3351
5621d717
JL
3352 tmp = PATTERN (insn);
3353 /* If this is an fbranch instruction, bump the fbranch counter. */
3354 if (GET_CODE (tmp) == SET
3355 && SET_DEST (tmp) == pc_rtx
3356 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3357 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3358 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3359 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3360 {
3361 fbranch_count++;
3362 continue;
3363 }
3364 }
3365
3366
3367 /* Find all floating point compare + branch insns. If possible,
3368 reverse the comparison & the branch to avoid add,tr insns. */
18dbd950 3369 for (insn = get_insns (); insn; insn = next_insn (insn))
5621d717
JL
3370 {
3371 rtx tmp, next;
3372
3373 /* Ignore anything that isn't an INSN. */
b64925dc 3374 if (! NONJUMP_INSN_P (insn))
5621d717
JL
3375 continue;
3376
3377 tmp = PATTERN (insn);
3378
3379 /* It must be a set. */
3380 if (GET_CODE (tmp) != SET)
3381 continue;
3382
3383 /* The destination must be CCFP, which is register zero. */
3384 tmp = SET_DEST (tmp);
3385 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3386 continue;
3387
3388 /* INSN should be a set of CCFP.
3389
3390 See if the result of this insn is used in a reversed FP
3391 conditional branch. If so, reverse our condition and
3392 the branch. Doing so avoids useless add,tr insns. */
3393 next = next_insn (insn);
3394 while (next)
3395 {
3396 /* Jumps, calls and labels stop our search. */
b64925dc 3397 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
5621d717
JL
3398 break;
3399
3400 /* As does another fcmp insn. */
b64925dc 3401 if (NONJUMP_INSN_P (next)
5621d717
JL
3402 && GET_CODE (PATTERN (next)) == SET
3403 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3404 && REGNO (SET_DEST (PATTERN (next))) == 0)
3405 break;
3406
3407 next = next_insn (next);
3408 }
3409
3410 /* Is NEXT_INSN a branch? */
b64925dc 3411 if (next && JUMP_P (next))
5621d717
JL
3412 {
3413 rtx pattern = PATTERN (next);
3414
112cdef5 3415 /* If it a reversed fp conditional branch (e.g. uses add,tr)
5621d717
JL
3416 and CCFP dies, then reverse our conditional and the branch
3417 to avoid the add,tr. */
3418 if (GET_CODE (pattern) == SET
3419 && SET_DEST (pattern) == pc_rtx
3420 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3421 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3422 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3423 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3424 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3425 && (fcmp_count == fbranch_count
3426 || (check_notes
3427 && find_regno_note (next, REG_DEAD, 0))))
3428 {
3429 /* Reverse the branch. */
3430 tmp = XEXP (SET_SRC (pattern), 1);
3431 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3432 XEXP (SET_SRC (pattern), 2) = tmp;
3433 INSN_CODE (next) = -1;
3434
3435 /* Reverse our condition. */
3436 tmp = PATTERN (insn);
3437 PUT_CODE (XEXP (tmp, 1),
831c1763
AM
3438 (reverse_condition_maybe_unordered
3439 (GET_CODE (XEXP (tmp, 1)))));
5621d717
JL
3440 }
3441 }
3442 }
3443 }
3444
3445 pass = !pass;
3446
3447}
188538df 3448\f
831c1763
AM
3449/* You may have trouble believing this, but this is the 32 bit HP-PA
3450 stack layout. Wow.
188538df
TG
3451
3452 Offset Contents
3453
3454 Variable arguments (optional; any number may be allocated)
3455
3456 SP-(4*(N+9)) arg word N
3457 : :
3458 SP-56 arg word 5
3459 SP-52 arg word 4
3460
3461 Fixed arguments (must be allocated; may remain unused)
3462
3463 SP-48 arg word 3
3464 SP-44 arg word 2
3465 SP-40 arg word 1
3466 SP-36 arg word 0
3467
3468 Frame Marker
3469
3470 SP-32 External Data Pointer (DP)
3471 SP-28 External sr4
3472 SP-24 External/stub RP (RP')
3473 SP-20 Current RP
3474 SP-16 Static Link
3475 SP-12 Clean up
3476 SP-8 Calling Stub RP (RP'')
3477 SP-4 Previous SP
3478
3479 Top of Frame
3480
3481 SP-0 Stack Pointer (points to next available address)
3482
3483*/
3484
3485/* This function saves registers as follows. Registers marked with ' are
3486 this function's registers (as opposed to the previous function's).
3487 If a frame_pointer isn't needed, r4 is saved as a general register;
3488 the space for the frame pointer is still allocated, though, to keep
3489 things simple.
3490
3491
3492 Top of Frame
3493
3494 SP (FP') Previous FP
3495 SP + 4 Alignment filler (sigh)
3496 SP + 8 Space for locals reserved here.
3497 .
3498 .
3499 .
3500 SP + n All call saved register used.
3501 .
3502 .
3503 .
3504 SP + o All call saved fp registers used.
3505 .
3506 .
3507 .
3508 SP + p (SP') points to next available address.
23f6f34f 3509
188538df
TG
3510*/
3511
08c148a8 3512/* Global variables set by output_function_prologue(). */
19ec6a36
AM
3513/* Size of frame. Need to know this to emit return insns from
3514 leaf procedures. */
a4295210
JDA
3515static HOST_WIDE_INT actual_fsize, local_fsize;
3516static int save_fregs;
19ec6a36 3517
aadcdb45 3518/* Emit RTL to store REG at the memory location specified by BASE+DISP.
fc82f2f1 3519 Handle case where DISP > 8k by using the add_high_const patterns.
aadcdb45
JL
3520
3521 Note in DISP > 8k case, we will leave the high part of the address
3522 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
c5c76735 3523
f6bcf44c 3524static void
a4295210 3525store_reg (int reg, HOST_WIDE_INT disp, int base)
188538df 3526{
f6bcf44c 3527 rtx insn, dest, src, basereg;
19ec6a36
AM
3528
3529 src = gen_rtx_REG (word_mode, reg);
3530 basereg = gen_rtx_REG (Pmode, base);
188538df 3531 if (VAL_14_BITS_P (disp))
aadcdb45 3532 {
0a81f074 3533 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
f6bcf44c 3534 insn = emit_move_insn (dest, src);
aadcdb45 3535 }
a4295210
JDA
3536 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3537 {
3538 rtx delta = GEN_INT (disp);
3539 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3540
3541 emit_move_insn (tmpreg, delta);
5dcc9605 3542 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
a4295210
JDA
3543 if (DO_FRAME_NOTES)
3544 {
bbbbb16a
ILT
3545 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3546 gen_rtx_SET (VOIDmode, tmpreg,
3547 gen_rtx_PLUS (Pmode, basereg, delta)));
5dcc9605 3548 RTX_FRAME_RELATED_P (insn) = 1;
a4295210 3549 }
5dcc9605
JDA
3550 dest = gen_rtx_MEM (word_mode, tmpreg);
3551 insn = emit_move_insn (dest, src);
a4295210 3552 }
aadcdb45
JL
3553 else
3554 {
19ec6a36
AM
3555 rtx delta = GEN_INT (disp);
3556 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3557 rtx tmpreg = gen_rtx_REG (Pmode, 1);
a4295210 3558
19ec6a36
AM
3559 emit_move_insn (tmpreg, high);
3560 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
f6bcf44c
JDA
3561 insn = emit_move_insn (dest, src);
3562 if (DO_FRAME_NOTES)
bbbbb16a
ILT
3563 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3564 gen_rtx_SET (VOIDmode,
3565 gen_rtx_MEM (word_mode,
3566 gen_rtx_PLUS (word_mode,
3567 basereg,
3568 delta)),
3569 src));
aadcdb45 3570 }
f6bcf44c
JDA
3571
3572 if (DO_FRAME_NOTES)
3573 RTX_FRAME_RELATED_P (insn) = 1;
aadcdb45
JL
3574}
3575
823fbbce
JDA
3576/* Emit RTL to store REG at the memory location specified by BASE and then
3577 add MOD to BASE. MOD must be <= 8k. */
aadcdb45 3578
823fbbce 3579static void
a4295210 3580store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
823fbbce
JDA
3581{
3582 rtx insn, basereg, srcreg, delta;
3583
144d51f9 3584 gcc_assert (VAL_14_BITS_P (mod));
823fbbce
JDA
3585
3586 basereg = gen_rtx_REG (Pmode, base);
3587 srcreg = gen_rtx_REG (word_mode, reg);
3588 delta = GEN_INT (mod);
3589
3590 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3591 if (DO_FRAME_NOTES)
3592 {
3593 RTX_FRAME_RELATED_P (insn) = 1;
3594
3595 /* RTX_FRAME_RELATED_P must be set on each frame related set
77c4f044
RH
3596 in a parallel with more than one element. */
3597 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3598 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
823fbbce
JDA
3599 }
3600}
3601
3602/* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3603 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3604 whether to add a frame note or not.
3605
3606 In the DISP > 8k case, we leave the high part of the address in %r1.
3607 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
c5c76735 3608
f6bcf44c 3609static void
a4295210 3610set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
188538df 3611{
f6bcf44c 3612 rtx insn;
19ec6a36 3613
188538df 3614 if (VAL_14_BITS_P (disp))
19ec6a36 3615 {
f6bcf44c 3616 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
0a81f074
RS
3617 plus_constant (Pmode,
3618 gen_rtx_REG (Pmode, base), disp));
19ec6a36 3619 }
a4295210
JDA
3620 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3621 {
3622 rtx basereg = gen_rtx_REG (Pmode, base);
3623 rtx delta = GEN_INT (disp);
3624 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3625
3626 emit_move_insn (tmpreg, delta);
3627 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3628 gen_rtx_PLUS (Pmode, tmpreg, basereg));
5dcc9605 3629 if (DO_FRAME_NOTES)
bbbbb16a
ILT
3630 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3631 gen_rtx_SET (VOIDmode, tmpreg,
3632 gen_rtx_PLUS (Pmode, basereg, delta)));
a4295210 3633 }
188538df 3634 else
aadcdb45 3635 {
f6bcf44c 3636 rtx basereg = gen_rtx_REG (Pmode, base);
19ec6a36 3637 rtx delta = GEN_INT (disp);
a4295210 3638 rtx tmpreg = gen_rtx_REG (Pmode, 1);
f6bcf44c 3639
a4295210 3640 emit_move_insn (tmpreg,
f6bcf44c 3641 gen_rtx_PLUS (Pmode, basereg,
19ec6a36 3642 gen_rtx_HIGH (Pmode, delta)));
f6bcf44c 3643 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
a4295210 3644 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
aadcdb45 3645 }
f6bcf44c 3646
823fbbce 3647 if (DO_FRAME_NOTES && note)
f6bcf44c 3648 RTX_FRAME_RELATED_P (insn) = 1;
188538df
TG
3649}
3650
a4295210 3651HOST_WIDE_INT
ae9d61ab 3652pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
188538df 3653{
95f3f59e
JDA
3654 int freg_saved = 0;
3655 int i, j;
3656
ae9d61ab 3657 /* The code in pa_expand_prologue and pa_expand_epilogue must
95f3f59e
JDA
3658 be consistent with the rounding and size calculation done here.
3659 Change them at the same time. */
3660
3661 /* We do our own stack alignment. First, round the size of the
3662 stack locals up to a word boundary. */
3663 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3664
3665 /* Space for previous frame pointer + filler. If any frame is
3666 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3667 waste some space here for the sake of HP compatibility. The
3668 first slot is only used when the frame pointer is needed. */
3669 if (size || frame_pointer_needed)
3670 size += STARTING_FRAME_OFFSET;
3671
823fbbce
JDA
3672 /* If the current function calls __builtin_eh_return, then we need
3673 to allocate stack space for registers that will hold data for
3674 the exception handler. */
e3b5732b 3675 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
3676 {
3677 unsigned int i;
3678
3679 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3680 continue;
95f3f59e 3681 size += i * UNITS_PER_WORD;
823fbbce
JDA
3682 }
3683
6261ede7 3684 /* Account for space used by the callee general register saves. */
95f3f59e 3685 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
6fb5fa3c 3686 if (df_regs_ever_live_p (i))
95f3f59e 3687 size += UNITS_PER_WORD;
80225b66 3688
6261ede7 3689 /* Account for space used by the callee floating point register saves. */
88624c0e 3690 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
6fb5fa3c
DB
3691 if (df_regs_ever_live_p (i)
3692 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
80225b66 3693 {
95f3f59e 3694 freg_saved = 1;
9e18f575 3695
6261ede7
JL
3696 /* We always save both halves of the FP register, so always
3697 increment the frame size by 8 bytes. */
95f3f59e 3698 size += 8;
80225b66
TG
3699 }
3700
95f3f59e
JDA
3701 /* If any of the floating registers are saved, account for the
3702 alignment needed for the floating point register save block. */
3703 if (freg_saved)
3704 {
3705 size = (size + 7) & ~7;
3706 if (fregs_live)
3707 *fregs_live = 1;
3708 }
3709
6261ede7 3710 /* The various ABIs include space for the outgoing parameters in the
95f3f59e
JDA
3711 size of the current function's stack frame. We don't need to align
3712 for the outgoing arguments as their alignment is set by the final
3713 rounding for the frame as a whole. */
38173d38 3714 size += crtl->outgoing_args_size;
6261ede7
JL
3715
3716 /* Allocate space for the fixed frame marker. This space must be
685d0e07 3717 allocated for any function that makes calls or allocates
6261ede7 3718 stack space. */
416ff32e 3719 if (!crtl->is_leaf || size)
685d0e07 3720 size += TARGET_64BIT ? 48 : 32;
520babc7 3721
95f3f59e 3722 /* Finally, round to the preferred stack boundary. */
5fad1c24
JDA
3723 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3724 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
188538df 3725}
23f6f34f 3726
08c148a8
NB
3727/* Generate the assembly code for function entry. FILE is a stdio
3728 stream to output the code to. SIZE is an int: how many units of
3729 temporary storage to allocate.
3730
3731 Refer to the array `regs_ever_live' to determine which registers to
3732 save; `regs_ever_live[I]' is nonzero if register number I is ever
3733 used in the function. This function is responsible for knowing
3734 which registers should not be saved even if used. */
3735
3736/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3737 of memory. If any fpu reg is used in the function, we allocate
3738 such a block here, at the bottom of the frame, just in case it's needed.
3739
3740 If this function is a leaf procedure, then we may choose not
3741 to do a "save" insn. The decision about whether or not
3742 to do this is made in regclass.c. */
3743
c590b625 3744static void
b7849684 3745pa_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
188538df 3746{
ba0bfdac
JL
3747 /* The function's label and associated .PROC must never be
3748 separated and must be output *after* any profiling declarations
3749 to avoid changing spaces/subspaces within a procedure. */
3750 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3751 fputs ("\t.PROC\n", file);
3752
ae9d61ab 3753 /* pa_expand_prologue does the dirty work now. We just need
aadcdb45
JL
3754 to output the assembler directives which denote the start
3755 of a function. */
a4295210 3756 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
416ff32e 3757 if (crtl->is_leaf)
e236a9ff 3758 fputs (",NO_CALLS", file);
16c16a24
JDA
3759 else
3760 fputs (",CALLS", file);
3761 if (rp_saved)
3762 fputs (",SAVE_RP", file);
da3c3336 3763
685d0e07
JDA
3764 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3765 at the beginning of the frame and that it is used as the frame
3766 pointer for the frame. We do this because our current frame
a4d05547 3767 layout doesn't conform to that specified in the HP runtime
685d0e07
JDA
3768 documentation and we need a way to indicate to programs such as
3769 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3770 isn't used by HP compilers but is supported by the assembler.
3771 However, SAVE_SP is supposed to indicate that the previous stack
3772 pointer has been saved in the frame marker. */
da3c3336 3773 if (frame_pointer_needed)
e236a9ff 3774 fputs (",SAVE_SP", file);
da3c3336 3775
68386e1e 3776 /* Pass on information about the number of callee register saves
e8cfae5c
JL
3777 performed in the prologue.
3778
3779 The compiler is supposed to pass the highest register number
23f6f34f 3780 saved, the assembler then has to adjust that number before
e8cfae5c 3781 entering it into the unwind descriptor (to account for any
23f6f34f 3782 caller saved registers with lower register numbers than the
e8cfae5c
JL
3783 first callee saved register). */
3784 if (gr_saved)
3785 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3786
3787 if (fr_saved)
3788 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
68386e1e 3789
e236a9ff 3790 fputs ("\n\t.ENTRY\n", file);
aadcdb45 3791
18dbd950 3792 remove_useless_addtr_insns (0);
aadcdb45
JL
3793}
3794
/* Emit the RTL for the prologue of the current function.

   The function resets gr_saved, fr_saved and save_fregs, computes
   local_fsize and actual_fsize (the latter through
   pa_compute_frame_size), and sets rp_saved.  These variables are
   declared outside this function; pa_output_function_prologue and
   pa_expand_epilogue read them afterwards.

   In order, the emitted code
     - stores %r2 (RP) at sp - 16 or sp - 20 when it is ever live or the
       function calls __builtin_eh_return;
     - allocates the frame, setting up the frame pointer when one is
       needed, or defers the allocation so it can be merged with the
       first register save;
     - saves the EH return data registers and the live callee-saved
       general registers;
     - stores the PIC register at sp - 32 for 32-bit PIC code with a
       non-empty frame;
     - saves the live callee-saved floating point registers through %r1.  */
f1c7ce82 3795void
ae9d61ab 3796pa_expand_prologue (void)
aadcdb45 3797{
4971c587 3798 int merge_sp_adjust_with_store = 0;
a4295210
JDA
3799 HOST_WIDE_INT size = get_frame_size ();
3800 HOST_WIDE_INT offset;
3801 int i;
823fbbce 3802 rtx insn, tmpreg;
aadcdb45 3803
68386e1e
JL
3804 gr_saved = 0;
3805 fr_saved = 0;
8a9c76f3 3806 save_fregs = 0;
6261ede7 3807
95f3f59e 3808 /* Compute total size for frame pointer, filler, locals and rounding to
ae9d61ab 3809 the next word boundary. Similar code appears in pa_compute_frame_size
95f3f59e
JDA
3810 and must be changed in tandem with this code. */
3811 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3812 if (local_fsize || frame_pointer_needed)
3813 local_fsize += STARTING_FRAME_OFFSET;
6261ede7 3814
ae9d61ab 3815 actual_fsize = pa_compute_frame_size (size, &save_fregs);
a11e0df4 3816 if (flag_stack_usage_info)
d3c12306 3817 current_function_static_stack_size = actual_fsize;
188538df 3818
aadcdb45 3819 /* Compute a few things we will use often. */
690d4228 3820 tmpreg = gen_rtx_REG (word_mode, 1);
188538df 3821
23f6f34f 3822 /* Save RP first. The calling conventions manual states RP will
19ec6a36 3823 always be stored into the caller's frame at sp - 20 or sp - 16
520babc7 3824 depending on which ABI is in use. */
e3b5732b 3825 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
16c16a24
JDA
3826 {
3827 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3828 rp_saved = true;
3829 }
3830 else
3831 rp_saved = false;
23f6f34f 3832
aadcdb45 3833 /* Allocate the local frame and set up the frame pointer if needed. */
31d68947
AM
3834 if (actual_fsize != 0)
3835 {
3836 if (frame_pointer_needed)
3837 {
3838 /* Copy the old frame pointer temporarily into %r1. Set up the
3839 new stack pointer, then store away the saved old frame pointer
823fbbce
JDA
3840 into the stack at sp and at the same time update the stack
3841 pointer by actual_fsize bytes. Two versions, first
31d68947
AM
3842 handles small (<8k) frames. The second handles large (>=8k)
3843 frames. */
bc707992 3844 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
823fbbce 3845 if (DO_FRAME_NOTES)
77c4f044 3846 RTX_FRAME_RELATED_P (insn) = 1;
823fbbce 3847
bc707992 3848 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
823fbbce
JDA
3849 if (DO_FRAME_NOTES)
3850 RTX_FRAME_RELATED_P (insn) = 1;
3851
3852 if (VAL_14_BITS_P (actual_fsize))
3853 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
31d68947
AM
3854 else
3855 {
3856 /* It is incorrect to store the saved frame pointer at *sp,
3857 then increment sp (writes beyond the current stack boundary).
3858
3859 So instead use stwm to store at *sp and post-increment the
3860 stack pointer as an atomic operation. Then increment sp to
3861 finish allocating the new frame. */
a4295210
JDA
3862 HOST_WIDE_INT adjust1 = 8192 - 64;
3863 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
19ec6a36 3864
823fbbce 3865 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
f6bcf44c 3866 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
823fbbce 3867 adjust2, 1);
31d68947 3868 }
823fbbce 3869
685d0e07
JDA
3870 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3871 we need to store the previous stack pointer (frame pointer)
3872 into the frame marker on targets that use the HP unwind
3873 library. This allows the HP unwind library to be used to
3874 unwind GCC frames. However, we are not fully compatible
3875 with the HP library because our frame layout differs from
3876 that specified in the HP runtime specification.
3877
3878 We don't want a frame note on this instruction as the frame
3879 marker moves during dynamic stack allocation.
3880
3881 This instruction also serves as a blockage to prevent
3882 register spills from being scheduled before the stack
3883 pointer is raised. This is necessary as we store
3884 registers using the frame pointer as a base register,
3885 and the frame pointer is set before sp is raised. */
3886 if (TARGET_HPUX_UNWIND_LIBRARY)
3887 {
3888 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3889 GEN_INT (TARGET_64BIT ? -8 : -4));
3890
3891 emit_move_insn (gen_rtx_MEM (word_mode, addr),
bc707992 3892 hard_frame_pointer_rtx);
685d0e07
JDA
3893 }
3894 else
3895 emit_insn (gen_blockage ());
31d68947
AM
3896 }
3897 /* no frame pointer needed. */
3898 else
3899 {
3900 /* In some cases we can perform the first callee register save
3901 and allocating the stack frame at the same time. If so, just
3902 make a note of it and defer allocating the frame until saving
3903 the callee registers. */
1c7a8112 3904 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
31d68947
AM
3905 merge_sp_adjust_with_store = 1;
3906 /* Can not optimize. Adjust the stack frame by actual_fsize
3907 bytes. */
3908 else
f6bcf44c 3909 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
823fbbce 3910 actual_fsize, 1);
31d68947 3911 }
a9d91d6f
RS
3912 }
3913
23f6f34f 3914 /* Normal register save.
aadcdb45
JL
3915
3916 Do not save the frame pointer in the frame_pointer_needed case. It
3917 was done earlier. */
188538df
TG
3918 if (frame_pointer_needed)
3919 {
823fbbce
JDA
/* With a frame pointer, the save slots are addressed from the frame
   pointer, starting at offset local_fsize and growing upward.  */
3920 offset = local_fsize;
3921
3922 /* Saving the EH return data registers in the frame is the simplest
3923 way to get the frame unwind information emitted. We put them
3924 just before the general registers. */
e3b5732b 3925 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
3926 {
3927 unsigned int i, regno;
3928
3929 for (i = 0; ; ++i)
3930 {
3931 regno = EH_RETURN_DATA_REGNO (i);
3932 if (regno == INVALID_REGNUM)
3933 break;
3934
bc707992 3935 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
823fbbce
JDA
3936 offset += UNITS_PER_WORD;
3937 }
3938 }
3939
3940 for (i = 18; i >= 4; i--)
6fb5fa3c 3941 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
188538df 3942 {
bc707992 3943 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
d7735a07 3944 offset += UNITS_PER_WORD;
68386e1e 3945 gr_saved++;
188538df 3946 }
e63ffc38 3947 /* Account for %r3 which is saved in a special place. */
e8cfae5c 3948 gr_saved++;
188538df 3949 }
aadcdb45 3950 /* No frame pointer needed. */
188538df
TG
3951 else
3952 {
823fbbce
JDA
/* Without a frame pointer, the save slots are addressed from the stack
   pointer, starting at local_fsize - actual_fsize.  If the sp
   adjustment was deferred, local_fsize is 0, so -offset equals
   actual_fsize at the first save: that store_reg_modify stores at the
   old sp and allocates the whole frame in one insn.  */
3953 offset = local_fsize - actual_fsize;
3954
3955 /* Saving the EH return data registers in the frame is the simplest
3956 way to get the frame unwind information emitted. */
e3b5732b 3957 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
3958 {
3959 unsigned int i, regno;
3960
3961 for (i = 0; ; ++i)
3962 {
3963 regno = EH_RETURN_DATA_REGNO (i);
3964 if (regno == INVALID_REGNUM)
3965 break;
3966
3967 /* If merge_sp_adjust_with_store is nonzero, then we can
3968 optimize the first save. */
3969 if (merge_sp_adjust_with_store)
3970 {
3971 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
3972 merge_sp_adjust_with_store = 0;
3973 }
3974 else
3975 store_reg (regno, offset, STACK_POINTER_REGNUM);
3976 offset += UNITS_PER_WORD;
3977 }
3978 }
3979
3980 for (i = 18; i >= 3; i--)
6fb5fa3c 3981 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
188538df 3982 {
23f6f34f 3983 /* If merge_sp_adjust_with_store is nonzero, then we can
4971c587 3984 optimize the first GR save. */
f133af4c 3985 if (merge_sp_adjust_with_store)
4971c587 3986 {
823fbbce 3987 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4971c587 3988 merge_sp_adjust_with_store = 0;
4971c587
JL
3989 }
3990 else
f6bcf44c 3991 store_reg (i, offset, STACK_POINTER_REGNUM);
d7735a07 3992 offset += UNITS_PER_WORD;
68386e1e 3993 gr_saved++;
188538df 3994 }
aadcdb45 3995
4971c587 3996 /* If we wanted to merge the SP adjustment with a GR save, but we never
aadcdb45 3997 did any GR saves, then just emit the adjustment here. */
f133af4c 3998 if (merge_sp_adjust_with_store)
f6bcf44c 3999 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
823fbbce 4000 actual_fsize, 1);
188538df 4001 }
23f6f34f 4002
1c7a8112
AM
4003 /* The hppa calling conventions say that %r19, the pic offset
4004 register, is saved at sp - 32 (in this function's frame)
4005 when generating PIC code. FIXME: What is the correct thing
4006 to do for functions which make no calls and allocate no
4007 frame? Do we need to allocate a frame, or can we just omit
3ffa9dc1
JDA
4008 the save? For now we'll just omit the save.
4009
4010 We don't want a note on this insn as the frame marker can
4011 move if there is a dynamic stack allocation. */
1c7a8112 4012 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3ffa9dc1
JDA
4013 {
4014 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4015
4016 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4017
4018 }
1c7a8112 4019
188538df
TG
4020 /* Align pointer properly (doubleword boundary). */
4021 offset = (offset + 7) & ~7;
4022
4023 /* Floating point register store. */
4024 if (save_fregs)
188538df 4025 {
823fbbce
JDA
4026 rtx base;
4027
aadcdb45
JL
4028 /* First get the frame or stack pointer to the start of the FP register
4029 save area. */
2b41935c 4030 if (frame_pointer_needed)
823fbbce 4031 {
bc707992
JDA
4032 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4033 base = hard_frame_pointer_rtx;
823fbbce 4034 }
2b41935c 4035 else
823fbbce
JDA
4036 {
4037 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4038 base = stack_pointer_rtx;
4039 }
aadcdb45
JL
4040
4041 /* Now actually save the FP registers.  %r1 (tmpreg) was just set to
   BASE + OFFSET; every save stores through it with POST_INC, while the
   frame notes describe the same slot as BASE + OFFSET.  */
88624c0e 4042 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
e63ffc38 4043 {
6fb5fa3c
DB
4044 if (df_regs_ever_live_p (i)
4045 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
e63ffc38 4046 {
f6bcf44c 4047 rtx addr, insn, reg;
403a3fb7
JDA
4048 addr = gen_rtx_MEM (DFmode,
4049 gen_rtx_POST_INC (word_mode, tmpreg));
19ec6a36 4050 reg = gen_rtx_REG (DFmode, i);
f6bcf44c
JDA
4051 insn = emit_move_insn (addr, reg);
4052 if (DO_FRAME_NOTES)
4053 {
4054 RTX_FRAME_RELATED_P (insn) = 1;
823fbbce
JDA
4055 if (TARGET_64BIT)
4056 {
4057 rtx mem = gen_rtx_MEM (DFmode,
0a81f074
RS
4058 plus_constant (Pmode, base,
4059 offset));
bbbbb16a
ILT
4060 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4061 gen_rtx_SET (VOIDmode, mem, reg));
823fbbce
JDA
4062 }
4063 else
4064 {
4065 rtx meml = gen_rtx_MEM (SFmode,
0a81f074
RS
4066 plus_constant (Pmode, base,
4067 offset));
823fbbce 4068 rtx memr = gen_rtx_MEM (SFmode,
0a81f074
RS
4069 plus_constant (Pmode, base,
4070 offset + 4));
823fbbce
JDA
4071 rtx regl = gen_rtx_REG (SFmode, i);
4072 rtx regr = gen_rtx_REG (SFmode, i + 1);
4073 rtx setl = gen_rtx_SET (VOIDmode, meml, regl);
4074 rtx setr = gen_rtx_SET (VOIDmode, memr, regr);
4075 rtvec vec;
4076
4077 RTX_FRAME_RELATED_P (setl) = 1;
4078 RTX_FRAME_RELATED_P (setr) = 1;
4079 vec = gen_rtvec (2, setl, setr);
bbbbb16a
ILT
4080 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4081 gen_rtx_SEQUENCE (VOIDmode, vec));
823fbbce 4082 }
f6bcf44c
JDA
4083 }
4084 offset += GET_MODE_SIZE (DFmode);
e63ffc38
JL
4085 fr_saved++;
4086 }
4087 }
188538df
TG
4088 }
4089}
4090
19ec6a36
AM
4091/* Emit RTL to load REG from the memory location specified by BASE+DISP.
4092 Handle case where DISP > 8k by using the add_high_const patterns. */
4093
f6bcf44c 4094static void
a4295210 4095load_reg (int reg, HOST_WIDE_INT disp, int base)
19ec6a36 4096{
a4295210
JDA
4097 rtx dest = gen_rtx_REG (word_mode, reg);
4098 rtx basereg = gen_rtx_REG (Pmode, base);
4099 rtx src;
19ec6a36 4100
19ec6a36 4101 if (VAL_14_BITS_P (disp))
0a81f074 4102 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
a4295210 4103 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
19ec6a36 4104 {
a4295210
JDA
4105 rtx delta = GEN_INT (disp);
4106 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4107
4108 emit_move_insn (tmpreg, delta);
4109 if (TARGET_DISABLE_INDEXING)
4110 {
4111 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4112 src = gen_rtx_MEM (word_mode, tmpreg);
4113 }
4114 else
4115 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
19ec6a36
AM
4116 }
4117 else
4118 {
4119 rtx delta = GEN_INT (disp);
4120 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4121 rtx tmpreg = gen_rtx_REG (Pmode, 1);
a4295210 4122
19ec6a36
AM
4123 emit_move_insn (tmpreg, high);
4124 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
19ec6a36 4125 }
a4295210
JDA
4126
4127 emit_move_insn (dest, src);
19ec6a36 4128}
aadcdb45 4129
5fad1c24
JDA
4130/* Update the total code bytes output to the text section. */
4131
4132static void
67b846fa 4133update_total_code_bytes (unsigned int nbytes)
5fad1c24
JDA
4134{
4135 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
62910663 4136 && !IN_NAMED_SECTION_P (cfun->decl))
5fad1c24 4137 {
67b846fa 4138 unsigned int old_total = total_code_bytes;
5fad1c24 4139
67b846fa 4140 total_code_bytes += nbytes;
5fad1c24 4141
67b846fa
JDA
4142 /* Be prepared to handle overflows. */
4143 if (old_total > total_code_bytes)
4144 total_code_bytes = UINT_MAX;
5fad1c24
JDA
4145 }
4146}
4147
08c148a8
NB
4148/* This function generates the assembly code for function exit.
4149 Args are as for output_function_prologue ().
4150
4151 The function epilogue should not depend on the current stack
4152 pointer! It should use the frame pointer only. This is mandatory
4153 because of alloca; we also take advantage of it to omit stack
fe19a83d 4154 adjustments before returning. */
08c148a8
NB
4155
4156static void
b7849684 4157pa_output_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
188538df 4158{
08a2b118
RS
4159 rtx insn = get_last_insn ();
4160
5fad1c24
JDA
4161 last_address = 0;
4162
ae9d61ab 4163 /* pa_expand_epilogue does the dirty work now. We just need
aadcdb45 4164 to output the assembler directives which denote the end
08a2b118
RS
4165 of a function.
4166
4167 To make debuggers happy, emit a nop if the epilogue was completely
4168 eliminated due to a volatile call as the last insn in the
23f6f34f 4169 current function. That way the return address (in %r2) will
08a2b118
RS
4170 always point to a valid instruction in the current function. */
4171
4172 /* Get the last real insn. */
b64925dc 4173 if (NOTE_P (insn))
08a2b118
RS
4174 insn = prev_real_insn (insn);
4175
4176 /* If it is a sequence, then look inside. */
b64925dc 4177 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
08a2b118
RS
4178 insn = XVECEXP (PATTERN (insn), 0, 0);
4179
23f6f34f 4180 /* If insn is a CALL_INSN, then it must be a call to a volatile
08a2b118 4181 function (otherwise there would be epilogue insns). */
b64925dc 4182 if (insn && CALL_P (insn))
17e6098e
JDA
4183 {
4184 fputs ("\tnop\n", file);
4185 last_address += 4;
4186 }
23f6f34f 4187
e236a9ff 4188 fputs ("\t.EXIT\n\t.PROCEND\n", file);
17e6098e 4189
9a55eab3
JDA
4190 if (TARGET_SOM && TARGET_GAS)
4191 {
4192 /* We done with this subspace except possibly for some additional
4193 debug information. Forget that we are in this subspace to ensure
4194 that the next function is output in its own subspace. */
d6b5193b 4195 in_section = NULL;
1a83bfc3 4196 cfun->machine->in_nsubspa = 2;
9a55eab3
JDA
4197 }
4198
5fad1c24 4199 if (INSN_ADDRESSES_SET_P ())
17e6098e 4200 {
5fad1c24
JDA
4201 insn = get_last_nonnote_insn ();
4202 last_address += INSN_ADDRESSES (INSN_UID (insn));
4203 if (INSN_P (insn))
4204 last_address += insn_default_length (insn);
4205 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4206 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
17e6098e 4207 }
67b846fa
JDA
4208 else
4209 last_address = UINT_MAX;
5fad1c24
JDA
4210
4211 /* Finally, update the total number of code bytes output so far. */
4212 update_total_code_bytes (last_address);
aadcdb45 4213}
4971c587 4214
/* Emit the RTL for the epilogue of the current function.

   This undoes pa_expand_prologue and relies on the values that function
   left in rp_saved, local_fsize, actual_fsize and save_fregs.  In order,
   the emitted code
     - reloads %r2 (RP) early when it was saved and its slot is
       reachable;
     - reloads the EH return data registers and the live callee-saved
       general registers;
     - reloads the live callee-saved floating point registers through
       %r1;
     - emits a blockage, then cuts the stack back (restoring the frame
       pointer, or merging the adjustment with one deferred register
       load);
     - reloads %r2 if that had to be postponed;
     - for functions calling __builtin_eh_return, subtracts
       EH_RETURN_STACKADJ_RTX from the stack pointer.  */
aadcdb45 4215void
ae9d61ab 4216pa_expand_epilogue (void)
aadcdb45 4217{
23f6f34f 4218 rtx tmpreg;
a4295210
JDA
4219 HOST_WIDE_INT offset;
/* ret_off stays nonzero only when the reload of %r2 has to wait until
   after the stack pointer has been cut back.  */
4220 HOST_WIDE_INT ret_off = 0;
4221 int i;
31d68947 4222 int merge_sp_adjust_with_load = 0;
aadcdb45
JL
4223
4224 /* We will use this often. */
690d4228 4225 tmpreg = gen_rtx_REG (word_mode, 1);
aadcdb45
JL
4226
4227 /* Try to restore RP early to avoid load/use interlocks when
4228 RP gets used in the return (bv) instruction. This appears to still
fe19a83d 4229 be necessary even when we schedule the prologue and epilogue. */
16c16a24 4230 if (rp_saved)
31d68947
AM
4231 {
4232 ret_off = TARGET_64BIT ? -16 : -20;
4233 if (frame_pointer_needed)
4234 {
bc707992 4235 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
31d68947
AM
4236 ret_off = 0;
4237 }
4238 else
4239 {
4240 /* No frame pointer, and stack is smaller than 8k. */
4241 if (VAL_14_BITS_P (ret_off - actual_fsize))
4242 {
f6bcf44c 4243 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
31d68947
AM
4244 ret_off = 0;
4245 }
4246 }
4247 }
aadcdb45
JL
4248
4249 /* General register restores. */
188538df
TG
4250 if (frame_pointer_needed)
4251 {
823fbbce
JDA
4252 offset = local_fsize;
4253
4254 /* If the current function calls __builtin_eh_return, then we need
4255 to restore the saved EH data registers. */
e3b5732b 4256 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4257 {
4258 unsigned int i, regno;
4259
4260 for (i = 0; ; ++i)
4261 {
4262 regno = EH_RETURN_DATA_REGNO (i);
4263 if (regno == INVALID_REGNUM)
4264 break;
4265
bc707992 4266 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
823fbbce
JDA
4267 offset += UNITS_PER_WORD;
4268 }
4269 }
4270
4271 for (i = 18; i >= 4; i--)
6fb5fa3c 4272 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
188538df 4273 {
bc707992 4274 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
d7735a07 4275 offset += UNITS_PER_WORD;
188538df 4276 }
188538df
TG
4277 }
4278 else
4279 {
823fbbce
JDA
4280 offset = local_fsize - actual_fsize;
4281
4282 /* If the current function calls __builtin_eh_return, then we need
4283 to restore the saved EH data registers. */
e3b5732b 4284 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4285 {
4286 unsigned int i, regno;
4287
4288 for (i = 0; ; ++i)
4289 {
4290 regno = EH_RETURN_DATA_REGNO (i);
4291 if (regno == INVALID_REGNUM)
4292 break;
4293
4294 /* Only for the first load.
4295 merge_sp_adjust_with_load holds the register load
4296 with which we will merge the sp adjustment. */
4297 if (merge_sp_adjust_with_load == 0
4298 && local_fsize == 0
4299 && VAL_14_BITS_P (-actual_fsize))
4300 merge_sp_adjust_with_load = regno;
4301 else
4302 load_reg (regno, offset, STACK_POINTER_REGNUM);
4303 offset += UNITS_PER_WORD;
4304 }
4305 }
4306
4307 for (i = 18; i >= 3; i--)
e63ffc38 4308 {
6fb5fa3c 4309 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
e63ffc38 4310 {
e63ffc38
JL
4311 /* Only for the first load.
4312 merge_sp_adjust_with_load holds the register load
4313 with which we will merge the sp adjustment. */
31d68947 4314 if (merge_sp_adjust_with_load == 0
e63ffc38 4315 && local_fsize == 0
31d68947 4316 && VAL_14_BITS_P (-actual_fsize))
e63ffc38
JL
4317 merge_sp_adjust_with_load = i;
4318 else
f6bcf44c 4319 load_reg (i, offset, STACK_POINTER_REGNUM);
d7735a07 4320 offset += UNITS_PER_WORD;
e63ffc38
JL
4321 }
4322 }
188538df 4323 }
aadcdb45 4324
188538df
TG
4325 /* Align pointer properly (doubleword boundary). */
4326 offset = (offset + 7) & ~7;
4327
aadcdb45 4328 /* FP register restores. */
188538df 4329 if (save_fregs)
188538df 4330 {
aadcdb45 4331 /* Adjust the register to index off of. */
2b41935c 4332 if (frame_pointer_needed)
bc707992 4333 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
2b41935c 4334 else
823fbbce 4335 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
aadcdb45
JL
4336
4337 /* Actually do the restores now.  Each load reads through %r1
   (tmpreg) with POST_INC, in the same register order the prologue
   used for the saves.  */
88624c0e 4338 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
6fb5fa3c
DB
4339 if (df_regs_ever_live_p (i)
4340 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
19ec6a36 4341 {
403a3fb7
JDA
4342 rtx src = gen_rtx_MEM (DFmode,
4343 gen_rtx_POST_INC (word_mode, tmpreg));
19ec6a36 4344 rtx dest = gen_rtx_REG (DFmode, i);
f6bcf44c 4345 emit_move_insn (dest, src);
19ec6a36 4346 }
188538df 4347 }
aadcdb45 4348
1144563f
JL
4349 /* Emit a blockage insn here to keep these insns from being moved to
4350 an earlier spot in the epilogue, or into the main instruction stream.
4351
4352 This is necessary as we must not cut the stack back before all the
4353 restores are finished. */
4354 emit_insn (gen_blockage ());
aadcdb45 4355
6619e96c 4356 /* Reset stack pointer (and possibly frame pointer). The stack
68944452 4357 pointer is initially set to fp + 64 to avoid a race condition. */
31d68947 4358 if (frame_pointer_needed)
188538df 4359 {
19ec6a36 4360 rtx delta = GEN_INT (-64);
823fbbce 4361
bc707992
JDA
4362 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4363 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4364 stack_pointer_rtx, delta));
188538df 4365 }
aadcdb45 4366 /* If we were deferring a callee register restore, do it now. */
31d68947
AM
4367 else if (merge_sp_adjust_with_load)
4368 {
4369 rtx delta = GEN_INT (-actual_fsize);
19ec6a36 4370 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
823fbbce
JDA
4371
4372 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
31d68947 4373 }
aadcdb45 4374 else if (actual_fsize != 0)
823fbbce
JDA
4375 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4376 - actual_fsize, 0);
31d68947
AM
4377
4378 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4379 frame greater than 8k), do so now. */
4380 if (ret_off != 0)
f6bcf44c 4381 load_reg (2, ret_off, STACK_POINTER_REGNUM);
823fbbce 4382
e3b5732b 4383 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4384 {
4385 rtx sa = EH_RETURN_STACKADJ_RTX;
4386
4387 emit_insn (gen_blockage ());
4388 emit_insn (TARGET_64BIT
4389 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4390 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4391 }
15768583
JDA
4392}
4393
4394bool
4395pa_can_use_return_insn (void)
4396{
4397 if (!reload_completed)
4398 return false;
4399
4400 if (frame_pointer_needed)
4401 return false;
4402
4403 if (df_regs_ever_live_p (2))
4404 return false;
4405
4406 if (crtl->profile)
4407 return false;
4408
ae9d61ab 4409 return pa_compute_frame_size (get_frame_size (), 0) == 0;
188538df
TG
4410}
4411
d777856d 4412rtx
b7849684 4413hppa_pic_save_rtx (void)
824e7605 4414{
d777856d 4415 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
1c7a8112
AM
4416}
4417
3674b34d
JDA
/* Default NO_DEFERRED_PROFILE_COUNTERS to 0 unless it is already
   defined.  hppa_profile_hook tests it with #if: when it is zero, each
   profiled function records its label number in funcdef_nos and passes
   the address of an "LP" counter label to _mcount in %r24.  */
4418#ifndef NO_DEFERRED_PROFILE_COUNTERS
4419#define NO_DEFERRED_PROFILE_COUNTERS 0
4420#endif
4421
3674b34d
JDA
4422
4423/* Vector of funcdef numbers.  hppa_profile_hook pushes onto it;
   output_deferred_profile_counters emits one counter per entry and
   then releases the vector.  */
9771b263 4424static vec<int> funcdef_nos;
3674b34d
JDA
4425
4426/* Output deferred profile counters. */
4427static void
4428output_deferred_profile_counters (void)
4429{
4430 unsigned int i;
4431 int align, n;
4432
9771b263 4433 if (funcdef_nos.is_empty ())
3674b34d
JDA
4434 return;
4435
d6b5193b 4436 switch_to_section (data_section);
3674b34d
JDA
4437 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4438 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4439
9771b263 4440 for (i = 0; funcdef_nos.iterate (i, &n); i++)
3674b34d
JDA
4441 {
4442 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4443 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4444 }
4445
9771b263 4446 funcdef_nos.release ();
3674b34d
JDA
4447}
4448
1c7a8112 4449void
b7849684 4450hppa_profile_hook (int label_no)
1c7a8112 4451{
a3d4c92f
RC
4452 /* We use SImode for the address of the function in both 32 and
4453 64-bit code to avoid having to provide DImode versions of the
4454 lcla2 and load_offset_label_address insn patterns. */
4455 rtx reg = gen_reg_rtx (SImode);
4456 rtx label_rtx = gen_label_rtx ();
8f949e7e
JDA
4457 rtx begin_label_rtx, call_insn;
4458 char begin_label_name[16];
1c7a8112 4459
8f949e7e 4460 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
f6f315fe 4461 label_no);
a3d4c92f 4462 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
1c7a8112
AM
4463
4464 if (TARGET_64BIT)
4465 emit_move_insn (arg_pointer_rtx,
4466 gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
4467 GEN_INT (64)));
4468
1c7a8112
AM
4469 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4470
110abdbc 4471 /* The address of the function is loaded into %r25 with an instruction-
a3d4c92f
RC
4472 relative sequence that avoids the use of relocations. The sequence
4473 is split so that the load_offset_label_address instruction can
4474 occupy the delay slot of the call to _mcount. */
4475 if (TARGET_PA_20)
4476 emit_insn (gen_lcla2 (reg, label_rtx));
4477 else
4478 emit_insn (gen_lcla1 (reg, label_rtx));
4479
4480 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4481 reg, begin_label_rtx, label_rtx));
4482
3674b34d 4483#if !NO_DEFERRED_PROFILE_COUNTERS
1c7a8112
AM
4484 {
4485 rtx count_label_rtx, addr, r24;
8f949e7e 4486 char count_label_name[16];
1c7a8112 4487
9771b263 4488 funcdef_nos.safe_push (label_no);
8f949e7e
JDA
4489 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4490 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (count_label_name));
1c7a8112 4491
bdad4be5 4492 addr = force_reg (Pmode, count_label_rtx);
1c7a8112
AM
4493 r24 = gen_rtx_REG (Pmode, 24);
4494 emit_move_insn (r24, addr);
4495
1c7a8112 4496 call_insn =
a3d4c92f
RC
4497 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4498 gen_rtx_SYMBOL_REF (Pmode,
4499 "_mcount")),
4500 GEN_INT (TARGET_64BIT ? 24 : 12)));
1c7a8112
AM
4501
4502 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4503 }
4504#else
a3d4c92f 4505
1c7a8112 4506 call_insn =
a3d4c92f
RC
4507 emit_call_insn (gen_call (gen_rtx_MEM (Pmode,
4508 gen_rtx_SYMBOL_REF (Pmode,
4509 "_mcount")),
4510 GEN_INT (TARGET_64BIT ? 16 : 8)));
4511
1c7a8112
AM
4512#endif
4513
a3d4c92f
RC
4514 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4515 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4516
1c7a8112
AM
4517 /* Indicate the _mcount call cannot throw, nor will it execute a
4518 non-local goto. */
062a5fd1 4519 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
824e7605
AM
4520}
4521
e99d6592
MS
4522/* Fetch the return address for the frame COUNT steps up from
4523 the current frame, after the prologue. FRAMEADDR is the
4524 frame pointer of the COUNT frame.
4525
cf3735b8
JDA
4526 We want to ignore any export stub remnants here. To handle this,
4527 we examine the code at the return address, and if it is an export
4528 stub, we return a memory rtx for the stub return address stored
4529 at frame-24.
c28eb6c2
JL
4530
4531 The value returned is used in two different ways:
4532
4533 1. To find a function's caller.
4534
4535 2. To change the return address for a function.
4536
4537 This function handles most instances of case 1; however, it will
4538 fail if there are two levels of stubs to execute on the return
4539 path. The only way I believe that can happen is if the return value
4540 needs a parameter relocation, which never happens for C code.
4541
4542 This function handles most instances of case 2; however, it will
4543 fail if we did not originally have stub code on the return path
cf3735b8 4544 but will need stub code on the new return path. This can happen if
c28eb6c2 4545 the caller & callee are both in the main program, but the new
cf3735b8 4546 return location is in a shared library. */
e99d6592
MS
4547
4548rtx
ae9d61ab 4549pa_return_addr_rtx (int count, rtx frameaddr)
e99d6592
MS
4550{
4551 rtx label;
cf3735b8 4552 rtx rp;
e99d6592
MS
4553 rtx saved_rp;
4554 rtx ins;
4555
df8b5535 4556 /* The instruction stream at the return address of a PA1.X export stub is:
f90b7a5a
PB
4557
4558 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4559 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4560 0x00011820 | stub+16: mtsp r1,sr0
4561 0xe0400002 | stub+20: be,n 0(sr0,rp)
4562
4563 0xe0400002 must be specified as -532676606 so that it won't be
df8b5535 4564 rejected as an invalid immediate operand on 64-bit hosts.
f90b7a5a 4565
df8b5535
JDA
4566 The instruction stream at the return address of a PA2.0 export stub is:
4567
4568 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4569 0xe840d002 | stub+12: bve,n (rp)
4570 */
4571
4572 HOST_WIDE_INT insns[4];
4573 int i, len;
f90b7a5a 4574
cf3735b8
JDA
4575 if (count != 0)
4576 return NULL_RTX;
a7721dc0 4577
cf3735b8 4578 rp = get_hard_reg_initial_val (Pmode, 2);
e99d6592 4579
cf3735b8
JDA
4580 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4581 return rp;
e99d6592 4582
f90b7a5a
PB
4583 /* If there is no export stub then just use the value saved from
4584 the return pointer register. */
4585
a7721dc0 4586 saved_rp = gen_reg_rtx (Pmode);
cf3735b8 4587 emit_move_insn (saved_rp, rp);
e99d6592
MS
4588
4589 /* Get pointer to the instruction stream. We have to mask out the
4590 privilege level from the two low order bits of the return address
4591 pointer here so that ins will point to the start of the first
4592 instruction that would have been executed if we returned. */
cf3735b8 4593 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
e99d6592
MS
4594 label = gen_label_rtx ();
4595
df8b5535
JDA
4596 if (TARGET_PA_20)
4597 {
4598 insns[0] = 0x4bc23fd1;
4599 insns[1] = -398405630;
4600 len = 2;
4601 }
4602 else
4603 {
4604 insns[0] = 0x4bc23fd1;
4605 insns[1] = 0x004010a1;
4606 insns[2] = 0x00011820;
4607 insns[3] = -532676606;
4608 len = 4;
4609 }
4610
e99d6592 4611 /* Check the instruction stream at the normal return address for the
f90b7a5a
PB
4612 export stub. If it is an export stub, than our return address is
4613 really in -24[frameaddr]. */
e99d6592 4614
df8b5535 4615 for (i = 0; i < len; i++)
f90b7a5a 4616 {
0a81f074 4617 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
f90b7a5a
PB
4618 rtx op1 = GEN_INT (insns[i]);
4619 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4620 }
e99d6592 4621
cf3735b8 4622 /* Here we know that our return address points to an export
e99d6592 4623 stub. We don't want to return the address of the export stub,
cf3735b8
JDA
4624 but rather the return address of the export stub. That return
4625 address is stored at -24[frameaddr]. */
e99d6592 4626
cf3735b8
JDA
4627 emit_move_insn (saved_rp,
4628 gen_rtx_MEM (Pmode,
4629 memory_address (Pmode,
0a81f074 4630 plus_constant (Pmode, frameaddr,
cf3735b8 4631 -24))));
e99d6592
MS
4632
4633 emit_label (label);
f90b7a5a 4634
cf3735b8 4635 return saved_rp;
e99d6592
MS
4636}
4637
188538df 4638void
ae9d61ab 4639pa_emit_bcond_fp (rtx operands[])
188538df 4640{
f90b7a5a
PB
4641 enum rtx_code code = GET_CODE (operands[0]);
4642 rtx operand0 = operands[1];
4643 rtx operand1 = operands[2];
4644 rtx label = operands[3];
4645
4646 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0),
4647 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4648
ad2c71b7
JL
4649 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4650 gen_rtx_IF_THEN_ELSE (VOIDmode,
f90b7a5a 4651 gen_rtx_fmt_ee (NE,
ad2c71b7
JL
4652 VOIDmode,
4653 gen_rtx_REG (CCFPmode, 0),
4654 const0_rtx),
f90b7a5a 4655 gen_rtx_LABEL_REF (VOIDmode, label),
ad2c71b7 4656 pc_rtx)));
188538df
TG
4657
4658}
4659
780f491f
TG
4660/* Adjust the cost of a scheduling dependency. Return the new cost of
4661 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4662
c237e94a 4663static int
b7849684 4664pa_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
780f491f 4665{
b09fa787
JL
4666 enum attr_type attr_type;
4667
5d50fab3
JL
4668 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4669 true dependencies as they are described with bypasses now. */
4670 if (pa_cpu >= PROCESSOR_8000 || REG_NOTE_KIND (link) == 0)
86001391
JQ
4671 return cost;
4672
e150ae4f
TG
4673 if (! recog_memoized (insn))
4674 return 0;
780f491f 4675
b09fa787
JL
4676 attr_type = get_attr_type (insn);
4677
144d51f9 4678 switch (REG_NOTE_KIND (link))
780f491f 4679 {
144d51f9 4680 case REG_DEP_ANTI:
780f491f
TG
4681 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4682 cycles later. */
4683
b09fa787 4684 if (attr_type == TYPE_FPLOAD)
780f491f 4685 {
e150ae4f
TG
4686 rtx pat = PATTERN (insn);
4687 rtx dep_pat = PATTERN (dep_insn);
4688 if (GET_CODE (pat) == PARALLEL)
4689 {
4690 /* This happens for the fldXs,mb patterns. */
4691 pat = XVECEXP (pat, 0, 0);
4692 }
4693 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
780f491f 4694 /* If this happens, we have to extend this to schedule
e150ae4f
TG
4695 optimally. Return 0 for now. */
4696 return 0;
780f491f 4697
e150ae4f 4698 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
780f491f 4699 {
e150ae4f
TG
4700 if (! recog_memoized (dep_insn))
4701 return 0;
780f491f
TG
4702 switch (get_attr_type (dep_insn))
4703 {
4704 case TYPE_FPALU:
c47decad
JL
4705 case TYPE_FPMULSGL:
4706 case TYPE_FPMULDBL:
780f491f
TG
4707 case TYPE_FPDIVSGL:
4708 case TYPE_FPDIVDBL:
4709 case TYPE_FPSQRTSGL:
4710 case TYPE_FPSQRTDBL:
e150ae4f 4711 /* A fpload can't be issued until one cycle before a
ddd5a7c1 4712 preceding arithmetic operation has finished if
e150ae4f
TG
4713 the target of the fpload is any of the sources
4714 (or destination) of the arithmetic operation. */
5d50fab3 4715 return insn_default_latency (dep_insn) - 1;
c47decad
JL
4716
4717 default:
4718 return 0;
4719 }
4720 }
4721 }
b09fa787 4722 else if (attr_type == TYPE_FPALU)
c47decad
JL
4723 {
4724 rtx pat = PATTERN (insn);
4725 rtx dep_pat = PATTERN (dep_insn);
4726 if (GET_CODE (pat) == PARALLEL)
4727 {
4728 /* This happens for the fldXs,mb patterns. */
4729 pat = XVECEXP (pat, 0, 0);
4730 }
4731 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4732 /* If this happens, we have to extend this to schedule
4733 optimally. Return 0 for now. */
4734 return 0;
4735
4736 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4737 {
4738 if (! recog_memoized (dep_insn))
4739 return 0;
4740 switch (get_attr_type (dep_insn))
4741 {
4742 case TYPE_FPDIVSGL:
4743 case TYPE_FPDIVDBL:
4744 case TYPE_FPSQRTSGL:
4745 case TYPE_FPSQRTDBL:
4746 /* An ALU flop can't be issued until two cycles before a
ddd5a7c1 4747 preceding divide or sqrt operation has finished if
c47decad
JL
4748 the target of the ALU flop is any of the sources
4749 (or destination) of the divide or sqrt operation. */
5d50fab3 4750 return insn_default_latency (dep_insn) - 2;
780f491f
TG
4751
4752 default:
4753 return 0;
4754 }
4755 }
4756 }
4757
4758 /* For other anti dependencies, the cost is 0. */
4759 return 0;
144d51f9
NS
4760
4761 case REG_DEP_OUTPUT:
c47decad
JL
4762 /* Output dependency; DEP_INSN writes a register that INSN writes some
4763 cycles later. */
b09fa787 4764 if (attr_type == TYPE_FPLOAD)
c47decad
JL
4765 {
4766 rtx pat = PATTERN (insn);
4767 rtx dep_pat = PATTERN (dep_insn);
4768 if (GET_CODE (pat) == PARALLEL)
4769 {
4770 /* This happens for the fldXs,mb patterns. */
4771 pat = XVECEXP (pat, 0, 0);
4772 }
4773 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4774 /* If this happens, we have to extend this to schedule
4775 optimally. Return 0 for now. */
4776 return 0;
4777
4778 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4779 {
4780 if (! recog_memoized (dep_insn))
4781 return 0;
4782 switch (get_attr_type (dep_insn))
4783 {
4784 case TYPE_FPALU:
4785 case TYPE_FPMULSGL:
4786 case TYPE_FPMULDBL:
4787 case TYPE_FPDIVSGL:
4788 case TYPE_FPDIVDBL:
4789 case TYPE_FPSQRTSGL:
4790 case TYPE_FPSQRTDBL:
4791 /* A fpload can't be issued until one cycle before a
ddd5a7c1 4792 preceding arithmetic operation has finished if
c47decad 4793 the target of the fpload is the destination of the
fae15c93
VM
4794 arithmetic operation.
4795
4796 Exception: For PA7100LC, PA7200 and PA7300, the cost
4797 is 3 cycles, unless they bundle together. We also
4798 pay the penalty if the second insn is a fpload. */
5d50fab3 4799 return insn_default_latency (dep_insn) - 1;
780f491f 4800
c47decad
JL
4801 default:
4802 return 0;
4803 }
4804 }
4805 }
b09fa787 4806 else if (attr_type == TYPE_FPALU)
c47decad
JL
4807 {
4808 rtx pat = PATTERN (insn);
4809 rtx dep_pat = PATTERN (dep_insn);
4810 if (GET_CODE (pat) == PARALLEL)
4811 {
4812 /* This happens for the fldXs,mb patterns. */
4813 pat = XVECEXP (pat, 0, 0);
4814 }
4815 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4816 /* If this happens, we have to extend this to schedule
4817 optimally. Return 0 for now. */
4818 return 0;
4819
4820 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4821 {
4822 if (! recog_memoized (dep_insn))
4823 return 0;
4824 switch (get_attr_type (dep_insn))
4825 {
4826 case TYPE_FPDIVSGL:
4827 case TYPE_FPDIVDBL:
4828 case TYPE_FPSQRTSGL:
4829 case TYPE_FPSQRTDBL:
4830 /* An ALU flop can't be issued until two cycles before a
ddd5a7c1 4831 preceding divide or sqrt operation has finished if
c47decad 4832 the target of the ALU flop is also the target of
38e01259 4833 the divide or sqrt operation. */
5d50fab3 4834 return insn_default_latency (dep_insn) - 2;
c47decad
JL
4835
4836 default:
4837 return 0;
4838 }
4839 }
4840 }
4841
4842 /* For other output dependencies, the cost is 0. */
4843 return 0;
144d51f9
NS
4844
4845 default:
4846 gcc_unreachable ();
c47decad 4847 }
780f491f 4848}
188538df 4849
c237e94a
ZW
4850/* Adjust scheduling priorities. We use this to try and keep addil
4851 and the next use of %r1 close together. */
4852static int
b7849684 4853pa_adjust_priority (rtx insn, int priority)
c237e94a
ZW
4854{
4855 rtx set = single_set (insn);
4856 rtx src, dest;
4857 if (set)
4858 {
4859 src = SET_SRC (set);
4860 dest = SET_DEST (set);
4861 if (GET_CODE (src) == LO_SUM
4862 && symbolic_operand (XEXP (src, 1), VOIDmode)
4863 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4864 priority >>= 3;
4865
4866 else if (GET_CODE (src) == MEM
4867 && GET_CODE (XEXP (src, 0)) == LO_SUM
4868 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4869 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4870 priority >>= 1;
4871
4872 else if (GET_CODE (dest) == MEM
4873 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4874 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
4875 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
4876 priority >>= 3;
4877 }
4878 return priority;
4879}
4880
4881/* The 700 can only issue a single insn at a time.
4882 The 7XXX processors can issue two insns at a time.
4883 The 8000 can issue 4 insns at a time. */
4884static int
b7849684 4885pa_issue_rate (void)
c237e94a
ZW
4886{
4887 switch (pa_cpu)
4888 {
4889 case PROCESSOR_700: return 1;
4890 case PROCESSOR_7100: return 2;
4891 case PROCESSOR_7100LC: return 2;
4892 case PROCESSOR_7200: return 2;
fae15c93 4893 case PROCESSOR_7300: return 2;
c237e94a
ZW
4894 case PROCESSOR_8000: return 4;
4895
4896 default:
144d51f9 4897 gcc_unreachable ();
c237e94a
ZW
4898 }
4899}
4900
4901
4902
ab11fb42
JDA
4903/* Return any length plus adjustment needed by INSN which already has
4904 its length computed as LENGTH. Return LENGTH if no adjustment is
4905 necessary.
3673e996
RS
4906
4907 Also compute the length of an inline block move here as it is too
b9821af8 4908 complicated to express as a length attribute in pa.md. */
3673e996 4909int
b7849684 4910pa_adjust_insn_length (rtx insn, int length)
3673e996
RS
4911{
4912 rtx pat = PATTERN (insn);
4913
ab11fb42
JDA
4914 /* If length is negative or undefined, provide initial length. */
4915 if ((unsigned int) length >= INT_MAX)
4916 {
4917 if (GET_CODE (pat) == SEQUENCE)
4918 insn = XVECEXP (pat, 0, 0);
4919
4920 switch (get_attr_type (insn))
4921 {
4922 case TYPE_MILLI:
4923 length = pa_attr_length_millicode_call (insn);
4924 break;
4925 case TYPE_CALL:
4926 length = pa_attr_length_call (insn, 0);
4927 break;
4928 case TYPE_SIBCALL:
4929 length = pa_attr_length_call (insn, 1);
4930 break;
4931 case TYPE_DYNCALL:
4932 length = pa_attr_length_indirect_call (insn);
4933 break;
4934 case TYPE_SH_FUNC_ADRS:
4935 length = pa_attr_length_millicode_call (insn) + 20;
4936 break;
4937 default:
4938 gcc_unreachable ();
4939 }
4940 }
4941
3673e996 4942 /* Block move pattern. */
33e67557
SB
4943 if (NONJUMP_INSN_P (insn)
4944 && GET_CODE (pat) == PARALLEL
4945 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4946 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4947 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
4948 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
4949 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
ab11fb42 4950 length += compute_movmem_length (insn) - 4;
cdc9103c 4951 /* Block clear pattern. */
b64925dc 4952 else if (NONJUMP_INSN_P (insn)
cdc9103c
JDA
4953 && GET_CODE (pat) == PARALLEL
4954 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
4955 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
4956 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
4957 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
ab11fb42 4958 length += compute_clrmem_length (insn) - 4;
3673e996 4959 /* Conditional branch with an unfilled delay slot. */
b64925dc 4960 else if (JUMP_P (insn) && ! simplejump_p (insn))
b9821af8
JL
4961 {
4962 /* Adjust a short backwards conditional with an unfilled delay slot. */
4963 if (GET_CODE (pat) == SET
a1b36964 4964 && length == 4
3232e9d8 4965 && JUMP_LABEL (insn) != NULL_RTX
b9821af8 4966 && ! forward_branch_p (insn))
ab11fb42 4967 length += 4;
b1092901
JL
4968 else if (GET_CODE (pat) == PARALLEL
4969 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
4970 && length == 4)
ab11fb42 4971 length += 4;
b9821af8 4972 /* Adjust dbra insn with short backwards conditional branch with
23f6f34f 4973 unfilled delay slot -- only for case where counter is in a
fe19a83d 4974 general register register. */
b9821af8
JL
4975 else if (GET_CODE (pat) == PARALLEL
4976 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
4977 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
23f6f34f 4978 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
a1b36964 4979 && length == 4
b9821af8 4980 && ! forward_branch_p (insn))
ab11fb42 4981 length += 4;
b9821af8 4982 }
ab11fb42 4983 return length;
3673e996
RS
4984}
4985
/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.

   Return true if CODE is one of the punctuation characters '@', '#',
   '*' or '^', and false for any other character.  */

static bool
pa_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '@':
    case '#':
    case '*':
    case '^':
      return true;

    default:
      return false;
    }
}
4999
188538df
TG
5000/* Print operand X (an rtx) in assembler syntax to file FILE.
5001 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5002 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5003
5004void
ae9d61ab 5005pa_print_operand (FILE *file, rtx x, int code)
188538df
TG
5006{
5007 switch (code)
5008 {
5009 case '#':
5010 /* Output a 'nop' if there's nothing for the delay slot. */
5011 if (dbr_sequence_length () == 0)
5012 fputs ("\n\tnop", file);
5013 return;
5014 case '*':
5bdc5878 5015 /* Output a nullification completer if there's nothing for the */
23f6f34f 5016 /* delay slot or nullification is requested. */
188538df
TG
5017 if (dbr_sequence_length () == 0 ||
5018 (final_sequence &&
5019 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5020 fputs (",n", file);
5021 return;
5022 case 'R':
5023 /* Print out the second register name of a register pair.
5024 I.e., R (6) => 7. */
831c1763 5025 fputs (reg_names[REGNO (x) + 1], file);
188538df
TG
5026 return;
5027 case 'r':
fe19a83d 5028 /* A register or zero. */
f048ca47
JL
5029 if (x == const0_rtx
5030 || (x == CONST0_RTX (DFmode))
5031 || (x == CONST0_RTX (SFmode)))
188538df 5032 {
55abf18a
JL
5033 fputs ("%r0", file);
5034 return;
5035 }
5036 else
5037 break;
5038 case 'f':
fe19a83d 5039 /* A register or zero (floating point). */
55abf18a
JL
5040 if (x == const0_rtx
5041 || (x == CONST0_RTX (DFmode))
5042 || (x == CONST0_RTX (SFmode)))
5043 {
5044 fputs ("%fr0", file);
188538df
TG
5045 return;
5046 }
5047 else
5048 break;
f8eb41cc
JL
5049 case 'A':
5050 {
5051 rtx xoperands[2];
5052
5053 xoperands[0] = XEXP (XEXP (x, 0), 0);
5054 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
ae9d61ab 5055 pa_output_global_address (file, xoperands[1], 0);
f8eb41cc
JL
5056 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5057 return;
5058 }
5059
c85b8963 5060 case 'C': /* Plain (C)ondition */
188538df
TG
5061 case 'X':
5062 switch (GET_CODE (x))
23f6f34f 5063 {
188538df 5064 case EQ:
e236a9ff 5065 fputs ("=", file); break;
188538df 5066 case NE:
e236a9ff 5067 fputs ("<>", file); break;
188538df 5068 case GT:
e236a9ff 5069 fputs (">", file); break;
188538df 5070 case GE:
e236a9ff 5071 fputs (">=", file); break;
188538df 5072 case GEU:
e236a9ff 5073 fputs (">>=", file); break;
188538df 5074 case GTU:
e236a9ff 5075 fputs (">>", file); break;
188538df 5076 case LT:
e236a9ff 5077 fputs ("<", file); break;
188538df 5078 case LE:
e236a9ff 5079 fputs ("<=", file); break;
188538df 5080 case LEU:
e236a9ff 5081 fputs ("<<=", file); break;
188538df 5082 case LTU:
e236a9ff 5083 fputs ("<<", file); break;
188538df 5084 default:
144d51f9 5085 gcc_unreachable ();
188538df
TG
5086 }
5087 return;
c85b8963 5088 case 'N': /* Condition, (N)egated */
188538df
TG
5089 switch (GET_CODE (x))
5090 {
5091 case EQ:
e236a9ff 5092 fputs ("<>", file); break;
188538df 5093 case NE:
e236a9ff 5094 fputs ("=", file); break;
188538df 5095 case GT:
e236a9ff 5096 fputs ("<=", file); break;
188538df 5097 case GE:
e236a9ff 5098 fputs ("<", file); break;
188538df 5099 case GEU:
e236a9ff 5100 fputs ("<<", file); break;
188538df 5101 case GTU:
e236a9ff 5102 fputs ("<<=", file); break;
188538df 5103 case LT:
e236a9ff 5104 fputs (">=", file); break;
188538df 5105 case LE:
e236a9ff 5106 fputs (">", file); break;
188538df 5107 case LEU:
e236a9ff 5108 fputs (">>", file); break;
188538df 5109 case LTU:
e236a9ff 5110 fputs (">>=", file); break;
188538df 5111 default:
144d51f9 5112 gcc_unreachable ();
188538df
TG
5113 }
5114 return;
831c1763 5115 /* For floating point comparisons. Note that the output
69049ba0
JDA
5116 predicates are the complement of the desired mode. The
5117 conditions for GT, GE, LT, LE and LTGT cause an invalid
5118 operation exception if the result is unordered and this
5119 exception is enabled in the floating-point status register. */
d6c0d377
JL
5120 case 'Y':
5121 switch (GET_CODE (x))
5122 {
5123 case EQ:
e236a9ff 5124 fputs ("!=", file); break;
d6c0d377 5125 case NE:
e236a9ff 5126 fputs ("=", file); break;
d6c0d377 5127 case GT:
becf1647 5128 fputs ("!>", file); break;
d6c0d377 5129 case GE:
becf1647 5130 fputs ("!>=", file); break;
d6c0d377 5131 case LT:
becf1647 5132 fputs ("!<", file); break;
d6c0d377 5133 case LE:
becf1647
DA
5134 fputs ("!<=", file); break;
5135 case LTGT:
5136 fputs ("!<>", file); break;
5137 case UNLE:
69049ba0 5138 fputs ("!?<=", file); break;
becf1647 5139 case UNLT:
69049ba0 5140 fputs ("!?<", file); break;
becf1647 5141 case UNGE:
69049ba0 5142 fputs ("!?>=", file); break;
becf1647 5143 case UNGT:
69049ba0 5144 fputs ("!?>", file); break;
becf1647 5145 case UNEQ:
69049ba0 5146 fputs ("!?=", file); break;
becf1647 5147 case UNORDERED:
69049ba0 5148 fputs ("!?", file); break;
becf1647 5149 case ORDERED:
69049ba0 5150 fputs ("?", file); break;
d6c0d377 5151 default:
144d51f9 5152 gcc_unreachable ();
d6c0d377
JL
5153 }
5154 return;
c85b8963
TG
5155 case 'S': /* Condition, operands are (S)wapped. */
5156 switch (GET_CODE (x))
5157 {
5158 case EQ:
e236a9ff 5159 fputs ("=", file); break;
c85b8963 5160 case NE:
e236a9ff 5161 fputs ("<>", file); break;
c85b8963 5162 case GT:
e236a9ff 5163 fputs ("<", file); break;
c85b8963 5164 case GE:
e236a9ff 5165 fputs ("<=", file); break;
c85b8963 5166 case GEU:
e236a9ff 5167 fputs ("<<=", file); break;
c85b8963 5168 case GTU:
e236a9ff 5169 fputs ("<<", file); break;
c85b8963 5170 case LT:
e236a9ff 5171 fputs (">", file); break;
c85b8963 5172 case LE:
e236a9ff 5173 fputs (">=", file); break;
c85b8963 5174 case LEU:
e236a9ff 5175 fputs (">>=", file); break;
c85b8963 5176 case LTU:
e236a9ff 5177 fputs (">>", file); break;
c85b8963 5178 default:
144d51f9 5179 gcc_unreachable ();
23f6f34f 5180 }
c85b8963
TG
5181 return;
5182 case 'B': /* Condition, (B)oth swapped and negate. */
5183 switch (GET_CODE (x))
5184 {
5185 case EQ:
e236a9ff 5186 fputs ("<>", file); break;
c85b8963 5187 case NE:
e236a9ff 5188 fputs ("=", file); break;
c85b8963 5189 case GT:
e236a9ff 5190 fputs (">=", file); break;
c85b8963 5191 case GE:
e236a9ff 5192 fputs (">", file); break;
c85b8963 5193 case GEU:
e236a9ff 5194 fputs (">>", file); break;
c85b8963 5195 case GTU:
e236a9ff 5196 fputs (">>=", file); break;
c85b8963 5197 case LT:
e236a9ff 5198 fputs ("<=", file); break;
c85b8963 5199 case LE:
e236a9ff 5200 fputs ("<", file); break;
c85b8963 5201 case LEU:
e236a9ff 5202 fputs ("<<", file); break;
c85b8963 5203 case LTU:
e236a9ff 5204 fputs ("<<=", file); break;
c85b8963 5205 default:
144d51f9 5206 gcc_unreachable ();
23f6f34f 5207 }
c85b8963
TG
5208 return;
5209 case 'k':
144d51f9
NS
5210 gcc_assert (GET_CODE (x) == CONST_INT);
5211 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5212 return;
520babc7 5213 case 'Q':
144d51f9
NS
5214 gcc_assert (GET_CODE (x) == CONST_INT);
5215 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5216 return;
c8d6697c 5217 case 'L':
144d51f9
NS
5218 gcc_assert (GET_CODE (x) == CONST_INT);
5219 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5220 return;
4802a0d6 5221 case 'O':
144d51f9
NS
5222 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5223 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5224 return;
520babc7 5225 case 'p':
144d51f9
NS
5226 gcc_assert (GET_CODE (x) == CONST_INT);
5227 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5228 return;
c8d6697c 5229 case 'P':
144d51f9
NS
5230 gcc_assert (GET_CODE (x) == CONST_INT);
5231 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5232 return;
c85b8963
TG
5233 case 'I':
5234 if (GET_CODE (x) == CONST_INT)
5235 fputs ("i", file);
5236 return;
188538df 5237 case 'M':
2414e0e2 5238 case 'F':
188538df
TG
5239 switch (GET_CODE (XEXP (x, 0)))
5240 {
5241 case PRE_DEC:
5242 case PRE_INC:
f38b27c7
JL
5243 if (ASSEMBLER_DIALECT == 0)
5244 fputs ("s,mb", file);
5245 else
5246 fputs (",mb", file);
188538df
TG
5247 break;
5248 case POST_DEC:
5249 case POST_INC:
f38b27c7
JL
5250 if (ASSEMBLER_DIALECT == 0)
5251 fputs ("s,ma", file);
5252 else
5253 fputs (",ma", file);
188538df 5254 break;
2414e0e2 5255 case PLUS:
d8f95bed
JDA
5256 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5257 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5258 {
5259 if (ASSEMBLER_DIALECT == 0)
5260 fputs ("x", file);
5261 }
5262 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5263 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
f38b27c7
JL
5264 {
5265 if (ASSEMBLER_DIALECT == 0)
5266 fputs ("x,s", file);
5267 else
5268 fputs (",s", file);
5269 }
5270 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
2414e0e2 5271 fputs ("s", file);
188538df
TG
5272 break;
5273 default:
f38b27c7 5274 if (code == 'F' && ASSEMBLER_DIALECT == 0)
2414e0e2 5275 fputs ("s", file);
188538df
TG
5276 break;
5277 }
5278 return;
5279 case 'G':
ae9d61ab 5280 pa_output_global_address (file, x, 0);
ad238e4b
JL
5281 return;
5282 case 'H':
ae9d61ab 5283 pa_output_global_address (file, x, 1);
188538df
TG
5284 return;
5285 case 0: /* Don't do anything special */
5286 break;
a1747d2c
TG
5287 case 'Z':
5288 {
5289 unsigned op[3];
6fda0f5b 5290 compute_zdepwi_operands (INTVAL (x), op);
a1747d2c
TG
5291 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5292 return;
5293 }
520babc7
JL
5294 case 'z':
5295 {
5296 unsigned op[3];
5297 compute_zdepdi_operands (INTVAL (x), op);
5298 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5299 return;
5300 }
11881f37
AM
5301 case 'c':
5302 /* We can get here from a .vtable_inherit due to our
5303 CONSTANT_ADDRESS_P rejecting perfectly good constant
5304 addresses. */
5305 break;
188538df 5306 default:
144d51f9 5307 gcc_unreachable ();
188538df
TG
5308 }
5309 if (GET_CODE (x) == REG)
80225b66 5310 {
3ba1236f 5311 fputs (reg_names [REGNO (x)], file);
520babc7
JL
5312 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5313 {
5314 fputs ("R", file);
5315 return;
5316 }
5317 if (FP_REG_P (x)
5318 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5319 && (REGNO (x) & 1) == 0)
3ba1236f 5320 fputs ("L", file);
80225b66 5321 }
188538df
TG
5322 else if (GET_CODE (x) == MEM)
5323 {
5324 int size = GET_MODE_SIZE (GET_MODE (x));
478a4495 5325 rtx base = NULL_RTX;
188538df
TG
5326 switch (GET_CODE (XEXP (x, 0)))
5327 {
5328 case PRE_DEC:
5329 case POST_DEC:
520babc7 5330 base = XEXP (XEXP (x, 0), 0);
d2d28085 5331 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
188538df
TG
5332 break;
5333 case PRE_INC:
5334 case POST_INC:
520babc7 5335 base = XEXP (XEXP (x, 0), 0);
d2d28085 5336 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
188538df 5337 break;
d8f95bed
JDA
5338 case PLUS:
5339 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
d2d28085 5340 fprintf (file, "%s(%s)",
2414e0e2
JL
5341 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5342 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
d8f95bed 5343 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
d2d28085 5344 fprintf (file, "%s(%s)",
2414e0e2
JL
5345 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5346 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
d8f95bed
JDA
5347 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5348 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5349 {
5350 /* Because the REG_POINTER flag can get lost during reload,
1a04ac2b 5351 pa_legitimate_address_p canonicalizes the order of the
d8f95bed
JDA
5352 index and base registers in the combined move patterns. */
5353 rtx base = XEXP (XEXP (x, 0), 1);
5354 rtx index = XEXP (XEXP (x, 0), 0);
5355
5356 fprintf (file, "%s(%s)",
5357 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5358 }
2414e0e2
JL
5359 else
5360 output_address (XEXP (x, 0));
188538df 5361 break;
d8f95bed
JDA
5362 default:
5363 output_address (XEXP (x, 0));
5364 break;
188538df
TG
5365 }
5366 }
188538df
TG
5367 else
5368 output_addr_const (file, x);
5369}
5370
fe19a83d 5371/* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
188538df
TG
5372
5373void
ae9d61ab 5374pa_output_global_address (FILE *file, rtx x, int round_constant)
188538df 5375{
43940f6b
JL
5376
5377 /* Imagine (high (const (plus ...))). */
5378 if (GET_CODE (x) == HIGH)
5379 x = XEXP (x, 0);
5380
519104fe 5381 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
744b2d61 5382 output_addr_const (file, x);
6bb36601 5383 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
188538df 5384 {
744b2d61 5385 output_addr_const (file, x);
e236a9ff 5386 fputs ("-$global$", file);
188538df
TG
5387 }
5388 else if (GET_CODE (x) == CONST)
5389 {
519104fe 5390 const char *sep = "";
188538df 5391 int offset = 0; /* assembler wants -$global$ at end */
516c2342 5392 rtx base = NULL_RTX;
23f6f34f 5393
144d51f9 5394 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
188538df 5395 {
144d51f9 5396 case SYMBOL_REF:
188538df
TG
5397 base = XEXP (XEXP (x, 0), 0);
5398 output_addr_const (file, base);
144d51f9
NS
5399 break;
5400 case CONST_INT:
5401 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5402 break;
5403 default:
5404 gcc_unreachable ();
188538df 5405 }
188538df 5406
144d51f9 5407 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
188538df 5408 {
144d51f9 5409 case SYMBOL_REF:
188538df
TG
5410 base = XEXP (XEXP (x, 0), 1);
5411 output_addr_const (file, base);
144d51f9
NS
5412 break;
5413 case CONST_INT:
5414 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5415 break;
5416 default:
5417 gcc_unreachable ();
188538df 5418 }
188538df 5419
ad238e4b
JL
5420 /* How bogus. The compiler is apparently responsible for
5421 rounding the constant if it uses an LR field selector.
5422
5423 The linker and/or assembler seem a better place since
5424 they have to do this kind of thing already.
5425
5426 If we fail to do this, HP's optimizing linker may eliminate
5427 an addil, but not update the ldw/stw/ldo instruction that
5428 uses the result of the addil. */
5429 if (round_constant)
5430 offset = ((offset + 0x1000) & ~0x1fff);
5431
144d51f9 5432 switch (GET_CODE (XEXP (x, 0)))
188538df 5433 {
144d51f9 5434 case PLUS:
188538df
TG
5435 if (offset < 0)
5436 {
5437 offset = -offset;
5438 sep = "-";
5439 }
5440 else
5441 sep = "+";
144d51f9
NS
5442 break;
5443
5444 case MINUS:
5445 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5446 sep = "-";
5447 break;
188538df 5448
144d51f9
NS
5449 default:
5450 gcc_unreachable ();
5451 }
5452
519104fe 5453 if (!read_only_operand (base, VOIDmode) && !flag_pic)
e236a9ff 5454 fputs ("-$global$", file);
ad238e4b 5455 if (offset)
831c1763 5456 fprintf (file, "%s%d", sep, offset);
188538df
TG
5457 }
5458 else
5459 output_addr_const (file, x);
5460}
5461
1bc7c5b6
ZW
5462/* Output boilerplate text to appear at the beginning of the file.
5463 There are several possible versions. */
5464#define aputs(x) fputs(x, asm_out_file)
5465static inline void
b7849684 5466pa_file_start_level (void)
1bc7c5b6
ZW
5467{
5468 if (TARGET_64BIT)
5469 aputs ("\t.LEVEL 2.0w\n");
5470 else if (TARGET_PA_20)
5471 aputs ("\t.LEVEL 2.0\n");
5472 else if (TARGET_PA_11)
5473 aputs ("\t.LEVEL 1.1\n");
5474 else
5475 aputs ("\t.LEVEL 1.0\n");
5476}
5477
5478static inline void
b7849684 5479pa_file_start_space (int sortspace)
1bc7c5b6
ZW
5480{
5481 aputs ("\t.SPACE $PRIVATE$");
5482 if (sortspace)
5483 aputs (",SORT=16");
57d138a9
JDA
5484 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5485 if (flag_tm)
5486 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5487 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5488 "\n\t.SPACE $TEXT$");
1bc7c5b6
ZW
5489 if (sortspace)
5490 aputs (",SORT=8");
5491 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
57d138a9 5492 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
1bc7c5b6
ZW
5493}
5494
5495static inline void
b7849684 5496pa_file_start_file (int want_version)
1bc7c5b6
ZW
5497{
5498 if (write_symbols != NO_DEBUG)
5499 {
5500 output_file_directive (asm_out_file, main_input_filename);
5501 if (want_version)
5502 aputs ("\t.version\t\"01.01\"\n");
5503 }
5504}
5505
5506static inline void
b7849684 5507pa_file_start_mcount (const char *aswhat)
1bc7c5b6
ZW
5508{
5509 if (profile_flag)
5510 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5511}
5512
/* File-start boilerplate for ELF: architecture level, the _mcount
   import (as ENTRY), then the file directive without a .version.  */
static void
pa_elf_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_mcount ("ENTRY");
  pa_file_start_file (0);
}
5520
/* File-start boilerplate for SOM: architecture level, unsorted space
   declarations, the $global$ and $$dyncall imports, the _mcount import
   (as CODE), then the file directive without a .version.  */
static void
pa_som_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (0);
  aputs ("\t.IMPORT $global$,DATA\n");
  aputs ("\t.IMPORT $$dyncall,MILLICODE\n");
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
5531
/* File-start boilerplate for Linux.  Unlike the other variants the
   file directive (with .version) is emitted first, followed by the
   architecture level and the _mcount import (as CODE).  */
static void
pa_linux_file_start (void)
{
  pa_file_start_file (1);
  pa_file_start_level ();
  pa_file_start_mcount ("CODE");
}
5539
/* File-start boilerplate for 64-bit HP-UX with GAS: architecture
   level, a type directive marking _mcount as a function when profiling
   (only if ASM_OUTPUT_TYPE_DIRECTIVE is available), then the file
   directive with .version.  */
static void
pa_hpux64_gas_file_start (void)
{
  pa_file_start_level ();

#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  if (profile_flag)
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
#endif

  pa_file_start_file (1);
}
5550
/* File-start boilerplate for 64-bit HP-UX with the HP assembler:
   architecture level, sorted space declarations, the _mcount import
   (as CODE), then the file directive without a .version.  */
static void
pa_hpux64_hpas_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (1);
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
5559#undef aputs
5560
7aaf280e
JDA
5561/* Search the deferred plabel list for SYMBOL and return its internal
5562 label. If an entry for SYMBOL is not found, a new entry is created. */
5563
5564rtx
ae9d61ab 5565pa_get_deferred_plabel (rtx symbol)
a02aa5b0 5566{
744b2d61 5567 const char *fname = XSTR (symbol, 0);
a02aa5b0
JDA
5568 size_t i;
5569
5570 /* See if we have already put this function on the list of deferred
5571 plabels. This list is generally small, so a liner search is not
5572 too ugly. If it proves too slow replace it with something faster. */
5573 for (i = 0; i < n_deferred_plabels; i++)
744b2d61 5574 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
a02aa5b0
JDA
5575 break;
5576
5577 /* If the deferred plabel list is empty, or this entry was not found
5578 on the list, create a new entry on the list. */
5579 if (deferred_plabels == NULL || i == n_deferred_plabels)
5580 {
744b2d61
JDA
5581 tree id;
5582
a02aa5b0 5583 if (deferred_plabels == 0)
a9429e29 5584 deferred_plabels = ggc_alloc_deferred_plabel ();
a02aa5b0 5585 else
a9429e29
LB
5586 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5587 deferred_plabels,
5588 n_deferred_plabels + 1);
a02aa5b0
JDA
5589
5590 i = n_deferred_plabels++;
5591 deferred_plabels[i].internal_label = gen_label_rtx ();
744b2d61 5592 deferred_plabels[i].symbol = symbol;
a02aa5b0 5593
744b2d61
JDA
5594 /* Gross. We have just implicitly taken the address of this
5595 function. Mark it in the same manner as assemble_name. */
5596 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5597 if (id)
5598 mark_referenced (id);
a02aa5b0
JDA
5599 }
5600
7aaf280e 5601 return deferred_plabels[i].internal_label;
a02aa5b0
JDA
5602}
5603
a5fe455b 5604static void
b7849684 5605output_deferred_plabels (void)
359255a9 5606{
0f8e3849 5607 size_t i;
1a83bfc3
JDA
5608
5609 /* If we have some deferred plabels, then we need to switch into the
5610 data or readonly data section, and align it to a 4 byte boundary
6416ae7f 5611 before outputting the deferred plabels. */
359255a9
JL
5612 if (n_deferred_plabels)
5613 {
1a83bfc3 5614 switch_to_section (flag_pic ? data_section : readonly_data_section);
a5fe455b 5615 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
359255a9
JL
5616 }
5617
5618 /* Now output the deferred plabels. */
5619 for (i = 0; i < n_deferred_plabels; i++)
5620 {
ecc418c4 5621 targetm.asm_out.internal_label (asm_out_file, "L",
a5fe455b 5622 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
744b2d61 5623 assemble_integer (deferred_plabels[i].symbol,
3d9268b6 5624 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
359255a9
JL
5625 }
5626}
5627
50bbeefb
JDA
5628/* Initialize optabs to point to emulation routines. */
5629
c15c90bb 5630static void
50bbeefb 5631pa_init_libfuncs (void)
c15c90bb 5632{
50bbeefb
JDA
5633 if (HPUX_LONG_DOUBLE_LIBRARY)
5634 {
5635 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5636 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5637 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5638 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5639 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5640 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5641 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5642 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5643 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5644
5645 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5646 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5647 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5648 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5649 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5650 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5651 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5652
5653 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5654 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5655 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5656 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5657
5658 set_conv_libfunc (sfix_optab, SImode, TFmode,
5659 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5660 : "_U_Qfcnvfxt_quad_to_sgl");
5661 set_conv_libfunc (sfix_optab, DImode, TFmode,
5662 "_U_Qfcnvfxt_quad_to_dbl");
5663 set_conv_libfunc (ufix_optab, SImode, TFmode,
5664 "_U_Qfcnvfxt_quad_to_usgl");
5665 set_conv_libfunc (ufix_optab, DImode, TFmode,
5666 "_U_Qfcnvfxt_quad_to_udbl");
5667
5668 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5669 "_U_Qfcnvxf_sgl_to_quad");
5670 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5671 "_U_Qfcnvxf_dbl_to_quad");
5672 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5673 "_U_Qfcnvxf_usgl_to_quad");
5674 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5675 "_U_Qfcnvxf_udbl_to_quad");
5676 }
33a55f29
RH
5677
5678 if (TARGET_SYNC_LIBCALL)
5679 init_sync_libfuncs (UNITS_PER_WORD);
c15c90bb 5680}
c15c90bb 5681
188538df
TG
/* HP's millicode routines mean something special to the assembler, so
   each one must be imported before use.  The tables below record which
   of them have already been imported.  */

enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli (enum millicodes);
static char imported[(int) end1000];
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
/* Template for the import directive; the four dots are overwritten
   with the routine name.  */
static const char import_string[] = ".IMPORT $$....,MILLICODE";
/* Offset of the first dot in import_string.  */
#define MILLI_START 10

/* Emit the .IMPORT directive for millicode routine CODE the first time
   it is requested; later requests for the same routine do nothing.  */
static void
import_milli (enum millicodes code)
{
  char directive[sizeof (import_string)];
  int idx = (int) code;

  if (imported[idx])
    return;

  imported[idx] = 1;
  strcpy (directive, import_string);
  /* Every entry of milli_names is exactly four characters long, so
     this patches the placeholder without touching the rest.  */
  memcpy (directive + MILLI_START, milli_names[idx], 4);
  output_asm_insn (directive, 0);
}
5705
23f6f34f 5706/* The register constraints have put the operands and return value in
fe19a83d 5707 the proper registers. */
188538df 5708
519104fe 5709const char *
ae9d61ab 5710pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx insn)
188538df 5711{
9b38c2fa 5712 import_milli (mulI);
ae9d61ab 5713 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
188538df
TG
5714}
5715
fe19a83d 5716/* Emit the rtl for doing a division by a constant. */
188538df 5717
9b38c2fa 5718/* Do magic division millicodes exist for this value? */
ae9d61ab 5719const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
188538df 5720
23f6f34f 5721/* We'll use an array to keep track of the magic millicodes and
188538df 5722 whether or not we've used them already. [n][0] is signed, [n][1] is
fe19a83d 5723 unsigned. */
188538df 5724
188538df
TG
5725static int div_milli[16][2];
5726
188538df 5727int
ae9d61ab 5728pa_emit_hpdiv_const (rtx *operands, int unsignedp)
188538df
TG
5729{
5730 if (GET_CODE (operands[2]) == CONST_INT
5731 && INTVAL (operands[2]) > 0
5732 && INTVAL (operands[2]) < 16
ae9d61ab 5733 && pa_magic_milli[INTVAL (operands[2])])
188538df 5734 {
7d8b1412
AM
5735 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5736
ad2c71b7 5737 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
188538df 5738 emit
92fd5e41
KH
5739 (gen_rtx_PARALLEL
5740 (VOIDmode,
bd83f9a5 5741 gen_rtvec (6, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29),
ad2c71b7
JL
5742 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5743 SImode,
5744 gen_rtx_REG (SImode, 26),
5745 operands[2])),
bd83f9a5 5746 gen_rtx_CLOBBER (VOIDmode, operands[4]),
ad2c71b7
JL
5747 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5748 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5749 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
7d8b1412 5750 gen_rtx_CLOBBER (VOIDmode, ret))));
ad2c71b7 5751 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
188538df
TG
5752 return 1;
5753 }
5754 return 0;
5755}
5756
519104fe 5757const char *
ae9d61ab 5758pa_output_div_insn (rtx *operands, int unsignedp, rtx insn)
188538df
TG
5759{
5760 int divisor;
23f6f34f
TG
5761
5762 /* If the divisor is a constant, try to use one of the special
188538df
TG
5763 opcodes .*/
5764 if (GET_CODE (operands[0]) == CONST_INT)
5765 {
2c4ff308 5766 static char buf[100];
188538df
TG
5767 divisor = INTVAL (operands[0]);
5768 if (!div_milli[divisor][unsignedp])
5769 {
2c4ff308 5770 div_milli[divisor][unsignedp] = 1;
188538df
TG
5771 if (unsignedp)
5772 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5773 else
5774 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
188538df
TG
5775 }
5776 if (unsignedp)
2c4ff308 5777 {
4a0a75dd
KG
5778 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5779 INTVAL (operands[0]));
ae9d61ab
JDA
5780 return pa_output_millicode_call (insn,
5781 gen_rtx_SYMBOL_REF (SImode, buf));
2c4ff308
JL
5782 }
5783 else
5784 {
4a0a75dd
KG
5785 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5786 INTVAL (operands[0]));
ae9d61ab
JDA
5787 return pa_output_millicode_call (insn,
5788 gen_rtx_SYMBOL_REF (SImode, buf));
2c4ff308 5789 }
188538df 5790 }
fe19a83d 5791 /* Divisor isn't a special constant. */
188538df
TG
5792 else
5793 {
5794 if (unsignedp)
5795 {
5796 import_milli (divU);
ae9d61ab 5797 return pa_output_millicode_call (insn,
ad2c71b7 5798 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
188538df
TG
5799 }
5800 else
5801 {
5802 import_milli (divI);
ae9d61ab 5803 return pa_output_millicode_call (insn,
ad2c71b7 5804 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
188538df
TG
5805 }
5806 }
5807}
5808
fe19a83d 5809/* Output a $$rem millicode to do mod. */
188538df 5810
519104fe 5811const char *
ae9d61ab 5812pa_output_mod_insn (int unsignedp, rtx insn)
188538df
TG
5813{
5814 if (unsignedp)
5815 {
5816 import_milli (remU);
ae9d61ab
JDA
5817 return pa_output_millicode_call (insn,
5818 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
188538df
TG
5819 }
5820 else
5821 {
5822 import_milli (remI);
ae9d61ab
JDA
5823 return pa_output_millicode_call (insn,
5824 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
188538df
TG
5825 }
5826}
5827
5828void
ae9d61ab 5829pa_output_arg_descriptor (rtx call_insn)
188538df 5830{
519104fe 5831 const char *arg_regs[4];
188538df 5832 enum machine_mode arg_mode;
80225b66 5833 rtx link;
188538df
TG
5834 int i, output_flag = 0;
5835 int regno;
23f6f34f 5836
520babc7 5837 /* We neither need nor want argument location descriptors for the
e25724d8
AM
5838 64bit runtime environment or the ELF32 environment. */
5839 if (TARGET_64BIT || TARGET_ELF32)
520babc7
JL
5840 return;
5841
188538df
TG
5842 for (i = 0; i < 4; i++)
5843 arg_regs[i] = 0;
5844
2822d96e
JL
5845 /* Specify explicitly that no argument relocations should take place
5846 if using the portable runtime calling conventions. */
5847 if (TARGET_PORTABLE_RUNTIME)
5848 {
e236a9ff
JL
5849 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5850 asm_out_file);
2822d96e
JL
5851 return;
5852 }
5853
b64925dc 5854 gcc_assert (CALL_P (call_insn));
144d51f9
NS
5855 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5856 link; link = XEXP (link, 1))
188538df 5857 {
80225b66 5858 rtx use = XEXP (link, 0);
3529be83 5859
80225b66
TG
5860 if (! (GET_CODE (use) == USE
5861 && GET_CODE (XEXP (use, 0)) == REG
5862 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
3529be83
RS
5863 continue;
5864
80225b66
TG
5865 arg_mode = GET_MODE (XEXP (use, 0));
5866 regno = REGNO (XEXP (use, 0));
188538df 5867 if (regno >= 23 && regno <= 26)
a9d91d6f
RS
5868 {
5869 arg_regs[26 - regno] = "GR";
5870 if (arg_mode == DImode)
5871 arg_regs[25 - regno] = "GR";
5872 }
80225b66 5873 else if (regno >= 32 && regno <= 39)
188538df
TG
5874 {
5875 if (arg_mode == SFmode)
80225b66 5876 arg_regs[(regno - 32) / 2] = "FR";
d0616842 5877 else
188538df 5878 {
22d6e660 5879#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
80225b66
TG
5880 arg_regs[(regno - 34) / 2] = "FR";
5881 arg_regs[(regno - 34) / 2 + 1] = "FU";
188538df 5882#else
80225b66
TG
5883 arg_regs[(regno - 34) / 2] = "FU";
5884 arg_regs[(regno - 34) / 2 + 1] = "FR";
188538df
TG
5885#endif
5886 }
188538df
TG
5887 }
5888 }
5889 fputs ("\t.CALL ", asm_out_file);
5890 for (i = 0; i < 4; i++)
5891 {
5892 if (arg_regs[i])
5893 {
5894 if (output_flag++)
5895 fputc (',', asm_out_file);
5896 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
5897 }
5898 }
5899 fputc ('\n', asm_out_file);
5900}
5901\f
1a04ac2b
JDA
5902/* Inform reload about cases where moving X with a mode MODE to or from
5903 a register in RCLASS requires an extra scratch or immediate register.
5904 Return the class needed for the immediate register. */
483d7ad3 5905
a87cf97e
JR
5906static reg_class_t
5907pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
ec963611
JDA
5908 enum machine_mode mode, secondary_reload_info *sri)
5909{
715a567d 5910 int regno;
a87cf97e 5911 enum reg_class rclass = (enum reg_class) rclass_i;
e236a9ff 5912
ec963611 5913 /* Handle the easy stuff first. */
0a2aaacc 5914 if (rclass == R1_REGS)
ec963611 5915 return NO_REGS;
e236a9ff 5916
ec963611
JDA
5917 if (REG_P (x))
5918 {
5919 regno = REGNO (x);
0a2aaacc 5920 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
ec963611
JDA
5921 return NO_REGS;
5922 }
69f8a2d6
JDA
5923 else
5924 regno = -1;
188538df 5925
ec963611
JDA
5926 /* If we have something like (mem (mem (...)), we can safely assume the
5927 inner MEM will end up in a general register after reloading, so there's
5928 no need for a secondary reload. */
5929 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
5930 return NO_REGS;
188538df 5931
6bb36601 5932 /* Trying to load a constant into a FP register during PIC code
1a04ac2b
JDA
5933 generation requires %r1 as a scratch register. For float modes,
5934 the only legitimate constant is CONST0_RTX. However, there are
5935 a few patterns that accept constant double operands. */
7ee72796 5936 if (flag_pic
0a2aaacc 5937 && FP_REG_CLASS_P (rclass)
ec963611 5938 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
e236a9ff 5939 {
1a04ac2b
JDA
5940 switch (mode)
5941 {
5942 case SImode:
5943 sri->icode = CODE_FOR_reload_insi_r1;
5944 break;
5945
5946 case DImode:
5947 sri->icode = CODE_FOR_reload_indi_r1;
5948 break;
5949
5950 case SFmode:
5951 sri->icode = CODE_FOR_reload_insf_r1;
5952 break;
5953
5954 case DFmode:
5955 sri->icode = CODE_FOR_reload_indf_r1;
5956 break;
5957
5958 default:
5959 gcc_unreachable ();
5960 }
ec963611 5961 return NO_REGS;
e236a9ff 5962 }
e236a9ff 5963
1a04ac2b
JDA
5964 /* Secondary reloads of symbolic expressions require %r1 as a scratch
5965 register when we're generating PIC code or when the operand isn't
715a567d 5966 readonly. */
ae9d61ab 5967 if (pa_symbolic_expression_p (x))
715a567d
JDA
5968 {
5969 if (GET_CODE (x) == HIGH)
5970 x = XEXP (x, 0);
5971
5972 if (flag_pic || !read_only_operand (x, VOIDmode))
5973 {
1a04ac2b
JDA
5974 switch (mode)
5975 {
5976 case SImode:
5977 sri->icode = CODE_FOR_reload_insi_r1;
5978 break;
5979
5980 case DImode:
5981 sri->icode = CODE_FOR_reload_indi_r1;
5982 break;
5983
5984 default:
5985 gcc_unreachable ();
5986 }
715a567d
JDA
5987 return NO_REGS;
5988 }
5989 }
5990
ec963611
JDA
5991 /* Profiling showed the PA port spends about 1.3% of its compilation
5992 time in true_regnum from calls inside pa_secondary_reload_class. */
5993 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
5994 regno = true_regnum (x);
39dfb55a 5995
1a04ac2b 5996 /* Handle reloads for floating point loads and stores. */
6982c5d4 5997 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
0a2aaacc 5998 && FP_REG_CLASS_P (rclass))
6982c5d4 5999 {
1a04ac2b 6000 if (MEM_P (x))
6982c5d4
JDA
6001 {
6002 x = XEXP (x, 0);
6003
6004 /* We don't need an intermediate for indexed and LO_SUM DLT
6005 memory addresses. When INT14_OK_STRICT is true, it might
6006 appear that we could directly allow register indirect
6007 memory addresses. However, this doesn't work because we
6008 don't support SUBREGs in floating-point register copies
6009 and reload doesn't tell us when it's going to use a SUBREG. */
6010 if (IS_INDEX_ADDR_P (x)
6011 || IS_LO_SUM_DLT_ADDR_P (x))
6012 return NO_REGS;
6013
1a04ac2b 6014 /* Request intermediate general register. */
6982c5d4
JDA
6015 return GENERAL_REGS;
6016 }
6017
6018 /* Request a secondary reload with a general scratch register
073a8998 6019 for everything else. ??? Could symbolic operands be handled
6982c5d4 6020 directly when generating non-pic PA 2.0 code? */
f9621cc4
RS
6021 sri->icode = (in_p
6022 ? direct_optab_handler (reload_in_optab, mode)
6023 : direct_optab_handler (reload_out_optab, mode));
6982c5d4
JDA
6024 return NO_REGS;
6025 }
6026
483d7ad3
JDA
6027 /* A SAR<->FP register copy requires an intermediate general register
6028 and secondary memory. We need a secondary reload with a general
6029 scratch register for spills. */
6030 if (rclass == SHIFT_REGS)
ec963611 6031 {
483d7ad3
JDA
6032 /* Handle spill. */
6033 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6034 {
6035 sri->icode = (in_p
6036 ? direct_optab_handler (reload_in_optab, mode)
6037 : direct_optab_handler (reload_out_optab, mode));
6038 return NO_REGS;
6039 }
6040
6041 /* Handle FP copy. */
6042 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6043 return GENERAL_REGS;
ec963611 6044 }
fa5e5c1e 6045
26ee120d 6046 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
483d7ad3
JDA
6047 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6048 && FP_REG_CLASS_P (rclass))
6049 return GENERAL_REGS;
43940f6b 6050
fa5e5c1e 6051 return NO_REGS;
188538df
TG
6052}
6053
16c16a24
JDA
6054/* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6055 is only marked as live on entry by df-scan when it is a fixed
6056 register. It isn't a fixed register in the 64-bit runtime,
6057 so we need to mark it here. */
6058
6059static void
6060pa_extra_live_on_entry (bitmap regs)
6061{
6062 if (TARGET_64BIT)
6063 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6064}
6065
6066/* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6067 to prevent it from being deleted. */
6068
6069rtx
6070pa_eh_return_handler_rtx (void)
6071{
6072 rtx tmp;
6073
bc707992 6074 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
16c16a24
JDA
6075 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6076 tmp = gen_rtx_MEM (word_mode, tmp);
6077 tmp->volatil = 1;
6078 return tmp;
6079}
6080
8cd5a4e0
RH
6081/* In the 32-bit runtime, arguments larger than eight bytes are passed
6082 by invisible reference. As a GCC extension, we also pass anything
6083 with a zero or variable size by reference.
6084
6085 The 64-bit runtime does not describe passing any types by invisible
6086 reference. The internals of GCC can't currently handle passing
6087 empty structures, and zero or variable length arrays when they are
6088 not passed entirely on the stack or by reference. Thus, as a GCC
6089 extension, we pass these types by reference. The HP compiler doesn't
6090 support these types, so hopefully there shouldn't be any compatibility
6091 issues. This may have to be revisited when HP releases a C99 compiler
6092 or updates the ABI. */
6093
6094static bool
d5cc9181 6095pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
586de218 6096 enum machine_mode mode, const_tree type,
8cd5a4e0
RH
6097 bool named ATTRIBUTE_UNUSED)
6098{
6099 HOST_WIDE_INT size;
6100
6101 if (type)
6102 size = int_size_in_bytes (type);
6103 else
6104 size = GET_MODE_SIZE (mode);
6105
6106 if (TARGET_64BIT)
6107 return size <= 0;
6108 else
6109 return size <= 0 || size > 8;
6110}
6111
188538df 6112enum direction
ae9d61ab 6113pa_function_arg_padding (enum machine_mode mode, const_tree type)
188538df 6114{
9dff28ab 6115 if (mode == BLKmode
c3e39a47
JDA
6116 || (TARGET_64BIT
6117 && type
6118 && (AGGREGATE_TYPE_P (type)
6119 || TREE_CODE (type) == COMPLEX_TYPE
6120 || TREE_CODE (type) == VECTOR_TYPE)))
9dff28ab
JDA
6121 {
6122 /* Return none if justification is not required. */
6123 if (type
6124 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6125 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6126 return none;
6127
6128 /* The directions set here are ignored when a BLKmode argument larger
6129 than a word is placed in a register. Different code is used for
6130 the stack and registers. This makes it difficult to have a
6131 consistent data representation for both the stack and registers.
6132 For both runtimes, the justification and padding for arguments on
6133 the stack and in registers should be identical. */
6134 if (TARGET_64BIT)
6135 /* The 64-bit runtime specifies left justification for aggregates. */
6136 return upward;
188538df 6137 else
9dff28ab
JDA
6138 /* The 32-bit runtime architecture specifies right justification.
6139 When the argument is passed on the stack, the argument is padded
6140 with garbage on the left. The HP compiler pads with zeros. */
6141 return downward;
188538df 6142 }
9dff28ab
JDA
6143
6144 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
188538df 6145 return downward;
188538df
TG
6146 else
6147 return none;
6148}
6149
188538df 6150\f
648d2ffc
RH
6151/* Do what is necessary for `va_start'. We look at the current function
6152 to determine if stdargs or varargs is used and fill in an initial
6153 va_list. A pointer to this constructor is returned. */
188538df 6154
3f12cd9b 6155static rtx
b7849684 6156hppa_builtin_saveregs (void)
188538df 6157{
5e32727c 6158 rtx offset, dest;
188538df 6159 tree fntype = TREE_TYPE (current_function_decl);
f38958e8 6160 int argadj = ((!stdarg_p (fntype))
188538df
TG
6161 ? UNITS_PER_WORD : 0);
6162
6163 if (argadj)
0a81f074 6164 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
188538df 6165 else
38173d38 6166 offset = crtl->args.arg_offset_rtx;
17e1dfa2 6167
520babc7
JL
6168 if (TARGET_64BIT)
6169 {
6170 int i, off;
6619e96c 6171
520babc7
JL
6172 /* Adjust for varargs/stdarg differences. */
6173 if (argadj)
0a81f074 6174 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
520babc7 6175 else
38173d38 6176 offset = crtl->args.arg_offset_rtx;
520babc7
JL
6177
6178 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6179 from the incoming arg pointer and growing to larger addresses. */
6180 for (i = 26, off = -64; i >= 19; i--, off += 8)
6181 emit_move_insn (gen_rtx_MEM (word_mode,
0a81f074
RS
6182 plus_constant (Pmode,
6183 arg_pointer_rtx, off)),
520babc7
JL
6184 gen_rtx_REG (word_mode, i));
6185
6186 /* The incoming args pointer points just beyond the flushback area;
f710504c 6187 normally this is not a serious concern. However, when we are doing
520babc7
JL
6188 varargs/stdargs we want to make the arg pointer point to the start
6189 of the incoming argument area. */
6190 emit_move_insn (virtual_incoming_args_rtx,
0a81f074 6191 plus_constant (Pmode, arg_pointer_rtx, -64));
520babc7
JL
6192
6193 /* Now return a pointer to the first anonymous argument. */
6194 return copy_to_reg (expand_binop (Pmode, add_optab,
6195 virtual_incoming_args_rtx,
6196 offset, 0, 0, OPTAB_LIB_WIDEN));
6197 }
6198
fe19a83d 6199 /* Store general registers on the stack. */
ad2c71b7 6200 dest = gen_rtx_MEM (BLKmode,
0a81f074 6201 plus_constant (Pmode, crtl->args.internal_arg_pointer,
ad2c71b7 6202 -16));
ba4828e0 6203 set_mem_alias_set (dest, get_varargs_alias_set ());
8ac61af7 6204 set_mem_align (dest, BITS_PER_WORD);
c6b97fac 6205 move_block_from_reg (23, dest, 4);
5e32727c 6206
39dfb55a
JL
6207 /* move_block_from_reg will emit code to store the argument registers
6208 individually as scalar stores.
6209
6210 However, other insns may later load from the same addresses for
956d6950 6211 a structure load (passing a struct to a varargs routine).
39dfb55a
JL
6212
6213 The alias code assumes that such aliasing can never happen, so we
6214 have to keep memory referencing insns from moving up beyond the
6215 last argument register store. So we emit a blockage insn here. */
6216 emit_insn (gen_blockage ());
6217
17e1dfa2 6218 return copy_to_reg (expand_binop (Pmode, add_optab,
38173d38 6219 crtl->args.internal_arg_pointer,
17e1dfa2 6220 offset, 0, 0, OPTAB_LIB_WIDEN));
188538df 6221}
d2a94ec0 6222
d7bd8aeb 6223static void
b7849684 6224hppa_va_start (tree valist, rtx nextarg)
ca5f4364
RH
6225{
6226 nextarg = expand_builtin_saveregs ();
e5faf155 6227 std_expand_builtin_va_start (valist, nextarg);
ca5f4364
RH
6228}
6229
8101c928 6230static tree
726a989a
RB
6231hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6232 gimple_seq *post_p)
ca5f4364 6233{
520babc7
JL
6234 if (TARGET_64BIT)
6235 {
8101c928 6236 /* Args grow upward. We can use the generic routines. */
af064de5 6237 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
ca5f4364 6238 }
9dff28ab 6239 else /* !TARGET_64BIT */
ca5f4364 6240 {
8101c928
RH
6241 tree ptr = build_pointer_type (type);
6242 tree valist_type;
6243 tree t, u;
6244 unsigned int size, ofs;
af064de5 6245 bool indirect;
ca5f4364 6246
af064de5 6247 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
8101c928 6248 if (indirect)
9dff28ab 6249 {
8101c928
RH
6250 type = ptr;
6251 ptr = build_pointer_type (type);
ca5f4364 6252 }
8101c928
RH
6253 size = int_size_in_bytes (type);
6254 valist_type = TREE_TYPE (valist);
9dff28ab 6255
8101c928 6256 /* Args grow down. Not handled by generic routines. */
9dff28ab 6257
5be014d5
AP
6258 u = fold_convert (sizetype, size_in_bytes (type));
6259 u = fold_build1 (NEGATE_EXPR, sizetype, u);
5d49b6a7 6260 t = fold_build_pointer_plus (valist, u);
9dff28ab 6261
e4f1aef1
RG
6262 /* Align to 4 or 8 byte boundary depending on argument size. */
6263
6264 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6265 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
5be014d5 6266 t = fold_convert (valist_type, t);
8101c928 6267
66863d89 6268 t = build2 (MODIFY_EXPR, valist_type, valist, t);
ca5f4364 6269
8101c928
RH
6270 ofs = (8 - size) % 4;
6271 if (ofs != 0)
5d49b6a7 6272 t = fold_build_pointer_plus_hwi (t, ofs);
ca5f4364 6273
8101c928 6274 t = fold_convert (ptr, t);
d6e9821f 6275 t = build_va_arg_indirect_ref (t);
ca5f4364 6276
8101c928 6277 if (indirect)
d6e9821f 6278 t = build_va_arg_indirect_ref (t);
ca5f4364 6279
8101c928
RH
6280 return t;
6281 }
6282}
ca5f4364 6283
83c32f2e
JDA
6284/* True if MODE is valid for the target. By "valid", we mean able to
6285 be manipulated in non-trivial ways. In particular, this means all
6286 the arithmetic is supported.
6287
6288 Currently, TImode is not valid as the HP 64-bit runtime documentation
6289 doesn't document the alignment and calling conventions for this type.
6290 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6291 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6292
6293static bool
6294pa_scalar_mode_supported_p (enum machine_mode mode)
6295{
6296 int precision = GET_MODE_PRECISION (mode);
6297
6298 switch (GET_MODE_CLASS (mode))
6299 {
6300 case MODE_PARTIAL_INT:
6301 case MODE_INT:
6302 if (precision == CHAR_TYPE_SIZE)
6303 return true;
6304 if (precision == SHORT_TYPE_SIZE)
6305 return true;
6306 if (precision == INT_TYPE_SIZE)
6307 return true;
6308 if (precision == LONG_TYPE_SIZE)
6309 return true;
6310 if (precision == LONG_LONG_TYPE_SIZE)
6311 return true;
6312 return false;
6313
6314 case MODE_FLOAT:
6315 if (precision == FLOAT_TYPE_SIZE)
6316 return true;
6317 if (precision == DOUBLE_TYPE_SIZE)
6318 return true;
6319 if (precision == LONG_DOUBLE_TYPE_SIZE)
6320 return true;
6321 return false;
6322
70c1d012
JDA
6323 case MODE_DECIMAL_FLOAT:
6324 return false;
6325
83c32f2e
JDA
6326 default:
6327 gcc_unreachable ();
6328 }
6329}
6330
f5e66865 6331/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
16923e7b 6332 it branches into the delay slot. Otherwise, return FALSE. */
f5e66865
JDA
6333
6334static bool
6335branch_to_delay_slot_p (rtx insn)
6336{
16923e7b
JDA
6337 rtx jump_insn;
6338
f5e66865
JDA
6339 if (dbr_sequence_length ())
6340 return FALSE;
6341
16923e7b
JDA
6342 jump_insn = next_active_insn (JUMP_LABEL (insn));
6343 while (insn)
6344 {
6345 insn = next_active_insn (insn);
6346 if (jump_insn == insn)
6347 return TRUE;
6348
6349 /* We can't rely on the length of asms. So, we return FALSE when
6350 the branch is followed by an asm. */
6351 if (!insn
6352 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6353 || extract_asm_operands (PATTERN (insn)) != NULL_RTX
6354 || get_attr_length (insn) > 0)
6355 break;
6356 }
6357
6358 return FALSE;
f5e66865
JDA
6359}
6360
16923e7b 6361/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
f5e66865
JDA
6362
6363 This occurs when INSN has an unfilled delay slot and is followed
16923e7b
JDA
6364 by an asm. Disaster can occur if the asm is empty and the jump
6365 branches into the delay slot. So, we add a nop in the delay slot
6366 when this occurs. */
f5e66865
JDA
6367
6368static bool
6369branch_needs_nop_p (rtx insn)
6370{
16923e7b 6371 rtx jump_insn;
f5e66865
JDA
6372
6373 if (dbr_sequence_length ())
6374 return FALSE;
6375
16923e7b
JDA
6376 jump_insn = next_active_insn (JUMP_LABEL (insn));
6377 while (insn)
6378 {
6379 insn = next_active_insn (insn);
6380 if (!insn || jump_insn == insn)
6381 return TRUE;
6382
6383 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6384 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6385 && get_attr_length (insn) > 0)
6386 break;
6387 }
6388
6389 return FALSE;
6390}
6391
6392/* Return TRUE if INSN, a forward jump insn, can use nullification
6393 to skip the following instruction. This avoids an extra cycle due
6394 to a mis-predicted branch when we fall through. */
6395
6396static bool
6397use_skip_p (rtx insn)
6398{
6399 rtx jump_insn = next_active_insn (JUMP_LABEL (insn));
6400
6401 while (insn)
6402 {
6403 insn = next_active_insn (insn);
6404
6405 /* We can't rely on the length of asms, so we can't skip asms. */
6406 if (!insn
6407 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6408 || extract_asm_operands (PATTERN (insn)) != NULL_RTX)
6409 break;
6410 if (get_attr_length (insn) == 4
6411 && jump_insn == next_active_insn (insn))
6412 return TRUE;
6413 if (get_attr_length (insn) > 0)
6414 break;
6415 }
6416
6417 return FALSE;
f5e66865
JDA
6418}
6419
23f6f34f
TG
6420/* This routine handles all the normal conditional branch sequences we
6421 might need to generate. It handles compare immediate vs compare
6422 register, nullification of delay slots, varying length branches,
d2364a74 6423 negated branches, and all combinations of the above. It returns the
23f6f34f 6424 output appropriate to emit the branch corresponding to all given
d2364a74
JL
6425 parameters. */
6426
519104fe 6427const char *
ae9d61ab 6428pa_output_cbranch (rtx *operands, int negated, rtx insn)
b1a275e1 6429{
d2364a74 6430 static char buf[100];
16923e7b 6431 bool useskip;
16d74a3c
JDA
6432 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6433 int length = get_attr_length (insn);
6434 int xdelay;
d2364a74 6435
112cdef5 6436 /* A conditional branch to the following instruction (e.g. the delay slot)
02a57c73
JDA
6437 is asking for a disaster. This can happen when not optimizing and
6438 when jump optimization fails.
b1a275e1 6439
7772f0a9
JDA
6440 While it is usually safe to emit nothing, this can fail if the
6441 preceding instruction is a nullified branch with an empty delay
6442 slot and the same branch target as this branch. We could check
6443 for this but jump optimization should eliminate nop jumps. It
6444 is always safe to emit a nop. */
f5e66865 6445 if (branch_to_delay_slot_p (insn))
02a57c73 6446 return "nop";
23f6f34f 6447
ae2ea719
JDA
6448 /* The doubleword form of the cmpib instruction doesn't have the LEU
6449 and GTU conditions while the cmpb instruction does. Since we accept
6450 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6451 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6452 operands[2] = gen_rtx_REG (DImode, 0);
9972f30d
SE
6453 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6454 operands[1] = gen_rtx_REG (DImode, 0);
ae2ea719 6455
b9821af8
JL
6456 /* If this is a long branch with its delay slot unfilled, set `nullify'
6457 as it can nullify the delay slot and save a nop. */
a1b36964 6458 if (length == 8 && dbr_sequence_length () == 0)
b9821af8
JL
6459 nullify = 1;
6460
6461 /* If this is a short forward conditional branch which did not get
6462 its delay slot filled, the delay slot can still be nullified. */
a1b36964 6463 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b9821af8
JL
6464 nullify = forward_branch_p (insn);
6465
23f6f34f 6466 /* A forward branch over a single nullified insn can be done with a
d2364a74
JL
6467 comclr instruction. This avoids a single cycle penalty due to
6468 mis-predicted branch if we fall through (branch not taken). */
16923e7b 6469 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
d2364a74
JL
6470
6471 switch (length)
6472 {
b9821af8
JL
6473 /* All short conditional branches except backwards with an unfilled
6474 delay slot. */
a1b36964 6475 case 4:
d2364a74 6476 if (useskip)
f38b27c7 6477 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
d2364a74 6478 else
f38b27c7 6479 strcpy (buf, "{com%I2b,|cmp%I2b,}");
520babc7
JL
6480 if (GET_MODE (operands[1]) == DImode)
6481 strcat (buf, "*");
d2364a74
JL
6482 if (negated)
6483 strcat (buf, "%B3");
6484 else
6485 strcat (buf, "%S3");
6486 if (useskip)
3b5e5fb3 6487 strcat (buf, " %2,%r1,%%r0");
d2364a74 6488 else if (nullify)
f5e66865
JDA
6489 {
6490 if (branch_needs_nop_p (insn))
6491 strcat (buf, ",n %2,%r1,%0%#");
6492 else
6493 strcat (buf, ",n %2,%r1,%0");
6494 }
23f6f34f 6495 else
dcaeffef 6496 strcat (buf, " %2,%r1,%0");
d2364a74
JL
6497 break;
6498
5bdc5878 6499 /* All long conditionals. Note a short backward branch with an
b9821af8
JL
6500 unfilled delay slot is treated just like a long backward branch
6501 with an unfilled delay slot. */
a1b36964 6502 case 8:
b9821af8 6503 /* Handle weird backwards branch with a filled delay slot
16d74a3c 6504 which is nullified. */
b9821af8
JL
6505 if (dbr_sequence_length () != 0
6506 && ! forward_branch_p (insn)
6507 && nullify)
6508 {
f38b27c7 6509 strcpy (buf, "{com%I2b,|cmp%I2b,}");
520babc7
JL
6510 if (GET_MODE (operands[1]) == DImode)
6511 strcat (buf, "*");
b9821af8
JL
6512 if (negated)
6513 strcat (buf, "%S3");
6514 else
6515 strcat (buf, "%B3");
3b5e5fb3 6516 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
b9821af8 6517 }
923f781d
JL
6518 /* Handle short backwards branch with an unfilled delay slot.
6519 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6520 taken and untaken branches. */
6521 else if (dbr_sequence_length () == 0
6522 && ! forward_branch_p (insn)
9d98a694
AO
6523 && INSN_ADDRESSES_SET_P ()
6524 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6525 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d 6526 {
f38b27c7 6527 strcpy (buf, "{com%I2b,|cmp%I2b,}");
520babc7
JL
6528 if (GET_MODE (operands[1]) == DImode)
6529 strcat (buf, "*");
923f781d 6530 if (negated)
dcaeffef 6531 strcat (buf, "%B3 %2,%r1,%0%#");
923f781d 6532 else
dcaeffef 6533 strcat (buf, "%S3 %2,%r1,%0%#");
923f781d 6534 }
d2364a74 6535 else
b9821af8 6536 {
f38b27c7 6537 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
520babc7
JL
6538 if (GET_MODE (operands[1]) == DImode)
6539 strcat (buf, "*");
b9821af8
JL
6540 if (negated)
6541 strcat (buf, "%S3");
6542 else
6543 strcat (buf, "%B3");
6544 if (nullify)
3b5e5fb3 6545 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
b9821af8 6546 else
3b5e5fb3 6547 strcat (buf, " %2,%r1,%%r0\n\tb %0");
b9821af8 6548 }
d2364a74
JL
6549 break;
6550
16d74a3c 6551 default:
685d0e07 6552 /* The reversed conditional branch must branch over one additional
16d74a3c 6553 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 6554 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
6555 nullified forward branch, the instruction after the reversed
6556 condition branch must be nullified. */
6557 if (dbr_sequence_length () == 0
6558 || (nullify && forward_branch_p (insn)))
6559 {
6560 nullify = 1;
6561 xdelay = 0;
6562 operands[4] = GEN_INT (length);
6563 }
6564 else
6565 {
6566 xdelay = 1;
6567 operands[4] = GEN_INT (length + 4);
6568 }
4bcb9e3f
JL
6569
6570 /* Create a reversed conditional branch which branches around
6571 the following insns. */
685d0e07
JDA
6572 if (GET_MODE (operands[1]) != DImode)
6573 {
6574 if (nullify)
6575 {
6576 if (negated)
6577 strcpy (buf,
6578 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6579 else
6580 strcpy (buf,
6581 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6582 }
6583 else
6584 {
6585 if (negated)
6586 strcpy (buf,
6587 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6588 else
6589 strcpy (buf,
6590 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6591 }
6592 }
4bcb9e3f 6593 else
520babc7 6594 {
685d0e07
JDA
6595 if (nullify)
6596 {
6597 if (negated)
6598 strcpy (buf,
6599 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6600 else
6601 strcpy (buf,
6602 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6603 }
520babc7 6604 else
685d0e07
JDA
6605 {
6606 if (negated)
6607 strcpy (buf,
6608 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6609 else
6610 strcpy (buf,
6611 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6612 }
520babc7 6613 }
4bcb9e3f 6614
16d74a3c 6615 output_asm_insn (buf, operands);
ae9d61ab 6616 return pa_output_lbranch (operands[0], insn, xdelay);
685d0e07
JDA
6617 }
6618 return buf;
6619}
4bcb9e3f 6620
16d74a3c
JDA
6621/* This routine handles output of long unconditional branches that
6622 exceed the maximum range of a simple branch instruction. Since
6623 we don't have a register available for the branch, we save register
6624 %r1 in the frame marker, load the branch destination DEST into %r1,
6625 execute the branch, and restore %r1 in the delay slot of the branch.
6626
6627 Since long branches may have an insn in the delay slot and the
6628 delay slot is used to restore %r1, we in general need to extract
6629 this insn and execute it before the branch. However, to facilitate
6630 use of this function by conditional branches, we also provide an
6631 option to not extract the delay insn so that it will be emitted
6632 after the long branch. So, if there is an insn in the delay slot,
6633 it is extracted if XDELAY is nonzero.
6634
6635 The lengths of the various long-branch sequences are 20, 16 and 24
6636 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
4bcb9e3f 6637
685d0e07 6638const char *
ae9d61ab 6639pa_output_lbranch (rtx dest, rtx insn, int xdelay)
685d0e07
JDA
6640{
6641 rtx xoperands[2];
6642
6643 xoperands[0] = dest;
4bcb9e3f 6644
685d0e07 6645 /* First, free up the delay slot. */
16d74a3c 6646 if (xdelay && dbr_sequence_length () != 0)
685d0e07
JDA
6647 {
6648 /* We can't handle a jump in the delay slot. */
b64925dc 6649 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
4bcb9e3f 6650
685d0e07 6651 final_scan_insn (NEXT_INSN (insn), asm_out_file,
c9d691e9 6652 optimize, 0, NULL);
4bcb9e3f 6653
685d0e07 6654 /* Now delete the delay insn. */
a38e7aa5 6655 SET_INSN_DELETED (NEXT_INSN (insn));
685d0e07 6656 }
4bcb9e3f 6657
685d0e07
JDA
6658 /* Output an insn to save %r1. The runtime documentation doesn't
6659 specify whether the "Clean Up" slot in the callers frame can
6660 be clobbered by the callee. It isn't copied by HP's builtin
6661 alloca, so this suggests that it can be clobbered if necessary.
6662 The "Static Link" location is copied by HP builtin alloca, so
6663 we avoid using it. Using the cleanup slot might be a problem
6664 if we have to interoperate with languages that pass cleanup
6665 information. However, it should be possible to handle these
6666 situations with GCC's asm feature.
6667
6668 The "Current RP" slot is reserved for the called procedure, so
6669 we try to use it when we don't have a frame of our own. It's
6670 rather unlikely that we won't have a frame when we need to emit
6671 a very long branch.
6672
6673 Really the way to go long term is a register scavenger; goto
6674 the target of the jump and find a register which we can use
6675 as a scratch to hold the value in %r1. Then, we wouldn't have
6676 to free up the delay slot or clobber a slot that may be needed
6677 for other purposes. */
6678 if (TARGET_64BIT)
6679 {
6fb5fa3c 6680 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6681 /* Use the return pointer slot in the frame marker. */
6682 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6683 else
6684 /* Use the slot at -40 in the frame marker since HP builtin
6685 alloca doesn't copy it. */
6686 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6687 }
6688 else
6689 {
6fb5fa3c 6690 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6691 /* Use the return pointer slot in the frame marker. */
6692 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6693 else
6694 /* Use the "Clean Up" slot in the frame marker. In GCC,
6695 the only other use of this location is for copying a
6696 floating point double argument from a floating-point
6697 register to two general registers. The copy is done
aa7f1eb1 6698 as an "atomic" operation when outputting a call, so it
685d0e07
JDA
6699 won't interfere with our using the location here. */
6700 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6701 }
3d9268b6 6702
5fad1c24
JDA
6703 if (TARGET_PORTABLE_RUNTIME)
6704 {
6705 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6706 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6707 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6708 }
6709 else if (flag_pic)
685d0e07
JDA
6710 {
6711 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
6712 if (TARGET_SOM || !TARGET_GAS)
6713 {
6714 xoperands[1] = gen_label_rtx ();
6715 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
ecc418c4
JDA
6716 targetm.asm_out.internal_label (asm_out_file, "L",
6717 CODE_LABEL_NUMBER (xoperands[1]));
685d0e07 6718 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
4bcb9e3f 6719 }
685d0e07
JDA
6720 else
6721 {
6722 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
6723 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
6724 }
6725 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6726 }
6727 else
6728 /* Now output a very long branch to the original target. */
6729 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
4bcb9e3f 6730
685d0e07
JDA
6731 /* Now restore the value of %r1 in the delay slot. */
6732 if (TARGET_64BIT)
6733 {
6fb5fa3c 6734 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6735 return "ldd -16(%%r30),%%r1";
6736 else
6737 return "ldd -40(%%r30),%%r1";
6738 }
6739 else
6740 {
6fb5fa3c 6741 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6742 return "ldw -20(%%r30),%%r1";
6743 else
6744 return "ldw -12(%%r30),%%r1";
b9821af8 6745 }
d2364a74
JL
6746}
6747
23f6f34f 6748/* This routine handles all the branch-on-bit conditional branch sequences we
d2364a74
JL
6749 might need to generate. It handles nullification of delay slots,
6750 varying length branches, negated branches and all combinations of the
6751 above. it returns the appropriate output template to emit the branch. */
6752
519104fe 6753const char *
ae9d61ab 6754pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn, int which)
b1a275e1 6755{
d2364a74 6756 static char buf[100];
16923e7b 6757 bool useskip;
16d74a3c
JDA
6758 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6759 int length = get_attr_length (insn);
6760 int xdelay;
d2364a74 6761
112cdef5 6762 /* A conditional branch to the following instruction (e.g. the delay slot) is
b1a275e1 6763 asking for a disaster. I do not think this can happen as this pattern
23f6f34f 6764 is only used when optimizing; jump optimization should eliminate the
b1a275e1 6765 jump. But be prepared just in case. */
23f6f34f 6766
f5e66865 6767 if (branch_to_delay_slot_p (insn))
02a57c73 6768 return "nop";
23f6f34f 6769
b9821af8
JL
6770 /* If this is a long branch with its delay slot unfilled, set `nullify'
6771 as it can nullify the delay slot and save a nop. */
a1b36964 6772 if (length == 8 && dbr_sequence_length () == 0)
b9821af8
JL
6773 nullify = 1;
6774
6775 /* If this is a short forward conditional branch which did not get
6776 its delay slot filled, the delay slot can still be nullified. */
a1b36964 6777 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b9821af8
JL
6778 nullify = forward_branch_p (insn);
6779
23f6f34f 6780 /* A forward branch over a single nullified insn can be done with a
d2364a74
JL
6781 extrs instruction. This avoids a single cycle penalty due to
6782 mis-predicted branch if we fall through (branch not taken). */
16923e7b 6783 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
d2364a74
JL
6784
6785 switch (length)
6786 {
6787
b9821af8
JL
6788 /* All short conditional branches except backwards with an unfilled
6789 delay slot. */
a1b36964 6790 case 4:
d2364a74 6791 if (useskip)
f38b27c7 6792 strcpy (buf, "{extrs,|extrw,s,}");
23f6f34f 6793 else
d2364a74 6794 strcpy (buf, "bb,");
520babc7
JL
6795 if (useskip && GET_MODE (operands[0]) == DImode)
6796 strcpy (buf, "extrd,s,*");
6797 else if (GET_MODE (operands[0]) == DImode)
6798 strcpy (buf, "bb,*");
d2364a74
JL
6799 if ((which == 0 && negated)
6800 || (which == 1 && ! negated))
6801 strcat (buf, ">=");
6802 else
6803 strcat (buf, "<");
6804 if (useskip)
3b5e5fb3 6805 strcat (buf, " %0,%1,1,%%r0");
d2364a74 6806 else if (nullify && negated)
f5e66865
JDA
6807 {
6808 if (branch_needs_nop_p (insn))
6809 strcat (buf, ",n %0,%1,%3%#");
6810 else
6811 strcat (buf, ",n %0,%1,%3");
6812 }
d2364a74 6813 else if (nullify && ! negated)
f5e66865
JDA
6814 {
6815 if (branch_needs_nop_p (insn))
6816 strcat (buf, ",n %0,%1,%2%#");
6817 else
6818 strcat (buf, ",n %0,%1,%2");
6819 }
d2364a74 6820 else if (! nullify && negated)
f5e66865 6821 strcat (buf, " %0,%1,%3");
d2364a74 6822 else if (! nullify && ! negated)
b9821af8 6823 strcat (buf, " %0,%1,%2");
d2364a74
JL
6824 break;
6825
5bdc5878 6826 /* All long conditionals. Note a short backward branch with an
b9821af8
JL
6827 unfilled delay slot is treated just like a long backward branch
6828 with an unfilled delay slot. */
a1b36964 6829 case 8:
b9821af8 6830 /* Handle weird backwards branch with a filled delay slot
16d74a3c 6831 which is nullified. */
b9821af8
JL
6832 if (dbr_sequence_length () != 0
6833 && ! forward_branch_p (insn)
6834 && nullify)
6835 {
6836 strcpy (buf, "bb,");
520babc7
JL
6837 if (GET_MODE (operands[0]) == DImode)
6838 strcat (buf, "*");
b9821af8
JL
6839 if ((which == 0 && negated)
6840 || (which == 1 && ! negated))
6841 strcat (buf, "<");
6842 else
6843 strcat (buf, ">=");
6844 if (negated)
3b5e5fb3 6845 strcat (buf, ",n %0,%1,.+12\n\tb %3");
b9821af8 6846 else
3b5e5fb3 6847 strcat (buf, ",n %0,%1,.+12\n\tb %2");
b9821af8 6848 }
923f781d
JL
6849 /* Handle short backwards branch with an unfilled delay slot.
6850 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6851 taken and untaken branches. */
6852 else if (dbr_sequence_length () == 0
6853 && ! forward_branch_p (insn)
9d98a694
AO
6854 && INSN_ADDRESSES_SET_P ()
6855 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6856 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d
JL
6857 {
6858 strcpy (buf, "bb,");
520babc7
JL
6859 if (GET_MODE (operands[0]) == DImode)
6860 strcat (buf, "*");
923f781d
JL
6861 if ((which == 0 && negated)
6862 || (which == 1 && ! negated))
6863 strcat (buf, ">=");
6864 else
6865 strcat (buf, "<");
6866 if (negated)
6867 strcat (buf, " %0,%1,%3%#");
6868 else
6869 strcat (buf, " %0,%1,%2%#");
6870 }
d2364a74 6871 else
b9821af8 6872 {
520babc7
JL
6873 if (GET_MODE (operands[0]) == DImode)
6874 strcpy (buf, "extrd,s,*");
16d74a3c
JDA
6875 else
6876 strcpy (buf, "{extrs,|extrw,s,}");
b9821af8
JL
6877 if ((which == 0 && negated)
6878 || (which == 1 && ! negated))
6879 strcat (buf, "<");
6880 else
6881 strcat (buf, ">=");
6882 if (nullify && negated)
55abf18a 6883 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
b9821af8 6884 else if (nullify && ! negated)
55abf18a 6885 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
b9821af8 6886 else if (negated)
3b5e5fb3 6887 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
23f6f34f 6888 else
3b5e5fb3 6889 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
b9821af8 6890 }
d2364a74
JL
6891 break;
6892
6893 default:
16d74a3c
JDA
6894 /* The reversed conditional branch must branch over one additional
6895 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 6896 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
6897 nullified forward branch, the instruction after the reversed
6898 condition branch must be nullified. */
6899 if (dbr_sequence_length () == 0
6900 || (nullify && forward_branch_p (insn)))
6901 {
6902 nullify = 1;
6903 xdelay = 0;
8370f6fa 6904 operands[4] = GEN_INT (length);
16d74a3c
JDA
6905 }
6906 else
6907 {
6908 xdelay = 1;
8370f6fa 6909 operands[4] = GEN_INT (length + 4);
16d74a3c
JDA
6910 }
6911
6912 if (GET_MODE (operands[0]) == DImode)
8370f6fa 6913 strcpy (buf, "bb,*");
16d74a3c 6914 else
8370f6fa 6915 strcpy (buf, "bb,");
16d74a3c
JDA
6916 if ((which == 0 && negated)
6917 || (which == 1 && !negated))
8370f6fa 6918 strcat (buf, "<");
16d74a3c 6919 else
8370f6fa 6920 strcat (buf, ">=");
16d74a3c 6921 if (nullify)
8370f6fa 6922 strcat (buf, ",n %0,%1,.+%4");
16d74a3c 6923 else
8370f6fa 6924 strcat (buf, " %0,%1,.+%4");
16d74a3c 6925 output_asm_insn (buf, operands);
ae9d61ab
JDA
6926 return pa_output_lbranch (negated ? operands[3] : operands[2],
6927 insn, xdelay);
b9821af8 6928 }
d2364a74
JL
6929 return buf;
6930}
6931
6a73009d
JL
6932/* This routine handles all the branch-on-variable-bit conditional branch
6933 sequences we might need to generate. It handles nullification of delay
6934 slots, varying length branches, negated branches and all combinations
6935 of the above. it returns the appropriate output template to emit the
6936 branch. */
6937
519104fe 6938const char *
ae9d61ab
JDA
6939pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx insn,
6940 int which)
6a73009d
JL
6941{
6942 static char buf[100];
16923e7b 6943 bool useskip;
16d74a3c
JDA
6944 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6945 int length = get_attr_length (insn);
6946 int xdelay;
6a73009d 6947
112cdef5 6948 /* A conditional branch to the following instruction (e.g. the delay slot) is
6a73009d
JL
6949 asking for a disaster. I do not think this can happen as this pattern
6950 is only used when optimizing; jump optimization should eliminate the
6951 jump. But be prepared just in case. */
6952
f5e66865 6953 if (branch_to_delay_slot_p (insn))
02a57c73 6954 return "nop";
6a73009d
JL
6955
6956 /* If this is a long branch with its delay slot unfilled, set `nullify'
6957 as it can nullify the delay slot and save a nop. */
6958 if (length == 8 && dbr_sequence_length () == 0)
6959 nullify = 1;
6960
6961 /* If this is a short forward conditional branch which did not get
6962 its delay slot filled, the delay slot can still be nullified. */
6963 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6964 nullify = forward_branch_p (insn);
6965
6966 /* A forward branch over a single nullified insn can be done with a
6967 extrs instruction. This avoids a single cycle penalty due to
6968 mis-predicted branch if we fall through (branch not taken). */
16923e7b 6969 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6a73009d
JL
6970
6971 switch (length)
6972 {
6973
6974 /* All short conditional branches except backwards with an unfilled
6975 delay slot. */
6976 case 4:
6977 if (useskip)
f38b27c7 6978 strcpy (buf, "{vextrs,|extrw,s,}");
6a73009d 6979 else
f38b27c7 6980 strcpy (buf, "{bvb,|bb,}");
520babc7 6981 if (useskip && GET_MODE (operands[0]) == DImode)
e72ed000 6982 strcpy (buf, "extrd,s,*");
520babc7
JL
6983 else if (GET_MODE (operands[0]) == DImode)
6984 strcpy (buf, "bb,*");
6a73009d
JL
6985 if ((which == 0 && negated)
6986 || (which == 1 && ! negated))
6987 strcat (buf, ">=");
6988 else
6989 strcat (buf, "<");
6990 if (useskip)
f38b27c7 6991 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6a73009d 6992 else if (nullify && negated)
f5e66865
JDA
6993 {
6994 if (branch_needs_nop_p (insn))
6995 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6996 else
6997 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
6998 }
6a73009d 6999 else if (nullify && ! negated)
f5e66865
JDA
7000 {
7001 if (branch_needs_nop_p (insn))
7002 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7003 else
7004 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7005 }
6a73009d 7006 else if (! nullify && negated)
f5e66865 7007 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6a73009d 7008 else if (! nullify && ! negated)
f38b27c7 7009 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6a73009d
JL
7010 break;
7011
5bdc5878 7012 /* All long conditionals. Note a short backward branch with an
6a73009d
JL
7013 unfilled delay slot is treated just like a long backward branch
7014 with an unfilled delay slot. */
7015 case 8:
7016 /* Handle weird backwards branch with a filled delay slot
16d74a3c 7017 which is nullified. */
6a73009d
JL
7018 if (dbr_sequence_length () != 0
7019 && ! forward_branch_p (insn)
7020 && nullify)
7021 {
f38b27c7 7022 strcpy (buf, "{bvb,|bb,}");
520babc7
JL
7023 if (GET_MODE (operands[0]) == DImode)
7024 strcat (buf, "*");
6a73009d
JL
7025 if ((which == 0 && negated)
7026 || (which == 1 && ! negated))
7027 strcat (buf, "<");
7028 else
7029 strcat (buf, ">=");
7030 if (negated)
f38b27c7 7031 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6a73009d 7032 else
f38b27c7 7033 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6a73009d
JL
7034 }
7035 /* Handle short backwards branch with an unfilled delay slot.
7036 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7037 taken and untaken branches. */
7038 else if (dbr_sequence_length () == 0
7039 && ! forward_branch_p (insn)
9d98a694
AO
7040 && INSN_ADDRESSES_SET_P ()
7041 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7042 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6a73009d 7043 {
f38b27c7 7044 strcpy (buf, "{bvb,|bb,}");
520babc7
JL
7045 if (GET_MODE (operands[0]) == DImode)
7046 strcat (buf, "*");
6a73009d
JL
7047 if ((which == 0 && negated)
7048 || (which == 1 && ! negated))
7049 strcat (buf, ">=");
7050 else
7051 strcat (buf, "<");
7052 if (negated)
f38b27c7 7053 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6a73009d 7054 else
f38b27c7 7055 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6a73009d
JL
7056 }
7057 else
7058 {
f38b27c7 7059 strcpy (buf, "{vextrs,|extrw,s,}");
520babc7
JL
7060 if (GET_MODE (operands[0]) == DImode)
7061 strcpy (buf, "extrd,s,*");
6a73009d
JL
7062 if ((which == 0 && negated)
7063 || (which == 1 && ! negated))
7064 strcat (buf, "<");
7065 else
7066 strcat (buf, ">=");
7067 if (nullify && negated)
f38b27c7 7068 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6a73009d 7069 else if (nullify && ! negated)
f38b27c7 7070 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6a73009d 7071 else if (negated)
f38b27c7 7072 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6a73009d 7073 else
f38b27c7 7074 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6a73009d
JL
7075 }
7076 break;
7077
7078 default:
16d74a3c
JDA
7079 /* The reversed conditional branch must branch over one additional
7080 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 7081 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
7082 nullified forward branch, the instruction after the reversed
7083 condition branch must be nullified. */
7084 if (dbr_sequence_length () == 0
7085 || (nullify && forward_branch_p (insn)))
7086 {
7087 nullify = 1;
7088 xdelay = 0;
8370f6fa 7089 operands[4] = GEN_INT (length);
16d74a3c
JDA
7090 }
7091 else
7092 {
7093 xdelay = 1;
8370f6fa 7094 operands[4] = GEN_INT (length + 4);
16d74a3c
JDA
7095 }
7096
7097 if (GET_MODE (operands[0]) == DImode)
8370f6fa 7098 strcpy (buf, "bb,*");
16d74a3c 7099 else
8370f6fa 7100 strcpy (buf, "{bvb,|bb,}");
16d74a3c
JDA
7101 if ((which == 0 && negated)
7102 || (which == 1 && !negated))
8370f6fa 7103 strcat (buf, "<");
16d74a3c 7104 else
8370f6fa 7105 strcat (buf, ">=");
16d74a3c 7106 if (nullify)
8370f6fa 7107 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
16d74a3c 7108 else
8370f6fa 7109 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
16d74a3c 7110 output_asm_insn (buf, operands);
ae9d61ab
JDA
7111 return pa_output_lbranch (negated ? operands[3] : operands[2],
7112 insn, xdelay);
6a73009d
JL
7113 }
7114 return buf;
7115}
7116
b1a275e1
JL
7117/* Return the output template for emitting a dbra type insn.
7118
7119 Note it may perform some output operations on its own before
7120 returning the final output string. */
519104fe 7121const char *
ae9d61ab 7122pa_output_dbra (rtx *operands, rtx insn, int which_alternative)
b1a275e1 7123{
16d74a3c 7124 int length = get_attr_length (insn);
b1a275e1 7125
112cdef5 7126 /* A conditional branch to the following instruction (e.g. the delay slot) is
b1a275e1
JL
7127 asking for a disaster. Be prepared! */
7128
f5e66865 7129 if (branch_to_delay_slot_p (insn))
b1a275e1
JL
7130 {
7131 if (which_alternative == 0)
7132 return "ldo %1(%0),%0";
7133 else if (which_alternative == 1)
7134 {
831c1763
AM
7135 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7136 output_asm_insn ("ldw -16(%%r30),%4", operands);
d2d28085 7137 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
f38b27c7 7138 return "{fldws|fldw} -16(%%r30),%0";
b1a275e1
JL
7139 }
7140 else
7141 {
7142 output_asm_insn ("ldw %0,%4", operands);
7143 return "ldo %1(%4),%4\n\tstw %4,%0";
7144 }
7145 }
7146
7147 if (which_alternative == 0)
7148 {
7149 int nullify = INSN_ANNULLED_BRANCH_P (insn);
16d74a3c 7150 int xdelay;
b1a275e1
JL
7151
7152 /* If this is a long branch with its delay slot unfilled, set `nullify'
7153 as it can nullify the delay slot and save a nop. */
a1b36964 7154 if (length == 8 && dbr_sequence_length () == 0)
b1a275e1
JL
7155 nullify = 1;
7156
7157 /* If this is a short forward conditional branch which did not get
7158 its delay slot filled, the delay slot can still be nullified. */
a1b36964 7159 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b1a275e1
JL
7160 nullify = forward_branch_p (insn);
7161
144d51f9 7162 switch (length)
b1a275e1 7163 {
144d51f9
NS
7164 case 4:
7165 if (nullify)
f5e66865
JDA
7166 {
7167 if (branch_needs_nop_p (insn))
7168 return "addib,%C2,n %1,%0,%3%#";
7169 else
7170 return "addib,%C2,n %1,%0,%3";
7171 }
144d51f9
NS
7172 else
7173 return "addib,%C2 %1,%0,%3";
7174
7175 case 8:
23f6f34f 7176 /* Handle weird backwards branch with a fulled delay slot
b1a275e1
JL
7177 which is nullified. */
7178 if (dbr_sequence_length () != 0
7179 && ! forward_branch_p (insn)
7180 && nullify)
3b5e5fb3 7181 return "addib,%N2,n %1,%0,.+12\n\tb %3";
923f781d
JL
7182 /* Handle short backwards branch with an unfilled delay slot.
7183 Using a addb;nop rather than addi;bl saves 1 cycle for both
7184 taken and untaken branches. */
7185 else if (dbr_sequence_length () == 0
7186 && ! forward_branch_p (insn)
9d98a694
AO
7187 && INSN_ADDRESSES_SET_P ()
7188 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7189 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d 7190 return "addib,%C2 %1,%0,%3%#";
23f6f34f
TG
7191
7192 /* Handle normal cases. */
b1a275e1 7193 if (nullify)
3b5e5fb3 7194 return "addi,%N2 %1,%0,%0\n\tb,n %3";
b1a275e1 7195 else
3b5e5fb3 7196 return "addi,%N2 %1,%0,%0\n\tb %3";
144d51f9
NS
7197
7198 default:
16d74a3c
JDA
7199 /* The reversed conditional branch must branch over one additional
7200 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 7201 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
7202 nullified forward branch, the instruction after the reversed
7203 condition branch must be nullified. */
7204 if (dbr_sequence_length () == 0
7205 || (nullify && forward_branch_p (insn)))
7206 {
7207 nullify = 1;
7208 xdelay = 0;
7209 operands[4] = GEN_INT (length);
7210 }
7211 else
7212 {
7213 xdelay = 1;
7214 operands[4] = GEN_INT (length + 4);
7215 }
7216
7217 if (nullify)
7218 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7219 else
7220 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7221
ae9d61ab 7222 return pa_output_lbranch (operands[3], insn, xdelay);
b1a275e1 7223 }
144d51f9 7224
b1a275e1
JL
7225 }
7226 /* Deal with gross reload from FP register case. */
7227 else if (which_alternative == 1)
7228 {
7229 /* Move loop counter from FP register to MEM then into a GR,
7230 increment the GR, store the GR into MEM, and finally reload
23f6f34f 7231 the FP register from MEM from within the branch's delay slot. */
831c1763
AM
7232 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7233 operands);
d2d28085 7234 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
16d74a3c 7235 if (length == 24)
f38b27c7 7236 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c 7237 else if (length == 28)
f38b27c7 7238 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c
JDA
7239 else
7240 {
8370f6fa
JDA
7241 operands[5] = GEN_INT (length - 16);
7242 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
16d74a3c 7243 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
ae9d61ab 7244 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7245 }
b1a275e1
JL
7246 }
7247 /* Deal with gross reload from memory case. */
7248 else
7249 {
7250 /* Reload loop counter from memory, the store back to memory
71cc389b 7251 happens in the branch's delay slot. */
b1a275e1 7252 output_asm_insn ("ldw %0,%4", operands);
16d74a3c 7253 if (length == 12)
b1a275e1 7254 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
16d74a3c 7255 else if (length == 16)
3b5e5fb3 7256 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
16d74a3c
JDA
7257 else
7258 {
8370f6fa
JDA
7259 operands[5] = GEN_INT (length - 4);
7260 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
ae9d61ab 7261 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7262 }
b1a275e1
JL
7263 }
7264}
7265
16d74a3c 7266/* Return the output template for emitting a movb type insn.
b1a275e1
JL
7267
7268 Note it may perform some output operations on its own before
7269 returning the final output string. */
519104fe 7270const char *
ae9d61ab 7271pa_output_movb (rtx *operands, rtx insn, int which_alternative,
b7849684 7272 int reverse_comparison)
b1a275e1 7273{
16d74a3c 7274 int length = get_attr_length (insn);
b1a275e1 7275
112cdef5 7276 /* A conditional branch to the following instruction (e.g. the delay slot) is
b1a275e1
JL
7277 asking for a disaster. Be prepared! */
7278
f5e66865 7279 if (branch_to_delay_slot_p (insn))
b1a275e1
JL
7280 {
7281 if (which_alternative == 0)
7282 return "copy %1,%0";
7283 else if (which_alternative == 1)
7284 {
831c1763 7285 output_asm_insn ("stw %1,-16(%%r30)", operands);
f38b27c7 7286 return "{fldws|fldw} -16(%%r30),%0";
b1a275e1 7287 }
b1092901 7288 else if (which_alternative == 2)
b1a275e1 7289 return "stw %1,%0";
b1092901
JL
7290 else
7291 return "mtsar %r1";
b1a275e1
JL
7292 }
7293
7294 /* Support the second variant. */
7295 if (reverse_comparison)
7296 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7297
7298 if (which_alternative == 0)
7299 {
7300 int nullify = INSN_ANNULLED_BRANCH_P (insn);
16d74a3c 7301 int xdelay;
b1a275e1
JL
7302
7303 /* If this is a long branch with its delay slot unfilled, set `nullify'
7304 as it can nullify the delay slot and save a nop. */
a1b36964 7305 if (length == 8 && dbr_sequence_length () == 0)
b1a275e1
JL
7306 nullify = 1;
7307
7308 /* If this is a short forward conditional branch which did not get
7309 its delay slot filled, the delay slot can still be nullified. */
a1b36964 7310 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b1a275e1
JL
7311 nullify = forward_branch_p (insn);
7312
144d51f9 7313 switch (length)
b1a275e1 7314 {
144d51f9
NS
7315 case 4:
7316 if (nullify)
f5e66865
JDA
7317 {
7318 if (branch_needs_nop_p (insn))
7319 return "movb,%C2,n %1,%0,%3%#";
7320 else
7321 return "movb,%C2,n %1,%0,%3";
7322 }
144d51f9
NS
7323 else
7324 return "movb,%C2 %1,%0,%3";
7325
7326 case 8:
23f6f34f 7327 /* Handle weird backwards branch with a filled delay slot
b1a275e1
JL
7328 which is nullified. */
7329 if (dbr_sequence_length () != 0
7330 && ! forward_branch_p (insn)
7331 && nullify)
3b5e5fb3 7332 return "movb,%N2,n %1,%0,.+12\n\tb %3";
23f6f34f 7333
923f781d
JL
7334 /* Handle short backwards branch with an unfilled delay slot.
7335 Using a movb;nop rather than or;bl saves 1 cycle for both
7336 taken and untaken branches. */
7337 else if (dbr_sequence_length () == 0
7338 && ! forward_branch_p (insn)
9d98a694
AO
7339 && INSN_ADDRESSES_SET_P ()
7340 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7341 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d 7342 return "movb,%C2 %1,%0,%3%#";
23f6f34f 7343 /* Handle normal cases. */
b1a275e1 7344 if (nullify)
3b5e5fb3 7345 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
b1a275e1 7346 else
3b5e5fb3 7347 return "or,%N2 %1,%%r0,%0\n\tb %3";
144d51f9
NS
7348
7349 default:
16d74a3c
JDA
7350 /* The reversed conditional branch must branch over one additional
7351 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 7352 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
7353 nullified forward branch, the instruction after the reversed
7354 condition branch must be nullified. */
7355 if (dbr_sequence_length () == 0
7356 || (nullify && forward_branch_p (insn)))
7357 {
7358 nullify = 1;
7359 xdelay = 0;
7360 operands[4] = GEN_INT (length);
7361 }
7362 else
7363 {
7364 xdelay = 1;
7365 operands[4] = GEN_INT (length + 4);
7366 }
7367
7368 if (nullify)
7369 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7370 else
7371 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7372
ae9d61ab 7373 return pa_output_lbranch (operands[3], insn, xdelay);
b1a275e1 7374 }
b1a275e1 7375 }
16d74a3c 7376 /* Deal with gross reload for FP destination register case. */
b1a275e1
JL
7377 else if (which_alternative == 1)
7378 {
16d74a3c
JDA
7379 /* Move source register to MEM, perform the branch test, then
7380 finally load the FP register from MEM from within the branch's
7381 delay slot. */
831c1763 7382 output_asm_insn ("stw %1,-16(%%r30)", operands);
16d74a3c 7383 if (length == 12)
f38b27c7 7384 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c 7385 else if (length == 16)
f38b27c7 7386 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c
JDA
7387 else
7388 {
8370f6fa
JDA
7389 operands[4] = GEN_INT (length - 4);
7390 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
16d74a3c 7391 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
ae9d61ab 7392 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7393 }
b1a275e1
JL
7394 }
7395 /* Deal with gross reload from memory case. */
b1092901 7396 else if (which_alternative == 2)
b1a275e1
JL
7397 {
7398 /* Reload loop counter from memory, the store back to memory
71cc389b 7399 happens in the branch's delay slot. */
16d74a3c 7400 if (length == 8)
f38b27c7 7401 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
16d74a3c 7402 else if (length == 12)
f38b27c7 7403 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
16d74a3c
JDA
7404 else
7405 {
8370f6fa
JDA
7406 operands[4] = GEN_INT (length);
7407 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7408 operands);
ae9d61ab 7409 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7410 }
b1a275e1 7411 }
b1092901
JL
7412 /* Handle SAR as a destination. */
7413 else
7414 {
16d74a3c 7415 if (length == 8)
f38b27c7 7416 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
16d74a3c 7417 else if (length == 12)
715ab8c3 7418 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
16d74a3c
JDA
7419 else
7420 {
8370f6fa
JDA
7421 operands[4] = GEN_INT (length);
7422 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7423 operands);
ae9d61ab 7424 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7425 }
b1092901 7426 }
b1a275e1
JL
7427}
7428
a02aa5b0
JDA
7429/* Copy any FP arguments in INSN into integer registers. */
7430static void
b7849684 7431copy_fp_args (rtx insn)
a02aa5b0
JDA
7432{
7433 rtx link;
7434 rtx xoperands[2];
b1a275e1 7435
a02aa5b0
JDA
7436 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7437 {
7438 int arg_mode, regno;
7439 rtx use = XEXP (link, 0);
f726ea7d 7440
a02aa5b0
JDA
7441 if (! (GET_CODE (use) == USE
7442 && GET_CODE (XEXP (use, 0)) == REG
7443 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7444 continue;
2c4ff308 7445
a02aa5b0
JDA
7446 arg_mode = GET_MODE (XEXP (use, 0));
7447 regno = REGNO (XEXP (use, 0));
520babc7 7448
a02aa5b0
JDA
7449 /* Is it a floating point register? */
7450 if (regno >= 32 && regno <= 39)
7451 {
7452 /* Copy the FP register into an integer register via memory. */
7453 if (arg_mode == SFmode)
7454 {
7455 xoperands[0] = XEXP (use, 0);
7456 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7457 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7458 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7459 }
7460 else
7461 {
7462 xoperands[0] = XEXP (use, 0);
7463 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7464 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7465 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7466 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7467 }
7468 }
279c9bde 7469 }
a02aa5b0
JDA
7470}
7471
7472/* Compute length of the FP argument copy sequence for INSN. */
7473static int
b7849684 7474length_fp_args (rtx insn)
a02aa5b0
JDA
7475{
7476 int length = 0;
7477 rtx link;
279c9bde 7478
a02aa5b0 7479 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6a73009d 7480 {
a02aa5b0
JDA
7481 int arg_mode, regno;
7482 rtx use = XEXP (link, 0);
7483
7484 if (! (GET_CODE (use) == USE
7485 && GET_CODE (XEXP (use, 0)) == REG
7486 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7487 continue;
6a73009d 7488
a02aa5b0
JDA
7489 arg_mode = GET_MODE (XEXP (use, 0));
7490 regno = REGNO (XEXP (use, 0));
7491
7492 /* Is it a floating point register? */
7493 if (regno >= 32 && regno <= 39)
6a73009d 7494 {
a02aa5b0
JDA
7495 if (arg_mode == SFmode)
7496 length += 8;
7497 else
7498 length += 12;
6a73009d 7499 }
a02aa5b0 7500 }
6a73009d 7501
a02aa5b0
JDA
7502 return length;
7503}
3d9268b6 7504
611ad29e 7505/* Return the attribute length for the millicode call instruction INSN.
ae9d61ab 7506 The length must match the code generated by pa_output_millicode_call.
611ad29e 7507 We include the delay slot in the returned length as it is better to
a02aa5b0 7508 over estimate the length than to under estimate it. */
a7721dc0 7509
a02aa5b0 7510int
ae9d61ab 7511pa_attr_length_millicode_call (rtx insn)
a02aa5b0 7512{
611ad29e 7513 unsigned long distance = -1;
62910663 7514 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
a02aa5b0 7515
611ad29e
JDA
7516 if (INSN_ADDRESSES_SET_P ())
7517 {
5fad1c24
JDA
7518 distance = (total + insn_current_reference_address (insn));
7519 if (distance < total)
611ad29e
JDA
7520 distance = -1;
7521 }
a02aa5b0
JDA
7522
7523 if (TARGET_64BIT)
7524 {
7525 if (!TARGET_LONG_CALLS && distance < 7600000)
611ad29e 7526 return 8;
a02aa5b0 7527
611ad29e 7528 return 20;
a02aa5b0
JDA
7529 }
7530 else if (TARGET_PORTABLE_RUNTIME)
611ad29e 7531 return 24;
a02aa5b0
JDA
7532 else
7533 {
a43434ff 7534 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
611ad29e 7535 return 8;
a02aa5b0
JDA
7536
7537 if (TARGET_LONG_ABS_CALL && !flag_pic)
611ad29e 7538 return 12;
a02aa5b0 7539
611ad29e 7540 return 24;
a02aa5b0
JDA
7541 }
7542}
7543
7544/* INSN is a function call. It may have an unconditional jump
7545 in its delay slot.
a7721dc0 7546
a02aa5b0 7547 CALL_DEST is the routine we are calling. */
a7721dc0 7548
a02aa5b0 7549const char *
ae9d61ab 7550pa_output_millicode_call (rtx insn, rtx call_dest)
a02aa5b0
JDA
7551{
7552 int attr_length = get_attr_length (insn);
7553 int seq_length = dbr_sequence_length ();
7554 int distance;
7555 rtx seq_insn;
7556 rtx xoperands[3];
a7721dc0 7557
a02aa5b0
JDA
7558 xoperands[0] = call_dest;
7559 xoperands[2] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7560
7561 /* Handle the common case where we are sure that the branch will
7562 reach the beginning of the $CODE$ subspace. The within reach
ab11fb42
JDA
7563 form of the $$sh_func_adrs call has a length of 28. Because it
7564 has an attribute type of sh_func_adrs, it never has a nonzero
7565 sequence length (i.e., the delay slot is never filled). */
a02aa5b0 7566 if (!TARGET_LONG_CALLS
ab11fb42
JDA
7567 && (attr_length == 8
7568 || (attr_length == 28
7569 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
a02aa5b0
JDA
7570 {
7571 output_asm_insn ("{bl|b,l} %0,%2", xoperands);
7572 }
7573 else
7574 {
7575 if (TARGET_64BIT)
7576 {
7577 /* It might seem that one insn could be saved by accessing
7578 the millicode function using the linkage table. However,
7579 this doesn't work in shared libraries and other dynamically
7580 loaded objects. Using a pc-relative sequence also avoids
7581 problems related to the implicit use of the gp register. */
7582 output_asm_insn ("b,l .+8,%%r1", xoperands);
581d9404
JDA
7583
7584 if (TARGET_GAS)
7585 {
7586 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
7587 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
7588 }
7589 else
7590 {
7591 xoperands[1] = gen_label_rtx ();
7592 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
ecc418c4 7593 targetm.asm_out.internal_label (asm_out_file, "L",
581d9404
JDA
7594 CODE_LABEL_NUMBER (xoperands[1]));
7595 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7596 }
7597
a02aa5b0 7598 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6a73009d 7599 }
6a73009d
JL
7600 else if (TARGET_PORTABLE_RUNTIME)
7601 {
a02aa5b0
JDA
7602 /* Pure portable runtime doesn't allow be/ble; we also don't
7603 have PIC support in the assembler/linker, so this sequence
7604 is needed. */
6a73009d 7605
a02aa5b0
JDA
7606 /* Get the address of our target into %r1. */
7607 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7608 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6a73009d 7609
a02aa5b0
JDA
7610 /* Get our return address into %r31. */
7611 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7612 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
6a73009d 7613
a02aa5b0
JDA
7614 /* Jump to our target address in %r1. */
7615 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6a73009d 7616 }
a02aa5b0 7617 else if (!flag_pic)
6a73009d 7618 {
a02aa5b0 7619 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6248c4dd 7620 if (TARGET_PA_20)
a02aa5b0 7621 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
6248c4dd 7622 else
a02aa5b0 7623 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6a73009d 7624 }
a02aa5b0 7625 else
6a73009d 7626 {
581d9404
JDA
7627 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7628 output_asm_insn ("addi 16,%%r1,%%r31", xoperands);
7629
a02aa5b0
JDA
7630 if (TARGET_SOM || !TARGET_GAS)
7631 {
7632 /* The HP assembler can generate relocations for the
7633 difference of two symbols. GAS can do this for a
7634 millicode symbol but not an arbitrary external
7635 symbol when generating SOM output. */
7636 xoperands[1] = gen_label_rtx ();
ecc418c4 7637 targetm.asm_out.internal_label (asm_out_file, "L",
a02aa5b0
JDA
7638 CODE_LABEL_NUMBER (xoperands[1]));
7639 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
7640 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7641 }
7642 else
7643 {
a02aa5b0
JDA
7644 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands);
7645 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7646 xoperands);
7647 }
6a73009d 7648
a02aa5b0
JDA
7649 /* Jump to our target address in %r1. */
7650 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6a73009d 7651 }
6a73009d
JL
7652 }
7653
a02aa5b0
JDA
7654 if (seq_length == 0)
7655 output_asm_insn ("nop", xoperands);
6a73009d 7656
a02aa5b0 7657 /* We are done if there isn't a jump in the delay slot. */
b64925dc 7658 if (seq_length == 0 || ! JUMP_P (NEXT_INSN (insn)))
a02aa5b0 7659 return "";
6a73009d 7660
a02aa5b0
JDA
7661 /* This call has an unconditional jump in its delay slot. */
7662 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
6a73009d 7663
a02aa5b0
JDA
7664 /* See if the return address can be adjusted. Use the containing
7665 sequence insn's address. */
611ad29e 7666 if (INSN_ADDRESSES_SET_P ())
6a73009d 7667 {
611ad29e
JDA
7668 seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
7669 distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
7670 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
7671
7672 if (VAL_14_BITS_P (distance))
7673 {
7674 xoperands[1] = gen_label_rtx ();
7675 output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
ecc418c4
JDA
7676 targetm.asm_out.internal_label (asm_out_file, "L",
7677 CODE_LABEL_NUMBER (xoperands[1]));
611ad29e
JDA
7678 }
7679 else
7680 /* ??? This branch may not reach its target. */
7681 output_asm_insn ("nop\n\tb,n %0", xoperands);
6a73009d 7682 }
a02aa5b0
JDA
7683 else
7684 /* ??? This branch may not reach its target. */
7685 output_asm_insn ("nop\n\tb,n %0", xoperands);
6a73009d
JL
7686
7687 /* Delete the jump. */
a38e7aa5 7688 SET_INSN_DELETED (NEXT_INSN (insn));
a02aa5b0 7689
6a73009d
JL
7690 return "";
7691}
7692
611ad29e
JDA
7693/* Return the attribute length of the call instruction INSN. The SIBCALL
7694 flag indicates whether INSN is a regular call or a sibling call. The
32562302 7695 length returned must be longer than the code actually generated by
ae9d61ab 7696 pa_output_call. Since branch shortening is done before delay branch
32562302
JDA
7697 sequencing, there is no way to determine whether or not the delay
7698 slot will be filled during branch shortening. Even when the delay
7699 slot is filled, we may have to add a nop if the delay slot contains
7700 a branch that can't reach its target. Thus, we always have to include
7701 the delay slot in the length estimate. This used to be done in
7702 pa_adjust_insn_length but we do it here now as some sequences always
7703 fill the delay slot and we can save four bytes in the estimate for
7704 these sequences. */
a02aa5b0
JDA
7705
7706int
ae9d61ab 7707pa_attr_length_call (rtx insn, int sibcall)
a02aa5b0 7708{
32562302 7709 int local_call;
e40375e0 7710 rtx call, call_dest;
32562302
JDA
7711 tree call_decl;
7712 int length = 0;
7713 rtx pat = PATTERN (insn);
611ad29e 7714 unsigned long distance = -1;
a02aa5b0 7715
b64925dc 7716 gcc_assert (CALL_P (insn));
e40375e0 7717
611ad29e
JDA
7718 if (INSN_ADDRESSES_SET_P ())
7719 {
32562302
JDA
7720 unsigned long total;
7721
7722 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
5fad1c24
JDA
7723 distance = (total + insn_current_reference_address (insn));
7724 if (distance < total)
611ad29e
JDA
7725 distance = -1;
7726 }
a02aa5b0 7727
e40375e0 7728 gcc_assert (GET_CODE (pat) == PARALLEL);
a02aa5b0 7729
e40375e0
JDA
7730 /* Get the call rtx. */
7731 call = XVECEXP (pat, 0, 0);
7732 if (GET_CODE (call) == SET)
7733 call = SET_SRC (call);
7734
7735 gcc_assert (GET_CODE (call) == CALL);
7736
7737 /* Determine if this is a local call. */
7738 call_dest = XEXP (XEXP (call, 0), 0);
32562302 7739 call_decl = SYMBOL_REF_DECL (call_dest);
ecc418c4 7740 local_call = call_decl && targetm.binds_local_p (call_decl);
a02aa5b0 7741
32562302
JDA
7742 /* pc-relative branch. */
7743 if (!TARGET_LONG_CALLS
7744 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
a43434ff 7745 || distance < MAX_PCREL17F_OFFSET))
32562302 7746 length += 8;
a02aa5b0 7747
32562302
JDA
7748 /* 64-bit plabel sequence. */
7749 else if (TARGET_64BIT && !local_call)
7750 length += sibcall ? 28 : 24;
a02aa5b0 7751
32562302
JDA
7752 /* non-pic long absolute branch sequence. */
7753 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7754 length += 12;
a02aa5b0 7755
32562302 7756 /* long pc-relative branch sequence. */
9dbd54be 7757 else if (TARGET_LONG_PIC_SDIFF_CALL
751d9855
JDA
7758 || (TARGET_GAS && !TARGET_SOM
7759 && (TARGET_LONG_PIC_PCREL_CALL || local_call)))
32562302
JDA
7760 {
7761 length += 20;
a02aa5b0 7762
0831e1d1 7763 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
32562302
JDA
7764 length += 8;
7765 }
62910663 7766
32562302
JDA
7767 /* 32-bit plabel sequence. */
7768 else
7769 {
7770 length += 32;
a02aa5b0 7771
32562302
JDA
7772 if (TARGET_SOM)
7773 length += length_fp_args (insn);
7774
7775 if (flag_pic)
7776 length += 4;
90330d31 7777
32562302
JDA
7778 if (!TARGET_PA_20)
7779 {
a02aa5b0
JDA
7780 if (!sibcall)
7781 length += 8;
7782
0831e1d1 7783 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
32562302 7784 length += 8;
a02aa5b0
JDA
7785 }
7786 }
32562302
JDA
7787
7788 return length;
a02aa5b0
JDA
7789}
7790
7791/* INSN is a function call. It may have an unconditional jump
6a73009d
JL
7792 in its delay slot.
7793
7794 CALL_DEST is the routine we are calling. */
7795
519104fe 7796const char *
ae9d61ab 7797pa_output_call (rtx insn, rtx call_dest, int sibcall)
6a73009d 7798{
a02aa5b0
JDA
7799 int delay_insn_deleted = 0;
7800 int delay_slot_filled = 0;
3d9268b6 7801 int seq_length = dbr_sequence_length ();
5fad1c24 7802 tree call_decl = SYMBOL_REF_DECL (call_dest);
ecc418c4 7803 int local_call = call_decl && targetm.binds_local_p (call_decl);
a02aa5b0
JDA
7804 rtx xoperands[2];
7805
7806 xoperands[0] = call_dest;
6a73009d 7807
a02aa5b0 7808 /* Handle the common case where we're sure that the branch will reach
5fad1c24
JDA
7809 the beginning of the "$CODE$" subspace. This is the beginning of
7810 the current function if we are in a named section. */
ae9d61ab 7811 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
2c4ff308 7812 {
520babc7 7813 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
a02aa5b0 7814 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
279c9bde 7815 }
a02aa5b0 7816 else
279c9bde 7817 {
5fad1c24 7818 if (TARGET_64BIT && !local_call)
f726ea7d 7819 {
a02aa5b0
JDA
7820 /* ??? As far as I can tell, the HP linker doesn't support the
7821 long pc-relative sequence described in the 64-bit runtime
7822 architecture. So, we use a slightly longer indirect call. */
ae9d61ab 7823 xoperands[0] = pa_get_deferred_plabel (call_dest);
a02aa5b0
JDA
7824 xoperands[1] = gen_label_rtx ();
7825
7826 /* If this isn't a sibcall, we put the load of %r27 into the
7827 delay slot. We can't do this in a sibcall as we don't
7828 have a second call-clobbered scratch register available. */
7829 if (seq_length != 0
b64925dc 7830 && ! JUMP_P (NEXT_INSN (insn))
a02aa5b0
JDA
7831 && !sibcall)
7832 {
7833 final_scan_insn (NEXT_INSN (insn), asm_out_file,
c9d691e9 7834 optimize, 0, NULL);
a02aa5b0
JDA
7835
7836 /* Now delete the delay insn. */
a38e7aa5 7837 SET_INSN_DELETED (NEXT_INSN (insn));
a02aa5b0
JDA
7838 delay_insn_deleted = 1;
7839 }
279c9bde 7840
a02aa5b0
JDA
7841 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7842 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7843 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
279c9bde 7844
a02aa5b0 7845 if (sibcall)
279c9bde 7846 {
a02aa5b0
JDA
7847 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7848 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7849 output_asm_insn ("bve (%%r1)", xoperands);
7850 }
7851 else
7852 {
7853 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7854 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7855 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7856 delay_slot_filled = 1;
279c9bde
JL
7857 }
7858 }
a02aa5b0 7859 else
93ae92c1 7860 {
a02aa5b0
JDA
7861 int indirect_call = 0;
7862
7863 /* Emit a long call. There are several different sequences
7864 of increasing length and complexity. In most cases,
7865 they don't allow an instruction in the delay slot. */
5fad1c24 7866 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
9dbd54be 7867 && !TARGET_LONG_PIC_SDIFF_CALL
751d9855
JDA
7868 && !(TARGET_GAS && !TARGET_SOM
7869 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
5fad1c24 7870 && !TARGET_64BIT)
a02aa5b0
JDA
7871 indirect_call = 1;
7872
7873 if (seq_length != 0
b64925dc 7874 && ! JUMP_P (NEXT_INSN (insn))
a02aa5b0 7875 && !sibcall
44b86471
JDA
7876 && (!TARGET_PA_20
7877 || indirect_call
7878 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
359255a9 7879 {
a02aa5b0
JDA
7880 /* A non-jump insn in the delay slot. By definition we can
7881 emit this insn before the call (and in fact before argument
7882 relocating. */
c9d691e9 7883 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
5cfc5f84 7884 NULL);
a02aa5b0
JDA
7885
7886 /* Now delete the delay insn. */
a38e7aa5 7887 SET_INSN_DELETED (NEXT_INSN (insn));
a02aa5b0 7888 delay_insn_deleted = 1;
359255a9 7889 }
93ae92c1 7890
5fad1c24 7891 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
359255a9 7892 {
a02aa5b0
JDA
7893 /* This is the best sequence for making long calls in
7894 non-pic code. Unfortunately, GNU ld doesn't provide
7895 the stub needed for external calls, and GAS's support
5fad1c24
JDA
7896 for this with the SOM linker is buggy. It is safe
7897 to use this for local calls. */
a02aa5b0
JDA
7898 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7899 if (sibcall)
7900 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
7901 else
7902 {
7903 if (TARGET_PA_20)
7904 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7905 xoperands);
7906 else
7907 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6a73009d 7908
a02aa5b0
JDA
7909 output_asm_insn ("copy %%r31,%%r2", xoperands);
7910 delay_slot_filled = 1;
7911 }
7912 }
7913 else
7914 {
9dbd54be 7915 if (TARGET_LONG_PIC_SDIFF_CALL)
3d9268b6 7916 {
a02aa5b0 7917 /* The HP assembler and linker can handle relocations
9dbd54be
JDA
7918 for the difference of two symbols. The HP assembler
7919 recognizes the sequence as a pc-relative call and
7920 the linker provides stubs when needed. */
a02aa5b0
JDA
7921 xoperands[1] = gen_label_rtx ();
7922 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7923 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands);
ecc418c4 7924 targetm.asm_out.internal_label (asm_out_file, "L",
3d9268b6 7925 CODE_LABEL_NUMBER (xoperands[1]));
a02aa5b0
JDA
7926 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands);
7927 }
751d9855
JDA
7928 else if (TARGET_GAS && !TARGET_SOM
7929 && (TARGET_LONG_PIC_PCREL_CALL || local_call))
3d9268b6 7930 {
a02aa5b0
JDA
7931 /* GAS currently can't generate the relocations that
7932 are needed for the SOM linker under HP-UX using this
7933 sequence. The GNU linker doesn't generate the stubs
7934 that are needed for external calls on TARGET_ELF32
7935 with this sequence. For now, we have to use a
7936 longer plabel sequence when using GAS. */
7937 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
7938 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
3d9268b6 7939 xoperands);
a02aa5b0 7940 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
3d9268b6
JDA
7941 xoperands);
7942 }
520babc7
JL
7943 else
7944 {
a02aa5b0
JDA
7945 /* Emit a long plabel-based call sequence. This is
7946 essentially an inline implementation of $$dyncall.
7947 We don't actually try to call $$dyncall as this is
7948 as difficult as calling the function itself. */
ae9d61ab 7949 xoperands[0] = pa_get_deferred_plabel (call_dest);
a02aa5b0
JDA
7950 xoperands[1] = gen_label_rtx ();
7951
7952 /* Since the call is indirect, FP arguments in registers
7953 need to be copied to the general registers. Then, the
7954 argument relocation stub will copy them back. */
7955 if (TARGET_SOM)
7956 copy_fp_args (insn);
7957
7958 if (flag_pic)
7959 {
7960 output_asm_insn ("addil LT'%0,%%r19", xoperands);
7961 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
7962 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
7963 }
7964 else
7965 {
7966 output_asm_insn ("addil LR'%0-$global$,%%r27",
7967 xoperands);
7968 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7969 xoperands);
7970 }
279c9bde 7971
a02aa5b0
JDA
7972 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
7973 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
7974 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
7975 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
6a73009d 7976
a02aa5b0
JDA
7977 if (!sibcall && !TARGET_PA_20)
7978 {
7979 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
0831e1d1 7980 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
90330d31
JDA
7981 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
7982 else
7983 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
a02aa5b0
JDA
7984 }
7985 }
6a73009d 7986
a02aa5b0 7987 if (TARGET_PA_20)
520babc7 7988 {
a02aa5b0
JDA
7989 if (sibcall)
7990 output_asm_insn ("bve (%%r1)", xoperands);
7991 else
7992 {
7993 if (indirect_call)
7994 {
7995 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7996 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
7997 delay_slot_filled = 1;
7998 }
7999 else
8000 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8001 }
520babc7
JL
8002 }
8003 else
8004 {
0831e1d1 8005 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
90330d31
JDA
8006 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8007 xoperands);
279c9bde 8008
a02aa5b0 8009 if (sibcall)
90330d31 8010 {
0831e1d1 8011 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
90330d31
JDA
8012 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8013 else
8014 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8015 }
a02aa5b0
JDA
8016 else
8017 {
0831e1d1 8018 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
90330d31
JDA
8019 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8020 else
8021 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
279c9bde 8022
a02aa5b0
JDA
8023 if (indirect_call)
8024 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8025 else
8026 output_asm_insn ("copy %%r31,%%r2", xoperands);
8027 delay_slot_filled = 1;
8028 }
8029 }
8030 }
279c9bde 8031 }
2c4ff308 8032 }
23f6f34f 8033
62910663 8034 if (!delay_slot_filled && (seq_length == 0 || delay_insn_deleted))
a02aa5b0 8035 output_asm_insn ("nop", xoperands);
2c4ff308 8036
a02aa5b0
JDA
8037 /* We are done if there isn't a jump in the delay slot. */
8038 if (seq_length == 0
8039 || delay_insn_deleted
b64925dc 8040 || ! JUMP_P (NEXT_INSN (insn)))
a02aa5b0 8041 return "";
2c4ff308 8042
a02aa5b0 8043 /* A sibcall should never have a branch in the delay slot. */
144d51f9 8044 gcc_assert (!sibcall);
2c4ff308 8045
a02aa5b0
JDA
8046 /* This call has an unconditional jump in its delay slot. */
8047 xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
2c4ff308 8048
611ad29e 8049 if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
2c4ff308 8050 {
a02aa5b0 8051 /* See if the return address can be adjusted. Use the containing
28326880
OH
8052 sequence insn's address. This would break the regular call/return@
8053 relationship assumed by the table based eh unwinder, so only do that
8054 if the call is not possibly throwing. */
a02aa5b0
JDA
8055 rtx seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
8056 int distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
8057 - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
8058
28326880
OH
8059 if (VAL_14_BITS_P (distance)
8060 && !(can_throw_internal (insn) || can_throw_external (insn)))
a02aa5b0
JDA
8061 {
8062 xoperands[1] = gen_label_rtx ();
8063 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
ecc418c4
JDA
8064 targetm.asm_out.internal_label (asm_out_file, "L",
8065 CODE_LABEL_NUMBER (xoperands[1]));
a02aa5b0
JDA
8066 }
8067 else
a02aa5b0 8068 output_asm_insn ("nop\n\tb,n %0", xoperands);
2c4ff308 8069 }
a02aa5b0 8070 else
a02aa5b0 8071 output_asm_insn ("b,n %0", xoperands);
2c4ff308
JL
8072
8073 /* Delete the jump. */
a38e7aa5 8074 SET_INSN_DELETED (NEXT_INSN (insn));
a02aa5b0 8075
2c4ff308
JL
8076 return "";
8077}
8078
611ad29e
JDA
8079/* Return the attribute length of the indirect call instruction INSN.
8080 The length must match the code generated by output_indirect call.
8081 The returned length includes the delay slot. Currently, the delay
8082 slot of an indirect call sequence is not exposed and it is used by
8083 the sequence itself. */
8084
8085int
ae9d61ab 8086pa_attr_length_indirect_call (rtx insn)
611ad29e
JDA
8087{
8088 unsigned long distance = -1;
62910663 8089 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
611ad29e
JDA
8090
8091 if (INSN_ADDRESSES_SET_P ())
8092 {
5fad1c24
JDA
8093 distance = (total + insn_current_reference_address (insn));
8094 if (distance < total)
611ad29e
JDA
8095 distance = -1;
8096 }
8097
8098 if (TARGET_64BIT)
8099 return 12;
8100
8101 if (TARGET_FAST_INDIRECT_CALLS
8102 || (!TARGET_PORTABLE_RUNTIME
40fc2e0b 8103 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
a43434ff 8104 || distance < MAX_PCREL17F_OFFSET)))
611ad29e
JDA
8105 return 8;
8106
8107 if (flag_pic)
8108 return 24;
8109
8110 if (TARGET_PORTABLE_RUNTIME)
8111 return 20;
8112
8113 /* Out of reach, can use ble. */
8114 return 12;
8115}
8116
8117const char *
ae9d61ab 8118pa_output_indirect_call (rtx insn, rtx call_dest)
611ad29e
JDA
8119{
8120 rtx xoperands[1];
8121
8122 if (TARGET_64BIT)
8123 {
8124 xoperands[0] = call_dest;
8125 output_asm_insn ("ldd 16(%0),%%r2", xoperands);
8126 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
8127 return "";
8128 }
8129
8130 /* First the special case for kernels, level 0 systems, etc. */
8131 if (TARGET_FAST_INDIRECT_CALLS)
8132 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8133
8134 /* Now the normal case -- we can reach $$dyncall directly or
8135 we're sure that we can get there via a long-branch stub.
8136
8137 No need to check target flags as the length uniquely identifies
8138 the remaining cases. */
ae9d61ab 8139 if (pa_attr_length_indirect_call (insn) == 8)
2c774817 8140 {
40fc2e0b
JDA
8141 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8142 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8143 variant of the B,L instruction can't be used on the SOM target. */
8144 if (TARGET_PA_20 && !TARGET_SOM)
2c774817
JDA
8145 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
8146 else
8147 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8148 }
611ad29e
JDA
8149
8150 /* Long millicode call, but we are not generating PIC or portable runtime
8151 code. */
ae9d61ab 8152 if (pa_attr_length_indirect_call (insn) == 12)
611ad29e
JDA
8153 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8154
8155 /* Long millicode call for portable runtime. */
ae9d61ab 8156 if (pa_attr_length_indirect_call (insn) == 20)
611ad29e
JDA
8157 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
8158
8159 /* We need a long PIC call to $$dyncall. */
8160 xoperands[0] = NULL_RTX;
8161 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
8162 if (TARGET_SOM || !TARGET_GAS)
8163 {
8164 xoperands[0] = gen_label_rtx ();
8165 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
ecc418c4
JDA
8166 targetm.asm_out.internal_label (asm_out_file, "L",
8167 CODE_LABEL_NUMBER (xoperands[0]));
611ad29e
JDA
8168 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
8169 }
8170 else
8171 {
8172 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
8173 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
8174 xoperands);
8175 }
8176 output_asm_insn ("blr %%r0,%%r2", xoperands);
8177 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
8178 return "";
8179}
8180
d2a94ec0 8181/* In HPUX 8.0's shared library scheme, special relocations are needed
23f6f34f 8182 for function labels if they might be passed to a function
d2a94ec0 8183 in a shared library (because shared libraries don't live in code
520a57c8 8184 space), and special magic is needed to construct their address. */
d2a94ec0
TM
8185
8186void
ae9d61ab 8187pa_encode_label (rtx sym)
d2a94ec0 8188{
519104fe 8189 const char *str = XSTR (sym, 0);
10d17cb7
AM
8190 int len = strlen (str) + 1;
8191 char *newstr, *p;
d2a94ec0 8192
5ead67f6 8193 p = newstr = XALLOCAVEC (char, len + 1);
10d17cb7
AM
8194 *p++ = '@';
8195 strcpy (p, str);
67d6f2fc 8196
831c1763 8197 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
d2a94ec0 8198}
23f6f34f 8199
fb49053f 8200static void
b7849684 8201pa_encode_section_info (tree decl, rtx rtl, int first)
fb49053f 8202{
9a60b229
JJ
8203 int old_referenced = 0;
8204
8205 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8206 old_referenced
8207 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8208
51076f96
RC
8209 default_encode_section_info (decl, rtl, first);
8210
fb49053f
RH
8211 if (first && TEXT_SPACE_P (decl))
8212 {
fb49053f
RH
8213 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8214 if (TREE_CODE (decl) == FUNCTION_DECL)
ae9d61ab 8215 pa_encode_label (XEXP (rtl, 0));
fb49053f 8216 }
9a60b229
JJ
8217 else if (old_referenced)
8218 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
fb49053f
RH
8219}
8220
772c5265
RH
/* This is sort of inverse to pa_encode_section_info: skip at most one
   leading '@' and then at most one leading '*' in STR and return a
   pointer to what follows.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  if (str[0] == '@')
    str++;

  if (str[0] == '*')
    str++;

  return str;
}
8230
326bc2de
JL
8231/* Returns 1 if OP is a function label involved in a simple addition
8232 with a constant. Used to keep certain patterns from matching
8233 during instruction combination. */
8234int
ae9d61ab 8235pa_is_function_label_plus_const (rtx op)
326bc2de
JL
8236{
8237 /* Strip off any CONST. */
8238 if (GET_CODE (op) == CONST)
8239 op = XEXP (op, 0);
8240
8241 return (GET_CODE (op) == PLUS
9c575e20 8242 && function_label_operand (XEXP (op, 0), VOIDmode)
326bc2de
JL
8243 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8244}
8245
54374491
JL
8246/* Output assembly code for a thunk to FUNCTION.

   Write to FILE the code of THUNK_FNDECL: a sequence that adds DELTA
   to %r26 and branches to FUNCTION.  VCALL_OFFSET is not used.  Which
   sequence is emitted depends on the target flags tested below.
   NBYTES accumulates the size of the chosen sequence; at the end it is
   rounded up to a multiple of FUNCTION_BOUNDARY / BITS_PER_UNIT, added
   to last_address and passed to update_total_code_bytes.  */
8247
c590b625 8248static void
b7849684
JE
8249pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8250 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8251 tree function)
54374491 8252{
cdcb88d7 8253 static unsigned int current_thunk_number;
5fad1c24 8254 int val_14 = VAL_14_BITS_P (delta);
67b846fa 8255 unsigned int old_last_address = last_address, nbytes = 0;
54374491 8256 char label[16];
cdcb88d7 8257 rtx xoperands[4];
5fad1c24 8258
/* Operands used by the templates below: %0 and %1 come from the
   DECL_RTL of FUNCTION and of the thunk, %2 is DELTA.  %3 is set only
   by the branches that need it.  */
cdcb88d7
JDA
8259 xoperands[0] = XEXP (DECL_RTL (function), 0);
8260 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8261 xoperands[2] = GEN_INT (delta);
5fad1c24 8262
cdcb88d7
JDA
8263 ASM_OUTPUT_LABEL (file, XSTR (xoperands[1], 0));
8264 fprintf (file, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
5fad1c24
JDA
8265
8266 /* Output the thunk. We know that the function is in the same
8267 translation unit (i.e., the same space) as the thunk, and that
8268 thunks are output after their method. Thus, we don't need an
8269 external branch to reach the function. With SOM and GAS,
8270 functions and thunks are effectively in different sections.
8271 Thus, we can always use a IA-relative branch and the linker
8272 will add a long branch stub if necessary.
8273
8274 However, we have to be careful when generating PIC code on the
8275 SOM port to ensure that the sequence does not transfer to an
8276 import stub for the target function as this could clobber the
8277 return value saved at SP-24. This would also apply to the
8278 32-bit linux port if the multi-space model is implemented. */
8279 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8280 && !(flag_pic && TREE_PUBLIC (function))
8281 && (TARGET_GAS || last_address < 262132))
8282 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
677f3fa8 8283 && ((targetm_common.have_named_sections
5fad1c24
JDA
8284 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8285 /* The GNU 64-bit linker has rather poor stub management.
8286 So, we use a long branch from thunks that aren't in
8287 the same section as the target function. */
8288 && ((!TARGET_64BIT
8289 && (DECL_SECTION_NAME (thunk_fndecl)
8290 != DECL_SECTION_NAME (function)))
8291 || ((DECL_SECTION_NAME (thunk_fndecl)
8292 == DECL_SECTION_NAME (function))
8293 && last_address < 262132)))
677f3fa8 8294 || (targetm_common.have_named_sections
2842bb86
JDA
8295 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8296 && DECL_SECTION_NAME (function) == NULL
8297 && last_address < 262132)
677f3fa8
JM
8298 || (!targetm_common.have_named_sections
8299 && last_address < 262132))))
5fad1c24 8300 {
/* Short form: a plain branch to FUNCTION with the final adjustment
   of %r26 emitted right after it.  */
cdcb88d7
JDA
8301 if (!val_14)
8302 output_asm_insn ("addil L'%2,%%r26", xoperands);
8303
8304 output_asm_insn ("b %0", xoperands);
8305
5fad1c24
JDA
8306 if (val_14)
8307 {
cdcb88d7 8308 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24
JDA
8309 nbytes += 8;
8310 }
8311 else
8312 {
cdcb88d7 8313 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8314 nbytes += 12;
8315 }
8316 }
8317 else if (TARGET_64BIT)
8318 {
8319 /* We only have one call-clobbered scratch register, so we can't
8320 make use of the delay slot if delta doesn't fit in 14 bits. */
8321 if (!val_14)
cdcb88d7
JDA
8322 {
8323 output_asm_insn ("addil L'%2,%%r26", xoperands);
8324 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8325 }
5fad1c24 8326
cdcb88d7 8327 output_asm_insn ("b,l .+8,%%r1", xoperands);
5fad1c24
JDA
8328
8329 if (TARGET_GAS)
8330 {
cdcb88d7
JDA
8331 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8332 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
5fad1c24
JDA
8333 }
8334 else
8335 {
cdcb88d7
JDA
/* NOTE(review): this branch emits a single addil where the TARGET_GAS
   branch above emits addil followed by ldo, yet the nbytes values
   added below (20 / 24) are the same for both branches.  Confirm
   whether a matching ldo is missing here.  */
8336 xoperands[3] = GEN_INT (val_14 ? 8 : 16);
8337 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands);
5fad1c24
JDA
8338 }
8339
8340 if (val_14)
8341 {
cdcb88d7
JDA
8342 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8343 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24
JDA
8344 nbytes += 20;
8345 }
8346 else
8347 {
cdcb88d7 8348 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
5fad1c24
JDA
8349 nbytes += 24;
8350 }
8351 }
8352 else if (TARGET_PORTABLE_RUNTIME)
8353 {
/* Portable runtime: build the address of FUNCTION in %r22 with
   ldil/ldo and branch through that register.  */
cdcb88d7
JDA
8354 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8355 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8356
8357 if (!val_14)
8358 output_asm_insn ("addil L'%2,%%r26", xoperands);
8359
8360 output_asm_insn ("bv %%r0(%%r22)", xoperands);
5fad1c24
JDA
8361
8362 if (val_14)
8363 {
cdcb88d7 8364 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24
JDA
8365 nbytes += 16;
8366 }
8367 else
8368 {
cdcb88d7 8369 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8370 nbytes += 20;
8371 }
8372 }
8373 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8374 {
8375 /* The function is accessible from outside this module. The only
8376 way to avoid an import stub between the thunk and function is to
8377 call the function directly with an indirect sequence similar to
8378 that used by $$dyncall. This is possible because $$dyncall acts
8379 as the import stub in an indirect call. */
5fad1c24 8380 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
cdcb88d7
JDA
8381 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8382 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8383 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8384 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8385 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8386 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8387 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8388 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8389
5fad1c24
JDA
8390 if (!val_14)
8391 {
cdcb88d7 8392 output_asm_insn ("addil L'%2,%%r26", xoperands);
5fad1c24
JDA
8393 nbytes += 4;
8394 }
cdcb88d7 8395
5fad1c24
JDA
8396 if (TARGET_PA_20)
8397 {
cdcb88d7
JDA
8398 output_asm_insn ("bve (%%r22)", xoperands);
8399 nbytes += 36;
8400 }
8401 else if (TARGET_NO_SPACE_REGS)
8402 {
8403 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
5fad1c24
JDA
8404 nbytes += 36;
8405 }
8406 else
54374491 8407 {
cdcb88d7
JDA
8408 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8409 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8410 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8411 nbytes += 44;
5fad1c24
JDA
8412 }
8413
8414 if (val_14)
cdcb88d7 8415 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24 8416 else
cdcb88d7 8417 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8418 }
8419 else if (flag_pic)
8420 {
/* Remaining PIC case: compute the address of FUNCTION relative to
   the current location into %r22 and branch through it.  */
cdcb88d7 8421 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
5fad1c24
JDA
8422
8423 if (TARGET_SOM || !TARGET_GAS)
8424 {
cdcb88d7
JDA
8425 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands);
8426 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands);
5fad1c24
JDA
8427 }
8428 else
8429 {
cdcb88d7
JDA
8430 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands);
8431 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands);
5fad1c24
JDA
8432 }
8433
cdcb88d7
JDA
8434 if (!val_14)
8435 output_asm_insn ("addil L'%2,%%r26", xoperands);
8436
8437 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8438
5fad1c24
JDA
8439 if (val_14)
8440 {
cdcb88d7 8441 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24 8442 nbytes += 20;
54374491
JL
8443 }
8444 else
5fad1c24 8445 {
cdcb88d7 8446 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8447 nbytes += 24;
8448 }
54374491
JL
8449 }
8450 else
8451 {
/* Everything else: absolute long branch to FUNCTION with ldil/be.  */
5fad1c24 8452 if (!val_14)
cdcb88d7 8453 output_asm_insn ("addil L'%2,%%r26", xoperands);
5fad1c24 8454
cdcb88d7
JDA
8455 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8456 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
5fad1c24
JDA
8457
8458 if (val_14)
54374491 8459 {
cdcb88d7 8460 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24 8461 nbytes += 12;
54374491
JL
8462 }
8463 else
5fad1c24 8464 {
cdcb88d7 8465 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8466 nbytes += 16;
8467 }
54374491 8468 }
5fad1c24 8469
54374491 8470 fprintf (file, "\t.EXIT\n\t.PROCEND\n");
5fad1c24 8471
1a83bfc3
JDA
8472 if (TARGET_SOM && TARGET_GAS)
8473 {
8474 /* We are done with this subspace except possibly for some additional
8475 debug information. Forget that we are in this subspace to ensure
8476 that the next function is output in its own subspace. */
8477 in_section = NULL;
8478 cfun->machine->in_nsubspa = 2;
8479 }
8480
/* Same condition as the SOM PIC branch above: emit, in the data
   section, the LTHN word that the indirect sequence loads.  */
5fad1c24 8481 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
54374491 8482 {
d6b5193b 8483 switch_to_section (data_section);
cdcb88d7 8484 output_asm_insn (".align 4", xoperands);
5fad1c24 8485 ASM_OUTPUT_LABEL (file, label);
cdcb88d7 8486 output_asm_insn (".word P'%0", xoperands);
54374491 8487 }
5fad1c24 8488
54374491 8489 current_thunk_number++;
5fad1c24
JDA
/* Round the thunk size up to the function alignment.  */
8490 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8491 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8492 last_address += nbytes;
67b846fa
JDA
/* last_address wrapped around; saturate it at UINT_MAX.  */
8493 if (old_last_address > last_address)
8494 last_address = UINT_MAX;
5fad1c24 8495 update_total_code_bytes (nbytes);
54374491
JL
8496}
8497
4977bab6
ZW
8498/* Only direct calls to static functions are allowed to be sibling (tail)
8499 call optimized.
8500
8501 This restriction is necessary because some linker generated stubs will
8502 store return pointers into rp' in some cases which might clobber a
8503 live value already in rp'.
8504
8505 In a sibcall the current function and the target function share stack
8506 space. Thus if the path to the current function and the path to the
8507 target function save a value in rp', they save the value into the
8508 same stack slot, which has undesirable consequences.
8509
8510 Because of the deferred binding nature of shared libraries any function
8511 with external scope could be in a different load module and thus require
8512 rp' to be saved when calling that function. So sibcall optimizations
8513 can only be safe for static function.
8514
8515 Note that GCC never needs return value relocations, so we don't have to
8516 worry about static calls with return value relocations (which require
8517 saving rp').
8518
8519 It is safe to perform a sibcall optimization when the target function
8520 will never return. */
8521static bool
b7849684 8522pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4977bab6 8523{
73096ba9
JDA
8524 if (TARGET_PORTABLE_RUNTIME)
8525 return false;
8526
11f43127
JDA
8527 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
8528 single subspace mode and the call is not indirect. As far as I know,
8529 there is no operating system support for the multiple subspace mode.
8530 It might be possible to support indirect calls if we didn't use
ae9d61ab 8531 $$dyncall (see the indirect sequence generated in pa_output_call). */
11f43127
JDA
8532 if (TARGET_ELF32)
8533 return (decl != NULL_TREE);
8534
8535 /* Sibcalls are not ok because the arg pointer register is not a fixed
c1207243 8536 register. This prevents the sibcall optimization from occurring. In
11f43127
JDA
8537 addition, there are problems with stub placement using GNU ld. This
8538 is because a normal sibcall branch uses a 17-bit relocation while
8539 a regular call branch uses a 22-bit relocation. As a result, more
8540 care needs to be taken in the placement of long-branch stubs. */
8541 if (TARGET_64BIT)
8542 return false;
8543
73096ba9
JDA
8544 /* Sibcalls are only ok within a translation unit. */
8545 return (decl && !TREE_PUBLIC (decl));
4977bab6
ZW
8546}
8547
8ddf681a
R
8548/* ??? Addition is not commutative on the PA due to the weird implicit
8549 space register selection rules for memory addresses. Therefore, we
8550 don't consider a + b == b + a, as this might be inside a MEM. */
8551static bool
3101faab 8552pa_commutative_p (const_rtx x, int outer_code)
8ddf681a
R
8553{
8554 return (COMMUTATIVE_P (x)
bd7d5043
JDA
8555 && (TARGET_NO_SPACE_REGS
8556 || (outer_code != UNKNOWN && outer_code != MEM)
8ddf681a
R
8557 || GET_CODE (x) != PLUS));
8558}
8559
88e5c029
JL
8560/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8561 use in fmpyadd instructions. */
2fe24884 8562int
ae9d61ab 8563pa_fmpyaddoperands (rtx *operands)
2fe24884 8564{
f133af4c 8565 enum machine_mode mode = GET_MODE (operands[0]);
2fe24884 8566
d85ab966
JL
8567 /* Must be a floating point mode. */
8568 if (mode != SFmode && mode != DFmode)
8569 return 0;
8570
2fe24884 8571 /* All modes must be the same. */
f133af4c
TG
8572 if (! (mode == GET_MODE (operands[1])
8573 && mode == GET_MODE (operands[2])
8574 && mode == GET_MODE (operands[3])
8575 && mode == GET_MODE (operands[4])
8576 && mode == GET_MODE (operands[5])))
2fe24884
JL
8577 return 0;
8578
d85ab966
JL
8579 /* All operands must be registers. */
8580 if (! (GET_CODE (operands[1]) == REG
8581 && GET_CODE (operands[2]) == REG
8582 && GET_CODE (operands[3]) == REG
8583 && GET_CODE (operands[4]) == REG
8584 && GET_CODE (operands[5]) == REG))
2fe24884
JL
8585 return 0;
8586
88e5c029
JL
8587 /* Only 2 real operands to the addition. One of the input operands must
8588 be the same as the output operand. */
2fe24884
JL
8589 if (! rtx_equal_p (operands[3], operands[4])
8590 && ! rtx_equal_p (operands[3], operands[5]))
8591 return 0;
8592
1e5f1716 8593 /* Inout operand of add cannot conflict with any operands from multiply. */
2fe24884
JL
8594 if (rtx_equal_p (operands[3], operands[0])
8595 || rtx_equal_p (operands[3], operands[1])
8596 || rtx_equal_p (operands[3], operands[2]))
8597 return 0;
8598
1e5f1716 8599 /* multiply cannot feed into addition operands. */
2fe24884
JL
8600 if (rtx_equal_p (operands[4], operands[0])
8601 || rtx_equal_p (operands[5], operands[0]))
8602 return 0;
8603
d85ab966
JL
8604 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8605 if (mode == SFmode
88624c0e
JL
8606 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8607 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8608 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8609 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8610 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8611 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
d85ab966
JL
8612 return 0;
8613
2fe24884
JL
8614 /* Passed. Operands are suitable for fmpyadd. */
8615 return 1;
8616}
8617
35d434ed
JDA
8618#if !defined(USE_COLLECT2)
8619static void
b7849684 8620pa_asm_out_constructor (rtx symbol, int priority)
35d434ed
JDA
8621{
8622 if (!function_label_operand (symbol, VOIDmode))
ae9d61ab 8623 pa_encode_label (symbol);
35d434ed
JDA
8624
8625#ifdef CTORS_SECTION_ASM_OP
8626 default_ctor_section_asm_out_constructor (symbol, priority);
8627#else
8628# ifdef TARGET_ASM_NAMED_SECTION
8629 default_named_section_asm_out_constructor (symbol, priority);
8630# else
8631 default_stabs_asm_out_constructor (symbol, priority);
8632# endif
8633#endif
8634}
8635
8636static void
b7849684 8637pa_asm_out_destructor (rtx symbol, int priority)
35d434ed
JDA
8638{
8639 if (!function_label_operand (symbol, VOIDmode))
ae9d61ab 8640 pa_encode_label (symbol);
35d434ed
JDA
8641
8642#ifdef DTORS_SECTION_ASM_OP
8643 default_dtor_section_asm_out_destructor (symbol, priority);
8644#else
8645# ifdef TARGET_ASM_NAMED_SECTION
8646 default_named_section_asm_out_destructor (symbol, priority);
8647# else
8648 default_stabs_asm_out_destructor (symbol, priority);
8649# endif
8650#endif
8651}
8652#endif
8653
d4482715
JDA
8654/* This function places uninitialized global data in the bss section.
8655 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8656 function on the SOM port to prevent uninitialized global data from
8657 being placed in the data section. */
8658
8659void
8660pa_asm_output_aligned_bss (FILE *stream,
8661 const char *name,
8662 unsigned HOST_WIDE_INT size,
8663 unsigned int align)
8664{
d6b5193b 8665 switch_to_section (bss_section);
d4482715
JDA
8666 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8667
8668#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8669 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8670#endif
8671
8672#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8673 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8674#endif
8675
8676 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8677 ASM_OUTPUT_LABEL (stream, name);
8678 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8679}
8680
8681/* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8682 that doesn't allow the alignment of global common storage to be directly
8683 specified. The SOM linker aligns common storage based on the rounded
8684 value of the NUM_BYTES parameter in the .comm directive. It's not
8685 possible to use the .align directive as it doesn't affect the alignment
8686 of the label associated with a .comm directive. */
8687
8688void
8689pa_asm_output_aligned_common (FILE *stream,
8690 const char *name,
8691 unsigned HOST_WIDE_INT size,
8692 unsigned int align)
8693{
22f549fd
JDA
8694 unsigned int max_common_align;
8695
8696 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8697 if (align > max_common_align)
8698 {
d4ee4d25 8699 warning (0, "alignment (%u) for %s exceeds maximum alignment "
22f549fd
JDA
8700 "for global common data. Using %u",
8701 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8702 align = max_common_align;
8703 }
8704
d6b5193b 8705 switch_to_section (bss_section);
d4482715
JDA
8706
8707 assemble_name (stream, name);
8708 fprintf (stream, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8709 MAX (size, align / BITS_PER_UNIT));
8710}
8711
8712/* We can't use .comm for local common storage as the SOM linker effectively
8713 treats the symbol as universal and uses the same storage for local symbols
8714 with the same name in different object files. The .block directive
8715 reserves an uninitialized block of storage. However, it's not common
8716 storage. Fortunately, GCC never requests common storage with the same
8717 name in any given translation unit. */
8718
8719void
8720pa_asm_output_aligned_local (FILE *stream,
8721 const char *name,
8722 unsigned HOST_WIDE_INT size,
8723 unsigned int align)
8724{
d6b5193b 8725 switch_to_section (bss_section);
d4482715
JDA
8726 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8727
8728#ifdef LOCAL_ASM_OP
8729 fprintf (stream, "%s", LOCAL_ASM_OP);
8730 assemble_name (stream, name);
8731 fprintf (stream, "\n");
8732#endif
8733
8734 ASM_OUTPUT_LABEL (stream, name);
8735 fprintf (stream, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8736}
8737
88e5c029
JL
8738/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8739 use in fmpysub instructions. */
2fe24884 8740int
ae9d61ab 8741pa_fmpysuboperands (rtx *operands)
2fe24884 8742{
f133af4c 8743 enum machine_mode mode = GET_MODE (operands[0]);
2fe24884 8744
d85ab966
JL
8745 /* Must be a floating point mode. */
8746 if (mode != SFmode && mode != DFmode)
8747 return 0;
8748
2fe24884 8749 /* All modes must be the same. */
f133af4c
TG
8750 if (! (mode == GET_MODE (operands[1])
8751 && mode == GET_MODE (operands[2])
8752 && mode == GET_MODE (operands[3])
8753 && mode == GET_MODE (operands[4])
8754 && mode == GET_MODE (operands[5])))
2fe24884
JL
8755 return 0;
8756
d85ab966
JL
8757 /* All operands must be registers. */
8758 if (! (GET_CODE (operands[1]) == REG
8759 && GET_CODE (operands[2]) == REG
8760 && GET_CODE (operands[3]) == REG
8761 && GET_CODE (operands[4]) == REG
8762 && GET_CODE (operands[5]) == REG))
2fe24884
JL
8763 return 0;
8764
88e5c029
JL
8765 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8766 operation, so operands[4] must be the same as operand[3]. */
2fe24884
JL
8767 if (! rtx_equal_p (operands[3], operands[4]))
8768 return 0;
8769
1e5f1716 8770 /* multiply cannot feed into subtraction. */
88e5c029 8771 if (rtx_equal_p (operands[5], operands[0]))
2fe24884
JL
8772 return 0;
8773
1e5f1716 8774 /* Inout operand of sub cannot conflict with any operands from multiply. */
2fe24884
JL
8775 if (rtx_equal_p (operands[3], operands[0])
8776 || rtx_equal_p (operands[3], operands[1])
8777 || rtx_equal_p (operands[3], operands[2]))
8778 return 0;
8779
d85ab966
JL
8780 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8781 if (mode == SFmode
88624c0e
JL
8782 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8783 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8784 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8785 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8786 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8787 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
d85ab966
JL
8788 return 0;
8789
2fe24884
JL
8790 /* Passed. Operands are suitable for fmpysub. */
8791 return 1;
8792}
8793
c2264220
JL
/* Return 1 if VAL is 2, 4, or 8, the valid constants for shadd
   instructions, and 0 for any other value.  */
int
pa_shadd_constant_p (int val)
{
  switch (val)
    {
    case 2:
    case 4:
    case 8:
      return 1;

    default:
      return 0;
    }
}
4802a0d6 8804
3232e9d8
JDA
8805/* Return TRUE if INSN branches forward. */
8806
8807static bool
b7849684 8808forward_branch_p (rtx insn)
b9821af8 8809{
3232e9d8
JDA
8810 rtx lab = JUMP_LABEL (insn);
8811
8812 /* The INSN must have a jump label. */
8813 gcc_assert (lab != NULL_RTX);
8814
8815 if (INSN_ADDRESSES_SET_P ())
8816 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
b9821af8
JL
8817
8818 while (insn)
8819 {
3232e9d8
JDA
8820 if (insn == lab)
8821 return true;
b9821af8
JL
8822 else
8823 insn = NEXT_INSN (insn);
8824 }
8825
3232e9d8 8826 return false;
b9821af8
JL
8827}
8828
2c4ff308
JL
8829/* Return 1 if INSN is in the delay slot of a call instruction. */
8830int
ae9d61ab 8831pa_jump_in_call_delay (rtx insn)
2c4ff308
JL
8832{
8833
b64925dc 8834 if (! JUMP_P (insn))
2c4ff308
JL
8835 return 0;
8836
8837 if (PREV_INSN (insn)
8838 && PREV_INSN (PREV_INSN (insn))
b64925dc 8839 && NONJUMP_INSN_P (next_real_insn (PREV_INSN (PREV_INSN (insn)))))
2c4ff308 8840 {
02a57c73 8841 rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
2c4ff308
JL
8842
8843 return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
8844 && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
8845
8846 }
8847 else
8848 return 0;
8849}
746a9efa 8850
b1092901
JL
8851/* Output an unconditional move and branch insn. */
8852
519104fe 8853const char *
ae9d61ab 8854pa_output_parallel_movb (rtx *operands, rtx insn)
b1092901 8855{
16d74a3c
JDA
8856 int length = get_attr_length (insn);
8857
b1092901
JL
8858 /* These are the cases in which we win. */
8859 if (length == 4)
8860 return "mov%I1b,tr %1,%0,%2";
8861
16d74a3c
JDA
8862 /* None of the following cases win, but they don't lose either. */
8863 if (length == 8)
b1092901 8864 {
16d74a3c
JDA
8865 if (dbr_sequence_length () == 0)
8866 {
8867 /* Nothing in the delay slot, fake it by putting the combined
8868 insn (the copy or add) in the delay slot of a bl. */
8869 if (GET_CODE (operands[1]) == CONST_INT)
8870 return "b %2\n\tldi %1,%0";
8871 else
8872 return "b %2\n\tcopy %1,%0";
8873 }
b1092901 8874 else
16d74a3c
JDA
8875 {
8876 /* Something in the delay slot, but we've got a long branch. */
8877 if (GET_CODE (operands[1]) == CONST_INT)
8878 return "ldi %1,%0\n\tb %2";
8879 else
8880 return "copy %1,%0\n\tb %2";
8881 }
b1092901 8882 }
16d74a3c
JDA
8883
8884 if (GET_CODE (operands[1]) == CONST_INT)
8885 output_asm_insn ("ldi %1,%0", operands);
b1092901 8886 else
16d74a3c 8887 output_asm_insn ("copy %1,%0", operands);
ae9d61ab 8888 return pa_output_lbranch (operands[2], insn, 1);
b1092901
JL
8889}
8890
8891/* Output an unconditional add and branch insn. */
8892
519104fe 8893const char *
ae9d61ab 8894pa_output_parallel_addb (rtx *operands, rtx insn)
b1092901 8895{
16d74a3c
JDA
8896 int length = get_attr_length (insn);
8897
b1092901
JL
8898 /* To make life easy we want operand0 to be the shared input/output
8899 operand and operand1 to be the readonly operand. */
8900 if (operands[0] == operands[1])
8901 operands[1] = operands[2];
8902
8903 /* These are the cases in which we win. */
8904 if (length == 4)
8905 return "add%I1b,tr %1,%0,%3";
8906
16d74a3c
JDA
8907 /* None of the following cases win, but they don't lose either. */
8908 if (length == 8)
b1092901 8909 {
16d74a3c
JDA
8910 if (dbr_sequence_length () == 0)
8911 /* Nothing in the delay slot, fake it by putting the combined
8912 insn (the copy or add) in the delay slot of a bl. */
8913 return "b %3\n\tadd%I1 %1,%0,%0";
8914 else
8915 /* Something in the delay slot, but we've got a long branch. */
8916 return "add%I1 %1,%0,%0\n\tb %3";
b1092901 8917 }
16d74a3c
JDA
8918
8919 output_asm_insn ("add%I1 %1,%0,%0", operands);
ae9d61ab 8920 return pa_output_lbranch (operands[3], insn, 1);
b1092901
JL
8921}
8922
1c31ecf6
JDA
8923/* Return nonzero if INSN (a jump insn) immediately follows a call
8924 to a named function. This is used to avoid filling the delay slot
8925 of the jump since it can usually be eliminated by modifying RP in
8926 the delay slot of the call. */
6619e96c 8927
51723711 8928int
ae9d61ab 8929pa_following_call (rtx insn)
b1092901 8930{
6d8d2bbc 8931 if (! TARGET_JUMP_IN_DELAY)
f9bd8d8e
JL
8932 return 0;
8933
b1092901
JL
8934 /* Find the previous real insn, skipping NOTEs. */
8935 insn = PREV_INSN (insn);
b64925dc 8936 while (insn && NOTE_P (insn))
b1092901
JL
8937 insn = PREV_INSN (insn);
8938
8939 /* Check for CALL_INSNs and millicode calls. */
8940 if (insn
b64925dc 8941 && ((CALL_P (insn)
cdc0de30 8942 && get_attr_type (insn) != TYPE_DYNCALL)
b64925dc 8943 || (NONJUMP_INSN_P (insn)
b1092901
JL
8944 && GET_CODE (PATTERN (insn)) != SEQUENCE
8945 && GET_CODE (PATTERN (insn)) != USE
8946 && GET_CODE (PATTERN (insn)) != CLOBBER
8947 && get_attr_type (insn) == TYPE_MILLI)))
8948 return 1;
8949
8950 return 0;
8951}
8952
746a9efa
JL
8953/* We use this hook to perform a PA specific optimization which is difficult
8954 to do in earlier passes.
8955
33e67557
SB
8956 We surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8957 insns. Those insns mark where we should emit .begin_brtab and
8958 .end_brtab directives when using GAS. This allows for better link
8959 time optimizations. */
746a9efa 8960
18dbd950 8961static void
b7849684 8962pa_reorg (void)
746a9efa
JL
8963{
8964 rtx insn;
8965
18dbd950 8966 remove_useless_addtr_insns (1);
d8b79470 8967
86001391 8968 if (pa_cpu < PROCESSOR_8000)
18dbd950 8969 pa_combine_instructions ();
86001391 8970
33e67557
SB
8971 /* Still need brtab marker insns. FIXME: the presence of these
8972 markers disables output of the branch table to readonly memory,
8973 and any alignment directives that might be needed. Possibly,
8974 the begin_brtab insn should be output before the label for the
8975 table. This doesn't matter at the moment since the tables are
8976 always output in the text section. */
8977 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8978 {
8979 /* Find an ADDR_VEC insn. */
8980 if (! JUMP_TABLE_DATA_P (insn))
8981 continue;
8982
8983 /* Now generate markers for the beginning and end of the
8984 branch table. */
8985 emit_insn_before (gen_begin_brtab (), insn);
8986 emit_insn_after (gen_end_brtab (), insn);
8987 }
aba892c4 8988}
c4bb6b38
JL
8989
8990/* The PA has a number of odd instructions which can perform multiple
8991 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8992 it may be profitable to combine two instructions into one instruction
8993 with two outputs. It's not profitable PA2.0 machines because the
8994 two outputs would take two slots in the reorder buffers.
8995
8996 This routine finds instructions which can be combined and combines
8997 them. We only support some of the potential combinations, and we
8998 only try common ways to find suitable instructions.
8999
9000 * addb can add two registers or a register and a small integer
9001 and jump to a nearby (+-8k) location. Normally the jump to the
9002 nearby location is conditional on the result of the add, but by
9003 using the "true" condition we can make the jump unconditional.
9004 Thus addb can perform two independent operations in one insn.
9005
9006 * movb is similar to addb in that it can perform a reg->reg
9007 or small immediate->reg copy and jump to a nearby (+-8k location).
9008
9009 * fmpyadd and fmpysub can perform a FP multiply and either an
9010 FP add or FP sub if the operands of the multiply and add/sub are
9011 independent (there are other minor restrictions). Note both
9012 the fmpy and fadd/fsub can in theory move to better spots according
9013 to data dependencies, but for now we require the fmpy stay at a
9014 fixed location.
9015
9016 * Many of the memory operations can perform pre & post updates
9017 of index registers. GCC's pre/post increment/decrement addressing
9018 is far too simple to take advantage of all the possibilities. This
9019 pass may not be suitable since those insns may not be independent.
9020
9021 * comclr can compare two ints or an int and a register, nullify
9022 the following instruction and zero some other register. This
9023 is more difficult to use as it's harder to find an insn which
9024 will generate a comclr than finding something like an unconditional
9025 branch. (conditional moves & long branches create comclr insns).
9026
9027 * Most arithmetic operations can conditionally skip the next
9028 instruction. They can be viewed as "perform this operation
9029 and conditionally jump to this nearby location" (where nearby
9030 is an insns away). These are difficult to use due to the
9031 branch length restrictions. */
9032
51723711 9033static void
b7849684 9034pa_combine_instructions (void)
c4bb6b38 9035{
0a2aaacc 9036 rtx anchor, new_rtx;
c4bb6b38
JL
9037
9038 /* This can get expensive since the basic algorithm is on the
9039 order of O(n^2) (or worse). Only do it for -O2 or higher
956d6950 9040 levels of optimization. */
c4bb6b38
JL
9041 if (optimize < 2)
9042 return;
9043
9044 /* Walk down the list of insns looking for "anchor" insns which
9045 may be combined with "floating" insns. As the name implies,
9046 "anchor" instructions don't move, while "floating" insns may
9047 move around. */
0a2aaacc
KG
9048 new_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9049 new_rtx = make_insn_raw (new_rtx);
c4bb6b38
JL
9050
9051 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9052 {
9053 enum attr_pa_combine_type anchor_attr;
9054 enum attr_pa_combine_type floater_attr;
9055
9056 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9057 Also ignore any special USE insns. */
b64925dc 9058 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
c4bb6b38 9059 || GET_CODE (PATTERN (anchor)) == USE
34f0d87a 9060 || GET_CODE (PATTERN (anchor)) == CLOBBER)
c4bb6b38
JL
9061 continue;
9062
9063 anchor_attr = get_attr_pa_combine_type (anchor);
9064 /* See if anchor is an insn suitable for combination. */
9065 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9066 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9067 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9068 && ! forward_branch_p (anchor)))
9069 {
9070 rtx floater;
9071
9072 for (floater = PREV_INSN (anchor);
9073 floater;
9074 floater = PREV_INSN (floater))
9075 {
b64925dc
SB
9076 if (NOTE_P (floater)
9077 || (NONJUMP_INSN_P (floater)
c4bb6b38
JL
9078 && (GET_CODE (PATTERN (floater)) == USE
9079 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9080 continue;
9081
9082 /* Anything except a regular INSN will stop our search. */
39718607 9083 if (! NONJUMP_INSN_P (floater))
c4bb6b38
JL
9084 {
9085 floater = NULL_RTX;
9086 break;
9087 }
9088
9089 /* See if FLOATER is suitable for combination with the
9090 anchor. */
9091 floater_attr = get_attr_pa_combine_type (floater);
9092 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9093 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9094 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9095 && floater_attr == PA_COMBINE_TYPE_FMPY))
9096 {
9097 /* If ANCHOR and FLOATER can be combined, then we're
9098 done with this pass. */
0a2aaacc 9099 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
c4bb6b38
JL
9100 SET_DEST (PATTERN (floater)),
9101 XEXP (SET_SRC (PATTERN (floater)), 0),
9102 XEXP (SET_SRC (PATTERN (floater)), 1)))
9103 break;
9104 }
9105
9106 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9107 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9108 {
9109 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9110 {
0a2aaacc 9111 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
c4bb6b38
JL
9112 SET_DEST (PATTERN (floater)),
9113 XEXP (SET_SRC (PATTERN (floater)), 0),
9114 XEXP (SET_SRC (PATTERN (floater)), 1)))
9115 break;
9116 }
9117 else
9118 {
0a2aaacc 9119 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
c4bb6b38
JL
9120 SET_DEST (PATTERN (floater)),
9121 SET_SRC (PATTERN (floater)),
9122 SET_SRC (PATTERN (floater))))
9123 break;
9124 }
9125 }
9126 }
9127
9128 /* If we didn't find anything on the backwards scan try forwards. */
9129 if (!floater
9130 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9131 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9132 {
9133 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9134 {
b64925dc
SB
9135 if (NOTE_P (floater)
9136 || (NONJUMP_INSN_P (floater)
c4bb6b38
JL
9137 && (GET_CODE (PATTERN (floater)) == USE
9138 || GET_CODE (PATTERN (floater)) == CLOBBER)))
6619e96c 9139
c4bb6b38
JL
9140 continue;
9141
9142 /* Anything except a regular INSN will stop our search. */
39718607 9143 if (! NONJUMP_INSN_P (floater))
c4bb6b38
JL
9144 {
9145 floater = NULL_RTX;
9146 break;
9147 }
9148
9149 /* See if FLOATER is suitable for combination with the
9150 anchor. */
9151 floater_attr = get_attr_pa_combine_type (floater);
9152 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9153 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9154 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9155 && floater_attr == PA_COMBINE_TYPE_FMPY))
9156 {
9157 /* If ANCHOR and FLOATER can be combined, then we're
9158 done with this pass. */
0a2aaacc 9159 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
c4bb6b38 9160 SET_DEST (PATTERN (floater)),
831c1763
AM
9161 XEXP (SET_SRC (PATTERN (floater)),
9162 0),
9163 XEXP (SET_SRC (PATTERN (floater)),
9164 1)))
c4bb6b38
JL
9165 break;
9166 }
9167 }
9168 }
9169
9170 /* FLOATER will be nonzero if we found a suitable floating
9171 insn for combination with ANCHOR. */
9172 if (floater
9173 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9174 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9175 {
9176 /* Emit the new instruction and delete the old anchor. */
c5c76735
JL
9177 emit_insn_before (gen_rtx_PARALLEL
9178 (VOIDmode,
9179 gen_rtvec (2, PATTERN (anchor),
9180 PATTERN (floater))),
9181 anchor);
9182
a38e7aa5 9183 SET_INSN_DELETED (anchor);
c4bb6b38
JL
9184
9185 /* Emit a special USE insn for FLOATER, then delete
9186 the floating insn. */
ad2c71b7 9187 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
c4bb6b38
JL
9188 delete_insn (floater);
9189
9190 continue;
9191 }
9192 else if (floater
9193 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9194 {
9195 rtx temp;
9196 /* Emit the new_jump instruction and delete the old anchor. */
c5c76735
JL
9197 temp
9198 = emit_jump_insn_before (gen_rtx_PARALLEL
9199 (VOIDmode,
9200 gen_rtvec (2, PATTERN (anchor),
9201 PATTERN (floater))),
9202 anchor);
9203
c4bb6b38 9204 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
a38e7aa5 9205 SET_INSN_DELETED (anchor);
c4bb6b38
JL
9206
9207 /* Emit a special USE insn for FLOATER, then delete
9208 the floating insn. */
ad2c71b7 9209 emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater);
c4bb6b38
JL
9210 delete_insn (floater);
9211 continue;
9212 }
9213 }
9214 }
9215}
9216
0952f89b 9217static int
0a2aaacc 9218pa_can_combine_p (rtx new_rtx, rtx anchor, rtx floater, int reversed, rtx dest,
b7849684 9219 rtx src1, rtx src2)
c4bb6b38
JL
9220{
9221 int insn_code_number;
9222 rtx start, end;
9223
9224 /* Create a PARALLEL with the patterns of ANCHOR and
9225 FLOATER, try to recognize it, then test constraints
9226 for the resulting pattern.
9227
9228 If the pattern doesn't match or the constraints
9229 aren't met keep searching for a suitable floater
9230 insn. */
0a2aaacc
KG
9231 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9232 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9233 INSN_CODE (new_rtx) = -1;
9234 insn_code_number = recog_memoized (new_rtx);
c4bb6b38 9235 if (insn_code_number < 0
0a2aaacc 9236 || (extract_insn (new_rtx), ! constrain_operands (1)))
c4bb6b38
JL
9237 return 0;
9238
9239 if (reversed)
9240 {
9241 start = anchor;
9242 end = floater;
9243 }
9244 else
9245 {
9246 start = floater;
9247 end = anchor;
9248 }
9249
9250 /* There's up to three operands to consider. One
9251 output and two inputs.
9252
9253 The output must not be used between FLOATER & ANCHOR
9254 exclusive. The inputs must not be set between
9255 FLOATER and ANCHOR exclusive. */
9256
9257 if (reg_used_between_p (dest, start, end))
9258 return 0;
9259
9260 if (reg_set_between_p (src1, start, end))
9261 return 0;
9262
9263 if (reg_set_between_p (src2, start, end))
9264 return 0;
9265
9266 /* If we get here, then everything is good. */
9267 return 1;
9268}
b9cd54d2 9269
2561a923 9270/* Return nonzero if references for INSN are delayed.
b9cd54d2
JL
9271
9272 Millicode insns are actually function calls with some special
9273 constraints on arguments and register usage.
9274
9275 Millicode calls always expect their arguments in the integer argument
9276 registers, and always return their result in %r29 (ret1). They
7d8b1412
AM
9277 are expected to clobber their arguments, %r1, %r29, and the return
9278 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9279
9280 This function tells reorg that the references to arguments and
9281 millicode calls do not appear to happen until after the millicode call.
9282 This allows reorg to put insns which set the argument registers into the
9283 delay slot of the millicode call -- thus they act more like traditional
9284 CALL_INSNs.
9285
1e5f1716 9286 Note we cannot consider side effects of the insn to be delayed because
7d8b1412
AM
9287 the branch and link insn will clobber the return pointer. If we happened
9288 to use the return pointer in the delay slot of the call, then we lose.
b9cd54d2
JL
9289
9290 get_attr_type will try to recognize the given insn, so make sure to
9291 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9292 in particular. */
9293int
ae9d61ab 9294pa_insn_refs_are_delayed (rtx insn)
b9cd54d2 9295{
b64925dc 9296 return ((NONJUMP_INSN_P (insn)
b9cd54d2
JL
9297 && GET_CODE (PATTERN (insn)) != SEQUENCE
9298 && GET_CODE (PATTERN (insn)) != USE
9299 && GET_CODE (PATTERN (insn)) != CLOBBER
9300 && get_attr_type (insn) == TYPE_MILLI));
9301}
d07d525a 9302
cde0f3fd
PB
9303/* Promote the return value, but not the arguments. */
9304
02095c50 9305static enum machine_mode
cde0f3fd
PB
9306pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9307 enum machine_mode mode,
9308 int *punsignedp ATTRIBUTE_UNUSED,
9309 const_tree fntype ATTRIBUTE_UNUSED,
9310 int for_return)
9311{
666e3ceb 9312 if (for_return == 0)
cde0f3fd 9313 return mode;
02095c50 9314 return promote_mode (type, mode, punsignedp);
cde0f3fd
PB
9315}
9316
44571d6e
JDA
9317/* On the HP-PA the value is found in register(s) 28(-29), unless
9318 the mode is SF or DF. Then the value is returned in fr4 (32).
9319
cde0f3fd
PB
9320 This must perform the same promotions as PROMOTE_MODE, else promoting
9321 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
44571d6e
JDA
9322
9323 Small structures must be returned in a PARALLEL on PA64 in order
9324 to match the HP Compiler ABI. */
9325
8a5b8538 9326static rtx
9193312a
AS
9327pa_function_value (const_tree valtype,
9328 const_tree func ATTRIBUTE_UNUSED,
9329 bool outgoing ATTRIBUTE_UNUSED)
44571d6e
JDA
9330{
9331 enum machine_mode valmode;
9332
4720d5ca
JDA
9333 if (AGGREGATE_TYPE_P (valtype)
9334 || TREE_CODE (valtype) == COMPLEX_TYPE
9335 || TREE_CODE (valtype) == VECTOR_TYPE)
44571d6e 9336 {
2a04824b
JDA
9337 if (TARGET_64BIT)
9338 {
9339 /* Aggregates with a size less than or equal to 128 bits are
9340 returned in GR 28(-29). They are left justified. The pad
9341 bits are undefined. Larger aggregates are returned in
9342 memory. */
9343 rtx loc[2];
9344 int i, offset = 0;
9345 int ub = int_size_in_bytes (valtype) <= UNITS_PER_WORD ? 1 : 2;
9346
9347 for (i = 0; i < ub; i++)
9348 {
9349 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9350 gen_rtx_REG (DImode, 28 + i),
9351 GEN_INT (offset));
9352 offset += 8;
9353 }
44571d6e 9354
2a04824b
JDA
9355 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9356 }
9357 else if (int_size_in_bytes (valtype) > UNITS_PER_WORD)
44571d6e 9358 {
2a04824b
JDA
9359 /* Aggregates 5 to 8 bytes in size are returned in general
9360 registers r28-r29 in the same manner as other non
9361 floating-point objects. The data is right-justified and
9362 zero-extended to 64 bits. This is opposite to the normal
9363 justification used on big endian targets and requires
9364 special treatment. */
9365 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9366 gen_rtx_REG (DImode, 28), const0_rtx);
9367 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
44571d6e 9368 }
44571d6e
JDA
9369 }
9370
9371 if ((INTEGRAL_TYPE_P (valtype)
2ae88ecd 9372 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
44571d6e
JDA
9373 || POINTER_TYPE_P (valtype))
9374 valmode = word_mode;
9375 else
9376 valmode = TYPE_MODE (valtype);
9377
9378 if (TREE_CODE (valtype) == REAL_TYPE
2a04824b 9379 && !AGGREGATE_TYPE_P (valtype)
44571d6e
JDA
9380 && TYPE_MODE (valtype) != TFmode
9381 && !TARGET_SOFT_FLOAT)
9382 return gen_rtx_REG (valmode, 32);
9383
9384 return gen_rtx_REG (valmode, 28);
9385}
9386
8a5b8538
AS
9387/* Implement the TARGET_LIBCALL_VALUE hook. */
9388
9389static rtx
9390pa_libcall_value (enum machine_mode mode,
9391 const_rtx fun ATTRIBUTE_UNUSED)
9392{
9393 if (! TARGET_SOFT_FLOAT
9394 && (mode == SFmode || mode == DFmode))
9395 return gen_rtx_REG (mode, 32);
9396 else
9397 return gen_rtx_REG (mode, 28);
9398}
9399
9400/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9401
9402static bool
9403pa_function_value_regno_p (const unsigned int regno)
9404{
9405 if (regno == 28
9406 || (! TARGET_SOFT_FLOAT && regno == 32))
9407 return true;
9408
9409 return false;
9410}
9411
fd29bdaf
NF
9412/* Update the data in CUM to advance over an argument
9413 of mode MODE and data type TYPE.
9414 (TYPE is null for libcalls where that information may not be available.) */
9415
9416static void
d5cc9181 9417pa_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
fd29bdaf
NF
9418 const_tree type, bool named ATTRIBUTE_UNUSED)
9419{
d5cc9181 9420 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
fd29bdaf
NF
9421 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9422
9423 cum->nargs_prototype--;
9424 cum->words += (arg_size
9425 + ((cum->words & 01)
9426 && type != NULL_TREE
9427 && arg_size > 1));
9428}
9429
520babc7
JL
9430/* Return the location of a parameter that is passed in a register or NULL
9431 if the parameter has any component that is passed in memory.
9432
9433 This is new code and will be pushed to into the net sources after
6619e96c 9434 further testing.
520babc7
JL
9435
9436 ??? We might want to restructure this so that it looks more like other
9437 ports. */
fd29bdaf 9438static rtx
d5cc9181 9439pa_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
fd29bdaf 9440 const_tree type, bool named ATTRIBUTE_UNUSED)
520babc7 9441{
d5cc9181 9442 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
520babc7 9443 int max_arg_words = (TARGET_64BIT ? 8 : 4);
015b1ad1 9444 int alignment = 0;
9dff28ab 9445 int arg_size;
520babc7
JL
9446 int fpr_reg_base;
9447 int gpr_reg_base;
9448 rtx retval;
9449
9dff28ab
JDA
9450 if (mode == VOIDmode)
9451 return NULL_RTX;
9452
9453 arg_size = FUNCTION_ARG_SIZE (mode, type);
9454
9455 /* If this arg would be passed partially or totally on the stack, then
78a52f11 9456 this routine should return zero. pa_arg_partial_bytes will
9dff28ab
JDA
9457 handle arguments which are split between regs and stack slots if
9458 the ABI mandates split arguments. */
4720d5ca 9459 if (!TARGET_64BIT)
520babc7 9460 {
9dff28ab
JDA
9461 /* The 32-bit ABI does not split arguments. */
9462 if (cum->words + arg_size > max_arg_words)
520babc7
JL
9463 return NULL_RTX;
9464 }
9465 else
9466 {
015b1ad1
JDA
9467 if (arg_size > 1)
9468 alignment = cum->words & 1;
9dff28ab 9469 if (cum->words + alignment >= max_arg_words)
520babc7
JL
9470 return NULL_RTX;
9471 }
9472
9473 /* The 32bit ABIs and the 64bit ABIs are rather different,
9474 particularly in their handling of FP registers. We might
9475 be able to cleverly share code between them, but I'm not
0952f89b 9476 going to bother in the hope that splitting them up results
015b1ad1 9477 in code that is more easily understood. */
520babc7 9478
520babc7
JL
9479 if (TARGET_64BIT)
9480 {
9481 /* Advance the base registers to their current locations.
9482
9483 Remember, gprs grow towards smaller register numbers while
015b1ad1
JDA
9484 fprs grow to higher register numbers. Also remember that
9485 although FP regs are 32-bit addressable, we pretend that
9486 the registers are 64-bits wide. */
520babc7
JL
9487 gpr_reg_base = 26 - cum->words;
9488 fpr_reg_base = 32 + cum->words;
6619e96c 9489
9dff28ab
JDA
9490 /* Arguments wider than one word and small aggregates need special
9491 treatment. */
9492 if (arg_size > 1
9493 || mode == BLKmode
4720d5ca
JDA
9494 || (type && (AGGREGATE_TYPE_P (type)
9495 || TREE_CODE (type) == COMPLEX_TYPE
9496 || TREE_CODE (type) == VECTOR_TYPE)))
520babc7 9497 {
015b1ad1
JDA
9498 /* Double-extended precision (80-bit), quad-precision (128-bit)
9499 and aggregates including complex numbers are aligned on
9500 128-bit boundaries. The first eight 64-bit argument slots
9501 are associated one-to-one, with general registers r26
9502 through r19, and also with floating-point registers fr4
9503 through fr11. Arguments larger than one word are always
9dff28ab
JDA
9504 passed in general registers.
9505
9506 Using a PARALLEL with a word mode register results in left
9507 justified data on a big-endian target. */
015b1ad1
JDA
9508
9509 rtx loc[8];
9510 int i, offset = 0, ub = arg_size;
9511
9512 /* Align the base register. */
9513 gpr_reg_base -= alignment;
9514
9515 ub = MIN (ub, max_arg_words - cum->words - alignment);
9516 for (i = 0; i < ub; i++)
520babc7 9517 {
015b1ad1
JDA
9518 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9519 gen_rtx_REG (DImode, gpr_reg_base),
9520 GEN_INT (offset));
9521 gpr_reg_base -= 1;
9522 offset += 8;
520babc7 9523 }
015b1ad1 9524
e4b95210 9525 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
520babc7 9526 }
9dff28ab 9527 }
520babc7
JL
9528 else
9529 {
9530 /* If the argument is larger than a word, then we know precisely
9531 which registers we must use. */
015b1ad1 9532 if (arg_size > 1)
520babc7
JL
9533 {
9534 if (cum->words)
9535 {
9536 gpr_reg_base = 23;
9537 fpr_reg_base = 38;
9538 }
9539 else
9540 {
9541 gpr_reg_base = 25;
9542 fpr_reg_base = 34;
9543 }
9dff28ab
JDA
9544
9545 /* Structures 5 to 8 bytes in size are passed in the general
9546 registers in the same manner as other non floating-point
9547 objects. The data is right-justified and zero-extended
7ea18c08
JDA
9548 to 64 bits. This is opposite to the normal justification
9549 used on big endian targets and requires special treatment.
4720d5ca
JDA
9550 We now define BLOCK_REG_PADDING to pad these objects.
9551 Aggregates, complex and vector types are passed in the same
9552 manner as structures. */
9553 if (mode == BLKmode
9554 || (type && (AGGREGATE_TYPE_P (type)
9555 || TREE_CODE (type) == COMPLEX_TYPE
9556 || TREE_CODE (type) == VECTOR_TYPE)))
9dff28ab 9557 {
44571d6e
JDA
9558 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9559 gen_rtx_REG (DImode, gpr_reg_base),
9560 const0_rtx);
2a04824b 9561 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9dff28ab 9562 }
520babc7
JL
9563 }
9564 else
9565 {
9566 /* We have a single word (32 bits). A simple computation
9567 will get us the register #s we need. */
9568 gpr_reg_base = 26 - cum->words;
9569 fpr_reg_base = 32 + 2 * cum->words;
9570 }
9571 }
9572
b848dc65 9573 /* Determine if the argument needs to be passed in both general and
520babc7 9574 floating point registers. */
b848dc65
JDA
9575 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9576 /* If we are doing soft-float with portable runtime, then there
9577 is no need to worry about FP regs. */
c328adfa 9578 && !TARGET_SOFT_FLOAT
4720d5ca 9579 /* The parameter must be some kind of scalar float, else we just
b848dc65 9580 pass it in integer registers. */
4720d5ca 9581 && GET_MODE_CLASS (mode) == MODE_FLOAT
b848dc65
JDA
9582 /* The target function must not have a prototype. */
9583 && cum->nargs_prototype <= 0
9584 /* libcalls do not need to pass items in both FP and general
9585 registers. */
9586 && type != NULL_TREE
c328adfa
JDA
9587 /* All this hair applies to "outgoing" args only. This includes
9588 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9589 && !cum->incoming)
b848dc65
JDA
9590 /* Also pass outgoing floating arguments in both registers in indirect
9591 calls with the 32 bit ABI and the HP assembler since there is no
9592 way to the specify argument locations in static functions. */
c328adfa
JDA
9593 || (!TARGET_64BIT
9594 && !TARGET_GAS
9595 && !cum->incoming
b848dc65 9596 && cum->indirect
4720d5ca 9597 && GET_MODE_CLASS (mode) == MODE_FLOAT))
520babc7
JL
9598 {
9599 retval
9600 = gen_rtx_PARALLEL
9601 (mode,
9602 gen_rtvec (2,
9603 gen_rtx_EXPR_LIST (VOIDmode,
9604 gen_rtx_REG (mode, fpr_reg_base),
9605 const0_rtx),
9606 gen_rtx_EXPR_LIST (VOIDmode,
9607 gen_rtx_REG (mode, gpr_reg_base),
9608 const0_rtx)));
9609 }
9610 else
9611 {
9612 /* See if we should pass this parameter in a general register. */
9613 if (TARGET_SOFT_FLOAT
9614 /* Indirect calls in the normal 32bit ABI require all arguments
9615 to be passed in general registers. */
9616 || (!TARGET_PORTABLE_RUNTIME
9617 && !TARGET_64BIT
50b424a9 9618 && !TARGET_ELF32
520babc7 9619 && cum->indirect)
4720d5ca
JDA
9620 /* If the parameter is not a scalar floating-point parameter,
9621 then it belongs in GPRs. */
9622 || GET_MODE_CLASS (mode) != MODE_FLOAT
2a04824b
JDA
9623 /* Structure with single SFmode field belongs in GPR. */
9624 || (type && AGGREGATE_TYPE_P (type)))
520babc7
JL
9625 retval = gen_rtx_REG (mode, gpr_reg_base);
9626 else
9627 retval = gen_rtx_REG (mode, fpr_reg_base);
9628 }
9629 return retval;
9630}
9631
c2ed6cf8
NF
9632/* Arguments larger than one word are double word aligned. */
9633
9634static unsigned int
9635pa_function_arg_boundary (enum machine_mode mode, const_tree type)
9636{
c2ed6cf8 9637 bool singleword = (type
b58a864d
NF
9638 ? (integer_zerop (TYPE_SIZE (type))
9639 || !TREE_CONSTANT (TYPE_SIZE (type))
c2ed6cf8 9640 || int_size_in_bytes (type) <= UNITS_PER_WORD)
19cf9bde 9641 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
c2ed6cf8
NF
9642
9643 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9644}
520babc7
JL
9645
9646/* If this arg would be passed totally in registers or totally on the stack,
78a52f11
RH
9647 then this routine should return zero. */
9648
9649static int
d5cc9181 9650pa_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
78a52f11 9651 tree type, bool named ATTRIBUTE_UNUSED)
520babc7 9652{
d5cc9181 9653 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
e0c556d3
AM
9654 unsigned int max_arg_words = 8;
9655 unsigned int offset = 0;
520babc7 9656
78a52f11
RH
9657 if (!TARGET_64BIT)
9658 return 0;
9659
e0c556d3 9660 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
520babc7
JL
9661 offset = 1;
9662
e0c556d3 9663 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
fe19a83d 9664 /* Arg fits fully into registers. */
520babc7 9665 return 0;
6619e96c 9666 else if (cum->words + offset >= max_arg_words)
fe19a83d 9667 /* Arg fully on the stack. */
520babc7
JL
9668 return 0;
9669 else
fe19a83d 9670 /* Arg is split. */
78a52f11 9671 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
520babc7
JL
9672}
9673
9674
d6b5193b 9675/* A get_unnamed_section callback for switching to the text section.
9a55eab3
JDA
9676
9677 This function is only used with SOM. Because we don't support
9678 named subspaces, we can only create a new subspace or switch back
774acadf 9679 to the default text subspace. */
774acadf 9680
d6b5193b
RS
9681static void
9682som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9683{
9684 gcc_assert (TARGET_SOM);
774acadf 9685 if (TARGET_GAS)
9a55eab3 9686 {
8c5e065b 9687 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9a55eab3
JDA
9688 {
9689 /* We only want to emit a .nsubspa directive once at the
9690 start of the function. */
9691 cfun->machine->in_nsubspa = 1;
9692
9693 /* Create a new subspace for the text. This provides
9694 better stub placement and one-only functions. */
9695 if (cfun->decl
9696 && DECL_ONE_ONLY (cfun->decl)
9697 && !DECL_WEAK (cfun->decl))
1a83bfc3
JDA
9698 {
9699 output_section_asm_op ("\t.SPACE $TEXT$\n"
9700 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9701 "ACCESS=44,SORT=24,COMDAT");
9702 return;
9703 }
9a55eab3
JDA
9704 }
9705 else
9706 {
9707 /* There isn't a current function or the body of the current
9708 function has been completed. So, we are changing to the
1a83bfc3
JDA
9709 text section to output debugging information. Thus, we
9710 need to forget that we are in the text section so that
9711 varasm.c will call us when text_section is selected again. */
8c5e065b
JDA
9712 gcc_assert (!cfun || !cfun->machine
9713 || cfun->machine->in_nsubspa == 2);
d6b5193b 9714 in_section = NULL;
9a55eab3 9715 }
1a83bfc3
JDA
9716 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9717 return;
9a55eab3 9718 }
d6b5193b
RS
9719 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9720}
9721
1a83bfc3
JDA
9722/* A get_unnamed_section callback for switching to comdat data
9723 sections. This function is only used with SOM. */
9724
9725static void
9726som_output_comdat_data_section_asm_op (const void *data)
9727{
9728 in_section = NULL;
9729 output_section_asm_op (data);
9730}
9731
d6b5193b 9732/* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9a55eab3 9733
d6b5193b
RS
9734static void
9735pa_som_asm_init_sections (void)
9736{
9737 text_section
9738 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9739
9740 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9741 is not being generated. */
9742 som_readonly_data_section
9743 = get_unnamed_section (0, output_section_asm_op,
9744 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9745
9746 /* When secondary definitions are not supported, SOM makes readonly
9747 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9748 the comdat flag. */
9749 som_one_only_readonly_data_section
1a83bfc3 9750 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
d6b5193b
RS
9751 "\t.SPACE $TEXT$\n"
9752 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9753 "ACCESS=0x2c,SORT=16,COMDAT");
9754
9755
9756 /* When secondary definitions are not supported, SOM makes data one-only
9757 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9758 som_one_only_data_section
1a83bfc3
JDA
9759 = get_unnamed_section (SECTION_WRITE,
9760 som_output_comdat_data_section_asm_op,
d6b5193b
RS
9761 "\t.SPACE $PRIVATE$\n"
9762 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9763 "ACCESS=31,SORT=24,COMDAT");
9764
57d138a9
JDA
9765 if (flag_tm)
9766 som_tm_clone_table_section
9767 = get_unnamed_section (0, output_section_asm_op,
9768 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9769
d6b5193b
RS
9770 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9771 which reference data within the $TEXT$ space (for example constant
9772 strings in the $LIT$ subspace).
9773
9774 The assemblers (GAS and HP as) both have problems with handling
9775 the difference of two symbols which is the other correct way to
9776 reference constant data during PIC code generation.
9777
9778 So, there's no way to reference constant data which is in the
9779 $TEXT$ space during PIC generation. Instead place all constant
9780 data into the $PRIVATE$ subspace (this reduces sharing, but it
9781 works correctly). */
9782 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9783
9784 /* We must not have a reference to an external symbol defined in a
9785 shared library in a readonly section, else the SOM linker will
9786 complain.
9787
9788 So, we force exception information into the data section. */
9789 exception_section = data_section;
9a55eab3
JDA
9790}
9791
57d138a9
JDA
9792/* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9793
9794static section *
9795pa_som_tm_clone_table_section (void)
9796{
9797 return som_tm_clone_table_section;
9798}
9799
ae46c4e0
RH
9800/* On hpux10, the linker will give an error if we have a reference
9801 in the read-only data section to a symbol defined in a shared
9802 library. Therefore, expressions that might require a reloc can
9803 not be placed in the read-only data section. */
9804
d6b5193b 9805static section *
24a52160
JDA
9806pa_select_section (tree exp, int reloc,
9807 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
ae46c4e0
RH
9808{
9809 if (TREE_CODE (exp) == VAR_DECL
9810 && TREE_READONLY (exp)
9811 && !TREE_THIS_VOLATILE (exp)
9812 && DECL_INITIAL (exp)
9813 && (DECL_INITIAL (exp) == error_mark_node
9814 || TREE_CONSTANT (DECL_INITIAL (exp)))
9815 && !reloc)
9a55eab3
JDA
9816 {
9817 if (TARGET_SOM
9818 && DECL_ONE_ONLY (exp)
9819 && !DECL_WEAK (exp))
d6b5193b 9820 return som_one_only_readonly_data_section;
9a55eab3 9821 else
d6b5193b 9822 return readonly_data_section;
9a55eab3 9823 }
6615c446 9824 else if (CONSTANT_CLASS_P (exp) && !reloc)
d6b5193b 9825 return readonly_data_section;
9a55eab3
JDA
9826 else if (TARGET_SOM
9827 && TREE_CODE (exp) == VAR_DECL
9828 && DECL_ONE_ONLY (exp)
e41f3691 9829 && !DECL_WEAK (exp))
d6b5193b 9830 return som_one_only_data_section;
ae46c4e0 9831 else
d6b5193b 9832 return data_section;
ae46c4e0 9833}
e2500fed 9834
/* Output to STREAM the directive which makes the data label NAME
   global.  Nothing is output when FUNCTION_NAME_P holds for NAME.  */

static void
pa_globalize_label (FILE *stream, const char *name)
{
  /* We only handle DATA objects here, functions are globalized in
     ASM_DECLARE_FUNCTION_NAME.  */
  if (FUNCTION_NAME_P (name))
    return;

  fputs ("\t.EXPORT ", stream);
  assemble_name (stream, name);
  fputs (",DATA\n", stream);
}
3f12cd9b 9847
bd5bd7ac
KH
9848/* Worker function for TARGET_STRUCT_VALUE_RTX. */
9849
3f12cd9b
KH
9850static rtx
9851pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9852 int incoming ATTRIBUTE_UNUSED)
9853{
9854 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9855}
9856
bd5bd7ac
KH
9857/* Worker function for TARGET_RETURN_IN_MEMORY. */
9858
3f12cd9b 9859bool
586de218 9860pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
3f12cd9b
KH
9861{
9862 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9863 PA64 ABI says that objects larger than 128 bits are returned in memory.
9864 Note, int_size_in_bytes can return -1 if the size of the object is
9865 variable or larger than the maximum value that can be expressed as
9866 a HOST_WIDE_INT. It can also return zero for an empty type. The
9867 simplest way to handle variable and empty types is to pass them in
9868 memory. This avoids problems in defining the boundaries of argument
9869 slots, allocating registers, etc. */
9870 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9871 || int_size_in_bytes (type) <= 0);
9872}
9873
744b2d61
JDA
9874/* Structure to hold declaration and name of external symbols that are
9875 emitted by GCC. We generate a vector of these symbols and output them
9876 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9877 This avoids putting out names that are never really used. */
9878
d1b38208 9879typedef struct GTY(()) extern_symbol
744b2d61
JDA
9880{
9881 tree decl;
9882 const char *name;
d4e6fecb 9883} extern_symbol;
744b2d61
JDA
9884
9885/* Define gc'd vector type for extern_symbol. */
744b2d61
JDA
9886
9887/* Vector of extern_symbol records (stored by value, not as pointers). */
9771b263 9888static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
744b2d61
JDA
9889
9890#ifdef ASM_OUTPUT_EXTERNAL_REAL
9891/* Mark DECL (name NAME) as an external reference (assembler output
9892 file FILE). This saves the names to output at the end of the file
9893 if actually referenced. */
9894
9895void
9896pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9897{
744b2d61 9898 gcc_assert (file == asm_out_file);
f32682ca 9899 extern_symbol p = {decl, name};
9771b263 9900 vec_safe_push (extern_symbols, p);
744b2d61
JDA
9901}
9902
9903/* Output text required at the end of an assembler file.
9904 This includes deferred plabels and .import directives for
9905 all external symbols that were actually referenced. */
9906
9907static void
9908pa_hpux_file_end (void)
9909{
9910 unsigned int i;
d4e6fecb 9911 extern_symbol *p;
744b2d61 9912
3674b34d
JDA
9913 if (!NO_DEFERRED_PROFILE_COUNTERS)
9914 output_deferred_profile_counters ();
9915
744b2d61
JDA
9916 output_deferred_plabels ();
9917
9771b263 9918 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
744b2d61
JDA
9919 {
9920 tree decl = p->decl;
9921
9922 if (!TREE_ASM_WRITTEN (decl)
9923 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9924 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9925 }
9926
9771b263 9927 vec_free (extern_symbols);
744b2d61
JDA
9928}
9929#endif
9930
6982c5d4 9931/* Return true if a change from mode FROM to mode TO for a register
0a2aaacc 9932 in register class RCLASS is invalid. */
6982c5d4
JDA
9933
9934bool
9935pa_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
0a2aaacc 9936 enum reg_class rclass)
6982c5d4
JDA
9937{
9938 if (from == to)
9939 return false;
9940
9941 /* Reject changes to/from complex and vector modes. */
9942 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9943 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9944 return true;
9945
9946 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9947 return false;
9948
9949 /* There is no way to load QImode or HImode values directly from
9950 memory. SImode loads to the FP registers are not zero extended.
9951 On the 64-bit target, this conflicts with the definition of
9952 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9953 with different sizes in the floating-point registers. */
0a2aaacc 9954 if (MAYBE_FP_REG_CLASS_P (rclass))
6982c5d4
JDA
9955 return true;
9956
9957 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9958 in specific sets of registers. Thus, we cannot allow changing
9959 to a larger mode when it's larger than a word. */
9960 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
9961 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
9962 return true;
9963
9964 return false;
9965}
9966
9967/* Returns TRUE if it is a good idea to tie two pseudo registers
9968 when one has mode MODE1 and one has mode MODE2.
9969 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9970 for any hard reg, then this must be FALSE for correct output.
9971
9972 We should return FALSE for QImode and HImode because these modes
9973 are not ok in the floating-point registers. However, this prevents
9974 tieing these modes to SImode and DImode in the general registers.
9975 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9976 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9977 in the floating-point registers. */
9978
9979bool
9980pa_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9981{
9982 /* Don't tie modes in different classes. */
9983 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
9984 return false;
9985
9986 return true;
9987}
9988
859c146c
RH
9989\f
/* Length in units of the trampoline instruction code.  The value is
   used as an address displacement: pa_trampoline_init adds it, less
   one, to the start of the code to locate the last location whose
   cache line has to be flushed.  */

#define TRAMPOLINE_CODE_SIZE \
  (TARGET_64BIT ? 24 : TARGET_PA_20 ? 32 : 40)
9993
9994
9995/* Output assembler code for a block containing the constant parts
9996 of a trampoline, leaving space for the variable parts.\
9997
9998 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9999 and then branches to the specified routine.
10000
10001 This code template is copied from text segment to stack location
10002 and then patched with pa_trampoline_init to contain valid values,
10003 and then entered as a subroutine.
10004
10005 It is best to keep this as small as possible to avoid having to
10006 flush multiple lines in the cache. */
10007
10008static void
10009pa_asm_trampoline_template (FILE *f)
10010{
10011 if (!TARGET_64BIT)
10012 {
10013 fputs ("\tldw 36(%r22),%r21\n", f);
10014 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
10015 if (ASSEMBLER_DIALECT == 0)
10016 fputs ("\tdepi 0,31,2,%r21\n", f);
10017 else
10018 fputs ("\tdepwi 0,31,2,%r21\n", f);
10019 fputs ("\tldw 4(%r21),%r19\n", f);
10020 fputs ("\tldw 0(%r21),%r21\n", f);
10021 if (TARGET_PA_20)
10022 {
10023 fputs ("\tbve (%r21)\n", f);
10024 fputs ("\tldw 40(%r22),%r29\n", f);
10025 fputs ("\t.word 0\n", f);
10026 fputs ("\t.word 0\n", f);
10027 }
10028 else
10029 {
10030 fputs ("\tldsid (%r21),%r1\n", f);
10031 fputs ("\tmtsp %r1,%sr0\n", f);
10032 fputs ("\tbe 0(%sr0,%r21)\n", f);
10033 fputs ("\tldw 40(%r22),%r29\n", f);
10034 }
10035 fputs ("\t.word 0\n", f);
10036 fputs ("\t.word 0\n", f);
10037 fputs ("\t.word 0\n", f);
10038 fputs ("\t.word 0\n", f);
10039 }
10040 else
10041 {
10042 fputs ("\t.dword 0\n", f);
10043 fputs ("\t.dword 0\n", f);
10044 fputs ("\t.dword 0\n", f);
10045 fputs ("\t.dword 0\n", f);
10046 fputs ("\tmfia %r31\n", f);
10047 fputs ("\tldd 24(%r31),%r1\n", f);
10048 fputs ("\tldd 24(%r1),%r27\n", f);
10049 fputs ("\tldd 16(%r1),%r1\n", f);
10050 fputs ("\tbve (%r1)\n", f);
10051 fputs ("\tldd 32(%r31),%r31\n", f);
10052 fputs ("\t.dword 0 ; fptr\n", f);
10053 fputs ("\t.dword 0 ; static link\n", f);
10054 }
10055}
10056
10057/* Emit RTL insns to initialize the variable parts of a trampoline.
10058 FNADDR is an RTX for the address of the function's pure code.
10059 CXT is an RTX for the static chain value for the function.
10060
10061 Move the function address to the trampoline template at offset 36.
10062 Move the static chain value to trampoline template at offset 40.
10063 Move the trampoline address to trampoline template at offset 44.
10064 Move r19 to trampoline template at offset 48. The latter two
10065 words create a plabel for the indirect call to the trampoline.
10066
10067 A similar sequence is used for the 64-bit port but the plabel is
10068 at the beginning of the trampoline.
10069
10070 Finally, the cache entries for the trampoline code are flushed.
10071 This is necessary to ensure that the trampoline instruction sequence
10072 is written to memory prior to any attempts at prefetching the code
10073 sequence. */
10074
10075static void
10076pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10077{
10078 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10079 rtx start_addr = gen_reg_rtx (Pmode);
10080 rtx end_addr = gen_reg_rtx (Pmode);
10081 rtx line_length = gen_reg_rtx (Pmode);
10082 rtx r_tramp, tmp;
10083
10084 emit_block_move (m_tramp, assemble_trampoline_template (),
10085 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10086 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10087
10088 if (!TARGET_64BIT)
10089 {
10090 tmp = adjust_address (m_tramp, Pmode, 36);
10091 emit_move_insn (tmp, fnaddr);
10092 tmp = adjust_address (m_tramp, Pmode, 40);
10093 emit_move_insn (tmp, chain_value);
10094
10095 /* Create a fat pointer for the trampoline. */
10096 tmp = adjust_address (m_tramp, Pmode, 44);
10097 emit_move_insn (tmp, r_tramp);
10098 tmp = adjust_address (m_tramp, Pmode, 48);
10099 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10100
10101 /* fdc and fic only use registers for the address to flush,
10102 they do not accept integer displacements. We align the
10103 start and end addresses to the beginning of their respective
10104 cache lines to minimize the number of lines flushed. */
10105 emit_insn (gen_andsi3 (start_addr, r_tramp,
10106 GEN_INT (-MIN_CACHELINE_SIZE)));
0a81f074
RS
10107 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10108 TRAMPOLINE_CODE_SIZE-1));
859c146c
RH
10109 emit_insn (gen_andsi3 (end_addr, tmp,
10110 GEN_INT (-MIN_CACHELINE_SIZE)));
10111 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10112 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10113 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10114 gen_reg_rtx (Pmode),
10115 gen_reg_rtx (Pmode)));
10116 }
10117 else
10118 {
10119 tmp = adjust_address (m_tramp, Pmode, 56);
10120 emit_move_insn (tmp, fnaddr);
10121 tmp = adjust_address (m_tramp, Pmode, 64);
10122 emit_move_insn (tmp, chain_value);
10123
10124 /* Create a fat pointer for the trampoline. */
10125 tmp = adjust_address (m_tramp, Pmode, 16);
0a81f074
RS
10126 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10127 r_tramp, 32)));
859c146c
RH
10128 tmp = adjust_address (m_tramp, Pmode, 24);
10129 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10130
10131 /* fdc and fic only use registers for the address to flush,
10132 they do not accept integer displacements. We align the
10133 start and end addresses to the beginning of their respective
10134 cache lines to minimize the number of lines flushed. */
0a81f074 10135 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
859c146c
RH
10136 emit_insn (gen_anddi3 (start_addr, tmp,
10137 GEN_INT (-MIN_CACHELINE_SIZE)));
0a81f074
RS
10138 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10139 TRAMPOLINE_CODE_SIZE - 1));
859c146c
RH
10140 emit_insn (gen_anddi3 (end_addr, tmp,
10141 GEN_INT (-MIN_CACHELINE_SIZE)));
10142 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10143 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10144 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10145 gen_reg_rtx (Pmode),
10146 gen_reg_rtx (Pmode)));
10147 }
017d38f5
MK
10148
10149#ifdef HAVE_ENABLE_EXECUTE_STACK
10150  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10151      LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
10152#endif
859c146c
RH
10153}
10154
10155/* Perform any machine-specific adjustment in the address of the trampoline.
10156 ADDR contains the address that was passed to pa_trampoline_init.
10157 Adjust the trampoline address to point to the plabel at offset 44. */
10158
10159static rtx
10160pa_trampoline_adjust_address (rtx addr)
10161{
10162 if (!TARGET_64BIT)
0a81f074 10163 addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
859c146c
RH
10164 return addr;
10165}
1f65437d
SE
10166
10167static rtx
10168pa_delegitimize_address (rtx orig_x)
10169{
10170 rtx x = delegitimize_mem_from_attrs (orig_x);
10171
10172 if (GET_CODE (x) == LO_SUM
10173 && GET_CODE (XEXP (x, 1)) == UNSPEC
10174 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10175 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10176 return x;
10177}
859c146c 10178\f
bc707992
JDA
10179static rtx
10180pa_internal_arg_pointer (void)
10181{
10182 /* The argument pointer and the hard frame pointer are the same in
10183 the 32-bit runtime, so we don't need a copy. */
10184 if (TARGET_64BIT)
10185 return copy_to_reg (virtual_incoming_args_rtx);
10186 else
10187 return virtual_incoming_args_rtx;
10188}
10189
10190/* Given FROM and TO register numbers, say whether this elimination is allowed.
10191 Frame pointer elimination is automatically handled. */
10192
10193static bool
10194pa_can_eliminate (const int from, const int to)
10195{
10196 /* The argument cannot be eliminated in the 64-bit runtime. */
10197 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10198 return false;
10199
10200 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10201 ? ! frame_pointer_needed
10202 : true);
10203}
10204
10205/* Define the offset between two registers, FROM to be eliminated and its
10206 replacement TO, at the start of a routine. */
10207HOST_WIDE_INT
10208pa_initial_elimination_offset (int from, int to)
10209{
10210 HOST_WIDE_INT offset;
10211
10212 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10213 && to == STACK_POINTER_REGNUM)
ae9d61ab 10214 offset = -pa_compute_frame_size (get_frame_size (), 0);
bc707992
JDA
10215 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10216 offset = 0;
10217 else
10218 gcc_unreachable ();
10219
10220 return offset;
10221}
10222
5efd84c5
NF
10223static void
10224pa_conditional_register_usage (void)
10225{
10226 int i;
10227
10228 if (!TARGET_64BIT && !TARGET_PA_11)
10229 {
10230 for (i = 56; i <= FP_REG_LAST; i++)
10231 fixed_regs[i] = call_used_regs[i] = 1;
10232 for (i = 33; i < 56; i += 2)
10233 fixed_regs[i] = call_used_regs[i] = 1;
10234 }
10235 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10236 {
10237 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10238 fixed_regs[i] = call_used_regs[i] = 1;
10239 }
10240 if (flag_pic)
10241 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10242}
10243
41a1208a
JDA
10244/* Target hook for c_mode_for_suffix. */
10245
10246static enum machine_mode
10247pa_c_mode_for_suffix (char suffix)
10248{
10249 if (HPUX_LONG_DOUBLE_LIBRARY)
10250 {
10251 if (suffix == 'q')
10252 return TFmode;
10253 }
10254
10255 return VOIDmode;
10256}
10257
7550cb35
JDA
10258/* Target hook for function_section. */
10259
10260static section *
10261pa_function_section (tree decl, enum node_frequency freq,
10262 bool startup, bool exit)
10263{
10264 /* Put functions in text section if target doesn't have named sections. */
677f3fa8 10265 if (!targetm_common.have_named_sections)
7550cb35
JDA
10266 return text_section;
10267
10268 /* Force nested functions into the same section as the containing
10269 function. */
10270 if (decl
10271 && DECL_SECTION_NAME (decl) == NULL_TREE
10272 && DECL_CONTEXT (decl) != NULL_TREE
10273 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10274 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL_TREE)
10275 return function_section (DECL_CONTEXT (decl));
10276
10277 /* Otherwise, use the default function section. */
10278 return default_function_section (decl, freq, startup, exit);
10279}
10280
1a627b35
RS
10281/* Implement TARGET_LEGITIMATE_CONSTANT_P.
10282
10283 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10284 that need more than three instructions to load prior to reload. This
10285 limit is somewhat arbitrary. It takes three instructions to load a
10286 CONST_INT from memory but two are memory accesses. It may be better
10287 to increase the allowed range for CONST_INTS. We may also be able
10288 to handle CONST_DOUBLES. */
10289
10290static bool
10291pa_legitimate_constant_p (enum machine_mode mode, rtx x)
10292{
10293 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10294 return false;
10295
10296 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10297 return false;
10298
9a201645 10299 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
0e4ae794
JDA
10300 legitimate constants. The other variants can't be handled by
10301 the move patterns after reload starts. */
9a201645 10302 if (PA_SYMBOL_REF_TLS_P (x))
0e4ae794 10303 return false;
9a201645 10304
1a627b35
RS
10305 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10306 return false;
10307
10308 if (TARGET_64BIT
10309 && HOST_BITS_PER_WIDE_INT > 32
10310 && GET_CODE (x) == CONST_INT
10311 && !reload_in_progress
10312 && !reload_completed
10313 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
ae9d61ab 10314 && !pa_cint_ok_for_move (INTVAL (x)))
1a627b35
RS
10315 return false;
10316
fda33f15
JDA
10317 if (function_label_operand (x, mode))
10318 return false;
10319
1a627b35
RS
10320 return true;
10321}
10322
fda33f15
JDA
10323/* Implement TARGET_SECTION_TYPE_FLAGS. */
10324
10325static unsigned int
10326pa_section_type_flags (tree decl, const char *name, int reloc)
10327{
10328 unsigned int flags;
10329
10330 flags = default_section_type_flags (decl, name, reloc);
10331
10332 /* Function labels are placed in the constant pool. This can
10333 cause a section conflict if decls are put in ".data.rel.ro"
10334 or ".data.rel.ro.local" using the __attribute__ construct. */
10335 if (strcmp (name, ".data.rel.ro") == 0
10336 || strcmp (name, ".data.rel.ro.local") == 0)
10337 flags |= SECTION_WRITE | SECTION_RELRO;
10338
10339 return flags;
10340}
10341
1a04ac2b
JDA
10342/* pa_legitimate_address_p recognizes an RTL expression that is a
10343 valid memory address for an instruction. The MODE argument is the
10344 machine mode for the MEM expression that wants to use this address.
10345
10346 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10347 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10348 available with floating point loads and stores, and integer loads.
10349 We get better code by allowing indexed addresses in the initial
10350 RTL generation.
10351
10352 The acceptance of indexed addresses as legitimate implies that we
10353 must provide patterns for doing indexed integer stores, or the move
10354 expanders must force the address of an indexed store to a register.
10355 We have adopted the latter approach.
10356
10357 Another function of pa_legitimate_address_p is to ensure that
10358 the base register is a valid pointer for indexed instructions.
10359 On targets that have non-equivalent space registers, we have to
10360 know at the time of assembler output which register in a REG+REG
10361 pair is the base register. The REG_POINTER flag is sometimes lost
10362 in reload and the following passes, so it can't be relied on during
10363 code generation. Thus, we either have to canonicalize the order
10364 of the registers in REG+REG indexed addresses, or treat REG+REG
10365 addresses separately and provide patterns for both permutations.
10366
10367 The latter approach requires several hundred additional lines of
10368 code in pa.md. The downside to canonicalizing is that a PLUS
10369 in the wrong order can't combine to form to make a scaled indexed
10370 memory operand. As we won't need to canonicalize the operands if
10371 the REG_POINTER lossage can be fixed, it seems better canonicalize.
10372
10373 We initially break out scaled indexed addresses in canonical order
10374 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10375 scaled indexed addresses during RTL generation. However, fold_rtx
10376 has its own opinion on how the operands of a PLUS should be ordered.
10377 If one of the operands is equivalent to a constant, it will make
10378 that operand the second operand. As the base register is likely to
10379 be equivalent to a SYMBOL_REF, we have made it the second operand.
10380
10381 pa_legitimate_address_p accepts REG+REG as legitimate when the
10382 operands are in the order INDEX+BASE on targets with non-equivalent
10383 space registers, and in any order on targets with equivalent space
10384 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10385
10386 We treat a SYMBOL_REF as legitimate if it is part of the current
10387 function's constant-pool, because such addresses can actually be
10388 output as REG+SMALLINT. */
10389
10390static bool
10391pa_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
10392{
10393 if ((REG_P (x)
10394 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10395 : REG_OK_FOR_BASE_P (x)))
10396 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10397 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10398 && REG_P (XEXP (x, 0))
10399 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10400 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10401 return true;
10402
10403 if (GET_CODE (x) == PLUS)
10404 {
10405 rtx base, index;
10406
10407 /* For REG+REG, the base register should be in XEXP (x, 1),
10408 so check it first. */
10409 if (REG_P (XEXP (x, 1))
10410 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10411 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10412 base = XEXP (x, 1), index = XEXP (x, 0);
10413 else if (REG_P (XEXP (x, 0))
10414 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10415 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10416 base = XEXP (x, 0), index = XEXP (x, 1);
10417 else
10418 return false;
10419
10420 if (GET_CODE (index) == CONST_INT)
10421 {
10422 if (INT_5_BITS (index))
10423 return true;
10424
10425 /* When INT14_OK_STRICT is false, a secondary reload is needed
10426 to adjust the displacement of SImode and DImode floating point
10427 instructions. So, we return false when STRICT is true. We
10428 also reject long displacements for float mode addresses since
10429 the majority of accesses will use floating point instructions
10430 that don't support 14-bit offsets. */
10431 if (!INT14_OK_STRICT
ceaca33e
JDA
10432 && reload_in_progress
10433 && strict
10434 && mode != QImode
10435 && mode != HImode)
10436 return false;
1a04ac2b 10437
ceaca33e 10438 return base14_operand (index, mode);
1a04ac2b
JDA
10439 }
10440
10441 if (!TARGET_DISABLE_INDEXING
10442 /* Only accept the "canonical" INDEX+BASE operand order
10443 on targets with non-equivalent space registers. */
10444 && (TARGET_NO_SPACE_REGS
10445 ? REG_P (index)
10446 : (base == XEXP (x, 1) && REG_P (index)
10447 && (reload_completed
10448 || (reload_in_progress && HARD_REGISTER_P (base))
10449 || REG_POINTER (base))
10450 && (reload_completed
10451 || (reload_in_progress && HARD_REGISTER_P (index))
10452 || !REG_POINTER (index))))
10453 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10454 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10455 : REG_OK_FOR_INDEX_P (index))
10456 && borx_reg_operand (base, Pmode)
10457 && borx_reg_operand (index, Pmode))
10458 return true;
10459
10460 if (!TARGET_DISABLE_INDEXING
10461 && GET_CODE (index) == MULT
10462 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10463 && REG_P (XEXP (index, 0))
10464 && GET_MODE (XEXP (index, 0)) == Pmode
10465 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10466 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10467 && GET_CODE (XEXP (index, 1)) == CONST_INT
10468 && INTVAL (XEXP (index, 1))
10469 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10470 && borx_reg_operand (base, Pmode))
10471 return true;
10472
10473 return false;
10474 }
10475
10476 if (GET_CODE (x) == LO_SUM)
10477 {
10478 rtx y = XEXP (x, 0);
10479
10480 if (GET_CODE (y) == SUBREG)
10481 y = SUBREG_REG (y);
10482
10483 if (REG_P (y)
10484 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10485 : REG_OK_FOR_BASE_P (y)))
10486 {
10487 /* Needed for -fPIC */
10488 if (mode == Pmode
10489 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10490 return true;
10491
10492 if (!INT14_OK_STRICT
ceaca33e
JDA
10493 && reload_in_progress
10494 && strict
10495 && mode != QImode
10496 && mode != HImode)
10497 return false;
1a04ac2b
JDA
10498
10499 if (CONSTANT_P (XEXP (x, 1)))
10500 return true;
10501 }
10502 return false;
10503 }
10504
10505 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10506 return true;
10507
10508 return false;
10509}
10510
10511/* Look for machine dependent ways to make the invalid address AD a
10512 valid address.
10513
10514 For the PA, transform:
10515
10516 memory(X + <large int>)
10517
10518 into:
10519
10520 if (<large int> & mask) >= 16
10521 Y = (<large int> & ~mask) + mask + 1 Round up.
10522 else
10523 Y = (<large int> & ~mask) Round down.
10524 Z = X + Y
10525 memory (Z + (<large int> - Y));
10526
10527 This makes reload inheritance and reload_cse work better since Z
10528 can be reused.
10529
10530 There may be more opportunities to improve code with this hook. */
10531
10532rtx
10533pa_legitimize_reload_address (rtx ad, enum machine_mode mode,
10534 int opnum, int type,
10535 int ind_levels ATTRIBUTE_UNUSED)
10536{
10537 long offset, newoffset, mask;
10538 rtx new_rtx, temp = NULL_RTX;
10539
10540 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10541 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10542
10543 if (optimize && GET_CODE (ad) == PLUS)
10544 temp = simplify_binary_operation (PLUS, Pmode,
10545 XEXP (ad, 0), XEXP (ad, 1));
10546
10547 new_rtx = temp ? temp : ad;
10548
10549 if (optimize
10550 && GET_CODE (new_rtx) == PLUS
10551 && GET_CODE (XEXP (new_rtx, 0)) == REG
10552 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10553 {
10554 offset = INTVAL (XEXP ((new_rtx), 1));
10555
10556 /* Choose rounding direction. Round up if we are >= halfway. */
10557 if ((offset & mask) >= ((mask + 1) / 2))
10558 newoffset = (offset & ~mask) + mask + 1;
10559 else
10560 newoffset = offset & ~mask;
10561
10562 /* Ensure that long displacements are aligned. */
10563 if (mask == 0x3fff
10564 && (GET_MODE_CLASS (mode) == MODE_FLOAT
10565 || (TARGET_64BIT && (mode) == DImode)))
10566 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10567
10568 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10569 {
10570 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10571 GEN_INT (newoffset));
10572 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10573 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10574 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10575 opnum, (enum reload_type) type);
10576 return ad;
10577 }
10578 }
10579
10580 return NULL_RTX;
10581}
10582
e2500fed 10583#include "gt-pa.h"