]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/pa/pa.c
Turn HARD_REGNO_MODE_OK into a target hook
[thirdparty/gcc.git] / gcc / config / pa / pa.c
CommitLineData
188538df 1/* Subroutines for insn-output.c for HPPA.
cbe34bb5 2 Copyright (C) 1992-2017 Free Software Foundation, Inc.
188538df
TG
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
4
b7849684 5This file is part of GCC.
188538df 6
b7849684 7GCC is free software; you can redistribute it and/or modify
188538df 8it under the terms of the GNU General Public License as published by
2f83c7d6 9the Free Software Foundation; either version 3, or (at your option)
188538df
TG
10any later version.
11
b7849684 12GCC is distributed in the hope that it will be useful,
188538df
TG
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
2f83c7d6
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
188538df 20
188538df 21#include "config.h"
0b17dd98 22#include "system.h"
4977bab6 23#include "coretypes.h"
4d0cdd0c 24#include "memmodel.h"
c7131fb2 25#include "backend.h"
e11c4407 26#include "target.h"
188538df 27#include "rtl.h"
e11c4407 28#include "tree.h"
c7131fb2 29#include "df.h"
e11c4407
AM
30#include "tm_p.h"
31#include "stringpool.h"
314e6352 32#include "attribs.h"
e11c4407 33#include "optabs.h"
188538df 34#include "regs.h"
e11c4407
AM
35#include "emit-rtl.h"
36#include "recog.h"
37#include "diagnostic-core.h"
188538df 38#include "insn-attr.h"
40e23961 39#include "alias.h"
40e23961 40#include "fold-const.h"
d8a2d370 41#include "stor-layout.h"
d8a2d370
DN
42#include "varasm.h"
43#include "calls.h"
d499455b 44#include "output.h"
823fbbce 45#include "except.h"
36566b39 46#include "explow.h"
becf1647 47#include "expr.h"
e78d8e51 48#include "reload.h"
677f3fa8 49#include "common/common-target.h"
41a1208a 50#include "langhooks.h"
60393bbc 51#include "cfgrtl.h"
96e45421 52#include "opts.h"
9b2b7279 53#include "builtins.h"
188538df 54
994c5d85 55/* This file should be included last. */
d58627a0
RS
56#include "target-def.h"
57
5d50fab3
JL
58/* Return nonzero if there is a bypass for the output of
59 OUT_INSN and the fp store IN_INSN. */
60int
647d790d 61pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
5d50fab3 62{
ef4bddc2
RS
63 machine_mode store_mode;
64 machine_mode other_mode;
5d50fab3
JL
65 rtx set;
66
67 if (recog_memoized (in_insn) < 0
d4f2728a
JDA
68 || (get_attr_type (in_insn) != TYPE_FPSTORE
69 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
5d50fab3
JL
70 || recog_memoized (out_insn) < 0)
71 return 0;
72
73 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
74
75 set = single_set (out_insn);
76 if (!set)
77 return 0;
78
79 other_mode = GET_MODE (SET_SRC (set));
80
81 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
82}
83
84
/* Unless DO_FRAME_NOTES has already been defined, default it to 1 when
   INCOMING_RETURN_ADDR_RTX is defined and to 0 otherwise.  */
#if !defined (DO_FRAME_NOTES)
# if defined (INCOMING_RETURN_ADDR_RTX)
#  define DO_FRAME_NOTES 1
# else
#  define DO_FRAME_NOTES 0
# endif
#endif
92
8a5b8538 93static void pa_option_override (void);
d8f95bed 94static void copy_reg_pointer (rtx, rtx);
a2017852 95static void fix_range (const char *);
ef4bddc2 96static int hppa_register_move_cost (machine_mode mode, reg_class_t,
8a5b8538 97 reg_class_t);
ef4bddc2 98static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
e548c9df 99static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
ef4bddc2 100static inline rtx force_mode (machine_mode, rtx);
b7849684
JE
101static void pa_reorg (void);
102static void pa_combine_instructions (void);
647d790d
DM
103static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
104 rtx, rtx);
b32d5189 105static bool forward_branch_p (rtx_insn *);
b7849684 106static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
ae9d61ab 107static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
e0d80a58
JL
108static int compute_movmem_length (rtx_insn *);
109static int compute_clrmem_length (rtx_insn *);
b7849684
JE
110static bool pa_assemble_integer (rtx, unsigned int, int);
111static void remove_useless_addtr_insns (int);
a4295210
JDA
112static void store_reg (int, HOST_WIDE_INT, int);
113static void store_reg_modify (int, int, HOST_WIDE_INT);
114static void load_reg (int, HOST_WIDE_INT, int);
115static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
9193312a 116static rtx pa_function_value (const_tree, const_tree, bool);
ef4bddc2 117static rtx pa_libcall_value (machine_mode, const_rtx);
8a5b8538 118static bool pa_function_value_regno_p (const unsigned int);
42776416 119static void pa_output_function_prologue (FILE *);
67b846fa 120static void update_total_code_bytes (unsigned int);
42776416 121static void pa_output_function_epilogue (FILE *);
b505225b 122static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
ac44248e 123static int pa_adjust_priority (rtx_insn *, int);
b7849684 124static int pa_issue_rate (void);
f258111a 125static int pa_reloc_rw_mask (void);
d6b5193b 126static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
57d138a9 127static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
d6b5193b 128static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
ae46c4e0 129 ATTRIBUTE_UNUSED;
b7849684
JE
130static void pa_encode_section_info (tree, rtx, int);
131static const char *pa_strip_name_encoding (const char *);
132static bool pa_function_ok_for_sibcall (tree, tree);
133static void pa_globalize_label (FILE *, const char *)
a5f3f0ab 134 ATTRIBUTE_UNUSED;
b7849684
JE
135static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
136 HOST_WIDE_INT, tree);
35d434ed 137#if !defined(USE_COLLECT2)
b7849684
JE
138static void pa_asm_out_constructor (rtx, int);
139static void pa_asm_out_destructor (rtx, int);
35d434ed 140#endif
b7849684 141static void pa_init_builtins (void);
ef4bddc2 142static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
3f12cd9b 143static rtx hppa_builtin_saveregs (void);
d7bd8aeb 144static void hppa_va_start (tree, rtx);
726a989a 145static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
18e2a8b8 146static bool pa_scalar_mode_supported_p (scalar_mode);
3101faab 147static bool pa_commutative_p (const_rtx x, int outer_code);
e0d80a58
JL
148static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
149static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
ef4bddc2 150static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
b7849684
JE
151static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
152static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
153static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
154static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
155static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
156static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
157static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
158static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
159static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
160static void output_deferred_plabels (void);
3674b34d 161static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
744b2d61
JDA
162#ifdef ASM_OUTPUT_EXTERNAL_REAL
163static void pa_hpux_file_end (void);
164#endif
50bbeefb 165static void pa_init_libfuncs (void);
3f12cd9b 166static rtx pa_struct_value_rtx (tree, int);
ef4bddc2 167static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
586de218 168 const_tree, bool);
ef4bddc2 169static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
78a52f11 170 tree, bool);
ef4bddc2 171static void pa_function_arg_advance (cumulative_args_t, machine_mode,
fd29bdaf 172 const_tree, bool);
ef4bddc2 173static rtx pa_function_arg (cumulative_args_t, machine_mode,
fd29bdaf 174 const_tree, bool);
ef4bddc2 175static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
9a55eab3 176static struct machine_function * pa_init_machine_status (void);
a87cf97e 177static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
ef4bddc2 178 machine_mode,
a87cf97e 179 secondary_reload_info *);
16c16a24 180static void pa_extra_live_on_entry (bitmap);
ef4bddc2
RS
181static machine_mode pa_promote_function_mode (const_tree,
182 machine_mode, int *,
4ce3042d 183 const_tree, int);
2eddfed1 184
859c146c
RH
185static void pa_asm_trampoline_template (FILE *);
186static void pa_trampoline_init (rtx, tree, rtx);
187static rtx pa_trampoline_adjust_address (rtx);
1f65437d 188static rtx pa_delegitimize_address (rtx);
8a5b8538 189static bool pa_print_operand_punct_valid_p (unsigned char);
bc707992
JDA
190static rtx pa_internal_arg_pointer (void);
191static bool pa_can_eliminate (const int, const int);
5efd84c5 192static void pa_conditional_register_usage (void);
ef4bddc2 193static machine_mode pa_c_mode_for_suffix (char);
7550cb35 194static section *pa_function_section (tree, enum node_frequency, bool, bool);
ef4bddc2
RS
195static bool pa_cannot_force_const_mem (machine_mode, rtx);
196static bool pa_legitimate_constant_p (machine_mode, rtx);
fda33f15 197static unsigned int pa_section_type_flags (tree, const char *, int);
ef4bddc2 198static bool pa_legitimate_address_p (machine_mode, rtx, bool);
84c9e5ff
JDA
199static bool pa_callee_copies (cumulative_args_t, machine_mode,
200 const_tree, bool);
f939c3e6 201static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
859c146c 202
d6b5193b
RS
203/* The following extra sections are only used for SOM. */
204static GTY(()) section *som_readonly_data_section;
205static GTY(()) section *som_one_only_readonly_data_section;
206static GTY(()) section *som_one_only_data_section;
57d138a9 207static GTY(()) section *som_tm_clone_table_section;
d6b5193b 208
68386e1e
JL
209/* Counts for the number of callee-saved general and floating point
210 registers which were saved by the current function's prologue. */
211static int gr_saved, fr_saved;
212
16c16a24
JDA
213/* Boolean indicating whether the return pointer was saved by the
214 current function's prologue. */
215static bool rp_saved;
216
b7849684 217static rtx find_addr_reg (rtx);
188538df 218
5fad1c24 219/* Keep track of the number of bytes we have output in the CODE subspace
279c9bde 220 during this compilation so we'll know when to emit inline long-calls. */
a02aa5b0 221unsigned long total_code_bytes;
279c9bde 222
5fad1c24
JDA
223/* The last address of the previous function plus the number of bytes in
224 associated thunks that have been output. This is used to determine if
225 a thunk can use an IA-relative branch to reach its target function. */
67b846fa 226static unsigned int last_address;
5fad1c24 227
93ae92c1 228/* Variables to handle plabels that we discover are necessary at assembly
ddd5a7c1 229 output time. They are output after the current function. */
d1b38208 230struct GTY(()) deferred_plabel
93ae92c1
JL
231{
232 rtx internal_label;
744b2d61 233 rtx symbol;
e2500fed
GK
234};
235static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
236 deferred_plabels;
0f8e3849 237static size_t n_deferred_plabels = 0;
672a6f42
NB
238\f
239/* Initialize the GCC target structure. */
301d03af 240
8a5b8538
AS
241#undef TARGET_OPTION_OVERRIDE
242#define TARGET_OPTION_OVERRIDE pa_option_override
243
301d03af
RS
244#undef TARGET_ASM_ALIGNED_HI_OP
245#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
246#undef TARGET_ASM_ALIGNED_SI_OP
247#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
248#undef TARGET_ASM_ALIGNED_DI_OP
249#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
250#undef TARGET_ASM_UNALIGNED_HI_OP
251#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
252#undef TARGET_ASM_UNALIGNED_SI_OP
253#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
254#undef TARGET_ASM_UNALIGNED_DI_OP
255#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
256#undef TARGET_ASM_INTEGER
257#define TARGET_ASM_INTEGER pa_assemble_integer
258
08c148a8
NB
259#undef TARGET_ASM_FUNCTION_PROLOGUE
260#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
261#undef TARGET_ASM_FUNCTION_EPILOGUE
262#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
93ae92c1 263
9193312a
AS
264#undef TARGET_FUNCTION_VALUE
265#define TARGET_FUNCTION_VALUE pa_function_value
8a5b8538
AS
266#undef TARGET_LIBCALL_VALUE
267#define TARGET_LIBCALL_VALUE pa_libcall_value
268#undef TARGET_FUNCTION_VALUE_REGNO_P
269#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
9193312a 270
506d7b68
PB
271#undef TARGET_LEGITIMIZE_ADDRESS
272#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
273
c237e94a
ZW
274#undef TARGET_SCHED_ADJUST_COST
275#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
276#undef TARGET_SCHED_ADJUST_PRIORITY
277#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
278#undef TARGET_SCHED_ISSUE_RATE
279#define TARGET_SCHED_ISSUE_RATE pa_issue_rate
280
fb49053f
RH
281#undef TARGET_ENCODE_SECTION_INFO
282#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
772c5265
RH
283#undef TARGET_STRIP_NAME_ENCODING
284#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
fb49053f 285
4977bab6
ZW
286#undef TARGET_FUNCTION_OK_FOR_SIBCALL
287#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
288
8ddf681a
R
289#undef TARGET_COMMUTATIVE_P
290#define TARGET_COMMUTATIVE_P pa_commutative_p
291
c590b625
RH
292#undef TARGET_ASM_OUTPUT_MI_THUNK
293#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
3961e8fe
RH
294#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
295#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
c590b625 296
a5fe455b 297#undef TARGET_ASM_FILE_END
744b2d61
JDA
298#ifdef ASM_OUTPUT_EXTERNAL_REAL
299#define TARGET_ASM_FILE_END pa_hpux_file_end
300#else
a5fe455b 301#define TARGET_ASM_FILE_END output_deferred_plabels
744b2d61 302#endif
a5fe455b 303
f258111a
JDA
304#undef TARGET_ASM_RELOC_RW_MASK
305#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
306
8a5b8538
AS
307#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
308#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
309
35d434ed
JDA
310#if !defined(USE_COLLECT2)
311#undef TARGET_ASM_CONSTRUCTOR
312#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
313#undef TARGET_ASM_DESTRUCTOR
314#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
315#endif
316
4677862a
JDA
317#undef TARGET_INIT_BUILTINS
318#define TARGET_INIT_BUILTINS pa_init_builtins
319
41a1208a
JDA
320#undef TARGET_EXPAND_BUILTIN
321#define TARGET_EXPAND_BUILTIN pa_expand_builtin
322
8a5b8538
AS
323#undef TARGET_REGISTER_MOVE_COST
324#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
3c50106f
RH
325#undef TARGET_RTX_COSTS
326#define TARGET_RTX_COSTS hppa_rtx_costs
dcefdf67
RH
327#undef TARGET_ADDRESS_COST
328#define TARGET_ADDRESS_COST hppa_address_cost
3c50106f 329
18dbd950
RS
330#undef TARGET_MACHINE_DEPENDENT_REORG
331#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
332
c15c90bb 333#undef TARGET_INIT_LIBFUNCS
50bbeefb 334#define TARGET_INIT_LIBFUNCS pa_init_libfuncs
c15c90bb 335
cde0f3fd
PB
336#undef TARGET_PROMOTE_FUNCTION_MODE
337#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
3f12cd9b 338#undef TARGET_PROMOTE_PROTOTYPES
586de218 339#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
3f12cd9b
KH
340
341#undef TARGET_STRUCT_VALUE_RTX
342#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
343#undef TARGET_RETURN_IN_MEMORY
344#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
fe984136
RH
345#undef TARGET_MUST_PASS_IN_STACK
346#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8cd5a4e0
RH
347#undef TARGET_PASS_BY_REFERENCE
348#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
6cdd5672 349#undef TARGET_CALLEE_COPIES
84c9e5ff 350#define TARGET_CALLEE_COPIES pa_callee_copies
78a52f11
RH
351#undef TARGET_ARG_PARTIAL_BYTES
352#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
fd29bdaf
NF
353#undef TARGET_FUNCTION_ARG
354#define TARGET_FUNCTION_ARG pa_function_arg
355#undef TARGET_FUNCTION_ARG_ADVANCE
356#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
c2ed6cf8
NF
357#undef TARGET_FUNCTION_ARG_BOUNDARY
358#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
3f12cd9b
KH
359
360#undef TARGET_EXPAND_BUILTIN_SAVEREGS
361#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
d7bd8aeb
JJ
362#undef TARGET_EXPAND_BUILTIN_VA_START
363#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
8101c928
RH
364#undef TARGET_GIMPLIFY_VA_ARG_EXPR
365#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
3f12cd9b 366
83c32f2e
JDA
367#undef TARGET_SCALAR_MODE_SUPPORTED_P
368#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
369
51076f96 370#undef TARGET_CANNOT_FORCE_CONST_MEM
fbbf66e7 371#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
51076f96 372
ec963611
JDA
373#undef TARGET_SECONDARY_RELOAD
374#define TARGET_SECONDARY_RELOAD pa_secondary_reload
375
16c16a24
JDA
376#undef TARGET_EXTRA_LIVE_ON_ENTRY
377#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
378
859c146c
RH
379#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
380#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
381#undef TARGET_TRAMPOLINE_INIT
382#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
383#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
384#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
1f65437d
SE
385#undef TARGET_DELEGITIMIZE_ADDRESS
386#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
bc707992
JDA
387#undef TARGET_INTERNAL_ARG_POINTER
388#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
389#undef TARGET_CAN_ELIMINATE
390#define TARGET_CAN_ELIMINATE pa_can_eliminate
5efd84c5
NF
391#undef TARGET_CONDITIONAL_REGISTER_USAGE
392#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
41a1208a
JDA
393#undef TARGET_C_MODE_FOR_SUFFIX
394#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
7550cb35
JDA
395#undef TARGET_ASM_FUNCTION_SECTION
396#define TARGET_ASM_FUNCTION_SECTION pa_function_section
859c146c 397
1a627b35
RS
398#undef TARGET_LEGITIMATE_CONSTANT_P
399#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
fda33f15
JDA
400#undef TARGET_SECTION_TYPE_FLAGS
401#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
1a04ac2b
JDA
402#undef TARGET_LEGITIMATE_ADDRESS_P
403#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
1a627b35 404
d81db636
SB
405#undef TARGET_LRA_P
406#define TARGET_LRA_P hook_bool_void_false
407
f939c3e6
RS
408#undef TARGET_HARD_REGNO_MODE_OK
409#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
410
f6897b10 411struct gcc_target targetm = TARGET_INITIALIZER;
672a6f42 412\f
a2017852
JDA
413/* Parse the -mfixed-range= option string. */
414
415static void
416fix_range (const char *const_str)
417{
418 int i, first, last;
419 char *str, *dash, *comma;
420
421 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
422 REG2 are either register names or register numbers. The effect
423 of this option is to mark the registers in the range from REG1 to
424 REG2 as ``fixed'' so they won't be used by the compiler. This is
419df6a2 425 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
a2017852
JDA
426
427 i = strlen (const_str);
428 str = (char *) alloca (i + 1);
429 memcpy (str, const_str, i + 1);
430
431 while (1)
432 {
433 dash = strchr (str, '-');
434 if (!dash)
435 {
d4ee4d25 436 warning (0, "value of -mfixed-range must have form REG1-REG2");
a2017852
JDA
437 return;
438 }
439 *dash = '\0';
440
441 comma = strchr (dash + 1, ',');
442 if (comma)
443 *comma = '\0';
444
445 first = decode_reg_name (str);
446 if (first < 0)
447 {
d4ee4d25 448 warning (0, "unknown register name: %s", str);
a2017852
JDA
449 return;
450 }
451
452 last = decode_reg_name (dash + 1);
453 if (last < 0)
454 {
d4ee4d25 455 warning (0, "unknown register name: %s", dash + 1);
a2017852
JDA
456 return;
457 }
458
459 *dash = '-';
460
461 if (first > last)
462 {
d4ee4d25 463 warning (0, "%s-%s is an empty range", str, dash + 1);
a2017852
JDA
464 return;
465 }
466
467 for (i = first; i <= last; ++i)
468 fixed_regs[i] = call_used_regs[i] = 1;
469
470 if (!comma)
471 break;
472
473 *comma = ',';
474 str = comma + 1;
475 }
476
477 /* Check if all floating point registers have been fixed. */
478 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
479 if (!fixed_regs[i])
480 break;
481
482 if (i > FP_REG_LAST)
483 target_flags |= MASK_DISABLE_FPREGS;
484}
485
8a5b8538
AS
486/* Implement the TARGET_OPTION_OVERRIDE hook. */
487
488static void
489pa_option_override (void)
66617831 490{
1bb721dc
JM
491 unsigned int i;
492 cl_deferred_option *opt;
9771b263
DN
493 vec<cl_deferred_option> *v
494 = (vec<cl_deferred_option> *) pa_deferred_options;
1bb721dc 495
9771b263
DN
496 if (v)
497 FOR_EACH_VEC_ELT (*v, i, opt)
498 {
499 switch (opt->opt_index)
500 {
501 case OPT_mfixed_range_:
502 fix_range (opt->arg);
503 break;
1bb721dc 504
9771b263
DN
505 default:
506 gcc_unreachable ();
507 }
508 }
1bb721dc 509
6a73009d
JL
510 if (flag_pic && TARGET_PORTABLE_RUNTIME)
511 {
ab532386 512 warning (0, "PIC code generation is not supported in the portable runtime model");
6a73009d
JL
513 }
514
a7721dc0 515 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
6a73009d 516 {
ab532386 517 warning (0, "PIC code generation is not compatible with fast indirect calls");
6a73009d 518 }
0eba3d30 519
54eef932
JL
520 if (! TARGET_GAS && write_symbols != NO_DEBUG)
521 {
d4ee4d25
DD
522 warning (0, "-g is only supported when using GAS on this processor,");
523 warning (0, "-g option disabled");
54eef932
JL
524 write_symbols = NO_DEBUG;
525 }
d07d525a 526
7ee72796
JL
527 /* We only support the "big PIC" model now. And we always generate PIC
528 code when in 64bit mode. */
529 if (flag_pic == 1 || TARGET_64BIT)
520babc7
JL
530 flag_pic = 2;
531
e92abd50
JDA
532 /* Disable -freorder-blocks-and-partition as we don't support hot and
533 cold partitioning. */
534 if (flag_reorder_blocks_and_partition)
535 {
536 inform (input_location,
537 "-freorder-blocks-and-partition does not work "
538 "on this architecture");
539 flag_reorder_blocks_and_partition = 0;
540 flag_reorder_blocks = 1;
541 }
542
301d03af
RS
543 /* We can't guarantee that .dword is available for 32-bit targets. */
544 if (UNITS_PER_WORD == 4)
545 targetm.asm_out.aligned_op.di = NULL;
546
547 /* The unaligned ops are only available when using GAS. */
548 if (!TARGET_GAS)
549 {
550 targetm.asm_out.unaligned_op.hi = NULL;
551 targetm.asm_out.unaligned_op.si = NULL;
552 targetm.asm_out.unaligned_op.di = NULL;
553 }
9a55eab3
JDA
554
555 init_machine_status = pa_init_machine_status;
c47decad
JL
556}
557
41a1208a
JDA
558enum pa_builtins
559{
560 PA_BUILTIN_COPYSIGNQ,
561 PA_BUILTIN_FABSQ,
562 PA_BUILTIN_INFQ,
563 PA_BUILTIN_HUGE_VALQ,
564 PA_BUILTIN_max
565};
566
567static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
568
eab9e742 569static void
b7849684 570pa_init_builtins (void)
4677862a
JDA
571{
572#ifdef DONT_HAVE_FPUTC_UNLOCKED
e79983f4
MM
573 {
574 tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
575 set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
576 builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
577 }
4677862a 578#endif
dfcb2b51 579#if TARGET_HPUX_11
e79983f4
MM
580 {
581 tree decl;
582
583 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
584 set_user_assembler_name (decl, "_Isfinite");
585 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
586 set_user_assembler_name (decl, "_Isfinitef");
587 }
7d522000 588#endif
41a1208a
JDA
589
590 if (HPUX_LONG_DOUBLE_LIBRARY)
591 {
592 tree decl, ftype;
593
594 /* Under HPUX, the __float128 type is a synonym for "long double". */
595 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
596 "__float128");
597
598 /* TFmode support builtins. */
599 ftype = build_function_type_list (long_double_type_node,
600 long_double_type_node,
601 NULL_TREE);
602 decl = add_builtin_function ("__builtin_fabsq", ftype,
603 PA_BUILTIN_FABSQ, BUILT_IN_MD,
604 "_U_Qfabs", NULL_TREE);
605 TREE_READONLY (decl) = 1;
606 pa_builtins[PA_BUILTIN_FABSQ] = decl;
607
608 ftype = build_function_type_list (long_double_type_node,
609 long_double_type_node,
610 long_double_type_node,
611 NULL_TREE);
612 decl = add_builtin_function ("__builtin_copysignq", ftype,
613 PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
614 "_U_Qfcopysign", NULL_TREE);
615 TREE_READONLY (decl) = 1;
616 pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;
617
12526412 618 ftype = build_function_type_list (long_double_type_node, NULL_TREE);
41a1208a
JDA
619 decl = add_builtin_function ("__builtin_infq", ftype,
620 PA_BUILTIN_INFQ, BUILT_IN_MD,
621 NULL, NULL_TREE);
622 pa_builtins[PA_BUILTIN_INFQ] = decl;
623
624 decl = add_builtin_function ("__builtin_huge_valq", ftype,
625 PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
626 NULL, NULL_TREE);
627 pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
628 }
629}
630
631static rtx
632pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
ef4bddc2 633 machine_mode mode ATTRIBUTE_UNUSED,
41a1208a
JDA
634 int ignore ATTRIBUTE_UNUSED)
635{
636 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
637 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
638
639 switch (fcode)
640 {
641 case PA_BUILTIN_FABSQ:
642 case PA_BUILTIN_COPYSIGNQ:
643 return expand_call (exp, target, ignore);
644
645 case PA_BUILTIN_INFQ:
646 case PA_BUILTIN_HUGE_VALQ:
647 {
ef4bddc2 648 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
41a1208a
JDA
649 REAL_VALUE_TYPE inf;
650 rtx tmp;
651
652 real_inf (&inf);
555affd7 653 tmp = const_double_from_real_value (inf, target_mode);
41a1208a
JDA
654
655 tmp = validize_mem (force_const_mem (target_mode, tmp));
656
657 if (target == 0)
658 target = gen_reg_rtx (target_mode);
659
660 emit_move_insn (target, tmp);
661 return target;
662 }
663
664 default:
665 gcc_unreachable ();
666 }
667
668 return NULL_RTX;
4677862a
JDA
669}
670
9a55eab3
JDA
671/* Function to init struct machine_function.
672 This will be called, via a pointer variable,
673 from push_function_context. */
674
675static struct machine_function *
676pa_init_machine_status (void)
677{
766090c2 678 return ggc_cleared_alloc<machine_function> ();
9a55eab3
JDA
679}
680
d8f95bed
JDA
681/* If FROM is a probable pointer register, mark TO as a probable
682 pointer register with the same pointer alignment as FROM. */
683
684static void
685copy_reg_pointer (rtx to, rtx from)
686{
687 if (REG_POINTER (from))
688 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
689}
690
23f6f34f
TG
691/* Return 1 if X contains a symbolic expression. We know these
692 expressions will have one of a few well defined forms, so
c1d1b3f0
JL
693 we need only check those forms. */
694int
ae9d61ab 695pa_symbolic_expression_p (rtx x)
c1d1b3f0
JL
696{
697
fe19a83d 698 /* Strip off any HIGH. */
c1d1b3f0
JL
699 if (GET_CODE (x) == HIGH)
700 x = XEXP (x, 0);
701
1a04ac2b 702 return symbolic_operand (x, VOIDmode);
c1d1b3f0
JL
703}
704
47abc309 705/* Accept any constant that can be moved in one instruction into a
6746a52e 706 general register. */
23f6f34f 707int
5877e54e 708pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
6746a52e
JL
709{
710 /* OK if ldo, ldil, or zdepi, can be used. */
5b281141 711 return (VAL_14_BITS_P (ival)
ae9d61ab
JDA
712 || pa_ldil_cint_p (ival)
713 || pa_zdepi_cint_p (ival));
6746a52e 714}
188538df 715\f
5b281141
JDA
716/* True iff ldil can be used to load this CONST_INT. The least
717 significant 11 bits of the value must be zero and the value must
718 not change sign when extended from 32 to 64 bits. */
719int
5877e54e 720pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
5b281141 721{
5877e54e 722 unsigned HOST_WIDE_INT x;
5b281141 723
5877e54e
JDA
724 x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
725 return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
5b281141
JDA
726}
727
831c1763 728/* True iff zdepi can be used to generate this CONST_INT.
a7b376ee 729 zdepi first sign extends a 5-bit signed number to a given field
831c1763 730 length, then places this field anywhere in a zero. */
0e7f4c19 731int
ae9d61ab 732pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
3a5babac 733{
0c235d7e 734 unsigned HOST_WIDE_INT lsb_mask, t;
3a5babac
TG
735
736 /* This might not be obvious, but it's at least fast.
ddd5a7c1 737 This function is critical; we don't have the time loops would take. */
a1747d2c
TG
738 lsb_mask = x & -x;
739 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
740 /* Return true iff t is a power of two. */
3a5babac
TG
741 return ((t & (t - 1)) == 0);
742}
743
23f6f34f
TG
744/* True iff depi or extru can be used to compute (reg & mask).
745 Accept bit pattern like these:
746 0....01....1
747 1....10....0
748 1..10..01..1 */
0e7f4c19 749int
ae9d61ab 750pa_and_mask_p (unsigned HOST_WIDE_INT mask)
0e7f4c19
TG
751{
752 mask = ~mask;
753 mask += mask & -mask;
754 return (mask & (mask - 1)) == 0;
755}
756
0e7f4c19
TG
757/* True iff depi can be used to compute (reg | MASK). */
758int
ae9d61ab 759pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
0e7f4c19
TG
760{
761 mask += mask & -mask;
762 return (mask & (mask - 1)) == 0;
763}
188538df
TG
764\f
765/* Legitimize PIC addresses. If the address is already
766 position-independent, we return ORIG. Newly generated
767 position-independent addresses go to REG. If we need more
768 than one register, we lose. */
769
ae9d61ab 770static rtx
ef4bddc2 771legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
188538df
TG
772{
773 rtx pic_ref = orig;
774
06ae7eb1 775 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
51076f96 776
abc95ed3 777 /* Labels need special handling. */
519104fe 778 if (pic_label_operand (orig, mode))
6bb36601 779 {
e0d80a58 780 rtx_insn *insn;
4d811a05 781
b3d9ecf0
JL
782 /* We do not want to go through the movXX expanders here since that
783 would create recursion.
784
785 Nor do we really want to call a generator for a named pattern
786 since that requires multiple patterns if we want to support
787 multiple word sizes.
788
789 So instead we just emit the raw set, which avoids the movXX
790 expanders completely. */
d8f95bed 791 mark_reg_pointer (reg, BITS_PER_UNIT);
f7df4a84 792 insn = emit_insn (gen_rtx_SET (reg, orig));
4d811a05
JDA
793
794 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
bbbbb16a 795 add_reg_note (insn, REG_EQUAL, orig);
4d811a05
JDA
796
797 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
798 and update LABEL_NUSES because this is not done automatically. */
799 if (reload_in_progress || reload_completed)
800 {
801 /* Extract LABEL_REF. */
802 if (GET_CODE (orig) == CONST)
803 orig = XEXP (XEXP (orig, 0), 0);
804 /* Extract CODE_LABEL. */
805 orig = XEXP (orig, 0);
65c5f2a6 806 add_reg_note (insn, REG_LABEL_OPERAND, orig);
27e430a2
JDA
807 /* Make sure we have label and not a note. */
808 if (LABEL_P (orig))
809 LABEL_NUSES (orig)++;
4d811a05 810 }
e3b5732b 811 crtl->uses_pic_offset_table = 1;
6bb36601
JL
812 return reg;
813 }
188538df
TG
814 if (GET_CODE (orig) == SYMBOL_REF)
815 {
e0d80a58
JL
816 rtx_insn *insn;
817 rtx tmp_reg;
9ab81df2 818
144d51f9 819 gcc_assert (reg);
188538df 820
9ab81df2
JDA
821 /* Before reload, allocate a temporary register for the intermediate
822 result. This allows the sequence to be deleted when the final
823 result is unused and the insns are trivially dead. */
824 tmp_reg = ((reload_in_progress || reload_completed)
825 ? reg : gen_reg_rtx (Pmode));
826
9c575e20 827 if (function_label_operand (orig, VOIDmode))
7813231b 828 {
0b076fea
JDA
829 /* Force function label into memory in word mode. */
830 orig = XEXP (force_const_mem (word_mode, orig), 0);
7813231b
JDA
831 /* Load plabel address from DLT. */
832 emit_move_insn (tmp_reg,
833 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
834 gen_rtx_HIGH (word_mode, orig)));
835 pic_ref
836 = gen_const_mem (Pmode,
837 gen_rtx_LO_SUM (Pmode, tmp_reg,
838 gen_rtx_UNSPEC (Pmode,
542a8afa
RH
839 gen_rtvec (1, orig),
840 UNSPEC_DLTIND14R)));
7813231b
JDA
841 emit_move_insn (reg, pic_ref);
842 /* Now load address of function descriptor. */
843 pic_ref = gen_rtx_MEM (Pmode, reg);
844 }
845 else
846 {
847 /* Load symbol reference from DLT. */
848 emit_move_insn (tmp_reg,
849 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
850 gen_rtx_HIGH (word_mode, orig)));
851 pic_ref
852 = gen_const_mem (Pmode,
853 gen_rtx_LO_SUM (Pmode, tmp_reg,
854 gen_rtx_UNSPEC (Pmode,
855 gen_rtvec (1, orig),
856 UNSPEC_DLTIND14R)));
857 }
c5c76735 858
e3b5732b 859 crtl->uses_pic_offset_table = 1;
d8f95bed 860 mark_reg_pointer (reg, BITS_PER_UNIT);
9ab81df2
JDA
861 insn = emit_move_insn (reg, pic_ref);
862
863 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
bd94cb6e 864 set_unique_reg_note (insn, REG_EQUAL, orig);
9ab81df2 865
188538df
TG
866 return reg;
867 }
868 else if (GET_CODE (orig) == CONST)
869 {
f1c7ce82 870 rtx base;
188538df
TG
871
872 if (GET_CODE (XEXP (orig, 0)) == PLUS
873 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
874 return orig;
875
144d51f9
NS
876 gcc_assert (reg);
877 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
878
879 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
880 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
881 base == reg ? 0 : reg);
d8f95bed 882
188538df
TG
883 if (GET_CODE (orig) == CONST_INT)
884 {
a1747d2c 885 if (INT_14_BITS (orig))
0a81f074 886 return plus_constant (Pmode, base, INTVAL (orig));
188538df
TG
887 orig = force_reg (Pmode, orig);
888 }
ad2c71b7 889 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
188538df
TG
890 /* Likewise, should we set special REG_NOTEs here? */
891 }
d8f95bed 892
188538df
TG
893 return pic_ref;
894}
895
51076f96
RC
896static GTY(()) rtx gen_tls_tga;
897
898static rtx
899gen_tls_get_addr (void)
900{
901 if (!gen_tls_tga)
902 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
903 return gen_tls_tga;
904}
905
906static rtx
907hppa_tls_call (rtx arg)
908{
909 rtx ret;
910
911 ret = gen_reg_rtx (Pmode);
912 emit_library_call_value (gen_tls_get_addr (), ret,
db69559b 913 LCT_CONST, Pmode, arg, Pmode);
51076f96
RC
914
915 return ret;
916}
917
918static rtx
919legitimize_tls_address (rtx addr)
920{
e0d80a58
JL
921 rtx ret, tmp, t1, t2, tp;
922 rtx_insn *insn;
51076f96 923
d92f4df0
JDA
924 /* Currently, we can't handle anything but a SYMBOL_REF. */
925 if (GET_CODE (addr) != SYMBOL_REF)
926 return addr;
927
928 switch (SYMBOL_REF_TLS_MODEL (addr))
51076f96
RC
929 {
930 case TLS_MODEL_GLOBAL_DYNAMIC:
931 tmp = gen_reg_rtx (Pmode);
a758fa89
AJ
932 if (flag_pic)
933 emit_insn (gen_tgd_load_pic (tmp, addr));
934 else
935 emit_insn (gen_tgd_load (tmp, addr));
51076f96
RC
936 ret = hppa_tls_call (tmp);
937 break;
938
939 case TLS_MODEL_LOCAL_DYNAMIC:
940 ret = gen_reg_rtx (Pmode);
941 tmp = gen_reg_rtx (Pmode);
942 start_sequence ();
a758fa89
AJ
943 if (flag_pic)
944 emit_insn (gen_tld_load_pic (tmp, addr));
945 else
946 emit_insn (gen_tld_load (tmp, addr));
51076f96
RC
947 t1 = hppa_tls_call (tmp);
948 insn = get_insns ();
949 end_sequence ();
950 t2 = gen_reg_rtx (Pmode);
951 emit_libcall_block (insn, t2, t1,
952 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
953 UNSPEC_TLSLDBASE));
954 emit_insn (gen_tld_offset_load (ret, addr, t2));
955 break;
956
957 case TLS_MODEL_INITIAL_EXEC:
958 tp = gen_reg_rtx (Pmode);
959 tmp = gen_reg_rtx (Pmode);
960 ret = gen_reg_rtx (Pmode);
961 emit_insn (gen_tp_load (tp));
a758fa89
AJ
962 if (flag_pic)
963 emit_insn (gen_tie_load_pic (tmp, addr));
964 else
965 emit_insn (gen_tie_load (tmp, addr));
51076f96
RC
966 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
967 break;
968
969 case TLS_MODEL_LOCAL_EXEC:
970 tp = gen_reg_rtx (Pmode);
971 ret = gen_reg_rtx (Pmode);
972 emit_insn (gen_tp_load (tp));
973 emit_insn (gen_tle_load (ret, addr, tp));
974 break;
975
976 default:
06ae7eb1 977 gcc_unreachable ();
51076f96
RC
978 }
979
980 return ret;
981}
982
92d0b058
JL
983/* Helper for hppa_legitimize_address. Given X, return true if it
984 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
985
986 This respectively represent canonical shift-add rtxs or scaled
987 memory addresses. */
988static bool
989mem_shadd_or_shadd_rtx_p (rtx x)
990{
991 return ((GET_CODE (x) == ASHIFT
992 || GET_CODE (x) == MULT)
993 && GET_CODE (XEXP (x, 1)) == CONST_INT
994 && ((GET_CODE (x) == ASHIFT
995 && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
996 || (GET_CODE (x) == MULT
997 && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
998}
999
c1d1b3f0
JL
1000/* Try machine-dependent ways of modifying an illegitimate address
1001 to be legitimate. If we find one, return the new, valid address.
1002 This macro is used in only one place: `memory_address' in explow.c.
1003
1004 OLDX is the address as it was before break_out_memory_refs was called.
1005 In some cases it is useful to look at this to decide what needs to be done.
1006
c1d1b3f0 1007 It is always safe for this macro to do nothing. It exists to recognize
23f6f34f 1008 opportunities to optimize the output.
c1d1b3f0
JL
1009
1010 For the PA, transform:
1011
1012 memory(X + <large int>)
1013
1014 into:
1015
1016 if (<large int> & mask) >= 16
1017 Y = (<large int> & ~mask) + mask + 1 Round up.
1018 else
1019 Y = (<large int> & ~mask) Round down.
1020 Z = X + Y
1021 memory (Z + (<large int> - Y));
1022
23f6f34f 1023 This is for CSE to find several similar references, and only use one Z.
c1d1b3f0 1024
1e5f1716 1025 X can either be a SYMBOL_REF or REG, but because combine cannot
c1d1b3f0
JL
1026 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1027 D will not fit in 14 bits.
1028
1029 MODE_FLOAT references allow displacements which fit in 5 bits, so use
23f6f34f 1030 0x1f as the mask.
c1d1b3f0
JL
1031
1032 MODE_INT references allow displacements which fit in 14 bits, so use
23f6f34f 1033 0x3fff as the mask.
c1d1b3f0
JL
1034
1035 This relies on the fact that most mode MODE_FLOAT references will use FP
1036 registers and most mode MODE_INT references will use integer registers.
1037 (In the rare case of an FP register used in an integer MODE, we depend
1038 on secondary reloads to clean things up.)
1039
1040
1041 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1042 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
ddd5a7c1 1043 addressing modes to be used).
c1d1b3f0 1044
92d0b058
JL
1045 Note that the addresses passed into hppa_legitimize_address always
1046 come from a MEM, so we only have to match the MULT form on incoming
1047 addresses. But to be future proof we also match the ASHIFT form.
1048
1049 However, this routine always places those shift-add sequences into
1050 registers, so we have to generate the ASHIFT form as our output.
1051
c1d1b3f0
JL
1052 Put X and Z into registers. Then put the entire expression into
1053 a register. */
1054
1055rtx
b7849684 1056hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
ef4bddc2 1057 machine_mode mode)
c1d1b3f0 1058{
c1d1b3f0
JL
1059 rtx orig = x;
1060
d8f95bed
JDA
1061 /* We need to canonicalize the order of operands in unscaled indexed
1062 addresses since the code that checks if an address is valid doesn't
1063 always try both orders. */
1064 if (!TARGET_NO_SPACE_REGS
1065 && GET_CODE (x) == PLUS
1066 && GET_MODE (x) == Pmode
1067 && REG_P (XEXP (x, 0))
1068 && REG_P (XEXP (x, 1))
1069 && REG_POINTER (XEXP (x, 0))
1070 && !REG_POINTER (XEXP (x, 1)))
1071 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
1072
093a6c99 1073 if (tls_referenced_p (x))
51076f96
RC
1074 return legitimize_tls_address (x);
1075 else if (flag_pic)
6bb36601
JL
1076 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1077
fe19a83d 1078 /* Strip off CONST. */
c1d1b3f0
JL
1079 if (GET_CODE (x) == CONST)
1080 x = XEXP (x, 0);
1081
68944452
JL
1082 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1083 That should always be safe. */
1084 if (GET_CODE (x) == PLUS
1085 && GET_CODE (XEXP (x, 0)) == REG
1086 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
1087 {
690d4228
JL
1088 rtx reg = force_reg (Pmode, XEXP (x, 1));
1089 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
68944452
JL
1090 }
1091
326bc2de
JL
1092 /* Note we must reject symbols which represent function addresses
1093 since the assembler/linker can't handle arithmetic on plabels. */
c1d1b3f0
JL
1094 if (GET_CODE (x) == PLUS
1095 && GET_CODE (XEXP (x, 1)) == CONST_INT
326bc2de
JL
1096 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1097 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
c1d1b3f0
JL
1098 || GET_CODE (XEXP (x, 0)) == REG))
1099 {
1100 rtx int_part, ptr_reg;
1101 int newoffset;
1102 int offset = INTVAL (XEXP (x, 1));
f9bd8d8e
JL
1103 int mask;
1104
1105 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1a04ac2b 1106 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
c1d1b3f0 1107
23f6f34f 1108 /* Choose which way to round the offset. Round up if we
c1d1b3f0
JL
1109 are >= halfway to the next boundary. */
1110 if ((offset & mask) >= ((mask + 1) / 2))
1111 newoffset = (offset & ~ mask) + mask + 1;
1112 else
1113 newoffset = (offset & ~ mask);
1114
1115 /* If the newoffset will not fit in 14 bits (ldo), then
1116 handling this would take 4 or 5 instructions (2 to load
1117 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1118 add the new offset and the SYMBOL_REF.) Combine can
1119 not handle 4->2 or 5->2 combinations, so do not create
1120 them. */
1121 if (! VAL_14_BITS_P (newoffset)
1122 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1123 {
0a81f074 1124 rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
c1d1b3f0 1125 rtx tmp_reg
e5e28962 1126 = force_reg (Pmode,
ad2c71b7 1127 gen_rtx_HIGH (Pmode, const_part));
c1d1b3f0 1128 ptr_reg
e5e28962 1129 = force_reg (Pmode,
c5c76735
JL
1130 gen_rtx_LO_SUM (Pmode,
1131 tmp_reg, const_part));
c1d1b3f0
JL
1132 }
1133 else
1134 {
1135 if (! VAL_14_BITS_P (newoffset))
e5e28962 1136 int_part = force_reg (Pmode, GEN_INT (newoffset));
c1d1b3f0
JL
1137 else
1138 int_part = GEN_INT (newoffset);
1139
e5e28962 1140 ptr_reg = force_reg (Pmode,
ad2c71b7
JL
1141 gen_rtx_PLUS (Pmode,
1142 force_reg (Pmode, XEXP (x, 0)),
1143 int_part));
c1d1b3f0 1144 }
0a81f074 1145 return plus_constant (Pmode, ptr_reg, offset - newoffset);
c1d1b3f0 1146 }
7426c959 1147
92d0b058 1148 /* Handle (plus (mult (a) (mem_shadd_constant)) (b)). */
7426c959 1149
92d0b058
JL
1150 if (GET_CODE (x) == PLUS
1151 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
ec8e098d 1152 && (OBJECT_P (XEXP (x, 1))
7426c959
JL
1153 || GET_CODE (XEXP (x, 1)) == SUBREG)
1154 && GET_CODE (XEXP (x, 1)) != CONST)
c1d1b3f0 1155 {
92d0b058
JL
1156 /* If we were given a MULT, we must fix the constant
1157 as we're going to create the ASHIFT form. */
1158 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1159 if (GET_CODE (XEXP (x, 0)) == MULT)
1160 shift_val = exact_log2 (shift_val);
78c0acfd 1161
92d0b058 1162 rtx reg1, reg2;
78c0acfd
JL
1163 reg1 = XEXP (x, 1);
1164 if (GET_CODE (reg1) != REG)
1165 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1166
1167 reg2 = XEXP (XEXP (x, 0), 0);
1168 if (GET_CODE (reg2) != REG)
1169 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1170
92d0b058
JL
1171 return force_reg (Pmode,
1172 gen_rtx_PLUS (Pmode,
1173 gen_rtx_ASHIFT (Pmode, reg2,
1174 GEN_INT (shift_val)),
1175 reg1));
c1d1b3f0 1176 }
7426c959 1177
92d0b058 1178 /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).
305123ba
JL
1179
1180 Only do so for floating point modes since this is more speculative
1181 and we lose if it's an integer store. */
78c0acfd 1182 if (GET_CODE (x) == PLUS
305123ba 1183 && GET_CODE (XEXP (x, 0)) == PLUS
92d0b058 1184 && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
78c0acfd 1185 && (mode == SFmode || mode == DFmode))
305123ba 1186 {
92d0b058
JL
1187 int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
1188
1189 /* If we were given a MULT, we must fix the constant
1190 as we're going to create the ASHIFT form. */
1191 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
1192 shift_val = exact_log2 (shift_val);
78c0acfd 1193
92d0b058 1194 /* Try and figure out what to use as a base register. */
b38bccca 1195 rtx reg1, reg2, base, idx;
78c0acfd
JL
1196
1197 reg1 = XEXP (XEXP (x, 0), 1);
1198 reg2 = XEXP (x, 1);
1199 base = NULL_RTX;
1200 idx = NULL_RTX;
1201
1202 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
ae9d61ab 1203 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
3502dc9c 1204 it's a base register below. */
78c0acfd
JL
1205 if (GET_CODE (reg1) != REG)
1206 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1207
1208 if (GET_CODE (reg2) != REG)
1209 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1210
1211 /* Figure out what the base and index are. */
6619e96c 1212
78c0acfd 1213 if (GET_CODE (reg1) == REG
3502dc9c 1214 && REG_POINTER (reg1))
78c0acfd
JL
1215 {
1216 base = reg1;
ad2c71b7 1217 idx = gen_rtx_PLUS (Pmode,
92d0b058
JL
1218 gen_rtx_ASHIFT (Pmode,
1219 XEXP (XEXP (XEXP (x, 0), 0), 0),
1220 GEN_INT (shift_val)),
ad2c71b7 1221 XEXP (x, 1));
78c0acfd
JL
1222 }
1223 else if (GET_CODE (reg2) == REG
3502dc9c 1224 && REG_POINTER (reg2))
78c0acfd
JL
1225 {
1226 base = reg2;
78c0acfd
JL
1227 idx = XEXP (x, 0);
1228 }
1229
1230 if (base == 0)
31d4f31f 1231 return orig;
78c0acfd
JL
1232
1233 /* If the index adds a large constant, try to scale the
1234 constant so that it can be loaded with only one insn. */
1235 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1236 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1237 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1238 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1239 {
1240 /* Divide the CONST_INT by the scale factor, then add it to A. */
1241 int val = INTVAL (XEXP (idx, 1));
92d0b058 1242 val /= (1 << shift_val);
78c0acfd 1243
78c0acfd
JL
1244 reg1 = XEXP (XEXP (idx, 0), 0);
1245 if (GET_CODE (reg1) != REG)
1246 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1247
ad2c71b7 1248 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
78c0acfd
JL
1249
1250 /* We can now generate a simple scaled indexed address. */
c5c76735
JL
1251 return
1252 force_reg
1253 (Pmode, gen_rtx_PLUS (Pmode,
92d0b058
JL
1254 gen_rtx_ASHIFT (Pmode, reg1,
1255 GEN_INT (shift_val)),
c5c76735 1256 base));
78c0acfd
JL
1257 }
1258
1259 /* If B + C is still a valid base register, then add them. */
1260 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1261 && INTVAL (XEXP (idx, 1)) <= 4096
1262 && INTVAL (XEXP (idx, 1)) >= -4096)
1263 {
78c0acfd
JL
1264 rtx reg1, reg2;
1265
ad2c71b7 1266 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
78c0acfd
JL
1267
1268 reg2 = XEXP (XEXP (idx, 0), 0);
1269 if (GET_CODE (reg2) != CONST_INT)
1270 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1271
92d0b058
JL
1272 return force_reg (Pmode,
1273 gen_rtx_PLUS (Pmode,
1274 gen_rtx_ASHIFT (Pmode, reg2,
1275 GEN_INT (shift_val)),
1276 reg1));
78c0acfd
JL
1277 }
1278
1279 /* Get the index into a register, then add the base + index and
1280 return a register holding the result. */
1281
1282 /* First get A into a register. */
1283 reg1 = XEXP (XEXP (idx, 0), 0);
1284 if (GET_CODE (reg1) != REG)
1285 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1286
1287 /* And get B into a register. */
1288 reg2 = XEXP (idx, 1);
1289 if (GET_CODE (reg2) != REG)
1290 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1291
ad2c71b7
JL
1292 reg1 = force_reg (Pmode,
1293 gen_rtx_PLUS (Pmode,
92d0b058
JL
1294 gen_rtx_ASHIFT (Pmode, reg1,
1295 GEN_INT (shift_val)),
ad2c71b7 1296 reg2));
78c0acfd
JL
1297
1298 /* Add the result to our base register and return. */
ad2c71b7 1299 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
6619e96c 1300
305123ba
JL
1301 }
1302
23f6f34f 1303 /* Uh-oh. We might have an address for x[n-100000]. This needs
c2827c50
JL
1304 special handling to avoid creating an indexed memory address
1305 with x-100000 as the base.
6619e96c 1306
c2827c50
JL
1307 If the constant part is small enough, then it's still safe because
1308 there is a guard page at the beginning and end of the data segment.
1309
1310 Scaled references are common enough that we want to try and rearrange the
1311 terms so that we can use indexing for these addresses too. Only
305123ba 1312 do the optimization for floatint point modes. */
7426c959 1313
c2827c50 1314 if (GET_CODE (x) == PLUS
ae9d61ab 1315 && pa_symbolic_expression_p (XEXP (x, 1)))
7426c959
JL
1316 {
1317 /* Ugly. We modify things here so that the address offset specified
1318 by the index expression is computed first, then added to x to form
c2827c50 1319 the entire address. */
7426c959 1320
305123ba 1321 rtx regx1, regx2, regy1, regy2, y;
7426c959
JL
1322
1323 /* Strip off any CONST. */
1324 y = XEXP (x, 1);
1325 if (GET_CODE (y) == CONST)
1326 y = XEXP (y, 0);
1327
77fc9313
RK
1328 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1329 {
305123ba 1330 /* See if this looks like
92d0b058 1331 (plus (mult (reg) (mem_shadd_const))
305123ba
JL
1332 (const (plus (symbol_ref) (const_int))))
1333
78c0acfd 1334 Where const_int is small. In that case the const
6619e96c 1335 expression is a valid pointer for indexing.
78c0acfd
JL
1336
1337 If const_int is big, but can be divided evenly by shadd_const
1338 and added to (reg). This allows more scaled indexed addresses. */
1339 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
92d0b058 1340 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
305123ba 1341 && GET_CODE (XEXP (y, 1)) == CONST_INT
78c0acfd 1342 && INTVAL (XEXP (y, 1)) >= -4096
92d0b058 1343 && INTVAL (XEXP (y, 1)) <= 4095)
78c0acfd 1344 {
92d0b058
JL
1345 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1346
1347 /* If we were given a MULT, we must fix the constant
1348 as we're going to create the ASHIFT form. */
1349 if (GET_CODE (XEXP (x, 0)) == MULT)
1350 shift_val = exact_log2 (shift_val);
1351
78c0acfd
JL
1352 rtx reg1, reg2;
1353
1354 reg1 = XEXP (x, 1);
1355 if (GET_CODE (reg1) != REG)
1356 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1357
1358 reg2 = XEXP (XEXP (x, 0), 0);
1359 if (GET_CODE (reg2) != REG)
1360 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1361
92d0b058
JL
1362 return
1363 force_reg (Pmode,
1364 gen_rtx_PLUS (Pmode,
1365 gen_rtx_ASHIFT (Pmode,
1366 reg2,
1367 GEN_INT (shift_val)),
1368 reg1));
78c0acfd
JL
1369 }
1370 else if ((mode == DFmode || mode == SFmode)
1371 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
92d0b058 1372 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
78c0acfd 1373 && GET_CODE (XEXP (y, 1)) == CONST_INT
92d0b058 1374 && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
305123ba 1375 {
92d0b058
JL
1376 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1377
1378 /* If we were given a MULT, we must fix the constant
1379 as we're going to create the ASHIFT form. */
1380 if (GET_CODE (XEXP (x, 0)) == MULT)
1381 shift_val = exact_log2 (shift_val);
1382
305123ba
JL
1383 regx1
1384 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1385 / INTVAL (XEXP (XEXP (x, 0), 1))));
1386 regx2 = XEXP (XEXP (x, 0), 0);
1387 if (GET_CODE (regx2) != REG)
1388 regx2 = force_reg (Pmode, force_operand (regx2, 0));
ad2c71b7
JL
1389 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1390 regx2, regx1));
c5c76735
JL
1391 return
1392 force_reg (Pmode,
1393 gen_rtx_PLUS (Pmode,
92d0b058
JL
1394 gen_rtx_ASHIFT (Pmode, regx2,
1395 GEN_INT (shift_val)),
c5c76735 1396 force_reg (Pmode, XEXP (y, 0))));
305123ba 1397 }
c2827c50
JL
1398 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1399 && INTVAL (XEXP (y, 1)) >= -4096
1400 && INTVAL (XEXP (y, 1)) <= 4095)
1401 {
1402 /* This is safe because of the guard page at the
1403 beginning and end of the data space. Just
1404 return the original address. */
1405 return orig;
1406 }
305123ba
JL
1407 else
1408 {
1409 /* Doesn't look like one we can optimize. */
1410 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1411 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1412 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1413 regx1 = force_reg (Pmode,
ad2c71b7
JL
1414 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1415 regx1, regy2));
1416 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
305123ba 1417 }
77fc9313 1418 }
7426c959
JL
1419 }
1420
c1d1b3f0
JL
1421 return orig;
1422}
1423
8a5b8538
AS
1424/* Implement the TARGET_REGISTER_MOVE_COST hook.
1425
1426 Compute extra cost of moving data between one register class
1427 and another.
1428
1429 Make moves from SAR so expensive they should never happen. We used to
1430 have 0xffff here, but that generates overflow in rare cases.
1431
1432 Copies involving a FP register and a non-FP register are relatively
1433 expensive because they must go through memory.
1434
1435 Other copies are reasonably cheap. */
1436
1437static int
ef4bddc2 1438hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
8a5b8538
AS
1439 reg_class_t from, reg_class_t to)
1440{
1441 if (from == SHIFT_REGS)
1442 return 0x100;
483d7ad3
JDA
1443 else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
1444 return 18;
8a5b8538
AS
1445 else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
1446 || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
1447 return 16;
1448 else
1449 return 2;
1450}
1451
188538df
TG
1452/* For the HPPA, REG and REG+CONST is cost 0
1453 and addresses involving symbolic constants are cost 2.
1454
1455 PIC addresses are very expensive.
1456
1457 It is no coincidence that this has the same structure
1a04ac2b 1458 as pa_legitimate_address_p. */
dcefdf67
RH
1459
1460static int
ef4bddc2 1461hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
b413068c 1462 addr_space_t as ATTRIBUTE_UNUSED,
f40751dd 1463 bool speed ATTRIBUTE_UNUSED)
188538df 1464{
dcefdf67
RH
1465 switch (GET_CODE (X))
1466 {
1467 case REG:
1468 case PLUS:
1469 case LO_SUM:
188538df 1470 return 1;
dcefdf67
RH
1471 case HIGH:
1472 return 2;
1473 default:
1474 return 4;
1475 }
188538df
TG
1476}
1477
3c50106f
RH
1478/* Compute a (partial) cost for rtx X. Return true if the complete
1479 cost has been computed, and false if subexpressions should be
1480 scanned. In either case, *TOTAL contains the cost result. */
1481
1482static bool
e548c9df
AM
1483hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
1484 int opno ATTRIBUTE_UNUSED,
68f932c4 1485 int *total, bool speed ATTRIBUTE_UNUSED)
3c50106f 1486{
259febfe 1487 int factor;
e548c9df 1488 int code = GET_CODE (x);
259febfe 1489
3c50106f
RH
1490 switch (code)
1491 {
1492 case CONST_INT:
1493 if (INTVAL (x) == 0)
1494 *total = 0;
1495 else if (INT_14_BITS (x))
1496 *total = 1;
1497 else
1498 *total = 2;
1499 return true;
1500
1501 case HIGH:
1502 *total = 2;
1503 return true;
1504
1505 case CONST:
1506 case LABEL_REF:
1507 case SYMBOL_REF:
1508 *total = 4;
1509 return true;
1510
1511 case CONST_DOUBLE:
1512 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1513 && outer_code != SET)
1514 *total = 0;
1515 else
1516 *total = 8;
1517 return true;
1518
1519 case MULT:
e548c9df 1520 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
259febfe
JDA
1521 {
1522 *total = COSTS_N_INSNS (3);
1523 return true;
1524 }
1525
1526 /* A mode size N times larger than SImode needs O(N*N) more insns. */
e548c9df 1527 factor = GET_MODE_SIZE (mode) / 4;
259febfe
JDA
1528 if (factor == 0)
1529 factor = 1;
1530
1531 if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1532 *total = factor * factor * COSTS_N_INSNS (8);
3c50106f 1533 else
259febfe 1534 *total = factor * factor * COSTS_N_INSNS (20);
3c50106f
RH
1535 return true;
1536
1537 case DIV:
e548c9df 1538 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
3c50106f
RH
1539 {
1540 *total = COSTS_N_INSNS (14);
1541 return true;
1542 }
5efb1046 1543 /* FALLTHRU */
3c50106f
RH
1544
1545 case UDIV:
1546 case MOD:
1547 case UMOD:
259febfe 1548 /* A mode size N times larger than SImode needs O(N*N) more insns. */
e548c9df 1549 factor = GET_MODE_SIZE (mode) / 4;
259febfe
JDA
1550 if (factor == 0)
1551 factor = 1;
1552
1553 *total = factor * factor * COSTS_N_INSNS (60);
3c50106f
RH
1554 return true;
1555
1556 case PLUS: /* this includes shNadd insns */
1557 case MINUS:
e548c9df 1558 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
259febfe
JDA
1559 {
1560 *total = COSTS_N_INSNS (3);
1561 return true;
1562 }
1563
1564 /* A size N times larger than UNITS_PER_WORD needs N times as
1565 many insns, taking N times as long. */
e548c9df 1566 factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
259febfe
JDA
1567 if (factor == 0)
1568 factor = 1;
1569 *total = factor * COSTS_N_INSNS (1);
3c50106f
RH
1570 return true;
1571
1572 case ASHIFT:
1573 case ASHIFTRT:
1574 case LSHIFTRT:
1575 *total = COSTS_N_INSNS (1);
1576 return true;
1577
1578 default:
1579 return false;
1580 }
1581}
1582
6619e96c
AM
1583/* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1584 new rtx with the correct mode. */
1585static inline rtx
ef4bddc2 1586force_mode (machine_mode mode, rtx orig)
6619e96c
AM
1587{
1588 if (mode == GET_MODE (orig))
1589 return orig;
1590
144d51f9 1591 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
6619e96c
AM
1592
1593 return gen_rtx_REG (mode, REGNO (orig));
1594}
1595
fbbf66e7
RS
1596/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1597
1598static bool
ef4bddc2 1599pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
fbbf66e7 1600{
093a6c99 1601 return tls_referenced_p (x);
fbbf66e7
RS
1602}
1603
188538df
TG
1604/* Emit insns to move operands[1] into operands[0].
1605
1606 Return 1 if we have written out everything that needs to be done to
1607 do the move. Otherwise, return 0 and the caller will emit the move
6619e96c 1608 normally.
1b8ad134
JL
1609
1610 Note SCRATCH_REG may not be in the proper mode depending on how it
c1207243 1611 will be used. This routine is responsible for creating a new copy
1b8ad134 1612 of SCRATCH_REG in the proper mode. */
188538df
TG
1613
1614int
ef4bddc2 1615pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
188538df
TG
1616{
1617 register rtx operand0 = operands[0];
1618 register rtx operand1 = operands[1];
428be702 1619 register rtx tem;
188538df 1620
d8f95bed
JDA
1621 /* We can only handle indexed addresses in the destination operand
1622 of floating point stores. Thus, we need to break out indexed
1623 addresses from the destination operand. */
1624 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1625 {
b3a13419 1626 gcc_assert (can_create_pseudo_p ());
d8f95bed
JDA
1627
1628 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1629 operand0 = replace_equiv_address (operand0, tem);
1630 }
1631
1632 /* On targets with non-equivalent space registers, break out unscaled
1633 indexed addresses from the source operand before the final CSE.
1634 We have to do this because the REG_POINTER flag is not correctly
1635 carried through various optimization passes and CSE may substitute
1636 a pseudo without the pointer set for one with the pointer set. As
71cc389b 1637 a result, we loose various opportunities to create insns with
d8f95bed
JDA
1638 unscaled indexed addresses. */
1639 if (!TARGET_NO_SPACE_REGS
1640 && !cse_not_expected
1641 && GET_CODE (operand1) == MEM
1642 && GET_CODE (XEXP (operand1, 0)) == PLUS
1643 && REG_P (XEXP (XEXP (operand1, 0), 0))
1644 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1645 operand1
1646 = replace_equiv_address (operand1,
1647 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1648
54d65918
JL
1649 if (scratch_reg
1650 && reload_in_progress && GET_CODE (operand0) == REG
8a642d97 1651 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
f2034d06 1652 operand0 = reg_equiv_mem (REGNO (operand0));
54d65918
JL
1653 else if (scratch_reg
1654 && reload_in_progress && GET_CODE (operand0) == SUBREG
8a642d97
RK
1655 && GET_CODE (SUBREG_REG (operand0)) == REG
1656 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
27a2c2b5 1657 {
ddef6bc7 1658 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
71443006
JL
1659 the code which tracks sets/uses for delete_output_reload. */
1660 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
f2034d06 1661 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
ddef6bc7 1662 SUBREG_BYTE (operand0));
55a2c322 1663 operand0 = alter_subreg (&temp, true);
27a2c2b5 1664 }
8a642d97 1665
54d65918
JL
1666 if (scratch_reg
1667 && reload_in_progress && GET_CODE (operand1) == REG
8a642d97 1668 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
f2034d06 1669 operand1 = reg_equiv_mem (REGNO (operand1));
54d65918
JL
1670 else if (scratch_reg
1671 && reload_in_progress && GET_CODE (operand1) == SUBREG
8a642d97
RK
1672 && GET_CODE (SUBREG_REG (operand1)) == REG
1673 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
27a2c2b5 1674 {
ddef6bc7 1675 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
71443006
JL
1676 the code which tracks sets/uses for delete_output_reload. */
1677 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
f2034d06 1678 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
ddef6bc7 1679 SUBREG_BYTE (operand1));
55a2c322 1680 operand1 = alter_subreg (&temp, true);
27a2c2b5 1681 }
8a642d97 1682
54d65918 1683 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
428be702
RK
1684 && ((tem = find_replacement (&XEXP (operand0, 0)))
1685 != XEXP (operand0, 0)))
7c95bbfb 1686 operand0 = replace_equiv_address (operand0, tem);
d8f95bed 1687
54d65918 1688 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
428be702
RK
1689 && ((tem = find_replacement (&XEXP (operand1, 0)))
1690 != XEXP (operand1, 0)))
7c95bbfb 1691 operand1 = replace_equiv_address (operand1, tem);
428be702 1692
4d3cea21 1693 /* Handle secondary reloads for loads/stores of FP registers from
cae80939 1694 REG+D addresses where D does not fit in 5 or 14 bits, including
668c901b
JDA
1695 (subreg (mem (addr))) cases, and reloads for other unsupported
1696 memory operands. */
a4295210 1697 if (scratch_reg
512e4ace 1698 && FP_REG_P (operand0)
1a04ac2b
JDA
1699 && (MEM_P (operand1)
1700 || (GET_CODE (operand1) == SUBREG
512e4ace 1701 && MEM_P (XEXP (operand1, 0)))))
d2a94ec0 1702 {
d3ccfbb9 1703 rtx op1 = operand1;
42fbe27f 1704
d3ccfbb9
JDA
1705 if (GET_CODE (op1) == SUBREG)
1706 op1 = XEXP (op1, 0);
2d7b2c36 1707
668c901b 1708 if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
2d7b2c36 1709 {
668c901b
JDA
1710 if (!(TARGET_PA_20
1711 && !TARGET_ELF32
1712 && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1713 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
d3ccfbb9 1714 {
668c901b
JDA
1715 /* SCRATCH_REG will hold an address and maybe the actual data.
1716 We want it in WORD_MODE regardless of what mode it was
1717 originally given to us. */
1718 scratch_reg = force_mode (word_mode, scratch_reg);
1719
1720 /* D might not fit in 14 bits either; for such cases load D
1721 into scratch reg. */
1722 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1723 {
1724 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1725 emit_move_insn (scratch_reg,
1726 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1727 Pmode,
1728 XEXP (XEXP (op1, 0), 0),
1729 scratch_reg));
1730 }
1731 else
1732 emit_move_insn (scratch_reg, XEXP (op1, 0));
1733 emit_insn (gen_rtx_SET (operand0,
1734 replace_equiv_address (op1, scratch_reg)));
1735 return 1;
d3ccfbb9 1736 }
668c901b
JDA
1737 }
1738 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1739 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1740 || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1741 {
1742 /* Load memory address into SCRATCH_REG. */
1743 scratch_reg = force_mode (word_mode, scratch_reg);
1744 emit_move_insn (scratch_reg, XEXP (op1, 0));
d3ccfbb9
JDA
1745 emit_insn (gen_rtx_SET (operand0,
1746 replace_equiv_address (op1, scratch_reg)));
1747 return 1;
2d7b2c36 1748 }
d2a94ec0 1749 }
a4295210 1750 else if (scratch_reg
512e4ace 1751 && FP_REG_P (operand1)
1a04ac2b
JDA
1752 && (MEM_P (operand0)
1753 || (GET_CODE (operand0) == SUBREG
512e4ace 1754 && MEM_P (XEXP (operand0, 0)))))
d2a94ec0 1755 {
d3ccfbb9 1756 rtx op0 = operand0;
42fbe27f 1757
d3ccfbb9
JDA
1758 if (GET_CODE (op0) == SUBREG)
1759 op0 = XEXP (op0, 0);
1b8ad134 1760
668c901b 1761 if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
2d7b2c36 1762 {
668c901b
JDA
1763 if (!(TARGET_PA_20
1764 && !TARGET_ELF32
1765 && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1766 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
d3ccfbb9 1767 {
668c901b
JDA
1768 /* SCRATCH_REG will hold an address and maybe the actual data.
1769 We want it in WORD_MODE regardless of what mode it was
1770 originally given to us. */
1771 scratch_reg = force_mode (word_mode, scratch_reg);
1772
1773 /* D might not fit in 14 bits either; for such cases load D
1774 into scratch reg. */
1775 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1776 {
1777 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1778 emit_move_insn (scratch_reg,
1779 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1780 Pmode,
1781 XEXP (XEXP (op0, 0), 0),
1782 scratch_reg));
1783 }
1784 else
1785 emit_move_insn (scratch_reg, XEXP (op0, 0));
1786 emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
1787 operand1));
1788 return 1;
d3ccfbb9 1789 }
668c901b
JDA
1790 }
1791 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1792 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1793 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1794 {
1795 /* Load memory address into SCRATCH_REG. */
1796 scratch_reg = force_mode (word_mode, scratch_reg);
1797 emit_move_insn (scratch_reg, XEXP (op0, 0));
d3ccfbb9
JDA
1798 emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
1799 operand1));
1800 return 1;
2d7b2c36 1801 }
d2a94ec0 1802 }
c063ad75 1803 /* Handle secondary reloads for loads of FP registers from constant
1a04ac2b
JDA
1804 expressions by forcing the constant into memory. For the most part,
1805 this is only necessary for SImode and DImode.
c063ad75 1806
1a04ac2b 1807 Use scratch_reg to hold the address of the memory location. */
a4295210 1808 else if (scratch_reg
c063ad75 1809 && CONSTANT_P (operand1)
d3ccfbb9 1810 && FP_REG_P (operand0))
c063ad75 1811 {
7c95bbfb 1812 rtx const_mem, xoperands[2];
c063ad75 1813
1a04ac2b
JDA
1814 if (operand1 == CONST0_RTX (mode))
1815 {
f7df4a84 1816 emit_insn (gen_rtx_SET (operand0, operand1));
1a04ac2b
JDA
1817 return 1;
1818 }
1819
1b8ad134
JL
1820 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1821 it in WORD_MODE regardless of what mode it was originally given
1822 to us. */
6619e96c 1823 scratch_reg = force_mode (word_mode, scratch_reg);
1b8ad134 1824
c063ad75
JL
1825 /* Force the constant into memory and put the address of the
1826 memory location into scratch_reg. */
7c95bbfb 1827 const_mem = force_const_mem (mode, operand1);
c063ad75 1828 xoperands[0] = scratch_reg;
7c95bbfb 1829 xoperands[1] = XEXP (const_mem, 0);
ae9d61ab 1830 pa_emit_move_sequence (xoperands, Pmode, 0);
c063ad75
JL
1831
1832 /* Now load the destination register. */
f7df4a84 1833 emit_insn (gen_rtx_SET (operand0,
7c95bbfb 1834 replace_equiv_address (const_mem, scratch_reg)));
c063ad75
JL
1835 return 1;
1836 }
4d3cea21 1837 /* Handle secondary reloads for SAR. These occur when trying to load
483d7ad3 1838 the SAR from memory or a constant. */
a4295210
JDA
1839 else if (scratch_reg
1840 && GET_CODE (operand0) == REG
9c1eed37 1841 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
4d3cea21 1842 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
483d7ad3 1843 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
4d3cea21 1844 {
09ece7b5
JL
1845 /* D might not fit in 14 bits either; for such cases load D into
1846 scratch reg. */
1847 if (GET_CODE (operand1) == MEM
2fd74bff 1848 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
09ece7b5 1849 {
dd8c13e3
JL
1850 /* We are reloading the address into the scratch register, so we
1851 want to make sure the scratch register is a full register. */
6619e96c 1852 scratch_reg = force_mode (word_mode, scratch_reg);
dd8c13e3 1853
6619e96c 1854 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
ad2c71b7
JL
1855 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1856 0)),
690d4228 1857 Pmode,
ad2c71b7
JL
1858 XEXP (XEXP (operand1, 0),
1859 0),
1860 scratch_reg));
dd8c13e3
JL
1861
1862 /* Now we are going to load the scratch register from memory,
1863 we want to load it in the same width as the original MEM,
1864 which must be the same as the width of the ultimate destination,
1865 OPERAND0. */
6619e96c
AM
1866 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1867
7c95bbfb
RH
1868 emit_move_insn (scratch_reg,
1869 replace_equiv_address (operand1, scratch_reg));
09ece7b5
JL
1870 }
1871 else
dd8c13e3
JL
1872 {
1873 /* We want to load the scratch register using the same mode as
1874 the ultimate destination. */
6619e96c
AM
1875 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1876
dd8c13e3
JL
1877 emit_move_insn (scratch_reg, operand1);
1878 }
1879
1880 /* And emit the insn to set the ultimate destination. We know that
1881 the scratch register has the same mode as the destination at this
1882 point. */
4d3cea21
JL
1883 emit_move_insn (operand0, scratch_reg);
1884 return 1;
1885 }
d3ccfbb9 1886
d8f95bed 1887 /* Handle the most common case: storing into a register. */
d3ccfbb9 1888 if (register_operand (operand0, mode))
188538df 1889 {
9a201645
JDA
1890 /* Legitimize TLS symbol references. This happens for references
1891 that aren't a legitimate constant. */
1892 if (PA_SYMBOL_REF_TLS_P (operand1))
1893 operand1 = legitimize_tls_address (operand1);
1894
188538df 1895 if (register_operand (operand1, mode)
b8e42321 1896 || (GET_CODE (operand1) == CONST_INT
5877e54e 1897 && pa_cint_ok_for_move (UINTVAL (operand1)))
f048ca47 1898 || (operand1 == CONST0_RTX (mode))
188538df 1899 || (GET_CODE (operand1) == HIGH
80225b66 1900 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
188538df
TG
1901 /* Only `general_operands' can come here, so MEM is ok. */
1902 || GET_CODE (operand1) == MEM)
1903 {
d8f95bed
JDA
1904 /* Various sets are created during RTL generation which don't
1905 have the REG_POINTER flag correctly set. After the CSE pass,
1906 instruction recognition can fail if we don't consistently
1907 set this flag when performing register copies. This should
1908 also improve the opportunities for creating insns that use
1909 unscaled indexing. */
1910 if (REG_P (operand0) && REG_P (operand1))
1911 {
1912 if (REG_POINTER (operand1)
1913 && !REG_POINTER (operand0)
1914 && !HARD_REGISTER_P (operand0))
1915 copy_reg_pointer (operand0, operand1);
d8f95bed
JDA
1916 }
1917
1918 /* When MEMs are broken out, the REG_POINTER flag doesn't
1919 get set. In some cases, we can set the REG_POINTER flag
1920 from the declaration for the MEM. */
1921 if (REG_P (operand0)
1922 && GET_CODE (operand1) == MEM
1923 && !REG_POINTER (operand0))
1924 {
1925 tree decl = MEM_EXPR (operand1);
1926
1927 /* Set the register pointer flag and register alignment
1928 if the declaration for this memory reference is a
077c8ada
SE
1929 pointer type. */
1930 if (decl)
d8f95bed
JDA
1931 {
1932 tree type;
1933
1934 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1935 tree operand 1. */
1936 if (TREE_CODE (decl) == COMPONENT_REF)
1937 decl = TREE_OPERAND (decl, 1);
1938
1939 type = TREE_TYPE (decl);
dd25a747 1940 type = strip_array_types (type);
d8f95bed
JDA
1941
1942 if (POINTER_TYPE_P (type))
25b75a48 1943 mark_reg_pointer (operand0, BITS_PER_UNIT);
d8f95bed
JDA
1944 }
1945 }
1946
f7df4a84 1947 emit_insn (gen_rtx_SET (operand0, operand1));
188538df
TG
1948 return 1;
1949 }
1950 }
1951 else if (GET_CODE (operand0) == MEM)
1952 {
d66dec28
JL
1953 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1954 && !(reload_in_progress || reload_completed))
1955 {
1956 rtx temp = gen_reg_rtx (DFmode);
1957
f7df4a84
RS
1958 emit_insn (gen_rtx_SET (temp, operand1));
1959 emit_insn (gen_rtx_SET (operand0, temp));
d66dec28
JL
1960 return 1;
1961 }
f048ca47 1962 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
188538df
TG
1963 {
1964 /* Run this case quickly. */
f7df4a84 1965 emit_insn (gen_rtx_SET (operand0, operand1));
188538df
TG
1966 return 1;
1967 }
1bc695cd 1968 if (! (reload_in_progress || reload_completed))
188538df
TG
1969 {
1970 operands[0] = validize_mem (operand0);
1971 operands[1] = operand1 = force_reg (mode, operand1);
1972 }
1973 }
1974
44201dba
JL
1975 /* Simplify the source if we need to.
1976 Note we do have to handle function labels here, even though we do
1977 not consider them legitimate constants. Loop optimizations can
06387d7c 1978 call the emit_move_xxx with one as a source. */
f1c7ce82 1979 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
43940f6b 1980 || (GET_CODE (operand1) == HIGH
d92f4df0
JDA
1981 && symbolic_operand (XEXP (operand1, 0), mode))
1982 || function_label_operand (operand1, VOIDmode)
093a6c99 1983 || tls_referenced_p (operand1))
188538df 1984 {
43940f6b
JL
1985 int ishighonly = 0;
1986
1987 if (GET_CODE (operand1) == HIGH)
1988 {
1989 ishighonly = 1;
1990 operand1 = XEXP (operand1, 0);
1991 }
188538df
TG
1992 if (symbolic_operand (operand1, mode))
1993 {
5eceed92 1994 /* Argh. The assembler and linker can't handle arithmetic
b0fabad3 1995 involving plabels.
5eceed92 1996
b0fabad3
JL
1997 So we force the plabel into memory, load operand0 from
1998 the memory location, then add in the constant part. */
44201dba
JL
1999 if ((GET_CODE (operand1) == CONST
2000 && GET_CODE (XEXP (operand1, 0)) == PLUS
9c575e20
JDA
2001 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2002 VOIDmode))
2003 || function_label_operand (operand1, VOIDmode))
5eceed92 2004 {
8e64b41a 2005 rtx temp, const_part;
b0fabad3
JL
2006
2007 /* Figure out what (if any) scratch register to use. */
2008 if (reload_in_progress || reload_completed)
1b8ad134
JL
2009 {
2010 scratch_reg = scratch_reg ? scratch_reg : operand0;
2011 /* SCRATCH_REG will hold an address and maybe the actual
2012 data. We want it in WORD_MODE regardless of what mode it
2013 was originally given to us. */
6619e96c 2014 scratch_reg = force_mode (word_mode, scratch_reg);
1b8ad134 2015 }
b0fabad3
JL
2016 else if (flag_pic)
2017 scratch_reg = gen_reg_rtx (Pmode);
2018
44201dba
JL
2019 if (GET_CODE (operand1) == CONST)
2020 {
2021 /* Save away the constant part of the expression. */
2022 const_part = XEXP (XEXP (operand1, 0), 1);
144d51f9 2023 gcc_assert (GET_CODE (const_part) == CONST_INT);
44201dba
JL
2024
2025 /* Force the function label into memory. */
2026 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2027 }
2028 else
2029 {
2030 /* No constant part. */
2031 const_part = NULL_RTX;
5eceed92 2032
44201dba
JL
2033 /* Force the function label into memory. */
2034 temp = force_const_mem (mode, operand1);
2035 }
6619e96c 2036
b0fabad3
JL
2037
2038 /* Get the address of the memory location. PIC-ify it if
2039 necessary. */
2040 temp = XEXP (temp, 0);
2041 if (flag_pic)
2042 temp = legitimize_pic_address (temp, mode, scratch_reg);
2043
2044 /* Put the address of the memory location into our destination
2045 register. */
2046 operands[1] = temp;
ae9d61ab 2047 pa_emit_move_sequence (operands, mode, scratch_reg);
b0fabad3
JL
2048
2049 /* Now load from the memory location into our destination
2050 register. */
ad2c71b7 2051 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
ae9d61ab 2052 pa_emit_move_sequence (operands, mode, scratch_reg);
b0fabad3
JL
2053
2054 /* And add back in the constant part. */
44201dba
JL
2055 if (const_part != NULL_RTX)
2056 expand_inc (operand0, const_part);
b0fabad3
JL
2057
2058 return 1;
5eceed92
JL
2059 }
2060
188538df
TG
2061 if (flag_pic)
2062 {
283b768c 2063 rtx_insn *insn;
1bc695cd
JL
2064 rtx temp;
2065
2066 if (reload_in_progress || reload_completed)
1b8ad134
JL
2067 {
2068 temp = scratch_reg ? scratch_reg : operand0;
2069 /* TEMP will hold an address and maybe the actual
2070 data. We want it in WORD_MODE regardless of what mode it
2071 was originally given to us. */
6619e96c 2072 temp = force_mode (word_mode, temp);
1b8ad134 2073 }
1bc695cd
JL
2074 else
2075 temp = gen_reg_rtx (Pmode);
23f6f34f 2076
283b768c
JDA
2077 /* Force (const (plus (symbol) (const_int))) to memory
2078 if the const_int will not fit in 14 bits. Although
2079 this requires a relocation, the instruction sequence
2080 needed to load the value is shorter. */
b0fabad3 2081 if (GET_CODE (operand1) == CONST
bc4a9f17
JL
2082 && GET_CODE (XEXP (operand1, 0)) == PLUS
2083 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
283b768c 2084 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
bc4a9f17 2085 {
283b768c
JDA
2086 rtx x, m = force_const_mem (mode, operand1);
2087
2088 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2089 x = replace_equiv_address (m, x);
2090 insn = emit_move_insn (operand0, x);
bc4a9f17 2091 }
5eceed92
JL
2092 else
2093 {
2094 operands[1] = legitimize_pic_address (operand1, mode, temp);
d8f95bed
JDA
2095 if (REG_P (operand0) && REG_P (operands[1]))
2096 copy_reg_pointer (operand0, operands[1]);
283b768c 2097 insn = emit_move_insn (operand0, operands[1]);
5eceed92 2098 }
283b768c
JDA
2099
2100 /* Put a REG_EQUAL note on this insn. */
2101 set_unique_reg_note (insn, REG_EQUAL, operand1);
188538df 2102 }
6bb36601
JL
2103 /* On the HPPA, references to data space are supposed to use dp,
2104 register 27, but showing it in the RTL inhibits various cse
2105 and loop optimizations. */
23f6f34f 2106 else
188538df 2107 {
5eceed92 2108 rtx temp, set;
43940f6b 2109
23f6f34f 2110 if (reload_in_progress || reload_completed)
1b8ad134
JL
2111 {
2112 temp = scratch_reg ? scratch_reg : operand0;
2113 /* TEMP will hold an address and maybe the actual
2114 data. We want it in WORD_MODE regardless of what mode it
2115 was originally given to us. */
6619e96c 2116 temp = force_mode (word_mode, temp);
1b8ad134 2117 }
43940f6b
JL
2118 else
2119 temp = gen_reg_rtx (mode);
2120
68944452 2121 /* Loading a SYMBOL_REF into a register makes that register
6619e96c 2122 safe to be used as the base in an indexed address.
68944452
JL
2123
2124 Don't mark hard registers though. That loses. */
c34d858f
RK
2125 if (GET_CODE (operand0) == REG
2126 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
d8f95bed 2127 mark_reg_pointer (operand0, BITS_PER_UNIT);
68944452 2128 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
d8f95bed
JDA
2129 mark_reg_pointer (temp, BITS_PER_UNIT);
2130
43940f6b 2131 if (ishighonly)
f7df4a84 2132 set = gen_rtx_SET (operand0, temp);
43940f6b 2133 else
f7df4a84 2134 set = gen_rtx_SET (operand0,
ad2c71b7 2135 gen_rtx_LO_SUM (mode, temp, operand1));
23f6f34f 2136
f7df4a84 2137 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
b0ce651a 2138 emit_insn (set);
326bc2de 2139
188538df 2140 }
43940f6b 2141 return 1;
188538df 2142 }
093a6c99 2143 else if (tls_referenced_p (operand1))
51076f96
RC
2144 {
2145 rtx tmp = operand1;
2146 rtx addend = NULL;
2147
2148 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2149 {
2150 addend = XEXP (XEXP (tmp, 0), 1);
2151 tmp = XEXP (XEXP (tmp, 0), 0);
2152 }
2153
2154 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2155 tmp = legitimize_tls_address (tmp);
2156 if (addend)
2157 {
2158 tmp = gen_rtx_PLUS (mode, tmp, addend);
2159 tmp = force_operand (tmp, operands[0]);
2160 }
2161 operands[1] = tmp;
2162 }
a1747d2c 2163 else if (GET_CODE (operand1) != CONST_INT
5877e54e 2164 || !pa_cint_ok_for_move (UINTVAL (operand1)))
188538df 2165 {
e0d80a58
JL
2166 rtx temp;
2167 rtx_insn *insn;
a4295210 2168 rtx op1 = operand1;
4cce9dd8 2169 HOST_WIDE_INT value = 0;
a4295210
JDA
2170 HOST_WIDE_INT insv = 0;
2171 int insert = 0;
2172
4cce9dd8
RS
2173 if (GET_CODE (operand1) == CONST_INT)
2174 value = INTVAL (operand1);
2175
a4295210
JDA
2176 if (TARGET_64BIT
2177 && GET_CODE (operand1) == CONST_INT
e0c556d3 2178 && HOST_BITS_PER_WIDE_INT > 32
520babc7
JL
2179 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2180 {
e0c556d3 2181 HOST_WIDE_INT nval;
520babc7 2182
b8e42321
JDA
2183 /* Extract the low order 32 bits of the value and sign extend.
2184 If the new value is the same as the original value, we can
2185 can use the original value as-is. If the new value is
2186 different, we use it and insert the most-significant 32-bits
2187 of the original value into the final result. */
a4295210 2188 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
e0c556d3 2189 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
a4295210 2190 if (value != nval)
520babc7 2191 {
b8e42321 2192#if HOST_BITS_PER_WIDE_INT > 32
a4295210 2193 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
b8e42321 2194#endif
a4295210
JDA
2195 insert = 1;
2196 value = nval;
520babc7
JL
2197 operand1 = GEN_INT (nval);
2198 }
2199 }
1bc695cd
JL
2200
2201 if (reload_in_progress || reload_completed)
a4295210 2202 temp = scratch_reg ? scratch_reg : operand0;
1bc695cd
JL
2203 else
2204 temp = gen_reg_rtx (mode);
2205
47abc309
JDA
2206 /* We don't directly split DImode constants on 32-bit targets
2207 because PLUS uses an 11-bit immediate and the insn sequence
2208 generated is not as efficient as the one using HIGH/LO_SUM. */
2209 if (GET_CODE (operand1) == CONST_INT
0eab7815 2210 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
a4295210
JDA
2211 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2212 && !insert)
b8e42321 2213 {
47abc309 2214 /* Directly break constant into high and low parts. This
b8e42321
JDA
2215 provides better optimization opportunities because various
2216 passes recognize constants split with PLUS but not LO_SUM.
2217 We use a 14-bit signed low part except when the addition
2218 of 0x4000 to the high part might change the sign of the
2219 high part. */
b8e42321
JDA
2220 HOST_WIDE_INT low = value & 0x3fff;
2221 HOST_WIDE_INT high = value & ~ 0x3fff;
2222
2223 if (low >= 0x2000)
2224 {
2225 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2226 high += 0x2000;
2227 else
2228 high += 0x4000;
2229 }
2230
2231 low = value - high;
520babc7 2232
f7df4a84 2233 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
b8e42321
JDA
2234 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2235 }
2236 else
520babc7 2237 {
f7df4a84 2238 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
b8e42321 2239 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
520babc7 2240 }
6619e96c 2241
a4295210
JDA
2242 insn = emit_move_insn (operands[0], operands[1]);
2243
2244 /* Now insert the most significant 32 bits of the value
2245 into the register. When we don't have a second register
2246 available, it could take up to nine instructions to load
2247 a 64-bit integer constant. Prior to reload, we force
2248 constants that would take more than three instructions
2249 to load to the constant pool. During and after reload,
2250 we have to handle all possible values. */
2251 if (insert)
2252 {
2253 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2254 register and the value to be inserted is outside the
2255 range that can be loaded with three depdi instructions. */
2256 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2257 {
2258 operand1 = GEN_INT (insv);
2259
f7df4a84 2260 emit_insn (gen_rtx_SET (temp,
a4295210
JDA
2261 gen_rtx_HIGH (mode, operand1)));
2262 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
f0d54148 2263 if (mode == DImode)
225f4747
JDA
2264 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2265 const0_rtx, temp));
f0d54148 2266 else
225f4747
JDA
2267 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2268 const0_rtx, temp));
a4295210
JDA
2269 }
2270 else
2271 {
2272 int len = 5, pos = 27;
2273
2274 /* Insert the bits using the depdi instruction. */
2275 while (pos >= 0)
2276 {
2277 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2278 HOST_WIDE_INT sign = v5 < 0;
2279
2280 /* Left extend the insertion. */
2281 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2282 while (pos > 0 && (insv & 1) == sign)
2283 {
2284 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2285 len += 1;
2286 pos -= 1;
2287 }
2288
f0d54148 2289 if (mode == DImode)
225f4747
JDA
2290 insn = emit_insn (gen_insvdi (operand0,
2291 GEN_INT (len),
2292 GEN_INT (pos),
2293 GEN_INT (v5)));
f0d54148 2294 else
225f4747
JDA
2295 insn = emit_insn (gen_insvsi (operand0,
2296 GEN_INT (len),
2297 GEN_INT (pos),
2298 GEN_INT (v5)));
a4295210
JDA
2299
2300 len = pos > 0 && pos < 5 ? pos : 5;
2301 pos -= len;
2302 }
2303 }
2304 }
b8e42321 2305
bd94cb6e 2306 set_unique_reg_note (insn, REG_EQUAL, op1);
b8e42321 2307
520babc7 2308 return 1;
188538df
TG
2309 }
2310 }
2311 /* Now have insn-emit do whatever it normally does. */
2312 return 0;
2313}
2314
c77c286a 2315/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
c4bb6b38 2316 it will need a link/runtime reloc). */
c77c286a
JL
2317
2318int
ae9d61ab 2319pa_reloc_needed (tree exp)
c77c286a
JL
2320{
2321 int reloc = 0;
2322
2323 switch (TREE_CODE (exp))
2324 {
2325 case ADDR_EXPR:
2326 return 1;
2327
5be014d5 2328 case POINTER_PLUS_EXPR:
c77c286a
JL
2329 case PLUS_EXPR:
2330 case MINUS_EXPR:
ae9d61ab
JDA
2331 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2332 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
c77c286a
JL
2333 break;
2334
1043771b 2335 CASE_CONVERT:
c77c286a 2336 case NON_LVALUE_EXPR:
ae9d61ab 2337 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
c77c286a
JL
2338 break;
2339
2340 case CONSTRUCTOR:
2341 {
28f155be
GB
2342 tree value;
2343 unsigned HOST_WIDE_INT ix;
2344
2345 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2346 if (value)
ae9d61ab 2347 reloc |= pa_reloc_needed (value);
c77c286a
JL
2348 }
2349 break;
2350
2351 case ERROR_MARK:
2352 break;
51723711
KG
2353
2354 default:
2355 break;
c77c286a
JL
2356 }
2357 return reloc;
2358}
2359
188538df
TG
2360\f
2361/* Return the best assembler insn template
71cc389b 2362 for moving operands[1] into operands[0] as a fullword. */
519104fe 2363const char *
ae9d61ab 2364pa_singlemove_string (rtx *operands)
188538df 2365{
0c235d7e
TG
2366 HOST_WIDE_INT intval;
2367
188538df
TG
2368 if (GET_CODE (operands[0]) == MEM)
2369 return "stw %r1,%0";
0c235d7e 2370 if (GET_CODE (operands[1]) == MEM)
188538df 2371 return "ldw %1,%0";
0c235d7e 2372 if (GET_CODE (operands[1]) == CONST_DOUBLE)
e5c2baa1 2373 {
0c235d7e 2374 long i;
e5c2baa1 2375
144d51f9 2376 gcc_assert (GET_MODE (operands[1]) == SFmode);
e5c2baa1 2377
0c235d7e
TG
2378 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2379 bit pattern. */
34a72c33 2380 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
e5c2baa1 2381
0c235d7e
TG
2382 operands[1] = GEN_INT (i);
2383 /* Fall through to CONST_INT case. */
2384 }
2385 if (GET_CODE (operands[1]) == CONST_INT)
e5c2baa1 2386 {
0c235d7e
TG
2387 intval = INTVAL (operands[1]);
2388
2389 if (VAL_14_BITS_P (intval))
2390 return "ldi %1,%0";
2391 else if ((intval & 0x7ff) == 0)
2392 return "ldil L'%1,%0";
ae9d61ab 2393 else if (pa_zdepi_cint_p (intval))
f38b27c7 2394 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
e5c2baa1
RS
2395 else
2396 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2397 }
188538df
TG
2398 return "copy %1,%0";
2399}
2400\f
2401
f133af4c
TG
2402/* Compute position (in OP[1]) and width (in OP[2])
2403 useful for copying IMM to a register using the zdepi
2404 instructions. Store the immediate value to insert in OP[0]. */
519104fe 2405static void
b7849684 2406compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
c819adf2 2407{
0e7f4c19 2408 int lsb, len;
c819adf2 2409
0e7f4c19
TG
2410 /* Find the least significant set bit in IMM. */
2411 for (lsb = 0; lsb < 32; lsb++)
c819adf2 2412 {
0e7f4c19 2413 if ((imm & 1) != 0)
c819adf2 2414 break;
0e7f4c19 2415 imm >>= 1;
c819adf2
TG
2416 }
2417
0e7f4c19
TG
2418 /* Choose variants based on *sign* of the 5-bit field. */
2419 if ((imm & 0x10) == 0)
2420 len = (lsb <= 28) ? 4 : 32 - lsb;
c819adf2
TG
2421 else
2422 {
0e7f4c19 2423 /* Find the width of the bitstring in IMM. */
ef8d9a0e 2424 for (len = 5; len < 32 - lsb; len++)
c819adf2 2425 {
ef8d9a0e 2426 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
c819adf2 2427 break;
c819adf2
TG
2428 }
2429
0e7f4c19
TG
2430 /* Sign extend IMM as a 5-bit value. */
2431 imm = (imm & 0xf) - 0x10;
c819adf2
TG
2432 }
2433
a1747d2c
TG
2434 op[0] = imm;
2435 op[1] = 31 - lsb;
2436 op[2] = len;
c819adf2
TG
2437}
2438
520babc7
JL
2439/* Compute position (in OP[1]) and width (in OP[2])
2440 useful for copying IMM to a register using the depdi,z
2441 instructions. Store the immediate value to insert in OP[0]. */
ae9d61ab
JDA
2442
2443static void
b7849684 2444compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
520babc7 2445{
ef8d9a0e
JDA
2446 int lsb, len, maxlen;
2447
2448 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
520babc7
JL
2449
2450 /* Find the least significant set bit in IMM. */
ef8d9a0e 2451 for (lsb = 0; lsb < maxlen; lsb++)
520babc7
JL
2452 {
2453 if ((imm & 1) != 0)
2454 break;
2455 imm >>= 1;
2456 }
2457
2458 /* Choose variants based on *sign* of the 5-bit field. */
2459 if ((imm & 0x10) == 0)
ef8d9a0e 2460 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
520babc7
JL
2461 else
2462 {
2463 /* Find the width of the bitstring in IMM. */
ef8d9a0e 2464 for (len = 5; len < maxlen - lsb; len++)
520babc7 2465 {
831c1763 2466 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
520babc7
JL
2467 break;
2468 }
2469
ef8d9a0e
JDA
2470 /* Extend length if host is narrow and IMM is negative. */
2471 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2472 len += 32;
2473
520babc7
JL
2474 /* Sign extend IMM as a 5-bit value. */
2475 imm = (imm & 0xf) - 0x10;
2476 }
2477
2478 op[0] = imm;
2479 op[1] = 63 - lsb;
2480 op[2] = len;
2481}
2482
188538df
TG
2483/* Output assembler code to perform a doubleword move insn
2484 with operands OPERANDS. */
2485
519104fe 2486const char *
ae9d61ab 2487pa_output_move_double (rtx *operands)
188538df
TG
2488{
2489 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2490 rtx latehalf[2];
2491 rtx addreg0 = 0, addreg1 = 0;
2b564b9c 2492 int highonly = 0;
188538df
TG
2493
2494 /* First classify both operands. */
2495
2496 if (REG_P (operands[0]))
2497 optype0 = REGOP;
2498 else if (offsettable_memref_p (operands[0]))
2499 optype0 = OFFSOP;
2500 else if (GET_CODE (operands[0]) == MEM)
2501 optype0 = MEMOP;
2502 else
2503 optype0 = RNDOP;
2504
2505 if (REG_P (operands[1]))
2506 optype1 = REGOP;
2507 else if (CONSTANT_P (operands[1]))
2508 optype1 = CNSTOP;
2509 else if (offsettable_memref_p (operands[1]))
2510 optype1 = OFFSOP;
2511 else if (GET_CODE (operands[1]) == MEM)
2512 optype1 = MEMOP;
2513 else
2514 optype1 = RNDOP;
2515
2516 /* Check for the cases that the operand constraints are not
144d51f9
NS
2517 supposed to allow to happen. */
2518 gcc_assert (optype0 == REGOP || optype1 == REGOP);
188538df 2519
5401050b
JDA
2520 /* Handle copies between general and floating registers. */
2521
2522 if (optype0 == REGOP && optype1 == REGOP
2523 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2524 {
2525 if (FP_REG_P (operands[0]))
2526 {
2527 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2528 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2529 return "{fldds|fldd} -16(%%sp),%0";
2530 }
2531 else
2532 {
2533 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2534 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2535 return "{ldws|ldw} -12(%%sp),%R0";
2536 }
2537 }
2538
188538df
TG
2539 /* Handle auto decrementing and incrementing loads and stores
2540 specifically, since the structure of the function doesn't work
2541 for them without major modification. Do it better when we learn
2542 this port about the general inc/dec addressing of PA.
2543 (This was written by tege. Chide him if it doesn't work.) */
2544
2545 if (optype0 == MEMOP)
2546 {
e37ce5f6
JL
2547 /* We have to output the address syntax ourselves, since print_operand
2548 doesn't deal with the addresses we want to use. Fix this later. */
2549
188538df 2550 rtx addr = XEXP (operands[0], 0);
e37ce5f6 2551 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
188538df 2552 {
ad2c71b7 2553 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
e37ce5f6
JL
2554
2555 operands[0] = XEXP (addr, 0);
144d51f9
NS
2556 gcc_assert (GET_CODE (operands[1]) == REG
2557 && GET_CODE (operands[0]) == REG);
e37ce5f6 2558
144d51f9
NS
2559 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2560
2561 /* No overlap between high target register and address
2562 register. (We do this in a non-obvious way to
2563 save a register file writeback) */
2564 if (GET_CODE (addr) == POST_INC)
2565 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2566 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
9682683d 2567 }
e37ce5f6 2568 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
9682683d 2569 {
ad2c71b7 2570 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
e37ce5f6
JL
2571
2572 operands[0] = XEXP (addr, 0);
144d51f9
NS
2573 gcc_assert (GET_CODE (operands[1]) == REG
2574 && GET_CODE (operands[0]) == REG);
2575
2576 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2577 /* No overlap between high target register and address
2578 register. (We do this in a non-obvious way to save a
2579 register file writeback) */
2580 if (GET_CODE (addr) == PRE_INC)
2581 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2582 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
188538df
TG
2583 }
2584 }
2585 if (optype1 == MEMOP)
2586 {
2587 /* We have to output the address syntax ourselves, since print_operand
2588 doesn't deal with the addresses we want to use. Fix this later. */
2589
2590 rtx addr = XEXP (operands[1], 0);
2591 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2592 {
ad2c71b7 2593 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
188538df
TG
2594
2595 operands[1] = XEXP (addr, 0);
144d51f9
NS
2596 gcc_assert (GET_CODE (operands[0]) == REG
2597 && GET_CODE (operands[1]) == REG);
188538df
TG
2598
2599 if (!reg_overlap_mentioned_p (high_reg, addr))
2600 {
2601 /* No overlap between high target register and address
dd605bb4 2602 register. (We do this in a non-obvious way to
188538df
TG
2603 save a register file writeback) */
2604 if (GET_CODE (addr) == POST_INC)
f38b27c7 2605 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
6126a380 2606 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
188538df
TG
2607 }
2608 else
2609 {
2610 /* This is an undefined situation. We should load into the
2611 address register *and* update that register. Probably
2612 we don't need to handle this at all. */
2613 if (GET_CODE (addr) == POST_INC)
f38b27c7
JL
2614 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2615 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
188538df
TG
2616 }
2617 }
2618 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2619 {
ad2c71b7 2620 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
188538df
TG
2621
2622 operands[1] = XEXP (addr, 0);
144d51f9
NS
2623 gcc_assert (GET_CODE (operands[0]) == REG
2624 && GET_CODE (operands[1]) == REG);
188538df
TG
2625
2626 if (!reg_overlap_mentioned_p (high_reg, addr))
2627 {
2628 /* No overlap between high target register and address
dd605bb4 2629 register. (We do this in a non-obvious way to
188538df
TG
2630 save a register file writeback) */
2631 if (GET_CODE (addr) == PRE_INC)
f38b27c7
JL
2632 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2633 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
188538df
TG
2634 }
2635 else
2636 {
2637 /* This is an undefined situation. We should load into the
2638 address register *and* update that register. Probably
2639 we don't need to handle this at all. */
2640 if (GET_CODE (addr) == PRE_INC)
f38b27c7
JL
2641 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2642 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
188538df
TG
2643 }
2644 }
a89974a2
JL
2645 else if (GET_CODE (addr) == PLUS
2646 && GET_CODE (XEXP (addr, 0)) == MULT)
2647 {
4c6d8726 2648 rtx xoperands[4];
a89974a2 2649
166d826f
JDA
2650 /* Load address into left half of destination register. */
2651 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2652 xoperands[1] = XEXP (addr, 1);
2653 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2654 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2655 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2656 xoperands);
2657 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2658 }
2659 else if (GET_CODE (addr) == PLUS
2660 && REG_P (XEXP (addr, 0))
2661 && REG_P (XEXP (addr, 1)))
2662 {
2663 rtx xoperands[3];
2664
2665 /* Load address into left half of destination register. */
2666 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2667 xoperands[1] = XEXP (addr, 0);
2668 xoperands[2] = XEXP (addr, 1);
2669 output_asm_insn ("{addl|add,l} %1,%2,%0",
2670 xoperands);
2671 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
a89974a2 2672 }
188538df
TG
2673 }
2674
2675 /* If an operand is an unoffsettable memory ref, find a register
2676 we can increment temporarily to make it refer to the second word. */
2677
2678 if (optype0 == MEMOP)
2679 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2680
2681 if (optype1 == MEMOP)
2682 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2683
2684 /* Ok, we can do one word at a time.
2685 Normally we do the low-numbered word first.
2686
2687 In either case, set up in LATEHALF the operands to use
2688 for the high-numbered word and in some cases alter the
2689 operands in OPERANDS to be suitable for the low-numbered word. */
2690
2691 if (optype0 == REGOP)
ad2c71b7 2692 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
188538df 2693 else if (optype0 == OFFSOP)
325fefe0 2694 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
188538df
TG
2695 else
2696 latehalf[0] = operands[0];
2697
2698 if (optype1 == REGOP)
ad2c71b7 2699 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
188538df 2700 else if (optype1 == OFFSOP)
325fefe0 2701 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
188538df 2702 else if (optype1 == CNSTOP)
2b564b9c
JDA
2703 {
2704 if (GET_CODE (operands[1]) == HIGH)
2705 {
2706 operands[1] = XEXP (operands[1], 0);
2707 highonly = 1;
2708 }
2709 split_double (operands[1], &operands[1], &latehalf[1]);
2710 }
188538df
TG
2711 else
2712 latehalf[1] = operands[1];
2713
2714 /* If the first move would clobber the source of the second one,
2715 do them in the other order.
2716
bad883f8 2717 This can happen in two cases:
188538df 2718
bad883f8
JL
2719 mem -> register where the first half of the destination register
2720 is the same register used in the memory's address. Reload
2721 can create such insns.
188538df 2722
bad883f8 2723 mem in this case will be either register indirect or register
6619e96c 2724 indirect plus a valid offset.
bad883f8
JL
2725
2726 register -> register move where REGNO(dst) == REGNO(src + 1)
6619e96c 2727 someone (Tim/Tege?) claimed this can happen for parameter loads.
bad883f8
JL
2728
2729 Handle mem -> register case first. */
2730 if (optype0 == REGOP
2731 && (optype1 == MEMOP || optype1 == OFFSOP)
c9bd6bcd 2732 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
188538df 2733 {
188538df
TG
2734 /* Do the late half first. */
2735 if (addreg1)
498ee10c 2736 output_asm_insn ("ldo 4(%0),%0", &addreg1);
ae9d61ab 2737 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
bad883f8
JL
2738
2739 /* Then clobber. */
188538df 2740 if (addreg1)
498ee10c 2741 output_asm_insn ("ldo -4(%0),%0", &addreg1);
ae9d61ab 2742 return pa_singlemove_string (operands);
188538df
TG
2743 }
2744
bad883f8 2745 /* Now handle register -> register case. */
63a1f834
TG
2746 if (optype0 == REGOP && optype1 == REGOP
2747 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2748 {
ae9d61ab
JDA
2749 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2750 return pa_singlemove_string (operands);
63a1f834
TG
2751 }
2752
188538df
TG
2753 /* Normal case: do the two words, low-numbered first. */
2754
ae9d61ab 2755 output_asm_insn (pa_singlemove_string (operands), operands);
188538df
TG
2756
2757 /* Make any unoffsettable addresses point at high-numbered word. */
2758 if (addreg0)
498ee10c 2759 output_asm_insn ("ldo 4(%0),%0", &addreg0);
188538df 2760 if (addreg1)
498ee10c 2761 output_asm_insn ("ldo 4(%0),%0", &addreg1);
188538df 2762
2b564b9c
JDA
2763 /* Do high-numbered word. */
2764 if (highonly)
2765 output_asm_insn ("ldil L'%1,%0", latehalf);
2766 else
2767 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
188538df
TG
2768
2769 /* Undo the adds we just did. */
2770 if (addreg0)
498ee10c 2771 output_asm_insn ("ldo -4(%0),%0", &addreg0);
188538df 2772 if (addreg1)
498ee10c 2773 output_asm_insn ("ldo -4(%0),%0", &addreg1);
188538df
TG
2774
2775 return "";
2776}
2777\f
519104fe 2778const char *
ae9d61ab 2779pa_output_fp_move_double (rtx *operands)
188538df
TG
2780{
2781 if (FP_REG_P (operands[0]))
2782 {
23f6f34f 2783 if (FP_REG_P (operands[1])
f048ca47 2784 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
55abf18a 2785 output_asm_insn ("fcpy,dbl %f1,%0", operands);
23f6f34f 2786 else
2414e0e2 2787 output_asm_insn ("fldd%F1 %1,%0", operands);
188538df
TG
2788 }
2789 else if (FP_REG_P (operands[1]))
2790 {
2414e0e2 2791 output_asm_insn ("fstd%F0 %1,%0", operands);
188538df 2792 }
144d51f9 2793 else
f048ca47 2794 {
144d51f9
NS
2795 rtx xoperands[2];
2796
2797 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2798
23f6f34f 2799 /* This is a pain. You have to be prepared to deal with an
ddd5a7c1 2800 arbitrary address here including pre/post increment/decrement.
f048ca47
JL
2801
2802 so avoid this in the MD. */
144d51f9
NS
2803 gcc_assert (GET_CODE (operands[0]) == REG);
2804
2805 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2806 xoperands[0] = operands[0];
2807 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
f048ca47 2808 }
188538df
TG
2809 return "";
2810}
2811\f
2812/* Return a REG that occurs in ADDR with coefficient 1.
2813 ADDR can be effectively incremented by incrementing REG. */
2814
2815static rtx
b7849684 2816find_addr_reg (rtx addr)
188538df
TG
2817{
2818 while (GET_CODE (addr) == PLUS)
2819 {
2820 if (GET_CODE (XEXP (addr, 0)) == REG)
2821 addr = XEXP (addr, 0);
2822 else if (GET_CODE (XEXP (addr, 1)) == REG)
2823 addr = XEXP (addr, 1);
2824 else if (CONSTANT_P (XEXP (addr, 0)))
2825 addr = XEXP (addr, 1);
2826 else if (CONSTANT_P (XEXP (addr, 1)))
2827 addr = XEXP (addr, 0);
2828 else
144d51f9 2829 gcc_unreachable ();
188538df 2830 }
144d51f9
NS
2831 gcc_assert (GET_CODE (addr) == REG);
2832 return addr;
188538df
TG
2833}
2834
188538df
TG
2835/* Emit code to perform a block move.
2836
188538df
TG
2837 OPERANDS[0] is the destination pointer as a REG, clobbered.
2838 OPERANDS[1] is the source pointer as a REG, clobbered.
68944452 2839 OPERANDS[2] is a register for temporary storage.
188538df 2840 OPERANDS[3] is a register for temporary storage.
cdc9103c 2841 OPERANDS[4] is the size as a CONST_INT
6619e96c 2842 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
71cc389b 2843 OPERANDS[6] is another temporary register. */
188538df 2844
519104fe 2845const char *
ae9d61ab 2846pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
188538df
TG
2847{
2848 int align = INTVAL (operands[5]);
68944452 2849 unsigned long n_bytes = INTVAL (operands[4]);
188538df 2850
cdc9103c 2851 /* We can't move more than a word at a time because the PA
188538df 2852 has no longer integer move insns. (Could use fp mem ops?) */
cdc9103c
JDA
2853 if (align > (TARGET_64BIT ? 8 : 4))
2854 align = (TARGET_64BIT ? 8 : 4);
188538df 2855
68944452
JL
2856 /* Note that we know each loop below will execute at least twice
2857 (else we would have open-coded the copy). */
2858 switch (align)
188538df 2859 {
cdc9103c
JDA
2860 case 8:
2861 /* Pre-adjust the loop counter. */
2862 operands[4] = GEN_INT (n_bytes - 16);
2863 output_asm_insn ("ldi %4,%2", operands);
2864
2865 /* Copying loop. */
2866 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2867 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2868 output_asm_insn ("std,ma %3,8(%0)", operands);
2869 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2870 output_asm_insn ("std,ma %6,8(%0)", operands);
2871
2872 /* Handle the residual. There could be up to 7 bytes of
2873 residual to copy! */
2874 if (n_bytes % 16 != 0)
2875 {
2876 operands[4] = GEN_INT (n_bytes % 8);
2877 if (n_bytes % 16 >= 8)
2878 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2879 if (n_bytes % 8 != 0)
2880 output_asm_insn ("ldd 0(%1),%6", operands);
2881 if (n_bytes % 16 >= 8)
2882 output_asm_insn ("std,ma %3,8(%0)", operands);
2883 if (n_bytes % 8 != 0)
2884 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2885 }
2886 return "";
2887
68944452
JL
2888 case 4:
2889 /* Pre-adjust the loop counter. */
2890 operands[4] = GEN_INT (n_bytes - 8);
2891 output_asm_insn ("ldi %4,%2", operands);
2892
2893 /* Copying loop. */
f38b27c7
JL
2894 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2895 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2896 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
68944452 2897 output_asm_insn ("addib,>= -8,%2,.-12", operands);
f38b27c7 2898 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
68944452
JL
2899
2900 /* Handle the residual. There could be up to 7 bytes of
2901 residual to copy! */
2902 if (n_bytes % 8 != 0)
2903 {
2904 operands[4] = GEN_INT (n_bytes % 4);
2905 if (n_bytes % 8 >= 4)
f38b27c7 2906 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
68944452 2907 if (n_bytes % 4 != 0)
d2d28085 2908 output_asm_insn ("ldw 0(%1),%6", operands);
68944452 2909 if (n_bytes % 8 >= 4)
f38b27c7 2910 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
68944452 2911 if (n_bytes % 4 != 0)
f38b27c7 2912 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
68944452
JL
2913 }
2914 return "";
188538df 2915
68944452
JL
2916 case 2:
2917 /* Pre-adjust the loop counter. */
2918 operands[4] = GEN_INT (n_bytes - 4);
2919 output_asm_insn ("ldi %4,%2", operands);
188538df 2920
68944452 2921 /* Copying loop. */
f38b27c7
JL
2922 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2923 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2924 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
68944452 2925 output_asm_insn ("addib,>= -4,%2,.-12", operands);
f38b27c7 2926 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
188538df 2927
68944452
JL
2928 /* Handle the residual. */
2929 if (n_bytes % 4 != 0)
2930 {
2931 if (n_bytes % 4 >= 2)
f38b27c7 2932 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
68944452 2933 if (n_bytes % 2 != 0)
d2d28085 2934 output_asm_insn ("ldb 0(%1),%6", operands);
68944452 2935 if (n_bytes % 4 >= 2)
f38b27c7 2936 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
68944452 2937 if (n_bytes % 2 != 0)
d2d28085 2938 output_asm_insn ("stb %6,0(%0)", operands);
68944452
JL
2939 }
2940 return "";
188538df 2941
68944452
JL
2942 case 1:
2943 /* Pre-adjust the loop counter. */
2944 operands[4] = GEN_INT (n_bytes - 2);
2945 output_asm_insn ("ldi %4,%2", operands);
188538df 2946
68944452 2947 /* Copying loop. */
f38b27c7
JL
2948 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2949 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2950 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
68944452 2951 output_asm_insn ("addib,>= -2,%2,.-12", operands);
f38b27c7 2952 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
188538df 2953
68944452
JL
2954 /* Handle the residual. */
2955 if (n_bytes % 2 != 0)
2956 {
d2d28085
JL
2957 output_asm_insn ("ldb 0(%1),%3", operands);
2958 output_asm_insn ("stb %3,0(%0)", operands);
68944452
JL
2959 }
2960 return "";
188538df 2961
68944452 2962 default:
144d51f9 2963 gcc_unreachable ();
188538df 2964 }
188538df 2965}
3673e996
RS
2966
2967/* Count the number of insns necessary to handle this block move.
2968
2969 Basic structure is the same as emit_block_move, except that we
2970 count insns rather than emit them. */
2971
519104fe 2972static int
e0d80a58 2973compute_movmem_length (rtx_insn *insn)
3673e996
RS
2974{
2975 rtx pat = PATTERN (insn);
a36a47ad
GS
2976 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2977 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
68944452 2978 unsigned int n_insns = 0;
3673e996
RS
2979
2980 /* We can't move more than four bytes at a time because the PA
2981 has no longer integer move insns. (Could use fp mem ops?) */
cdc9103c
JDA
2982 if (align > (TARGET_64BIT ? 8 : 4))
2983 align = (TARGET_64BIT ? 8 : 4);
3673e996 2984
90304f64 2985 /* The basic copying loop. */
68944452 2986 n_insns = 6;
3673e996 2987
68944452
JL
2988 /* Residuals. */
2989 if (n_bytes % (2 * align) != 0)
3673e996 2990 {
90304f64
JL
2991 if ((n_bytes % (2 * align)) >= align)
2992 n_insns += 2;
2993
2994 if ((n_bytes % align) != 0)
2995 n_insns += 2;
3673e996 2996 }
68944452
JL
2997
2998 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2999 return n_insns * 4;
3673e996 3000}
cdc9103c
JDA
3001
3002/* Emit code to perform a block clear.
3003
3004 OPERANDS[0] is the destination pointer as a REG, clobbered.
3005 OPERANDS[1] is a register for temporary storage.
3006 OPERANDS[2] is the size as a CONST_INT
3007 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3008
3009const char *
ae9d61ab 3010pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
cdc9103c
JDA
3011{
3012 int align = INTVAL (operands[3]);
3013 unsigned long n_bytes = INTVAL (operands[2]);
3014
3015 /* We can't clear more than a word at a time because the PA
3016 has no longer integer move insns. */
3017 if (align > (TARGET_64BIT ? 8 : 4))
3018 align = (TARGET_64BIT ? 8 : 4);
3019
3020 /* Note that we know each loop below will execute at least twice
3021 (else we would have open-coded the copy). */
3022 switch (align)
3023 {
3024 case 8:
3025 /* Pre-adjust the loop counter. */
3026 operands[2] = GEN_INT (n_bytes - 16);
3027 output_asm_insn ("ldi %2,%1", operands);
3028
3029 /* Loop. */
3030 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3031 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3032 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3033
3034 /* Handle the residual. There could be up to 7 bytes of
3035 residual to copy! */
3036 if (n_bytes % 16 != 0)
3037 {
3038 operands[2] = GEN_INT (n_bytes % 8);
3039 if (n_bytes % 16 >= 8)
3040 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3041 if (n_bytes % 8 != 0)
3042 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3043 }
3044 return "";
3045
3046 case 4:
3047 /* Pre-adjust the loop counter. */
3048 operands[2] = GEN_INT (n_bytes - 8);
3049 output_asm_insn ("ldi %2,%1", operands);
3050
3051 /* Loop. */
3052 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3053 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3054 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3055
3056 /* Handle the residual. There could be up to 7 bytes of
3057 residual to copy! */
3058 if (n_bytes % 8 != 0)
3059 {
3060 operands[2] = GEN_INT (n_bytes % 4);
3061 if (n_bytes % 8 >= 4)
3062 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3063 if (n_bytes % 4 != 0)
3064 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3065 }
3066 return "";
3067
3068 case 2:
3069 /* Pre-adjust the loop counter. */
3070 operands[2] = GEN_INT (n_bytes - 4);
3071 output_asm_insn ("ldi %2,%1", operands);
3072
3073 /* Loop. */
3074 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3075 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3076 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3077
3078 /* Handle the residual. */
3079 if (n_bytes % 4 != 0)
3080 {
3081 if (n_bytes % 4 >= 2)
3082 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3083 if (n_bytes % 2 != 0)
3084 output_asm_insn ("stb %%r0,0(%0)", operands);
3085 }
3086 return "";
3087
3088 case 1:
3089 /* Pre-adjust the loop counter. */
3090 operands[2] = GEN_INT (n_bytes - 2);
3091 output_asm_insn ("ldi %2,%1", operands);
3092
3093 /* Loop. */
3094 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3095 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3096 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3097
3098 /* Handle the residual. */
3099 if (n_bytes % 2 != 0)
3100 output_asm_insn ("stb %%r0,0(%0)", operands);
3101
3102 return "";
3103
3104 default:
144d51f9 3105 gcc_unreachable ();
cdc9103c
JDA
3106 }
3107}
3108
3109/* Count the number of insns necessary to handle this block move.
3110
3111 Basic structure is the same as emit_block_move, except that we
3112 count insns rather than emit them. */
3113
3114static int
e0d80a58 3115compute_clrmem_length (rtx_insn *insn)
cdc9103c
JDA
3116{
3117 rtx pat = PATTERN (insn);
3118 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3119 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3120 unsigned int n_insns = 0;
3121
3122 /* We can't clear more than a word at a time because the PA
3123 has no longer integer move insns. */
3124 if (align > (TARGET_64BIT ? 8 : 4))
3125 align = (TARGET_64BIT ? 8 : 4);
3126
3127 /* The basic loop. */
3128 n_insns = 4;
3129
3130 /* Residuals. */
3131 if (n_bytes % (2 * align) != 0)
3132 {
3133 if ((n_bytes % (2 * align)) >= align)
3134 n_insns++;
3135
3136 if ((n_bytes % align) != 0)
3137 n_insns++;
3138 }
3139
3140 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3141 return n_insns * 4;
3142}
188538df
TG
3143\f
3144
519104fe 3145const char *
ae9d61ab 3146pa_output_and (rtx *operands)
0e7f4c19 3147{
d2a94ec0 3148 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
0e7f4c19 3149 {
0c235d7e 3150 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
0e7f4c19
TG
3151 int ls0, ls1, ms0, p, len;
3152
3153 for (ls0 = 0; ls0 < 32; ls0++)
3154 if ((mask & (1 << ls0)) == 0)
3155 break;
3156
3157 for (ls1 = ls0; ls1 < 32; ls1++)
3158 if ((mask & (1 << ls1)) != 0)
3159 break;
3160
3161 for (ms0 = ls1; ms0 < 32; ms0++)
3162 if ((mask & (1 << ms0)) == 0)
3163 break;
3164
144d51f9 3165 gcc_assert (ms0 == 32);
0e7f4c19
TG
3166
3167 if (ls1 == 32)
3168 {
3169 len = ls0;
3170
144d51f9 3171 gcc_assert (len);
0e7f4c19 3172
8919037c 3173 operands[2] = GEN_INT (len);
f38b27c7 3174 return "{extru|extrw,u} %1,31,%2,%0";
0e7f4c19
TG
3175 }
3176 else
3177 {
3178 /* We could use this `depi' for the case above as well, but `depi'
3179 requires one more register file access than an `extru'. */
3180
3181 p = 31 - ls0;
3182 len = ls1 - ls0;
3183
8919037c
TG
3184 operands[2] = GEN_INT (p);
3185 operands[3] = GEN_INT (len);
f38b27c7 3186 return "{depi|depwi} 0,%2,%3,%0";
0e7f4c19
TG
3187 }
3188 }
3189 else
3190 return "and %1,%2,%0";
3191}
3192
520babc7
JL
3193/* Return a string to perform a bitwise-and of operands[1] with operands[2]
3194 storing the result in operands[0]. */
0952f89b 3195const char *
ae9d61ab 3196pa_output_64bit_and (rtx *operands)
520babc7
JL
3197{
3198 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3199 {
3200 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
e0c556d3 3201 int ls0, ls1, ms0, p, len;
520babc7
JL
3202
3203 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
e0c556d3 3204 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
520babc7
JL
3205 break;
3206
3207 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
e0c556d3 3208 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
520babc7
JL
3209 break;
3210
3211 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
e0c556d3 3212 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
520babc7
JL
3213 break;
3214
144d51f9 3215 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
520babc7
JL
3216
3217 if (ls1 == HOST_BITS_PER_WIDE_INT)
3218 {
3219 len = ls0;
3220
144d51f9 3221 gcc_assert (len);
520babc7
JL
3222
3223 operands[2] = GEN_INT (len);
3224 return "extrd,u %1,63,%2,%0";
3225 }
3226 else
3227 {
3228 /* We could use this `depi' for the case above as well, but `depi'
3229 requires one more register file access than an `extru'. */
3230
3231 p = 63 - ls0;
3232 len = ls1 - ls0;
3233
3234 operands[2] = GEN_INT (p);
3235 operands[3] = GEN_INT (len);
3236 return "depdi 0,%2,%3,%0";
3237 }
3238 }
3239 else
3240 return "and %1,%2,%0";
3241}
3242
519104fe 3243const char *
ae9d61ab 3244pa_output_ior (rtx *operands)
0e7f4c19 3245{
0c235d7e 3246 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
f1c7ce82 3247 int bs0, bs1, p, len;
23f6f34f 3248
8365d59b
TG
3249 if (INTVAL (operands[2]) == 0)
3250 return "copy %1,%0";
0e7f4c19 3251
8365d59b
TG
3252 for (bs0 = 0; bs0 < 32; bs0++)
3253 if ((mask & (1 << bs0)) != 0)
3254 break;
0e7f4c19 3255
8365d59b
TG
3256 for (bs1 = bs0; bs1 < 32; bs1++)
3257 if ((mask & (1 << bs1)) == 0)
3258 break;
0e7f4c19 3259
144d51f9 3260 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
0e7f4c19 3261
8365d59b
TG
3262 p = 31 - bs0;
3263 len = bs1 - bs0;
0e7f4c19 3264
8919037c
TG
3265 operands[2] = GEN_INT (p);
3266 operands[3] = GEN_INT (len);
f38b27c7 3267 return "{depi|depwi} -1,%2,%3,%0";
0e7f4c19 3268}
520babc7
JL
3269
3270/* Return a string to perform a bitwise-and of operands[1] with operands[2]
3271 storing the result in operands[0]. */
0952f89b 3272const char *
ae9d61ab 3273pa_output_64bit_ior (rtx *operands)
520babc7
JL
3274{
3275 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
e0c556d3 3276 int bs0, bs1, p, len;
520babc7
JL
3277
3278 if (INTVAL (operands[2]) == 0)
3279 return "copy %1,%0";
3280
3281 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
e0c556d3 3282 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
520babc7
JL
3283 break;
3284
3285 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
e0c556d3 3286 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
520babc7
JL
3287 break;
3288
144d51f9
NS
3289 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3290 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
520babc7
JL
3291
3292 p = 63 - bs0;
3293 len = bs1 - bs0;
3294
3295 operands[2] = GEN_INT (p);
3296 operands[3] = GEN_INT (len);
3297 return "depdi -1,%2,%3,%0";
3298}
0e7f4c19 3299\f
301d03af 3300/* Target hook for assembling integer objects. This code handles
cdcb88d7
JDA
3301 aligned SI and DI integers specially since function references
3302 must be preceded by P%. */
301d03af
RS
3303
3304static bool
b7849684 3305pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
301d03af 3306{
686048e4
JDA
3307 bool result;
3308 tree decl = NULL;
3309
3310 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3311 call assemble_external and set the SYMBOL_REF_DECL to NULL before
3312 calling output_addr_const. Otherwise, it may call assemble_external
3313 in the midst of outputing the assembler code for the SYMBOL_REF.
3314 We restore the SYMBOL_REF_DECL after the output is done. */
3315 if (GET_CODE (x) == SYMBOL_REF)
3316 {
3317 decl = SYMBOL_REF_DECL (x);
3318 if (decl)
3319 {
3320 assemble_external (decl);
3321 SET_SYMBOL_REF_DECL (x, NULL);
3322 }
3323 }
3324
cdcb88d7
JDA
3325 if (size == UNITS_PER_WORD
3326 && aligned_p
301d03af
RS
3327 && function_label_operand (x, VOIDmode))
3328 {
ef719901
JDA
3329 fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3330
3331 /* We don't want an OPD when generating fast indirect calls. */
3332 if (!TARGET_FAST_INDIRECT_CALLS)
3333 fputs ("P%", asm_out_file);
3334
301d03af
RS
3335 output_addr_const (asm_out_file, x);
3336 fputc ('\n', asm_out_file);
686048e4 3337 result = true;
301d03af 3338 }
686048e4
JDA
3339 else
3340 result = default_assemble_integer (x, size, aligned_p);
3341
3342 if (decl)
3343 SET_SYMBOL_REF_DECL (x, decl);
3344
3345 return result;
301d03af
RS
3346}
3347\f
/* Output an ascii string.

   Write the SIZE bytes at P to FILE as one or more .STRING directives.
   A double quote or backslash is preceded by a backslash; any byte
   outside the printable range [' ', 0177) is written as \xNN with
   lower-case hex digits.

   The HP assembler can only take strings of 256 characters at one
   time.  This is a limitation on input line length, *not* the length
   of the string, and it appears to count input characters (so \xnn
   counts as four).  The input is therefore processed in groups of up
   to four bytes, and a new .STRING directive is started whenever
   adding a group would take the current one past 243 output
   characters.  */

void
pa_output_ascii (FILE *file, const char *p, int size)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned char partial_output[16];     /* Max space 4 chars can occupy.  */
  int chars_output = 0;
  int i;

  fputs ("\t.STRING \"", file);

  for (i = 0; i < size; i += 4)
    {
      int group = size - i < 4 ? size - i : 4;
      int co = 0;
      int io;

      for (io = 0; io < group; io++)
        {
          unsigned int c = (unsigned char) p[i + io];

          if (c == '\"' || c == '\\')
            partial_output[co++] = '\\';
          if (c >= ' ' && c < 0177)
            partial_output[co++] = c;
          else
            {
              partial_output[co++] = '\\';
              partial_output[co++] = 'x';
              partial_output[co++] = hex_digits[c / 16];
              partial_output[co++] = hex_digits[c % 16];
            }
        }

      if (chars_output + co > 243)
        {
          fputs ("\"\n\t.STRING \"", file);
          chars_output = 0;
        }
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
    }

  fputs ("\"\n", file);
}
5621d717
JL
3403
3404/* Try to rewrite floating point comparisons & branches to avoid
3405 useless add,tr insns.
3406
3407 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3408 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3409 first attempt to remove useless add,tr insns. It is zero
3410 for the second pass as reorg sometimes leaves bogus REG_DEAD
3411 notes lying around.
3412
3413 When CHECK_NOTES is zero we can only eliminate add,tr insns
3414 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3415 instructions. */
519104fe 3416static void
b7849684 3417remove_useless_addtr_insns (int check_notes)
5621d717 3418{
4ce524a1 3419 rtx_insn *insn;
5621d717
JL
3420 static int pass = 0;
3421
3422 /* This is fairly cheap, so always run it when optimizing. */
3423 if (optimize > 0)
3424 {
3425 int fcmp_count = 0;
3426 int fbranch_count = 0;
3427
3428 /* Walk all the insns in this function looking for fcmp & fbranch
3429 instructions. Keep track of how many of each we find. */
18dbd950 3430 for (insn = get_insns (); insn; insn = next_insn (insn))
5621d717
JL
3431 {
3432 rtx tmp;
3433
3434 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
b64925dc 3435 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
5621d717
JL
3436 continue;
3437
3438 tmp = PATTERN (insn);
3439
3440 /* It must be a set. */
3441 if (GET_CODE (tmp) != SET)
3442 continue;
3443
3444 /* If the destination is CCFP, then we've found an fcmp insn. */
3445 tmp = SET_DEST (tmp);
3446 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3447 {
3448 fcmp_count++;
3449 continue;
3450 }
6619e96c 3451
5621d717
JL
3452 tmp = PATTERN (insn);
3453 /* If this is an fbranch instruction, bump the fbranch counter. */
3454 if (GET_CODE (tmp) == SET
3455 && SET_DEST (tmp) == pc_rtx
3456 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3457 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3458 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3459 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3460 {
3461 fbranch_count++;
3462 continue;
3463 }
3464 }
3465
3466
3467 /* Find all floating point compare + branch insns. If possible,
3468 reverse the comparison & the branch to avoid add,tr insns. */
18dbd950 3469 for (insn = get_insns (); insn; insn = next_insn (insn))
5621d717 3470 {
4ce524a1
DM
3471 rtx tmp;
3472 rtx_insn *next;
5621d717
JL
3473
3474 /* Ignore anything that isn't an INSN. */
b64925dc 3475 if (! NONJUMP_INSN_P (insn))
5621d717
JL
3476 continue;
3477
3478 tmp = PATTERN (insn);
3479
3480 /* It must be a set. */
3481 if (GET_CODE (tmp) != SET)
3482 continue;
3483
3484 /* The destination must be CCFP, which is register zero. */
3485 tmp = SET_DEST (tmp);
3486 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3487 continue;
3488
3489 /* INSN should be a set of CCFP.
3490
3491 See if the result of this insn is used in a reversed FP
3492 conditional branch. If so, reverse our condition and
3493 the branch. Doing so avoids useless add,tr insns. */
3494 next = next_insn (insn);
3495 while (next)
3496 {
3497 /* Jumps, calls and labels stop our search. */
b64925dc 3498 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
5621d717
JL
3499 break;
3500
3501 /* As does another fcmp insn. */
b64925dc 3502 if (NONJUMP_INSN_P (next)
5621d717
JL
3503 && GET_CODE (PATTERN (next)) == SET
3504 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3505 && REGNO (SET_DEST (PATTERN (next))) == 0)
3506 break;
3507
3508 next = next_insn (next);
3509 }
3510
3511 /* Is NEXT_INSN a branch? */
b64925dc 3512 if (next && JUMP_P (next))
5621d717
JL
3513 {
3514 rtx pattern = PATTERN (next);
3515
112cdef5 3516 /* If it a reversed fp conditional branch (e.g. uses add,tr)
5621d717
JL
3517 and CCFP dies, then reverse our conditional and the branch
3518 to avoid the add,tr. */
3519 if (GET_CODE (pattern) == SET
3520 && SET_DEST (pattern) == pc_rtx
3521 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3522 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3523 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3524 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3525 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3526 && (fcmp_count == fbranch_count
3527 || (check_notes
3528 && find_regno_note (next, REG_DEAD, 0))))
3529 {
3530 /* Reverse the branch. */
3531 tmp = XEXP (SET_SRC (pattern), 1);
3532 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3533 XEXP (SET_SRC (pattern), 2) = tmp;
3534 INSN_CODE (next) = -1;
3535
3536 /* Reverse our condition. */
3537 tmp = PATTERN (insn);
3538 PUT_CODE (XEXP (tmp, 1),
831c1763
AM
3539 (reverse_condition_maybe_unordered
3540 (GET_CODE (XEXP (tmp, 1)))));
5621d717
JL
3541 }
3542 }
3543 }
3544 }
3545
3546 pass = !pass;
3547
3548}
188538df 3549\f
831c1763
AM
3550/* You may have trouble believing this, but this is the 32 bit HP-PA
3551 stack layout. Wow.
188538df
TG
3552
3553 Offset Contents
3554
3555 Variable arguments (optional; any number may be allocated)
3556
3557 SP-(4*(N+9)) arg word N
3558 : :
3559 SP-56 arg word 5
3560 SP-52 arg word 4
3561
3562 Fixed arguments (must be allocated; may remain unused)
3563
3564 SP-48 arg word 3
3565 SP-44 arg word 2
3566 SP-40 arg word 1
3567 SP-36 arg word 0
3568
3569 Frame Marker
3570
3571 SP-32 External Data Pointer (DP)
3572 SP-28 External sr4
3573 SP-24 External/stub RP (RP')
3574 SP-20 Current RP
3575 SP-16 Static Link
3576 SP-12 Clean up
3577 SP-8 Calling Stub RP (RP'')
3578 SP-4 Previous SP
3579
3580 Top of Frame
3581
3582 SP-0 Stack Pointer (points to next available address)
3583
3584*/
3585
3586/* This function saves registers as follows. Registers marked with ' are
3587 this function's registers (as opposed to the previous function's).
3588 If a frame_pointer isn't needed, r4 is saved as a general register;
3589 the space for the frame pointer is still allocated, though, to keep
3590 things simple.
3591
3592
3593 Top of Frame
3594
3595 SP (FP') Previous FP
3596 SP + 4 Alignment filler (sigh)
3597 SP + 8 Space for locals reserved here.
3598 .
3599 .
3600 .
3601 SP + n All call-saved registers used.
3602 .
3603 .
3604 .
3605 SP + o All call saved fp registers used.
3606 .
3607 .
3608 .
3609 SP + p (SP') points to next available address.
23f6f34f 3610
188538df
TG
3611*/
3612
08c148a8 3613/* Global variables set by output_function_prologue(). */
19ec6a36
AM
3614/* Size of frame. Need to know this to emit return insns from
3615 leaf procedures. */
a4295210
JDA
3616static HOST_WIDE_INT actual_fsize, local_fsize;
3617static int save_fregs;
19ec6a36 3618
aadcdb45 3619/* Emit RTL to store REG at the memory location specified by BASE+DISP.
fc82f2f1 3620 Handle case where DISP > 8k by using the add_high_const patterns.
aadcdb45
JL
3621
3622 Note in DISP > 8k case, we will leave the high part of the address
3623 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
c5c76735 3624
f6bcf44c 3625static void
a4295210 3626store_reg (int reg, HOST_WIDE_INT disp, int base)
188538df 3627{
e0d80a58
JL
3628 rtx dest, src, basereg;
3629 rtx_insn *insn;
19ec6a36
AM
3630
3631 src = gen_rtx_REG (word_mode, reg);
3632 basereg = gen_rtx_REG (Pmode, base);
188538df 3633 if (VAL_14_BITS_P (disp))
aadcdb45 3634 {
0a81f074 3635 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
f6bcf44c 3636 insn = emit_move_insn (dest, src);
aadcdb45 3637 }
a4295210
JDA
3638 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3639 {
3640 rtx delta = GEN_INT (disp);
3641 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3642
3643 emit_move_insn (tmpreg, delta);
5dcc9605 3644 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
a4295210
JDA
3645 if (DO_FRAME_NOTES)
3646 {
bbbbb16a 3647 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
f7df4a84 3648 gen_rtx_SET (tmpreg,
bbbbb16a 3649 gen_rtx_PLUS (Pmode, basereg, delta)));
5dcc9605 3650 RTX_FRAME_RELATED_P (insn) = 1;
a4295210 3651 }
5dcc9605
JDA
3652 dest = gen_rtx_MEM (word_mode, tmpreg);
3653 insn = emit_move_insn (dest, src);
a4295210 3654 }
aadcdb45
JL
3655 else
3656 {
19ec6a36
AM
3657 rtx delta = GEN_INT (disp);
3658 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3659 rtx tmpreg = gen_rtx_REG (Pmode, 1);
a4295210 3660
19ec6a36
AM
3661 emit_move_insn (tmpreg, high);
3662 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
f6bcf44c
JDA
3663 insn = emit_move_insn (dest, src);
3664 if (DO_FRAME_NOTES)
bbbbb16a 3665 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
f7df4a84 3666 gen_rtx_SET (gen_rtx_MEM (word_mode,
bbbbb16a
ILT
3667 gen_rtx_PLUS (word_mode,
3668 basereg,
3669 delta)),
3670 src));
aadcdb45 3671 }
f6bcf44c
JDA
3672
3673 if (DO_FRAME_NOTES)
3674 RTX_FRAME_RELATED_P (insn) = 1;
aadcdb45
JL
3675}
3676
823fbbce
JDA
3677/* Emit RTL to store REG at the memory location specified by BASE and then
3678 add MOD to BASE. MOD must be <= 8k. */
aadcdb45 3679
823fbbce 3680static void
a4295210 3681store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
823fbbce 3682{
e0d80a58
JL
3683 rtx basereg, srcreg, delta;
3684 rtx_insn *insn;
823fbbce 3685
144d51f9 3686 gcc_assert (VAL_14_BITS_P (mod));
823fbbce
JDA
3687
3688 basereg = gen_rtx_REG (Pmode, base);
3689 srcreg = gen_rtx_REG (word_mode, reg);
3690 delta = GEN_INT (mod);
3691
3692 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3693 if (DO_FRAME_NOTES)
3694 {
3695 RTX_FRAME_RELATED_P (insn) = 1;
3696
3697 /* RTX_FRAME_RELATED_P must be set on each frame related set
77c4f044
RH
3698 in a parallel with more than one element. */
3699 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3700 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
823fbbce
JDA
3701 }
3702}
3703
3704/* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3705 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3706 whether to add a frame note or not.
3707
3708 In the DISP > 8k case, we leave the high part of the address in %r1.
3709 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
c5c76735 3710
f6bcf44c 3711static void
a4295210 3712set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
188538df 3713{
e0d80a58 3714 rtx_insn *insn;
19ec6a36 3715
188538df 3716 if (VAL_14_BITS_P (disp))
19ec6a36 3717 {
f6bcf44c 3718 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
0a81f074
RS
3719 plus_constant (Pmode,
3720 gen_rtx_REG (Pmode, base), disp));
19ec6a36 3721 }
a4295210
JDA
3722 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3723 {
3724 rtx basereg = gen_rtx_REG (Pmode, base);
3725 rtx delta = GEN_INT (disp);
3726 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3727
3728 emit_move_insn (tmpreg, delta);
3729 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3730 gen_rtx_PLUS (Pmode, tmpreg, basereg));
5dcc9605 3731 if (DO_FRAME_NOTES)
bbbbb16a 3732 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
f7df4a84 3733 gen_rtx_SET (tmpreg,
bbbbb16a 3734 gen_rtx_PLUS (Pmode, basereg, delta)));
a4295210 3735 }
188538df 3736 else
aadcdb45 3737 {
f6bcf44c 3738 rtx basereg = gen_rtx_REG (Pmode, base);
19ec6a36 3739 rtx delta = GEN_INT (disp);
a4295210 3740 rtx tmpreg = gen_rtx_REG (Pmode, 1);
f6bcf44c 3741
a4295210 3742 emit_move_insn (tmpreg,
f6bcf44c 3743 gen_rtx_PLUS (Pmode, basereg,
19ec6a36 3744 gen_rtx_HIGH (Pmode, delta)));
f6bcf44c 3745 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
a4295210 3746 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
aadcdb45 3747 }
f6bcf44c 3748
823fbbce 3749 if (DO_FRAME_NOTES && note)
f6bcf44c 3750 RTX_FRAME_RELATED_P (insn) = 1;
188538df
TG
3751}
3752
a4295210 3753HOST_WIDE_INT
ae9d61ab 3754pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
188538df 3755{
95f3f59e
JDA
3756 int freg_saved = 0;
3757 int i, j;
3758
ae9d61ab 3759 /* The code in pa_expand_prologue and pa_expand_epilogue must
95f3f59e
JDA
3760 be consistent with the rounding and size calculation done here.
3761 Change them at the same time. */
3762
3763 /* We do our own stack alignment. First, round the size of the
3764 stack locals up to a word boundary. */
3765 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3766
3767 /* Space for previous frame pointer + filler. If any frame is
3768 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3769 waste some space here for the sake of HP compatibility. The
3770 first slot is only used when the frame pointer is needed. */
3771 if (size || frame_pointer_needed)
3772 size += STARTING_FRAME_OFFSET;
3773
823fbbce
JDA
3774 /* If the current function calls __builtin_eh_return, then we need
3775 to allocate stack space for registers that will hold data for
3776 the exception handler. */
e3b5732b 3777 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
3778 {
3779 unsigned int i;
3780
3781 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3782 continue;
95f3f59e 3783 size += i * UNITS_PER_WORD;
823fbbce
JDA
3784 }
3785
6261ede7 3786 /* Account for space used by the callee general register saves. */
95f3f59e 3787 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
6fb5fa3c 3788 if (df_regs_ever_live_p (i))
95f3f59e 3789 size += UNITS_PER_WORD;
80225b66 3790
6261ede7 3791 /* Account for space used by the callee floating point register saves. */
88624c0e 3792 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
6fb5fa3c
DB
3793 if (df_regs_ever_live_p (i)
3794 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
80225b66 3795 {
95f3f59e 3796 freg_saved = 1;
9e18f575 3797
6261ede7
JL
3798 /* We always save both halves of the FP register, so always
3799 increment the frame size by 8 bytes. */
95f3f59e 3800 size += 8;
80225b66
TG
3801 }
3802
95f3f59e
JDA
3803 /* If any of the floating registers are saved, account for the
3804 alignment needed for the floating point register save block. */
3805 if (freg_saved)
3806 {
3807 size = (size + 7) & ~7;
3808 if (fregs_live)
3809 *fregs_live = 1;
3810 }
3811
6261ede7 3812 /* The various ABIs include space for the outgoing parameters in the
95f3f59e
JDA
3813 size of the current function's stack frame. We don't need to align
3814 for the outgoing arguments as their alignment is set by the final
3815 rounding for the frame as a whole. */
38173d38 3816 size += crtl->outgoing_args_size;
6261ede7
JL
3817
3818 /* Allocate space for the fixed frame marker. This space must be
685d0e07 3819 allocated for any function that makes calls or allocates
6261ede7 3820 stack space. */
416ff32e 3821 if (!crtl->is_leaf || size)
685d0e07 3822 size += TARGET_64BIT ? 48 : 32;
520babc7 3823
95f3f59e 3824 /* Finally, round to the preferred stack boundary. */
5fad1c24
JDA
3825 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3826 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
188538df 3827}
23f6f34f 3828
08c148a8
NB
3829/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3830 of memory. If any fpu reg is used in the function, we allocate
3831 such a block here, at the bottom of the frame, just in case it's needed.
3832
3833 If this function is a leaf procedure, then we may choose not
3834 to do a "save" insn. The decision about whether or not
3835 to do this is made in regclass.c. */
3836
c590b625 3837static void
42776416 3838pa_output_function_prologue (FILE *file)
188538df 3839{
ba0bfdac
JL
3840 /* The function's label and associated .PROC must never be
3841 separated and must be output *after* any profiling declarations
3842 to avoid changing spaces/subspaces within a procedure. */
3843 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3844 fputs ("\t.PROC\n", file);
3845
ae9d61ab 3846 /* pa_expand_prologue does the dirty work now. We just need
aadcdb45
JL
3847 to output the assembler directives which denote the start
3848 of a function. */
a4295210 3849 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
416ff32e 3850 if (crtl->is_leaf)
e236a9ff 3851 fputs (",NO_CALLS", file);
16c16a24
JDA
3852 else
3853 fputs (",CALLS", file);
3854 if (rp_saved)
3855 fputs (",SAVE_RP", file);
da3c3336 3856
685d0e07
JDA
3857 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3858 at the beginning of the frame and that it is used as the frame
3859 pointer for the frame. We do this because our current frame
a4d05547 3860 layout doesn't conform to that specified in the HP runtime
685d0e07
JDA
3861 documentation and we need a way to indicate to programs such as
3862 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3863 isn't used by HP compilers but is supported by the assembler.
3864 However, SAVE_SP is supposed to indicate that the previous stack
3865 pointer has been saved in the frame marker. */
da3c3336 3866 if (frame_pointer_needed)
e236a9ff 3867 fputs (",SAVE_SP", file);
da3c3336 3868
68386e1e 3869 /* Pass on information about the number of callee register saves
e8cfae5c
JL
3870 performed in the prologue.
3871
3872 The compiler is supposed to pass the highest register number
23f6f34f 3873 saved, the assembler then has to adjust that number before
e8cfae5c 3874 entering it into the unwind descriptor (to account for any
23f6f34f 3875 caller saved registers with lower register numbers than the
e8cfae5c
JL
3876 first callee saved register). */
3877 if (gr_saved)
3878 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3879
3880 if (fr_saved)
3881 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
68386e1e 3882
e236a9ff 3883 fputs ("\n\t.ENTRY\n", file);
aadcdb45 3884
18dbd950 3885 remove_useless_addtr_insns (0);
aadcdb45
JL
3886}
3887
f1c7ce82 3888void
ae9d61ab 3889pa_expand_prologue (void)
aadcdb45 3890{
4971c587 3891 int merge_sp_adjust_with_store = 0;
a4295210
JDA
3892 HOST_WIDE_INT size = get_frame_size ();
3893 HOST_WIDE_INT offset;
3894 int i;
e0d80a58
JL
3895 rtx tmpreg;
3896 rtx_insn *insn;
aadcdb45 3897
68386e1e
JL
3898 gr_saved = 0;
3899 fr_saved = 0;
8a9c76f3 3900 save_fregs = 0;
6261ede7 3901
95f3f59e 3902 /* Compute total size for frame pointer, filler, locals and rounding to
ae9d61ab 3903 the next word boundary. Similar code appears in pa_compute_frame_size
95f3f59e
JDA
3904 and must be changed in tandem with this code. */
3905 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3906 if (local_fsize || frame_pointer_needed)
3907 local_fsize += STARTING_FRAME_OFFSET;
6261ede7 3908
ae9d61ab 3909 actual_fsize = pa_compute_frame_size (size, &save_fregs);
a11e0df4 3910 if (flag_stack_usage_info)
d3c12306 3911 current_function_static_stack_size = actual_fsize;
188538df 3912
aadcdb45 3913 /* Compute a few things we will use often. */
690d4228 3914 tmpreg = gen_rtx_REG (word_mode, 1);
188538df 3915
23f6f34f 3916 /* Save RP first. The calling conventions manual states RP will
19ec6a36 3917 always be stored into the caller's frame at sp - 20 or sp - 16
520babc7 3918 depending on which ABI is in use. */
e3b5732b 3919 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
16c16a24
JDA
3920 {
3921 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3922 rp_saved = true;
3923 }
3924 else
3925 rp_saved = false;
23f6f34f 3926
aadcdb45 3927 /* Allocate the local frame and set up the frame pointer if needed. */
31d68947
AM
3928 if (actual_fsize != 0)
3929 {
3930 if (frame_pointer_needed)
3931 {
3932 /* Copy the old frame pointer temporarily into %r1. Set up the
3933 new stack pointer, then store away the saved old frame pointer
823fbbce
JDA
3934 into the stack at sp and at the same time update the stack
3935 pointer by actual_fsize bytes. Two versions, first
31d68947
AM
3936 handles small (<8k) frames. The second handles large (>=8k)
3937 frames. */
bc707992 3938 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
823fbbce 3939 if (DO_FRAME_NOTES)
77c4f044 3940 RTX_FRAME_RELATED_P (insn) = 1;
823fbbce 3941
bc707992 3942 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
823fbbce
JDA
3943 if (DO_FRAME_NOTES)
3944 RTX_FRAME_RELATED_P (insn) = 1;
3945
3946 if (VAL_14_BITS_P (actual_fsize))
3947 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
31d68947
AM
3948 else
3949 {
3950 /* It is incorrect to store the saved frame pointer at *sp,
3951 then increment sp (writes beyond the current stack boundary).
3952
3953 So instead use stwm to store at *sp and post-increment the
3954 stack pointer as an atomic operation. Then increment sp to
3955 finish allocating the new frame. */
a4295210
JDA
3956 HOST_WIDE_INT adjust1 = 8192 - 64;
3957 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
19ec6a36 3958
823fbbce 3959 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
f6bcf44c 3960 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
823fbbce 3961 adjust2, 1);
31d68947 3962 }
823fbbce 3963
685d0e07
JDA
3964 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3965 we need to store the previous stack pointer (frame pointer)
3966 into the frame marker on targets that use the HP unwind
3967 library. This allows the HP unwind library to be used to
3968 unwind GCC frames. However, we are not fully compatible
3969 with the HP library because our frame layout differs from
3970 that specified in the HP runtime specification.
3971
3972 We don't want a frame note on this instruction as the frame
3973 marker moves during dynamic stack allocation.
3974
3975 This instruction also serves as a blockage to prevent
3976 register spills from being scheduled before the stack
3977 pointer is raised. This is necessary as we store
3978 registers using the frame pointer as a base register,
3979 and the frame pointer is set before sp is raised. */
3980 if (TARGET_HPUX_UNWIND_LIBRARY)
3981 {
3982 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3983 GEN_INT (TARGET_64BIT ? -8 : -4));
3984
3985 emit_move_insn (gen_rtx_MEM (word_mode, addr),
bc707992 3986 hard_frame_pointer_rtx);
685d0e07
JDA
3987 }
3988 else
3989 emit_insn (gen_blockage ());
31d68947
AM
3990 }
3991 /* no frame pointer needed. */
3992 else
3993 {
3994 /* In some cases we can perform the first callee register save
3995 and allocating the stack frame at the same time. If so, just
3996 make a note of it and defer allocating the frame until saving
3997 the callee registers. */
1c7a8112 3998 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
31d68947
AM
3999 merge_sp_adjust_with_store = 1;
4000 /* Can not optimize. Adjust the stack frame by actual_fsize
4001 bytes. */
4002 else
f6bcf44c 4003 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
823fbbce 4004 actual_fsize, 1);
31d68947 4005 }
a9d91d6f
RS
4006 }
4007
23f6f34f 4008 /* Normal register save.
aadcdb45
JL
4009
4010 Do not save the frame pointer in the frame_pointer_needed case. It
4011 was done earlier. */
188538df
TG
4012 if (frame_pointer_needed)
4013 {
823fbbce
JDA
4014 offset = local_fsize;
4015
4016 /* Saving the EH return data registers in the frame is the simplest
4017 way to get the frame unwind information emitted. We put them
4018 just before the general registers. */
e3b5732b 4019 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4020 {
4021 unsigned int i, regno;
4022
4023 for (i = 0; ; ++i)
4024 {
4025 regno = EH_RETURN_DATA_REGNO (i);
4026 if (regno == INVALID_REGNUM)
4027 break;
4028
bc707992 4029 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
823fbbce
JDA
4030 offset += UNITS_PER_WORD;
4031 }
4032 }
4033
4034 for (i = 18; i >= 4; i--)
6fb5fa3c 4035 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
188538df 4036 {
bc707992 4037 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
d7735a07 4038 offset += UNITS_PER_WORD;
68386e1e 4039 gr_saved++;
188538df 4040 }
e63ffc38 4041 /* Account for %r3 which is saved in a special place. */
e8cfae5c 4042 gr_saved++;
188538df 4043 }
aadcdb45 4044 /* No frame pointer needed. */
188538df
TG
4045 else
4046 {
823fbbce
JDA
4047 offset = local_fsize - actual_fsize;
4048
4049 /* Saving the EH return data registers in the frame is the simplest
4050 way to get the frame unwind information emitted. */
e3b5732b 4051 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4052 {
4053 unsigned int i, regno;
4054
4055 for (i = 0; ; ++i)
4056 {
4057 regno = EH_RETURN_DATA_REGNO (i);
4058 if (regno == INVALID_REGNUM)
4059 break;
4060
4061 /* If merge_sp_adjust_with_store is nonzero, then we can
4062 optimize the first save. */
4063 if (merge_sp_adjust_with_store)
4064 {
4065 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4066 merge_sp_adjust_with_store = 0;
4067 }
4068 else
4069 store_reg (regno, offset, STACK_POINTER_REGNUM);
4070 offset += UNITS_PER_WORD;
4071 }
4072 }
4073
4074 for (i = 18; i >= 3; i--)
6fb5fa3c 4075 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
188538df 4076 {
23f6f34f 4077 /* If merge_sp_adjust_with_store is nonzero, then we can
4971c587 4078 optimize the first GR save. */
f133af4c 4079 if (merge_sp_adjust_with_store)
4971c587 4080 {
823fbbce 4081 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4971c587 4082 merge_sp_adjust_with_store = 0;
4971c587
JL
4083 }
4084 else
f6bcf44c 4085 store_reg (i, offset, STACK_POINTER_REGNUM);
d7735a07 4086 offset += UNITS_PER_WORD;
68386e1e 4087 gr_saved++;
188538df 4088 }
aadcdb45 4089
4971c587 4090 /* If we wanted to merge the SP adjustment with a GR save, but we never
aadcdb45 4091 did any GR saves, then just emit the adjustment here. */
f133af4c 4092 if (merge_sp_adjust_with_store)
f6bcf44c 4093 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
823fbbce 4094 actual_fsize, 1);
188538df 4095 }
23f6f34f 4096
1c7a8112
AM
4097 /* The hppa calling conventions say that %r19, the pic offset
4098 register, is saved at sp - 32 (in this function's frame)
4099 when generating PIC code. FIXME: What is the correct thing
4100 to do for functions which make no calls and allocate no
4101 frame? Do we need to allocate a frame, or can we just omit
3ffa9dc1
JDA
4102 the save? For now we'll just omit the save.
4103
4104 We don't want a note on this insn as the frame marker can
4105 move if there is a dynamic stack allocation. */
1c7a8112 4106 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3ffa9dc1
JDA
4107 {
4108 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4109
4110 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4111
4112 }
1c7a8112 4113
188538df
TG
4114 /* Align pointer properly (doubleword boundary). */
4115 offset = (offset + 7) & ~7;
4116
4117 /* Floating point register store. */
4118 if (save_fregs)
188538df 4119 {
823fbbce
JDA
4120 rtx base;
4121
aadcdb45
JL
4122 /* First get the frame or stack pointer to the start of the FP register
4123 save area. */
2b41935c 4124 if (frame_pointer_needed)
823fbbce 4125 {
bc707992
JDA
4126 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4127 base = hard_frame_pointer_rtx;
823fbbce 4128 }
2b41935c 4129 else
823fbbce
JDA
4130 {
4131 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4132 base = stack_pointer_rtx;
4133 }
aadcdb45
JL
4134
4135 /* Now actually save the FP registers. */
88624c0e 4136 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
e63ffc38 4137 {
6fb5fa3c
DB
4138 if (df_regs_ever_live_p (i)
4139 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
e63ffc38 4140 {
e0d80a58
JL
4141 rtx addr, reg;
4142 rtx_insn *insn;
403a3fb7
JDA
4143 addr = gen_rtx_MEM (DFmode,
4144 gen_rtx_POST_INC (word_mode, tmpreg));
19ec6a36 4145 reg = gen_rtx_REG (DFmode, i);
f6bcf44c
JDA
4146 insn = emit_move_insn (addr, reg);
4147 if (DO_FRAME_NOTES)
4148 {
4149 RTX_FRAME_RELATED_P (insn) = 1;
823fbbce
JDA
4150 if (TARGET_64BIT)
4151 {
4152 rtx mem = gen_rtx_MEM (DFmode,
0a81f074
RS
4153 plus_constant (Pmode, base,
4154 offset));
bbbbb16a 4155 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
f7df4a84 4156 gen_rtx_SET (mem, reg));
823fbbce
JDA
4157 }
4158 else
4159 {
4160 rtx meml = gen_rtx_MEM (SFmode,
0a81f074
RS
4161 plus_constant (Pmode, base,
4162 offset));
823fbbce 4163 rtx memr = gen_rtx_MEM (SFmode,
0a81f074
RS
4164 plus_constant (Pmode, base,
4165 offset + 4));
823fbbce
JDA
4166 rtx regl = gen_rtx_REG (SFmode, i);
4167 rtx regr = gen_rtx_REG (SFmode, i + 1);
f7df4a84
RS
4168 rtx setl = gen_rtx_SET (meml, regl);
4169 rtx setr = gen_rtx_SET (memr, regr);
823fbbce
JDA
4170 rtvec vec;
4171
4172 RTX_FRAME_RELATED_P (setl) = 1;
4173 RTX_FRAME_RELATED_P (setr) = 1;
4174 vec = gen_rtvec (2, setl, setr);
bbbbb16a
ILT
4175 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4176 gen_rtx_SEQUENCE (VOIDmode, vec));
823fbbce 4177 }
f6bcf44c
JDA
4178 }
4179 offset += GET_MODE_SIZE (DFmode);
e63ffc38
JL
4180 fr_saved++;
4181 }
4182 }
188538df
TG
4183 }
4184}
4185
19ec6a36
AM
4186/* Emit RTL to load REG from the memory location specified by BASE+DISP.
4187 Handle case where DISP > 8k by using the add_high_const patterns. */
4188
f6bcf44c 4189static void
a4295210 4190load_reg (int reg, HOST_WIDE_INT disp, int base)
19ec6a36 4191{
a4295210
JDA
4192 rtx dest = gen_rtx_REG (word_mode, reg);
4193 rtx basereg = gen_rtx_REG (Pmode, base);
4194 rtx src;
19ec6a36 4195
19ec6a36 4196 if (VAL_14_BITS_P (disp))
0a81f074 4197 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
a4295210 4198 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
19ec6a36 4199 {
a4295210
JDA
4200 rtx delta = GEN_INT (disp);
4201 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4202
4203 emit_move_insn (tmpreg, delta);
4204 if (TARGET_DISABLE_INDEXING)
4205 {
4206 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4207 src = gen_rtx_MEM (word_mode, tmpreg);
4208 }
4209 else
4210 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
19ec6a36
AM
4211 }
4212 else
4213 {
4214 rtx delta = GEN_INT (disp);
4215 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4216 rtx tmpreg = gen_rtx_REG (Pmode, 1);
a4295210 4217
19ec6a36
AM
4218 emit_move_insn (tmpreg, high);
4219 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
19ec6a36 4220 }
a4295210
JDA
4221
4222 emit_move_insn (dest, src);
19ec6a36 4223}
aadcdb45 4224
5fad1c24
JDA
4225/* Update the total code bytes output to the text section. */
4226
4227static void
67b846fa 4228update_total_code_bytes (unsigned int nbytes)
5fad1c24
JDA
4229{
4230 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
62910663 4231 && !IN_NAMED_SECTION_P (cfun->decl))
5fad1c24 4232 {
67b846fa 4233 unsigned int old_total = total_code_bytes;
5fad1c24 4234
67b846fa 4235 total_code_bytes += nbytes;
5fad1c24 4236
67b846fa
JDA
4237 /* Be prepared to handle overflows. */
4238 if (old_total > total_code_bytes)
4239 total_code_bytes = UINT_MAX;
5fad1c24
JDA
4240 }
4241}
4242
08c148a8
NB
4243/* This function generates the assembly code for function exit.
4244 Args are as for output_function_prologue ().
4245
4246 The function epilogue should not depend on the current stack
4247 pointer! It should use the frame pointer only. This is mandatory
4248 because of alloca; we also take advantage of it to omit stack
fe19a83d 4249 adjustments before returning. */
08c148a8
NB
4250
4251static void
42776416 4252pa_output_function_epilogue (FILE *file)
188538df 4253{
84034c69 4254 rtx_insn *insn = get_last_insn ();
5dba8769 4255 bool extra_nop;
5fad1c24 4256
ae9d61ab 4257 /* pa_expand_epilogue does the dirty work now. We just need
aadcdb45 4258 to output the assembler directives which denote the end
08a2b118
RS
4259 of a function.
4260
4261 To make debuggers happy, emit a nop if the epilogue was completely
4262 eliminated due to a volatile call as the last insn in the
23f6f34f 4263 current function. That way the return address (in %r2) will
08a2b118
RS
4264 always point to a valid instruction in the current function. */
4265
4266 /* Get the last real insn. */
b64925dc 4267 if (NOTE_P (insn))
08a2b118
RS
4268 insn = prev_real_insn (insn);
4269
4270 /* If it is a sequence, then look inside. */
b64925dc 4271 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
84034c69 4272 insn = as_a <rtx_sequence *> (PATTERN (insn))-> insn (0);
08a2b118 4273
23f6f34f 4274 /* If insn is a CALL_INSN, then it must be a call to a volatile
08a2b118 4275 function (otherwise there would be epilogue insns). */
b64925dc 4276 if (insn && CALL_P (insn))
17e6098e
JDA
4277 {
4278 fputs ("\tnop\n", file);
5dba8769 4279 extra_nop = true;
17e6098e 4280 }
5dba8769
JDA
4281 else
4282 extra_nop = false;
23f6f34f 4283
e236a9ff 4284 fputs ("\t.EXIT\n\t.PROCEND\n", file);
17e6098e 4285
9a55eab3
JDA
4286 if (TARGET_SOM && TARGET_GAS)
4287 {
a9a302d9 4288 /* We are done with this subspace except possibly for some additional
9a55eab3
JDA
4289 debug information. Forget that we are in this subspace to ensure
4290 that the next function is output in its own subspace. */
d6b5193b 4291 in_section = NULL;
1a83bfc3 4292 cfun->machine->in_nsubspa = 2;
9a55eab3
JDA
4293 }
4294
5dba8769 4295 /* Thunks do their own insn accounting. */
a9a302d9
JDA
4296 if (cfun->is_thunk)
4297 return;
4298
5fad1c24 4299 if (INSN_ADDRESSES_SET_P ())
17e6098e 4300 {
5dba8769 4301 last_address = extra_nop ? 4 : 0;
5fad1c24 4302 insn = get_last_nonnote_insn ();
501fcaf5
JDA
4303 if (insn)
4304 {
4305 last_address += INSN_ADDRESSES (INSN_UID (insn));
4306 if (INSN_P (insn))
4307 last_address += insn_default_length (insn);
4308 }
5fad1c24
JDA
4309 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4310 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
17e6098e 4311 }
67b846fa
JDA
4312 else
4313 last_address = UINT_MAX;
5fad1c24
JDA
4314
4315 /* Finally, update the total number of code bytes output so far. */
4316 update_total_code_bytes (last_address);
aadcdb45 4317}
4971c587 4318
aadcdb45 4319void
ae9d61ab 4320pa_expand_epilogue (void)
aadcdb45 4321{
23f6f34f 4322 rtx tmpreg;
a4295210
JDA
4323 HOST_WIDE_INT offset;
4324 HOST_WIDE_INT ret_off = 0;
4325 int i;
31d68947 4326 int merge_sp_adjust_with_load = 0;
aadcdb45
JL
4327
4328 /* We will use this often. */
690d4228 4329 tmpreg = gen_rtx_REG (word_mode, 1);
aadcdb45
JL
4330
4331 /* Try to restore RP early to avoid load/use interlocks when
4332 RP gets used in the return (bv) instruction. This appears to still
fe19a83d 4333 be necessary even when we schedule the prologue and epilogue. */
16c16a24 4334 if (rp_saved)
31d68947
AM
4335 {
4336 ret_off = TARGET_64BIT ? -16 : -20;
4337 if (frame_pointer_needed)
4338 {
bc707992 4339 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
31d68947
AM
4340 ret_off = 0;
4341 }
4342 else
4343 {
4344 /* No frame pointer, and stack is smaller than 8k. */
4345 if (VAL_14_BITS_P (ret_off - actual_fsize))
4346 {
f6bcf44c 4347 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
31d68947
AM
4348 ret_off = 0;
4349 }
4350 }
4351 }
aadcdb45
JL
4352
4353 /* General register restores. */
188538df
TG
4354 if (frame_pointer_needed)
4355 {
823fbbce
JDA
4356 offset = local_fsize;
4357
4358 /* If the current function calls __builtin_eh_return, then we need
4359 to restore the saved EH data registers. */
e3b5732b 4360 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4361 {
4362 unsigned int i, regno;
4363
4364 for (i = 0; ; ++i)
4365 {
4366 regno = EH_RETURN_DATA_REGNO (i);
4367 if (regno == INVALID_REGNUM)
4368 break;
4369
bc707992 4370 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
823fbbce
JDA
4371 offset += UNITS_PER_WORD;
4372 }
4373 }
4374
4375 for (i = 18; i >= 4; i--)
6fb5fa3c 4376 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
188538df 4377 {
bc707992 4378 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
d7735a07 4379 offset += UNITS_PER_WORD;
188538df 4380 }
188538df
TG
4381 }
4382 else
4383 {
823fbbce
JDA
4384 offset = local_fsize - actual_fsize;
4385
4386 /* If the current function calls __builtin_eh_return, then we need
4387 to restore the saved EH data registers. */
e3b5732b 4388 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4389 {
4390 unsigned int i, regno;
4391
4392 for (i = 0; ; ++i)
4393 {
4394 regno = EH_RETURN_DATA_REGNO (i);
4395 if (regno == INVALID_REGNUM)
4396 break;
4397
4398 /* Only for the first load.
4399 merge_sp_adjust_with_load holds the register load
4400 with which we will merge the sp adjustment. */
4401 if (merge_sp_adjust_with_load == 0
4402 && local_fsize == 0
4403 && VAL_14_BITS_P (-actual_fsize))
4404 merge_sp_adjust_with_load = regno;
4405 else
4406 load_reg (regno, offset, STACK_POINTER_REGNUM);
4407 offset += UNITS_PER_WORD;
4408 }
4409 }
4410
4411 for (i = 18; i >= 3; i--)
e63ffc38 4412 {
6fb5fa3c 4413 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
e63ffc38 4414 {
e63ffc38
JL
4415 /* Only for the first load.
4416 merge_sp_adjust_with_load holds the register load
4417 with which we will merge the sp adjustment. */
31d68947 4418 if (merge_sp_adjust_with_load == 0
e63ffc38 4419 && local_fsize == 0
31d68947 4420 && VAL_14_BITS_P (-actual_fsize))
e63ffc38
JL
4421 merge_sp_adjust_with_load = i;
4422 else
f6bcf44c 4423 load_reg (i, offset, STACK_POINTER_REGNUM);
d7735a07 4424 offset += UNITS_PER_WORD;
e63ffc38
JL
4425 }
4426 }
188538df 4427 }
aadcdb45 4428
188538df
TG
4429 /* Align pointer properly (doubleword boundary). */
4430 offset = (offset + 7) & ~7;
4431
aadcdb45 4432 /* FP register restores. */
188538df 4433 if (save_fregs)
188538df 4434 {
aadcdb45 4435 /* Adjust the register to index off of. */
2b41935c 4436 if (frame_pointer_needed)
bc707992 4437 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
2b41935c 4438 else
823fbbce 4439 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
aadcdb45
JL
4440
4441 /* Actually do the restores now. */
88624c0e 4442 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
6fb5fa3c
DB
4443 if (df_regs_ever_live_p (i)
4444 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
19ec6a36 4445 {
403a3fb7
JDA
4446 rtx src = gen_rtx_MEM (DFmode,
4447 gen_rtx_POST_INC (word_mode, tmpreg));
19ec6a36 4448 rtx dest = gen_rtx_REG (DFmode, i);
f6bcf44c 4449 emit_move_insn (dest, src);
19ec6a36 4450 }
188538df 4451 }
aadcdb45 4452
1144563f
JL
4453 /* Emit a blockage insn here to keep these insns from being moved to
4454 an earlier spot in the epilogue, or into the main instruction stream.
4455
4456 This is necessary as we must not cut the stack back before all the
4457 restores are finished. */
4458 emit_insn (gen_blockage ());
aadcdb45 4459
6619e96c 4460 /* Reset stack pointer (and possibly frame pointer). The stack
68944452 4461 pointer is initially set to fp + 64 to avoid a race condition. */
31d68947 4462 if (frame_pointer_needed)
188538df 4463 {
19ec6a36 4464 rtx delta = GEN_INT (-64);
823fbbce 4465
bc707992
JDA
4466 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4467 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4468 stack_pointer_rtx, delta));
188538df 4469 }
aadcdb45 4470 /* If we were deferring a callee register restore, do it now. */
31d68947
AM
4471 else if (merge_sp_adjust_with_load)
4472 {
4473 rtx delta = GEN_INT (-actual_fsize);
19ec6a36 4474 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
823fbbce
JDA
4475
4476 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
31d68947 4477 }
aadcdb45 4478 else if (actual_fsize != 0)
823fbbce
JDA
4479 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4480 - actual_fsize, 0);
31d68947
AM
4481
4482 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4483 frame greater than 8k), do so now. */
4484 if (ret_off != 0)
f6bcf44c 4485 load_reg (2, ret_off, STACK_POINTER_REGNUM);
823fbbce 4486
e3b5732b 4487 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4488 {
4489 rtx sa = EH_RETURN_STACKADJ_RTX;
4490
4491 emit_insn (gen_blockage ());
4492 emit_insn (TARGET_64BIT
4493 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4494 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4495 }
15768583
JDA
4496}
4497
4498bool
4499pa_can_use_return_insn (void)
4500{
4501 if (!reload_completed)
4502 return false;
4503
4504 if (frame_pointer_needed)
4505 return false;
4506
4507 if (df_regs_ever_live_p (2))
4508 return false;
4509
4510 if (crtl->profile)
4511 return false;
4512
ae9d61ab 4513 return pa_compute_frame_size (get_frame_size (), 0) == 0;
188538df
TG
4514}
4515
d777856d 4516rtx
b7849684 4517hppa_pic_save_rtx (void)
824e7605 4518{
d777856d 4519 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
1c7a8112
AM
4520}
4521
3674b34d
JDA
4522#ifndef NO_DEFERRED_PROFILE_COUNTERS
4523#define NO_DEFERRED_PROFILE_COUNTERS 0
4524#endif
4525
3674b34d
JDA
4526
4527/* Vector of funcdef numbers. */
9771b263 4528static vec<int> funcdef_nos;
3674b34d
JDA
4529
4530/* Output deferred profile counters. */
4531static void
4532output_deferred_profile_counters (void)
4533{
4534 unsigned int i;
4535 int align, n;
4536
9771b263 4537 if (funcdef_nos.is_empty ())
3674b34d
JDA
4538 return;
4539
d6b5193b 4540 switch_to_section (data_section);
3674b34d
JDA
4541 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4542 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4543
9771b263 4544 for (i = 0; funcdef_nos.iterate (i, &n); i++)
3674b34d
JDA
4545 {
4546 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4547 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4548 }
4549
9771b263 4550 funcdef_nos.release ();
3674b34d
JDA
4551}
4552
1c7a8112 4553void
b7849684 4554hppa_profile_hook (int label_no)
1c7a8112 4555{
a3d4c92f
RC
4556 /* We use SImode for the address of the function in both 32 and
4557 64-bit code to avoid having to provide DImode versions of the
4558 lcla2 and load_offset_label_address insn patterns. */
4559 rtx reg = gen_reg_rtx (SImode);
19f8b229 4560 rtx_code_label *label_rtx = gen_label_rtx ();
730a27a2
JDA
4561 rtx mcount = gen_rtx_MEM (Pmode, gen_rtx_SYMBOL_REF (Pmode, "_mcount"));
4562 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4563 rtx arg_bytes, begin_label_rtx;
e0d80a58 4564 rtx_insn *call_insn;
8f949e7e 4565 char begin_label_name[16];
730a27a2
JDA
4566 bool use_mcount_pcrel_call;
4567
4568 /* If we can reach _mcount with a pc-relative call, we can optimize
4569 loading the address of the current function. This requires linker
4570 long branch stub support. */
4571 if (!TARGET_PORTABLE_RUNTIME
4572 && !TARGET_LONG_CALLS
4573 && (TARGET_SOM || flag_function_sections))
4574 use_mcount_pcrel_call = TRUE;
4575 else
4576 use_mcount_pcrel_call = FALSE;
1c7a8112 4577
8f949e7e 4578 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
f6f315fe 4579 label_no);
a3d4c92f 4580 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
1c7a8112 4581
1c7a8112
AM
4582 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4583
730a27a2
JDA
4584 if (!use_mcount_pcrel_call)
4585 {
4586 /* The address of the function is loaded into %r25 with an instruction-
4587 relative sequence that avoids the use of relocations. The sequence
4588 is split so that the load_offset_label_address instruction can
4589 occupy the delay slot of the call to _mcount. */
4590 if (TARGET_PA_20)
4591 emit_insn (gen_lcla2 (reg, label_rtx));
4592 else
4593 emit_insn (gen_lcla1 (reg, label_rtx));
1c7a8112 4594
730a27a2
JDA
4595 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4596 reg,
4597 begin_label_rtx,
4598 label_rtx));
4599 }
a3d4c92f 4600
730a27a2
JDA
4601 if (!NO_DEFERRED_PROFILE_COUNTERS)
4602 {
4603 rtx count_label_rtx, addr, r24;
4604 char count_label_name[16];
4605
4606 funcdef_nos.safe_push (label_no);
4607 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4608 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4609 ggc_strdup (count_label_name));
4610
4611 addr = force_reg (Pmode, count_label_rtx);
4612 r24 = gen_rtx_REG (Pmode, 24);
4613 emit_move_insn (r24, addr);
4614
4615 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4616 if (use_mcount_pcrel_call)
4617 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4618 begin_label_rtx));
4619 else
4620 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
a3d4c92f 4621
730a27a2
JDA
4622 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4623 }
4624 else
4625 {
4626 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4627 if (use_mcount_pcrel_call)
4628 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4629 begin_label_rtx));
4630 else
4631 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4632 }
1c7a8112 4633
a3d4c92f
RC
4634 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4635 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4636
1c7a8112
AM
4637 /* Indicate the _mcount call cannot throw, nor will it execute a
4638 non-local goto. */
062a5fd1 4639 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
730a27a2
JDA
4640
4641 /* Allocate space for fixed arguments. */
4642 if (reg_parm_stack_space > crtl->outgoing_args_size)
4643 crtl->outgoing_args_size = reg_parm_stack_space;
824e7605
AM
4644}
4645
e99d6592
MS
4646/* Fetch the return address for the frame COUNT steps up from
4647 the current frame, after the prologue. FRAMEADDR is the
4648 frame pointer of the COUNT frame.
4649
cf3735b8
JDA
4650 We want to ignore any export stub remnants here. To handle this,
4651 we examine the code at the return address, and if it is an export
4652 stub, we return a memory rtx for the stub return address stored
4653 at frame-24.
c28eb6c2
JL
4654
4655 The value returned is used in two different ways:
4656
4657 1. To find a function's caller.
4658
4659 2. To change the return address for a function.
4660
4661 This function handles most instances of case 1; however, it will
4662 fail if there are two levels of stubs to execute on the return
4663 path. The only way I believe that can happen is if the return value
4664 needs a parameter relocation, which never happens for C code.
4665
4666 This function handles most instances of case 2; however, it will
4667 fail if we did not originally have stub code on the return path
cf3735b8 4668 but will need stub code on the new return path. This can happen if
c28eb6c2 4669 the caller & callee are both in the main program, but the new
cf3735b8 4670 return location is in a shared library. */
e99d6592
MS
4671
4672rtx
ae9d61ab 4673pa_return_addr_rtx (int count, rtx frameaddr)
e99d6592
MS
4674{
4675 rtx label;
cf3735b8 4676 rtx rp;
e99d6592
MS
4677 rtx saved_rp;
4678 rtx ins;
4679
df8b5535 4680 /* The instruction stream at the return address of a PA1.X export stub is:
f90b7a5a
PB
4681
4682 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4683 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4684 0x00011820 | stub+16: mtsp r1,sr0
4685 0xe0400002 | stub+20: be,n 0(sr0,rp)
4686
4687 0xe0400002 must be specified as -532676606 so that it won't be
df8b5535 4688 rejected as an invalid immediate operand on 64-bit hosts.
f90b7a5a 4689
df8b5535
JDA
4690 The instruction stream at the return address of a PA2.0 export stub is:
4691
4692 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4693 0xe840d002 | stub+12: bve,n (rp)
4694 */
4695
4696 HOST_WIDE_INT insns[4];
4697 int i, len;
f90b7a5a 4698
cf3735b8
JDA
4699 if (count != 0)
4700 return NULL_RTX;
a7721dc0 4701
cf3735b8 4702 rp = get_hard_reg_initial_val (Pmode, 2);
e99d6592 4703
cf3735b8
JDA
4704 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4705 return rp;
e99d6592 4706
f90b7a5a
PB
4707 /* If there is no export stub then just use the value saved from
4708 the return pointer register. */
4709
a7721dc0 4710 saved_rp = gen_reg_rtx (Pmode);
cf3735b8 4711 emit_move_insn (saved_rp, rp);
e99d6592
MS
4712
4713 /* Get pointer to the instruction stream. We have to mask out the
4714 privilege level from the two low order bits of the return address
4715 pointer here so that ins will point to the start of the first
4716 instruction that would have been executed if we returned. */
cf3735b8 4717 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
e99d6592
MS
4718 label = gen_label_rtx ();
4719
df8b5535
JDA
4720 if (TARGET_PA_20)
4721 {
4722 insns[0] = 0x4bc23fd1;
4723 insns[1] = -398405630;
4724 len = 2;
4725 }
4726 else
4727 {
4728 insns[0] = 0x4bc23fd1;
4729 insns[1] = 0x004010a1;
4730 insns[2] = 0x00011820;
4731 insns[3] = -532676606;
4732 len = 4;
4733 }
4734
e99d6592 4735 /* Check the instruction stream at the normal return address for the
f90b7a5a
PB
4736 export stub. If it is an export stub, than our return address is
4737 really in -24[frameaddr]. */
e99d6592 4738
df8b5535 4739 for (i = 0; i < len; i++)
f90b7a5a 4740 {
0a81f074 4741 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
f90b7a5a
PB
4742 rtx op1 = GEN_INT (insns[i]);
4743 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4744 }
e99d6592 4745
cf3735b8 4746 /* Here we know that our return address points to an export
e99d6592 4747 stub. We don't want to return the address of the export stub,
cf3735b8
JDA
4748 but rather the return address of the export stub. That return
4749 address is stored at -24[frameaddr]. */
e99d6592 4750
cf3735b8
JDA
4751 emit_move_insn (saved_rp,
4752 gen_rtx_MEM (Pmode,
4753 memory_address (Pmode,
0a81f074 4754 plus_constant (Pmode, frameaddr,
cf3735b8 4755 -24))));
e99d6592
MS
4756
4757 emit_label (label);
f90b7a5a 4758
cf3735b8 4759 return saved_rp;
e99d6592
MS
4760}
4761
188538df 4762void
ae9d61ab 4763pa_emit_bcond_fp (rtx operands[])
188538df 4764{
f90b7a5a
PB
4765 enum rtx_code code = GET_CODE (operands[0]);
4766 rtx operand0 = operands[1];
4767 rtx operand1 = operands[2];
4768 rtx label = operands[3];
4769
f7df4a84 4770 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
f90b7a5a
PB
4771 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4772
f7df4a84 4773 emit_jump_insn (gen_rtx_SET (pc_rtx,
ad2c71b7 4774 gen_rtx_IF_THEN_ELSE (VOIDmode,
f90b7a5a 4775 gen_rtx_fmt_ee (NE,
ad2c71b7
JL
4776 VOIDmode,
4777 gen_rtx_REG (CCFPmode, 0),
4778 const0_rtx),
f90b7a5a 4779 gen_rtx_LABEL_REF (VOIDmode, label),
ad2c71b7 4780 pc_rtx)));
188538df
TG
4781
4782}
4783
780f491f
TG
4784/* Adjust the cost of a scheduling dependency. Return the new cost of
4785 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4786
c237e94a 4787static int
b505225b
TS
4788pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4789 unsigned int)
780f491f 4790{
b09fa787
JL
4791 enum attr_type attr_type;
4792
5d50fab3
JL
4793 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4794 true dependencies as they are described with bypasses now. */
b505225b 4795 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
86001391
JQ
4796 return cost;
4797
e150ae4f
TG
4798 if (! recog_memoized (insn))
4799 return 0;
780f491f 4800
b09fa787
JL
4801 attr_type = get_attr_type (insn);
4802
b505225b 4803 switch (dep_type)
780f491f 4804 {
144d51f9 4805 case REG_DEP_ANTI:
780f491f
TG
4806 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4807 cycles later. */
4808
b09fa787 4809 if (attr_type == TYPE_FPLOAD)
780f491f 4810 {
e150ae4f
TG
4811 rtx pat = PATTERN (insn);
4812 rtx dep_pat = PATTERN (dep_insn);
4813 if (GET_CODE (pat) == PARALLEL)
4814 {
4815 /* This happens for the fldXs,mb patterns. */
4816 pat = XVECEXP (pat, 0, 0);
4817 }
4818 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
780f491f 4819 /* If this happens, we have to extend this to schedule
e150ae4f
TG
4820 optimally. Return 0 for now. */
4821 return 0;
780f491f 4822
e150ae4f 4823 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
780f491f 4824 {
e150ae4f
TG
4825 if (! recog_memoized (dep_insn))
4826 return 0;
780f491f
TG
4827 switch (get_attr_type (dep_insn))
4828 {
4829 case TYPE_FPALU:
c47decad
JL
4830 case TYPE_FPMULSGL:
4831 case TYPE_FPMULDBL:
780f491f
TG
4832 case TYPE_FPDIVSGL:
4833 case TYPE_FPDIVDBL:
4834 case TYPE_FPSQRTSGL:
4835 case TYPE_FPSQRTDBL:
e150ae4f 4836 /* A fpload can't be issued until one cycle before a
ddd5a7c1 4837 preceding arithmetic operation has finished if
e150ae4f
TG
4838 the target of the fpload is any of the sources
4839 (or destination) of the arithmetic operation. */
5d50fab3 4840 return insn_default_latency (dep_insn) - 1;
c47decad
JL
4841
4842 default:
4843 return 0;
4844 }
4845 }
4846 }
b09fa787 4847 else if (attr_type == TYPE_FPALU)
c47decad
JL
4848 {
4849 rtx pat = PATTERN (insn);
4850 rtx dep_pat = PATTERN (dep_insn);
4851 if (GET_CODE (pat) == PARALLEL)
4852 {
4853 /* This happens for the fldXs,mb patterns. */
4854 pat = XVECEXP (pat, 0, 0);
4855 }
4856 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4857 /* If this happens, we have to extend this to schedule
4858 optimally. Return 0 for now. */
4859 return 0;
4860
4861 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4862 {
4863 if (! recog_memoized (dep_insn))
4864 return 0;
4865 switch (get_attr_type (dep_insn))
4866 {
4867 case TYPE_FPDIVSGL:
4868 case TYPE_FPDIVDBL:
4869 case TYPE_FPSQRTSGL:
4870 case TYPE_FPSQRTDBL:
4871 /* An ALU flop can't be issued until two cycles before a
ddd5a7c1 4872 preceding divide or sqrt operation has finished if
c47decad
JL
4873 the target of the ALU flop is any of the sources
4874 (or destination) of the divide or sqrt operation. */
5d50fab3 4875 return insn_default_latency (dep_insn) - 2;
780f491f
TG
4876
4877 default:
4878 return 0;
4879 }
4880 }
4881 }
4882
4883 /* For other anti dependencies, the cost is 0. */
4884 return 0;
144d51f9
NS
4885
4886 case REG_DEP_OUTPUT:
c47decad
JL
4887 /* Output dependency; DEP_INSN writes a register that INSN writes some
4888 cycles later. */
b09fa787 4889 if (attr_type == TYPE_FPLOAD)
c47decad
JL
4890 {
4891 rtx pat = PATTERN (insn);
4892 rtx dep_pat = PATTERN (dep_insn);
4893 if (GET_CODE (pat) == PARALLEL)
4894 {
4895 /* This happens for the fldXs,mb patterns. */
4896 pat = XVECEXP (pat, 0, 0);
4897 }
4898 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4899 /* If this happens, we have to extend this to schedule
4900 optimally. Return 0 for now. */
4901 return 0;
4902
4903 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4904 {
4905 if (! recog_memoized (dep_insn))
4906 return 0;
4907 switch (get_attr_type (dep_insn))
4908 {
4909 case TYPE_FPALU:
4910 case TYPE_FPMULSGL:
4911 case TYPE_FPMULDBL:
4912 case TYPE_FPDIVSGL:
4913 case TYPE_FPDIVDBL:
4914 case TYPE_FPSQRTSGL:
4915 case TYPE_FPSQRTDBL:
4916 /* A fpload can't be issued until one cycle before a
ddd5a7c1 4917 preceding arithmetic operation has finished if
c47decad 4918 the target of the fpload is the destination of the
fae15c93
VM
4919 arithmetic operation.
4920
4921 Exception: For PA7100LC, PA7200 and PA7300, the cost
4922 is 3 cycles, unless they bundle together. We also
4923 pay the penalty if the second insn is a fpload. */
5d50fab3 4924 return insn_default_latency (dep_insn) - 1;
780f491f 4925
c47decad
JL
4926 default:
4927 return 0;
4928 }
4929 }
4930 }
b09fa787 4931 else if (attr_type == TYPE_FPALU)
c47decad
JL
4932 {
4933 rtx pat = PATTERN (insn);
4934 rtx dep_pat = PATTERN (dep_insn);
4935 if (GET_CODE (pat) == PARALLEL)
4936 {
4937 /* This happens for the fldXs,mb patterns. */
4938 pat = XVECEXP (pat, 0, 0);
4939 }
4940 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4941 /* If this happens, we have to extend this to schedule
4942 optimally. Return 0 for now. */
4943 return 0;
4944
4945 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4946 {
4947 if (! recog_memoized (dep_insn))
4948 return 0;
4949 switch (get_attr_type (dep_insn))
4950 {
4951 case TYPE_FPDIVSGL:
4952 case TYPE_FPDIVDBL:
4953 case TYPE_FPSQRTSGL:
4954 case TYPE_FPSQRTDBL:
4955 /* An ALU flop can't be issued until two cycles before a
ddd5a7c1 4956 preceding divide or sqrt operation has finished if
c47decad 4957 the target of the ALU flop is also the target of
38e01259 4958 the divide or sqrt operation. */
5d50fab3 4959 return insn_default_latency (dep_insn) - 2;
c47decad
JL
4960
4961 default:
4962 return 0;
4963 }
4964 }
4965 }
4966
4967 /* For other output dependencies, the cost is 0. */
4968 return 0;
144d51f9
NS
4969
4970 default:
4971 gcc_unreachable ();
c47decad 4972 }
780f491f 4973}
188538df 4974
c237e94a
ZW
4975/* Adjust scheduling priorities. We use this to try and keep addil
4976 and the next use of %r1 close together. */
4977static int
ac44248e 4978pa_adjust_priority (rtx_insn *insn, int priority)
c237e94a
ZW
4979{
4980 rtx set = single_set (insn);
4981 rtx src, dest;
4982 if (set)
4983 {
4984 src = SET_SRC (set);
4985 dest = SET_DEST (set);
4986 if (GET_CODE (src) == LO_SUM
4987 && symbolic_operand (XEXP (src, 1), VOIDmode)
4988 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4989 priority >>= 3;
4990
4991 else if (GET_CODE (src) == MEM
4992 && GET_CODE (XEXP (src, 0)) == LO_SUM
4993 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
4994 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
4995 priority >>= 1;
4996
4997 else if (GET_CODE (dest) == MEM
4998 && GET_CODE (XEXP (dest, 0)) == LO_SUM
4999 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
5000 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
5001 priority >>= 3;
5002 }
5003 return priority;
5004}
5005
5006/* The 700 can only issue a single insn at a time.
5007 The 7XXX processors can issue two insns at a time.
5008 The 8000 can issue 4 insns at a time. */
5009static int
b7849684 5010pa_issue_rate (void)
c237e94a
ZW
5011{
5012 switch (pa_cpu)
5013 {
5014 case PROCESSOR_700: return 1;
5015 case PROCESSOR_7100: return 2;
5016 case PROCESSOR_7100LC: return 2;
5017 case PROCESSOR_7200: return 2;
fae15c93 5018 case PROCESSOR_7300: return 2;
c237e94a
ZW
5019 case PROCESSOR_8000: return 4;
5020
5021 default:
144d51f9 5022 gcc_unreachable ();
c237e94a
ZW
5023 }
5024}
5025
5026
5027
ab11fb42
JDA
5028/* Return any length plus adjustment needed by INSN which already has
5029 its length computed as LENGTH. Return LENGTH if no adjustment is
5030 necessary.
3673e996
RS
5031
5032 Also compute the length of an inline block move here as it is too
b9821af8 5033 complicated to express as a length attribute in pa.md. */
3673e996 5034int
432d483a 5035pa_adjust_insn_length (rtx_insn *insn, int length)
3673e996
RS
5036{
5037 rtx pat = PATTERN (insn);
5038
ab11fb42
JDA
5039 /* If length is negative or undefined, provide initial length. */
5040 if ((unsigned int) length >= INT_MAX)
5041 {
5042 if (GET_CODE (pat) == SEQUENCE)
432d483a 5043 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
ab11fb42
JDA
5044
5045 switch (get_attr_type (insn))
5046 {
5047 case TYPE_MILLI:
5048 length = pa_attr_length_millicode_call (insn);
5049 break;
5050 case TYPE_CALL:
5051 length = pa_attr_length_call (insn, 0);
5052 break;
5053 case TYPE_SIBCALL:
5054 length = pa_attr_length_call (insn, 1);
5055 break;
5056 case TYPE_DYNCALL:
5057 length = pa_attr_length_indirect_call (insn);
5058 break;
5059 case TYPE_SH_FUNC_ADRS:
5060 length = pa_attr_length_millicode_call (insn) + 20;
5061 break;
5062 default:
5063 gcc_unreachable ();
5064 }
5065 }
5066
3673e996 5067 /* Block move pattern. */
33e67557
SB
5068 if (NONJUMP_INSN_P (insn)
5069 && GET_CODE (pat) == PARALLEL
5070 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5071 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5072 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5073 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5074 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
ab11fb42 5075 length += compute_movmem_length (insn) - 4;
cdc9103c 5076 /* Block clear pattern. */
b64925dc 5077 else if (NONJUMP_INSN_P (insn)
cdc9103c
JDA
5078 && GET_CODE (pat) == PARALLEL
5079 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5080 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5081 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5082 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
ab11fb42 5083 length += compute_clrmem_length (insn) - 4;
3673e996 5084 /* Conditional branch with an unfilled delay slot. */
b64925dc 5085 else if (JUMP_P (insn) && ! simplejump_p (insn))
b9821af8
JL
5086 {
5087 /* Adjust a short backwards conditional with an unfilled delay slot. */
5088 if (GET_CODE (pat) == SET
a1b36964 5089 && length == 4
3232e9d8 5090 && JUMP_LABEL (insn) != NULL_RTX
b9821af8 5091 && ! forward_branch_p (insn))
ab11fb42 5092 length += 4;
b1092901
JL
5093 else if (GET_CODE (pat) == PARALLEL
5094 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5095 && length == 4)
ab11fb42 5096 length += 4;
b9821af8 5097 /* Adjust dbra insn with short backwards conditional branch with
23f6f34f 5098 unfilled delay slot -- only for case where counter is in a
fe19a83d 5099 general register register. */
b9821af8
JL
5100 else if (GET_CODE (pat) == PARALLEL
5101 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5102 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
23f6f34f 5103 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
a1b36964 5104 && length == 4
b9821af8 5105 && ! forward_branch_p (insn))
ab11fb42 5106 length += 4;
b9821af8 5107 }
ab11fb42 5108 return length;
3673e996
RS
5109}
5110
8a5b8538
AS
/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  Return true
   if CODE is one of the punctuation characters '@', '#', '*' or '^',
   and false for anything else.  */

static bool
pa_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '@':
    case '#':
    case '*':
    case '^':
      return true;

    default:
      return false;
    }
}
5124
188538df
TG
5125/* Print operand X (an rtx) in assembler syntax to file FILE.
5126 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5127 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5128
5129void
ae9d61ab 5130pa_print_operand (FILE *file, rtx x, int code)
188538df
TG
5131{
5132 switch (code)
5133 {
5134 case '#':
5135 /* Output a 'nop' if there's nothing for the delay slot. */
5136 if (dbr_sequence_length () == 0)
5137 fputs ("\n\tnop", file);
5138 return;
5139 case '*':
5bdc5878 5140 /* Output a nullification completer if there's nothing for the */
23f6f34f 5141 /* delay slot or nullification is requested. */
188538df
TG
5142 if (dbr_sequence_length () == 0 ||
5143 (final_sequence &&
5144 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5145 fputs (",n", file);
5146 return;
5147 case 'R':
5148 /* Print out the second register name of a register pair.
5149 I.e., R (6) => 7. */
831c1763 5150 fputs (reg_names[REGNO (x) + 1], file);
188538df
TG
5151 return;
5152 case 'r':
fe19a83d 5153 /* A register or zero. */
f048ca47
JL
5154 if (x == const0_rtx
5155 || (x == CONST0_RTX (DFmode))
5156 || (x == CONST0_RTX (SFmode)))
188538df 5157 {
55abf18a
JL
5158 fputs ("%r0", file);
5159 return;
5160 }
5161 else
5162 break;
5163 case 'f':
fe19a83d 5164 /* A register or zero (floating point). */
55abf18a
JL
5165 if (x == const0_rtx
5166 || (x == CONST0_RTX (DFmode))
5167 || (x == CONST0_RTX (SFmode)))
5168 {
5169 fputs ("%fr0", file);
188538df
TG
5170 return;
5171 }
5172 else
5173 break;
f8eb41cc
JL
5174 case 'A':
5175 {
5176 rtx xoperands[2];
5177
5178 xoperands[0] = XEXP (XEXP (x, 0), 0);
5179 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
ae9d61ab 5180 pa_output_global_address (file, xoperands[1], 0);
f8eb41cc
JL
5181 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5182 return;
5183 }
5184
c85b8963 5185 case 'C': /* Plain (C)ondition */
188538df
TG
5186 case 'X':
5187 switch (GET_CODE (x))
23f6f34f 5188 {
188538df 5189 case EQ:
e236a9ff 5190 fputs ("=", file); break;
188538df 5191 case NE:
e236a9ff 5192 fputs ("<>", file); break;
188538df 5193 case GT:
e236a9ff 5194 fputs (">", file); break;
188538df 5195 case GE:
e236a9ff 5196 fputs (">=", file); break;
188538df 5197 case GEU:
e236a9ff 5198 fputs (">>=", file); break;
188538df 5199 case GTU:
e236a9ff 5200 fputs (">>", file); break;
188538df 5201 case LT:
e236a9ff 5202 fputs ("<", file); break;
188538df 5203 case LE:
e236a9ff 5204 fputs ("<=", file); break;
188538df 5205 case LEU:
e236a9ff 5206 fputs ("<<=", file); break;
188538df 5207 case LTU:
e236a9ff 5208 fputs ("<<", file); break;
188538df 5209 default:
144d51f9 5210 gcc_unreachable ();
188538df
TG
5211 }
5212 return;
c85b8963 5213 case 'N': /* Condition, (N)egated */
188538df
TG
5214 switch (GET_CODE (x))
5215 {
5216 case EQ:
e236a9ff 5217 fputs ("<>", file); break;
188538df 5218 case NE:
e236a9ff 5219 fputs ("=", file); break;
188538df 5220 case GT:
e236a9ff 5221 fputs ("<=", file); break;
188538df 5222 case GE:
e236a9ff 5223 fputs ("<", file); break;
188538df 5224 case GEU:
e236a9ff 5225 fputs ("<<", file); break;
188538df 5226 case GTU:
e236a9ff 5227 fputs ("<<=", file); break;
188538df 5228 case LT:
e236a9ff 5229 fputs (">=", file); break;
188538df 5230 case LE:
e236a9ff 5231 fputs (">", file); break;
188538df 5232 case LEU:
e236a9ff 5233 fputs (">>", file); break;
188538df 5234 case LTU:
e236a9ff 5235 fputs (">>=", file); break;
188538df 5236 default:
144d51f9 5237 gcc_unreachable ();
188538df
TG
5238 }
5239 return;
831c1763 5240 /* For floating point comparisons. Note that the output
69049ba0
JDA
5241 predicates are the complement of the desired mode. The
5242 conditions for GT, GE, LT, LE and LTGT cause an invalid
5243 operation exception if the result is unordered and this
5244 exception is enabled in the floating-point status register. */
d6c0d377
JL
5245 case 'Y':
5246 switch (GET_CODE (x))
5247 {
5248 case EQ:
e236a9ff 5249 fputs ("!=", file); break;
d6c0d377 5250 case NE:
e236a9ff 5251 fputs ("=", file); break;
d6c0d377 5252 case GT:
becf1647 5253 fputs ("!>", file); break;
d6c0d377 5254 case GE:
becf1647 5255 fputs ("!>=", file); break;
d6c0d377 5256 case LT:
becf1647 5257 fputs ("!<", file); break;
d6c0d377 5258 case LE:
becf1647
DA
5259 fputs ("!<=", file); break;
5260 case LTGT:
5261 fputs ("!<>", file); break;
5262 case UNLE:
69049ba0 5263 fputs ("!?<=", file); break;
becf1647 5264 case UNLT:
69049ba0 5265 fputs ("!?<", file); break;
becf1647 5266 case UNGE:
69049ba0 5267 fputs ("!?>=", file); break;
becf1647 5268 case UNGT:
69049ba0 5269 fputs ("!?>", file); break;
becf1647 5270 case UNEQ:
69049ba0 5271 fputs ("!?=", file); break;
becf1647 5272 case UNORDERED:
69049ba0 5273 fputs ("!?", file); break;
becf1647 5274 case ORDERED:
69049ba0 5275 fputs ("?", file); break;
d6c0d377 5276 default:
144d51f9 5277 gcc_unreachable ();
d6c0d377
JL
5278 }
5279 return;
c85b8963
TG
5280 case 'S': /* Condition, operands are (S)wapped. */
5281 switch (GET_CODE (x))
5282 {
5283 case EQ:
e236a9ff 5284 fputs ("=", file); break;
c85b8963 5285 case NE:
e236a9ff 5286 fputs ("<>", file); break;
c85b8963 5287 case GT:
e236a9ff 5288 fputs ("<", file); break;
c85b8963 5289 case GE:
e236a9ff 5290 fputs ("<=", file); break;
c85b8963 5291 case GEU:
e236a9ff 5292 fputs ("<<=", file); break;
c85b8963 5293 case GTU:
e236a9ff 5294 fputs ("<<", file); break;
c85b8963 5295 case LT:
e236a9ff 5296 fputs (">", file); break;
c85b8963 5297 case LE:
e236a9ff 5298 fputs (">=", file); break;
c85b8963 5299 case LEU:
e236a9ff 5300 fputs (">>=", file); break;
c85b8963 5301 case LTU:
e236a9ff 5302 fputs (">>", file); break;
c85b8963 5303 default:
144d51f9 5304 gcc_unreachable ();
23f6f34f 5305 }
c85b8963
TG
5306 return;
5307 case 'B': /* Condition, (B)oth swapped and negate. */
5308 switch (GET_CODE (x))
5309 {
5310 case EQ:
e236a9ff 5311 fputs ("<>", file); break;
c85b8963 5312 case NE:
e236a9ff 5313 fputs ("=", file); break;
c85b8963 5314 case GT:
e236a9ff 5315 fputs (">=", file); break;
c85b8963 5316 case GE:
e236a9ff 5317 fputs (">", file); break;
c85b8963 5318 case GEU:
e236a9ff 5319 fputs (">>", file); break;
c85b8963 5320 case GTU:
e236a9ff 5321 fputs (">>=", file); break;
c85b8963 5322 case LT:
e236a9ff 5323 fputs ("<=", file); break;
c85b8963 5324 case LE:
e236a9ff 5325 fputs ("<", file); break;
c85b8963 5326 case LEU:
e236a9ff 5327 fputs ("<<", file); break;
c85b8963 5328 case LTU:
e236a9ff 5329 fputs ("<<=", file); break;
c85b8963 5330 default:
144d51f9 5331 gcc_unreachable ();
23f6f34f 5332 }
c85b8963
TG
5333 return;
5334 case 'k':
144d51f9
NS
5335 gcc_assert (GET_CODE (x) == CONST_INT);
5336 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5337 return;
520babc7 5338 case 'Q':
144d51f9
NS
5339 gcc_assert (GET_CODE (x) == CONST_INT);
5340 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5341 return;
c8d6697c 5342 case 'L':
144d51f9
NS
5343 gcc_assert (GET_CODE (x) == CONST_INT);
5344 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5345 return;
b47fbc53
JL
5346 case 'o':
5347 gcc_assert (GET_CODE (x) == CONST_INT
5348 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
34319f9a 5349 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
b47fbc53 5350 return;
4802a0d6 5351 case 'O':
144d51f9
NS
5352 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5353 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5354 return;
520babc7 5355 case 'p':
144d51f9
NS
5356 gcc_assert (GET_CODE (x) == CONST_INT);
5357 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5358 return;
c8d6697c 5359 case 'P':
144d51f9
NS
5360 gcc_assert (GET_CODE (x) == CONST_INT);
5361 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5362 return;
c85b8963
TG
5363 case 'I':
5364 if (GET_CODE (x) == CONST_INT)
5365 fputs ("i", file);
5366 return;
188538df 5367 case 'M':
2414e0e2 5368 case 'F':
188538df
TG
5369 switch (GET_CODE (XEXP (x, 0)))
5370 {
5371 case PRE_DEC:
5372 case PRE_INC:
f38b27c7
JL
5373 if (ASSEMBLER_DIALECT == 0)
5374 fputs ("s,mb", file);
5375 else
5376 fputs (",mb", file);
188538df
TG
5377 break;
5378 case POST_DEC:
5379 case POST_INC:
f38b27c7
JL
5380 if (ASSEMBLER_DIALECT == 0)
5381 fputs ("s,ma", file);
5382 else
5383 fputs (",ma", file);
188538df 5384 break;
2414e0e2 5385 case PLUS:
d8f95bed
JDA
5386 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5387 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5388 {
5389 if (ASSEMBLER_DIALECT == 0)
5390 fputs ("x", file);
5391 }
5392 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5393 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
f38b27c7
JL
5394 {
5395 if (ASSEMBLER_DIALECT == 0)
5396 fputs ("x,s", file);
5397 else
5398 fputs (",s", file);
5399 }
5400 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
2414e0e2 5401 fputs ("s", file);
188538df
TG
5402 break;
5403 default:
f38b27c7 5404 if (code == 'F' && ASSEMBLER_DIALECT == 0)
2414e0e2 5405 fputs ("s", file);
188538df
TG
5406 break;
5407 }
5408 return;
5409 case 'G':
ae9d61ab 5410 pa_output_global_address (file, x, 0);
ad238e4b
JL
5411 return;
5412 case 'H':
ae9d61ab 5413 pa_output_global_address (file, x, 1);
188538df
TG
5414 return;
5415 case 0: /* Don't do anything special */
5416 break;
a1747d2c
TG
5417 case 'Z':
5418 {
5419 unsigned op[3];
6fda0f5b 5420 compute_zdepwi_operands (INTVAL (x), op);
a1747d2c
TG
5421 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5422 return;
5423 }
520babc7
JL
5424 case 'z':
5425 {
5426 unsigned op[3];
5427 compute_zdepdi_operands (INTVAL (x), op);
5428 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5429 return;
5430 }
11881f37
AM
5431 case 'c':
5432 /* We can get here from a .vtable_inherit due to our
5433 CONSTANT_ADDRESS_P rejecting perfectly good constant
5434 addresses. */
5435 break;
188538df 5436 default:
144d51f9 5437 gcc_unreachable ();
188538df
TG
5438 }
5439 if (GET_CODE (x) == REG)
80225b66 5440 {
3ba1236f 5441 fputs (reg_names [REGNO (x)], file);
520babc7
JL
5442 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5443 {
5444 fputs ("R", file);
5445 return;
5446 }
5447 if (FP_REG_P (x)
5448 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5449 && (REGNO (x) & 1) == 0)
3ba1236f 5450 fputs ("L", file);
80225b66 5451 }
188538df
TG
5452 else if (GET_CODE (x) == MEM)
5453 {
5454 int size = GET_MODE_SIZE (GET_MODE (x));
478a4495 5455 rtx base = NULL_RTX;
188538df
TG
5456 switch (GET_CODE (XEXP (x, 0)))
5457 {
5458 case PRE_DEC:
5459 case POST_DEC:
520babc7 5460 base = XEXP (XEXP (x, 0), 0);
d2d28085 5461 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
188538df
TG
5462 break;
5463 case PRE_INC:
5464 case POST_INC:
520babc7 5465 base = XEXP (XEXP (x, 0), 0);
d2d28085 5466 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
188538df 5467 break;
d8f95bed
JDA
5468 case PLUS:
5469 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
d2d28085 5470 fprintf (file, "%s(%s)",
2414e0e2
JL
5471 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5472 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
d8f95bed 5473 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
d2d28085 5474 fprintf (file, "%s(%s)",
2414e0e2
JL
5475 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5476 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
d8f95bed
JDA
5477 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5478 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5479 {
5480 /* Because the REG_POINTER flag can get lost during reload,
1a04ac2b 5481 pa_legitimate_address_p canonicalizes the order of the
d8f95bed
JDA
5482 index and base registers in the combined move patterns. */
5483 rtx base = XEXP (XEXP (x, 0), 1);
5484 rtx index = XEXP (XEXP (x, 0), 0);
5485
5486 fprintf (file, "%s(%s)",
5487 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5488 }
2414e0e2 5489 else
cc8ca59e 5490 output_address (GET_MODE (x), XEXP (x, 0));
188538df 5491 break;
d8f95bed 5492 default:
cc8ca59e 5493 output_address (GET_MODE (x), XEXP (x, 0));
d8f95bed 5494 break;
188538df
TG
5495 }
5496 }
188538df
TG
5497 else
5498 output_addr_const (file, x);
5499}
5500
fe19a83d 5501/* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
188538df
TG
5502
5503void
ae9d61ab 5504pa_output_global_address (FILE *file, rtx x, int round_constant)
188538df 5505{
43940f6b
JL
5506
5507 /* Imagine (high (const (plus ...))). */
5508 if (GET_CODE (x) == HIGH)
5509 x = XEXP (x, 0);
5510
519104fe 5511 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
744b2d61 5512 output_addr_const (file, x);
6bb36601 5513 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
188538df 5514 {
744b2d61 5515 output_addr_const (file, x);
e236a9ff 5516 fputs ("-$global$", file);
188538df
TG
5517 }
5518 else if (GET_CODE (x) == CONST)
5519 {
519104fe 5520 const char *sep = "";
188538df 5521 int offset = 0; /* assembler wants -$global$ at end */
516c2342 5522 rtx base = NULL_RTX;
23f6f34f 5523
144d51f9 5524 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
188538df 5525 {
3ab604d5 5526 case LABEL_REF:
144d51f9 5527 case SYMBOL_REF:
188538df
TG
5528 base = XEXP (XEXP (x, 0), 0);
5529 output_addr_const (file, base);
144d51f9
NS
5530 break;
5531 case CONST_INT:
5532 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5533 break;
5534 default:
5535 gcc_unreachable ();
188538df 5536 }
188538df 5537
144d51f9 5538 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
188538df 5539 {
3ab604d5 5540 case LABEL_REF:
144d51f9 5541 case SYMBOL_REF:
188538df
TG
5542 base = XEXP (XEXP (x, 0), 1);
5543 output_addr_const (file, base);
144d51f9
NS
5544 break;
5545 case CONST_INT:
5546 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5547 break;
5548 default:
5549 gcc_unreachable ();
188538df 5550 }
188538df 5551
ad238e4b
JL
5552 /* How bogus. The compiler is apparently responsible for
5553 rounding the constant if it uses an LR field selector.
5554
5555 The linker and/or assembler seem a better place since
5556 they have to do this kind of thing already.
5557
5558 If we fail to do this, HP's optimizing linker may eliminate
5559 an addil, but not update the ldw/stw/ldo instruction that
5560 uses the result of the addil. */
5561 if (round_constant)
5562 offset = ((offset + 0x1000) & ~0x1fff);
5563
144d51f9 5564 switch (GET_CODE (XEXP (x, 0)))
188538df 5565 {
144d51f9 5566 case PLUS:
188538df
TG
5567 if (offset < 0)
5568 {
5569 offset = -offset;
5570 sep = "-";
5571 }
5572 else
5573 sep = "+";
144d51f9
NS
5574 break;
5575
5576 case MINUS:
5577 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5578 sep = "-";
5579 break;
188538df 5580
144d51f9
NS
5581 default:
5582 gcc_unreachable ();
5583 }
5584
519104fe 5585 if (!read_only_operand (base, VOIDmode) && !flag_pic)
e236a9ff 5586 fputs ("-$global$", file);
ad238e4b 5587 if (offset)
831c1763 5588 fprintf (file, "%s%d", sep, offset);
188538df
TG
5589 }
5590 else
5591 output_addr_const (file, x);
5592}
5593
1bc7c5b6
ZW
5594/* Output boilerplate text to appear at the beginning of the file.
5595 There are several possible versions. */
5596#define aputs(x) fputs(x, asm_out_file)
5597static inline void
b7849684 5598pa_file_start_level (void)
1bc7c5b6
ZW
5599{
5600 if (TARGET_64BIT)
5601 aputs ("\t.LEVEL 2.0w\n");
5602 else if (TARGET_PA_20)
5603 aputs ("\t.LEVEL 2.0\n");
5604 else if (TARGET_PA_11)
5605 aputs ("\t.LEVEL 1.1\n");
5606 else
5607 aputs ("\t.LEVEL 1.0\n");
5608}
5609
5610static inline void
b7849684 5611pa_file_start_space (int sortspace)
1bc7c5b6
ZW
5612{
5613 aputs ("\t.SPACE $PRIVATE$");
5614 if (sortspace)
5615 aputs (",SORT=16");
57d138a9
JDA
5616 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5617 if (flag_tm)
5618 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5619 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5620 "\n\t.SPACE $TEXT$");
1bc7c5b6
ZW
5621 if (sortspace)
5622 aputs (",SORT=8");
5623 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
57d138a9 5624 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
1bc7c5b6
ZW
5625}
5626
5627static inline void
b7849684 5628pa_file_start_file (int want_version)
1bc7c5b6
ZW
5629{
5630 if (write_symbols != NO_DEBUG)
5631 {
5632 output_file_directive (asm_out_file, main_input_filename);
5633 if (want_version)
5634 aputs ("\t.version\t\"01.01\"\n");
5635 }
5636}
5637
5638static inline void
b7849684 5639pa_file_start_mcount (const char *aswhat)
1bc7c5b6
ZW
5640{
5641 if (profile_flag)
5642 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5643}
5644
5645static void
b7849684 5646pa_elf_file_start (void)
1bc7c5b6
ZW
5647{
5648 pa_file_start_level ();
5649 pa_file_start_mcount ("ENTRY");
5650 pa_file_start_file (0);
5651}
5652
5653static void
b7849684 5654pa_som_file_start (void)
1bc7c5b6
ZW
5655{
5656 pa_file_start_level ();
5657 pa_file_start_space (0);
5658 aputs ("\t.IMPORT $global$,DATA\n"
5659 "\t.IMPORT $$dyncall,MILLICODE\n");
5660 pa_file_start_mcount ("CODE");
5661 pa_file_start_file (0);
5662}
5663
5664static void
b7849684 5665pa_linux_file_start (void)
1bc7c5b6
ZW
5666{
5667 pa_file_start_file (1);
5668 pa_file_start_level ();
5669 pa_file_start_mcount ("CODE");
5670}
5671
5672static void
b7849684 5673pa_hpux64_gas_file_start (void)
1bc7c5b6
ZW
5674{
5675 pa_file_start_level ();
5676#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5677 if (profile_flag)
5678 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5679#endif
5680 pa_file_start_file (1);
5681}
5682
5683static void
b7849684 5684pa_hpux64_hpas_file_start (void)
1bc7c5b6
ZW
5685{
5686 pa_file_start_level ();
5687 pa_file_start_space (1);
5688 pa_file_start_mcount ("CODE");
5689 pa_file_start_file (0);
5690}
5691#undef aputs
5692
7aaf280e
JDA
5693/* Search the deferred plabel list for SYMBOL and return its internal
5694 label. If an entry for SYMBOL is not found, a new entry is created. */
5695
5696rtx
ae9d61ab 5697pa_get_deferred_plabel (rtx symbol)
a02aa5b0 5698{
744b2d61 5699 const char *fname = XSTR (symbol, 0);
a02aa5b0
JDA
5700 size_t i;
5701
5702 /* See if we have already put this function on the list of deferred
5703 plabels. This list is generally small, so a liner search is not
5704 too ugly. If it proves too slow replace it with something faster. */
5705 for (i = 0; i < n_deferred_plabels; i++)
744b2d61 5706 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
a02aa5b0
JDA
5707 break;
5708
5709 /* If the deferred plabel list is empty, or this entry was not found
5710 on the list, create a new entry on the list. */
5711 if (deferred_plabels == NULL || i == n_deferred_plabels)
5712 {
744b2d61
JDA
5713 tree id;
5714
a02aa5b0 5715 if (deferred_plabels == 0)
766090c2 5716 deferred_plabels = ggc_alloc<deferred_plabel> ();
a02aa5b0 5717 else
a9429e29
LB
5718 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5719 deferred_plabels,
5720 n_deferred_plabels + 1);
a02aa5b0
JDA
5721
5722 i = n_deferred_plabels++;
5723 deferred_plabels[i].internal_label = gen_label_rtx ();
744b2d61 5724 deferred_plabels[i].symbol = symbol;
a02aa5b0 5725
744b2d61
JDA
5726 /* Gross. We have just implicitly taken the address of this
5727 function. Mark it in the same manner as assemble_name. */
5728 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5729 if (id)
5730 mark_referenced (id);
a02aa5b0
JDA
5731 }
5732
7aaf280e 5733 return deferred_plabels[i].internal_label;
a02aa5b0
JDA
5734}
5735
a5fe455b 5736static void
b7849684 5737output_deferred_plabels (void)
359255a9 5738{
0f8e3849 5739 size_t i;
1a83bfc3
JDA
5740
5741 /* If we have some deferred plabels, then we need to switch into the
5742 data or readonly data section, and align it to a 4 byte boundary
6416ae7f 5743 before outputting the deferred plabels. */
359255a9
JL
5744 if (n_deferred_plabels)
5745 {
1a83bfc3 5746 switch_to_section (flag_pic ? data_section : readonly_data_section);
a5fe455b 5747 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
359255a9
JL
5748 }
5749
5750 /* Now output the deferred plabels. */
5751 for (i = 0; i < n_deferred_plabels; i++)
5752 {
ecc418c4 5753 targetm.asm_out.internal_label (asm_out_file, "L",
a5fe455b 5754 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
744b2d61 5755 assemble_integer (deferred_plabels[i].symbol,
3d9268b6 5756 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
359255a9
JL
5757 }
5758}
5759
50bbeefb
JDA
5760/* Initialize optabs to point to emulation routines. */
5761
c15c90bb 5762static void
50bbeefb 5763pa_init_libfuncs (void)
c15c90bb 5764{
50bbeefb
JDA
5765 if (HPUX_LONG_DOUBLE_LIBRARY)
5766 {
5767 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5768 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5769 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5770 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5771 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5772 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5773 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5774 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5775 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5776
5777 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5778 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5779 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5780 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5781 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5782 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5783 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5784
5785 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5786 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5787 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5788 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5789
5790 set_conv_libfunc (sfix_optab, SImode, TFmode,
5791 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5792 : "_U_Qfcnvfxt_quad_to_sgl");
5793 set_conv_libfunc (sfix_optab, DImode, TFmode,
5794 "_U_Qfcnvfxt_quad_to_dbl");
5795 set_conv_libfunc (ufix_optab, SImode, TFmode,
5796 "_U_Qfcnvfxt_quad_to_usgl");
5797 set_conv_libfunc (ufix_optab, DImode, TFmode,
5798 "_U_Qfcnvfxt_quad_to_udbl");
5799
5800 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5801 "_U_Qfcnvxf_sgl_to_quad");
5802 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5803 "_U_Qfcnvxf_dbl_to_quad");
5804 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5805 "_U_Qfcnvxf_usgl_to_quad");
5806 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5807 "_U_Qfcnvxf_udbl_to_quad");
5808 }
33a55f29
RH
5809
5810 if (TARGET_SYNC_LIBCALL)
7e7c9d40 5811 init_sync_libfuncs (8);
c15c90bb 5812}
c15c90bb 5813
188538df
TG
5814/* HP's millicode routines mean something special to the assembler.
5815 Keep track of which ones we have used. */
5816
f3a4e54e 5817enum millicodes { remI, remU, divI, divU, mulI, end1000 };
b7849684 5818static void import_milli (enum millicodes);
831c1763 5819static char imported[(int) end1000];
f3a4e54e 5820static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
8b60264b 5821static const char import_string[] = ".IMPORT $$....,MILLICODE";
188538df
TG
5822#define MILLI_START 10
5823
f1c7ce82 5824static void
b7849684 5825import_milli (enum millicodes code)
188538df
TG
5826{
5827 char str[sizeof (import_string)];
23f6f34f 5828
831c1763 5829 if (!imported[(int) code])
188538df 5830 {
831c1763 5831 imported[(int) code] = 1;
188538df 5832 strcpy (str, import_string);
831c1763 5833 strncpy (str + MILLI_START, milli_names[(int) code], 4);
188538df
TG
5834 output_asm_insn (str, 0);
5835 }
5836}
5837
23f6f34f 5838/* The register constraints have put the operands and return value in
fe19a83d 5839 the proper registers. */
188538df 5840
519104fe 5841const char *
b32d5189 5842pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
188538df 5843{
9b38c2fa 5844 import_milli (mulI);
ae9d61ab 5845 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
188538df
TG
5846}
5847
fe19a83d 5848/* Emit the rtl for doing a division by a constant. */
188538df 5849
9b38c2fa 5850/* Do magic division millicodes exist for this value? */
ae9d61ab 5851const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
188538df 5852
23f6f34f 5853/* We'll use an array to keep track of the magic millicodes and
188538df 5854 whether or not we've used them already. [n][0] is signed, [n][1] is
fe19a83d 5855 unsigned. */
188538df 5856
188538df
TG
5857static int div_milli[16][2];
5858
188538df 5859int
ae9d61ab 5860pa_emit_hpdiv_const (rtx *operands, int unsignedp)
188538df
TG
5861{
5862 if (GET_CODE (operands[2]) == CONST_INT
5863 && INTVAL (operands[2]) > 0
5864 && INTVAL (operands[2]) < 16
ae9d61ab 5865 && pa_magic_milli[INTVAL (operands[2])])
188538df 5866 {
7d8b1412
AM
5867 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5868
ad2c71b7 5869 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
188538df 5870 emit
92fd5e41
KH
5871 (gen_rtx_PARALLEL
5872 (VOIDmode,
f7df4a84 5873 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
ad2c71b7
JL
5874 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5875 SImode,
5876 gen_rtx_REG (SImode, 26),
5877 operands[2])),
bd83f9a5 5878 gen_rtx_CLOBBER (VOIDmode, operands[4]),
ad2c71b7
JL
5879 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5880 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5881 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
7d8b1412 5882 gen_rtx_CLOBBER (VOIDmode, ret))));
ad2c71b7 5883 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
188538df
TG
5884 return 1;
5885 }
5886 return 0;
5887}
5888
519104fe 5889const char *
b32d5189 5890pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
188538df
TG
5891{
5892 int divisor;
23f6f34f
TG
5893
5894 /* If the divisor is a constant, try to use one of the special
188538df
TG
5895 opcodes .*/
5896 if (GET_CODE (operands[0]) == CONST_INT)
5897 {
2c4ff308 5898 static char buf[100];
188538df
TG
5899 divisor = INTVAL (operands[0]);
5900 if (!div_milli[divisor][unsignedp])
5901 {
2c4ff308 5902 div_milli[divisor][unsignedp] = 1;
188538df
TG
5903 if (unsignedp)
5904 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5905 else
5906 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
188538df
TG
5907 }
5908 if (unsignedp)
2c4ff308 5909 {
4a0a75dd
KG
5910 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5911 INTVAL (operands[0]));
ae9d61ab
JDA
5912 return pa_output_millicode_call (insn,
5913 gen_rtx_SYMBOL_REF (SImode, buf));
2c4ff308
JL
5914 }
5915 else
5916 {
4a0a75dd
KG
5917 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5918 INTVAL (operands[0]));
ae9d61ab
JDA
5919 return pa_output_millicode_call (insn,
5920 gen_rtx_SYMBOL_REF (SImode, buf));
2c4ff308 5921 }
188538df 5922 }
fe19a83d 5923 /* Divisor isn't a special constant. */
188538df
TG
5924 else
5925 {
5926 if (unsignedp)
5927 {
5928 import_milli (divU);
ae9d61ab 5929 return pa_output_millicode_call (insn,
ad2c71b7 5930 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
188538df
TG
5931 }
5932 else
5933 {
5934 import_milli (divI);
ae9d61ab 5935 return pa_output_millicode_call (insn,
ad2c71b7 5936 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
188538df
TG
5937 }
5938 }
5939}
5940
fe19a83d 5941/* Output a $$rem millicode to do mod. */
188538df 5942
519104fe 5943const char *
b32d5189 5944pa_output_mod_insn (int unsignedp, rtx_insn *insn)
188538df
TG
5945{
5946 if (unsignedp)
5947 {
5948 import_milli (remU);
ae9d61ab
JDA
5949 return pa_output_millicode_call (insn,
5950 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
188538df
TG
5951 }
5952 else
5953 {
5954 import_milli (remI);
ae9d61ab
JDA
5955 return pa_output_millicode_call (insn,
5956 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
188538df
TG
5957 }
5958}
5959
5960void
e0d80a58 5961pa_output_arg_descriptor (rtx_insn *call_insn)
188538df 5962{
519104fe 5963 const char *arg_regs[4];
ef4bddc2 5964 machine_mode arg_mode;
80225b66 5965 rtx link;
188538df
TG
5966 int i, output_flag = 0;
5967 int regno;
23f6f34f 5968
520babc7 5969 /* We neither need nor want argument location descriptors for the
e25724d8
AM
5970 64bit runtime environment or the ELF32 environment. */
5971 if (TARGET_64BIT || TARGET_ELF32)
520babc7
JL
5972 return;
5973
188538df
TG
5974 for (i = 0; i < 4; i++)
5975 arg_regs[i] = 0;
5976
2822d96e
JL
5977 /* Specify explicitly that no argument relocations should take place
5978 if using the portable runtime calling conventions. */
5979 if (TARGET_PORTABLE_RUNTIME)
5980 {
e236a9ff
JL
5981 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5982 asm_out_file);
2822d96e
JL
5983 return;
5984 }
5985
b64925dc 5986 gcc_assert (CALL_P (call_insn));
144d51f9
NS
5987 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5988 link; link = XEXP (link, 1))
188538df 5989 {
80225b66 5990 rtx use = XEXP (link, 0);
3529be83 5991
80225b66
TG
5992 if (! (GET_CODE (use) == USE
5993 && GET_CODE (XEXP (use, 0)) == REG
5994 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
3529be83
RS
5995 continue;
5996
80225b66
TG
5997 arg_mode = GET_MODE (XEXP (use, 0));
5998 regno = REGNO (XEXP (use, 0));
188538df 5999 if (regno >= 23 && regno <= 26)
a9d91d6f
RS
6000 {
6001 arg_regs[26 - regno] = "GR";
6002 if (arg_mode == DImode)
6003 arg_regs[25 - regno] = "GR";
6004 }
80225b66 6005 else if (regno >= 32 && regno <= 39)
188538df
TG
6006 {
6007 if (arg_mode == SFmode)
80225b66 6008 arg_regs[(regno - 32) / 2] = "FR";
d0616842 6009 else
188538df 6010 {
22d6e660 6011#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
80225b66
TG
6012 arg_regs[(regno - 34) / 2] = "FR";
6013 arg_regs[(regno - 34) / 2 + 1] = "FU";
188538df 6014#else
80225b66
TG
6015 arg_regs[(regno - 34) / 2] = "FU";
6016 arg_regs[(regno - 34) / 2 + 1] = "FR";
188538df
TG
6017#endif
6018 }
188538df
TG
6019 }
6020 }
6021 fputs ("\t.CALL ", asm_out_file);
6022 for (i = 0; i < 4; i++)
6023 {
6024 if (arg_regs[i])
6025 {
6026 if (output_flag++)
6027 fputc (',', asm_out_file);
6028 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6029 }
6030 }
6031 fputc ('\n', asm_out_file);
6032}
6033\f
1a04ac2b
JDA
6034/* Inform reload about cases where moving X with a mode MODE to or from
6035 a register in RCLASS requires an extra scratch or immediate register.
6036 Return the class needed for the immediate register. */
483d7ad3 6037
a87cf97e
JR
6038static reg_class_t
6039pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
ef4bddc2 6040 machine_mode mode, secondary_reload_info *sri)
ec963611 6041{
715a567d 6042 int regno;
a87cf97e 6043 enum reg_class rclass = (enum reg_class) rclass_i;
e236a9ff 6044
ec963611 6045 /* Handle the easy stuff first. */
0a2aaacc 6046 if (rclass == R1_REGS)
ec963611 6047 return NO_REGS;
e236a9ff 6048
ec963611
JDA
6049 if (REG_P (x))
6050 {
6051 regno = REGNO (x);
0a2aaacc 6052 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
ec963611
JDA
6053 return NO_REGS;
6054 }
69f8a2d6
JDA
6055 else
6056 regno = -1;
188538df 6057
ec963611
JDA
6058 /* If we have something like (mem (mem (...)), we can safely assume the
6059 inner MEM will end up in a general register after reloading, so there's
6060 no need for a secondary reload. */
6061 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6062 return NO_REGS;
188538df 6063
6bb36601 6064 /* Trying to load a constant into a FP register during PIC code
1a04ac2b
JDA
6065 generation requires %r1 as a scratch register. For float modes,
6066 the only legitimate constant is CONST0_RTX. However, there are
6067 a few patterns that accept constant double operands. */
7ee72796 6068 if (flag_pic
0a2aaacc 6069 && FP_REG_CLASS_P (rclass)
ec963611 6070 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
e236a9ff 6071 {
1a04ac2b
JDA
6072 switch (mode)
6073 {
4e10a5a7 6074 case E_SImode:
1a04ac2b
JDA
6075 sri->icode = CODE_FOR_reload_insi_r1;
6076 break;
6077
4e10a5a7 6078 case E_DImode:
1a04ac2b
JDA
6079 sri->icode = CODE_FOR_reload_indi_r1;
6080 break;
6081
4e10a5a7 6082 case E_SFmode:
1a04ac2b
JDA
6083 sri->icode = CODE_FOR_reload_insf_r1;
6084 break;
6085
4e10a5a7 6086 case E_DFmode:
1a04ac2b
JDA
6087 sri->icode = CODE_FOR_reload_indf_r1;
6088 break;
6089
6090 default:
6091 gcc_unreachable ();
6092 }
ec963611 6093 return NO_REGS;
e236a9ff 6094 }
e236a9ff 6095
1a04ac2b
JDA
6096 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6097 register when we're generating PIC code or when the operand isn't
715a567d 6098 readonly. */
ae9d61ab 6099 if (pa_symbolic_expression_p (x))
715a567d
JDA
6100 {
6101 if (GET_CODE (x) == HIGH)
6102 x = XEXP (x, 0);
6103
6104 if (flag_pic || !read_only_operand (x, VOIDmode))
6105 {
1a04ac2b
JDA
6106 switch (mode)
6107 {
4e10a5a7 6108 case E_SImode:
1a04ac2b
JDA
6109 sri->icode = CODE_FOR_reload_insi_r1;
6110 break;
6111
4e10a5a7 6112 case E_DImode:
1a04ac2b
JDA
6113 sri->icode = CODE_FOR_reload_indi_r1;
6114 break;
6115
6116 default:
6117 gcc_unreachable ();
6118 }
715a567d
JDA
6119 return NO_REGS;
6120 }
6121 }
6122
ec963611
JDA
6123 /* Profiling showed the PA port spends about 1.3% of its compilation
6124 time in true_regnum from calls inside pa_secondary_reload_class. */
6125 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6126 regno = true_regnum (x);
39dfb55a 6127
1a04ac2b 6128 /* Handle reloads for floating point loads and stores. */
6982c5d4 6129 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
0a2aaacc 6130 && FP_REG_CLASS_P (rclass))
6982c5d4 6131 {
1a04ac2b 6132 if (MEM_P (x))
6982c5d4
JDA
6133 {
6134 x = XEXP (x, 0);
6135
feb675e4
JDA
6136 /* We don't need a secondary reload for indexed memory addresses.
6137
6138 When INT14_OK_STRICT is true, it might appear that we could
6139 directly allow register indirect memory addresses. However,
6140 this doesn't work because we don't support SUBREGs in
6141 floating-point register copies and reload doesn't tell us
6142 when it's going to use a SUBREG. */
6143 if (IS_INDEX_ADDR_P (x))
6982c5d4 6144 return NO_REGS;
6982c5d4
JDA
6145 }
6146
6147 /* Request a secondary reload with a general scratch register
073a8998 6148 for everything else. ??? Could symbolic operands be handled
6982c5d4 6149 directly when generating non-pic PA 2.0 code? */
f9621cc4
RS
6150 sri->icode = (in_p
6151 ? direct_optab_handler (reload_in_optab, mode)
6152 : direct_optab_handler (reload_out_optab, mode));
6982c5d4
JDA
6153 return NO_REGS;
6154 }
6155
483d7ad3
JDA
6156 /* A SAR<->FP register copy requires an intermediate general register
6157 and secondary memory. We need a secondary reload with a general
6158 scratch register for spills. */
6159 if (rclass == SHIFT_REGS)
ec963611 6160 {
483d7ad3
JDA
6161 /* Handle spill. */
6162 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6163 {
6164 sri->icode = (in_p
6165 ? direct_optab_handler (reload_in_optab, mode)
6166 : direct_optab_handler (reload_out_optab, mode));
6167 return NO_REGS;
6168 }
6169
6170 /* Handle FP copy. */
6171 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6172 return GENERAL_REGS;
ec963611 6173 }
fa5e5c1e 6174
26ee120d 6175 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
483d7ad3
JDA
6176 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6177 && FP_REG_CLASS_P (rclass))
6178 return GENERAL_REGS;
43940f6b 6179
fa5e5c1e 6180 return NO_REGS;
188538df
TG
6181}
6182
16c16a24
JDA
6183/* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6184 is only marked as live on entry by df-scan when it is a fixed
6185 register. It isn't a fixed register in the 64-bit runtime,
6186 so we need to mark it here. */
6187
6188static void
6189pa_extra_live_on_entry (bitmap regs)
6190{
6191 if (TARGET_64BIT)
6192 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6193}
6194
6195/* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6196 to prevent it from being deleted. */
6197
6198rtx
6199pa_eh_return_handler_rtx (void)
6200{
6201 rtx tmp;
6202
bc707992 6203 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
16c16a24
JDA
6204 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6205 tmp = gen_rtx_MEM (word_mode, tmp);
6206 tmp->volatil = 1;
6207 return tmp;
6208}
6209
8cd5a4e0
RH
6210/* In the 32-bit runtime, arguments larger than eight bytes are passed
6211 by invisible reference. As a GCC extension, we also pass anything
6212 with a zero or variable size by reference.
6213
6214 The 64-bit runtime does not describe passing any types by invisible
6215 reference. The internals of GCC can't currently handle passing
6216 empty structures, and zero or variable length arrays when they are
6217 not passed entirely on the stack or by reference. Thus, as a GCC
6218 extension, we pass these types by reference. The HP compiler doesn't
6219 support these types, so hopefully there shouldn't be any compatibility
6220 issues. This may have to be revisited when HP releases a C99 compiler
6221 or updates the ABI. */
6222
6223static bool
d5cc9181 6224pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
ef4bddc2 6225 machine_mode mode, const_tree type,
8cd5a4e0
RH
6226 bool named ATTRIBUTE_UNUSED)
6227{
6228 HOST_WIDE_INT size;
6229
6230 if (type)
6231 size = int_size_in_bytes (type);
6232 else
6233 size = GET_MODE_SIZE (mode);
6234
6235 if (TARGET_64BIT)
6236 return size <= 0;
6237 else
6238 return size <= 0 || size > 8;
6239}
6240
188538df 6241enum direction
ef4bddc2 6242pa_function_arg_padding (machine_mode mode, const_tree type)
188538df 6243{
9dff28ab 6244 if (mode == BLKmode
c3e39a47
JDA
6245 || (TARGET_64BIT
6246 && type
6247 && (AGGREGATE_TYPE_P (type)
6248 || TREE_CODE (type) == COMPLEX_TYPE
6249 || TREE_CODE (type) == VECTOR_TYPE)))
9dff28ab
JDA
6250 {
6251 /* Return none if justification is not required. */
6252 if (type
6253 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6254 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6255 return none;
6256
6257 /* The directions set here are ignored when a BLKmode argument larger
6258 than a word is placed in a register. Different code is used for
6259 the stack and registers. This makes it difficult to have a
6260 consistent data representation for both the stack and registers.
6261 For both runtimes, the justification and padding for arguments on
6262 the stack and in registers should be identical. */
6263 if (TARGET_64BIT)
6264 /* The 64-bit runtime specifies left justification for aggregates. */
6265 return upward;
188538df 6266 else
9dff28ab
JDA
6267 /* The 32-bit runtime architecture specifies right justification.
6268 When the argument is passed on the stack, the argument is padded
6269 with garbage on the left. The HP compiler pads with zeros. */
6270 return downward;
188538df 6271 }
9dff28ab
JDA
6272
6273 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
188538df 6274 return downward;
188538df
TG
6275 else
6276 return none;
6277}
6278
188538df 6279\f
648d2ffc
RH
6280/* Do what is necessary for `va_start'. We look at the current function
6281 to determine if stdargs or varargs is used and fill in an initial
6282 va_list. A pointer to this constructor is returned. */
188538df 6283
3f12cd9b 6284static rtx
b7849684 6285hppa_builtin_saveregs (void)
188538df 6286{
5e32727c 6287 rtx offset, dest;
188538df 6288 tree fntype = TREE_TYPE (current_function_decl);
f38958e8 6289 int argadj = ((!stdarg_p (fntype))
188538df
TG
6290 ? UNITS_PER_WORD : 0);
6291
6292 if (argadj)
0a81f074 6293 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
188538df 6294 else
38173d38 6295 offset = crtl->args.arg_offset_rtx;
17e1dfa2 6296
520babc7
JL
6297 if (TARGET_64BIT)
6298 {
6299 int i, off;
6619e96c 6300
520babc7
JL
6301 /* Adjust for varargs/stdarg differences. */
6302 if (argadj)
0a81f074 6303 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
520babc7 6304 else
38173d38 6305 offset = crtl->args.arg_offset_rtx;
520babc7
JL
6306
6307 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6308 from the incoming arg pointer and growing to larger addresses. */
6309 for (i = 26, off = -64; i >= 19; i--, off += 8)
6310 emit_move_insn (gen_rtx_MEM (word_mode,
0a81f074
RS
6311 plus_constant (Pmode,
6312 arg_pointer_rtx, off)),
520babc7
JL
6313 gen_rtx_REG (word_mode, i));
6314
6315 /* The incoming args pointer points just beyond the flushback area;
f710504c 6316 normally this is not a serious concern. However, when we are doing
520babc7
JL
6317 varargs/stdargs we want to make the arg pointer point to the start
6318 of the incoming argument area. */
6319 emit_move_insn (virtual_incoming_args_rtx,
0a81f074 6320 plus_constant (Pmode, arg_pointer_rtx, -64));
520babc7
JL
6321
6322 /* Now return a pointer to the first anonymous argument. */
6323 return copy_to_reg (expand_binop (Pmode, add_optab,
6324 virtual_incoming_args_rtx,
6325 offset, 0, 0, OPTAB_LIB_WIDEN));
6326 }
6327
fe19a83d 6328 /* Store general registers on the stack. */
ad2c71b7 6329 dest = gen_rtx_MEM (BLKmode,
0a81f074 6330 plus_constant (Pmode, crtl->args.internal_arg_pointer,
ad2c71b7 6331 -16));
ba4828e0 6332 set_mem_alias_set (dest, get_varargs_alias_set ());
8ac61af7 6333 set_mem_align (dest, BITS_PER_WORD);
c6b97fac 6334 move_block_from_reg (23, dest, 4);
5e32727c 6335
39dfb55a
JL
6336 /* move_block_from_reg will emit code to store the argument registers
6337 individually as scalar stores.
6338
6339 However, other insns may later load from the same addresses for
956d6950 6340 a structure load (passing a struct to a varargs routine).
39dfb55a
JL
6341
6342 The alias code assumes that such aliasing can never happen, so we
6343 have to keep memory referencing insns from moving up beyond the
6344 last argument register store. So we emit a blockage insn here. */
6345 emit_insn (gen_blockage ());
6346
17e1dfa2 6347 return copy_to_reg (expand_binop (Pmode, add_optab,
38173d38 6348 crtl->args.internal_arg_pointer,
17e1dfa2 6349 offset, 0, 0, OPTAB_LIB_WIDEN));
188538df 6350}
d2a94ec0 6351
d7bd8aeb 6352static void
b7849684 6353hppa_va_start (tree valist, rtx nextarg)
ca5f4364
RH
6354{
6355 nextarg = expand_builtin_saveregs ();
e5faf155 6356 std_expand_builtin_va_start (valist, nextarg);
ca5f4364
RH
6357}
6358
8101c928 6359static tree
726a989a
RB
6360hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6361 gimple_seq *post_p)
ca5f4364 6362{
520babc7
JL
6363 if (TARGET_64BIT)
6364 {
8101c928 6365 /* Args grow upward. We can use the generic routines. */
af064de5 6366 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
ca5f4364 6367 }
9dff28ab 6368 else /* !TARGET_64BIT */
ca5f4364 6369 {
8101c928
RH
6370 tree ptr = build_pointer_type (type);
6371 tree valist_type;
6372 tree t, u;
6373 unsigned int size, ofs;
af064de5 6374 bool indirect;
ca5f4364 6375
af064de5 6376 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
8101c928 6377 if (indirect)
9dff28ab 6378 {
8101c928
RH
6379 type = ptr;
6380 ptr = build_pointer_type (type);
ca5f4364 6381 }
8101c928
RH
6382 size = int_size_in_bytes (type);
6383 valist_type = TREE_TYPE (valist);
9dff28ab 6384
8101c928 6385 /* Args grow down. Not handled by generic routines. */
9dff28ab 6386
5be014d5
AP
6387 u = fold_convert (sizetype, size_in_bytes (type));
6388 u = fold_build1 (NEGATE_EXPR, sizetype, u);
5d49b6a7 6389 t = fold_build_pointer_plus (valist, u);
9dff28ab 6390
e4f1aef1
RG
6391 /* Align to 4 or 8 byte boundary depending on argument size. */
6392
6393 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6394 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
5be014d5 6395 t = fold_convert (valist_type, t);
8101c928 6396
66863d89 6397 t = build2 (MODIFY_EXPR, valist_type, valist, t);
ca5f4364 6398
8101c928
RH
6399 ofs = (8 - size) % 4;
6400 if (ofs != 0)
5d49b6a7 6401 t = fold_build_pointer_plus_hwi (t, ofs);
ca5f4364 6402
8101c928 6403 t = fold_convert (ptr, t);
d6e9821f 6404 t = build_va_arg_indirect_ref (t);
ca5f4364 6405
8101c928 6406 if (indirect)
d6e9821f 6407 t = build_va_arg_indirect_ref (t);
ca5f4364 6408
8101c928
RH
6409 return t;
6410 }
6411}
ca5f4364 6412
83c32f2e
JDA
6413/* True if MODE is valid for the target. By "valid", we mean able to
6414 be manipulated in non-trivial ways. In particular, this means all
6415 the arithmetic is supported.
6416
6417 Currently, TImode is not valid as the HP 64-bit runtime documentation
6418 doesn't document the alignment and calling conventions for this type.
6419 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6420 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6421
6422static bool
18e2a8b8 6423pa_scalar_mode_supported_p (scalar_mode mode)
83c32f2e
JDA
6424{
6425 int precision = GET_MODE_PRECISION (mode);
6426
6427 switch (GET_MODE_CLASS (mode))
6428 {
6429 case MODE_PARTIAL_INT:
6430 case MODE_INT:
6431 if (precision == CHAR_TYPE_SIZE)
6432 return true;
6433 if (precision == SHORT_TYPE_SIZE)
6434 return true;
6435 if (precision == INT_TYPE_SIZE)
6436 return true;
6437 if (precision == LONG_TYPE_SIZE)
6438 return true;
6439 if (precision == LONG_LONG_TYPE_SIZE)
6440 return true;
6441 return false;
6442
6443 case MODE_FLOAT:
6444 if (precision == FLOAT_TYPE_SIZE)
6445 return true;
6446 if (precision == DOUBLE_TYPE_SIZE)
6447 return true;
6448 if (precision == LONG_DOUBLE_TYPE_SIZE)
6449 return true;
6450 return false;
6451
70c1d012
JDA
6452 case MODE_DECIMAL_FLOAT:
6453 return false;
6454
83c32f2e
JDA
6455 default:
6456 gcc_unreachable ();
6457 }
6458}
6459
f5e66865 6460/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
16923e7b 6461 it branches into the delay slot. Otherwise, return FALSE. */
f5e66865
JDA
6462
6463static bool
84034c69 6464branch_to_delay_slot_p (rtx_insn *insn)
f5e66865 6465{
e0d80a58 6466 rtx_insn *jump_insn;
16923e7b 6467
f5e66865
JDA
6468 if (dbr_sequence_length ())
6469 return FALSE;
6470
7c9796ed 6471 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
16923e7b
JDA
6472 while (insn)
6473 {
6474 insn = next_active_insn (insn);
6475 if (jump_insn == insn)
6476 return TRUE;
6477
6478 /* We can't rely on the length of asms. So, we return FALSE when
6479 the branch is followed by an asm. */
6480 if (!insn
6481 || GET_CODE (PATTERN (insn)) == ASM_INPUT
93671519 6482 || asm_noperands (PATTERN (insn)) >= 0
16923e7b
JDA
6483 || get_attr_length (insn) > 0)
6484 break;
6485 }
6486
6487 return FALSE;
f5e66865
JDA
6488}
6489
16923e7b 6490/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
f5e66865
JDA
6491
6492 This occurs when INSN has an unfilled delay slot and is followed
16923e7b
JDA
6493 by an asm. Disaster can occur if the asm is empty and the jump
6494 branches into the delay slot. So, we add a nop in the delay slot
6495 when this occurs. */
f5e66865
JDA
6496
6497static bool
84034c69 6498branch_needs_nop_p (rtx_insn *insn)
f5e66865 6499{
e0d80a58 6500 rtx_insn *jump_insn;
f5e66865
JDA
6501
6502 if (dbr_sequence_length ())
6503 return FALSE;
6504
7c9796ed 6505 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
16923e7b
JDA
6506 while (insn)
6507 {
6508 insn = next_active_insn (insn);
6509 if (!insn || jump_insn == insn)
6510 return TRUE;
6511
6512 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
93671519 6513 || asm_noperands (PATTERN (insn)) >= 0)
16923e7b
JDA
6514 && get_attr_length (insn) > 0)
6515 break;
6516 }
6517
6518 return FALSE;
6519}
6520
6521/* Return TRUE if INSN, a forward jump insn, can use nullification
6522 to skip the following instruction. This avoids an extra cycle due
6523 to a mis-predicted branch when we fall through. */
6524
6525static bool
84034c69 6526use_skip_p (rtx_insn *insn)
16923e7b 6527{
7c9796ed 6528 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
16923e7b
JDA
6529
6530 while (insn)
6531 {
6532 insn = next_active_insn (insn);
6533
6534 /* We can't rely on the length of asms, so we can't skip asms. */
6535 if (!insn
6536 || GET_CODE (PATTERN (insn)) == ASM_INPUT
93671519 6537 || asm_noperands (PATTERN (insn)) >= 0)
16923e7b
JDA
6538 break;
6539 if (get_attr_length (insn) == 4
6540 && jump_insn == next_active_insn (insn))
6541 return TRUE;
6542 if (get_attr_length (insn) > 0)
6543 break;
6544 }
6545
6546 return FALSE;
f5e66865
JDA
6547}
6548
23f6f34f
TG
6549/* This routine handles all the normal conditional branch sequences we
6550 might need to generate. It handles compare immediate vs compare
6551 register, nullification of delay slots, varying length branches,
d2364a74 6552 negated branches, and all combinations of the above. It returns the
23f6f34f 6553 output appropriate to emit the branch corresponding to all given
d2364a74
JL
6554 parameters. */
6555
519104fe 6556const char *
b32d5189 6557pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
b1a275e1 6558{
d2364a74 6559 static char buf[100];
16923e7b 6560 bool useskip;
16d74a3c
JDA
6561 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6562 int length = get_attr_length (insn);
6563 int xdelay;
d2364a74 6564
112cdef5 6565 /* A conditional branch to the following instruction (e.g. the delay slot)
02a57c73
JDA
6566 is asking for a disaster. This can happen when not optimizing and
6567 when jump optimization fails.
b1a275e1 6568
7772f0a9
JDA
6569 While it is usually safe to emit nothing, this can fail if the
6570 preceding instruction is a nullified branch with an empty delay
6571 slot and the same branch target as this branch. We could check
6572 for this but jump optimization should eliminate nop jumps. It
6573 is always safe to emit a nop. */
f5e66865 6574 if (branch_to_delay_slot_p (insn))
02a57c73 6575 return "nop";
23f6f34f 6576
ae2ea719
JDA
6577 /* The doubleword form of the cmpib instruction doesn't have the LEU
6578 and GTU conditions while the cmpb instruction does. Since we accept
6579 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6580 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6581 operands[2] = gen_rtx_REG (DImode, 0);
9972f30d
SE
6582 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6583 operands[1] = gen_rtx_REG (DImode, 0);
ae2ea719 6584
b9821af8
JL
6585 /* If this is a long branch with its delay slot unfilled, set `nullify'
6586 as it can nullify the delay slot and save a nop. */
a1b36964 6587 if (length == 8 && dbr_sequence_length () == 0)
b9821af8
JL
6588 nullify = 1;
6589
6590 /* If this is a short forward conditional branch which did not get
6591 its delay slot filled, the delay slot can still be nullified. */
a1b36964 6592 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b9821af8
JL
6593 nullify = forward_branch_p (insn);
6594
23f6f34f 6595 /* A forward branch over a single nullified insn can be done with a
d2364a74
JL
6596 comclr instruction. This avoids a single cycle penalty due to
6597 mis-predicted branch if we fall through (branch not taken). */
16923e7b 6598 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
d2364a74
JL
6599
6600 switch (length)
6601 {
b9821af8
JL
6602 /* All short conditional branches except backwards with an unfilled
6603 delay slot. */
a1b36964 6604 case 4:
d2364a74 6605 if (useskip)
f38b27c7 6606 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
d2364a74 6607 else
f38b27c7 6608 strcpy (buf, "{com%I2b,|cmp%I2b,}");
520babc7
JL
6609 if (GET_MODE (operands[1]) == DImode)
6610 strcat (buf, "*");
d2364a74
JL
6611 if (negated)
6612 strcat (buf, "%B3");
6613 else
6614 strcat (buf, "%S3");
6615 if (useskip)
3b5e5fb3 6616 strcat (buf, " %2,%r1,%%r0");
d2364a74 6617 else if (nullify)
f5e66865
JDA
6618 {
6619 if (branch_needs_nop_p (insn))
6620 strcat (buf, ",n %2,%r1,%0%#");
6621 else
6622 strcat (buf, ",n %2,%r1,%0");
6623 }
23f6f34f 6624 else
dcaeffef 6625 strcat (buf, " %2,%r1,%0");
d2364a74
JL
6626 break;
6627
5bdc5878 6628 /* All long conditionals. Note a short backward branch with an
b9821af8
JL
6629 unfilled delay slot is treated just like a long backward branch
6630 with an unfilled delay slot. */
a1b36964 6631 case 8:
b9821af8 6632 /* Handle weird backwards branch with a filled delay slot
16d74a3c 6633 which is nullified. */
b9821af8
JL
6634 if (dbr_sequence_length () != 0
6635 && ! forward_branch_p (insn)
6636 && nullify)
6637 {
f38b27c7 6638 strcpy (buf, "{com%I2b,|cmp%I2b,}");
520babc7
JL
6639 if (GET_MODE (operands[1]) == DImode)
6640 strcat (buf, "*");
b9821af8
JL
6641 if (negated)
6642 strcat (buf, "%S3");
6643 else
6644 strcat (buf, "%B3");
3b5e5fb3 6645 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
b9821af8 6646 }
923f781d
JL
6647 /* Handle short backwards branch with an unfilled delay slot.
6648 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6649 taken and untaken branches. */
6650 else if (dbr_sequence_length () == 0
6651 && ! forward_branch_p (insn)
9d98a694
AO
6652 && INSN_ADDRESSES_SET_P ()
6653 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6654 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d 6655 {
f38b27c7 6656 strcpy (buf, "{com%I2b,|cmp%I2b,}");
520babc7
JL
6657 if (GET_MODE (operands[1]) == DImode)
6658 strcat (buf, "*");
923f781d 6659 if (negated)
dcaeffef 6660 strcat (buf, "%B3 %2,%r1,%0%#");
923f781d 6661 else
dcaeffef 6662 strcat (buf, "%S3 %2,%r1,%0%#");
923f781d 6663 }
d2364a74 6664 else
b9821af8 6665 {
f38b27c7 6666 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
520babc7
JL
6667 if (GET_MODE (operands[1]) == DImode)
6668 strcat (buf, "*");
b9821af8
JL
6669 if (negated)
6670 strcat (buf, "%S3");
6671 else
6672 strcat (buf, "%B3");
6673 if (nullify)
3b5e5fb3 6674 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
b9821af8 6675 else
3b5e5fb3 6676 strcat (buf, " %2,%r1,%%r0\n\tb %0");
b9821af8 6677 }
d2364a74
JL
6678 break;
6679
16d74a3c 6680 default:
685d0e07 6681 /* The reversed conditional branch must branch over one additional
16d74a3c 6682 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 6683 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
6684 nullified forward branch, the instruction after the reversed
6685 condition branch must be nullified. */
6686 if (dbr_sequence_length () == 0
6687 || (nullify && forward_branch_p (insn)))
6688 {
6689 nullify = 1;
6690 xdelay = 0;
6691 operands[4] = GEN_INT (length);
6692 }
6693 else
6694 {
6695 xdelay = 1;
6696 operands[4] = GEN_INT (length + 4);
6697 }
4bcb9e3f
JL
6698
6699 /* Create a reversed conditional branch which branches around
6700 the following insns. */
685d0e07
JDA
6701 if (GET_MODE (operands[1]) != DImode)
6702 {
6703 if (nullify)
6704 {
6705 if (negated)
6706 strcpy (buf,
6707 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6708 else
6709 strcpy (buf,
6710 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6711 }
6712 else
6713 {
6714 if (negated)
6715 strcpy (buf,
6716 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6717 else
6718 strcpy (buf,
6719 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6720 }
6721 }
4bcb9e3f 6722 else
520babc7 6723 {
685d0e07
JDA
6724 if (nullify)
6725 {
6726 if (negated)
6727 strcpy (buf,
6728 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6729 else
6730 strcpy (buf,
6731 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6732 }
520babc7 6733 else
685d0e07
JDA
6734 {
6735 if (negated)
6736 strcpy (buf,
6737 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6738 else
6739 strcpy (buf,
6740 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6741 }
520babc7 6742 }
4bcb9e3f 6743
16d74a3c 6744 output_asm_insn (buf, operands);
ae9d61ab 6745 return pa_output_lbranch (operands[0], insn, xdelay);
685d0e07
JDA
6746 }
6747 return buf;
6748}
4bcb9e3f 6749
568de9bb
JDA
6750/* Output a PIC pc-relative instruction sequence to load the address of
6751 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
6752 or a code label. OPERANDS[1] specifies the register to use to load
6753 the program counter. OPERANDS[3] may be used for label generation
6754 The sequence is always three instructions in length. The program
6755 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6756 Register %r1 is clobbered. */
6757
6758static void
6759pa_output_pic_pcrel_sequence (rtx *operands)
6760{
6761 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6762 if (TARGET_PA_20)
6763 {
6764 /* We can use mfia to determine the current program counter. */
6765 if (TARGET_SOM || !TARGET_GAS)
6766 {
6767 operands[3] = gen_label_rtx ();
6768 targetm.asm_out.internal_label (asm_out_file, "L",
6769 CODE_LABEL_NUMBER (operands[3]));
6770 output_asm_insn ("mfia %1", operands);
6771 output_asm_insn ("addil L'%0-%l3,%1", operands);
6772 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6773 }
6774 else
6775 {
6776 output_asm_insn ("mfia %1", operands);
6777 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6778 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6779 }
6780 }
6781 else
6782 {
6783 /* We need to use a branch to determine the current program counter. */
6784 output_asm_insn ("{bl|b,l} .+8,%1", operands);
6785 if (TARGET_SOM || !TARGET_GAS)
6786 {
6787 operands[3] = gen_label_rtx ();
6788 output_asm_insn ("addil L'%0-%l3,%1", operands);
6789 targetm.asm_out.internal_label (asm_out_file, "L",
6790 CODE_LABEL_NUMBER (operands[3]));
6791 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6792 }
6793 else
6794 {
6795 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6796 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
6797 }
6798 }
6799}
6800
16d74a3c
JDA
6801/* This routine handles output of long unconditional branches that
6802 exceed the maximum range of a simple branch instruction. Since
6803 we don't have a register available for the branch, we save register
6804 %r1 in the frame marker, load the branch destination DEST into %r1,
6805 execute the branch, and restore %r1 in the delay slot of the branch.
6806
6807 Since long branches may have an insn in the delay slot and the
6808 delay slot is used to restore %r1, we in general need to extract
6809 this insn and execute it before the branch. However, to facilitate
6810 use of this function by conditional branches, we also provide an
6811 option to not extract the delay insn so that it will be emitted
6812 after the long branch. So, if there is an insn in the delay slot,
6813 it is extracted if XDELAY is nonzero.
6814
6815 The lengths of the various long-branch sequences are 20, 16 and 24
6816 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
4bcb9e3f 6817
685d0e07 6818const char *
b32d5189 6819pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
685d0e07 6820{
568de9bb 6821 rtx xoperands[4];
685d0e07
JDA
6822
6823 xoperands[0] = dest;
4bcb9e3f 6824
685d0e07 6825 /* First, free up the delay slot. */
16d74a3c 6826 if (xdelay && dbr_sequence_length () != 0)
685d0e07
JDA
6827 {
6828 /* We can't handle a jump in the delay slot. */
b64925dc 6829 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
4bcb9e3f 6830
685d0e07 6831 final_scan_insn (NEXT_INSN (insn), asm_out_file,
c9d691e9 6832 optimize, 0, NULL);
4bcb9e3f 6833
685d0e07 6834 /* Now delete the delay insn. */
a38e7aa5 6835 SET_INSN_DELETED (NEXT_INSN (insn));
685d0e07 6836 }
4bcb9e3f 6837
685d0e07
JDA
6838 /* Output an insn to save %r1. The runtime documentation doesn't
6839 specify whether the "Clean Up" slot in the callers frame can
6840 be clobbered by the callee. It isn't copied by HP's builtin
6841 alloca, so this suggests that it can be clobbered if necessary.
6842 The "Static Link" location is copied by HP builtin alloca, so
6843 we avoid using it. Using the cleanup slot might be a problem
6844 if we have to interoperate with languages that pass cleanup
6845 information. However, it should be possible to handle these
6846 situations with GCC's asm feature.
6847
6848 The "Current RP" slot is reserved for the called procedure, so
6849 we try to use it when we don't have a frame of our own. It's
6850 rather unlikely that we won't have a frame when we need to emit
6851 a very long branch.
6852
6853 Really the way to go long term is a register scavenger; goto
6854 the target of the jump and find a register which we can use
6855 as a scratch to hold the value in %r1. Then, we wouldn't have
6856 to free up the delay slot or clobber a slot that may be needed
6857 for other purposes. */
6858 if (TARGET_64BIT)
6859 {
6fb5fa3c 6860 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6861 /* Use the return pointer slot in the frame marker. */
6862 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6863 else
6864 /* Use the slot at -40 in the frame marker since HP builtin
6865 alloca doesn't copy it. */
6866 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6867 }
6868 else
6869 {
6fb5fa3c 6870 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6871 /* Use the return pointer slot in the frame marker. */
6872 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6873 else
6874 /* Use the "Clean Up" slot in the frame marker. In GCC,
6875 the only other use of this location is for copying a
6876 floating point double argument from a floating-point
6877 register to two general registers. The copy is done
aa7f1eb1 6878 as an "atomic" operation when outputting a call, so it
685d0e07
JDA
6879 won't interfere with our using the location here. */
6880 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6881 }
3d9268b6 6882
5fad1c24
JDA
6883 if (TARGET_PORTABLE_RUNTIME)
6884 {
6885 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6886 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6887 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6888 }
6889 else if (flag_pic)
685d0e07 6890 {
568de9bb
JDA
6891 xoperands[1] = gen_rtx_REG (Pmode, 1);
6892 xoperands[2] = xoperands[1];
6893 pa_output_pic_pcrel_sequence (xoperands);
685d0e07
JDA
6894 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6895 }
6896 else
6897 /* Now output a very long branch to the original target. */
6898 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
4bcb9e3f 6899
685d0e07
JDA
6900 /* Now restore the value of %r1 in the delay slot. */
6901 if (TARGET_64BIT)
6902 {
6fb5fa3c 6903 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6904 return "ldd -16(%%r30),%%r1";
6905 else
6906 return "ldd -40(%%r30),%%r1";
6907 }
6908 else
6909 {
6fb5fa3c 6910 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6911 return "ldw -20(%%r30),%%r1";
6912 else
6913 return "ldw -12(%%r30),%%r1";
b9821af8 6914 }
d2364a74
JL
6915}
6916
23f6f34f 6917/* This routine handles all the branch-on-bit conditional branch sequences we
d2364a74
JL
6918 might need to generate. It handles nullification of delay slots,
6919 varying length branches, negated branches and all combinations of the
6920 above. it returns the appropriate output template to emit the branch. */
6921
519104fe 6922const char *
b32d5189 6923pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
b1a275e1 6924{
d2364a74 6925 static char buf[100];
16923e7b 6926 bool useskip;
16d74a3c
JDA
6927 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6928 int length = get_attr_length (insn);
6929 int xdelay;
d2364a74 6930
112cdef5 6931 /* A conditional branch to the following instruction (e.g. the delay slot) is
b1a275e1 6932 asking for a disaster. I do not think this can happen as this pattern
23f6f34f 6933 is only used when optimizing; jump optimization should eliminate the
b1a275e1 6934 jump. But be prepared just in case. */
23f6f34f 6935
f5e66865 6936 if (branch_to_delay_slot_p (insn))
02a57c73 6937 return "nop";
23f6f34f 6938
b9821af8
JL
6939 /* If this is a long branch with its delay slot unfilled, set `nullify'
6940 as it can nullify the delay slot and save a nop. */
a1b36964 6941 if (length == 8 && dbr_sequence_length () == 0)
b9821af8
JL
6942 nullify = 1;
6943
6944 /* If this is a short forward conditional branch which did not get
6945 its delay slot filled, the delay slot can still be nullified. */
a1b36964 6946 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b9821af8
JL
6947 nullify = forward_branch_p (insn);
6948
23f6f34f 6949 /* A forward branch over a single nullified insn can be done with a
d2364a74
JL
6950 extrs instruction. This avoids a single cycle penalty due to
6951 mis-predicted branch if we fall through (branch not taken). */
16923e7b 6952 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
d2364a74
JL
6953
6954 switch (length)
6955 {
6956
b9821af8
JL
6957 /* All short conditional branches except backwards with an unfilled
6958 delay slot. */
a1b36964 6959 case 4:
d2364a74 6960 if (useskip)
f38b27c7 6961 strcpy (buf, "{extrs,|extrw,s,}");
23f6f34f 6962 else
d2364a74 6963 strcpy (buf, "bb,");
520babc7
JL
6964 if (useskip && GET_MODE (operands[0]) == DImode)
6965 strcpy (buf, "extrd,s,*");
6966 else if (GET_MODE (operands[0]) == DImode)
6967 strcpy (buf, "bb,*");
d2364a74
JL
6968 if ((which == 0 && negated)
6969 || (which == 1 && ! negated))
6970 strcat (buf, ">=");
6971 else
6972 strcat (buf, "<");
6973 if (useskip)
3b5e5fb3 6974 strcat (buf, " %0,%1,1,%%r0");
d2364a74 6975 else if (nullify && negated)
f5e66865
JDA
6976 {
6977 if (branch_needs_nop_p (insn))
6978 strcat (buf, ",n %0,%1,%3%#");
6979 else
6980 strcat (buf, ",n %0,%1,%3");
6981 }
d2364a74 6982 else if (nullify && ! negated)
f5e66865
JDA
6983 {
6984 if (branch_needs_nop_p (insn))
6985 strcat (buf, ",n %0,%1,%2%#");
6986 else
6987 strcat (buf, ",n %0,%1,%2");
6988 }
d2364a74 6989 else if (! nullify && negated)
f5e66865 6990 strcat (buf, " %0,%1,%3");
d2364a74 6991 else if (! nullify && ! negated)
b9821af8 6992 strcat (buf, " %0,%1,%2");
d2364a74
JL
6993 break;
6994
5bdc5878 6995 /* All long conditionals. Note a short backward branch with an
b9821af8
JL
6996 unfilled delay slot is treated just like a long backward branch
6997 with an unfilled delay slot. */
a1b36964 6998 case 8:
b9821af8 6999 /* Handle weird backwards branch with a filled delay slot
16d74a3c 7000 which is nullified. */
b9821af8
JL
7001 if (dbr_sequence_length () != 0
7002 && ! forward_branch_p (insn)
7003 && nullify)
7004 {
7005 strcpy (buf, "bb,");
520babc7
JL
7006 if (GET_MODE (operands[0]) == DImode)
7007 strcat (buf, "*");
b9821af8
JL
7008 if ((which == 0 && negated)
7009 || (which == 1 && ! negated))
7010 strcat (buf, "<");
7011 else
7012 strcat (buf, ">=");
7013 if (negated)
3b5e5fb3 7014 strcat (buf, ",n %0,%1,.+12\n\tb %3");
b9821af8 7015 else
3b5e5fb3 7016 strcat (buf, ",n %0,%1,.+12\n\tb %2");
b9821af8 7017 }
923f781d
JL
7018 /* Handle short backwards branch with an unfilled delay slot.
7019 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7020 taken and untaken branches. */
7021 else if (dbr_sequence_length () == 0
7022 && ! forward_branch_p (insn)
9d98a694
AO
7023 && INSN_ADDRESSES_SET_P ()
7024 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7025 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d
JL
7026 {
7027 strcpy (buf, "bb,");
520babc7
JL
7028 if (GET_MODE (operands[0]) == DImode)
7029 strcat (buf, "*");
923f781d
JL
7030 if ((which == 0 && negated)
7031 || (which == 1 && ! negated))
7032 strcat (buf, ">=");
7033 else
7034 strcat (buf, "<");
7035 if (negated)
7036 strcat (buf, " %0,%1,%3%#");
7037 else
7038 strcat (buf, " %0,%1,%2%#");
7039 }
d2364a74 7040 else
b9821af8 7041 {
520babc7
JL
7042 if (GET_MODE (operands[0]) == DImode)
7043 strcpy (buf, "extrd,s,*");
16d74a3c
JDA
7044 else
7045 strcpy (buf, "{extrs,|extrw,s,}");
b9821af8
JL
7046 if ((which == 0 && negated)
7047 || (which == 1 && ! negated))
7048 strcat (buf, "<");
7049 else
7050 strcat (buf, ">=");
7051 if (nullify && negated)
55abf18a 7052 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
b9821af8 7053 else if (nullify && ! negated)
55abf18a 7054 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
b9821af8 7055 else if (negated)
3b5e5fb3 7056 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
23f6f34f 7057 else
3b5e5fb3 7058 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
b9821af8 7059 }
d2364a74
JL
7060 break;
7061
7062 default:
16d74a3c
JDA
7063 /* The reversed conditional branch must branch over one additional
7064 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 7065 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
7066 nullified forward branch, the instruction after the reversed
7067 condition branch must be nullified. */
7068 if (dbr_sequence_length () == 0
7069 || (nullify && forward_branch_p (insn)))
7070 {
7071 nullify = 1;
7072 xdelay = 0;
8370f6fa 7073 operands[4] = GEN_INT (length);
16d74a3c
JDA
7074 }
7075 else
7076 {
7077 xdelay = 1;
8370f6fa 7078 operands[4] = GEN_INT (length + 4);
16d74a3c
JDA
7079 }
7080
7081 if (GET_MODE (operands[0]) == DImode)
8370f6fa 7082 strcpy (buf, "bb,*");
16d74a3c 7083 else
8370f6fa 7084 strcpy (buf, "bb,");
16d74a3c
JDA
7085 if ((which == 0 && negated)
7086 || (which == 1 && !negated))
8370f6fa 7087 strcat (buf, "<");
16d74a3c 7088 else
8370f6fa 7089 strcat (buf, ">=");
16d74a3c 7090 if (nullify)
8370f6fa 7091 strcat (buf, ",n %0,%1,.+%4");
16d74a3c 7092 else
8370f6fa 7093 strcat (buf, " %0,%1,.+%4");
16d74a3c 7094 output_asm_insn (buf, operands);
ae9d61ab
JDA
7095 return pa_output_lbranch (negated ? operands[3] : operands[2],
7096 insn, xdelay);
b9821af8 7097 }
d2364a74
JL
7098 return buf;
7099}
7100
6a73009d
JL
7101/* This routine handles all the branch-on-variable-bit conditional branch
7102 sequences we might need to generate. It handles nullification of delay
7103 slots, varying length branches, negated branches and all combinations
7104 of the above. It returns the appropriate output template to emit the
7105 branch. */
7106
519104fe 7107const char *
b32d5189 7108pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
ae9d61ab 7109 int which)
6a73009d
JL
7110{
7111 static char buf[100];
16923e7b 7112 bool useskip;
16d74a3c
JDA
7113 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7114 int length = get_attr_length (insn);
7115 int xdelay;
6a73009d 7116
112cdef5 7117 /* A conditional branch to the following instruction (e.g. the delay slot) is
6a73009d
JL
7118 asking for a disaster. I do not think this can happen as this pattern
7119 is only used when optimizing; jump optimization should eliminate the
7120 jump. But be prepared just in case. */
7121
f5e66865 7122 if (branch_to_delay_slot_p (insn))
02a57c73 7123 return "nop";
6a73009d
JL
7124
7125 /* If this is a long branch with its delay slot unfilled, set `nullify'
7126 as it can nullify the delay slot and save a nop. */
7127 if (length == 8 && dbr_sequence_length () == 0)
7128 nullify = 1;
7129
7130 /* If this is a short forward conditional branch which did not get
7131 its delay slot filled, the delay slot can still be nullified. */
7132 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7133 nullify = forward_branch_p (insn);
7134
7135 /* A forward branch over a single nullified insn can be done with a
7136 extrs instruction. This avoids a single cycle penalty due to
7137 mis-predicted branch if we fall through (branch not taken). */
16923e7b 7138 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6a73009d
JL
7139
7140 switch (length)
7141 {
7142
7143 /* All short conditional branches except backwards with an unfilled
7144 delay slot. */
7145 case 4:
7146 if (useskip)
f38b27c7 7147 strcpy (buf, "{vextrs,|extrw,s,}");
6a73009d 7148 else
f38b27c7 7149 strcpy (buf, "{bvb,|bb,}");
520babc7 7150 if (useskip && GET_MODE (operands[0]) == DImode)
e72ed000 7151 strcpy (buf, "extrd,s,*");
520babc7
JL
7152 else if (GET_MODE (operands[0]) == DImode)
7153 strcpy (buf, "bb,*");
6a73009d
JL
7154 if ((which == 0 && negated)
7155 || (which == 1 && ! negated))
7156 strcat (buf, ">=");
7157 else
7158 strcat (buf, "<");
7159 if (useskip)
f38b27c7 7160 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6a73009d 7161 else if (nullify && negated)
f5e66865
JDA
7162 {
7163 if (branch_needs_nop_p (insn))
7164 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7165 else
7166 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7167 }
6a73009d 7168 else if (nullify && ! negated)
f5e66865
JDA
7169 {
7170 if (branch_needs_nop_p (insn))
7171 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7172 else
7173 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7174 }
6a73009d 7175 else if (! nullify && negated)
f5e66865 7176 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6a73009d 7177 else if (! nullify && ! negated)
f38b27c7 7178 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6a73009d
JL
7179 break;
7180
5bdc5878 7181 /* All long conditionals. Note a short backward branch with an
6a73009d
JL
7182 unfilled delay slot is treated just like a long backward branch
7183 with an unfilled delay slot. */
7184 case 8:
7185 /* Handle weird backwards branch with a filled delay slot
16d74a3c 7186 which is nullified. */
6a73009d
JL
7187 if (dbr_sequence_length () != 0
7188 && ! forward_branch_p (insn)
7189 && nullify)
7190 {
f38b27c7 7191 strcpy (buf, "{bvb,|bb,}");
520babc7
JL
7192 if (GET_MODE (operands[0]) == DImode)
7193 strcat (buf, "*");
6a73009d
JL
7194 if ((which == 0 && negated)
7195 || (which == 1 && ! negated))
7196 strcat (buf, "<");
7197 else
7198 strcat (buf, ">=");
7199 if (negated)
f38b27c7 7200 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6a73009d 7201 else
f38b27c7 7202 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6a73009d
JL
7203 }
7204 /* Handle short backwards branch with an unfilled delay slot.
7205 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7206 taken and untaken branches. */
7207 else if (dbr_sequence_length () == 0
7208 && ! forward_branch_p (insn)
9d98a694
AO
7209 && INSN_ADDRESSES_SET_P ()
7210 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7211 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6a73009d 7212 {
f38b27c7 7213 strcpy (buf, "{bvb,|bb,}");
520babc7
JL
7214 if (GET_MODE (operands[0]) == DImode)
7215 strcat (buf, "*");
6a73009d
JL
7216 if ((which == 0 && negated)
7217 || (which == 1 && ! negated))
7218 strcat (buf, ">=");
7219 else
7220 strcat (buf, "<");
7221 if (negated)
f38b27c7 7222 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6a73009d 7223 else
f38b27c7 7224 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6a73009d
JL
7225 }
7226 else
7227 {
f38b27c7 7228 strcpy (buf, "{vextrs,|extrw,s,}");
520babc7
JL
7229 if (GET_MODE (operands[0]) == DImode)
7230 strcpy (buf, "extrd,s,*");
6a73009d
JL
7231 if ((which == 0 && negated)
7232 || (which == 1 && ! negated))
7233 strcat (buf, "<");
7234 else
7235 strcat (buf, ">=");
7236 if (nullify && negated)
f38b27c7 7237 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6a73009d 7238 else if (nullify && ! negated)
f38b27c7 7239 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6a73009d 7240 else if (negated)
f38b27c7 7241 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6a73009d 7242 else
f38b27c7 7243 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6a73009d
JL
7244 }
7245 break;
7246
7247 default:
16d74a3c
JDA
7248 /* The reversed conditional branch must branch over one additional
7249 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 7250 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
7251 nullified forward branch, the instruction after the reversed
7252 condition branch must be nullified. */
7253 if (dbr_sequence_length () == 0
7254 || (nullify && forward_branch_p (insn)))
7255 {
7256 nullify = 1;
7257 xdelay = 0;
8370f6fa 7258 operands[4] = GEN_INT (length);
16d74a3c
JDA
7259 }
7260 else
7261 {
7262 xdelay = 1;
8370f6fa 7263 operands[4] = GEN_INT (length + 4);
16d74a3c
JDA
7264 }
7265
7266 if (GET_MODE (operands[0]) == DImode)
8370f6fa 7267 strcpy (buf, "bb,*");
16d74a3c 7268 else
8370f6fa 7269 strcpy (buf, "{bvb,|bb,}");
16d74a3c
JDA
7270 if ((which == 0 && negated)
7271 || (which == 1 && !negated))
8370f6fa 7272 strcat (buf, "<");
16d74a3c 7273 else
8370f6fa 7274 strcat (buf, ">=");
16d74a3c 7275 if (nullify)
8370f6fa 7276 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
16d74a3c 7277 else
8370f6fa 7278 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
16d74a3c 7279 output_asm_insn (buf, operands);
ae9d61ab
JDA
7280 return pa_output_lbranch (negated ? operands[3] : operands[2],
7281 insn, xdelay);
6a73009d
JL
7282 }
7283 return buf;
7284}
7285
b1a275e1
JL
7286/* Return the output template for emitting a dbra type insn.
7287
7288 Note it may perform some output operations on its own before
7289 returning the final output string. */
519104fe 7290const char *
b32d5189 7291pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
b1a275e1 7292{
16d74a3c 7293 int length = get_attr_length (insn);
b1a275e1 7294
112cdef5 7295 /* A conditional branch to the following instruction (e.g. the delay slot) is
b1a275e1
JL
7296 asking for a disaster. Be prepared! */
7297
f5e66865 7298 if (branch_to_delay_slot_p (insn))
b1a275e1
JL
7299 {
7300 if (which_alternative == 0)
7301 return "ldo %1(%0),%0";
7302 else if (which_alternative == 1)
7303 {
831c1763
AM
7304 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7305 output_asm_insn ("ldw -16(%%r30),%4", operands);
d2d28085 7306 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
f38b27c7 7307 return "{fldws|fldw} -16(%%r30),%0";
b1a275e1
JL
7308 }
7309 else
7310 {
7311 output_asm_insn ("ldw %0,%4", operands);
7312 return "ldo %1(%4),%4\n\tstw %4,%0";
7313 }
7314 }
7315
7316 if (which_alternative == 0)
7317 {
7318 int nullify = INSN_ANNULLED_BRANCH_P (insn);
16d74a3c 7319 int xdelay;
b1a275e1
JL
7320
7321 /* If this is a long branch with its delay slot unfilled, set `nullify'
7322 as it can nullify the delay slot and save a nop. */
a1b36964 7323 if (length == 8 && dbr_sequence_length () == 0)
b1a275e1
JL
7324 nullify = 1;
7325
7326 /* If this is a short forward conditional branch which did not get
7327 its delay slot filled, the delay slot can still be nullified. */
a1b36964 7328 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b1a275e1
JL
7329 nullify = forward_branch_p (insn);
7330
144d51f9 7331 switch (length)
b1a275e1 7332 {
144d51f9
NS
7333 case 4:
7334 if (nullify)
f5e66865
JDA
7335 {
7336 if (branch_needs_nop_p (insn))
7337 return "addib,%C2,n %1,%0,%3%#";
7338 else
7339 return "addib,%C2,n %1,%0,%3";
7340 }
144d51f9
NS
7341 else
7342 return "addib,%C2 %1,%0,%3";
7343
7344 case 8:
23f6f34f 7345 /* Handle weird backwards branch with a fulled delay slot
b1a275e1
JL
7346 which is nullified. */
7347 if (dbr_sequence_length () != 0
7348 && ! forward_branch_p (insn)
7349 && nullify)
3b5e5fb3 7350 return "addib,%N2,n %1,%0,.+12\n\tb %3";
923f781d
JL
7351 /* Handle short backwards branch with an unfilled delay slot.
7352 Using a addb;nop rather than addi;bl saves 1 cycle for both
7353 taken and untaken branches. */
7354 else if (dbr_sequence_length () == 0
7355 && ! forward_branch_p (insn)
9d98a694
AO
7356 && INSN_ADDRESSES_SET_P ()
7357 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7358 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d 7359 return "addib,%C2 %1,%0,%3%#";
23f6f34f
TG
7360
7361 /* Handle normal cases. */
b1a275e1 7362 if (nullify)
3b5e5fb3 7363 return "addi,%N2 %1,%0,%0\n\tb,n %3";
b1a275e1 7364 else
3b5e5fb3 7365 return "addi,%N2 %1,%0,%0\n\tb %3";
144d51f9
NS
7366
7367 default:
16d74a3c
JDA
7368 /* The reversed conditional branch must branch over one additional
7369 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 7370 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
7371 nullified forward branch, the instruction after the reversed
7372 condition branch must be nullified. */
7373 if (dbr_sequence_length () == 0
7374 || (nullify && forward_branch_p (insn)))
7375 {
7376 nullify = 1;
7377 xdelay = 0;
7378 operands[4] = GEN_INT (length);
7379 }
7380 else
7381 {
7382 xdelay = 1;
7383 operands[4] = GEN_INT (length + 4);
7384 }
7385
7386 if (nullify)
7387 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7388 else
7389 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7390
ae9d61ab 7391 return pa_output_lbranch (operands[3], insn, xdelay);
b1a275e1 7392 }
144d51f9 7393
b1a275e1
JL
7394 }
7395 /* Deal with gross reload from FP register case. */
7396 else if (which_alternative == 1)
7397 {
7398 /* Move loop counter from FP register to MEM then into a GR,
7399 increment the GR, store the GR into MEM, and finally reload
23f6f34f 7400 the FP register from MEM from within the branch's delay slot. */
831c1763
AM
7401 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7402 operands);
d2d28085 7403 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
16d74a3c 7404 if (length == 24)
f38b27c7 7405 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c 7406 else if (length == 28)
f38b27c7 7407 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c
JDA
7408 else
7409 {
8370f6fa
JDA
7410 operands[5] = GEN_INT (length - 16);
7411 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
16d74a3c 7412 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
ae9d61ab 7413 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7414 }
b1a275e1
JL
7415 }
7416 /* Deal with gross reload from memory case. */
7417 else
7418 {
7419 /* Reload loop counter from memory, the store back to memory
71cc389b 7420 happens in the branch's delay slot. */
b1a275e1 7421 output_asm_insn ("ldw %0,%4", operands);
16d74a3c 7422 if (length == 12)
b1a275e1 7423 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
16d74a3c 7424 else if (length == 16)
3b5e5fb3 7425 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
16d74a3c
JDA
7426 else
7427 {
8370f6fa
JDA
7428 operands[5] = GEN_INT (length - 4);
7429 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
ae9d61ab 7430 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7431 }
b1a275e1
JL
7432 }
7433}
7434
16d74a3c 7435/* Return the output template for emitting a movb type insn.
b1a275e1
JL
7436
7437 Note it may perform some output operations on its own before
7438 returning the final output string. */
519104fe 7439const char *
b32d5189 7440pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
b7849684 7441 int reverse_comparison)
b1a275e1 7442{
16d74a3c 7443 int length = get_attr_length (insn);
b1a275e1 7444
112cdef5 7445 /* A conditional branch to the following instruction (e.g. the delay slot) is
b1a275e1
JL
7446 asking for a disaster. Be prepared! */
7447
f5e66865 7448 if (branch_to_delay_slot_p (insn))
b1a275e1
JL
7449 {
7450 if (which_alternative == 0)
7451 return "copy %1,%0";
7452 else if (which_alternative == 1)
7453 {
831c1763 7454 output_asm_insn ("stw %1,-16(%%r30)", operands);
f38b27c7 7455 return "{fldws|fldw} -16(%%r30),%0";
b1a275e1 7456 }
b1092901 7457 else if (which_alternative == 2)
b1a275e1 7458 return "stw %1,%0";
b1092901
JL
7459 else
7460 return "mtsar %r1";
b1a275e1
JL
7461 }
7462
7463 /* Support the second variant. */
7464 if (reverse_comparison)
7465 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7466
7467 if (which_alternative == 0)
7468 {
7469 int nullify = INSN_ANNULLED_BRANCH_P (insn);
16d74a3c 7470 int xdelay;
b1a275e1
JL
7471
7472 /* If this is a long branch with its delay slot unfilled, set `nullify'
7473 as it can nullify the delay slot and save a nop. */
a1b36964 7474 if (length == 8 && dbr_sequence_length () == 0)
b1a275e1
JL
7475 nullify = 1;
7476
7477 /* If this is a short forward conditional branch which did not get
7478 its delay slot filled, the delay slot can still be nullified. */
a1b36964 7479 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b1a275e1
JL
7480 nullify = forward_branch_p (insn);
7481
144d51f9 7482 switch (length)
b1a275e1 7483 {
144d51f9
NS
7484 case 4:
7485 if (nullify)
f5e66865
JDA
7486 {
7487 if (branch_needs_nop_p (insn))
7488 return "movb,%C2,n %1,%0,%3%#";
7489 else
7490 return "movb,%C2,n %1,%0,%3";
7491 }
144d51f9
NS
7492 else
7493 return "movb,%C2 %1,%0,%3";
7494
7495 case 8:
23f6f34f 7496 /* Handle weird backwards branch with a filled delay slot
b1a275e1
JL
7497 which is nullified. */
7498 if (dbr_sequence_length () != 0
7499 && ! forward_branch_p (insn)
7500 && nullify)
3b5e5fb3 7501 return "movb,%N2,n %1,%0,.+12\n\tb %3";
23f6f34f 7502
923f781d
JL
7503 /* Handle short backwards branch with an unfilled delay slot.
7504 Using a movb;nop rather than or;bl saves 1 cycle for both
7505 taken and untaken branches. */
7506 else if (dbr_sequence_length () == 0
7507 && ! forward_branch_p (insn)
9d98a694
AO
7508 && INSN_ADDRESSES_SET_P ()
7509 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7510 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d 7511 return "movb,%C2 %1,%0,%3%#";
23f6f34f 7512 /* Handle normal cases. */
b1a275e1 7513 if (nullify)
3b5e5fb3 7514 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
b1a275e1 7515 else
3b5e5fb3 7516 return "or,%N2 %1,%%r0,%0\n\tb %3";
144d51f9
NS
7517
7518 default:
16d74a3c
JDA
7519 /* The reversed conditional branch must branch over one additional
7520 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 7521 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
7522 nullified forward branch, the instruction after the reversed
7523 condition branch must be nullified. */
7524 if (dbr_sequence_length () == 0
7525 || (nullify && forward_branch_p (insn)))
7526 {
7527 nullify = 1;
7528 xdelay = 0;
7529 operands[4] = GEN_INT (length);
7530 }
7531 else
7532 {
7533 xdelay = 1;
7534 operands[4] = GEN_INT (length + 4);
7535 }
7536
7537 if (nullify)
7538 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7539 else
7540 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7541
ae9d61ab 7542 return pa_output_lbranch (operands[3], insn, xdelay);
b1a275e1 7543 }
b1a275e1 7544 }
16d74a3c 7545 /* Deal with gross reload for FP destination register case. */
b1a275e1
JL
7546 else if (which_alternative == 1)
7547 {
16d74a3c
JDA
7548 /* Move source register to MEM, perform the branch test, then
7549 finally load the FP register from MEM from within the branch's
7550 delay slot. */
831c1763 7551 output_asm_insn ("stw %1,-16(%%r30)", operands);
16d74a3c 7552 if (length == 12)
f38b27c7 7553 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c 7554 else if (length == 16)
f38b27c7 7555 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c
JDA
7556 else
7557 {
8370f6fa
JDA
7558 operands[4] = GEN_INT (length - 4);
7559 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
16d74a3c 7560 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
ae9d61ab 7561 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7562 }
b1a275e1
JL
7563 }
7564 /* Deal with gross reload from memory case. */
b1092901 7565 else if (which_alternative == 2)
b1a275e1
JL
7566 {
7567 /* Reload loop counter from memory, the store back to memory
71cc389b 7568 happens in the branch's delay slot. */
16d74a3c 7569 if (length == 8)
f38b27c7 7570 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
16d74a3c 7571 else if (length == 12)
f38b27c7 7572 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
16d74a3c
JDA
7573 else
7574 {
8370f6fa
JDA
7575 operands[4] = GEN_INT (length);
7576 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7577 operands);
ae9d61ab 7578 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7579 }
b1a275e1 7580 }
b1092901
JL
7581 /* Handle SAR as a destination. */
7582 else
7583 {
16d74a3c 7584 if (length == 8)
f38b27c7 7585 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
16d74a3c 7586 else if (length == 12)
715ab8c3 7587 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
16d74a3c
JDA
7588 else
7589 {
8370f6fa
JDA
7590 operands[4] = GEN_INT (length);
7591 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7592 operands);
ae9d61ab 7593 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7594 }
b1092901 7595 }
b1a275e1
JL
7596}
7597
a02aa5b0
JDA
7598/* Copy any FP arguments in INSN into integer registers. */
7599static void
e0d80a58 7600copy_fp_args (rtx_insn *insn)
a02aa5b0
JDA
7601{
7602 rtx link;
7603 rtx xoperands[2];
b1a275e1 7604
a02aa5b0
JDA
7605 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7606 {
7607 int arg_mode, regno;
7608 rtx use = XEXP (link, 0);
f726ea7d 7609
a02aa5b0
JDA
7610 if (! (GET_CODE (use) == USE
7611 && GET_CODE (XEXP (use, 0)) == REG
7612 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7613 continue;
2c4ff308 7614
a02aa5b0
JDA
7615 arg_mode = GET_MODE (XEXP (use, 0));
7616 regno = REGNO (XEXP (use, 0));
520babc7 7617
a02aa5b0
JDA
7618 /* Is it a floating point register? */
7619 if (regno >= 32 && regno <= 39)
7620 {
7621 /* Copy the FP register into an integer register via memory. */
7622 if (arg_mode == SFmode)
7623 {
7624 xoperands[0] = XEXP (use, 0);
7625 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7626 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7627 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7628 }
7629 else
7630 {
7631 xoperands[0] = XEXP (use, 0);
7632 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7633 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7634 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7635 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7636 }
7637 }
279c9bde 7638 }
a02aa5b0
JDA
7639}
7640
7641/* Compute length of the FP argument copy sequence for INSN. */
7642static int
e0d80a58 7643length_fp_args (rtx_insn *insn)
a02aa5b0
JDA
7644{
7645 int length = 0;
7646 rtx link;
279c9bde 7647
a02aa5b0 7648 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6a73009d 7649 {
a02aa5b0
JDA
7650 int arg_mode, regno;
7651 rtx use = XEXP (link, 0);
7652
7653 if (! (GET_CODE (use) == USE
7654 && GET_CODE (XEXP (use, 0)) == REG
7655 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7656 continue;
6a73009d 7657
a02aa5b0
JDA
7658 arg_mode = GET_MODE (XEXP (use, 0));
7659 regno = REGNO (XEXP (use, 0));
7660
7661 /* Is it a floating point register? */
7662 if (regno >= 32 && regno <= 39)
6a73009d 7663 {
a02aa5b0
JDA
7664 if (arg_mode == SFmode)
7665 length += 8;
7666 else
7667 length += 12;
6a73009d 7668 }
a02aa5b0 7669 }
6a73009d 7670
a02aa5b0
JDA
7671 return length;
7672}
3d9268b6 7673
611ad29e 7674/* Return the attribute length for the millicode call instruction INSN.
ae9d61ab 7675 The length must match the code generated by pa_output_millicode_call.
611ad29e 7676 We include the delay slot in the returned length as it is better to
a02aa5b0 7677 over estimate the length than to under estimate it. */
a7721dc0 7678
a02aa5b0 7679int
432d483a 7680pa_attr_length_millicode_call (rtx_insn *insn)
a02aa5b0 7681{
611ad29e 7682 unsigned long distance = -1;
62910663 7683 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
a02aa5b0 7684
611ad29e
JDA
7685 if (INSN_ADDRESSES_SET_P ())
7686 {
5fad1c24
JDA
7687 distance = (total + insn_current_reference_address (insn));
7688 if (distance < total)
611ad29e
JDA
7689 distance = -1;
7690 }
a02aa5b0
JDA
7691
7692 if (TARGET_64BIT)
7693 {
7694 if (!TARGET_LONG_CALLS && distance < 7600000)
611ad29e 7695 return 8;
a02aa5b0 7696
611ad29e 7697 return 20;
a02aa5b0
JDA
7698 }
7699 else if (TARGET_PORTABLE_RUNTIME)
611ad29e 7700 return 24;
a02aa5b0
JDA
7701 else
7702 {
a43434ff 7703 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
611ad29e 7704 return 8;
a02aa5b0 7705
925cb97d 7706 if (!flag_pic)
611ad29e 7707 return 12;
a02aa5b0 7708
611ad29e 7709 return 24;
a02aa5b0
JDA
7710 }
7711}
7712
bf95e88b 7713/* INSN is a function call.
a7721dc0 7714
a02aa5b0 7715 CALL_DEST is the routine we are calling. */
a7721dc0 7716
a02aa5b0 7717const char *
b32d5189 7718pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
a02aa5b0
JDA
7719{
7720 int attr_length = get_attr_length (insn);
7721 int seq_length = dbr_sequence_length ();
568de9bb 7722 rtx xoperands[4];
a7721dc0 7723
a02aa5b0 7724 xoperands[0] = call_dest;
a02aa5b0
JDA
7725
7726 /* Handle the common case where we are sure that the branch will
7727 reach the beginning of the $CODE$ subspace. The within reach
ab11fb42
JDA
7728 form of the $$sh_func_adrs call has a length of 28. Because it
7729 has an attribute type of sh_func_adrs, it never has a nonzero
7730 sequence length (i.e., the delay slot is never filled). */
a02aa5b0 7731 if (!TARGET_LONG_CALLS
ab11fb42
JDA
7732 && (attr_length == 8
7733 || (attr_length == 28
7734 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
a02aa5b0 7735 {
568de9bb
JDA
7736 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7737 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
a02aa5b0
JDA
7738 }
7739 else
7740 {
7741 if (TARGET_64BIT)
7742 {
7743 /* It might seem that one insn could be saved by accessing
7744 the millicode function using the linkage table. However,
7745 this doesn't work in shared libraries and other dynamically
7746 loaded objects. Using a pc-relative sequence also avoids
7747 problems related to the implicit use of the gp register. */
568de9bb
JDA
7748 xoperands[1] = gen_rtx_REG (Pmode, 1);
7749 xoperands[2] = xoperands[1];
7750 pa_output_pic_pcrel_sequence (xoperands);
a02aa5b0 7751 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6a73009d 7752 }
6a73009d
JL
7753 else if (TARGET_PORTABLE_RUNTIME)
7754 {
a02aa5b0
JDA
7755 /* Pure portable runtime doesn't allow be/ble; we also don't
7756 have PIC support in the assembler/linker, so this sequence
7757 is needed. */
6a73009d 7758
a02aa5b0
JDA
7759 /* Get the address of our target into %r1. */
7760 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7761 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6a73009d 7762
a02aa5b0
JDA
7763 /* Get our return address into %r31. */
7764 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7765 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
6a73009d 7766
a02aa5b0
JDA
7767 /* Jump to our target address in %r1. */
7768 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6a73009d 7769 }
a02aa5b0 7770 else if (!flag_pic)
6a73009d 7771 {
a02aa5b0 7772 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6248c4dd 7773 if (TARGET_PA_20)
a02aa5b0 7774 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
6248c4dd 7775 else
a02aa5b0 7776 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6a73009d 7777 }
a02aa5b0 7778 else
6a73009d 7779 {
568de9bb
JDA
7780 xoperands[1] = gen_rtx_REG (Pmode, 31);
7781 xoperands[2] = gen_rtx_REG (Pmode, 1);
7782 pa_output_pic_pcrel_sequence (xoperands);
581d9404 7783
568de9bb
JDA
7784 /* Adjust return address. */
7785 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
6a73009d 7786
a02aa5b0
JDA
7787 /* Jump to our target address in %r1. */
7788 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6a73009d 7789 }
6a73009d
JL
7790 }
7791
a02aa5b0
JDA
7792 if (seq_length == 0)
7793 output_asm_insn ("nop", xoperands);
6a73009d 7794
6a73009d
JL
7795 return "";
7796}
7797
611ad29e
JDA
7798/* Return the attribute length of the call instruction INSN. The SIBCALL
7799 flag indicates whether INSN is a regular call or a sibling call. The
32562302 7800 length returned must be longer than the code actually generated by
ae9d61ab 7801 pa_output_call. Since branch shortening is done before delay branch
32562302
JDA
7802 sequencing, there is no way to determine whether or not the delay
7803 slot will be filled during branch shortening. Even when the delay
7804 slot is filled, we may have to add a nop if the delay slot contains
7805 a branch that can't reach its target. Thus, we always have to include
7806 the delay slot in the length estimate. This used to be done in
7807 pa_adjust_insn_length but we do it here now as some sequences always
7808 fill the delay slot and we can save four bytes in the estimate for
7809 these sequences. */
a02aa5b0
JDA
7810
7811int
432d483a 7812pa_attr_length_call (rtx_insn *insn, int sibcall)
a02aa5b0 7813{
32562302 7814 int local_call;
e40375e0 7815 rtx call, call_dest;
32562302
JDA
7816 tree call_decl;
7817 int length = 0;
7818 rtx pat = PATTERN (insn);
611ad29e 7819 unsigned long distance = -1;
a02aa5b0 7820
b64925dc 7821 gcc_assert (CALL_P (insn));
e40375e0 7822
611ad29e
JDA
7823 if (INSN_ADDRESSES_SET_P ())
7824 {
32562302
JDA
7825 unsigned long total;
7826
7827 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
5fad1c24
JDA
7828 distance = (total + insn_current_reference_address (insn));
7829 if (distance < total)
611ad29e
JDA
7830 distance = -1;
7831 }
a02aa5b0 7832
e40375e0 7833 gcc_assert (GET_CODE (pat) == PARALLEL);
a02aa5b0 7834
e40375e0
JDA
7835 /* Get the call rtx. */
7836 call = XVECEXP (pat, 0, 0);
7837 if (GET_CODE (call) == SET)
7838 call = SET_SRC (call);
7839
7840 gcc_assert (GET_CODE (call) == CALL);
7841
7842 /* Determine if this is a local call. */
7843 call_dest = XEXP (XEXP (call, 0), 0);
32562302 7844 call_decl = SYMBOL_REF_DECL (call_dest);
ecc418c4 7845 local_call = call_decl && targetm.binds_local_p (call_decl);
a02aa5b0 7846
32562302
JDA
7847 /* pc-relative branch. */
7848 if (!TARGET_LONG_CALLS
7849 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
a43434ff 7850 || distance < MAX_PCREL17F_OFFSET))
32562302 7851 length += 8;
a02aa5b0 7852
32562302
JDA
7853 /* 64-bit plabel sequence. */
7854 else if (TARGET_64BIT && !local_call)
7855 length += sibcall ? 28 : 24;
a02aa5b0 7856
32562302
JDA
7857 /* non-pic long absolute branch sequence. */
7858 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7859 length += 12;
a02aa5b0 7860
32562302 7861 /* long pc-relative branch sequence. */
9dbd54be 7862 else if (TARGET_LONG_PIC_SDIFF_CALL
568de9bb 7863 || (TARGET_GAS && !TARGET_SOM && local_call))
32562302
JDA
7864 {
7865 length += 20;
a02aa5b0 7866
0831e1d1 7867 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
32562302
JDA
7868 length += 8;
7869 }
62910663 7870
32562302
JDA
7871 /* 32-bit plabel sequence. */
7872 else
7873 {
7874 length += 32;
a02aa5b0 7875
32562302
JDA
7876 if (TARGET_SOM)
7877 length += length_fp_args (insn);
7878
7879 if (flag_pic)
7880 length += 4;
90330d31 7881
32562302
JDA
7882 if (!TARGET_PA_20)
7883 {
a02aa5b0
JDA
7884 if (!sibcall)
7885 length += 8;
7886
0831e1d1 7887 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
32562302 7888 length += 8;
a02aa5b0
JDA
7889 }
7890 }
32562302
JDA
7891
7892 return length;
a02aa5b0
JDA
7893}
7894
bf95e88b 7895/* INSN is a function call.
6a73009d
JL
7896
7897 CALL_DEST is the routine we are calling. */
7898
519104fe 7899const char *
432d483a 7900pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
6a73009d 7901{
3d9268b6 7902 int seq_length = dbr_sequence_length ();
5fad1c24 7903 tree call_decl = SYMBOL_REF_DECL (call_dest);
ecc418c4 7904 int local_call = call_decl && targetm.binds_local_p (call_decl);
568de9bb 7905 rtx xoperands[4];
a02aa5b0
JDA
7906
7907 xoperands[0] = call_dest;
6a73009d 7908
a02aa5b0 7909 /* Handle the common case where we're sure that the branch will reach
5fad1c24
JDA
7910 the beginning of the "$CODE$" subspace. This is the beginning of
7911 the current function if we are in a named section. */
ae9d61ab 7912 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
2c4ff308 7913 {
520babc7 7914 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
a02aa5b0 7915 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
279c9bde 7916 }
a02aa5b0 7917 else
279c9bde 7918 {
5fad1c24 7919 if (TARGET_64BIT && !local_call)
f726ea7d 7920 {
a02aa5b0
JDA
7921 /* ??? As far as I can tell, the HP linker doesn't support the
7922 long pc-relative sequence described in the 64-bit runtime
7923 architecture. So, we use a slightly longer indirect call. */
ae9d61ab 7924 xoperands[0] = pa_get_deferred_plabel (call_dest);
a02aa5b0
JDA
7925 xoperands[1] = gen_label_rtx ();
7926
7927 /* If this isn't a sibcall, we put the load of %r27 into the
7928 delay slot. We can't do this in a sibcall as we don't
bf95e88b
JDA
7929 have a second call-clobbered scratch register available.
7930 We don't need to do anything when generating fast indirect
7931 calls. */
7932 if (seq_length != 0 && !sibcall)
a02aa5b0
JDA
7933 {
7934 final_scan_insn (NEXT_INSN (insn), asm_out_file,
c9d691e9 7935 optimize, 0, NULL);
a02aa5b0
JDA
7936
7937 /* Now delete the delay insn. */
a38e7aa5 7938 SET_INSN_DELETED (NEXT_INSN (insn));
bf95e88b 7939 seq_length = 0;
a02aa5b0 7940 }
279c9bde 7941
a02aa5b0
JDA
7942 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7943 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7944 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
279c9bde 7945
a02aa5b0 7946 if (sibcall)
279c9bde 7947 {
a02aa5b0
JDA
7948 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7949 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7950 output_asm_insn ("bve (%%r1)", xoperands);
7951 }
7952 else
7953 {
7954 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7955 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7956 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
bf95e88b 7957 seq_length = 1;
279c9bde
JL
7958 }
7959 }
a02aa5b0 7960 else
93ae92c1 7961 {
a02aa5b0
JDA
7962 int indirect_call = 0;
7963
7964 /* Emit a long call. There are several different sequences
7965 of increasing length and complexity. In most cases,
7966 they don't allow an instruction in the delay slot. */
5fad1c24 7967 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
9dbd54be 7968 && !TARGET_LONG_PIC_SDIFF_CALL
568de9bb 7969 && !(TARGET_GAS && !TARGET_SOM && local_call)
5fad1c24 7970 && !TARGET_64BIT)
a02aa5b0
JDA
7971 indirect_call = 1;
7972
7973 if (seq_length != 0
a02aa5b0 7974 && !sibcall
44b86471
JDA
7975 && (!TARGET_PA_20
7976 || indirect_call
7977 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
359255a9 7978 {
a02aa5b0
JDA
7979 /* A non-jump insn in the delay slot. By definition we can
7980 emit this insn before the call (and in fact before argument
7981 relocating. */
c9d691e9 7982 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
5cfc5f84 7983 NULL);
a02aa5b0
JDA
7984
7985 /* Now delete the delay insn. */
a38e7aa5 7986 SET_INSN_DELETED (NEXT_INSN (insn));
bf95e88b 7987 seq_length = 0;
359255a9 7988 }
93ae92c1 7989
5fad1c24 7990 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
359255a9 7991 {
a02aa5b0
JDA
7992 /* This is the best sequence for making long calls in
7993 non-pic code. Unfortunately, GNU ld doesn't provide
7994 the stub needed for external calls, and GAS's support
5fad1c24
JDA
7995 for this with the SOM linker is buggy. It is safe
7996 to use this for local calls. */
a02aa5b0
JDA
7997 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7998 if (sibcall)
7999 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8000 else
8001 {
8002 if (TARGET_PA_20)
8003 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8004 xoperands);
8005 else
8006 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6a73009d 8007
a02aa5b0 8008 output_asm_insn ("copy %%r31,%%r2", xoperands);
bf95e88b 8009 seq_length = 1;
a02aa5b0
JDA
8010 }
8011 }
8012 else
8013 {
568de9bb
JDA
8014 /* The HP assembler and linker can handle relocations for
8015 the difference of two symbols. The HP assembler
8016 recognizes the sequence as a pc-relative call and
8017 the linker provides stubs when needed. */
8018
8019 /* GAS currently can't generate the relocations that
8020 are needed for the SOM linker under HP-UX using this
8021 sequence. The GNU linker doesn't generate the stubs
8022 that are needed for external calls on TARGET_ELF32
8023 with this sequence. For now, we have to use a longer
8024 plabel sequence when using GAS for non local calls. */
8025 if (TARGET_LONG_PIC_SDIFF_CALL
8026 || (TARGET_GAS && !TARGET_SOM && local_call))
3d9268b6 8027 {
568de9bb
JDA
8028 xoperands[1] = gen_rtx_REG (Pmode, 1);
8029 xoperands[2] = xoperands[1];
8030 pa_output_pic_pcrel_sequence (xoperands);
3d9268b6 8031 }
520babc7
JL
8032 else
8033 {
a02aa5b0
JDA
8034 /* Emit a long plabel-based call sequence. This is
8035 essentially an inline implementation of $$dyncall.
8036 We don't actually try to call $$dyncall as this is
8037 as difficult as calling the function itself. */
ae9d61ab 8038 xoperands[0] = pa_get_deferred_plabel (call_dest);
a02aa5b0
JDA
8039 xoperands[1] = gen_label_rtx ();
8040
8041 /* Since the call is indirect, FP arguments in registers
8042 need to be copied to the general registers. Then, the
8043 argument relocation stub will copy them back. */
8044 if (TARGET_SOM)
8045 copy_fp_args (insn);
8046
8047 if (flag_pic)
8048 {
8049 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8050 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8051 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
8052 }
8053 else
8054 {
8055 output_asm_insn ("addil LR'%0-$global$,%%r27",
8056 xoperands);
8057 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
8058 xoperands);
8059 }
279c9bde 8060
a02aa5b0
JDA
8061 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
8062 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
8063 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
8064 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
6a73009d 8065
a02aa5b0
JDA
8066 if (!sibcall && !TARGET_PA_20)
8067 {
8068 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
0831e1d1 8069 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
90330d31
JDA
8070 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8071 else
8072 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
a02aa5b0
JDA
8073 }
8074 }
6a73009d 8075
a02aa5b0 8076 if (TARGET_PA_20)
520babc7 8077 {
a02aa5b0
JDA
8078 if (sibcall)
8079 output_asm_insn ("bve (%%r1)", xoperands);
8080 else
8081 {
8082 if (indirect_call)
8083 {
8084 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8085 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
bf95e88b 8086 seq_length = 1;
a02aa5b0
JDA
8087 }
8088 else
8089 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8090 }
520babc7
JL
8091 }
8092 else
8093 {
0831e1d1 8094 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
90330d31
JDA
8095 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8096 xoperands);
279c9bde 8097
a02aa5b0 8098 if (sibcall)
90330d31 8099 {
0831e1d1 8100 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
90330d31
JDA
8101 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8102 else
8103 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8104 }
a02aa5b0
JDA
8105 else
8106 {
0831e1d1 8107 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
90330d31
JDA
8108 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8109 else
8110 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
279c9bde 8111
a02aa5b0
JDA
8112 if (indirect_call)
8113 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8114 else
8115 output_asm_insn ("copy %%r31,%%r2", xoperands);
bf95e88b 8116 seq_length = 1;
a02aa5b0
JDA
8117 }
8118 }
8119 }
279c9bde 8120 }
2c4ff308 8121 }
23f6f34f 8122
bf95e88b 8123 if (seq_length == 0)
a02aa5b0 8124 output_asm_insn ("nop", xoperands);
2c4ff308 8125
2c4ff308
JL
8126 return "";
8127}
8128
611ad29e
JDA
8129/* Return the attribute length of the indirect call instruction INSN.
8130 The length must match the code generated by output_indirect call.
8131 The returned length includes the delay slot. Currently, the delay
8132 slot of an indirect call sequence is not exposed and it is used by
8133 the sequence itself. */
8134
8135int
432d483a 8136pa_attr_length_indirect_call (rtx_insn *insn)
611ad29e
JDA
8137{
8138 unsigned long distance = -1;
62910663 8139 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
611ad29e
JDA
8140
8141 if (INSN_ADDRESSES_SET_P ())
8142 {
5fad1c24
JDA
8143 distance = (total + insn_current_reference_address (insn));
8144 if (distance < total)
611ad29e
JDA
8145 distance = -1;
8146 }
8147
8148 if (TARGET_64BIT)
8149 return 12;
8150
445f9a50 8151 if (TARGET_FAST_INDIRECT_CALLS)
611ad29e
JDA
8152 return 8;
8153
611ad29e 8154 if (TARGET_PORTABLE_RUNTIME)
cc5cec10 8155 return 16;
611ad29e 8156
445f9a50
JDA
8157 /* Inline version of $$dyncall. */
8158 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8159 return 20;
8160
8161 if (!TARGET_LONG_CALLS
8162 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8163 || distance < MAX_PCREL17F_OFFSET))
8164 return 8;
8165
611ad29e 8166 /* Out of reach, can use ble. */
445f9a50
JDA
8167 if (!flag_pic)
8168 return 12;
8169
8170 /* Inline version of $$dyncall. */
8171 if (TARGET_NO_SPACE_REGS || TARGET_PA_20)
8172 return 20;
8173
8174 if (!optimize_size)
8175 return 36;
8176
8177 /* Long PIC pc-relative call. */
8178 return 20;
611ad29e
JDA
8179}
8180
8181const char *
432d483a 8182pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
611ad29e 8183{
568de9bb 8184 rtx xoperands[4];
445f9a50 8185 int length;
611ad29e
JDA
8186
8187 if (TARGET_64BIT)
8188 {
8189 xoperands[0] = call_dest;
445f9a50
JDA
8190 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8191 "bve,l (%%r2),%%r2\n\t"
8192 "ldd 24(%0),%%r27", xoperands);
611ad29e
JDA
8193 return "";
8194 }
8195
8196 /* First the special case for kernels, level 0 systems, etc. */
8197 if (TARGET_FAST_INDIRECT_CALLS)
445f9a50
JDA
8198 {
8199 pa_output_arg_descriptor (insn);
8200 if (TARGET_PA_20)
8201 return "bve,l,n (%%r22),%%r2\n\tnop";
8202 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8203 }
8204
8205 if (TARGET_PORTABLE_RUNTIME)
8206 {
8207 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8208 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8209 pa_output_arg_descriptor (insn);
8210 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8211 }
8212
8213 /* Maybe emit a fast inline version of $$dyncall. */
8214 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8215 {
8216 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8217 "ldw 2(%%r22),%%r19\n\t"
8218 "ldw -2(%%r22),%%r22", xoperands);
8219 pa_output_arg_descriptor (insn);
8220 if (TARGET_NO_SPACE_REGS)
8221 {
8222 if (TARGET_PA_20)
8223 return "bve,l,n (%%r22),%%r2\n\tnop";
8224 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8225 }
8226 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8227 }
611ad29e
JDA
8228
8229 /* Now the normal case -- we can reach $$dyncall directly or
8230 we're sure that we can get there via a long-branch stub.
8231
8232 No need to check target flags as the length uniquely identifies
8233 the remaining cases. */
445f9a50
JDA
8234 length = pa_attr_length_indirect_call (insn);
8235 if (length == 8)
2c774817 8236 {
445f9a50
JDA
8237 pa_output_arg_descriptor (insn);
8238
40fc2e0b
JDA
8239 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8240 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8241 variant of the B,L instruction can't be used on the SOM target. */
8242 if (TARGET_PA_20 && !TARGET_SOM)
445f9a50 8243 return "b,l,n $$dyncall,%%r2\n\tnop";
2c774817 8244 else
445f9a50 8245 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
2c774817 8246 }
611ad29e
JDA
8247
8248 /* Long millicode call, but we are not generating PIC or portable runtime
8249 code. */
445f9a50
JDA
8250 if (length == 12)
8251 {
8252 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8253 pa_output_arg_descriptor (insn);
8254 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8255 }
611ad29e 8256
445f9a50
JDA
8257 /* Maybe emit a fast inline version of $$dyncall. The long PIC
8258 pc-relative call sequence is five instructions. The inline PA 2.0
8259 version of $$dyncall is also five instructions. The PA 1.X versions
8260 are longer but still an overall win. */
8261 if (TARGET_NO_SPACE_REGS || TARGET_PA_20 || !optimize_size)
8262 {
8263 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8264 "ldw 2(%%r22),%%r19\n\t"
8265 "ldw -2(%%r22),%%r22", xoperands);
8266 if (TARGET_NO_SPACE_REGS)
8267 {
8268 pa_output_arg_descriptor (insn);
8269 if (TARGET_PA_20)
8270 return "bve,l,n (%%r22),%%r2\n\tnop";
8271 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8272 }
8273 if (TARGET_PA_20)
8274 {
8275 pa_output_arg_descriptor (insn);
8276 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8277 }
8278 output_asm_insn ("bl .+8,%%r2\n\t"
8279 "ldo 16(%%r2),%%r2\n\t"
8280 "ldsid (%%r22),%%r1\n\t"
8281 "mtsp %%r1,%%sr0", xoperands);
8282 pa_output_arg_descriptor (insn);
8283 return "be 0(%%sr0,%%r22)\n\tstw %%r2,-24(%%sp)";
8284 }
8285
611ad29e 8286 /* We need a long PIC call to $$dyncall. */
568de9bb
JDA
8287 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8288 xoperands[1] = gen_rtx_REG (Pmode, 2);
8289 xoperands[2] = gen_rtx_REG (Pmode, 1);
8290 pa_output_pic_pcrel_sequence (xoperands);
445f9a50
JDA
8291 pa_output_arg_descriptor (insn);
8292 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
611ad29e
JDA
8293}
8294
d2a94ec0 8295/* In HPUX 8.0's shared library scheme, special relocations are needed
23f6f34f 8296 for function labels if they might be passed to a function
d2a94ec0 8297 in a shared library (because shared libraries don't live in code
520a57c8 8298 space), and special magic is needed to construct their address. */
d2a94ec0
TM
8299
8300void
ae9d61ab 8301pa_encode_label (rtx sym)
d2a94ec0 8302{
519104fe 8303 const char *str = XSTR (sym, 0);
10d17cb7
AM
8304 int len = strlen (str) + 1;
8305 char *newstr, *p;
d2a94ec0 8306
5ead67f6 8307 p = newstr = XALLOCAVEC (char, len + 1);
10d17cb7
AM
8308 *p++ = '@';
8309 strcpy (p, str);
67d6f2fc 8310
831c1763 8311 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
d2a94ec0 8312}
23f6f34f 8313
fb49053f 8314static void
b7849684 8315pa_encode_section_info (tree decl, rtx rtl, int first)
fb49053f 8316{
9a60b229
JJ
8317 int old_referenced = 0;
8318
8319 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8320 old_referenced
8321 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8322
51076f96
RC
8323 default_encode_section_info (decl, rtl, first);
8324
fb49053f
RH
8325 if (first && TEXT_SPACE_P (decl))
8326 {
fb49053f
RH
8327 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8328 if (TREE_CODE (decl) == FUNCTION_DECL)
ae9d61ab 8329 pa_encode_label (XEXP (rtl, 0));
fb49053f 8330 }
9a60b229
JJ
8331 else if (old_referenced)
8332 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
fb49053f
RH
8333}
8334
/* This is sort of inverse to pa_encode_section_info: skip one leading
   '@' in STR, if present, and then one leading '*', if present, and
   return a pointer to what follows.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  if (*str == '@')
    str++;
  if (*str == '*')
    str++;
  return str;
}
8344
326bc2de
JL
8345/* Returns 1 if OP is a function label involved in a simple addition
8346 with a constant. Used to keep certain patterns from matching
8347 during instruction combination. */
8348int
ae9d61ab 8349pa_is_function_label_plus_const (rtx op)
326bc2de
JL
8350{
8351 /* Strip off any CONST. */
8352 if (GET_CODE (op) == CONST)
8353 op = XEXP (op, 0);
8354
8355 return (GET_CODE (op) == PLUS
9c575e20 8356 && function_label_operand (XEXP (op, 0), VOIDmode)
326bc2de
JL
8357 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8358}
8359
54374491
JL
8360/* Output assembly code for a thunk to FUNCTION. */
8361
c590b625 8362static void
b7849684
JE
8363pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8364 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8365 tree function)
54374491 8366{
cdcb88d7 8367 static unsigned int current_thunk_number;
5fad1c24 8368 int val_14 = VAL_14_BITS_P (delta);
67b846fa 8369 unsigned int old_last_address = last_address, nbytes = 0;
b2029ad6 8370 char label[17];
cdcb88d7 8371 rtx xoperands[4];
5fad1c24 8372
cdcb88d7
JDA
8373 xoperands[0] = XEXP (DECL_RTL (function), 0);
8374 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8375 xoperands[2] = GEN_INT (delta);
5fad1c24 8376
a9a302d9 8377 final_start_function (emit_barrier (), file, 1);
5fad1c24
JDA
8378
8379 /* Output the thunk. We know that the function is in the same
8380 translation unit (i.e., the same space) as the thunk, and that
8381 thunks are output after their method. Thus, we don't need an
8382 external branch to reach the function. With SOM and GAS,
8383 functions and thunks are effectively in different sections.
8384 Thus, we can always use a IA-relative branch and the linker
8385 will add a long branch stub if necessary.
8386
8387 However, we have to be careful when generating PIC code on the
8388 SOM port to ensure that the sequence does not transfer to an
8389 import stub for the target function as this could clobber the
8390 return value saved at SP-24. This would also apply to the
8391 32-bit linux port if the multi-space model is implemented. */
8392 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8393 && !(flag_pic && TREE_PUBLIC (function))
8394 && (TARGET_GAS || last_address < 262132))
8395 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
677f3fa8 8396 && ((targetm_common.have_named_sections
5fad1c24
JDA
8397 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8398 /* The GNU 64-bit linker has rather poor stub management.
8399 So, we use a long branch from thunks that aren't in
8400 the same section as the target function. */
8401 && ((!TARGET_64BIT
8402 && (DECL_SECTION_NAME (thunk_fndecl)
8403 != DECL_SECTION_NAME (function)))
8404 || ((DECL_SECTION_NAME (thunk_fndecl)
8405 == DECL_SECTION_NAME (function))
8406 && last_address < 262132)))
5dba8769
JDA
8407 /* In this case, we need to be able to reach the start of
8408 the stub table even though the function is likely closer
8409 and can be jumped to directly. */
677f3fa8 8410 || (targetm_common.have_named_sections
2842bb86
JDA
8411 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8412 && DECL_SECTION_NAME (function) == NULL
5dba8769
JDA
8413 && total_code_bytes < MAX_PCREL17F_OFFSET)
8414 /* Likewise. */
677f3fa8 8415 || (!targetm_common.have_named_sections
5dba8769 8416 && total_code_bytes < MAX_PCREL17F_OFFSET))))
5fad1c24 8417 {
cdcb88d7
JDA
8418 if (!val_14)
8419 output_asm_insn ("addil L'%2,%%r26", xoperands);
8420
31fd809b 8421 output_asm_insn ("b %0", xoperands);
cdcb88d7 8422
5fad1c24
JDA
8423 if (val_14)
8424 {
cdcb88d7 8425 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24
JDA
8426 nbytes += 8;
8427 }
8428 else
8429 {
cdcb88d7 8430 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8431 nbytes += 12;
8432 }
8433 }
8434 else if (TARGET_64BIT)
8435 {
568de9bb
JDA
8436 rtx xop[4];
8437
5fad1c24
JDA
8438 /* We only have one call-clobbered scratch register, so we can't
8439 make use of the delay slot if delta doesn't fit in 14 bits. */
8440 if (!val_14)
cdcb88d7
JDA
8441 {
8442 output_asm_insn ("addil L'%2,%%r26", xoperands);
8443 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8444 }
5fad1c24 8445
568de9bb
JDA
8446 /* Load function address into %r1. */
8447 xop[0] = xoperands[0];
8448 xop[1] = gen_rtx_REG (Pmode, 1);
8449 xop[2] = xop[1];
8450 pa_output_pic_pcrel_sequence (xop);
5fad1c24
JDA
8451
8452 if (val_14)
8453 {
cdcb88d7
JDA
8454 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8455 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24
JDA
8456 nbytes += 20;
8457 }
8458 else
8459 {
cdcb88d7 8460 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
5fad1c24
JDA
8461 nbytes += 24;
8462 }
8463 }
8464 else if (TARGET_PORTABLE_RUNTIME)
8465 {
cdcb88d7
JDA
8466 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8467 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8468
8469 if (!val_14)
568de9bb 8470 output_asm_insn ("ldil L'%2,%%r26", xoperands);
cdcb88d7
JDA
8471
8472 output_asm_insn ("bv %%r0(%%r22)", xoperands);
5fad1c24
JDA
8473
8474 if (val_14)
8475 {
cdcb88d7 8476 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24
JDA
8477 nbytes += 16;
8478 }
8479 else
8480 {
568de9bb 8481 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
5fad1c24
JDA
8482 nbytes += 20;
8483 }
8484 }
8485 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8486 {
8487 /* The function is accessible from outside this module. The only
8488 way to avoid an import stub between the thunk and function is to
8489 call the function directly with an indirect sequence similar to
8490 that used by $$dyncall. This is possible because $$dyncall acts
8491 as the import stub in an indirect call. */
5fad1c24 8492 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
cdcb88d7
JDA
8493 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8494 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8495 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8496 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8497 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8498 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8499 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8500 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8501
5fad1c24
JDA
8502 if (!val_14)
8503 {
cdcb88d7 8504 output_asm_insn ("addil L'%2,%%r26", xoperands);
5fad1c24
JDA
8505 nbytes += 4;
8506 }
cdcb88d7 8507
5fad1c24
JDA
8508 if (TARGET_PA_20)
8509 {
cdcb88d7
JDA
8510 output_asm_insn ("bve (%%r22)", xoperands);
8511 nbytes += 36;
8512 }
8513 else if (TARGET_NO_SPACE_REGS)
8514 {
8515 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
5fad1c24
JDA
8516 nbytes += 36;
8517 }
8518 else
54374491 8519 {
cdcb88d7
JDA
8520 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8521 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8522 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8523 nbytes += 44;
5fad1c24
JDA
8524 }
8525
8526 if (val_14)
cdcb88d7 8527 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24 8528 else
cdcb88d7 8529 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8530 }
8531 else if (flag_pic)
8532 {
568de9bb 8533 rtx xop[4];
5fad1c24 8534
568de9bb
JDA
8535 /* Load function address into %r22. */
8536 xop[0] = xoperands[0];
8537 xop[1] = gen_rtx_REG (Pmode, 1);
8538 xop[2] = gen_rtx_REG (Pmode, 22);
8539 pa_output_pic_pcrel_sequence (xop);
5fad1c24 8540
cdcb88d7
JDA
8541 if (!val_14)
8542 output_asm_insn ("addil L'%2,%%r26", xoperands);
8543
8544 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8545
5fad1c24
JDA
8546 if (val_14)
8547 {
cdcb88d7 8548 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24 8549 nbytes += 20;
54374491
JL
8550 }
8551 else
5fad1c24 8552 {
cdcb88d7 8553 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8554 nbytes += 24;
8555 }
54374491
JL
8556 }
8557 else
8558 {
5fad1c24 8559 if (!val_14)
cdcb88d7 8560 output_asm_insn ("addil L'%2,%%r26", xoperands);
5fad1c24 8561
cdcb88d7
JDA
8562 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8563 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
5fad1c24
JDA
8564
8565 if (val_14)
54374491 8566 {
cdcb88d7 8567 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24 8568 nbytes += 12;
54374491
JL
8569 }
8570 else
5fad1c24 8571 {
cdcb88d7 8572 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8573 nbytes += 16;
8574 }
54374491 8575 }
5fad1c24 8576
a9a302d9 8577 final_end_function ();
1a83bfc3 8578
5fad1c24 8579 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
54374491 8580 {
d6b5193b 8581 switch_to_section (data_section);
cdcb88d7 8582 output_asm_insn (".align 4", xoperands);
5fad1c24 8583 ASM_OUTPUT_LABEL (file, label);
cdcb88d7 8584 output_asm_insn (".word P'%0", xoperands);
54374491 8585 }
5fad1c24 8586
54374491 8587 current_thunk_number++;
5fad1c24
JDA
8588 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8589 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8590 last_address += nbytes;
67b846fa
JDA
8591 if (old_last_address > last_address)
8592 last_address = UINT_MAX;
5fad1c24 8593 update_total_code_bytes (nbytes);
54374491
JL
8594}
8595
4977bab6
ZW
8596/* Only direct calls to static functions are allowed to be sibling (tail)
8597 call optimized.
8598
8599 This restriction is necessary because some linker generated stubs will
8600 store return pointers into rp' in some cases which might clobber a
8601 live value already in rp'.
8602
8603 In a sibcall the current function and the target function share stack
8604 space. Thus if the path to the current function and the path to the
8605 target function save a value in rp', they save the value into the
8606 same stack slot, which has undesirable consequences.
8607
8608 Because of the deferred binding nature of shared libraries any function
8609 with external scope could be in a different load module and thus require
8610 rp' to be saved when calling that function. So sibcall optimizations
8611 can only be safe for static function.
8612
8613 Note that GCC never needs return value relocations, so we don't have to
8614 worry about static calls with return value relocations (which require
8615 saving rp').
8616
8617 It is safe to perform a sibcall optimization when the target function
8618 will never return. */
8619static bool
b7849684 8620pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4977bab6 8621{
73096ba9
JDA
8622 if (TARGET_PORTABLE_RUNTIME)
8623 return false;
8624
11f43127 8625 /* Sibcalls are not ok because the arg pointer register is not a fixed
c1207243 8626 register. This prevents the sibcall optimization from occurring. In
11f43127
JDA
8627 addition, there are problems with stub placement using GNU ld. This
8628 is because a normal sibcall branch uses a 17-bit relocation while
8629 a regular call branch uses a 22-bit relocation. As a result, more
8630 care needs to be taken in the placement of long-branch stubs. */
8631 if (TARGET_64BIT)
8632 return false;
8633
73096ba9
JDA
8634 /* Sibcalls are only ok within a translation unit. */
8635 return (decl && !TREE_PUBLIC (decl));
4977bab6
ZW
8636}
8637
8ddf681a
R
8638/* ??? Addition is not commutative on the PA due to the weird implicit
8639 space register selection rules for memory addresses. Therefore, we
8640 don't consider a + b == b + a, as this might be inside a MEM. */
8641static bool
3101faab 8642pa_commutative_p (const_rtx x, int outer_code)
8ddf681a
R
8643{
8644 return (COMMUTATIVE_P (x)
bd7d5043
JDA
8645 && (TARGET_NO_SPACE_REGS
8646 || (outer_code != UNKNOWN && outer_code != MEM)
8ddf681a
R
8647 || GET_CODE (x) != PLUS));
8648}
8649
88e5c029
JL
8650/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8651 use in fmpyadd instructions. */
2fe24884 8652int
ae9d61ab 8653pa_fmpyaddoperands (rtx *operands)
2fe24884 8654{
ef4bddc2 8655 machine_mode mode = GET_MODE (operands[0]);
2fe24884 8656
d85ab966
JL
8657 /* Must be a floating point mode. */
8658 if (mode != SFmode && mode != DFmode)
8659 return 0;
8660
2fe24884 8661 /* All modes must be the same. */
f133af4c
TG
8662 if (! (mode == GET_MODE (operands[1])
8663 && mode == GET_MODE (operands[2])
8664 && mode == GET_MODE (operands[3])
8665 && mode == GET_MODE (operands[4])
8666 && mode == GET_MODE (operands[5])))
2fe24884
JL
8667 return 0;
8668
d85ab966
JL
8669 /* All operands must be registers. */
8670 if (! (GET_CODE (operands[1]) == REG
8671 && GET_CODE (operands[2]) == REG
8672 && GET_CODE (operands[3]) == REG
8673 && GET_CODE (operands[4]) == REG
8674 && GET_CODE (operands[5]) == REG))
2fe24884
JL
8675 return 0;
8676
88e5c029
JL
8677 /* Only 2 real operands to the addition. One of the input operands must
8678 be the same as the output operand. */
2fe24884
JL
8679 if (! rtx_equal_p (operands[3], operands[4])
8680 && ! rtx_equal_p (operands[3], operands[5]))
8681 return 0;
8682
1e5f1716 8683 /* Inout operand of add cannot conflict with any operands from multiply. */
2fe24884
JL
8684 if (rtx_equal_p (operands[3], operands[0])
8685 || rtx_equal_p (operands[3], operands[1])
8686 || rtx_equal_p (operands[3], operands[2]))
8687 return 0;
8688
1e5f1716 8689 /* multiply cannot feed into addition operands. */
2fe24884
JL
8690 if (rtx_equal_p (operands[4], operands[0])
8691 || rtx_equal_p (operands[5], operands[0]))
8692 return 0;
8693
d85ab966
JL
8694 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8695 if (mode == SFmode
88624c0e
JL
8696 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8697 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8698 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8699 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8700 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8701 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
d85ab966
JL
8702 return 0;
8703
2fe24884
JL
8704 /* Passed. Operands are suitable for fmpyadd. */
8705 return 1;
8706}
8707
35d434ed
JDA
8708#if !defined(USE_COLLECT2)
8709static void
b7849684 8710pa_asm_out_constructor (rtx symbol, int priority)
35d434ed
JDA
8711{
8712 if (!function_label_operand (symbol, VOIDmode))
ae9d61ab 8713 pa_encode_label (symbol);
35d434ed
JDA
8714
8715#ifdef CTORS_SECTION_ASM_OP
8716 default_ctor_section_asm_out_constructor (symbol, priority);
8717#else
8718# ifdef TARGET_ASM_NAMED_SECTION
8719 default_named_section_asm_out_constructor (symbol, priority);
8720# else
8721 default_stabs_asm_out_constructor (symbol, priority);
8722# endif
8723#endif
8724}
8725
8726static void
b7849684 8727pa_asm_out_destructor (rtx symbol, int priority)
35d434ed
JDA
8728{
8729 if (!function_label_operand (symbol, VOIDmode))
ae9d61ab 8730 pa_encode_label (symbol);
35d434ed
JDA
8731
8732#ifdef DTORS_SECTION_ASM_OP
8733 default_dtor_section_asm_out_destructor (symbol, priority);
8734#else
8735# ifdef TARGET_ASM_NAMED_SECTION
8736 default_named_section_asm_out_destructor (symbol, priority);
8737# else
8738 default_stabs_asm_out_destructor (symbol, priority);
8739# endif
8740#endif
8741}
8742#endif
8743
d4482715
JDA
8744/* This function places uninitialized global data in the bss section.
8745 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8746 function on the SOM port to prevent uninitialized global data from
8747 being placed in the data section. */
8748
8749void
8750pa_asm_output_aligned_bss (FILE *stream,
8751 const char *name,
8752 unsigned HOST_WIDE_INT size,
8753 unsigned int align)
8754{
d6b5193b 8755 switch_to_section (bss_section);
d4482715
JDA
8756 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8757
8758#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8759 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8760#endif
8761
8762#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8763 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8764#endif
8765
8766 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8767 ASM_OUTPUT_LABEL (stream, name);
16998094 8768 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
d4482715
JDA
8769}
8770
8771/* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8772 that doesn't allow the alignment of global common storage to be directly
8773 specified. The SOM linker aligns common storage based on the rounded
8774 value of the NUM_BYTES parameter in the .comm directive. It's not
8775 possible to use the .align directive as it doesn't affect the alignment
8776 of the label associated with a .comm directive. */
8777
8778void
8779pa_asm_output_aligned_common (FILE *stream,
8780 const char *name,
8781 unsigned HOST_WIDE_INT size,
8782 unsigned int align)
8783{
22f549fd
JDA
8784 unsigned int max_common_align;
8785
8786 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8787 if (align > max_common_align)
8788 {
d4ee4d25 8789 warning (0, "alignment (%u) for %s exceeds maximum alignment "
22f549fd
JDA
8790 "for global common data. Using %u",
8791 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8792 align = max_common_align;
8793 }
8794
d6b5193b 8795 switch_to_section (bss_section);
d4482715
JDA
8796
8797 assemble_name (stream, name);
16998094 8798 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
d4482715
JDA
8799 MAX (size, align / BITS_PER_UNIT));
8800}
8801
8802/* We can't use .comm for local common storage as the SOM linker effectively
8803 treats the symbol as universal and uses the same storage for local symbols
8804 with the same name in different object files. The .block directive
8805 reserves an uninitialized block of storage. However, it's not common
8806 storage. Fortunately, GCC never requests common storage with the same
8807 name in any given translation unit. */
8808
8809void
8810pa_asm_output_aligned_local (FILE *stream,
8811 const char *name,
8812 unsigned HOST_WIDE_INT size,
8813 unsigned int align)
8814{
d6b5193b 8815 switch_to_section (bss_section);
d4482715
JDA
8816 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8817
8818#ifdef LOCAL_ASM_OP
8819 fprintf (stream, "%s", LOCAL_ASM_OP);
8820 assemble_name (stream, name);
8821 fprintf (stream, "\n");
8822#endif
8823
8824 ASM_OUTPUT_LABEL (stream, name);
16998094 8825 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
d4482715
JDA
8826}
8827
88e5c029
JL
8828/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8829 use in fmpysub instructions. */
2fe24884 8830int
ae9d61ab 8831pa_fmpysuboperands (rtx *operands)
2fe24884 8832{
ef4bddc2 8833 machine_mode mode = GET_MODE (operands[0]);
2fe24884 8834
d85ab966
JL
8835 /* Must be a floating point mode. */
8836 if (mode != SFmode && mode != DFmode)
8837 return 0;
8838
2fe24884 8839 /* All modes must be the same. */
f133af4c
TG
8840 if (! (mode == GET_MODE (operands[1])
8841 && mode == GET_MODE (operands[2])
8842 && mode == GET_MODE (operands[3])
8843 && mode == GET_MODE (operands[4])
8844 && mode == GET_MODE (operands[5])))
2fe24884
JL
8845 return 0;
8846
d85ab966
JL
8847 /* All operands must be registers. */
8848 if (! (GET_CODE (operands[1]) == REG
8849 && GET_CODE (operands[2]) == REG
8850 && GET_CODE (operands[3]) == REG
8851 && GET_CODE (operands[4]) == REG
8852 && GET_CODE (operands[5]) == REG))
2fe24884
JL
8853 return 0;
8854
88e5c029
JL
8855 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8856 operation, so operands[4] must be the same as operand[3]. */
2fe24884
JL
8857 if (! rtx_equal_p (operands[3], operands[4]))
8858 return 0;
8859
1e5f1716 8860 /* multiply cannot feed into subtraction. */
88e5c029 8861 if (rtx_equal_p (operands[5], operands[0]))
2fe24884
JL
8862 return 0;
8863
1e5f1716 8864 /* Inout operand of sub cannot conflict with any operands from multiply. */
2fe24884
JL
8865 if (rtx_equal_p (operands[3], operands[0])
8866 || rtx_equal_p (operands[3], operands[1])
8867 || rtx_equal_p (operands[3], operands[2]))
8868 return 0;
8869
d85ab966
JL
8870 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8871 if (mode == SFmode
88624c0e
JL
8872 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8873 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8874 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8875 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8876 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8877 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
d85ab966
JL
8878 return 0;
8879
2fe24884
JL
8880 /* Passed. Operands are suitable for fmpysub. */
8881 return 1;
8882}
8883
/* Return 1 if the given constant is 2, 4, or 8, and 0 otherwise.
   These are the valid constants for a MULT embedded inside a memory
   address.  */
int
pa_mem_shadd_constant_p (int val)
{
  switch (val)
    {
    case 2:
    case 4:
    case 8:
      return 1;
    default:
      return 0;
    }
}
8894
/* Return 1 if the given constant is 1, 2, or 3, and 0 otherwise.
   These are the valid constants for shadd instructions.  */
int
pa_shadd_constant_p (int val)
{
  switch (val)
    {
    case 1:
    case 2:
    case 3:
      return 1;
    default:
      return 0;
    }
}
4802a0d6 8905
3232e9d8
JDA
8906/* Return TRUE if INSN branches forward. */
8907
8908static bool
b32d5189 8909forward_branch_p (rtx_insn *insn)
b9821af8 8910{
3232e9d8
JDA
8911 rtx lab = JUMP_LABEL (insn);
8912
8913 /* The INSN must have a jump label. */
8914 gcc_assert (lab != NULL_RTX);
8915
8916 if (INSN_ADDRESSES_SET_P ())
8917 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
b9821af8
JL
8918
8919 while (insn)
8920 {
3232e9d8
JDA
8921 if (insn == lab)
8922 return true;
b9821af8
JL
8923 else
8924 insn = NEXT_INSN (insn);
8925 }
8926
3232e9d8 8927 return false;
b9821af8
JL
8928}
8929
b1092901
JL
8930/* Output an unconditional move and branch insn. */
8931
519104fe 8932const char *
b32d5189 8933pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
b1092901 8934{
16d74a3c
JDA
8935 int length = get_attr_length (insn);
8936
b1092901
JL
8937 /* These are the cases in which we win. */
8938 if (length == 4)
8939 return "mov%I1b,tr %1,%0,%2";
8940
16d74a3c
JDA
8941 /* None of the following cases win, but they don't lose either. */
8942 if (length == 8)
b1092901 8943 {
16d74a3c
JDA
8944 if (dbr_sequence_length () == 0)
8945 {
8946 /* Nothing in the delay slot, fake it by putting the combined
8947 insn (the copy or add) in the delay slot of a bl. */
8948 if (GET_CODE (operands[1]) == CONST_INT)
8949 return "b %2\n\tldi %1,%0";
8950 else
8951 return "b %2\n\tcopy %1,%0";
8952 }
b1092901 8953 else
16d74a3c
JDA
8954 {
8955 /* Something in the delay slot, but we've got a long branch. */
8956 if (GET_CODE (operands[1]) == CONST_INT)
8957 return "ldi %1,%0\n\tb %2";
8958 else
8959 return "copy %1,%0\n\tb %2";
8960 }
b1092901 8961 }
16d74a3c
JDA
8962
8963 if (GET_CODE (operands[1]) == CONST_INT)
8964 output_asm_insn ("ldi %1,%0", operands);
b1092901 8965 else
16d74a3c 8966 output_asm_insn ("copy %1,%0", operands);
ae9d61ab 8967 return pa_output_lbranch (operands[2], insn, 1);
b1092901
JL
8968}
8969
8970/* Output an unconditional add and branch insn. */
8971
519104fe 8972const char *
b32d5189 8973pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
b1092901 8974{
16d74a3c
JDA
8975 int length = get_attr_length (insn);
8976
b1092901
JL
8977 /* To make life easy we want operand0 to be the shared input/output
8978 operand and operand1 to be the readonly operand. */
8979 if (operands[0] == operands[1])
8980 operands[1] = operands[2];
8981
8982 /* These are the cases in which we win. */
8983 if (length == 4)
8984 return "add%I1b,tr %1,%0,%3";
8985
16d74a3c
JDA
8986 /* None of the following cases win, but they don't lose either. */
8987 if (length == 8)
b1092901 8988 {
16d74a3c
JDA
8989 if (dbr_sequence_length () == 0)
8990 /* Nothing in the delay slot, fake it by putting the combined
8991 insn (the copy or add) in the delay slot of a bl. */
8992 return "b %3\n\tadd%I1 %1,%0,%0";
8993 else
8994 /* Something in the delay slot, but we've got a long branch. */
8995 return "add%I1 %1,%0,%0\n\tb %3";
b1092901 8996 }
16d74a3c
JDA
8997
8998 output_asm_insn ("add%I1 %1,%0,%0", operands);
ae9d61ab 8999 return pa_output_lbranch (operands[3], insn, 1);
b1092901
JL
9000}
9001
746a9efa 9002/* We use this hook to perform a PA specific optimization which is difficult
3ba07ad3 9003 to do in earlier passes. */
746a9efa 9004
18dbd950 9005static void
b7849684 9006pa_reorg (void)
746a9efa 9007{
18dbd950 9008 remove_useless_addtr_insns (1);
d8b79470 9009
86001391 9010 if (pa_cpu < PROCESSOR_8000)
18dbd950 9011 pa_combine_instructions ();
aba892c4 9012}
c4bb6b38
JL
9013
9014/* The PA has a number of odd instructions which can perform multiple
9015 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9016 it may be profitable to combine two instructions into one instruction
9017 with two outputs. It's not profitable PA2.0 machines because the
9018 two outputs would take two slots in the reorder buffers.
9019
9020 This routine finds instructions which can be combined and combines
9021 them. We only support some of the potential combinations, and we
9022 only try common ways to find suitable instructions.
9023
9024 * addb can add two registers or a register and a small integer
9025 and jump to a nearby (+-8k) location. Normally the jump to the
9026 nearby location is conditional on the result of the add, but by
9027 using the "true" condition we can make the jump unconditional.
9028 Thus addb can perform two independent operations in one insn.
9029
9030 * movb is similar to addb in that it can perform a reg->reg
9031 or small immediate->reg copy and jump to a nearby (+-8k location).
9032
9033 * fmpyadd and fmpysub can perform a FP multiply and either an
9034 FP add or FP sub if the operands of the multiply and add/sub are
9035 independent (there are other minor restrictions). Note both
9036 the fmpy and fadd/fsub can in theory move to better spots according
9037 to data dependencies, but for now we require the fmpy stay at a
9038 fixed location.
9039
9040 * Many of the memory operations can perform pre & post updates
9041 of index registers. GCC's pre/post increment/decrement addressing
9042 is far too simple to take advantage of all the possibilities. This
9043 pass may not be suitable since those insns may not be independent.
9044
9045 * comclr can compare two ints or an int and a register, nullify
9046 the following instruction and zero some other register. This
9047 is more difficult to use as it's harder to find an insn which
9048 will generate a comclr than finding something like an unconditional
9049 branch. (conditional moves & long branches create comclr insns).
9050
9051 * Most arithmetic operations can conditionally skip the next
9052 instruction. They can be viewed as "perform this operation
9053 and conditionally jump to this nearby location" (where nearby
9054 is an insns away). These are difficult to use due to the
9055 branch length restrictions. */
9056
51723711 9057static void
b7849684 9058pa_combine_instructions (void)
c4bb6b38 9059{
b32d5189 9060 rtx_insn *anchor;
c4bb6b38
JL
9061
9062 /* This can get expensive since the basic algorithm is on the
9063 order of O(n^2) (or worse). Only do it for -O2 or higher
956d6950 9064 levels of optimization. */
c4bb6b38
JL
9065 if (optimize < 2)
9066 return;
9067
9068 /* Walk down the list of insns looking for "anchor" insns which
9069 may be combined with "floating" insns. As the name implies,
9070 "anchor" instructions don't move, while "floating" insns may
9071 move around. */
647d790d
DM
9072 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9073 rtx_insn *new_rtx = make_insn_raw (par);
c4bb6b38
JL
9074
9075 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9076 {
9077 enum attr_pa_combine_type anchor_attr;
9078 enum attr_pa_combine_type floater_attr;
9079
9080 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9081 Also ignore any special USE insns. */
b64925dc 9082 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
c4bb6b38 9083 || GET_CODE (PATTERN (anchor)) == USE
34f0d87a 9084 || GET_CODE (PATTERN (anchor)) == CLOBBER)
c4bb6b38
JL
9085 continue;
9086
9087 anchor_attr = get_attr_pa_combine_type (anchor);
9088 /* See if anchor is an insn suitable for combination. */
9089 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9090 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9091 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9092 && ! forward_branch_p (anchor)))
9093 {
b32d5189 9094 rtx_insn *floater;
c4bb6b38
JL
9095
9096 for (floater = PREV_INSN (anchor);
9097 floater;
9098 floater = PREV_INSN (floater))
9099 {
b64925dc
SB
9100 if (NOTE_P (floater)
9101 || (NONJUMP_INSN_P (floater)
c4bb6b38
JL
9102 && (GET_CODE (PATTERN (floater)) == USE
9103 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9104 continue;
9105
9106 /* Anything except a regular INSN will stop our search. */
39718607 9107 if (! NONJUMP_INSN_P (floater))
c4bb6b38 9108 {
b32d5189 9109 floater = NULL;
c4bb6b38
JL
9110 break;
9111 }
9112
9113 /* See if FLOATER is suitable for combination with the
9114 anchor. */
9115 floater_attr = get_attr_pa_combine_type (floater);
9116 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9117 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9118 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9119 && floater_attr == PA_COMBINE_TYPE_FMPY))
9120 {
9121 /* If ANCHOR and FLOATER can be combined, then we're
9122 done with this pass. */
0a2aaacc 9123 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
c4bb6b38
JL
9124 SET_DEST (PATTERN (floater)),
9125 XEXP (SET_SRC (PATTERN (floater)), 0),
9126 XEXP (SET_SRC (PATTERN (floater)), 1)))
9127 break;
9128 }
9129
9130 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9131 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9132 {
9133 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9134 {
0a2aaacc 9135 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
c4bb6b38
JL
9136 SET_DEST (PATTERN (floater)),
9137 XEXP (SET_SRC (PATTERN (floater)), 0),
9138 XEXP (SET_SRC (PATTERN (floater)), 1)))
9139 break;
9140 }
9141 else
9142 {
0a2aaacc 9143 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
c4bb6b38
JL
9144 SET_DEST (PATTERN (floater)),
9145 SET_SRC (PATTERN (floater)),
9146 SET_SRC (PATTERN (floater))))
9147 break;
9148 }
9149 }
9150 }
9151
9152 /* If we didn't find anything on the backwards scan try forwards. */
9153 if (!floater
9154 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9155 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9156 {
9157 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9158 {
b64925dc
SB
9159 if (NOTE_P (floater)
9160 || (NONJUMP_INSN_P (floater)
c4bb6b38
JL
9161 && (GET_CODE (PATTERN (floater)) == USE
9162 || GET_CODE (PATTERN (floater)) == CLOBBER)))
6619e96c 9163
c4bb6b38
JL
9164 continue;
9165
9166 /* Anything except a regular INSN will stop our search. */
39718607 9167 if (! NONJUMP_INSN_P (floater))
c4bb6b38 9168 {
b32d5189 9169 floater = NULL;
c4bb6b38
JL
9170 break;
9171 }
9172
9173 /* See if FLOATER is suitable for combination with the
9174 anchor. */
9175 floater_attr = get_attr_pa_combine_type (floater);
9176 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9177 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9178 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9179 && floater_attr == PA_COMBINE_TYPE_FMPY))
9180 {
9181 /* If ANCHOR and FLOATER can be combined, then we're
9182 done with this pass. */
0a2aaacc 9183 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
c4bb6b38 9184 SET_DEST (PATTERN (floater)),
831c1763
AM
9185 XEXP (SET_SRC (PATTERN (floater)),
9186 0),
9187 XEXP (SET_SRC (PATTERN (floater)),
9188 1)))
c4bb6b38
JL
9189 break;
9190 }
9191 }
9192 }
9193
9194 /* FLOATER will be nonzero if we found a suitable floating
9195 insn for combination with ANCHOR. */
9196 if (floater
9197 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9198 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9199 {
9200 /* Emit the new instruction and delete the old anchor. */
8faa8118
SB
9201 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9202 copy_rtx (PATTERN (floater)));
9203 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9204 emit_insn_before (temp, anchor);
c5c76735 9205
a38e7aa5 9206 SET_INSN_DELETED (anchor);
c4bb6b38
JL
9207
9208 /* Emit a special USE insn for FLOATER, then delete
9209 the floating insn. */
8faa8118
SB
9210 temp = copy_rtx (PATTERN (floater));
9211 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
c4bb6b38
JL
9212 delete_insn (floater);
9213
9214 continue;
9215 }
9216 else if (floater
9217 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9218 {
c4bb6b38 9219 /* Emit the new_jump instruction and delete the old anchor. */
8faa8118
SB
9220 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9221 copy_rtx (PATTERN (floater)));
9222 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9223 temp = emit_jump_insn_before (temp, anchor);
c5c76735 9224
c4bb6b38 9225 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
a38e7aa5 9226 SET_INSN_DELETED (anchor);
c4bb6b38
JL
9227
9228 /* Emit a special USE insn for FLOATER, then delete
9229 the floating insn. */
8faa8118
SB
9230 temp = copy_rtx (PATTERN (floater));
9231 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
c4bb6b38
JL
9232 delete_insn (floater);
9233 continue;
9234 }
9235 }
9236 }
9237}
9238
0952f89b 9239static int
647d790d 9240pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
b32d5189 9241 int reversed, rtx dest,
b7849684 9242 rtx src1, rtx src2)
c4bb6b38
JL
9243{
9244 int insn_code_number;
b32d5189 9245 rtx_insn *start, *end;
c4bb6b38
JL
9246
9247 /* Create a PARALLEL with the patterns of ANCHOR and
9248 FLOATER, try to recognize it, then test constraints
9249 for the resulting pattern.
9250
9251 If the pattern doesn't match or the constraints
9252 aren't met keep searching for a suitable floater
9253 insn. */
0a2aaacc
KG
9254 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9255 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9256 INSN_CODE (new_rtx) = -1;
9257 insn_code_number = recog_memoized (new_rtx);
daca1a96 9258 basic_block bb = BLOCK_FOR_INSN (anchor);
c4bb6b38 9259 if (insn_code_number < 0
daca1a96 9260 || (extract_insn (new_rtx),
63e7e854 9261 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
c4bb6b38
JL
9262 return 0;
9263
9264 if (reversed)
9265 {
9266 start = anchor;
9267 end = floater;
9268 }
9269 else
9270 {
9271 start = floater;
9272 end = anchor;
9273 }
9274
9275 /* There's up to three operands to consider. One
9276 output and two inputs.
9277
9278 The output must not be used between FLOATER & ANCHOR
9279 exclusive. The inputs must not be set between
9280 FLOATER and ANCHOR exclusive. */
9281
9282 if (reg_used_between_p (dest, start, end))
9283 return 0;
9284
9285 if (reg_set_between_p (src1, start, end))
9286 return 0;
9287
9288 if (reg_set_between_p (src2, start, end))
9289 return 0;
9290
9291 /* If we get here, then everything is good. */
9292 return 1;
9293}
b9cd54d2 9294
2561a923 9295/* Return nonzero if references for INSN are delayed.
b9cd54d2
JL
9296
9297 Millicode insns are actually function calls with some special
9298 constraints on arguments and register usage.
9299
9300 Millicode calls always expect their arguments in the integer argument
9301 registers, and always return their result in %r29 (ret1). They
7d8b1412
AM
9302 are expected to clobber their arguments, %r1, %r29, and the return
9303 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9304
9305 This function tells reorg that the references to arguments and
9306 millicode calls do not appear to happen until after the millicode call.
9307 This allows reorg to put insns which set the argument registers into the
9308 delay slot of the millicode call -- thus they act more like traditional
9309 CALL_INSNs.
9310
1e5f1716 9311 Note we cannot consider side effects of the insn to be delayed because
7d8b1412
AM
9312 the branch and link insn will clobber the return pointer. If we happened
9313 to use the return pointer in the delay slot of the call, then we lose.
b9cd54d2
JL
9314
9315 get_attr_type will try to recognize the given insn, so make sure to
9316 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9317 in particular. */
9318int
84034c69 9319pa_insn_refs_are_delayed (rtx_insn *insn)
b9cd54d2 9320{
b64925dc 9321 return ((NONJUMP_INSN_P (insn)
b9cd54d2
JL
9322 && GET_CODE (PATTERN (insn)) != SEQUENCE
9323 && GET_CODE (PATTERN (insn)) != USE
9324 && GET_CODE (PATTERN (insn)) != CLOBBER
9325 && get_attr_type (insn) == TYPE_MILLI));
9326}
d07d525a 9327
cde0f3fd
PB
9328/* Promote the return value, but not the arguments. */
9329
ef4bddc2 9330static machine_mode
cde0f3fd 9331pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
ef4bddc2 9332 machine_mode mode,
cde0f3fd
PB
9333 int *punsignedp ATTRIBUTE_UNUSED,
9334 const_tree fntype ATTRIBUTE_UNUSED,
9335 int for_return)
9336{
666e3ceb 9337 if (for_return == 0)
cde0f3fd 9338 return mode;
02095c50 9339 return promote_mode (type, mode, punsignedp);
cde0f3fd
PB
9340}
9341
44571d6e
JDA
9342/* On the HP-PA the value is found in register(s) 28(-29), unless
9343 the mode is SF or DF. Then the value is returned in fr4 (32).
9344
cde0f3fd
PB
9345 This must perform the same promotions as PROMOTE_MODE, else promoting
9346 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
44571d6e
JDA
9347
9348 Small structures must be returned in a PARALLEL on PA64 in order
9349 to match the HP Compiler ABI. */
9350
8a5b8538 9351static rtx
9193312a
AS
9352pa_function_value (const_tree valtype,
9353 const_tree func ATTRIBUTE_UNUSED,
9354 bool outgoing ATTRIBUTE_UNUSED)
44571d6e 9355{
ef4bddc2 9356 machine_mode valmode;
44571d6e 9357
4720d5ca
JDA
9358 if (AGGREGATE_TYPE_P (valtype)
9359 || TREE_CODE (valtype) == COMPLEX_TYPE
9360 || TREE_CODE (valtype) == VECTOR_TYPE)
44571d6e 9361 {
cbf6bcbe
JDA
9362 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9363
9364 /* Handle aggregates that fit exactly in a word or double word. */
9365 if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9366 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9367
2a04824b
JDA
9368 if (TARGET_64BIT)
9369 {
9370 /* Aggregates with a size less than or equal to 128 bits are
9371 returned in GR 28(-29). They are left justified. The pad
9372 bits are undefined. Larger aggregates are returned in
9373 memory. */
9374 rtx loc[2];
9375 int i, offset = 0;
cbf6bcbe 9376 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
2a04824b
JDA
9377
9378 for (i = 0; i < ub; i++)
9379 {
9380 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9381 gen_rtx_REG (DImode, 28 + i),
9382 GEN_INT (offset));
9383 offset += 8;
9384 }
44571d6e 9385
2a04824b
JDA
9386 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9387 }
cbf6bcbe 9388 else if (valsize > UNITS_PER_WORD)
44571d6e 9389 {
2a04824b
JDA
9390 /* Aggregates 5 to 8 bytes in size are returned in general
9391 registers r28-r29 in the same manner as other non
9392 floating-point objects. The data is right-justified and
9393 zero-extended to 64 bits. This is opposite to the normal
9394 justification used on big endian targets and requires
9395 special treatment. */
9396 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9397 gen_rtx_REG (DImode, 28), const0_rtx);
9398 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
44571d6e 9399 }
44571d6e
JDA
9400 }
9401
9402 if ((INTEGRAL_TYPE_P (valtype)
2ae88ecd 9403 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
44571d6e
JDA
9404 || POINTER_TYPE_P (valtype))
9405 valmode = word_mode;
9406 else
9407 valmode = TYPE_MODE (valtype);
9408
9409 if (TREE_CODE (valtype) == REAL_TYPE
2a04824b 9410 && !AGGREGATE_TYPE_P (valtype)
44571d6e
JDA
9411 && TYPE_MODE (valtype) != TFmode
9412 && !TARGET_SOFT_FLOAT)
9413 return gen_rtx_REG (valmode, 32);
9414
9415 return gen_rtx_REG (valmode, 28);
9416}
9417
8a5b8538
AS
9418/* Implement the TARGET_LIBCALL_VALUE hook. */
9419
9420static rtx
ef4bddc2 9421pa_libcall_value (machine_mode mode,
8a5b8538
AS
9422 const_rtx fun ATTRIBUTE_UNUSED)
9423{
9424 if (! TARGET_SOFT_FLOAT
9425 && (mode == SFmode || mode == DFmode))
9426 return gen_rtx_REG (mode, 32);
9427 else
9428 return gen_rtx_REG (mode, 28);
9429}
9430
9431/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9432
9433static bool
9434pa_function_value_regno_p (const unsigned int regno)
9435{
9436 if (regno == 28
9437 || (! TARGET_SOFT_FLOAT && regno == 32))
9438 return true;
9439
9440 return false;
9441}
9442
fd29bdaf
NF
9443/* Update the data in CUM to advance over an argument
9444 of mode MODE and data type TYPE.
9445 (TYPE is null for libcalls where that information may not be available.) */
9446
9447static void
ef4bddc2 9448pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
fd29bdaf
NF
9449 const_tree type, bool named ATTRIBUTE_UNUSED)
9450{
d5cc9181 9451 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
fd29bdaf
NF
9452 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9453
9454 cum->nargs_prototype--;
9455 cum->words += (arg_size
9456 + ((cum->words & 01)
9457 && type != NULL_TREE
9458 && arg_size > 1));
9459}
9460
520babc7
JL
9461/* Return the location of a parameter that is passed in a register or NULL
9462 if the parameter has any component that is passed in memory.
9463
9464 This is new code and will be pushed to into the net sources after
6619e96c 9465 further testing.
520babc7
JL
9466
9467 ??? We might want to restructure this so that it looks more like other
9468 ports. */
fd29bdaf 9469static rtx
ef4bddc2 9470pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
fd29bdaf 9471 const_tree type, bool named ATTRIBUTE_UNUSED)
520babc7 9472{
d5cc9181 9473 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
520babc7 9474 int max_arg_words = (TARGET_64BIT ? 8 : 4);
015b1ad1 9475 int alignment = 0;
9dff28ab 9476 int arg_size;
520babc7
JL
9477 int fpr_reg_base;
9478 int gpr_reg_base;
9479 rtx retval;
9480
9dff28ab
JDA
9481 if (mode == VOIDmode)
9482 return NULL_RTX;
9483
9484 arg_size = FUNCTION_ARG_SIZE (mode, type);
9485
9486 /* If this arg would be passed partially or totally on the stack, then
78a52f11 9487 this routine should return zero. pa_arg_partial_bytes will
9dff28ab
JDA
9488 handle arguments which are split between regs and stack slots if
9489 the ABI mandates split arguments. */
4720d5ca 9490 if (!TARGET_64BIT)
520babc7 9491 {
9dff28ab
JDA
9492 /* The 32-bit ABI does not split arguments. */
9493 if (cum->words + arg_size > max_arg_words)
520babc7
JL
9494 return NULL_RTX;
9495 }
9496 else
9497 {
015b1ad1
JDA
9498 if (arg_size > 1)
9499 alignment = cum->words & 1;
9dff28ab 9500 if (cum->words + alignment >= max_arg_words)
520babc7
JL
9501 return NULL_RTX;
9502 }
9503
9504 /* The 32bit ABIs and the 64bit ABIs are rather different,
9505 particularly in their handling of FP registers. We might
9506 be able to cleverly share code between them, but I'm not
0952f89b 9507 going to bother in the hope that splitting them up results
015b1ad1 9508 in code that is more easily understood. */
520babc7 9509
520babc7
JL
9510 if (TARGET_64BIT)
9511 {
9512 /* Advance the base registers to their current locations.
9513
9514 Remember, gprs grow towards smaller register numbers while
015b1ad1
JDA
9515 fprs grow to higher register numbers. Also remember that
9516 although FP regs are 32-bit addressable, we pretend that
9517 the registers are 64-bits wide. */
520babc7
JL
9518 gpr_reg_base = 26 - cum->words;
9519 fpr_reg_base = 32 + cum->words;
6619e96c 9520
9dff28ab
JDA
9521 /* Arguments wider than one word and small aggregates need special
9522 treatment. */
9523 if (arg_size > 1
9524 || mode == BLKmode
4720d5ca
JDA
9525 || (type && (AGGREGATE_TYPE_P (type)
9526 || TREE_CODE (type) == COMPLEX_TYPE
9527 || TREE_CODE (type) == VECTOR_TYPE)))
520babc7 9528 {
015b1ad1
JDA
9529 /* Double-extended precision (80-bit), quad-precision (128-bit)
9530 and aggregates including complex numbers are aligned on
9531 128-bit boundaries. The first eight 64-bit argument slots
9532 are associated one-to-one, with general registers r26
9533 through r19, and also with floating-point registers fr4
9534 through fr11. Arguments larger than one word are always
9dff28ab
JDA
9535 passed in general registers.
9536
9537 Using a PARALLEL with a word mode register results in left
9538 justified data on a big-endian target. */
015b1ad1
JDA
9539
9540 rtx loc[8];
9541 int i, offset = 0, ub = arg_size;
9542
9543 /* Align the base register. */
9544 gpr_reg_base -= alignment;
9545
9546 ub = MIN (ub, max_arg_words - cum->words - alignment);
9547 for (i = 0; i < ub; i++)
520babc7 9548 {
015b1ad1
JDA
9549 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9550 gen_rtx_REG (DImode, gpr_reg_base),
9551 GEN_INT (offset));
9552 gpr_reg_base -= 1;
9553 offset += 8;
520babc7 9554 }
015b1ad1 9555
e4b95210 9556 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
520babc7 9557 }
9dff28ab 9558 }
520babc7
JL
9559 else
9560 {
9561 /* If the argument is larger than a word, then we know precisely
9562 which registers we must use. */
015b1ad1 9563 if (arg_size > 1)
520babc7
JL
9564 {
9565 if (cum->words)
9566 {
9567 gpr_reg_base = 23;
9568 fpr_reg_base = 38;
9569 }
9570 else
9571 {
9572 gpr_reg_base = 25;
9573 fpr_reg_base = 34;
9574 }
9dff28ab
JDA
9575
9576 /* Structures 5 to 8 bytes in size are passed in the general
9577 registers in the same manner as other non floating-point
9578 objects. The data is right-justified and zero-extended
7ea18c08
JDA
9579 to 64 bits. This is opposite to the normal justification
9580 used on big endian targets and requires special treatment.
4720d5ca
JDA
9581 We now define BLOCK_REG_PADDING to pad these objects.
9582 Aggregates, complex and vector types are passed in the same
9583 manner as structures. */
9584 if (mode == BLKmode
9585 || (type && (AGGREGATE_TYPE_P (type)
9586 || TREE_CODE (type) == COMPLEX_TYPE
9587 || TREE_CODE (type) == VECTOR_TYPE)))
9dff28ab 9588 {
44571d6e
JDA
9589 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9590 gen_rtx_REG (DImode, gpr_reg_base),
9591 const0_rtx);
2a04824b 9592 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9dff28ab 9593 }
520babc7
JL
9594 }
9595 else
9596 {
9597 /* We have a single word (32 bits). A simple computation
9598 will get us the register #s we need. */
9599 gpr_reg_base = 26 - cum->words;
9600 fpr_reg_base = 32 + 2 * cum->words;
9601 }
9602 }
9603
b848dc65 9604 /* Determine if the argument needs to be passed in both general and
520babc7 9605 floating point registers. */
b848dc65
JDA
9606 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9607 /* If we are doing soft-float with portable runtime, then there
9608 is no need to worry about FP regs. */
c328adfa 9609 && !TARGET_SOFT_FLOAT
4720d5ca 9610 /* The parameter must be some kind of scalar float, else we just
b848dc65 9611 pass it in integer registers. */
4720d5ca 9612 && GET_MODE_CLASS (mode) == MODE_FLOAT
b848dc65
JDA
9613 /* The target function must not have a prototype. */
9614 && cum->nargs_prototype <= 0
9615 /* libcalls do not need to pass items in both FP and general
9616 registers. */
9617 && type != NULL_TREE
c328adfa
JDA
9618 /* All this hair applies to "outgoing" args only. This includes
9619 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9620 && !cum->incoming)
b848dc65
JDA
9621 /* Also pass outgoing floating arguments in both registers in indirect
9622 calls with the 32 bit ABI and the HP assembler since there is no
9623 way to the specify argument locations in static functions. */
c328adfa
JDA
9624 || (!TARGET_64BIT
9625 && !TARGET_GAS
9626 && !cum->incoming
b848dc65 9627 && cum->indirect
4720d5ca 9628 && GET_MODE_CLASS (mode) == MODE_FLOAT))
520babc7
JL
9629 {
9630 retval
9631 = gen_rtx_PARALLEL
9632 (mode,
9633 gen_rtvec (2,
9634 gen_rtx_EXPR_LIST (VOIDmode,
9635 gen_rtx_REG (mode, fpr_reg_base),
9636 const0_rtx),
9637 gen_rtx_EXPR_LIST (VOIDmode,
9638 gen_rtx_REG (mode, gpr_reg_base),
9639 const0_rtx)));
9640 }
9641 else
9642 {
9643 /* See if we should pass this parameter in a general register. */
9644 if (TARGET_SOFT_FLOAT
9645 /* Indirect calls in the normal 32bit ABI require all arguments
9646 to be passed in general registers. */
9647 || (!TARGET_PORTABLE_RUNTIME
9648 && !TARGET_64BIT
50b424a9 9649 && !TARGET_ELF32
520babc7 9650 && cum->indirect)
4720d5ca
JDA
9651 /* If the parameter is not a scalar floating-point parameter,
9652 then it belongs in GPRs. */
9653 || GET_MODE_CLASS (mode) != MODE_FLOAT
2a04824b
JDA
9654 /* Structure with single SFmode field belongs in GPR. */
9655 || (type && AGGREGATE_TYPE_P (type)))
520babc7
JL
9656 retval = gen_rtx_REG (mode, gpr_reg_base);
9657 else
9658 retval = gen_rtx_REG (mode, fpr_reg_base);
9659 }
9660 return retval;
9661}
9662
c2ed6cf8
NF
9663/* Arguments larger than one word are double word aligned. */
9664
9665static unsigned int
ef4bddc2 9666pa_function_arg_boundary (machine_mode mode, const_tree type)
c2ed6cf8 9667{
c2ed6cf8 9668 bool singleword = (type
b58a864d
NF
9669 ? (integer_zerop (TYPE_SIZE (type))
9670 || !TREE_CONSTANT (TYPE_SIZE (type))
c2ed6cf8 9671 || int_size_in_bytes (type) <= UNITS_PER_WORD)
19cf9bde 9672 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
c2ed6cf8
NF
9673
9674 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9675}
520babc7
JL
9676
9677/* If this arg would be passed totally in registers or totally on the stack,
78a52f11
RH
9678 then this routine should return zero. */
9679
9680static int
ef4bddc2 9681pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
78a52f11 9682 tree type, bool named ATTRIBUTE_UNUSED)
520babc7 9683{
d5cc9181 9684 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
e0c556d3
AM
9685 unsigned int max_arg_words = 8;
9686 unsigned int offset = 0;
520babc7 9687
78a52f11
RH
9688 if (!TARGET_64BIT)
9689 return 0;
9690
e0c556d3 9691 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
520babc7
JL
9692 offset = 1;
9693
e0c556d3 9694 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
fe19a83d 9695 /* Arg fits fully into registers. */
520babc7 9696 return 0;
6619e96c 9697 else if (cum->words + offset >= max_arg_words)
fe19a83d 9698 /* Arg fully on the stack. */
520babc7
JL
9699 return 0;
9700 else
fe19a83d 9701 /* Arg is split. */
78a52f11 9702 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
520babc7
JL
9703}
9704
9705
d6b5193b 9706/* A get_unnamed_section callback for switching to the text section.
9a55eab3
JDA
9707
9708 This function is only used with SOM. Because we don't support
9709 named subspaces, we can only create a new subspace or switch back
774acadf 9710 to the default text subspace. */
774acadf 9711
d6b5193b
RS
9712static void
9713som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9714{
9715 gcc_assert (TARGET_SOM);
774acadf 9716 if (TARGET_GAS)
9a55eab3 9717 {
8c5e065b 9718 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9a55eab3
JDA
9719 {
9720 /* We only want to emit a .nsubspa directive once at the
9721 start of the function. */
9722 cfun->machine->in_nsubspa = 1;
9723
9724 /* Create a new subspace for the text. This provides
9725 better stub placement and one-only functions. */
9726 if (cfun->decl
9727 && DECL_ONE_ONLY (cfun->decl)
9728 && !DECL_WEAK (cfun->decl))
1a83bfc3
JDA
9729 {
9730 output_section_asm_op ("\t.SPACE $TEXT$\n"
9731 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9732 "ACCESS=44,SORT=24,COMDAT");
9733 return;
9734 }
9a55eab3
JDA
9735 }
9736 else
9737 {
9738 /* There isn't a current function or the body of the current
9739 function has been completed. So, we are changing to the
1a83bfc3
JDA
9740 text section to output debugging information. Thus, we
9741 need to forget that we are in the text section so that
9742 varasm.c will call us when text_section is selected again. */
8c5e065b
JDA
9743 gcc_assert (!cfun || !cfun->machine
9744 || cfun->machine->in_nsubspa == 2);
d6b5193b 9745 in_section = NULL;
9a55eab3 9746 }
1a83bfc3
JDA
9747 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9748 return;
9a55eab3 9749 }
d6b5193b
RS
9750 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9751}
9752
1a83bfc3
JDA
9753/* A get_unnamed_section callback for switching to comdat data
9754 sections. This function is only used with SOM. */
9755
9756static void
9757som_output_comdat_data_section_asm_op (const void *data)
9758{
9759 in_section = NULL;
9760 output_section_asm_op (data);
9761}
9762
d6b5193b 9763/* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9a55eab3 9764
d6b5193b
RS
9765static void
9766pa_som_asm_init_sections (void)
9767{
9768 text_section
9769 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9770
9771 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9772 is not being generated. */
9773 som_readonly_data_section
9774 = get_unnamed_section (0, output_section_asm_op,
9775 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9776
9777 /* When secondary definitions are not supported, SOM makes readonly
9778 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9779 the comdat flag. */
9780 som_one_only_readonly_data_section
1a83bfc3 9781 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
d6b5193b
RS
9782 "\t.SPACE $TEXT$\n"
9783 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9784 "ACCESS=0x2c,SORT=16,COMDAT");
9785
9786
9787 /* When secondary definitions are not supported, SOM makes data one-only
9788 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9789 som_one_only_data_section
1a83bfc3
JDA
9790 = get_unnamed_section (SECTION_WRITE,
9791 som_output_comdat_data_section_asm_op,
d6b5193b
RS
9792 "\t.SPACE $PRIVATE$\n"
9793 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9794 "ACCESS=31,SORT=24,COMDAT");
9795
57d138a9
JDA
9796 if (flag_tm)
9797 som_tm_clone_table_section
9798 = get_unnamed_section (0, output_section_asm_op,
9799 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9800
d6b5193b
RS
9801 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9802 which reference data within the $TEXT$ space (for example constant
9803 strings in the $LIT$ subspace).
9804
9805 The assemblers (GAS and HP as) both have problems with handling
9806 the difference of two symbols which is the other correct way to
9807 reference constant data during PIC code generation.
9808
9809 So, there's no way to reference constant data which is in the
9810 $TEXT$ space during PIC generation. Instead place all constant
9811 data into the $PRIVATE$ subspace (this reduces sharing, but it
9812 works correctly). */
9813 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9814
9815 /* We must not have a reference to an external symbol defined in a
9816 shared library in a readonly section, else the SOM linker will
9817 complain.
9818
9819 So, we force exception information into the data section. */
9820 exception_section = data_section;
9a55eab3
JDA
9821}
9822
57d138a9
JDA
9823/* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9824
9825static section *
9826pa_som_tm_clone_table_section (void)
9827{
9828 return som_tm_clone_table_section;
9829}
9830
ae46c4e0
RH
9831/* On hpux10, the linker will give an error if we have a reference
9832 in the read-only data section to a symbol defined in a shared
9833 library. Therefore, expressions that might require a reloc can
9834 not be placed in the read-only data section. */
9835
d6b5193b 9836static section *
24a52160
JDA
9837pa_select_section (tree exp, int reloc,
9838 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
ae46c4e0
RH
9839{
9840 if (TREE_CODE (exp) == VAR_DECL
9841 && TREE_READONLY (exp)
9842 && !TREE_THIS_VOLATILE (exp)
9843 && DECL_INITIAL (exp)
9844 && (DECL_INITIAL (exp) == error_mark_node
9845 || TREE_CONSTANT (DECL_INITIAL (exp)))
9846 && !reloc)
9a55eab3
JDA
9847 {
9848 if (TARGET_SOM
9849 && DECL_ONE_ONLY (exp)
9850 && !DECL_WEAK (exp))
d6b5193b 9851 return som_one_only_readonly_data_section;
9a55eab3 9852 else
d6b5193b 9853 return readonly_data_section;
9a55eab3 9854 }
6615c446 9855 else if (CONSTANT_CLASS_P (exp) && !reloc)
d6b5193b 9856 return readonly_data_section;
9a55eab3
JDA
9857 else if (TARGET_SOM
9858 && TREE_CODE (exp) == VAR_DECL
9859 && DECL_ONE_ONLY (exp)
e41f3691 9860 && !DECL_WEAK (exp))
d6b5193b 9861 return som_one_only_data_section;
ae46c4e0 9862 else
d6b5193b 9863 return data_section;
ae46c4e0 9864}
e2500fed 9865
f258111a
JDA
/* Implement TARGET_ASM_RELOC_RW_MASK.

   We force (const (plus (symbol) (const_int))) to memory when the
   const_int doesn't fit in a 14-bit integer.  The SOM linker can't
   handle this construct in read-only memory and we want to avoid
   it for ELF as well.  So every RTX needing a relocation, local or
   global, is forced to the data section: both mask bits are set.  */

static int
pa_reloc_rw_mask (void)
{
  const int local_relocs_rw = 1;	/* Bit 0: local relocations.  */
  const int global_relocs_rw = 2;	/* Bit 1: global relocations.  */

  return local_relocs_rw | global_relocs_rw;
}
9878
5eb99654 9879static void
b7849684 9880pa_globalize_label (FILE *stream, const char *name)
5eb99654
KG
9881{
9882 /* We only handle DATA objects here, functions are globalized in
9883 ASM_DECLARE_FUNCTION_NAME. */
9884 if (! FUNCTION_NAME_P (name))
9885 {
9886 fputs ("\t.EXPORT ", stream);
9887 assemble_name (stream, name);
9888 fputs (",DATA\n", stream);
9889 }
9890}
3f12cd9b 9891
bd5bd7ac
KH
9892/* Worker function for TARGET_STRUCT_VALUE_RTX. */
9893
3f12cd9b
KH
9894static rtx
9895pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9896 int incoming ATTRIBUTE_UNUSED)
9897{
9898 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9899}
9900
bd5bd7ac
KH
9901/* Worker function for TARGET_RETURN_IN_MEMORY. */
9902
3f12cd9b 9903bool
586de218 9904pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
3f12cd9b
KH
9905{
9906 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9907 PA64 ABI says that objects larger than 128 bits are returned in memory.
9908 Note, int_size_in_bytes can return -1 if the size of the object is
9909 variable or larger than the maximum value that can be expressed as
9910 a HOST_WIDE_INT. It can also return zero for an empty type. The
9911 simplest way to handle variable and empty types is to pass them in
9912 memory. This avoids problems in defining the boundaries of argument
9913 slots, allocating registers, etc. */
9914 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9915 || int_size_in_bytes (type) <= 0);
9916}
9917
744b2d61
JDA
9918/* Structure to hold declaration and name of external symbols that are
9919 emitted by GCC. We generate a vector of these symbols and output them
9920 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9921 This avoids putting out names that are never really used. */
9922
d1b38208 9923typedef struct GTY(()) extern_symbol
744b2d61
JDA
9924{
9925 tree decl; /* Declaration of the external symbol. */
9926 const char *name; /* Name passed to ASM_OUTPUT_EXTERNAL_REAL. */
d4e6fecb 9927} extern_symbol;
744b2d61
JDA
9928
9929/* The gc'd vector type for extern_symbol is spelled directly as
 vec<extern_symbol, va_gc> in the declaration below. */
744b2d61
JDA
9930
9931/* Vector of extern_symbol records (stored by value, not as pointers).
 pa_hpux_asm_output_external appends to it and pa_hpux_file_end walks
 and then frees it. */
9771b263 9932static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
744b2d61
JDA
9933
9934#ifdef ASM_OUTPUT_EXTERNAL_REAL
9935/* Mark DECL (name NAME) as an external reference (assembler output
9936 file FILE). This saves the names to output at the end of the file
9937 if actually referenced. */
9938
9939void
9940pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9941{
744b2d61 9942 gcc_assert (file == asm_out_file);
f32682ca 9943 extern_symbol p = {decl, name};
9771b263 9944 vec_safe_push (extern_symbols, p);
744b2d61
JDA
9945}
9946
9947/* Output text required at the end of an assembler file.
9948 This includes deferred plabels and .import directives for
9949 all external symbols that were actually referenced. */
9950
9951static void
9952pa_hpux_file_end (void)
9953{
9954 unsigned int i;
d4e6fecb 9955 extern_symbol *p;
744b2d61 9956
3674b34d
JDA
9957 if (!NO_DEFERRED_PROFILE_COUNTERS)
9958 output_deferred_profile_counters ();
9959
744b2d61
JDA
9960 output_deferred_plabels ();
9961
9771b263 9962 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
744b2d61
JDA
9963 {
9964 tree decl = p->decl;
9965
9966 if (!TREE_ASM_WRITTEN (decl)
9967 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9968 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9969 }
9970
9771b263 9971 vec_free (extern_symbols);
744b2d61
JDA
9972}
9973#endif
9974
6982c5d4 9975/* Return true if a change from mode FROM to mode TO for a register
0a2aaacc 9976 in register class RCLASS is invalid. */
6982c5d4
JDA
9977
9978bool
ef4bddc2 9979pa_cannot_change_mode_class (machine_mode from, machine_mode to,
0a2aaacc 9980 enum reg_class rclass)
6982c5d4
JDA
9981{
9982 if (from == to)
9983 return false;
9984
212b7e9c
JDA
9985 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9986 return false;
9987
9988 /* Reject changes to/from modes with zero size. */
9989 if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
9990 return true;
9991
6982c5d4
JDA
9992 /* Reject changes to/from complex and vector modes. */
9993 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
9994 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
9995 return true;
9996
212b7e9c
JDA
9997 /* There is no way to load QImode or HImode values directly from memory
9998 to a FP register. SImode loads to the FP registers are not zero
9999 extended. On the 64-bit target, this conflicts with the definition
10000 of LOAD_EXTEND_OP. Thus, we can't allow changing between modes with
10001 different sizes in the floating-point registers. */
0a2aaacc 10002 if (MAYBE_FP_REG_CLASS_P (rclass))
6982c5d4
JDA
10003 return true;
10004
f939c3e6 10005 /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
6982c5d4
JDA
10006 in specific sets of registers. Thus, we cannot allow changing
10007 to a larger mode when it's larger than a word. */
10008 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10009 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10010 return true;
10011
10012 return false;
10013}
10014
10015/* Returns TRUE if it is a good idea to tie two pseudo registers
10016 when one has mode MODE1 and one has mode MODE2.
f939c3e6
RS
10017 If TARGET_HARD_REGNO_MODE_OK could produce different values for MODE1
10018 and MODE2, for any hard reg, then this must be FALSE for correct output.
6982c5d4
JDA
10019
10020 We should return FALSE for QImode and HImode because these modes
10021 are not ok in the floating-point registers. However, this prevents
10022 tieing these modes to SImode and DImode in the general registers.
f939c3e6 10023 So, this isn't a good idea. We rely on TARGET_HARD_REGNO_MODE_OK and
6982c5d4
JDA
10024 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
10025 in the floating-point registers. */
10026
10027bool
ef4bddc2 10028pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
6982c5d4
JDA
10029{
10030 /* Don't tie modes in different classes. */
10031 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10032 return false;
10033
10034 return true;
10035}
10036
859c146c
RH
10037\f
10038/* Length in units of the trampoline instruction code: 24 when
 TARGET_64BIT, otherwise 32 for TARGET_PA_20 and 40 for earlier
 processors. pa_trampoline_init uses it to compute the last address
 of the cache range it flushes.
 NOTE(review): the unit is presumably bytes -- confirm. */
10039
10040#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
10041
10042
10043/* Output assembler code for a block containing the constant parts
10044 of a trampoline, leaving space for the variable parts.\
10045
10046 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10047 and then branches to the specified routine.
10048
10049 This code template is copied from text segment to stack location
10050 and then patched with pa_trampoline_init to contain valid values,
10051 and then entered as a subroutine.
10052
10053 It is best to keep this as small as possible to avoid having to
10054 flush multiple lines in the cache. */
10055
10056static void
10057pa_asm_trampoline_template (FILE *f)
10058{
10059 if (!TARGET_64BIT)
10060 {
10061 fputs ("\tldw 36(%r22),%r21\n", f);
10062 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
10063 if (ASSEMBLER_DIALECT == 0)
10064 fputs ("\tdepi 0,31,2,%r21\n", f);
10065 else
10066 fputs ("\tdepwi 0,31,2,%r21\n", f);
10067 fputs ("\tldw 4(%r21),%r19\n", f);
10068 fputs ("\tldw 0(%r21),%r21\n", f);
10069 if (TARGET_PA_20)
10070 {
10071 fputs ("\tbve (%r21)\n", f);
10072 fputs ("\tldw 40(%r22),%r29\n", f);
10073 fputs ("\t.word 0\n", f);
10074 fputs ("\t.word 0\n", f);
10075 }
10076 else
10077 {
10078 fputs ("\tldsid (%r21),%r1\n", f);
10079 fputs ("\tmtsp %r1,%sr0\n", f);
10080 fputs ("\tbe 0(%sr0,%r21)\n", f);
10081 fputs ("\tldw 40(%r22),%r29\n", f);
10082 }
10083 fputs ("\t.word 0\n", f);
10084 fputs ("\t.word 0\n", f);
10085 fputs ("\t.word 0\n", f);
10086 fputs ("\t.word 0\n", f);
10087 }
10088 else
10089 {
10090 fputs ("\t.dword 0\n", f);
10091 fputs ("\t.dword 0\n", f);
10092 fputs ("\t.dword 0\n", f);
10093 fputs ("\t.dword 0\n", f);
10094 fputs ("\tmfia %r31\n", f);
10095 fputs ("\tldd 24(%r31),%r1\n", f);
10096 fputs ("\tldd 24(%r1),%r27\n", f);
10097 fputs ("\tldd 16(%r1),%r1\n", f);
10098 fputs ("\tbve (%r1)\n", f);
10099 fputs ("\tldd 32(%r31),%r31\n", f);
10100 fputs ("\t.dword 0 ; fptr\n", f);
10101 fputs ("\t.dword 0 ; static link\n", f);
10102 }
10103}
10104
10105/* Emit RTL insns to initialize the variable parts of a trampoline.
10106 FNADDR is an RTX for the address of the function's pure code.
10107 CXT is an RTX for the static chain value for the function.
10108
10109 Move the function address to the trampoline template at offset 36.
10110 Move the static chain value to trampoline template at offset 40.
10111 Move the trampoline address to trampoline template at offset 44.
10112 Move r19 to trampoline template at offset 48. The latter two
10113 words create a plabel for the indirect call to the trampoline.
10114
10115 A similar sequence is used for the 64-bit port but the plabel is
10116 at the beginning of the trampoline.
10117
10118 Finally, the cache entries for the trampoline code are flushed.
10119 This is necessary to ensure that the trampoline instruction sequence
10120 is written to memory prior to any attempts at prefetching the code
10121 sequence. */
10122
10123static void
10124pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10125{
10126 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10127 rtx start_addr = gen_reg_rtx (Pmode);
10128 rtx end_addr = gen_reg_rtx (Pmode);
10129 rtx line_length = gen_reg_rtx (Pmode);
10130 rtx r_tramp, tmp;
10131
10132 emit_block_move (m_tramp, assemble_trampoline_template (),
10133 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10134 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10135
10136 if (!TARGET_64BIT)
10137 {
10138 tmp = adjust_address (m_tramp, Pmode, 36);
10139 emit_move_insn (tmp, fnaddr);
10140 tmp = adjust_address (m_tramp, Pmode, 40);
10141 emit_move_insn (tmp, chain_value);
10142
10143 /* Create a fat pointer for the trampoline. */
10144 tmp = adjust_address (m_tramp, Pmode, 44);
10145 emit_move_insn (tmp, r_tramp);
10146 tmp = adjust_address (m_tramp, Pmode, 48);
10147 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10148
10149 /* fdc and fic only use registers for the address to flush,
10150 they do not accept integer displacements. We align the
10151 start and end addresses to the beginning of their respective
10152 cache lines to minimize the number of lines flushed. */
10153 emit_insn (gen_andsi3 (start_addr, r_tramp,
10154 GEN_INT (-MIN_CACHELINE_SIZE)));
0a81f074
RS
10155 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10156 TRAMPOLINE_CODE_SIZE-1));
859c146c
RH
10157 emit_insn (gen_andsi3 (end_addr, tmp,
10158 GEN_INT (-MIN_CACHELINE_SIZE)));
10159 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10160 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10161 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10162 gen_reg_rtx (Pmode),
10163 gen_reg_rtx (Pmode)));
10164 }
10165 else
10166 {
10167 tmp = adjust_address (m_tramp, Pmode, 56);
10168 emit_move_insn (tmp, fnaddr);
10169 tmp = adjust_address (m_tramp, Pmode, 64);
10170 emit_move_insn (tmp, chain_value);
10171
10172 /* Create a fat pointer for the trampoline. */
10173 tmp = adjust_address (m_tramp, Pmode, 16);
0a81f074
RS
10174 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10175 r_tramp, 32)));
859c146c
RH
10176 tmp = adjust_address (m_tramp, Pmode, 24);
10177 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10178
10179 /* fdc and fic only use registers for the address to flush,
10180 they do not accept integer displacements. We align the
10181 start and end addresses to the beginning of their respective
10182 cache lines to minimize the number of lines flushed. */
0a81f074 10183 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
859c146c
RH
10184 emit_insn (gen_anddi3 (start_addr, tmp,
10185 GEN_INT (-MIN_CACHELINE_SIZE)));
0a81f074
RS
10186 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10187 TRAMPOLINE_CODE_SIZE - 1));
859c146c
RH
10188 emit_insn (gen_anddi3 (end_addr, tmp,
10189 GEN_INT (-MIN_CACHELINE_SIZE)));
10190 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10191 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10192 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10193 gen_reg_rtx (Pmode),
10194 gen_reg_rtx (Pmode)));
10195 }
017d38f5
MK
10196
10197#ifdef HAVE_ENABLE_EXECUTE_STACK
10198  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
db69559b 10199 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
017d38f5 10200#endif
859c146c
RH
10201}
10202
10203/* Perform any machine-specific adjustment in the address of the trampoline.
10204 ADDR contains the address that was passed to pa_trampoline_init.
10205 Adjust the trampoline address to point to the plabel at offset 44. */
10206
10207static rtx
10208pa_trampoline_adjust_address (rtx addr)
10209{
10210 if (!TARGET_64BIT)
0a81f074 10211 addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
859c146c
RH
10212 return addr;
10213}
1f65437d
SE
10214
10215static rtx
10216pa_delegitimize_address (rtx orig_x)
10217{
10218 rtx x = delegitimize_mem_from_attrs (orig_x);
10219
10220 if (GET_CODE (x) == LO_SUM
10221 && GET_CODE (XEXP (x, 1)) == UNSPEC
10222 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10223 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10224 return x;
10225}
859c146c 10226\f
bc707992
JDA
10227static rtx
10228pa_internal_arg_pointer (void)
10229{
10230 /* The argument pointer and the hard frame pointer are the same in
10231 the 32-bit runtime, so we don't need a copy. */
10232 if (TARGET_64BIT)
10233 return copy_to_reg (virtual_incoming_args_rtx);
10234 else
10235 return virtual_incoming_args_rtx;
10236}
10237
10238/* Given FROM and TO register numbers, say whether this elimination is allowed.
10239 Frame pointer elimination is automatically handled. */
10240
10241static bool
10242pa_can_eliminate (const int from, const int to)
10243{
10244 /* The argument cannot be eliminated in the 64-bit runtime. */
10245 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10246 return false;
10247
10248 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10249 ? ! frame_pointer_needed
10250 : true);
10251}
10252
10253/* Define the offset between two registers, FROM to be eliminated and its
10254 replacement TO, at the start of a routine. */
10255HOST_WIDE_INT
10256pa_initial_elimination_offset (int from, int to)
10257{
10258 HOST_WIDE_INT offset;
10259
10260 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10261 && to == STACK_POINTER_REGNUM)
ae9d61ab 10262 offset = -pa_compute_frame_size (get_frame_size (), 0);
bc707992
JDA
10263 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10264 offset = 0;
10265 else
10266 gcc_unreachable ();
10267
10268 return offset;
10269}
10270
5efd84c5
NF
10271static void
10272pa_conditional_register_usage (void)
10273{
10274 int i;
10275
10276 if (!TARGET_64BIT && !TARGET_PA_11)
10277 {
10278 for (i = 56; i <= FP_REG_LAST; i++)
10279 fixed_regs[i] = call_used_regs[i] = 1;
10280 for (i = 33; i < 56; i += 2)
10281 fixed_regs[i] = call_used_regs[i] = 1;
10282 }
10283 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10284 {
10285 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10286 fixed_regs[i] = call_used_regs[i] = 1;
10287 }
10288 if (flag_pic)
10289 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10290}
10291
41a1208a
JDA
10292/* Target hook for c_mode_for_suffix. */
10293
ef4bddc2 10294static machine_mode
41a1208a
JDA
10295pa_c_mode_for_suffix (char suffix)
10296{
10297 if (HPUX_LONG_DOUBLE_LIBRARY)
10298 {
10299 if (suffix == 'q')
10300 return TFmode;
10301 }
10302
10303 return VOIDmode;
10304}
10305
7550cb35
JDA
10306/* Target hook for function_section. */
10307
10308static section *
10309pa_function_section (tree decl, enum node_frequency freq,
10310 bool startup, bool exit)
10311{
10312 /* Put functions in text section if target doesn't have named sections. */
677f3fa8 10313 if (!targetm_common.have_named_sections)
7550cb35
JDA
10314 return text_section;
10315
10316 /* Force nested functions into the same section as the containing
10317 function. */
10318 if (decl
f961457f 10319 && DECL_SECTION_NAME (decl) == NULL
7550cb35
JDA
10320 && DECL_CONTEXT (decl) != NULL_TREE
10321 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
f961457f 10322 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
7550cb35
JDA
10323 return function_section (DECL_CONTEXT (decl));
10324
10325 /* Otherwise, use the default function section. */
10326 return default_function_section (decl, freq, startup, exit);
10327}
10328
1a627b35
RS
10329/* Implement TARGET_LEGITIMATE_CONSTANT_P.
10330
10331 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10332 that need more than three instructions to load prior to reload. This
10333 limit is somewhat arbitrary. It takes three instructions to load a
10334 CONST_INT from memory but two are memory accesses. It may be better
10335 to increase the allowed range for CONST_INTS. We may also be able
10336 to handle CONST_DOUBLES. */
10337
10338static bool
ef4bddc2 10339pa_legitimate_constant_p (machine_mode mode, rtx x)
1a627b35
RS
10340{
10341 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10342 return false;
10343
10344 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10345 return false;
10346
9a201645 10347 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
0e4ae794
JDA
10348 legitimate constants. The other variants can't be handled by
10349 the move patterns after reload starts. */
093a6c99 10350 if (tls_referenced_p (x))
0e4ae794 10351 return false;
9a201645 10352
1a627b35
RS
10353 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10354 return false;
10355
10356 if (TARGET_64BIT
10357 && HOST_BITS_PER_WIDE_INT > 32
10358 && GET_CODE (x) == CONST_INT
10359 && !reload_in_progress
10360 && !reload_completed
10361 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
5877e54e 10362 && !pa_cint_ok_for_move (UINTVAL (x)))
1a627b35
RS
10363 return false;
10364
fda33f15
JDA
10365 if (function_label_operand (x, mode))
10366 return false;
10367
1a627b35
RS
10368 return true;
10369}
10370
fda33f15
JDA
10371/* Implement TARGET_SECTION_TYPE_FLAGS. */
10372
10373static unsigned int
10374pa_section_type_flags (tree decl, const char *name, int reloc)
10375{
10376 unsigned int flags;
10377
10378 flags = default_section_type_flags (decl, name, reloc);
10379
10380 /* Function labels are placed in the constant pool. This can
10381 cause a section conflict if decls are put in ".data.rel.ro"
10382 or ".data.rel.ro.local" using the __attribute__ construct. */
10383 if (strcmp (name, ".data.rel.ro") == 0
10384 || strcmp (name, ".data.rel.ro.local") == 0)
10385 flags |= SECTION_WRITE | SECTION_RELRO;
10386
10387 return flags;
10388}
10389
1a04ac2b
JDA
10390/* pa_legitimate_address_p recognizes an RTL expression that is a
10391 valid memory address for an instruction. The MODE argument is the
10392 machine mode for the MEM expression that wants to use this address.
10393
10394 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10395 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10396 available with floating point loads and stores, and integer loads.
10397 We get better code by allowing indexed addresses in the initial
10398 RTL generation.
10399
10400 The acceptance of indexed addresses as legitimate implies that we
10401 must provide patterns for doing indexed integer stores, or the move
10402 expanders must force the address of an indexed store to a register.
10403 We have adopted the latter approach.
10404
10405 Another function of pa_legitimate_address_p is to ensure that
10406 the base register is a valid pointer for indexed instructions.
10407 On targets that have non-equivalent space registers, we have to
10408 know at the time of assembler output which register in a REG+REG
10409 pair is the base register. The REG_POINTER flag is sometimes lost
10410 in reload and the following passes, so it can't be relied on during
10411 code generation. Thus, we either have to canonicalize the order
10412 of the registers in REG+REG indexed addresses, or treat REG+REG
10413 addresses separately and provide patterns for both permutations.
10414
10415 The latter approach requires several hundred additional lines of
10416 code in pa.md. The downside to canonicalizing is that a PLUS
10417 in the wrong order can't combine to form to make a scaled indexed
10418 memory operand. As we won't need to canonicalize the operands if
10419 the REG_POINTER lossage can be fixed, it seems better canonicalize.
10420
10421 We initially break out scaled indexed addresses in canonical order
10422 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10423 scaled indexed addresses during RTL generation. However, fold_rtx
10424 has its own opinion on how the operands of a PLUS should be ordered.
10425 If one of the operands is equivalent to a constant, it will make
10426 that operand the second operand. As the base register is likely to
10427 be equivalent to a SYMBOL_REF, we have made it the second operand.
10428
10429 pa_legitimate_address_p accepts REG+REG as legitimate when the
10430 operands are in the order INDEX+BASE on targets with non-equivalent
10431 space registers, and in any order on targets with equivalent space
10432 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10433
10434 We treat a SYMBOL_REF as legitimate if it is part of the current
10435 function's constant-pool, because such addresses can actually be
10436 output as REG+SMALLINT. */
10437
10438static bool
ef4bddc2 10439pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
1a04ac2b
JDA
10440{
10441 if ((REG_P (x)
10442 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10443 : REG_OK_FOR_BASE_P (x)))
10444 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10445 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10446 && REG_P (XEXP (x, 0))
10447 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10448 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10449 return true;
10450
10451 if (GET_CODE (x) == PLUS)
10452 {
10453 rtx base, index;
10454
10455 /* For REG+REG, the base register should be in XEXP (x, 1),
10456 so check it first. */
10457 if (REG_P (XEXP (x, 1))
10458 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10459 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10460 base = XEXP (x, 1), index = XEXP (x, 0);
10461 else if (REG_P (XEXP (x, 0))
10462 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10463 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10464 base = XEXP (x, 0), index = XEXP (x, 1);
10465 else
10466 return false;
10467
10468 if (GET_CODE (index) == CONST_INT)
10469 {
10470 if (INT_5_BITS (index))
10471 return true;
10472
10473 /* When INT14_OK_STRICT is false, a secondary reload is needed
10474 to adjust the displacement of SImode and DImode floating point
55ada375
JDA
10475 instructions but this may fail when the register also needs
10476 reloading. So, we return false when STRICT is true. We
1a04ac2b
JDA
10477 also reject long displacements for float mode addresses since
10478 the majority of accesses will use floating point instructions
10479 that don't support 14-bit offsets. */
10480 if (!INT14_OK_STRICT
55ada375 10481 && (strict || !(reload_in_progress || reload_completed))
ceaca33e
JDA
10482 && mode != QImode
10483 && mode != HImode)
10484 return false;
1a04ac2b 10485
ceaca33e 10486 return base14_operand (index, mode);
1a04ac2b
JDA
10487 }
10488
10489 if (!TARGET_DISABLE_INDEXING
10490 /* Only accept the "canonical" INDEX+BASE operand order
10491 on targets with non-equivalent space registers. */
10492 && (TARGET_NO_SPACE_REGS
10493 ? REG_P (index)
10494 : (base == XEXP (x, 1) && REG_P (index)
10495 && (reload_completed
10496 || (reload_in_progress && HARD_REGISTER_P (base))
10497 || REG_POINTER (base))
10498 && (reload_completed
10499 || (reload_in_progress && HARD_REGISTER_P (index))
10500 || !REG_POINTER (index))))
10501 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10502 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10503 : REG_OK_FOR_INDEX_P (index))
10504 && borx_reg_operand (base, Pmode)
10505 && borx_reg_operand (index, Pmode))
10506 return true;
10507
10508 if (!TARGET_DISABLE_INDEXING
10509 && GET_CODE (index) == MULT
10510 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10511 && REG_P (XEXP (index, 0))
10512 && GET_MODE (XEXP (index, 0)) == Pmode
10513 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10514 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10515 && GET_CODE (XEXP (index, 1)) == CONST_INT
10516 && INTVAL (XEXP (index, 1))
10517 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10518 && borx_reg_operand (base, Pmode))
10519 return true;
10520
10521 return false;
10522 }
10523
10524 if (GET_CODE (x) == LO_SUM)
10525 {
10526 rtx y = XEXP (x, 0);
10527
10528 if (GET_CODE (y) == SUBREG)
10529 y = SUBREG_REG (y);
10530
10531 if (REG_P (y)
10532 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10533 : REG_OK_FOR_BASE_P (y)))
10534 {
10535 /* Needed for -fPIC */
10536 if (mode == Pmode
10537 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10538 return true;
10539
10540 if (!INT14_OK_STRICT
55ada375 10541 && (strict || !(reload_in_progress || reload_completed))
ceaca33e
JDA
10542 && mode != QImode
10543 && mode != HImode)
10544 return false;
1a04ac2b
JDA
10545
10546 if (CONSTANT_P (XEXP (x, 1)))
10547 return true;
10548 }
10549 return false;
10550 }
10551
10552 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10553 return true;
10554
10555 return false;
10556}
10557
10558/* Look for machine dependent ways to make the invalid address AD a
10559 valid address.
10560
10561 For the PA, transform:
10562
10563 memory(X + <large int>)
10564
10565 into:
10566
10567 if (<large int> & mask) >= 16
10568 Y = (<large int> & ~mask) + mask + 1 Round up.
10569 else
10570 Y = (<large int> & ~mask) Round down.
10571 Z = X + Y
10572 memory (Z + (<large int> - Y));
10573
10574 This makes reload inheritance and reload_cse work better since Z
10575 can be reused.
10576
10577 There may be more opportunities to improve code with this hook. */
10578
10579rtx
ef4bddc2 10580pa_legitimize_reload_address (rtx ad, machine_mode mode,
1a04ac2b
JDA
10581 int opnum, int type,
10582 int ind_levels ATTRIBUTE_UNUSED)
10583{
10584 long offset, newoffset, mask;
10585 rtx new_rtx, temp = NULL_RTX;
10586
10587 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10588 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10589
10590 if (optimize && GET_CODE (ad) == PLUS)
10591 temp = simplify_binary_operation (PLUS, Pmode,
10592 XEXP (ad, 0), XEXP (ad, 1));
10593
10594 new_rtx = temp ? temp : ad;
10595
10596 if (optimize
10597 && GET_CODE (new_rtx) == PLUS
10598 && GET_CODE (XEXP (new_rtx, 0)) == REG
10599 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10600 {
10601 offset = INTVAL (XEXP ((new_rtx), 1));
10602
10603 /* Choose rounding direction. Round up if we are >= halfway. */
10604 if ((offset & mask) >= ((mask + 1) / 2))
10605 newoffset = (offset & ~mask) + mask + 1;
10606 else
10607 newoffset = offset & ~mask;
10608
10609 /* Ensure that long displacements are aligned. */
10610 if (mask == 0x3fff
10611 && (GET_MODE_CLASS (mode) == MODE_FLOAT
10612 || (TARGET_64BIT && (mode) == DImode)))
10613 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10614
10615 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10616 {
10617 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10618 GEN_INT (newoffset));
10619 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10620 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10621 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10622 opnum, (enum reload_type) type);
10623 return ad;
10624 }
10625 }
10626
10627 return NULL_RTX;
10628}
10629
3ba07ad3
JDA
10630/* Output address vector. */
10631
10632void
10633pa_output_addr_vec (rtx lab, rtx body)
10634{
10635 int idx, vlen = XVECLEN (body, 0);
10636
10637 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10638 if (TARGET_GAS)
10639 fputs ("\t.begin_brtab\n", asm_out_file);
10640 for (idx = 0; idx < vlen; idx++)
10641 {
10642 ASM_OUTPUT_ADDR_VEC_ELT
10643 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10644 }
10645 if (TARGET_GAS)
10646 fputs ("\t.end_brtab\n", asm_out_file);
10647}
10648
10649/* Output address difference vector. */
10650
10651void
10652pa_output_addr_diff_vec (rtx lab, rtx body)
10653{
10654 rtx base = XEXP (XEXP (body, 0), 0);
10655 int idx, vlen = XVECLEN (body, 1);
10656
10657 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10658 if (TARGET_GAS)
10659 fputs ("\t.begin_brtab\n", asm_out_file);
10660 for (idx = 0; idx < vlen; idx++)
10661 {
10662 ASM_OUTPUT_ADDR_DIFF_ELT
10663 (asm_out_file,
10664 body,
10665 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10666 CODE_LABEL_NUMBER (base));
10667 }
10668 if (TARGET_GAS)
10669 fputs ("\t.end_brtab\n", asm_out_file);
10670}
10671
7e7c9d40
JDA
10672/* This is a helper function for the other atomic operations. This function
10673 emits a loop that contains SEQ that iterates until a compare-and-swap
10674 operation at the end succeeds. MEM is the memory to be modified. SEQ is
10675 a set of instructions that takes a value from OLD_REG as an input and
10676 produces a value in NEW_REG as an output. Before SEQ, OLD_REG will be
10677 set to the current contents of MEM. After SEQ, a compare-and-swap will
10678 attempt to update MEM with NEW_REG. The function returns true when the
10679 loop was generated successfully. */
10680
10681static bool
10682pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
10683{
10684 machine_mode mode = GET_MODE (mem);
10685 rtx_code_label *label;
10686 rtx cmp_reg, success, oldval;
10687
10688 /* The loop we want to generate looks like
10689
10690 cmp_reg = mem;
10691 label:
10692 old_reg = cmp_reg;
10693 seq;
10694 (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10695 if (success)
10696 goto label;
10697
10698 Note that we only do the plain load from memory once. Subsequent
10699 iterations use the value loaded by the compare-and-swap pattern. */
10700
10701 label = gen_label_rtx ();
10702 cmp_reg = gen_reg_rtx (mode);
10703
10704 emit_move_insn (cmp_reg, mem);
10705 emit_label (label);
10706 emit_move_insn (old_reg, cmp_reg);
10707 if (seq)
10708 emit_insn (seq);
10709
10710 success = NULL_RTX;
10711 oldval = cmp_reg;
10712 if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
10713 new_reg, false, MEMMODEL_SYNC_SEQ_CST,
10714 MEMMODEL_RELAXED))
10715 return false;
10716
10717 if (oldval != cmp_reg)
10718 emit_move_insn (cmp_reg, oldval);
10719
10720 /* Mark this jump predicted not taken. */
10721 emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
c0c46dc6
JH
10722 GET_MODE (success), 1, label,
10723 profile_probability::guessed_never ());
7e7c9d40
JDA
10724 return true;
10725}
10726
10727/* This function tries to implement an atomic exchange operation using a
10728 compare_and_swap loop. VAL is written to *MEM. The previous contents of
10729 *MEM are returned, using TARGET if possible. No memory model is required
10730 since a compare_and_swap loop is seq-cst. */
10731
10732rtx
10733pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
10734{
10735 machine_mode mode = GET_MODE (mem);
10736
10737 if (can_compare_and_swap_p (mode, true))
10738 {
10739 if (!target || !register_operand (target, mode))
10740 target = gen_reg_rtx (mode);
10741 if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
10742 return target;
10743 }
10744
10745 return NULL_RTX;
10746}
10747
84c9e5ff
JDA
10748/* Implement TARGET_CALLEE_COPIES. The callee is responsible for copying
10749 arguments passed by hidden reference in the 32-bit HP runtime. Users
10750 can override this behavior for better compatibility with openmp at the
10751 risk of library incompatibilities. Arguments are always passed by value
10752 in the 64-bit HP runtime. */
10753
10754static bool
10755pa_callee_copies (cumulative_args_t cum ATTRIBUTE_UNUSED,
10756 machine_mode mode ATTRIBUTE_UNUSED,
10757 const_tree type ATTRIBUTE_UNUSED,
10758 bool named ATTRIBUTE_UNUSED)
10759{
10760 return !TARGET_CALLER_COPIES;
10761}
10762
f939c3e6
RS
10763/* Implement TARGET_HARD_REGNO_MODE_OK. */
10764
10765static bool
10766pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10767{
10768 return PA_HARD_REGNO_MODE_OK (regno, mode);
10769}
10770
e2500fed 10771#include "gt-pa.h"