]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/pa/pa.c
Turn HARD_REGNO_NREGS into a target hook
[thirdparty/gcc.git] / gcc / config / pa / pa.c
CommitLineData
188538df 1/* Subroutines for insn-output.c for HPPA.
cbe34bb5 2 Copyright (C) 1992-2017 Free Software Foundation, Inc.
188538df
TG
3 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
4
b7849684 5This file is part of GCC.
188538df 6
b7849684 7GCC is free software; you can redistribute it and/or modify
188538df 8it under the terms of the GNU General Public License as published by
2f83c7d6 9the Free Software Foundation; either version 3, or (at your option)
188538df
TG
10any later version.
11
b7849684 12GCC is distributed in the hope that it will be useful,
188538df
TG
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
2f83c7d6
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
188538df 20
188538df 21#include "config.h"
0b17dd98 22#include "system.h"
4977bab6 23#include "coretypes.h"
4d0cdd0c 24#include "memmodel.h"
c7131fb2 25#include "backend.h"
e11c4407 26#include "target.h"
188538df 27#include "rtl.h"
e11c4407 28#include "tree.h"
c7131fb2 29#include "df.h"
e11c4407
AM
30#include "tm_p.h"
31#include "stringpool.h"
314e6352 32#include "attribs.h"
e11c4407 33#include "optabs.h"
188538df 34#include "regs.h"
e11c4407
AM
35#include "emit-rtl.h"
36#include "recog.h"
37#include "diagnostic-core.h"
188538df 38#include "insn-attr.h"
40e23961 39#include "alias.h"
40e23961 40#include "fold-const.h"
d8a2d370 41#include "stor-layout.h"
d8a2d370
DN
42#include "varasm.h"
43#include "calls.h"
d499455b 44#include "output.h"
823fbbce 45#include "except.h"
36566b39 46#include "explow.h"
becf1647 47#include "expr.h"
e78d8e51 48#include "reload.h"
677f3fa8 49#include "common/common-target.h"
41a1208a 50#include "langhooks.h"
60393bbc 51#include "cfgrtl.h"
96e45421 52#include "opts.h"
9b2b7279 53#include "builtins.h"
188538df 54
994c5d85 55/* This file should be included last. */
d58627a0
RS
56#include "target-def.h"
57
5d50fab3
JL
58/* Return nonzero if there is a bypass for the output of
59 OUT_INSN and the fp store IN_INSN. */
60int
647d790d 61pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
5d50fab3 62{
ef4bddc2
RS
63 machine_mode store_mode;
64 machine_mode other_mode;
5d50fab3
JL
65 rtx set;
66
67 if (recog_memoized (in_insn) < 0
d4f2728a
JDA
68 || (get_attr_type (in_insn) != TYPE_FPSTORE
69 && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
5d50fab3
JL
70 || recog_memoized (out_insn) < 0)
71 return 0;
72
73 store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
74
75 set = single_set (out_insn);
76 if (!set)
77 return 0;
78
79 other_mode = GET_MODE (SET_SRC (set));
80
81 return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
82}
83
84
/* Unless DO_FRAME_NOTES has already been defined, default it to 1 when
   INCOMING_RETURN_ADDR_RTX is defined and to 0 otherwise.  */
#if !defined (DO_FRAME_NOTES)
# if defined (INCOMING_RETURN_ADDR_RTX)
#  define DO_FRAME_NOTES 1
# else
#  define DO_FRAME_NOTES 0
# endif
#endif
92
8a5b8538 93static void pa_option_override (void);
d8f95bed 94static void copy_reg_pointer (rtx, rtx);
a2017852 95static void fix_range (const char *);
ef4bddc2 96static int hppa_register_move_cost (machine_mode mode, reg_class_t,
8a5b8538 97 reg_class_t);
ef4bddc2 98static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
e548c9df 99static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
ef4bddc2 100static inline rtx force_mode (machine_mode, rtx);
b7849684
JE
101static void pa_reorg (void);
102static void pa_combine_instructions (void);
647d790d
DM
103static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
104 rtx, rtx);
b32d5189 105static bool forward_branch_p (rtx_insn *);
b7849684 106static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
ae9d61ab 107static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
e0d80a58
JL
108static int compute_movmem_length (rtx_insn *);
109static int compute_clrmem_length (rtx_insn *);
b7849684
JE
110static bool pa_assemble_integer (rtx, unsigned int, int);
111static void remove_useless_addtr_insns (int);
a4295210
JDA
112static void store_reg (int, HOST_WIDE_INT, int);
113static void store_reg_modify (int, int, HOST_WIDE_INT);
114static void load_reg (int, HOST_WIDE_INT, int);
115static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
9193312a 116static rtx pa_function_value (const_tree, const_tree, bool);
ef4bddc2 117static rtx pa_libcall_value (machine_mode, const_rtx);
8a5b8538 118static bool pa_function_value_regno_p (const unsigned int);
42776416 119static void pa_output_function_prologue (FILE *);
67b846fa 120static void update_total_code_bytes (unsigned int);
42776416 121static void pa_output_function_epilogue (FILE *);
b505225b 122static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
ac44248e 123static int pa_adjust_priority (rtx_insn *, int);
b7849684 124static int pa_issue_rate (void);
f258111a 125static int pa_reloc_rw_mask (void);
d6b5193b 126static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
57d138a9 127static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
d6b5193b 128static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
ae46c4e0 129 ATTRIBUTE_UNUSED;
b7849684
JE
130static void pa_encode_section_info (tree, rtx, int);
131static const char *pa_strip_name_encoding (const char *);
132static bool pa_function_ok_for_sibcall (tree, tree);
133static void pa_globalize_label (FILE *, const char *)
a5f3f0ab 134 ATTRIBUTE_UNUSED;
b7849684
JE
135static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
136 HOST_WIDE_INT, tree);
35d434ed 137#if !defined(USE_COLLECT2)
b7849684
JE
138static void pa_asm_out_constructor (rtx, int);
139static void pa_asm_out_destructor (rtx, int);
35d434ed 140#endif
b7849684 141static void pa_init_builtins (void);
ef4bddc2 142static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
3f12cd9b 143static rtx hppa_builtin_saveregs (void);
d7bd8aeb 144static void hppa_va_start (tree, rtx);
726a989a 145static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
18e2a8b8 146static bool pa_scalar_mode_supported_p (scalar_mode);
3101faab 147static bool pa_commutative_p (const_rtx x, int outer_code);
e0d80a58
JL
148static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
149static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
ef4bddc2 150static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
b7849684
JE
151static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
152static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
153static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
154static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
155static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
156static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
157static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
158static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
159static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
160static void output_deferred_plabels (void);
3674b34d 161static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
744b2d61
JDA
162#ifdef ASM_OUTPUT_EXTERNAL_REAL
163static void pa_hpux_file_end (void);
164#endif
50bbeefb 165static void pa_init_libfuncs (void);
3f12cd9b 166static rtx pa_struct_value_rtx (tree, int);
ef4bddc2 167static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
586de218 168 const_tree, bool);
ef4bddc2 169static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
78a52f11 170 tree, bool);
ef4bddc2 171static void pa_function_arg_advance (cumulative_args_t, machine_mode,
fd29bdaf 172 const_tree, bool);
ef4bddc2 173static rtx pa_function_arg (cumulative_args_t, machine_mode,
fd29bdaf 174 const_tree, bool);
76b0cbf8 175static pad_direction pa_function_arg_padding (machine_mode, const_tree);
ef4bddc2 176static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
9a55eab3 177static struct machine_function * pa_init_machine_status (void);
a87cf97e 178static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
ef4bddc2 179 machine_mode,
a87cf97e 180 secondary_reload_info *);
16c16a24 181static void pa_extra_live_on_entry (bitmap);
ef4bddc2
RS
182static machine_mode pa_promote_function_mode (const_tree,
183 machine_mode, int *,
4ce3042d 184 const_tree, int);
2eddfed1 185
859c146c
RH
186static void pa_asm_trampoline_template (FILE *);
187static void pa_trampoline_init (rtx, tree, rtx);
188static rtx pa_trampoline_adjust_address (rtx);
1f65437d 189static rtx pa_delegitimize_address (rtx);
8a5b8538 190static bool pa_print_operand_punct_valid_p (unsigned char);
bc707992
JDA
191static rtx pa_internal_arg_pointer (void);
192static bool pa_can_eliminate (const int, const int);
5efd84c5 193static void pa_conditional_register_usage (void);
ef4bddc2 194static machine_mode pa_c_mode_for_suffix (char);
7550cb35 195static section *pa_function_section (tree, enum node_frequency, bool, bool);
ef4bddc2
RS
196static bool pa_cannot_force_const_mem (machine_mode, rtx);
197static bool pa_legitimate_constant_p (machine_mode, rtx);
fda33f15 198static unsigned int pa_section_type_flags (tree, const char *, int);
ef4bddc2 199static bool pa_legitimate_address_p (machine_mode, rtx, bool);
84c9e5ff
JDA
200static bool pa_callee_copies (cumulative_args_t, machine_mode,
201 const_tree, bool);
c43f4279 202static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
f939c3e6 203static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
99e1629f 204static bool pa_modes_tieable_p (machine_mode, machine_mode);
859c146c 205
d6b5193b
RS
206/* The following extra sections are only used for SOM. */
207static GTY(()) section *som_readonly_data_section;
208static GTY(()) section *som_one_only_readonly_data_section;
209static GTY(()) section *som_one_only_data_section;
57d138a9 210static GTY(()) section *som_tm_clone_table_section;
d6b5193b 211
68386e1e
JL
212/* Counts for the number of callee-saved general and floating point
213 registers which were saved by the current function's prologue. */
214static int gr_saved, fr_saved;
215
16c16a24
JDA
216/* Boolean indicating whether the return pointer was saved by the
217 current function's prologue. */
218static bool rp_saved;
219
b7849684 220static rtx find_addr_reg (rtx);
188538df 221
5fad1c24 222/* Keep track of the number of bytes we have output in the CODE subspace
279c9bde 223 during this compilation so we'll know when to emit inline long-calls. */
a02aa5b0 224unsigned long total_code_bytes;
279c9bde 225
5fad1c24
JDA
226/* The last address of the previous function plus the number of bytes in
227 associated thunks that have been output. This is used to determine if
228 a thunk can use an IA-relative branch to reach its target function. */
67b846fa 229static unsigned int last_address;
5fad1c24 230
93ae92c1 231/* Variables to handle plabels that we discover are necessary at assembly
ddd5a7c1 232 output time. They are output after the current function. */
d1b38208 233struct GTY(()) deferred_plabel
93ae92c1
JL
234{
235 rtx internal_label;
744b2d61 236 rtx symbol;
e2500fed
GK
237};
238static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
239 deferred_plabels;
0f8e3849 240static size_t n_deferred_plabels = 0;
672a6f42
NB
241\f
242/* Initialize the GCC target structure. */
301d03af 243
8a5b8538
AS
244#undef TARGET_OPTION_OVERRIDE
245#define TARGET_OPTION_OVERRIDE pa_option_override
246
301d03af
RS
247#undef TARGET_ASM_ALIGNED_HI_OP
248#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
249#undef TARGET_ASM_ALIGNED_SI_OP
250#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
251#undef TARGET_ASM_ALIGNED_DI_OP
252#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
253#undef TARGET_ASM_UNALIGNED_HI_OP
254#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
255#undef TARGET_ASM_UNALIGNED_SI_OP
256#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
257#undef TARGET_ASM_UNALIGNED_DI_OP
258#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
259#undef TARGET_ASM_INTEGER
260#define TARGET_ASM_INTEGER pa_assemble_integer
261
08c148a8
NB
262#undef TARGET_ASM_FUNCTION_PROLOGUE
263#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
264#undef TARGET_ASM_FUNCTION_EPILOGUE
265#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
93ae92c1 266
9193312a
AS
267#undef TARGET_FUNCTION_VALUE
268#define TARGET_FUNCTION_VALUE pa_function_value
8a5b8538
AS
269#undef TARGET_LIBCALL_VALUE
270#define TARGET_LIBCALL_VALUE pa_libcall_value
271#undef TARGET_FUNCTION_VALUE_REGNO_P
272#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
9193312a 273
506d7b68
PB
274#undef TARGET_LEGITIMIZE_ADDRESS
275#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
276
c237e94a
ZW
277#undef TARGET_SCHED_ADJUST_COST
278#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
279#undef TARGET_SCHED_ADJUST_PRIORITY
280#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
281#undef TARGET_SCHED_ISSUE_RATE
282#define TARGET_SCHED_ISSUE_RATE pa_issue_rate
283
fb49053f
RH
284#undef TARGET_ENCODE_SECTION_INFO
285#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
772c5265
RH
286#undef TARGET_STRIP_NAME_ENCODING
287#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
fb49053f 288
4977bab6
ZW
289#undef TARGET_FUNCTION_OK_FOR_SIBCALL
290#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
291
8ddf681a
R
292#undef TARGET_COMMUTATIVE_P
293#define TARGET_COMMUTATIVE_P pa_commutative_p
294
c590b625
RH
295#undef TARGET_ASM_OUTPUT_MI_THUNK
296#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
3961e8fe
RH
297#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
298#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
c590b625 299
a5fe455b 300#undef TARGET_ASM_FILE_END
744b2d61
JDA
301#ifdef ASM_OUTPUT_EXTERNAL_REAL
302#define TARGET_ASM_FILE_END pa_hpux_file_end
303#else
a5fe455b 304#define TARGET_ASM_FILE_END output_deferred_plabels
744b2d61 305#endif
a5fe455b 306
f258111a
JDA
307#undef TARGET_ASM_RELOC_RW_MASK
308#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
309
8a5b8538
AS
310#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
311#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
312
35d434ed
JDA
313#if !defined(USE_COLLECT2)
314#undef TARGET_ASM_CONSTRUCTOR
315#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
316#undef TARGET_ASM_DESTRUCTOR
317#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
318#endif
319
4677862a
JDA
320#undef TARGET_INIT_BUILTINS
321#define TARGET_INIT_BUILTINS pa_init_builtins
322
41a1208a
JDA
323#undef TARGET_EXPAND_BUILTIN
324#define TARGET_EXPAND_BUILTIN pa_expand_builtin
325
8a5b8538
AS
326#undef TARGET_REGISTER_MOVE_COST
327#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
3c50106f
RH
328#undef TARGET_RTX_COSTS
329#define TARGET_RTX_COSTS hppa_rtx_costs
dcefdf67
RH
330#undef TARGET_ADDRESS_COST
331#define TARGET_ADDRESS_COST hppa_address_cost
3c50106f 332
18dbd950
RS
333#undef TARGET_MACHINE_DEPENDENT_REORG
334#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
335
c15c90bb 336#undef TARGET_INIT_LIBFUNCS
50bbeefb 337#define TARGET_INIT_LIBFUNCS pa_init_libfuncs
c15c90bb 338
cde0f3fd
PB
339#undef TARGET_PROMOTE_FUNCTION_MODE
340#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
3f12cd9b 341#undef TARGET_PROMOTE_PROTOTYPES
586de218 342#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
3f12cd9b
KH
343
344#undef TARGET_STRUCT_VALUE_RTX
345#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
346#undef TARGET_RETURN_IN_MEMORY
347#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
fe984136
RH
348#undef TARGET_MUST_PASS_IN_STACK
349#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8cd5a4e0
RH
350#undef TARGET_PASS_BY_REFERENCE
351#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
6cdd5672 352#undef TARGET_CALLEE_COPIES
84c9e5ff 353#define TARGET_CALLEE_COPIES pa_callee_copies
78a52f11
RH
354#undef TARGET_ARG_PARTIAL_BYTES
355#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
fd29bdaf
NF
356#undef TARGET_FUNCTION_ARG
357#define TARGET_FUNCTION_ARG pa_function_arg
358#undef TARGET_FUNCTION_ARG_ADVANCE
359#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
76b0cbf8
RS
360#undef TARGET_FUNCTION_ARG_PADDING
361#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
c2ed6cf8
NF
362#undef TARGET_FUNCTION_ARG_BOUNDARY
363#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
3f12cd9b
KH
364
365#undef TARGET_EXPAND_BUILTIN_SAVEREGS
366#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
d7bd8aeb
JJ
367#undef TARGET_EXPAND_BUILTIN_VA_START
368#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
8101c928
RH
369#undef TARGET_GIMPLIFY_VA_ARG_EXPR
370#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
3f12cd9b 371
83c32f2e
JDA
372#undef TARGET_SCALAR_MODE_SUPPORTED_P
373#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
374
51076f96 375#undef TARGET_CANNOT_FORCE_CONST_MEM
fbbf66e7 376#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
51076f96 377
ec963611
JDA
378#undef TARGET_SECONDARY_RELOAD
379#define TARGET_SECONDARY_RELOAD pa_secondary_reload
380
16c16a24
JDA
381#undef TARGET_EXTRA_LIVE_ON_ENTRY
382#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
383
859c146c
RH
384#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
385#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
386#undef TARGET_TRAMPOLINE_INIT
387#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
388#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
389#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
1f65437d
SE
390#undef TARGET_DELEGITIMIZE_ADDRESS
391#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
bc707992
JDA
392#undef TARGET_INTERNAL_ARG_POINTER
393#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
394#undef TARGET_CAN_ELIMINATE
395#define TARGET_CAN_ELIMINATE pa_can_eliminate
5efd84c5
NF
396#undef TARGET_CONDITIONAL_REGISTER_USAGE
397#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
41a1208a
JDA
398#undef TARGET_C_MODE_FOR_SUFFIX
399#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
7550cb35
JDA
400#undef TARGET_ASM_FUNCTION_SECTION
401#define TARGET_ASM_FUNCTION_SECTION pa_function_section
859c146c 402
1a627b35
RS
403#undef TARGET_LEGITIMATE_CONSTANT_P
404#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
fda33f15
JDA
405#undef TARGET_SECTION_TYPE_FLAGS
406#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
1a04ac2b
JDA
407#undef TARGET_LEGITIMATE_ADDRESS_P
408#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
1a627b35 409
d81db636
SB
410#undef TARGET_LRA_P
411#define TARGET_LRA_P hook_bool_void_false
412
c43f4279
RS
413#undef TARGET_HARD_REGNO_NREGS
414#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
f939c3e6
RS
415#undef TARGET_HARD_REGNO_MODE_OK
416#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
99e1629f
RS
417#undef TARGET_MODES_TIEABLE_P
418#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p
f939c3e6 419
f6897b10 420struct gcc_target targetm = TARGET_INITIALIZER;
672a6f42 421\f
a2017852
JDA
422/* Parse the -mfixed-range= option string. */
423
424static void
425fix_range (const char *const_str)
426{
427 int i, first, last;
428 char *str, *dash, *comma;
429
430 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
431 REG2 are either register names or register numbers. The effect
432 of this option is to mark the registers in the range from REG1 to
433 REG2 as ``fixed'' so they won't be used by the compiler. This is
419df6a2 434 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
a2017852
JDA
435
436 i = strlen (const_str);
437 str = (char *) alloca (i + 1);
438 memcpy (str, const_str, i + 1);
439
440 while (1)
441 {
442 dash = strchr (str, '-');
443 if (!dash)
444 {
d4ee4d25 445 warning (0, "value of -mfixed-range must have form REG1-REG2");
a2017852
JDA
446 return;
447 }
448 *dash = '\0';
449
450 comma = strchr (dash + 1, ',');
451 if (comma)
452 *comma = '\0';
453
454 first = decode_reg_name (str);
455 if (first < 0)
456 {
d4ee4d25 457 warning (0, "unknown register name: %s", str);
a2017852
JDA
458 return;
459 }
460
461 last = decode_reg_name (dash + 1);
462 if (last < 0)
463 {
d4ee4d25 464 warning (0, "unknown register name: %s", dash + 1);
a2017852
JDA
465 return;
466 }
467
468 *dash = '-';
469
470 if (first > last)
471 {
d4ee4d25 472 warning (0, "%s-%s is an empty range", str, dash + 1);
a2017852
JDA
473 return;
474 }
475
476 for (i = first; i <= last; ++i)
477 fixed_regs[i] = call_used_regs[i] = 1;
478
479 if (!comma)
480 break;
481
482 *comma = ',';
483 str = comma + 1;
484 }
485
486 /* Check if all floating point registers have been fixed. */
487 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
488 if (!fixed_regs[i])
489 break;
490
491 if (i > FP_REG_LAST)
492 target_flags |= MASK_DISABLE_FPREGS;
493}
494
8a5b8538
AS
495/* Implement the TARGET_OPTION_OVERRIDE hook. */
496
497static void
498pa_option_override (void)
66617831 499{
1bb721dc
JM
500 unsigned int i;
501 cl_deferred_option *opt;
9771b263
DN
502 vec<cl_deferred_option> *v
503 = (vec<cl_deferred_option> *) pa_deferred_options;
1bb721dc 504
9771b263
DN
505 if (v)
506 FOR_EACH_VEC_ELT (*v, i, opt)
507 {
508 switch (opt->opt_index)
509 {
510 case OPT_mfixed_range_:
511 fix_range (opt->arg);
512 break;
1bb721dc 513
9771b263
DN
514 default:
515 gcc_unreachable ();
516 }
517 }
1bb721dc 518
6a73009d
JL
519 if (flag_pic && TARGET_PORTABLE_RUNTIME)
520 {
ab532386 521 warning (0, "PIC code generation is not supported in the portable runtime model");
6a73009d
JL
522 }
523
a7721dc0 524 if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
6a73009d 525 {
ab532386 526 warning (0, "PIC code generation is not compatible with fast indirect calls");
6a73009d 527 }
0eba3d30 528
54eef932
JL
529 if (! TARGET_GAS && write_symbols != NO_DEBUG)
530 {
d4ee4d25
DD
531 warning (0, "-g is only supported when using GAS on this processor,");
532 warning (0, "-g option disabled");
54eef932
JL
533 write_symbols = NO_DEBUG;
534 }
d07d525a 535
7ee72796
JL
536 /* We only support the "big PIC" model now. And we always generate PIC
537 code when in 64bit mode. */
538 if (flag_pic == 1 || TARGET_64BIT)
520babc7
JL
539 flag_pic = 2;
540
e92abd50
JDA
541 /* Disable -freorder-blocks-and-partition as we don't support hot and
542 cold partitioning. */
543 if (flag_reorder_blocks_and_partition)
544 {
545 inform (input_location,
546 "-freorder-blocks-and-partition does not work "
547 "on this architecture");
548 flag_reorder_blocks_and_partition = 0;
549 flag_reorder_blocks = 1;
550 }
551
301d03af
RS
552 /* We can't guarantee that .dword is available for 32-bit targets. */
553 if (UNITS_PER_WORD == 4)
554 targetm.asm_out.aligned_op.di = NULL;
555
556 /* The unaligned ops are only available when using GAS. */
557 if (!TARGET_GAS)
558 {
559 targetm.asm_out.unaligned_op.hi = NULL;
560 targetm.asm_out.unaligned_op.si = NULL;
561 targetm.asm_out.unaligned_op.di = NULL;
562 }
9a55eab3
JDA
563
564 init_machine_status = pa_init_machine_status;
c47decad
JL
565}
566
41a1208a
JDA
567enum pa_builtins
568{
569 PA_BUILTIN_COPYSIGNQ,
570 PA_BUILTIN_FABSQ,
571 PA_BUILTIN_INFQ,
572 PA_BUILTIN_HUGE_VALQ,
573 PA_BUILTIN_max
574};
575
576static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
577
eab9e742 578static void
b7849684 579pa_init_builtins (void)
4677862a
JDA
580{
581#ifdef DONT_HAVE_FPUTC_UNLOCKED
e79983f4
MM
582 {
583 tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
584 set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
585 builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
586 }
4677862a 587#endif
dfcb2b51 588#if TARGET_HPUX_11
e79983f4
MM
589 {
590 tree decl;
591
592 if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
593 set_user_assembler_name (decl, "_Isfinite");
594 if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
595 set_user_assembler_name (decl, "_Isfinitef");
596 }
7d522000 597#endif
41a1208a
JDA
598
599 if (HPUX_LONG_DOUBLE_LIBRARY)
600 {
601 tree decl, ftype;
602
603 /* Under HPUX, the __float128 type is a synonym for "long double". */
604 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
605 "__float128");
606
607 /* TFmode support builtins. */
608 ftype = build_function_type_list (long_double_type_node,
609 long_double_type_node,
610 NULL_TREE);
611 decl = add_builtin_function ("__builtin_fabsq", ftype,
612 PA_BUILTIN_FABSQ, BUILT_IN_MD,
613 "_U_Qfabs", NULL_TREE);
614 TREE_READONLY (decl) = 1;
615 pa_builtins[PA_BUILTIN_FABSQ] = decl;
616
617 ftype = build_function_type_list (long_double_type_node,
618 long_double_type_node,
619 long_double_type_node,
620 NULL_TREE);
621 decl = add_builtin_function ("__builtin_copysignq", ftype,
622 PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
623 "_U_Qfcopysign", NULL_TREE);
624 TREE_READONLY (decl) = 1;
625 pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;
626
12526412 627 ftype = build_function_type_list (long_double_type_node, NULL_TREE);
41a1208a
JDA
628 decl = add_builtin_function ("__builtin_infq", ftype,
629 PA_BUILTIN_INFQ, BUILT_IN_MD,
630 NULL, NULL_TREE);
631 pa_builtins[PA_BUILTIN_INFQ] = decl;
632
633 decl = add_builtin_function ("__builtin_huge_valq", ftype,
634 PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
635 NULL, NULL_TREE);
636 pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
637 }
638}
639
640static rtx
641pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
ef4bddc2 642 machine_mode mode ATTRIBUTE_UNUSED,
41a1208a
JDA
643 int ignore ATTRIBUTE_UNUSED)
644{
645 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
646 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
647
648 switch (fcode)
649 {
650 case PA_BUILTIN_FABSQ:
651 case PA_BUILTIN_COPYSIGNQ:
652 return expand_call (exp, target, ignore);
653
654 case PA_BUILTIN_INFQ:
655 case PA_BUILTIN_HUGE_VALQ:
656 {
ef4bddc2 657 machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
41a1208a
JDA
658 REAL_VALUE_TYPE inf;
659 rtx tmp;
660
661 real_inf (&inf);
555affd7 662 tmp = const_double_from_real_value (inf, target_mode);
41a1208a
JDA
663
664 tmp = validize_mem (force_const_mem (target_mode, tmp));
665
666 if (target == 0)
667 target = gen_reg_rtx (target_mode);
668
669 emit_move_insn (target, tmp);
670 return target;
671 }
672
673 default:
674 gcc_unreachable ();
675 }
676
677 return NULL_RTX;
4677862a
JDA
678}
679
9a55eab3
JDA
680/* Function to init struct machine_function.
681 This will be called, via a pointer variable,
682 from push_function_context. */
683
684static struct machine_function *
685pa_init_machine_status (void)
686{
766090c2 687 return ggc_cleared_alloc<machine_function> ();
9a55eab3
JDA
688}
689
d8f95bed
JDA
690/* If FROM is a probable pointer register, mark TO as a probable
691 pointer register with the same pointer alignment as FROM. */
692
693static void
694copy_reg_pointer (rtx to, rtx from)
695{
696 if (REG_POINTER (from))
697 mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
698}
699
23f6f34f
TG
700/* Return 1 if X contains a symbolic expression. We know these
701 expressions will have one of a few well defined forms, so
c1d1b3f0
JL
702 we need only check those forms. */
703int
ae9d61ab 704pa_symbolic_expression_p (rtx x)
c1d1b3f0
JL
705{
706
fe19a83d 707 /* Strip off any HIGH. */
c1d1b3f0
JL
708 if (GET_CODE (x) == HIGH)
709 x = XEXP (x, 0);
710
1a04ac2b 711 return symbolic_operand (x, VOIDmode);
c1d1b3f0
JL
712}
713
47abc309 714/* Accept any constant that can be moved in one instruction into a
6746a52e 715 general register. */
23f6f34f 716int
5877e54e 717pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
6746a52e
JL
718{
719 /* OK if ldo, ldil, or zdepi, can be used. */
5b281141 720 return (VAL_14_BITS_P (ival)
ae9d61ab
JDA
721 || pa_ldil_cint_p (ival)
722 || pa_zdepi_cint_p (ival));
6746a52e 723}
188538df 724\f
5b281141
JDA
725/* True iff ldil can be used to load this CONST_INT. The least
726 significant 11 bits of the value must be zero and the value must
727 not change sign when extended from 32 to 64 bits. */
728int
5877e54e 729pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
5b281141 730{
5877e54e 731 unsigned HOST_WIDE_INT x;
5b281141 732
5877e54e
JDA
733 x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
734 return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
5b281141
JDA
735}
736
831c1763 737/* True iff zdepi can be used to generate this CONST_INT.
a7b376ee 738 zdepi first sign extends a 5-bit signed number to a given field
831c1763 739 length, then places this field anywhere in a zero. */
0e7f4c19 740int
ae9d61ab 741pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
3a5babac 742{
0c235d7e 743 unsigned HOST_WIDE_INT lsb_mask, t;
3a5babac
TG
744
745 /* This might not be obvious, but it's at least fast.
ddd5a7c1 746 This function is critical; we don't have the time loops would take. */
a1747d2c
TG
747 lsb_mask = x & -x;
748 t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
749 /* Return true iff t is a power of two. */
3a5babac
TG
750 return ((t & (t - 1)) == 0);
751}
752
23f6f34f
TG
753/* True iff depi or extru can be used to compute (reg & mask).
754 Accept bit pattern like these:
755 0....01....1
756 1....10....0
757 1..10..01..1 */
0e7f4c19 758int
ae9d61ab 759pa_and_mask_p (unsigned HOST_WIDE_INT mask)
0e7f4c19
TG
760{
761 mask = ~mask;
762 mask += mask & -mask;
763 return (mask & (mask - 1)) == 0;
764}
765
0e7f4c19
TG
766/* True iff depi can be used to compute (reg | MASK). */
767int
ae9d61ab 768pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
0e7f4c19
TG
769{
770 mask += mask & -mask;
771 return (mask & (mask - 1)) == 0;
772}
188538df
TG
773\f
774/* Legitimize PIC addresses. If the address is already
775 position-independent, we return ORIG. Newly generated
776 position-independent addresses go to REG. If we need more
777 than one register, we lose. */
778
ae9d61ab 779static rtx
ef4bddc2 780legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
188538df
TG
781{
782 rtx pic_ref = orig;
783
06ae7eb1 784 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
51076f96 785
abc95ed3 786 /* Labels need special handling. */
519104fe 787 if (pic_label_operand (orig, mode))
6bb36601 788 {
e0d80a58 789 rtx_insn *insn;
4d811a05 790
b3d9ecf0
JL
791 /* We do not want to go through the movXX expanders here since that
792 would create recursion.
793
794 Nor do we really want to call a generator for a named pattern
795 since that requires multiple patterns if we want to support
796 multiple word sizes.
797
798 So instead we just emit the raw set, which avoids the movXX
799 expanders completely. */
d8f95bed 800 mark_reg_pointer (reg, BITS_PER_UNIT);
f7df4a84 801 insn = emit_insn (gen_rtx_SET (reg, orig));
4d811a05
JDA
802
803 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
bbbbb16a 804 add_reg_note (insn, REG_EQUAL, orig);
4d811a05
JDA
805
806 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
807 and update LABEL_NUSES because this is not done automatically. */
808 if (reload_in_progress || reload_completed)
809 {
810 /* Extract LABEL_REF. */
811 if (GET_CODE (orig) == CONST)
812 orig = XEXP (XEXP (orig, 0), 0);
813 /* Extract CODE_LABEL. */
814 orig = XEXP (orig, 0);
65c5f2a6 815 add_reg_note (insn, REG_LABEL_OPERAND, orig);
27e430a2
JDA
816 /* Make sure we have label and not a note. */
817 if (LABEL_P (orig))
818 LABEL_NUSES (orig)++;
4d811a05 819 }
e3b5732b 820 crtl->uses_pic_offset_table = 1;
6bb36601
JL
821 return reg;
822 }
188538df
TG
823 if (GET_CODE (orig) == SYMBOL_REF)
824 {
e0d80a58
JL
825 rtx_insn *insn;
826 rtx tmp_reg;
9ab81df2 827
144d51f9 828 gcc_assert (reg);
188538df 829
9ab81df2
JDA
830 /* Before reload, allocate a temporary register for the intermediate
831 result. This allows the sequence to be deleted when the final
832 result is unused and the insns are trivially dead. */
833 tmp_reg = ((reload_in_progress || reload_completed)
834 ? reg : gen_reg_rtx (Pmode));
835
9c575e20 836 if (function_label_operand (orig, VOIDmode))
7813231b 837 {
0b076fea
JDA
838 /* Force function label into memory in word mode. */
839 orig = XEXP (force_const_mem (word_mode, orig), 0);
7813231b
JDA
840 /* Load plabel address from DLT. */
841 emit_move_insn (tmp_reg,
842 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
843 gen_rtx_HIGH (word_mode, orig)));
844 pic_ref
845 = gen_const_mem (Pmode,
846 gen_rtx_LO_SUM (Pmode, tmp_reg,
847 gen_rtx_UNSPEC (Pmode,
542a8afa
RH
848 gen_rtvec (1, orig),
849 UNSPEC_DLTIND14R)));
7813231b
JDA
850 emit_move_insn (reg, pic_ref);
851 /* Now load address of function descriptor. */
852 pic_ref = gen_rtx_MEM (Pmode, reg);
853 }
854 else
855 {
856 /* Load symbol reference from DLT. */
857 emit_move_insn (tmp_reg,
858 gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
859 gen_rtx_HIGH (word_mode, orig)));
860 pic_ref
861 = gen_const_mem (Pmode,
862 gen_rtx_LO_SUM (Pmode, tmp_reg,
863 gen_rtx_UNSPEC (Pmode,
864 gen_rtvec (1, orig),
865 UNSPEC_DLTIND14R)));
866 }
c5c76735 867
e3b5732b 868 crtl->uses_pic_offset_table = 1;
d8f95bed 869 mark_reg_pointer (reg, BITS_PER_UNIT);
9ab81df2
JDA
870 insn = emit_move_insn (reg, pic_ref);
871
872 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
bd94cb6e 873 set_unique_reg_note (insn, REG_EQUAL, orig);
9ab81df2 874
188538df
TG
875 return reg;
876 }
877 else if (GET_CODE (orig) == CONST)
878 {
f1c7ce82 879 rtx base;
188538df
TG
880
881 if (GET_CODE (XEXP (orig, 0)) == PLUS
882 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
883 return orig;
884
144d51f9
NS
885 gcc_assert (reg);
886 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
887
888 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
889 orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
890 base == reg ? 0 : reg);
d8f95bed 891
188538df
TG
892 if (GET_CODE (orig) == CONST_INT)
893 {
a1747d2c 894 if (INT_14_BITS (orig))
0a81f074 895 return plus_constant (Pmode, base, INTVAL (orig));
188538df
TG
896 orig = force_reg (Pmode, orig);
897 }
ad2c71b7 898 pic_ref = gen_rtx_PLUS (Pmode, base, orig);
188538df
TG
899 /* Likewise, should we set special REG_NOTEs here? */
900 }
d8f95bed 901
188538df
TG
902 return pic_ref;
903}
904
51076f96
RC
905static GTY(()) rtx gen_tls_tga;
906
907static rtx
908gen_tls_get_addr (void)
909{
910 if (!gen_tls_tga)
911 gen_tls_tga = init_one_libfunc ("__tls_get_addr");
912 return gen_tls_tga;
913}
914
915static rtx
916hppa_tls_call (rtx arg)
917{
918 rtx ret;
919
920 ret = gen_reg_rtx (Pmode);
921 emit_library_call_value (gen_tls_get_addr (), ret,
db69559b 922 LCT_CONST, Pmode, arg, Pmode);
51076f96
RC
923
924 return ret;
925}
926
927static rtx
928legitimize_tls_address (rtx addr)
929{
e0d80a58
JL
930 rtx ret, tmp, t1, t2, tp;
931 rtx_insn *insn;
51076f96 932
d92f4df0
JDA
933 /* Currently, we can't handle anything but a SYMBOL_REF. */
934 if (GET_CODE (addr) != SYMBOL_REF)
935 return addr;
936
937 switch (SYMBOL_REF_TLS_MODEL (addr))
51076f96
RC
938 {
939 case TLS_MODEL_GLOBAL_DYNAMIC:
940 tmp = gen_reg_rtx (Pmode);
a758fa89
AJ
941 if (flag_pic)
942 emit_insn (gen_tgd_load_pic (tmp, addr));
943 else
944 emit_insn (gen_tgd_load (tmp, addr));
51076f96
RC
945 ret = hppa_tls_call (tmp);
946 break;
947
948 case TLS_MODEL_LOCAL_DYNAMIC:
949 ret = gen_reg_rtx (Pmode);
950 tmp = gen_reg_rtx (Pmode);
951 start_sequence ();
a758fa89
AJ
952 if (flag_pic)
953 emit_insn (gen_tld_load_pic (tmp, addr));
954 else
955 emit_insn (gen_tld_load (tmp, addr));
51076f96
RC
956 t1 = hppa_tls_call (tmp);
957 insn = get_insns ();
958 end_sequence ();
959 t2 = gen_reg_rtx (Pmode);
960 emit_libcall_block (insn, t2, t1,
961 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
962 UNSPEC_TLSLDBASE));
963 emit_insn (gen_tld_offset_load (ret, addr, t2));
964 break;
965
966 case TLS_MODEL_INITIAL_EXEC:
967 tp = gen_reg_rtx (Pmode);
968 tmp = gen_reg_rtx (Pmode);
969 ret = gen_reg_rtx (Pmode);
970 emit_insn (gen_tp_load (tp));
a758fa89
AJ
971 if (flag_pic)
972 emit_insn (gen_tie_load_pic (tmp, addr));
973 else
974 emit_insn (gen_tie_load (tmp, addr));
51076f96
RC
975 emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
976 break;
977
978 case TLS_MODEL_LOCAL_EXEC:
979 tp = gen_reg_rtx (Pmode);
980 ret = gen_reg_rtx (Pmode);
981 emit_insn (gen_tp_load (tp));
982 emit_insn (gen_tle_load (ret, addr, tp));
983 break;
984
985 default:
06ae7eb1 986 gcc_unreachable ();
51076f96
RC
987 }
988
989 return ret;
990}
991
92d0b058
JL
992/* Helper for hppa_legitimize_address. Given X, return true if it
993 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
994
995 This respectively represent canonical shift-add rtxs or scaled
996 memory addresses. */
997static bool
998mem_shadd_or_shadd_rtx_p (rtx x)
999{
1000 return ((GET_CODE (x) == ASHIFT
1001 || GET_CODE (x) == MULT)
1002 && GET_CODE (XEXP (x, 1)) == CONST_INT
1003 && ((GET_CODE (x) == ASHIFT
1004 && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
1005 || (GET_CODE (x) == MULT
1006 && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
1007}
1008
c1d1b3f0
JL
1009/* Try machine-dependent ways of modifying an illegitimate address
1010 to be legitimate. If we find one, return the new, valid address.
1011 This macro is used in only one place: `memory_address' in explow.c.
1012
1013 OLDX is the address as it was before break_out_memory_refs was called.
1014 In some cases it is useful to look at this to decide what needs to be done.
1015
c1d1b3f0 1016 It is always safe for this macro to do nothing. It exists to recognize
23f6f34f 1017 opportunities to optimize the output.
c1d1b3f0
JL
1018
1019 For the PA, transform:
1020
1021 memory(X + <large int>)
1022
1023 into:
1024
1025 if (<large int> & mask) >= 16
1026 Y = (<large int> & ~mask) + mask + 1 Round up.
1027 else
1028 Y = (<large int> & ~mask) Round down.
1029 Z = X + Y
1030 memory (Z + (<large int> - Y));
1031
23f6f34f 1032 This is for CSE to find several similar references, and only use one Z.
c1d1b3f0 1033
1e5f1716 1034 X can either be a SYMBOL_REF or REG, but because combine cannot
c1d1b3f0
JL
1035 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
1036 D will not fit in 14 bits.
1037
1038 MODE_FLOAT references allow displacements which fit in 5 bits, so use
23f6f34f 1039 0x1f as the mask.
c1d1b3f0
JL
1040
1041 MODE_INT references allow displacements which fit in 14 bits, so use
23f6f34f 1042 0x3fff as the mask.
c1d1b3f0
JL
1043
1044 This relies on the fact that most mode MODE_FLOAT references will use FP
1045 registers and most mode MODE_INT references will use integer registers.
1046 (In the rare case of an FP register used in an integer MODE, we depend
1047 on secondary reloads to clean things up.)
1048
1049
1050 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
1051 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
ddd5a7c1 1052 addressing modes to be used).
c1d1b3f0 1053
92d0b058
JL
1054 Note that the addresses passed into hppa_legitimize_address always
1055 come from a MEM, so we only have to match the MULT form on incoming
1056 addresses. But to be future proof we also match the ASHIFT form.
1057
1058 However, this routine always places those shift-add sequences into
1059 registers, so we have to generate the ASHIFT form as our output.
1060
c1d1b3f0
JL
1061 Put X and Z into registers. Then put the entire expression into
1062 a register. */
1063
1064rtx
b7849684 1065hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
ef4bddc2 1066 machine_mode mode)
c1d1b3f0 1067{
c1d1b3f0
JL
1068 rtx orig = x;
1069
d8f95bed
JDA
1070 /* We need to canonicalize the order of operands in unscaled indexed
1071 addresses since the code that checks if an address is valid doesn't
1072 always try both orders. */
1073 if (!TARGET_NO_SPACE_REGS
1074 && GET_CODE (x) == PLUS
1075 && GET_MODE (x) == Pmode
1076 && REG_P (XEXP (x, 0))
1077 && REG_P (XEXP (x, 1))
1078 && REG_POINTER (XEXP (x, 0))
1079 && !REG_POINTER (XEXP (x, 1)))
1080 return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
1081
093a6c99 1082 if (tls_referenced_p (x))
51076f96
RC
1083 return legitimize_tls_address (x);
1084 else if (flag_pic)
6bb36601
JL
1085 return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
1086
fe19a83d 1087 /* Strip off CONST. */
c1d1b3f0
JL
1088 if (GET_CODE (x) == CONST)
1089 x = XEXP (x, 0);
1090
68944452
JL
1091 /* Special case. Get the SYMBOL_REF into a register and use indexing.
1092 That should always be safe. */
1093 if (GET_CODE (x) == PLUS
1094 && GET_CODE (XEXP (x, 0)) == REG
1095 && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
1096 {
690d4228
JL
1097 rtx reg = force_reg (Pmode, XEXP (x, 1));
1098 return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
68944452
JL
1099 }
1100
326bc2de
JL
1101 /* Note we must reject symbols which represent function addresses
1102 since the assembler/linker can't handle arithmetic on plabels. */
c1d1b3f0
JL
1103 if (GET_CODE (x) == PLUS
1104 && GET_CODE (XEXP (x, 1)) == CONST_INT
326bc2de
JL
1105 && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
1106 && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
c1d1b3f0
JL
1107 || GET_CODE (XEXP (x, 0)) == REG))
1108 {
1109 rtx int_part, ptr_reg;
1110 int newoffset;
1111 int offset = INTVAL (XEXP (x, 1));
f9bd8d8e
JL
1112 int mask;
1113
1114 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
1a04ac2b 1115 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
c1d1b3f0 1116
23f6f34f 1117 /* Choose which way to round the offset. Round up if we
c1d1b3f0
JL
1118 are >= halfway to the next boundary. */
1119 if ((offset & mask) >= ((mask + 1) / 2))
1120 newoffset = (offset & ~ mask) + mask + 1;
1121 else
1122 newoffset = (offset & ~ mask);
1123
1124 /* If the newoffset will not fit in 14 bits (ldo), then
1125 handling this would take 4 or 5 instructions (2 to load
1126 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1127 add the new offset and the SYMBOL_REF.) Combine can
1128 not handle 4->2 or 5->2 combinations, so do not create
1129 them. */
1130 if (! VAL_14_BITS_P (newoffset)
1131 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
1132 {
0a81f074 1133 rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
c1d1b3f0 1134 rtx tmp_reg
e5e28962 1135 = force_reg (Pmode,
ad2c71b7 1136 gen_rtx_HIGH (Pmode, const_part));
c1d1b3f0 1137 ptr_reg
e5e28962 1138 = force_reg (Pmode,
c5c76735
JL
1139 gen_rtx_LO_SUM (Pmode,
1140 tmp_reg, const_part));
c1d1b3f0
JL
1141 }
1142 else
1143 {
1144 if (! VAL_14_BITS_P (newoffset))
e5e28962 1145 int_part = force_reg (Pmode, GEN_INT (newoffset));
c1d1b3f0
JL
1146 else
1147 int_part = GEN_INT (newoffset);
1148
e5e28962 1149 ptr_reg = force_reg (Pmode,
ad2c71b7
JL
1150 gen_rtx_PLUS (Pmode,
1151 force_reg (Pmode, XEXP (x, 0)),
1152 int_part));
c1d1b3f0 1153 }
0a81f074 1154 return plus_constant (Pmode, ptr_reg, offset - newoffset);
c1d1b3f0 1155 }
7426c959 1156
92d0b058 1157 /* Handle (plus (mult (a) (mem_shadd_constant)) (b)). */
7426c959 1158
92d0b058
JL
1159 if (GET_CODE (x) == PLUS
1160 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
ec8e098d 1161 && (OBJECT_P (XEXP (x, 1))
7426c959
JL
1162 || GET_CODE (XEXP (x, 1)) == SUBREG)
1163 && GET_CODE (XEXP (x, 1)) != CONST)
c1d1b3f0 1164 {
92d0b058
JL
1165 /* If we were given a MULT, we must fix the constant
1166 as we're going to create the ASHIFT form. */
1167 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1168 if (GET_CODE (XEXP (x, 0)) == MULT)
1169 shift_val = exact_log2 (shift_val);
78c0acfd 1170
92d0b058 1171 rtx reg1, reg2;
78c0acfd
JL
1172 reg1 = XEXP (x, 1);
1173 if (GET_CODE (reg1) != REG)
1174 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1175
1176 reg2 = XEXP (XEXP (x, 0), 0);
1177 if (GET_CODE (reg2) != REG)
1178 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1179
92d0b058
JL
1180 return force_reg (Pmode,
1181 gen_rtx_PLUS (Pmode,
1182 gen_rtx_ASHIFT (Pmode, reg2,
1183 GEN_INT (shift_val)),
1184 reg1));
c1d1b3f0 1185 }
7426c959 1186
92d0b058 1187 /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).
305123ba
JL
1188
1189 Only do so for floating point modes since this is more speculative
1190 and we lose if it's an integer store. */
78c0acfd 1191 if (GET_CODE (x) == PLUS
305123ba 1192 && GET_CODE (XEXP (x, 0)) == PLUS
92d0b058 1193 && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
78c0acfd 1194 && (mode == SFmode || mode == DFmode))
305123ba 1195 {
92d0b058
JL
1196 int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
1197
1198 /* If we were given a MULT, we must fix the constant
1199 as we're going to create the ASHIFT form. */
1200 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
1201 shift_val = exact_log2 (shift_val);
78c0acfd 1202
92d0b058 1203 /* Try and figure out what to use as a base register. */
b38bccca 1204 rtx reg1, reg2, base, idx;
78c0acfd
JL
1205
1206 reg1 = XEXP (XEXP (x, 0), 1);
1207 reg2 = XEXP (x, 1);
1208 base = NULL_RTX;
1209 idx = NULL_RTX;
1210
1211 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
ae9d61ab 1212 then pa_emit_move_sequence will turn on REG_POINTER so we'll know
3502dc9c 1213 it's a base register below. */
78c0acfd
JL
1214 if (GET_CODE (reg1) != REG)
1215 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1216
1217 if (GET_CODE (reg2) != REG)
1218 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1219
1220 /* Figure out what the base and index are. */
6619e96c 1221
78c0acfd 1222 if (GET_CODE (reg1) == REG
3502dc9c 1223 && REG_POINTER (reg1))
78c0acfd
JL
1224 {
1225 base = reg1;
ad2c71b7 1226 idx = gen_rtx_PLUS (Pmode,
92d0b058
JL
1227 gen_rtx_ASHIFT (Pmode,
1228 XEXP (XEXP (XEXP (x, 0), 0), 0),
1229 GEN_INT (shift_val)),
ad2c71b7 1230 XEXP (x, 1));
78c0acfd
JL
1231 }
1232 else if (GET_CODE (reg2) == REG
3502dc9c 1233 && REG_POINTER (reg2))
78c0acfd
JL
1234 {
1235 base = reg2;
78c0acfd
JL
1236 idx = XEXP (x, 0);
1237 }
1238
1239 if (base == 0)
31d4f31f 1240 return orig;
78c0acfd
JL
1241
1242 /* If the index adds a large constant, try to scale the
1243 constant so that it can be loaded with only one insn. */
1244 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1245 && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
1246 / INTVAL (XEXP (XEXP (idx, 0), 1)))
1247 && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
1248 {
1249 /* Divide the CONST_INT by the scale factor, then add it to A. */
1250 int val = INTVAL (XEXP (idx, 1));
92d0b058 1251 val /= (1 << shift_val);
78c0acfd 1252
78c0acfd
JL
1253 reg1 = XEXP (XEXP (idx, 0), 0);
1254 if (GET_CODE (reg1) != REG)
1255 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1256
ad2c71b7 1257 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
78c0acfd
JL
1258
1259 /* We can now generate a simple scaled indexed address. */
c5c76735
JL
1260 return
1261 force_reg
1262 (Pmode, gen_rtx_PLUS (Pmode,
92d0b058
JL
1263 gen_rtx_ASHIFT (Pmode, reg1,
1264 GEN_INT (shift_val)),
c5c76735 1265 base));
78c0acfd
JL
1266 }
1267
1268 /* If B + C is still a valid base register, then add them. */
1269 if (GET_CODE (XEXP (idx, 1)) == CONST_INT
1270 && INTVAL (XEXP (idx, 1)) <= 4096
1271 && INTVAL (XEXP (idx, 1)) >= -4096)
1272 {
78c0acfd
JL
1273 rtx reg1, reg2;
1274
ad2c71b7 1275 reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
78c0acfd
JL
1276
1277 reg2 = XEXP (XEXP (idx, 0), 0);
1278 if (GET_CODE (reg2) != CONST_INT)
1279 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1280
92d0b058
JL
1281 return force_reg (Pmode,
1282 gen_rtx_PLUS (Pmode,
1283 gen_rtx_ASHIFT (Pmode, reg2,
1284 GEN_INT (shift_val)),
1285 reg1));
78c0acfd
JL
1286 }
1287
1288 /* Get the index into a register, then add the base + index and
1289 return a register holding the result. */
1290
1291 /* First get A into a register. */
1292 reg1 = XEXP (XEXP (idx, 0), 0);
1293 if (GET_CODE (reg1) != REG)
1294 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1295
1296 /* And get B into a register. */
1297 reg2 = XEXP (idx, 1);
1298 if (GET_CODE (reg2) != REG)
1299 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1300
ad2c71b7
JL
1301 reg1 = force_reg (Pmode,
1302 gen_rtx_PLUS (Pmode,
92d0b058
JL
1303 gen_rtx_ASHIFT (Pmode, reg1,
1304 GEN_INT (shift_val)),
ad2c71b7 1305 reg2));
78c0acfd
JL
1306
1307 /* Add the result to our base register and return. */
ad2c71b7 1308 return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
6619e96c 1309
305123ba
JL
1310 }
1311
23f6f34f 1312 /* Uh-oh. We might have an address for x[n-100000]. This needs
c2827c50
JL
1313 special handling to avoid creating an indexed memory address
1314 with x-100000 as the base.
6619e96c 1315
c2827c50
JL
1316 If the constant part is small enough, then it's still safe because
1317 there is a guard page at the beginning and end of the data segment.
1318
1319 Scaled references are common enough that we want to try and rearrange the
1320 terms so that we can use indexing for these addresses too. Only
305123ba 1321 do the optimization for floatint point modes. */
7426c959 1322
c2827c50 1323 if (GET_CODE (x) == PLUS
ae9d61ab 1324 && pa_symbolic_expression_p (XEXP (x, 1)))
7426c959
JL
1325 {
1326 /* Ugly. We modify things here so that the address offset specified
1327 by the index expression is computed first, then added to x to form
c2827c50 1328 the entire address. */
7426c959 1329
305123ba 1330 rtx regx1, regx2, regy1, regy2, y;
7426c959
JL
1331
1332 /* Strip off any CONST. */
1333 y = XEXP (x, 1);
1334 if (GET_CODE (y) == CONST)
1335 y = XEXP (y, 0);
1336
77fc9313
RK
1337 if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
1338 {
305123ba 1339 /* See if this looks like
92d0b058 1340 (plus (mult (reg) (mem_shadd_const))
305123ba
JL
1341 (const (plus (symbol_ref) (const_int))))
1342
78c0acfd 1343 Where const_int is small. In that case the const
6619e96c 1344 expression is a valid pointer for indexing.
78c0acfd
JL
1345
1346 If const_int is big, but can be divided evenly by shadd_const
1347 and added to (reg). This allows more scaled indexed addresses. */
1348 if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
92d0b058 1349 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
305123ba 1350 && GET_CODE (XEXP (y, 1)) == CONST_INT
78c0acfd 1351 && INTVAL (XEXP (y, 1)) >= -4096
92d0b058 1352 && INTVAL (XEXP (y, 1)) <= 4095)
78c0acfd 1353 {
92d0b058
JL
1354 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1355
1356 /* If we were given a MULT, we must fix the constant
1357 as we're going to create the ASHIFT form. */
1358 if (GET_CODE (XEXP (x, 0)) == MULT)
1359 shift_val = exact_log2 (shift_val);
1360
78c0acfd
JL
1361 rtx reg1, reg2;
1362
1363 reg1 = XEXP (x, 1);
1364 if (GET_CODE (reg1) != REG)
1365 reg1 = force_reg (Pmode, force_operand (reg1, 0));
1366
1367 reg2 = XEXP (XEXP (x, 0), 0);
1368 if (GET_CODE (reg2) != REG)
1369 reg2 = force_reg (Pmode, force_operand (reg2, 0));
1370
92d0b058
JL
1371 return
1372 force_reg (Pmode,
1373 gen_rtx_PLUS (Pmode,
1374 gen_rtx_ASHIFT (Pmode,
1375 reg2,
1376 GEN_INT (shift_val)),
1377 reg1));
78c0acfd
JL
1378 }
1379 else if ((mode == DFmode || mode == SFmode)
1380 && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
92d0b058 1381 && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
78c0acfd 1382 && GET_CODE (XEXP (y, 1)) == CONST_INT
92d0b058 1383 && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
305123ba 1384 {
92d0b058
JL
1385 int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
1386
1387 /* If we were given a MULT, we must fix the constant
1388 as we're going to create the ASHIFT form. */
1389 if (GET_CODE (XEXP (x, 0)) == MULT)
1390 shift_val = exact_log2 (shift_val);
1391
305123ba
JL
1392 regx1
1393 = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
1394 / INTVAL (XEXP (XEXP (x, 0), 1))));
1395 regx2 = XEXP (XEXP (x, 0), 0);
1396 if (GET_CODE (regx2) != REG)
1397 regx2 = force_reg (Pmode, force_operand (regx2, 0));
ad2c71b7
JL
1398 regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1399 regx2, regx1));
c5c76735
JL
1400 return
1401 force_reg (Pmode,
1402 gen_rtx_PLUS (Pmode,
92d0b058
JL
1403 gen_rtx_ASHIFT (Pmode, regx2,
1404 GEN_INT (shift_val)),
c5c76735 1405 force_reg (Pmode, XEXP (y, 0))));
305123ba 1406 }
c2827c50
JL
1407 else if (GET_CODE (XEXP (y, 1)) == CONST_INT
1408 && INTVAL (XEXP (y, 1)) >= -4096
1409 && INTVAL (XEXP (y, 1)) <= 4095)
1410 {
1411 /* This is safe because of the guard page at the
1412 beginning and end of the data space. Just
1413 return the original address. */
1414 return orig;
1415 }
305123ba
JL
1416 else
1417 {
1418 /* Doesn't look like one we can optimize. */
1419 regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
1420 regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
1421 regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
1422 regx1 = force_reg (Pmode,
ad2c71b7
JL
1423 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
1424 regx1, regy2));
1425 return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
305123ba 1426 }
77fc9313 1427 }
7426c959
JL
1428 }
1429
c1d1b3f0
JL
1430 return orig;
1431}
1432
8a5b8538
AS
1433/* Implement the TARGET_REGISTER_MOVE_COST hook.
1434
1435 Compute extra cost of moving data between one register class
1436 and another.
1437
1438 Make moves from SAR so expensive they should never happen. We used to
1439 have 0xffff here, but that generates overflow in rare cases.
1440
1441 Copies involving a FP register and a non-FP register are relatively
1442 expensive because they must go through memory.
1443
1444 Other copies are reasonably cheap. */
1445
1446static int
ef4bddc2 1447hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
8a5b8538
AS
1448 reg_class_t from, reg_class_t to)
1449{
1450 if (from == SHIFT_REGS)
1451 return 0x100;
483d7ad3
JDA
1452 else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
1453 return 18;
8a5b8538
AS
1454 else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
1455 || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
1456 return 16;
1457 else
1458 return 2;
1459}
1460
188538df
TG
1461/* For the HPPA, REG and REG+CONST is cost 0
1462 and addresses involving symbolic constants are cost 2.
1463
1464 PIC addresses are very expensive.
1465
1466 It is no coincidence that this has the same structure
1a04ac2b 1467 as pa_legitimate_address_p. */
dcefdf67
RH
1468
1469static int
ef4bddc2 1470hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
b413068c 1471 addr_space_t as ATTRIBUTE_UNUSED,
f40751dd 1472 bool speed ATTRIBUTE_UNUSED)
188538df 1473{
dcefdf67
RH
1474 switch (GET_CODE (X))
1475 {
1476 case REG:
1477 case PLUS:
1478 case LO_SUM:
188538df 1479 return 1;
dcefdf67
RH
1480 case HIGH:
1481 return 2;
1482 default:
1483 return 4;
1484 }
188538df
TG
1485}
1486
3c50106f
RH
1487/* Compute a (partial) cost for rtx X. Return true if the complete
1488 cost has been computed, and false if subexpressions should be
1489 scanned. In either case, *TOTAL contains the cost result. */
1490
1491static bool
e548c9df
AM
1492hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
1493 int opno ATTRIBUTE_UNUSED,
68f932c4 1494 int *total, bool speed ATTRIBUTE_UNUSED)
3c50106f 1495{
259febfe 1496 int factor;
e548c9df 1497 int code = GET_CODE (x);
259febfe 1498
3c50106f
RH
1499 switch (code)
1500 {
1501 case CONST_INT:
1502 if (INTVAL (x) == 0)
1503 *total = 0;
1504 else if (INT_14_BITS (x))
1505 *total = 1;
1506 else
1507 *total = 2;
1508 return true;
1509
1510 case HIGH:
1511 *total = 2;
1512 return true;
1513
1514 case CONST:
1515 case LABEL_REF:
1516 case SYMBOL_REF:
1517 *total = 4;
1518 return true;
1519
1520 case CONST_DOUBLE:
1521 if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
1522 && outer_code != SET)
1523 *total = 0;
1524 else
1525 *total = 8;
1526 return true;
1527
1528 case MULT:
e548c9df 1529 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
259febfe
JDA
1530 {
1531 *total = COSTS_N_INSNS (3);
1532 return true;
1533 }
1534
1535 /* A mode size N times larger than SImode needs O(N*N) more insns. */
e548c9df 1536 factor = GET_MODE_SIZE (mode) / 4;
259febfe
JDA
1537 if (factor == 0)
1538 factor = 1;
1539
1540 if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
1541 *total = factor * factor * COSTS_N_INSNS (8);
3c50106f 1542 else
259febfe 1543 *total = factor * factor * COSTS_N_INSNS (20);
3c50106f
RH
1544 return true;
1545
1546 case DIV:
e548c9df 1547 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
3c50106f
RH
1548 {
1549 *total = COSTS_N_INSNS (14);
1550 return true;
1551 }
5efb1046 1552 /* FALLTHRU */
3c50106f
RH
1553
1554 case UDIV:
1555 case MOD:
1556 case UMOD:
259febfe 1557 /* A mode size N times larger than SImode needs O(N*N) more insns. */
e548c9df 1558 factor = GET_MODE_SIZE (mode) / 4;
259febfe
JDA
1559 if (factor == 0)
1560 factor = 1;
1561
1562 *total = factor * factor * COSTS_N_INSNS (60);
3c50106f
RH
1563 return true;
1564
1565 case PLUS: /* this includes shNadd insns */
1566 case MINUS:
e548c9df 1567 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
259febfe
JDA
1568 {
1569 *total = COSTS_N_INSNS (3);
1570 return true;
1571 }
1572
1573 /* A size N times larger than UNITS_PER_WORD needs N times as
1574 many insns, taking N times as long. */
e548c9df 1575 factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
259febfe
JDA
1576 if (factor == 0)
1577 factor = 1;
1578 *total = factor * COSTS_N_INSNS (1);
3c50106f
RH
1579 return true;
1580
1581 case ASHIFT:
1582 case ASHIFTRT:
1583 case LSHIFTRT:
1584 *total = COSTS_N_INSNS (1);
1585 return true;
1586
1587 default:
1588 return false;
1589 }
1590}
1591
6619e96c
AM
1592/* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1593 new rtx with the correct mode. */
1594static inline rtx
ef4bddc2 1595force_mode (machine_mode mode, rtx orig)
6619e96c
AM
1596{
1597 if (mode == GET_MODE (orig))
1598 return orig;
1599
144d51f9 1600 gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
6619e96c
AM
1601
1602 return gen_rtx_REG (mode, REGNO (orig));
1603}
1604
fbbf66e7
RS
1605/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1606
1607static bool
ef4bddc2 1608pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
fbbf66e7 1609{
093a6c99 1610 return tls_referenced_p (x);
fbbf66e7
RS
1611}
1612
188538df
TG
1613/* Emit insns to move operands[1] into operands[0].
1614
1615 Return 1 if we have written out everything that needs to be done to
1616 do the move. Otherwise, return 0 and the caller will emit the move
6619e96c 1617 normally.
1b8ad134
JL
1618
1619 Note SCRATCH_REG may not be in the proper mode depending on how it
c1207243 1620 will be used. This routine is responsible for creating a new copy
1b8ad134 1621 of SCRATCH_REG in the proper mode. */
188538df
TG
1622
1623int
ef4bddc2 1624pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
188538df
TG
1625{
1626 register rtx operand0 = operands[0];
1627 register rtx operand1 = operands[1];
428be702 1628 register rtx tem;
188538df 1629
d8f95bed
JDA
1630 /* We can only handle indexed addresses in the destination operand
1631 of floating point stores. Thus, we need to break out indexed
1632 addresses from the destination operand. */
1633 if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
1634 {
b3a13419 1635 gcc_assert (can_create_pseudo_p ());
d8f95bed
JDA
1636
1637 tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
1638 operand0 = replace_equiv_address (operand0, tem);
1639 }
1640
1641 /* On targets with non-equivalent space registers, break out unscaled
1642 indexed addresses from the source operand before the final CSE.
1643 We have to do this because the REG_POINTER flag is not correctly
1644 carried through various optimization passes and CSE may substitute
1645 a pseudo without the pointer set for one with the pointer set. As
71cc389b 1646 a result, we lose various opportunities to create insns with
d8f95bed
JDA
1647 unscaled indexed addresses. */
1648 if (!TARGET_NO_SPACE_REGS
1649 && !cse_not_expected
1650 && GET_CODE (operand1) == MEM
1651 && GET_CODE (XEXP (operand1, 0)) == PLUS
1652 && REG_P (XEXP (XEXP (operand1, 0), 0))
1653 && REG_P (XEXP (XEXP (operand1, 0), 1)))
1654 operand1
1655 = replace_equiv_address (operand1,
1656 copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
1657
54d65918
JL
1658 if (scratch_reg
1659 && reload_in_progress && GET_CODE (operand0) == REG
8a642d97 1660 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
f2034d06 1661 operand0 = reg_equiv_mem (REGNO (operand0));
54d65918
JL
1662 else if (scratch_reg
1663 && reload_in_progress && GET_CODE (operand0) == SUBREG
8a642d97
RK
1664 && GET_CODE (SUBREG_REG (operand0)) == REG
1665 && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
27a2c2b5 1666 {
ddef6bc7 1667 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
71443006
JL
1668 the code which tracks sets/uses for delete_output_reload. */
1669 rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
f2034d06 1670 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
ddef6bc7 1671 SUBREG_BYTE (operand0));
55a2c322 1672 operand0 = alter_subreg (&temp, true);
27a2c2b5 1673 }
8a642d97 1674
54d65918
JL
1675 if (scratch_reg
1676 && reload_in_progress && GET_CODE (operand1) == REG
8a642d97 1677 && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
f2034d06 1678 operand1 = reg_equiv_mem (REGNO (operand1));
54d65918
JL
1679 else if (scratch_reg
1680 && reload_in_progress && GET_CODE (operand1) == SUBREG
8a642d97
RK
1681 && GET_CODE (SUBREG_REG (operand1)) == REG
1682 && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
27a2c2b5 1683 {
ddef6bc7 1684 /* We must not alter SUBREG_BYTE (operand1) since that would confuse
71443006
JL
1685 the code which tracks sets/uses for delete_output_reload. */
1686 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
f2034d06 1687 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
ddef6bc7 1688 SUBREG_BYTE (operand1));
55a2c322 1689 operand1 = alter_subreg (&temp, true);
27a2c2b5 1690 }
8a642d97 1691
54d65918 1692 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
428be702
RK
1693 && ((tem = find_replacement (&XEXP (operand0, 0)))
1694 != XEXP (operand0, 0)))
7c95bbfb 1695 operand0 = replace_equiv_address (operand0, tem);
d8f95bed 1696
54d65918 1697 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
428be702
RK
1698 && ((tem = find_replacement (&XEXP (operand1, 0)))
1699 != XEXP (operand1, 0)))
7c95bbfb 1700 operand1 = replace_equiv_address (operand1, tem);
428be702 1701
4d3cea21 1702 /* Handle secondary reloads for loads/stores of FP registers from
cae80939 1703 REG+D addresses where D does not fit in 5 or 14 bits, including
668c901b
JDA
1704 (subreg (mem (addr))) cases, and reloads for other unsupported
1705 memory operands. */
a4295210 1706 if (scratch_reg
512e4ace 1707 && FP_REG_P (operand0)
1a04ac2b
JDA
1708 && (MEM_P (operand1)
1709 || (GET_CODE (operand1) == SUBREG
512e4ace 1710 && MEM_P (XEXP (operand1, 0)))))
d2a94ec0 1711 {
d3ccfbb9 1712 rtx op1 = operand1;
42fbe27f 1713
d3ccfbb9
JDA
1714 if (GET_CODE (op1) == SUBREG)
1715 op1 = XEXP (op1, 0);
2d7b2c36 1716
668c901b 1717 if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
2d7b2c36 1718 {
668c901b
JDA
1719 if (!(TARGET_PA_20
1720 && !TARGET_ELF32
1721 && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1722 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
d3ccfbb9 1723 {
668c901b
JDA
1724 /* SCRATCH_REG will hold an address and maybe the actual data.
1725 We want it in WORD_MODE regardless of what mode it was
1726 originally given to us. */
1727 scratch_reg = force_mode (word_mode, scratch_reg);
1728
1729 /* D might not fit in 14 bits either; for such cases load D
1730 into scratch reg. */
1731 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1732 {
1733 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1734 emit_move_insn (scratch_reg,
1735 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1736 Pmode,
1737 XEXP (XEXP (op1, 0), 0),
1738 scratch_reg));
1739 }
1740 else
1741 emit_move_insn (scratch_reg, XEXP (op1, 0));
1742 emit_insn (gen_rtx_SET (operand0,
1743 replace_equiv_address (op1, scratch_reg)));
1744 return 1;
d3ccfbb9 1745 }
668c901b
JDA
1746 }
1747 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1748 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1749 || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1750 {
1751 /* Load memory address into SCRATCH_REG. */
1752 scratch_reg = force_mode (word_mode, scratch_reg);
1753 emit_move_insn (scratch_reg, XEXP (op1, 0));
d3ccfbb9
JDA
1754 emit_insn (gen_rtx_SET (operand0,
1755 replace_equiv_address (op1, scratch_reg)));
1756 return 1;
2d7b2c36 1757 }
d2a94ec0 1758 }
a4295210 1759 else if (scratch_reg
512e4ace 1760 && FP_REG_P (operand1)
1a04ac2b
JDA
1761 && (MEM_P (operand0)
1762 || (GET_CODE (operand0) == SUBREG
512e4ace 1763 && MEM_P (XEXP (operand0, 0)))))
d2a94ec0 1764 {
d3ccfbb9 1765 rtx op0 = operand0;
42fbe27f 1766
d3ccfbb9
JDA
1767 if (GET_CODE (op0) == SUBREG)
1768 op0 = XEXP (op0, 0);
1b8ad134 1769
668c901b 1770 if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
2d7b2c36 1771 {
668c901b
JDA
1772 if (!(TARGET_PA_20
1773 && !TARGET_ELF32
1774 && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1775 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
d3ccfbb9 1776 {
668c901b
JDA
1777 /* SCRATCH_REG will hold an address and maybe the actual data.
1778 We want it in WORD_MODE regardless of what mode it was
1779 originally given to us. */
1780 scratch_reg = force_mode (word_mode, scratch_reg);
1781
1782 /* D might not fit in 14 bits either; for such cases load D
1783 into scratch reg. */
1784 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1785 {
1786 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1787 emit_move_insn (scratch_reg,
1788 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1789 Pmode,
1790 XEXP (XEXP (op0, 0), 0),
1791 scratch_reg));
1792 }
1793 else
1794 emit_move_insn (scratch_reg, XEXP (op0, 0));
1795 emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
1796 operand1));
1797 return 1;
d3ccfbb9 1798 }
668c901b
JDA
1799 }
1800 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1801 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1802 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1803 {
1804 /* Load memory address into SCRATCH_REG. */
1805 scratch_reg = force_mode (word_mode, scratch_reg);
1806 emit_move_insn (scratch_reg, XEXP (op0, 0));
d3ccfbb9
JDA
1807 emit_insn (gen_rtx_SET (replace_equiv_address (op0, scratch_reg),
1808 operand1));
1809 return 1;
2d7b2c36 1810 }
d2a94ec0 1811 }
c063ad75 1812 /* Handle secondary reloads for loads of FP registers from constant
1a04ac2b
JDA
1813 expressions by forcing the constant into memory. For the most part,
1814 this is only necessary for SImode and DImode.
c063ad75 1815
1a04ac2b 1816 Use scratch_reg to hold the address of the memory location. */
a4295210 1817 else if (scratch_reg
c063ad75 1818 && CONSTANT_P (operand1)
d3ccfbb9 1819 && FP_REG_P (operand0))
c063ad75 1820 {
7c95bbfb 1821 rtx const_mem, xoperands[2];
c063ad75 1822
1a04ac2b
JDA
1823 if (operand1 == CONST0_RTX (mode))
1824 {
f7df4a84 1825 emit_insn (gen_rtx_SET (operand0, operand1));
1a04ac2b
JDA
1826 return 1;
1827 }
1828
1b8ad134
JL
1829 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1830 it in WORD_MODE regardless of what mode it was originally given
1831 to us. */
6619e96c 1832 scratch_reg = force_mode (word_mode, scratch_reg);
1b8ad134 1833
c063ad75
JL
1834 /* Force the constant into memory and put the address of the
1835 memory location into scratch_reg. */
7c95bbfb 1836 const_mem = force_const_mem (mode, operand1);
c063ad75 1837 xoperands[0] = scratch_reg;
7c95bbfb 1838 xoperands[1] = XEXP (const_mem, 0);
ae9d61ab 1839 pa_emit_move_sequence (xoperands, Pmode, 0);
c063ad75
JL
1840
1841 /* Now load the destination register. */
f7df4a84 1842 emit_insn (gen_rtx_SET (operand0,
7c95bbfb 1843 replace_equiv_address (const_mem, scratch_reg)));
c063ad75
JL
1844 return 1;
1845 }
4d3cea21 1846 /* Handle secondary reloads for SAR. These occur when trying to load
483d7ad3 1847 the SAR from memory or a constant. */
a4295210
JDA
1848 else if (scratch_reg
1849 && GET_CODE (operand0) == REG
9c1eed37 1850 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
4d3cea21 1851 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
483d7ad3 1852 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
4d3cea21 1853 {
09ece7b5
JL
1854 /* D might not fit in 14 bits either; for such cases load D into
1855 scratch reg. */
1856 if (GET_CODE (operand1) == MEM
2fd74bff 1857 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
09ece7b5 1858 {
dd8c13e3
JL
1859 /* We are reloading the address into the scratch register, so we
1860 want to make sure the scratch register is a full register. */
6619e96c 1861 scratch_reg = force_mode (word_mode, scratch_reg);
dd8c13e3 1862
6619e96c 1863 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
ad2c71b7
JL
1864 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1865 0)),
690d4228 1866 Pmode,
ad2c71b7
JL
1867 XEXP (XEXP (operand1, 0),
1868 0),
1869 scratch_reg));
dd8c13e3
JL
1870
1871 /* Now we are going to load the scratch register from memory,
1872 we want to load it in the same width as the original MEM,
1873 which must be the same as the width of the ultimate destination,
1874 OPERAND0. */
6619e96c
AM
1875 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1876
7c95bbfb
RH
1877 emit_move_insn (scratch_reg,
1878 replace_equiv_address (operand1, scratch_reg));
09ece7b5
JL
1879 }
1880 else
dd8c13e3
JL
1881 {
1882 /* We want to load the scratch register using the same mode as
1883 the ultimate destination. */
6619e96c
AM
1884 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1885
dd8c13e3
JL
1886 emit_move_insn (scratch_reg, operand1);
1887 }
1888
1889 /* And emit the insn to set the ultimate destination. We know that
1890 the scratch register has the same mode as the destination at this
1891 point. */
4d3cea21
JL
1892 emit_move_insn (operand0, scratch_reg);
1893 return 1;
1894 }
d3ccfbb9 1895
d8f95bed 1896 /* Handle the most common case: storing into a register. */
d3ccfbb9 1897 if (register_operand (operand0, mode))
188538df 1898 {
9a201645
JDA
1899 /* Legitimize TLS symbol references. This happens for references
1900 that aren't a legitimate constant. */
1901 if (PA_SYMBOL_REF_TLS_P (operand1))
1902 operand1 = legitimize_tls_address (operand1);
1903
188538df 1904 if (register_operand (operand1, mode)
b8e42321 1905 || (GET_CODE (operand1) == CONST_INT
5877e54e 1906 && pa_cint_ok_for_move (UINTVAL (operand1)))
f048ca47 1907 || (operand1 == CONST0_RTX (mode))
188538df 1908 || (GET_CODE (operand1) == HIGH
80225b66 1909 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
188538df
TG
1910 /* Only `general_operands' can come here, so MEM is ok. */
1911 || GET_CODE (operand1) == MEM)
1912 {
d8f95bed
JDA
1913 /* Various sets are created during RTL generation which don't
1914 have the REG_POINTER flag correctly set. After the CSE pass,
1915 instruction recognition can fail if we don't consistently
1916 set this flag when performing register copies. This should
1917 also improve the opportunities for creating insns that use
1918 unscaled indexing. */
1919 if (REG_P (operand0) && REG_P (operand1))
1920 {
1921 if (REG_POINTER (operand1)
1922 && !REG_POINTER (operand0)
1923 && !HARD_REGISTER_P (operand0))
1924 copy_reg_pointer (operand0, operand1);
d8f95bed
JDA
1925 }
1926
1927 /* When MEMs are broken out, the REG_POINTER flag doesn't
1928 get set. In some cases, we can set the REG_POINTER flag
1929 from the declaration for the MEM. */
1930 if (REG_P (operand0)
1931 && GET_CODE (operand1) == MEM
1932 && !REG_POINTER (operand0))
1933 {
1934 tree decl = MEM_EXPR (operand1);
1935
1936 /* Set the register pointer flag and register alignment
1937 if the declaration for this memory reference is a
077c8ada
SE
1938 pointer type. */
1939 if (decl)
d8f95bed
JDA
1940 {
1941 tree type;
1942
1943 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1944 tree operand 1. */
1945 if (TREE_CODE (decl) == COMPONENT_REF)
1946 decl = TREE_OPERAND (decl, 1);
1947
1948 type = TREE_TYPE (decl);
dd25a747 1949 type = strip_array_types (type);
d8f95bed
JDA
1950
1951 if (POINTER_TYPE_P (type))
25b75a48 1952 mark_reg_pointer (operand0, BITS_PER_UNIT);
d8f95bed
JDA
1953 }
1954 }
1955
f7df4a84 1956 emit_insn (gen_rtx_SET (operand0, operand1));
188538df
TG
1957 return 1;
1958 }
1959 }
1960 else if (GET_CODE (operand0) == MEM)
1961 {
d66dec28
JL
1962 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1963 && !(reload_in_progress || reload_completed))
1964 {
1965 rtx temp = gen_reg_rtx (DFmode);
1966
f7df4a84
RS
1967 emit_insn (gen_rtx_SET (temp, operand1));
1968 emit_insn (gen_rtx_SET (operand0, temp));
d66dec28
JL
1969 return 1;
1970 }
f048ca47 1971 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
188538df
TG
1972 {
1973 /* Run this case quickly. */
f7df4a84 1974 emit_insn (gen_rtx_SET (operand0, operand1));
188538df
TG
1975 return 1;
1976 }
1bc695cd 1977 if (! (reload_in_progress || reload_completed))
188538df
TG
1978 {
1979 operands[0] = validize_mem (operand0);
1980 operands[1] = operand1 = force_reg (mode, operand1);
1981 }
1982 }
1983
44201dba
JL
1984 /* Simplify the source if we need to.
1985 Note we do have to handle function labels here, even though we do
1986 not consider them legitimate constants. Loop optimizations can
06387d7c 1987 call the emit_move_xxx with one as a source. */
f1c7ce82 1988 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
43940f6b 1989 || (GET_CODE (operand1) == HIGH
d92f4df0
JDA
1990 && symbolic_operand (XEXP (operand1, 0), mode))
1991 || function_label_operand (operand1, VOIDmode)
093a6c99 1992 || tls_referenced_p (operand1))
188538df 1993 {
43940f6b
JL
1994 int ishighonly = 0;
1995
1996 if (GET_CODE (operand1) == HIGH)
1997 {
1998 ishighonly = 1;
1999 operand1 = XEXP (operand1, 0);
2000 }
188538df
TG
2001 if (symbolic_operand (operand1, mode))
2002 {
5eceed92 2003 /* Argh. The assembler and linker can't handle arithmetic
b0fabad3 2004 involving plabels.
5eceed92 2005
b0fabad3
JL
2006 So we force the plabel into memory, load operand0 from
2007 the memory location, then add in the constant part. */
44201dba
JL
2008 if ((GET_CODE (operand1) == CONST
2009 && GET_CODE (XEXP (operand1, 0)) == PLUS
9c575e20
JDA
2010 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2011 VOIDmode))
2012 || function_label_operand (operand1, VOIDmode))
5eceed92 2013 {
8e64b41a 2014 rtx temp, const_part;
b0fabad3
JL
2015
2016 /* Figure out what (if any) scratch register to use. */
2017 if (reload_in_progress || reload_completed)
1b8ad134
JL
2018 {
2019 scratch_reg = scratch_reg ? scratch_reg : operand0;
2020 /* SCRATCH_REG will hold an address and maybe the actual
2021 data. We want it in WORD_MODE regardless of what mode it
2022 was originally given to us. */
6619e96c 2023 scratch_reg = force_mode (word_mode, scratch_reg);
1b8ad134 2024 }
b0fabad3
JL
2025 else if (flag_pic)
2026 scratch_reg = gen_reg_rtx (Pmode);
2027
44201dba
JL
2028 if (GET_CODE (operand1) == CONST)
2029 {
2030 /* Save away the constant part of the expression. */
2031 const_part = XEXP (XEXP (operand1, 0), 1);
144d51f9 2032 gcc_assert (GET_CODE (const_part) == CONST_INT);
44201dba
JL
2033
2034 /* Force the function label into memory. */
2035 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2036 }
2037 else
2038 {
2039 /* No constant part. */
2040 const_part = NULL_RTX;
5eceed92 2041
44201dba
JL
2042 /* Force the function label into memory. */
2043 temp = force_const_mem (mode, operand1);
2044 }
6619e96c 2045
b0fabad3
JL
2046
2047 /* Get the address of the memory location. PIC-ify it if
2048 necessary. */
2049 temp = XEXP (temp, 0);
2050 if (flag_pic)
2051 temp = legitimize_pic_address (temp, mode, scratch_reg);
2052
2053 /* Put the address of the memory location into our destination
2054 register. */
2055 operands[1] = temp;
ae9d61ab 2056 pa_emit_move_sequence (operands, mode, scratch_reg);
b0fabad3
JL
2057
2058 /* Now load from the memory location into our destination
2059 register. */
ad2c71b7 2060 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
ae9d61ab 2061 pa_emit_move_sequence (operands, mode, scratch_reg);
b0fabad3
JL
2062
2063 /* And add back in the constant part. */
44201dba
JL
2064 if (const_part != NULL_RTX)
2065 expand_inc (operand0, const_part);
b0fabad3
JL
2066
2067 return 1;
5eceed92
JL
2068 }
2069
188538df
TG
2070 if (flag_pic)
2071 {
283b768c 2072 rtx_insn *insn;
1bc695cd
JL
2073 rtx temp;
2074
2075 if (reload_in_progress || reload_completed)
1b8ad134
JL
2076 {
2077 temp = scratch_reg ? scratch_reg : operand0;
2078 /* TEMP will hold an address and maybe the actual
2079 data. We want it in WORD_MODE regardless of what mode it
2080 was originally given to us. */
6619e96c 2081 temp = force_mode (word_mode, temp);
1b8ad134 2082 }
1bc695cd
JL
2083 else
2084 temp = gen_reg_rtx (Pmode);
23f6f34f 2085
283b768c
JDA
2086 /* Force (const (plus (symbol) (const_int))) to memory
2087 if the const_int will not fit in 14 bits. Although
2088 this requires a relocation, the instruction sequence
2089 needed to load the value is shorter. */
b0fabad3 2090 if (GET_CODE (operand1) == CONST
bc4a9f17
JL
2091 && GET_CODE (XEXP (operand1, 0)) == PLUS
2092 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
283b768c 2093 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
bc4a9f17 2094 {
283b768c
JDA
2095 rtx x, m = force_const_mem (mode, operand1);
2096
2097 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2098 x = replace_equiv_address (m, x);
2099 insn = emit_move_insn (operand0, x);
bc4a9f17 2100 }
5eceed92
JL
2101 else
2102 {
2103 operands[1] = legitimize_pic_address (operand1, mode, temp);
d8f95bed
JDA
2104 if (REG_P (operand0) && REG_P (operands[1]))
2105 copy_reg_pointer (operand0, operands[1]);
283b768c 2106 insn = emit_move_insn (operand0, operands[1]);
5eceed92 2107 }
283b768c
JDA
2108
2109 /* Put a REG_EQUAL note on this insn. */
2110 set_unique_reg_note (insn, REG_EQUAL, operand1);
188538df 2111 }
6bb36601
JL
2112 /* On the HPPA, references to data space are supposed to use dp,
2113 register 27, but showing it in the RTL inhibits various cse
2114 and loop optimizations. */
23f6f34f 2115 else
188538df 2116 {
5eceed92 2117 rtx temp, set;
43940f6b 2118
23f6f34f 2119 if (reload_in_progress || reload_completed)
1b8ad134
JL
2120 {
2121 temp = scratch_reg ? scratch_reg : operand0;
2122 /* TEMP will hold an address and maybe the actual
2123 data. We want it in WORD_MODE regardless of what mode it
2124 was originally given to us. */
6619e96c 2125 temp = force_mode (word_mode, temp);
1b8ad134 2126 }
43940f6b
JL
2127 else
2128 temp = gen_reg_rtx (mode);
2129
68944452 2130 /* Loading a SYMBOL_REF into a register makes that register
6619e96c 2131 safe to be used as the base in an indexed address.
68944452
JL
2132
2133 Don't mark hard registers though. That loses. */
c34d858f
RK
2134 if (GET_CODE (operand0) == REG
2135 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
d8f95bed 2136 mark_reg_pointer (operand0, BITS_PER_UNIT);
68944452 2137 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
d8f95bed
JDA
2138 mark_reg_pointer (temp, BITS_PER_UNIT);
2139
43940f6b 2140 if (ishighonly)
f7df4a84 2141 set = gen_rtx_SET (operand0, temp);
43940f6b 2142 else
f7df4a84 2143 set = gen_rtx_SET (operand0,
ad2c71b7 2144 gen_rtx_LO_SUM (mode, temp, operand1));
23f6f34f 2145
f7df4a84 2146 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
b0ce651a 2147 emit_insn (set);
326bc2de 2148
188538df 2149 }
43940f6b 2150 return 1;
188538df 2151 }
093a6c99 2152 else if (tls_referenced_p (operand1))
51076f96
RC
2153 {
2154 rtx tmp = operand1;
2155 rtx addend = NULL;
2156
2157 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2158 {
2159 addend = XEXP (XEXP (tmp, 0), 1);
2160 tmp = XEXP (XEXP (tmp, 0), 0);
2161 }
2162
2163 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2164 tmp = legitimize_tls_address (tmp);
2165 if (addend)
2166 {
2167 tmp = gen_rtx_PLUS (mode, tmp, addend);
2168 tmp = force_operand (tmp, operands[0]);
2169 }
2170 operands[1] = tmp;
2171 }
a1747d2c 2172 else if (GET_CODE (operand1) != CONST_INT
5877e54e 2173 || !pa_cint_ok_for_move (UINTVAL (operand1)))
188538df 2174 {
e0d80a58
JL
2175 rtx temp;
2176 rtx_insn *insn;
a4295210 2177 rtx op1 = operand1;
4cce9dd8 2178 HOST_WIDE_INT value = 0;
a4295210
JDA
2179 HOST_WIDE_INT insv = 0;
2180 int insert = 0;
2181
4cce9dd8
RS
2182 if (GET_CODE (operand1) == CONST_INT)
2183 value = INTVAL (operand1);
2184
a4295210
JDA
2185 if (TARGET_64BIT
2186 && GET_CODE (operand1) == CONST_INT
e0c556d3 2187 && HOST_BITS_PER_WIDE_INT > 32
520babc7
JL
2188 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2189 {
e0c556d3 2190 HOST_WIDE_INT nval;
520babc7 2191
b8e42321
JDA
2192 /* Extract the low order 32 bits of the value and sign extend.
2193 If the new value is the same as the original value, we can
2194 use the original value as-is. If the new value is
2195 different, we use it and insert the most-significant 32-bits
2196 of the original value into the final result. */
a4295210 2197 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
e0c556d3 2198 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
a4295210 2199 if (value != nval)
520babc7 2200 {
b8e42321 2201#if HOST_BITS_PER_WIDE_INT > 32
a4295210 2202 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
b8e42321 2203#endif
a4295210
JDA
2204 insert = 1;
2205 value = nval;
520babc7
JL
2206 operand1 = GEN_INT (nval);
2207 }
2208 }
1bc695cd
JL
2209
2210 if (reload_in_progress || reload_completed)
a4295210 2211 temp = scratch_reg ? scratch_reg : operand0;
1bc695cd
JL
2212 else
2213 temp = gen_reg_rtx (mode);
2214
47abc309
JDA
2215 /* We don't directly split DImode constants on 32-bit targets
2216 because PLUS uses an 11-bit immediate and the insn sequence
2217 generated is not as efficient as the one using HIGH/LO_SUM. */
2218 if (GET_CODE (operand1) == CONST_INT
0eab7815 2219 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
a4295210
JDA
2220 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2221 && !insert)
b8e42321 2222 {
47abc309 2223 /* Directly break constant into high and low parts. This
b8e42321
JDA
2224 provides better optimization opportunities because various
2225 passes recognize constants split with PLUS but not LO_SUM.
2226 We use a 14-bit signed low part except when the addition
2227 of 0x4000 to the high part might change the sign of the
2228 high part. */
b8e42321
JDA
2229 HOST_WIDE_INT low = value & 0x3fff;
2230 HOST_WIDE_INT high = value & ~ 0x3fff;
2231
2232 if (low >= 0x2000)
2233 {
2234 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2235 high += 0x2000;
2236 else
2237 high += 0x4000;
2238 }
2239
2240 low = value - high;
520babc7 2241
f7df4a84 2242 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
b8e42321
JDA
2243 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2244 }
2245 else
520babc7 2246 {
f7df4a84 2247 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
b8e42321 2248 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
520babc7 2249 }
6619e96c 2250
a4295210
JDA
2251 insn = emit_move_insn (operands[0], operands[1]);
2252
2253 /* Now insert the most significant 32 bits of the value
2254 into the register. When we don't have a second register
2255 available, it could take up to nine instructions to load
2256 a 64-bit integer constant. Prior to reload, we force
2257 constants that would take more than three instructions
2258 to load to the constant pool. During and after reload,
2259 we have to handle all possible values. */
2260 if (insert)
2261 {
2262 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2263 register and the value to be inserted is outside the
2264 range that can be loaded with three depdi instructions. */
2265 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2266 {
2267 operand1 = GEN_INT (insv);
2268
f7df4a84 2269 emit_insn (gen_rtx_SET (temp,
a4295210
JDA
2270 gen_rtx_HIGH (mode, operand1)));
2271 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
f0d54148 2272 if (mode == DImode)
225f4747
JDA
2273 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2274 const0_rtx, temp));
f0d54148 2275 else
225f4747
JDA
2276 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2277 const0_rtx, temp));
a4295210
JDA
2278 }
2279 else
2280 {
2281 int len = 5, pos = 27;
2282
2283 /* Insert the bits using the depdi instruction. */
2284 while (pos >= 0)
2285 {
2286 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2287 HOST_WIDE_INT sign = v5 < 0;
2288
2289 /* Left extend the insertion. */
2290 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2291 while (pos > 0 && (insv & 1) == sign)
2292 {
2293 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2294 len += 1;
2295 pos -= 1;
2296 }
2297
f0d54148 2298 if (mode == DImode)
225f4747
JDA
2299 insn = emit_insn (gen_insvdi (operand0,
2300 GEN_INT (len),
2301 GEN_INT (pos),
2302 GEN_INT (v5)));
f0d54148 2303 else
225f4747
JDA
2304 insn = emit_insn (gen_insvsi (operand0,
2305 GEN_INT (len),
2306 GEN_INT (pos),
2307 GEN_INT (v5)));
a4295210
JDA
2308
2309 len = pos > 0 && pos < 5 ? pos : 5;
2310 pos -= len;
2311 }
2312 }
2313 }
b8e42321 2314
bd94cb6e 2315 set_unique_reg_note (insn, REG_EQUAL, op1);
b8e42321 2316
520babc7 2317 return 1;
188538df
TG
2318 }
2319 }
2320 /* Now have insn-emit do whatever it normally does. */
2321 return 0;
2322}
2323
c77c286a 2324/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
c4bb6b38 2325 it will need a link/runtime reloc). */
c77c286a
JL
2326
2327int
ae9d61ab 2328pa_reloc_needed (tree exp)
c77c286a
JL
2329{
2330 int reloc = 0;
2331
2332 switch (TREE_CODE (exp))
2333 {
2334 case ADDR_EXPR:
2335 return 1;
2336
5be014d5 2337 case POINTER_PLUS_EXPR:
c77c286a
JL
2338 case PLUS_EXPR:
2339 case MINUS_EXPR:
ae9d61ab
JDA
2340 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2341 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
c77c286a
JL
2342 break;
2343
1043771b 2344 CASE_CONVERT:
c77c286a 2345 case NON_LVALUE_EXPR:
ae9d61ab 2346 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
c77c286a
JL
2347 break;
2348
2349 case CONSTRUCTOR:
2350 {
28f155be
GB
2351 tree value;
2352 unsigned HOST_WIDE_INT ix;
2353
2354 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2355 if (value)
ae9d61ab 2356 reloc |= pa_reloc_needed (value);
c77c286a
JL
2357 }
2358 break;
2359
2360 case ERROR_MARK:
2361 break;
51723711
KG
2362
2363 default:
2364 break;
c77c286a
JL
2365 }
2366 return reloc;
2367}
2368
188538df
TG
2369\f
2370/* Return the best assembler insn template
71cc389b 2371 for moving operands[1] into operands[0] as a fullword. */
519104fe 2372const char *
ae9d61ab 2373pa_singlemove_string (rtx *operands)
188538df 2374{
0c235d7e
TG
2375 HOST_WIDE_INT intval;
2376
188538df
TG
2377 if (GET_CODE (operands[0]) == MEM)
2378 return "stw %r1,%0";
0c235d7e 2379 if (GET_CODE (operands[1]) == MEM)
188538df 2380 return "ldw %1,%0";
0c235d7e 2381 if (GET_CODE (operands[1]) == CONST_DOUBLE)
e5c2baa1 2382 {
0c235d7e 2383 long i;
e5c2baa1 2384
144d51f9 2385 gcc_assert (GET_MODE (operands[1]) == SFmode);
e5c2baa1 2386
0c235d7e
TG
2387 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2388 bit pattern. */
34a72c33 2389 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
e5c2baa1 2390
0c235d7e
TG
2391 operands[1] = GEN_INT (i);
2392 /* Fall through to CONST_INT case. */
2393 }
2394 if (GET_CODE (operands[1]) == CONST_INT)
e5c2baa1 2395 {
0c235d7e
TG
2396 intval = INTVAL (operands[1]);
2397
2398 if (VAL_14_BITS_P (intval))
2399 return "ldi %1,%0";
2400 else if ((intval & 0x7ff) == 0)
2401 return "ldil L'%1,%0";
ae9d61ab 2402 else if (pa_zdepi_cint_p (intval))
f38b27c7 2403 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
e5c2baa1
RS
2404 else
2405 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2406 }
188538df
TG
2407 return "copy %1,%0";
2408}
2409\f
2410
f133af4c
TG
2411/* Compute position (in OP[1]) and width (in OP[2])
2412 useful for copying IMM to a register using the zdepi
2413 instructions. Store the immediate value to insert in OP[0]. */
519104fe 2414static void
b7849684 2415compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
c819adf2 2416{
0e7f4c19 2417 int lsb, len;
c819adf2 2418
0e7f4c19
TG
2419 /* Find the least significant set bit in IMM. */
2420 for (lsb = 0; lsb < 32; lsb++)
c819adf2 2421 {
0e7f4c19 2422 if ((imm & 1) != 0)
c819adf2 2423 break;
0e7f4c19 2424 imm >>= 1;
c819adf2
TG
2425 }
2426
0e7f4c19
TG
2427 /* Choose variants based on *sign* of the 5-bit field. */
2428 if ((imm & 0x10) == 0)
2429 len = (lsb <= 28) ? 4 : 32 - lsb;
c819adf2
TG
2430 else
2431 {
0e7f4c19 2432 /* Find the width of the bitstring in IMM. */
ef8d9a0e 2433 for (len = 5; len < 32 - lsb; len++)
c819adf2 2434 {
ef8d9a0e 2435 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
c819adf2 2436 break;
c819adf2
TG
2437 }
2438
0e7f4c19
TG
2439 /* Sign extend IMM as a 5-bit value. */
2440 imm = (imm & 0xf) - 0x10;
c819adf2
TG
2441 }
2442
a1747d2c
TG
2443 op[0] = imm;
2444 op[1] = 31 - lsb;
2445 op[2] = len;
c819adf2
TG
2446}
2447
520babc7
JL
2448/* Compute position (in OP[1]) and width (in OP[2])
2449 useful for copying IMM to a register using the depdi,z
2450 instructions. Store the immediate value to insert in OP[0]. */
ae9d61ab
JDA
2451
2452static void
b7849684 2453compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
520babc7 2454{
ef8d9a0e
JDA
2455 int lsb, len, maxlen;
2456
2457 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
520babc7
JL
2458
2459 /* Find the least significant set bit in IMM. */
ef8d9a0e 2460 for (lsb = 0; lsb < maxlen; lsb++)
520babc7
JL
2461 {
2462 if ((imm & 1) != 0)
2463 break;
2464 imm >>= 1;
2465 }
2466
2467 /* Choose variants based on *sign* of the 5-bit field. */
2468 if ((imm & 0x10) == 0)
ef8d9a0e 2469 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
520babc7
JL
2470 else
2471 {
2472 /* Find the width of the bitstring in IMM. */
ef8d9a0e 2473 for (len = 5; len < maxlen - lsb; len++)
520babc7 2474 {
831c1763 2475 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
520babc7
JL
2476 break;
2477 }
2478
ef8d9a0e
JDA
2479 /* Extend length if host is narrow and IMM is negative. */
2480 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2481 len += 32;
2482
520babc7
JL
2483 /* Sign extend IMM as a 5-bit value. */
2484 imm = (imm & 0xf) - 0x10;
2485 }
2486
2487 op[0] = imm;
2488 op[1] = 63 - lsb;
2489 op[2] = len;
2490}
2491
188538df
TG
2492/* Output assembler code to perform a doubleword move insn
2493 with operands OPERANDS. */
2494
519104fe 2495const char *
ae9d61ab 2496pa_output_move_double (rtx *operands)
188538df
TG
2497{
2498 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2499 rtx latehalf[2];
2500 rtx addreg0 = 0, addreg1 = 0;
2b564b9c 2501 int highonly = 0;
188538df
TG
2502
2503 /* First classify both operands. */
2504
2505 if (REG_P (operands[0]))
2506 optype0 = REGOP;
2507 else if (offsettable_memref_p (operands[0]))
2508 optype0 = OFFSOP;
2509 else if (GET_CODE (operands[0]) == MEM)
2510 optype0 = MEMOP;
2511 else
2512 optype0 = RNDOP;
2513
2514 if (REG_P (operands[1]))
2515 optype1 = REGOP;
2516 else if (CONSTANT_P (operands[1]))
2517 optype1 = CNSTOP;
2518 else if (offsettable_memref_p (operands[1]))
2519 optype1 = OFFSOP;
2520 else if (GET_CODE (operands[1]) == MEM)
2521 optype1 = MEMOP;
2522 else
2523 optype1 = RNDOP;
2524
2525 /* Check for the cases that the operand constraints are not
144d51f9
NS
2526 supposed to allow to happen. */
2527 gcc_assert (optype0 == REGOP || optype1 == REGOP);
188538df 2528
5401050b
JDA
2529 /* Handle copies between general and floating registers. */
2530
2531 if (optype0 == REGOP && optype1 == REGOP
2532 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2533 {
2534 if (FP_REG_P (operands[0]))
2535 {
2536 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2537 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2538 return "{fldds|fldd} -16(%%sp),%0";
2539 }
2540 else
2541 {
2542 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2543 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2544 return "{ldws|ldw} -12(%%sp),%R0";
2545 }
2546 }
2547
188538df
TG
2548 /* Handle auto decrementing and incrementing loads and stores
2549 specifically, since the structure of the function doesn't work
2550 for them without major modification. Do it better when we learn
2551 this port about the general inc/dec addressing of PA.
2552 (This was written by tege. Chide him if it doesn't work.) */
2553
2554 if (optype0 == MEMOP)
2555 {
e37ce5f6
JL
2556 /* We have to output the address syntax ourselves, since print_operand
2557 doesn't deal with the addresses we want to use. Fix this later. */
2558
188538df 2559 rtx addr = XEXP (operands[0], 0);
e37ce5f6 2560 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
188538df 2561 {
ad2c71b7 2562 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
e37ce5f6
JL
2563
2564 operands[0] = XEXP (addr, 0);
144d51f9
NS
2565 gcc_assert (GET_CODE (operands[1]) == REG
2566 && GET_CODE (operands[0]) == REG);
e37ce5f6 2567
144d51f9
NS
2568 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2569
2570 /* No overlap between high target register and address
2571 register. (We do this in a non-obvious way to
2572 save a register file writeback) */
2573 if (GET_CODE (addr) == POST_INC)
2574 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2575 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
9682683d 2576 }
e37ce5f6 2577 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
9682683d 2578 {
ad2c71b7 2579 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
e37ce5f6
JL
2580
2581 operands[0] = XEXP (addr, 0);
144d51f9
NS
2582 gcc_assert (GET_CODE (operands[1]) == REG
2583 && GET_CODE (operands[0]) == REG);
2584
2585 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2586 /* No overlap between high target register and address
2587 register. (We do this in a non-obvious way to save a
2588 register file writeback) */
2589 if (GET_CODE (addr) == PRE_INC)
2590 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2591 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
188538df
TG
2592 }
2593 }
2594 if (optype1 == MEMOP)
2595 {
2596 /* We have to output the address syntax ourselves, since print_operand
2597 doesn't deal with the addresses we want to use. Fix this later. */
2598
2599 rtx addr = XEXP (operands[1], 0);
2600 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2601 {
ad2c71b7 2602 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
188538df
TG
2603
2604 operands[1] = XEXP (addr, 0);
144d51f9
NS
2605 gcc_assert (GET_CODE (operands[0]) == REG
2606 && GET_CODE (operands[1]) == REG);
188538df
TG
2607
2608 if (!reg_overlap_mentioned_p (high_reg, addr))
2609 {
2610 /* No overlap between high target register and address
dd605bb4 2611 register. (We do this in a non-obvious way to
188538df
TG
2612 save a register file writeback) */
2613 if (GET_CODE (addr) == POST_INC)
f38b27c7 2614 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
6126a380 2615 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
188538df
TG
2616 }
2617 else
2618 {
2619 /* This is an undefined situation. We should load into the
2620 address register *and* update that register. Probably
2621 we don't need to handle this at all. */
2622 if (GET_CODE (addr) == POST_INC)
f38b27c7
JL
2623 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2624 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
188538df
TG
2625 }
2626 }
2627 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2628 {
ad2c71b7 2629 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
188538df
TG
2630
2631 operands[1] = XEXP (addr, 0);
144d51f9
NS
2632 gcc_assert (GET_CODE (operands[0]) == REG
2633 && GET_CODE (operands[1]) == REG);
188538df
TG
2634
2635 if (!reg_overlap_mentioned_p (high_reg, addr))
2636 {
2637 /* No overlap between high target register and address
dd605bb4 2638 register. (We do this in a non-obvious way to
188538df
TG
2639 save a register file writeback) */
2640 if (GET_CODE (addr) == PRE_INC)
f38b27c7
JL
2641 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2642 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
188538df
TG
2643 }
2644 else
2645 {
2646 /* This is an undefined situation. We should load into the
2647 address register *and* update that register. Probably
2648 we don't need to handle this at all. */
2649 if (GET_CODE (addr) == PRE_INC)
f38b27c7
JL
2650 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2651 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
188538df
TG
2652 }
2653 }
a89974a2
JL
2654 else if (GET_CODE (addr) == PLUS
2655 && GET_CODE (XEXP (addr, 0)) == MULT)
2656 {
4c6d8726 2657 rtx xoperands[4];
a89974a2 2658
166d826f
JDA
2659 /* Load address into left half of destination register. */
2660 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2661 xoperands[1] = XEXP (addr, 1);
2662 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2663 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2664 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2665 xoperands);
2666 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2667 }
2668 else if (GET_CODE (addr) == PLUS
2669 && REG_P (XEXP (addr, 0))
2670 && REG_P (XEXP (addr, 1)))
2671 {
2672 rtx xoperands[3];
2673
2674 /* Load address into left half of destination register. */
2675 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2676 xoperands[1] = XEXP (addr, 0);
2677 xoperands[2] = XEXP (addr, 1);
2678 output_asm_insn ("{addl|add,l} %1,%2,%0",
2679 xoperands);
2680 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
a89974a2 2681 }
188538df
TG
2682 }
2683
2684 /* If an operand is an unoffsettable memory ref, find a register
2685 we can increment temporarily to make it refer to the second word. */
2686
2687 if (optype0 == MEMOP)
2688 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2689
2690 if (optype1 == MEMOP)
2691 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2692
2693 /* Ok, we can do one word at a time.
2694 Normally we do the low-numbered word first.
2695
2696 In either case, set up in LATEHALF the operands to use
2697 for the high-numbered word and in some cases alter the
2698 operands in OPERANDS to be suitable for the low-numbered word. */
2699
2700 if (optype0 == REGOP)
ad2c71b7 2701 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
188538df 2702 else if (optype0 == OFFSOP)
325fefe0 2703 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
188538df
TG
2704 else
2705 latehalf[0] = operands[0];
2706
2707 if (optype1 == REGOP)
ad2c71b7 2708 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
188538df 2709 else if (optype1 == OFFSOP)
325fefe0 2710 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
188538df 2711 else if (optype1 == CNSTOP)
2b564b9c
JDA
2712 {
2713 if (GET_CODE (operands[1]) == HIGH)
2714 {
2715 operands[1] = XEXP (operands[1], 0);
2716 highonly = 1;
2717 }
2718 split_double (operands[1], &operands[1], &latehalf[1]);
2719 }
188538df
TG
2720 else
2721 latehalf[1] = operands[1];
2722
2723 /* If the first move would clobber the source of the second one,
2724 do them in the other order.
2725
bad883f8 2726 This can happen in two cases:
188538df 2727
bad883f8
JL
2728 mem -> register where the first half of the destination register
2729 is the same register used in the memory's address. Reload
2730 can create such insns.
188538df 2731
bad883f8 2732 mem in this case will be either register indirect or register
6619e96c 2733 indirect plus a valid offset.
bad883f8
JL
2734
2735 register -> register move where REGNO(dst) == REGNO(src + 1)
6619e96c 2736 someone (Tim/Tege?) claimed this can happen for parameter loads.
bad883f8
JL
2737
2738 Handle mem -> register case first. */
2739 if (optype0 == REGOP
2740 && (optype1 == MEMOP || optype1 == OFFSOP)
c9bd6bcd 2741 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
188538df 2742 {
188538df
TG
2743 /* Do the late half first. */
2744 if (addreg1)
498ee10c 2745 output_asm_insn ("ldo 4(%0),%0", &addreg1);
ae9d61ab 2746 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
bad883f8
JL
2747
2748 /* Then clobber. */
188538df 2749 if (addreg1)
498ee10c 2750 output_asm_insn ("ldo -4(%0),%0", &addreg1);
ae9d61ab 2751 return pa_singlemove_string (operands);
188538df
TG
2752 }
2753
bad883f8 2754 /* Now handle register -> register case. */
63a1f834
TG
2755 if (optype0 == REGOP && optype1 == REGOP
2756 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2757 {
ae9d61ab
JDA
2758 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2759 return pa_singlemove_string (operands);
63a1f834
TG
2760 }
2761
188538df
TG
2762 /* Normal case: do the two words, low-numbered first. */
2763
ae9d61ab 2764 output_asm_insn (pa_singlemove_string (operands), operands);
188538df
TG
2765
2766 /* Make any unoffsettable addresses point at high-numbered word. */
2767 if (addreg0)
498ee10c 2768 output_asm_insn ("ldo 4(%0),%0", &addreg0);
188538df 2769 if (addreg1)
498ee10c 2770 output_asm_insn ("ldo 4(%0),%0", &addreg1);
188538df 2771
2b564b9c
JDA
2772 /* Do high-numbered word. */
2773 if (highonly)
2774 output_asm_insn ("ldil L'%1,%0", latehalf);
2775 else
2776 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
188538df
TG
2777
2778 /* Undo the adds we just did. */
2779 if (addreg0)
498ee10c 2780 output_asm_insn ("ldo -4(%0),%0", &addreg0);
188538df 2781 if (addreg1)
498ee10c 2782 output_asm_insn ("ldo -4(%0),%0", &addreg1);
188538df
TG
2783
2784 return "";
2785}
2786\f
519104fe 2787const char *
ae9d61ab 2788pa_output_fp_move_double (rtx *operands)
188538df
TG
2789{
2790 if (FP_REG_P (operands[0]))
2791 {
23f6f34f 2792 if (FP_REG_P (operands[1])
f048ca47 2793 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
55abf18a 2794 output_asm_insn ("fcpy,dbl %f1,%0", operands);
23f6f34f 2795 else
2414e0e2 2796 output_asm_insn ("fldd%F1 %1,%0", operands);
188538df
TG
2797 }
2798 else if (FP_REG_P (operands[1]))
2799 {
2414e0e2 2800 output_asm_insn ("fstd%F0 %1,%0", operands);
188538df 2801 }
144d51f9 2802 else
f048ca47 2803 {
144d51f9
NS
2804 rtx xoperands[2];
2805
2806 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2807
23f6f34f 2808 /* This is a pain. You have to be prepared to deal with an
ddd5a7c1 2809 arbitrary address here including pre/post increment/decrement.
f048ca47
JL
2810
2811 so avoid this in the MD. */
144d51f9
NS
2812 gcc_assert (GET_CODE (operands[0]) == REG);
2813
2814 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2815 xoperands[0] = operands[0];
2816 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
f048ca47 2817 }
188538df
TG
2818 return "";
2819}
2820\f
2821/* Return a REG that occurs in ADDR with coefficient 1.
2822 ADDR can be effectively incremented by incrementing REG. */
2823
2824static rtx
b7849684 2825find_addr_reg (rtx addr)
188538df
TG
2826{
2827 while (GET_CODE (addr) == PLUS)
2828 {
2829 if (GET_CODE (XEXP (addr, 0)) == REG)
2830 addr = XEXP (addr, 0);
2831 else if (GET_CODE (XEXP (addr, 1)) == REG)
2832 addr = XEXP (addr, 1);
2833 else if (CONSTANT_P (XEXP (addr, 0)))
2834 addr = XEXP (addr, 1);
2835 else if (CONSTANT_P (XEXP (addr, 1)))
2836 addr = XEXP (addr, 0);
2837 else
144d51f9 2838 gcc_unreachable ();
188538df 2839 }
144d51f9
NS
2840 gcc_assert (GET_CODE (addr) == REG);
2841 return addr;
188538df
TG
2842}
2843
188538df
TG
2844/* Emit code to perform a block move.
2845
188538df
TG
2846 OPERANDS[0] is the destination pointer as a REG, clobbered.
2847 OPERANDS[1] is the source pointer as a REG, clobbered.
68944452 2848 OPERANDS[2] is a register for temporary storage.
188538df 2849 OPERANDS[3] is a register for temporary storage.
cdc9103c 2850 OPERANDS[4] is the size as a CONST_INT
6619e96c 2851 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
71cc389b 2852 OPERANDS[6] is another temporary register. */
188538df 2853
519104fe 2854const char *
ae9d61ab 2855pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
188538df
TG
2856{
2857 int align = INTVAL (operands[5]);
68944452 2858 unsigned long n_bytes = INTVAL (operands[4]);
188538df 2859
cdc9103c 2860 /* We can't move more than a word at a time because the PA
188538df 2861 has no longer integer move insns. (Could use fp mem ops?) */
cdc9103c
JDA
2862 if (align > (TARGET_64BIT ? 8 : 4))
2863 align = (TARGET_64BIT ? 8 : 4);
188538df 2864
68944452
JL
2865 /* Note that we know each loop below will execute at least twice
2866 (else we would have open-coded the copy). */
2867 switch (align)
188538df 2868 {
cdc9103c
JDA
2869 case 8:
2870 /* Pre-adjust the loop counter. */
2871 operands[4] = GEN_INT (n_bytes - 16);
2872 output_asm_insn ("ldi %4,%2", operands);
2873
2874 /* Copying loop. */
2875 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2876 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2877 output_asm_insn ("std,ma %3,8(%0)", operands);
2878 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2879 output_asm_insn ("std,ma %6,8(%0)", operands);
2880
2881 /* Handle the residual. There could be up to 7 bytes of
2882 residual to copy! */
2883 if (n_bytes % 16 != 0)
2884 {
2885 operands[4] = GEN_INT (n_bytes % 8);
2886 if (n_bytes % 16 >= 8)
2887 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2888 if (n_bytes % 8 != 0)
2889 output_asm_insn ("ldd 0(%1),%6", operands);
2890 if (n_bytes % 16 >= 8)
2891 output_asm_insn ("std,ma %3,8(%0)", operands);
2892 if (n_bytes % 8 != 0)
2893 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2894 }
2895 return "";
2896
68944452
JL
2897 case 4:
2898 /* Pre-adjust the loop counter. */
2899 operands[4] = GEN_INT (n_bytes - 8);
2900 output_asm_insn ("ldi %4,%2", operands);
2901
2902 /* Copying loop. */
f38b27c7
JL
2903 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2904 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2905 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
68944452 2906 output_asm_insn ("addib,>= -8,%2,.-12", operands);
f38b27c7 2907 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
68944452
JL
2908
2909 /* Handle the residual. There could be up to 7 bytes of
2910 residual to copy! */
2911 if (n_bytes % 8 != 0)
2912 {
2913 operands[4] = GEN_INT (n_bytes % 4);
2914 if (n_bytes % 8 >= 4)
f38b27c7 2915 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
68944452 2916 if (n_bytes % 4 != 0)
d2d28085 2917 output_asm_insn ("ldw 0(%1),%6", operands);
68944452 2918 if (n_bytes % 8 >= 4)
f38b27c7 2919 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
68944452 2920 if (n_bytes % 4 != 0)
f38b27c7 2921 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
68944452
JL
2922 }
2923 return "";
188538df 2924
68944452
JL
2925 case 2:
2926 /* Pre-adjust the loop counter. */
2927 operands[4] = GEN_INT (n_bytes - 4);
2928 output_asm_insn ("ldi %4,%2", operands);
188538df 2929
68944452 2930 /* Copying loop. */
f38b27c7
JL
2931 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2932 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2933 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
68944452 2934 output_asm_insn ("addib,>= -4,%2,.-12", operands);
f38b27c7 2935 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
188538df 2936
68944452
JL
2937 /* Handle the residual. */
2938 if (n_bytes % 4 != 0)
2939 {
2940 if (n_bytes % 4 >= 2)
f38b27c7 2941 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
68944452 2942 if (n_bytes % 2 != 0)
d2d28085 2943 output_asm_insn ("ldb 0(%1),%6", operands);
68944452 2944 if (n_bytes % 4 >= 2)
f38b27c7 2945 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
68944452 2946 if (n_bytes % 2 != 0)
d2d28085 2947 output_asm_insn ("stb %6,0(%0)", operands);
68944452
JL
2948 }
2949 return "";
188538df 2950
68944452
JL
2951 case 1:
2952 /* Pre-adjust the loop counter. */
2953 operands[4] = GEN_INT (n_bytes - 2);
2954 output_asm_insn ("ldi %4,%2", operands);
188538df 2955
68944452 2956 /* Copying loop. */
f38b27c7
JL
2957 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2958 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2959 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
68944452 2960 output_asm_insn ("addib,>= -2,%2,.-12", operands);
f38b27c7 2961 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
188538df 2962
68944452
JL
2963 /* Handle the residual. */
2964 if (n_bytes % 2 != 0)
2965 {
d2d28085
JL
2966 output_asm_insn ("ldb 0(%1),%3", operands);
2967 output_asm_insn ("stb %3,0(%0)", operands);
68944452
JL
2968 }
2969 return "";
188538df 2970
68944452 2971 default:
144d51f9 2972 gcc_unreachable ();
188538df 2973 }
188538df 2974}
3673e996
RS
2975
2976/* Count the number of insns necessary to handle this block move.
2977
2978 Basic structure is the same as emit_block_move, except that we
2979 count insns rather than emit them. */
2980
519104fe 2981static int
e0d80a58 2982compute_movmem_length (rtx_insn *insn)
3673e996
RS
2983{
2984 rtx pat = PATTERN (insn);
a36a47ad
GS
2985 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2986 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
68944452 2987 unsigned int n_insns = 0;
3673e996
RS
2988
2989 /* We can't move more than four bytes at a time because the PA
2990 has no longer integer move insns. (Could use fp mem ops?) */
cdc9103c
JDA
2991 if (align > (TARGET_64BIT ? 8 : 4))
2992 align = (TARGET_64BIT ? 8 : 4);
3673e996 2993
90304f64 2994 /* The basic copying loop. */
68944452 2995 n_insns = 6;
3673e996 2996
68944452
JL
2997 /* Residuals. */
2998 if (n_bytes % (2 * align) != 0)
3673e996 2999 {
90304f64
JL
3000 if ((n_bytes % (2 * align)) >= align)
3001 n_insns += 2;
3002
3003 if ((n_bytes % align) != 0)
3004 n_insns += 2;
3673e996 3005 }
68944452
JL
3006
3007 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3008 return n_insns * 4;
3673e996 3009}
cdc9103c
JDA
3010
3011/* Emit code to perform a block clear.
3012
3013 OPERANDS[0] is the destination pointer as a REG, clobbered.
3014 OPERANDS[1] is a register for temporary storage.
3015 OPERANDS[2] is the size as a CONST_INT
3016 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3017
3018const char *
ae9d61ab 3019pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
cdc9103c
JDA
3020{
3021 int align = INTVAL (operands[3]);
3022 unsigned long n_bytes = INTVAL (operands[2]);
3023
3024 /* We can't clear more than a word at a time because the PA
3025 has no longer integer move insns. */
3026 if (align > (TARGET_64BIT ? 8 : 4))
3027 align = (TARGET_64BIT ? 8 : 4);
3028
3029 /* Note that we know each loop below will execute at least twice
3030 (else we would have open-coded the copy). */
3031 switch (align)
3032 {
3033 case 8:
3034 /* Pre-adjust the loop counter. */
3035 operands[2] = GEN_INT (n_bytes - 16);
3036 output_asm_insn ("ldi %2,%1", operands);
3037
3038 /* Loop. */
3039 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3040 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3041 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3042
3043 /* Handle the residual. There could be up to 7 bytes of
3044 residual to copy! */
3045 if (n_bytes % 16 != 0)
3046 {
3047 operands[2] = GEN_INT (n_bytes % 8);
3048 if (n_bytes % 16 >= 8)
3049 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3050 if (n_bytes % 8 != 0)
3051 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3052 }
3053 return "";
3054
3055 case 4:
3056 /* Pre-adjust the loop counter. */
3057 operands[2] = GEN_INT (n_bytes - 8);
3058 output_asm_insn ("ldi %2,%1", operands);
3059
3060 /* Loop. */
3061 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3062 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3063 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3064
3065 /* Handle the residual. There could be up to 7 bytes of
3066 residual to copy! */
3067 if (n_bytes % 8 != 0)
3068 {
3069 operands[2] = GEN_INT (n_bytes % 4);
3070 if (n_bytes % 8 >= 4)
3071 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3072 if (n_bytes % 4 != 0)
3073 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3074 }
3075 return "";
3076
3077 case 2:
3078 /* Pre-adjust the loop counter. */
3079 operands[2] = GEN_INT (n_bytes - 4);
3080 output_asm_insn ("ldi %2,%1", operands);
3081
3082 /* Loop. */
3083 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3084 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3085 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3086
3087 /* Handle the residual. */
3088 if (n_bytes % 4 != 0)
3089 {
3090 if (n_bytes % 4 >= 2)
3091 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3092 if (n_bytes % 2 != 0)
3093 output_asm_insn ("stb %%r0,0(%0)", operands);
3094 }
3095 return "";
3096
3097 case 1:
3098 /* Pre-adjust the loop counter. */
3099 operands[2] = GEN_INT (n_bytes - 2);
3100 output_asm_insn ("ldi %2,%1", operands);
3101
3102 /* Loop. */
3103 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3104 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3105 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3106
3107 /* Handle the residual. */
3108 if (n_bytes % 2 != 0)
3109 output_asm_insn ("stb %%r0,0(%0)", operands);
3110
3111 return "";
3112
3113 default:
144d51f9 3114 gcc_unreachable ();
cdc9103c
JDA
3115 }
3116}
3117
3118/* Count the number of insns necessary to handle this block move.
3119
3120 Basic structure is the same as emit_block_move, except that we
3121 count insns rather than emit them. */
3122
3123static int
e0d80a58 3124compute_clrmem_length (rtx_insn *insn)
cdc9103c
JDA
3125{
3126 rtx pat = PATTERN (insn);
3127 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3128 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3129 unsigned int n_insns = 0;
3130
3131 /* We can't clear more than a word at a time because the PA
3132 has no longer integer move insns. */
3133 if (align > (TARGET_64BIT ? 8 : 4))
3134 align = (TARGET_64BIT ? 8 : 4);
3135
3136 /* The basic loop. */
3137 n_insns = 4;
3138
3139 /* Residuals. */
3140 if (n_bytes % (2 * align) != 0)
3141 {
3142 if ((n_bytes % (2 * align)) >= align)
3143 n_insns++;
3144
3145 if ((n_bytes % align) != 0)
3146 n_insns++;
3147 }
3148
3149 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3150 return n_insns * 4;
3151}
188538df
TG
3152\f
3153
519104fe 3154const char *
ae9d61ab 3155pa_output_and (rtx *operands)
0e7f4c19 3156{
d2a94ec0 3157 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
0e7f4c19 3158 {
0c235d7e 3159 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
0e7f4c19
TG
3160 int ls0, ls1, ms0, p, len;
3161
3162 for (ls0 = 0; ls0 < 32; ls0++)
3163 if ((mask & (1 << ls0)) == 0)
3164 break;
3165
3166 for (ls1 = ls0; ls1 < 32; ls1++)
3167 if ((mask & (1 << ls1)) != 0)
3168 break;
3169
3170 for (ms0 = ls1; ms0 < 32; ms0++)
3171 if ((mask & (1 << ms0)) == 0)
3172 break;
3173
144d51f9 3174 gcc_assert (ms0 == 32);
0e7f4c19
TG
3175
3176 if (ls1 == 32)
3177 {
3178 len = ls0;
3179
144d51f9 3180 gcc_assert (len);
0e7f4c19 3181
8919037c 3182 operands[2] = GEN_INT (len);
f38b27c7 3183 return "{extru|extrw,u} %1,31,%2,%0";
0e7f4c19
TG
3184 }
3185 else
3186 {
3187 /* We could use this `depi' for the case above as well, but `depi'
3188 requires one more register file access than an `extru'. */
3189
3190 p = 31 - ls0;
3191 len = ls1 - ls0;
3192
8919037c
TG
3193 operands[2] = GEN_INT (p);
3194 operands[3] = GEN_INT (len);
f38b27c7 3195 return "{depi|depwi} 0,%2,%3,%0";
0e7f4c19
TG
3196 }
3197 }
3198 else
3199 return "and %1,%2,%0";
3200}
3201
520babc7
JL
3202/* Return a string to perform a bitwise-and of operands[1] with operands[2]
3203 storing the result in operands[0]. */
0952f89b 3204const char *
ae9d61ab 3205pa_output_64bit_and (rtx *operands)
520babc7
JL
3206{
3207 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3208 {
3209 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
e0c556d3 3210 int ls0, ls1, ms0, p, len;
520babc7
JL
3211
3212 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
e0c556d3 3213 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
520babc7
JL
3214 break;
3215
3216 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
e0c556d3 3217 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
520babc7
JL
3218 break;
3219
3220 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
e0c556d3 3221 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
520babc7
JL
3222 break;
3223
144d51f9 3224 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
520babc7
JL
3225
3226 if (ls1 == HOST_BITS_PER_WIDE_INT)
3227 {
3228 len = ls0;
3229
144d51f9 3230 gcc_assert (len);
520babc7
JL
3231
3232 operands[2] = GEN_INT (len);
3233 return "extrd,u %1,63,%2,%0";
3234 }
3235 else
3236 {
3237 /* We could use this `depi' for the case above as well, but `depi'
3238 requires one more register file access than an `extru'. */
3239
3240 p = 63 - ls0;
3241 len = ls1 - ls0;
3242
3243 operands[2] = GEN_INT (p);
3244 operands[3] = GEN_INT (len);
3245 return "depdi 0,%2,%3,%0";
3246 }
3247 }
3248 else
3249 return "and %1,%2,%0";
3250}
3251
519104fe 3252const char *
ae9d61ab 3253pa_output_ior (rtx *operands)
0e7f4c19 3254{
0c235d7e 3255 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
f1c7ce82 3256 int bs0, bs1, p, len;
23f6f34f 3257
8365d59b
TG
3258 if (INTVAL (operands[2]) == 0)
3259 return "copy %1,%0";
0e7f4c19 3260
8365d59b
TG
3261 for (bs0 = 0; bs0 < 32; bs0++)
3262 if ((mask & (1 << bs0)) != 0)
3263 break;
0e7f4c19 3264
8365d59b
TG
3265 for (bs1 = bs0; bs1 < 32; bs1++)
3266 if ((mask & (1 << bs1)) == 0)
3267 break;
0e7f4c19 3268
144d51f9 3269 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
0e7f4c19 3270
8365d59b
TG
3271 p = 31 - bs0;
3272 len = bs1 - bs0;
0e7f4c19 3273
8919037c
TG
3274 operands[2] = GEN_INT (p);
3275 operands[3] = GEN_INT (len);
f38b27c7 3276 return "{depi|depwi} -1,%2,%3,%0";
0e7f4c19 3277}
520babc7
JL
3278
3279/* Return a string to perform a bitwise-and of operands[1] with operands[2]
3280 storing the result in operands[0]. */
0952f89b 3281const char *
ae9d61ab 3282pa_output_64bit_ior (rtx *operands)
520babc7
JL
3283{
3284 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
e0c556d3 3285 int bs0, bs1, p, len;
520babc7
JL
3286
3287 if (INTVAL (operands[2]) == 0)
3288 return "copy %1,%0";
3289
3290 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
e0c556d3 3291 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
520babc7
JL
3292 break;
3293
3294 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
e0c556d3 3295 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
520babc7
JL
3296 break;
3297
144d51f9
NS
3298 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3299 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
520babc7
JL
3300
3301 p = 63 - bs0;
3302 len = bs1 - bs0;
3303
3304 operands[2] = GEN_INT (p);
3305 operands[3] = GEN_INT (len);
3306 return "depdi -1,%2,%3,%0";
3307}
0e7f4c19 3308\f
301d03af 3309/* Target hook for assembling integer objects. This code handles
cdcb88d7
JDA
3310 aligned SI and DI integers specially since function references
3311 must be preceded by P%. */
301d03af
RS
3312
3313static bool
b7849684 3314pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
301d03af 3315{
686048e4
JDA
3316 bool result;
3317 tree decl = NULL;
3318
3319 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to call
3320 call assemble_external and set the SYMBOL_REF_DECL to NULL before
3321 calling output_addr_const. Otherwise, it may call assemble_external
3322 in the midst of outputing the assembler code for the SYMBOL_REF.
3323 We restore the SYMBOL_REF_DECL after the output is done. */
3324 if (GET_CODE (x) == SYMBOL_REF)
3325 {
3326 decl = SYMBOL_REF_DECL (x);
3327 if (decl)
3328 {
3329 assemble_external (decl);
3330 SET_SYMBOL_REF_DECL (x, NULL);
3331 }
3332 }
3333
cdcb88d7
JDA
3334 if (size == UNITS_PER_WORD
3335 && aligned_p
301d03af
RS
3336 && function_label_operand (x, VOIDmode))
3337 {
ef719901
JDA
3338 fputs (size == 8? "\t.dword\t" : "\t.word\t", asm_out_file);
3339
3340 /* We don't want an OPD when generating fast indirect calls. */
3341 if (!TARGET_FAST_INDIRECT_CALLS)
3342 fputs ("P%", asm_out_file);
3343
301d03af
RS
3344 output_addr_const (asm_out_file, x);
3345 fputc ('\n', asm_out_file);
686048e4 3346 result = true;
301d03af 3347 }
686048e4
JDA
3348 else
3349 result = default_assemble_integer (x, size, aligned_p);
3350
3351 if (decl)
3352 SET_SYMBOL_REF_DECL (x, decl);
3353
3354 return result;
301d03af
RS
3355}
3356\f
/* Output an ascii string.

   Write the SIZE bytes starting at P to FILE as one or more .STRING
   directives.  Double quotes and backslashes are preceded by a
   backslash; bytes outside the printable range ' ' .. '~' are written
   as \xNN with two lowercase hex digits.

   The HP assembler can only take strings of 256 characters at one
   time.  This is a limitation on input line length, *not* the
   length of the string.  Sigh.  Even worse, it seems that the
   restriction is in number of input characters (see \xnn &
   \whatever).  So we have to do this very carefully: the input is
   processed in groups of up to four bytes, and a new directive is
   started whenever a group would push the escaped text of the
   current one past 243 characters.  */

void
pa_output_ascii (FILE *file, const char *p, int size)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned char partial_output[16];  /* Max space 4 chars can occupy.  */
  int chars_output = 0;
  int i;

  fputs ("\t.STRING \"", file);

  for (i = 0; i < size; i += 4)
    {
      /* Number of input bytes in this group: four, or fewer at the end.  */
      int group = size - i < 4 ? size - i : 4;
      int co = 0;
      int io;

      for (io = 0; io < group; io++)
        {
          /* Go through unsigned char so bytes >= 0x80 are not negative.  */
          unsigned int c = (unsigned char) p[i + io];

          if (c == '\"' || c == '\\')
            partial_output[co++] = '\\';
          if (c >= ' ' && c < 0177)
            partial_output[co++] = c;
          else
            {
              partial_output[co++] = '\\';
              partial_output[co++] = 'x';
              partial_output[co++] = hex_digits[c / 16];
              partial_output[co++] = hex_digits[c % 16];
            }
        }

      /* Start a fresh directive rather than overrun the line limit.  */
      if (chars_output + co > 243)
        {
          fputs ("\"\n\t.STRING \"", file);
          chars_output = 0;
        }
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
    }

  fputs ("\"\n", file);
}
5621d717
JL
3412
3413/* Try to rewrite floating point comparisons & branches to avoid
3414 useless add,tr insns.
3415
3416 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3417 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3418 first attempt to remove useless add,tr insns. It is zero
3419 for the second pass as reorg sometimes leaves bogus REG_DEAD
3420 notes lying around.
3421
3422 When CHECK_NOTES is zero we can only eliminate add,tr insns
3423 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3424 instructions. */
519104fe 3425static void
b7849684 3426remove_useless_addtr_insns (int check_notes)
5621d717 3427{
4ce524a1 3428 rtx_insn *insn;
5621d717
JL
3429 static int pass = 0;
3430
3431 /* This is fairly cheap, so always run it when optimizing. */
3432 if (optimize > 0)
3433 {
3434 int fcmp_count = 0;
3435 int fbranch_count = 0;
3436
3437 /* Walk all the insns in this function looking for fcmp & fbranch
3438 instructions. Keep track of how many of each we find. */
18dbd950 3439 for (insn = get_insns (); insn; insn = next_insn (insn))
5621d717
JL
3440 {
3441 rtx tmp;
3442
3443 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
b64925dc 3444 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
5621d717
JL
3445 continue;
3446
3447 tmp = PATTERN (insn);
3448
3449 /* It must be a set. */
3450 if (GET_CODE (tmp) != SET)
3451 continue;
3452
3453 /* If the destination is CCFP, then we've found an fcmp insn. */
3454 tmp = SET_DEST (tmp);
3455 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3456 {
3457 fcmp_count++;
3458 continue;
3459 }
6619e96c 3460
5621d717
JL
3461 tmp = PATTERN (insn);
3462 /* If this is an fbranch instruction, bump the fbranch counter. */
3463 if (GET_CODE (tmp) == SET
3464 && SET_DEST (tmp) == pc_rtx
3465 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3466 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3467 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3468 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3469 {
3470 fbranch_count++;
3471 continue;
3472 }
3473 }
3474
3475
3476 /* Find all floating point compare + branch insns. If possible,
3477 reverse the comparison & the branch to avoid add,tr insns. */
18dbd950 3478 for (insn = get_insns (); insn; insn = next_insn (insn))
5621d717 3479 {
4ce524a1
DM
3480 rtx tmp;
3481 rtx_insn *next;
5621d717
JL
3482
3483 /* Ignore anything that isn't an INSN. */
b64925dc 3484 if (! NONJUMP_INSN_P (insn))
5621d717
JL
3485 continue;
3486
3487 tmp = PATTERN (insn);
3488
3489 /* It must be a set. */
3490 if (GET_CODE (tmp) != SET)
3491 continue;
3492
3493 /* The destination must be CCFP, which is register zero. */
3494 tmp = SET_DEST (tmp);
3495 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3496 continue;
3497
3498 /* INSN should be a set of CCFP.
3499
3500 See if the result of this insn is used in a reversed FP
3501 conditional branch. If so, reverse our condition and
3502 the branch. Doing so avoids useless add,tr insns. */
3503 next = next_insn (insn);
3504 while (next)
3505 {
3506 /* Jumps, calls and labels stop our search. */
b64925dc 3507 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
5621d717
JL
3508 break;
3509
3510 /* As does another fcmp insn. */
b64925dc 3511 if (NONJUMP_INSN_P (next)
5621d717
JL
3512 && GET_CODE (PATTERN (next)) == SET
3513 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3514 && REGNO (SET_DEST (PATTERN (next))) == 0)
3515 break;
3516
3517 next = next_insn (next);
3518 }
3519
3520 /* Is NEXT_INSN a branch? */
b64925dc 3521 if (next && JUMP_P (next))
5621d717
JL
3522 {
3523 rtx pattern = PATTERN (next);
3524
112cdef5 3525 /* If it a reversed fp conditional branch (e.g. uses add,tr)
5621d717
JL
3526 and CCFP dies, then reverse our conditional and the branch
3527 to avoid the add,tr. */
3528 if (GET_CODE (pattern) == SET
3529 && SET_DEST (pattern) == pc_rtx
3530 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3531 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3532 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3533 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3534 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3535 && (fcmp_count == fbranch_count
3536 || (check_notes
3537 && find_regno_note (next, REG_DEAD, 0))))
3538 {
3539 /* Reverse the branch. */
3540 tmp = XEXP (SET_SRC (pattern), 1);
3541 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3542 XEXP (SET_SRC (pattern), 2) = tmp;
3543 INSN_CODE (next) = -1;
3544
3545 /* Reverse our condition. */
3546 tmp = PATTERN (insn);
3547 PUT_CODE (XEXP (tmp, 1),
831c1763
AM
3548 (reverse_condition_maybe_unordered
3549 (GET_CODE (XEXP (tmp, 1)))));
5621d717
JL
3550 }
3551 }
3552 }
3553 }
3554
3555 pass = !pass;
3556
3557}
188538df 3558\f
831c1763
AM
3559/* You may have trouble believing this, but this is the 32 bit HP-PA
3560 stack layout. Wow.
188538df
TG
3561
3562 Offset Contents
3563
3564 Variable arguments (optional; any number may be allocated)
3565
3566 SP-(4*(N+9)) arg word N
3567 : :
3568 SP-56 arg word 5
3569 SP-52 arg word 4
3570
3571 Fixed arguments (must be allocated; may remain unused)
3572
3573 SP-48 arg word 3
3574 SP-44 arg word 2
3575 SP-40 arg word 1
3576 SP-36 arg word 0
3577
3578 Frame Marker
3579
3580 SP-32 External Data Pointer (DP)
3581 SP-28 External sr4
3582 SP-24 External/stub RP (RP')
3583 SP-20 Current RP
3584 SP-16 Static Link
3585 SP-12 Clean up
3586 SP-8 Calling Stub RP (RP'')
3587 SP-4 Previous SP
3588
3589 Top of Frame
3590
3591 SP-0 Stack Pointer (points to next available address)
3592
3593*/
3594
3595/* This function saves registers as follows. Registers marked with ' are
3596 this function's registers (as opposed to the previous function's).
3597 If a frame_pointer isn't needed, r4 is saved as a general register;
3598 the space for the frame pointer is still allocated, though, to keep
3599 things simple.
3600
3601
3602 Top of Frame
3603
3604 SP (FP') Previous FP
3605 SP + 4 Alignment filler (sigh)
3606 SP + 8 Space for locals reserved here.
3607 .
3608 .
3609 .
3610 SP + n All call-saved registers used.
3611 .
3612 .
3613 .
3614 SP + o All call saved fp registers used.
3615 .
3616 .
3617 .
3618 SP + p (SP') points to next available address.
23f6f34f 3619
188538df
TG
3620*/
3621
08c148a8 3622/* File-scope frame state. These are assigned in pa_expand_prologue and read back by pa_output_function_prologue and pa_expand_epilogue. */
19ec6a36
AM
3623/* actual_fsize is the total frame size returned by pa_compute_frame_size; it is needed to emit return insns from
3624 leaf procedures. local_fsize is the word-rounded size of the locals plus STARTING_FRAME_OFFSET when that is added. */
a4295210
JDA
3625static HOST_WIDE_INT actual_fsize, local_fsize;
/* Nonzero when pa_compute_frame_size reported that at least one floating point register must be saved. */
3626static int save_fregs;
19ec6a36 3627
aadcdb45 3628/* Emit RTL to store REG at the memory location specified by BASE+DISP.
fc82f2f1 3629 Handle case where DISP > 8k by using the add_high_const patterns.
aadcdb45
JL
3630
3631 Note in DISP > 8k case, we will leave the high part of the address
3632 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
c5c76735 3633
f6bcf44c 3634static void
a4295210 3635store_reg (int reg, HOST_WIDE_INT disp, int base)
188538df 3636{
e0d80a58
JL
3637 rtx dest, src, basereg;
3638 rtx_insn *insn;
19ec6a36
AM
3639
3640 src = gen_rtx_REG (word_mode, reg);
3641 basereg = gen_rtx_REG (Pmode, base);
188538df 3642 if (VAL_14_BITS_P (disp))
aadcdb45 3643 {
0a81f074 3644 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
f6bcf44c 3645 insn = emit_move_insn (dest, src);
aadcdb45 3646 }
a4295210
JDA
3647 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3648 {
3649 rtx delta = GEN_INT (disp);
3650 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3651
3652 emit_move_insn (tmpreg, delta);
5dcc9605 3653 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
a4295210
JDA
3654 if (DO_FRAME_NOTES)
3655 {
bbbbb16a 3656 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
f7df4a84 3657 gen_rtx_SET (tmpreg,
bbbbb16a 3658 gen_rtx_PLUS (Pmode, basereg, delta)));
5dcc9605 3659 RTX_FRAME_RELATED_P (insn) = 1;
a4295210 3660 }
5dcc9605
JDA
3661 dest = gen_rtx_MEM (word_mode, tmpreg);
3662 insn = emit_move_insn (dest, src);
a4295210 3663 }
aadcdb45
JL
3664 else
3665 {
19ec6a36
AM
3666 rtx delta = GEN_INT (disp);
3667 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3668 rtx tmpreg = gen_rtx_REG (Pmode, 1);
a4295210 3669
19ec6a36
AM
3670 emit_move_insn (tmpreg, high);
3671 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
f6bcf44c
JDA
3672 insn = emit_move_insn (dest, src);
3673 if (DO_FRAME_NOTES)
bbbbb16a 3674 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
f7df4a84 3675 gen_rtx_SET (gen_rtx_MEM (word_mode,
bbbbb16a
ILT
3676 gen_rtx_PLUS (word_mode,
3677 basereg,
3678 delta)),
3679 src));
aadcdb45 3680 }
f6bcf44c
JDA
3681
3682 if (DO_FRAME_NOTES)
3683 RTX_FRAME_RELATED_P (insn) = 1;
aadcdb45
JL
3684}
3685
823fbbce
JDA
3686/* Emit RTL to store REG at the memory location specified by BASE and then
3687 add MOD to BASE. MOD must be <= 8k. */
aadcdb45 3688
823fbbce 3689static void
a4295210 3690store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
823fbbce 3691{
e0d80a58
JL
3692 rtx basereg, srcreg, delta;
3693 rtx_insn *insn;
823fbbce 3694
144d51f9 3695 gcc_assert (VAL_14_BITS_P (mod));
823fbbce
JDA
3696
3697 basereg = gen_rtx_REG (Pmode, base);
3698 srcreg = gen_rtx_REG (word_mode, reg);
3699 delta = GEN_INT (mod);
3700
3701 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3702 if (DO_FRAME_NOTES)
3703 {
3704 RTX_FRAME_RELATED_P (insn) = 1;
3705
3706 /* RTX_FRAME_RELATED_P must be set on each frame related set
77c4f044
RH
3707 in a parallel with more than one element. */
3708 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3709 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
823fbbce
JDA
3710 }
3711}
3712
3713/* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3714 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3715 whether to add a frame note or not.
3716
3717 In the DISP > 8k case, we leave the high part of the address in %r1.
3718 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
c5c76735 3719
f6bcf44c 3720static void
a4295210 3721set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
188538df 3722{
e0d80a58 3723 rtx_insn *insn;
19ec6a36 3724
188538df 3725 if (VAL_14_BITS_P (disp))
19ec6a36 3726 {
f6bcf44c 3727 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
0a81f074
RS
3728 plus_constant (Pmode,
3729 gen_rtx_REG (Pmode, base), disp));
19ec6a36 3730 }
a4295210
JDA
3731 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3732 {
3733 rtx basereg = gen_rtx_REG (Pmode, base);
3734 rtx delta = GEN_INT (disp);
3735 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3736
3737 emit_move_insn (tmpreg, delta);
3738 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3739 gen_rtx_PLUS (Pmode, tmpreg, basereg));
5dcc9605 3740 if (DO_FRAME_NOTES)
bbbbb16a 3741 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
f7df4a84 3742 gen_rtx_SET (tmpreg,
bbbbb16a 3743 gen_rtx_PLUS (Pmode, basereg, delta)));
a4295210 3744 }
188538df 3745 else
aadcdb45 3746 {
f6bcf44c 3747 rtx basereg = gen_rtx_REG (Pmode, base);
19ec6a36 3748 rtx delta = GEN_INT (disp);
a4295210 3749 rtx tmpreg = gen_rtx_REG (Pmode, 1);
f6bcf44c 3750
a4295210 3751 emit_move_insn (tmpreg,
f6bcf44c 3752 gen_rtx_PLUS (Pmode, basereg,
19ec6a36 3753 gen_rtx_HIGH (Pmode, delta)));
f6bcf44c 3754 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
a4295210 3755 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
aadcdb45 3756 }
f6bcf44c 3757
823fbbce 3758 if (DO_FRAME_NOTES && note)
f6bcf44c 3759 RTX_FRAME_RELATED_P (insn) = 1;
188538df
TG
3760}
3761
a4295210 3762HOST_WIDE_INT
ae9d61ab 3763pa_compute_frame_size (HOST_WIDE_INT size, int *fregs_live)
188538df 3764{
95f3f59e
JDA
3765 int freg_saved = 0;
3766 int i, j;
3767
ae9d61ab 3768 /* The code in pa_expand_prologue and pa_expand_epilogue must
95f3f59e
JDA
3769 be consistent with the rounding and size calculation done here.
3770 Change them at the same time. */
3771
3772 /* We do our own stack alignment. First, round the size of the
3773 stack locals up to a word boundary. */
3774 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3775
3776 /* Space for previous frame pointer + filler. If any frame is
3777 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3778 waste some space here for the sake of HP compatibility. The
3779 first slot is only used when the frame pointer is needed. */
3780 if (size || frame_pointer_needed)
3781 size += STARTING_FRAME_OFFSET;
3782
823fbbce
JDA
3783 /* If the current function calls __builtin_eh_return, then we need
3784 to allocate stack space for registers that will hold data for
3785 the exception handler. */
e3b5732b 3786 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
3787 {
3788 unsigned int i;
3789
3790 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3791 continue;
95f3f59e 3792 size += i * UNITS_PER_WORD;
823fbbce
JDA
3793 }
3794
6261ede7 3795 /* Account for space used by the callee general register saves. */
95f3f59e 3796 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
6fb5fa3c 3797 if (df_regs_ever_live_p (i))
95f3f59e 3798 size += UNITS_PER_WORD;
80225b66 3799
6261ede7 3800 /* Account for space used by the callee floating point register saves. */
88624c0e 3801 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
6fb5fa3c
DB
3802 if (df_regs_ever_live_p (i)
3803 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
80225b66 3804 {
95f3f59e 3805 freg_saved = 1;
9e18f575 3806
6261ede7
JL
3807 /* We always save both halves of the FP register, so always
3808 increment the frame size by 8 bytes. */
95f3f59e 3809 size += 8;
80225b66
TG
3810 }
3811
95f3f59e
JDA
3812 /* If any of the floating registers are saved, account for the
3813 alignment needed for the floating point register save block. */
3814 if (freg_saved)
3815 {
3816 size = (size + 7) & ~7;
3817 if (fregs_live)
3818 *fregs_live = 1;
3819 }
3820
6261ede7 3821 /* The various ABIs include space for the outgoing parameters in the
95f3f59e
JDA
3822 size of the current function's stack frame. We don't need to align
3823 for the outgoing arguments as their alignment is set by the final
3824 rounding for the frame as a whole. */
38173d38 3825 size += crtl->outgoing_args_size;
6261ede7
JL
3826
3827 /* Allocate space for the fixed frame marker. This space must be
685d0e07 3828 allocated for any function that makes calls or allocates
6261ede7 3829 stack space. */
416ff32e 3830 if (!crtl->is_leaf || size)
685d0e07 3831 size += TARGET_64BIT ? 48 : 32;
520babc7 3832
95f3f59e 3833 /* Finally, round to the preferred stack boundary. */
5fad1c24
JDA
3834 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3835 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
188538df 3836}
23f6f34f 3837
08c148a8
NB
3838/* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3839 of memory. If any fpu reg is used in the function, we allocate
3840 such a block here, at the bottom of the frame, just in case it's needed.
3841
3842 If this function is a leaf procedure, then we may choose not
3843 to do a "save" insn. The decision about whether or not
3844 to do this is made in regclass.c. */
3845
c590b625 3846static void
42776416 3847pa_output_function_prologue (FILE *file)
188538df 3848{
ba0bfdac
JL
3849 /* The function's label and associated .PROC must never be
3850 separated and must be output *after* any profiling declarations
3851 to avoid changing spaces/subspaces within a procedure. */
3852 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3853 fputs ("\t.PROC\n", file);
3854
ae9d61ab 3855 /* pa_expand_prologue does the dirty work now. We just need
aadcdb45
JL
3856 to output the assembler directives which denote the start
3857 of a function. */
a4295210 3858 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
416ff32e 3859 if (crtl->is_leaf)
e236a9ff 3860 fputs (",NO_CALLS", file);
16c16a24
JDA
3861 else
3862 fputs (",CALLS", file);
3863 if (rp_saved)
3864 fputs (",SAVE_RP", file);
da3c3336 3865
685d0e07
JDA
3866 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3867 at the beginning of the frame and that it is used as the frame
3868 pointer for the frame. We do this because our current frame
a4d05547 3869 layout doesn't conform to that specified in the HP runtime
685d0e07
JDA
3870 documentation and we need a way to indicate to programs such as
3871 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3872 isn't used by HP compilers but is supported by the assembler.
3873 However, SAVE_SP is supposed to indicate that the previous stack
3874 pointer has been saved in the frame marker. */
da3c3336 3875 if (frame_pointer_needed)
e236a9ff 3876 fputs (",SAVE_SP", file);
da3c3336 3877
68386e1e 3878 /* Pass on information about the number of callee register saves
e8cfae5c
JL
3879 performed in the prologue.
3880
3881 The compiler is supposed to pass the highest register number
23f6f34f 3882 saved, the assembler then has to adjust that number before
e8cfae5c 3883 entering it into the unwind descriptor (to account for any
23f6f34f 3884 caller saved registers with lower register numbers than the
e8cfae5c
JL
3885 first callee saved register). */
3886 if (gr_saved)
3887 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3888
3889 if (fr_saved)
3890 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
68386e1e 3891
e236a9ff 3892 fputs ("\n\t.ENTRY\n", file);
aadcdb45 3893
18dbd950 3894 remove_useless_addtr_insns (0);
aadcdb45
JL
3895}
3896
f1c7ce82 3897void
ae9d61ab 3898pa_expand_prologue (void)
aadcdb45 3899{
4971c587 3900 int merge_sp_adjust_with_store = 0;
a4295210
JDA
3901 HOST_WIDE_INT size = get_frame_size ();
3902 HOST_WIDE_INT offset;
3903 int i;
e0d80a58
JL
3904 rtx tmpreg;
3905 rtx_insn *insn;
aadcdb45 3906
68386e1e
JL
3907 gr_saved = 0;
3908 fr_saved = 0;
8a9c76f3 3909 save_fregs = 0;
6261ede7 3910
95f3f59e 3911 /* Compute total size for frame pointer, filler, locals and rounding to
ae9d61ab 3912 the next word boundary. Similar code appears in pa_compute_frame_size
95f3f59e
JDA
3913 and must be changed in tandem with this code. */
3914 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3915 if (local_fsize || frame_pointer_needed)
3916 local_fsize += STARTING_FRAME_OFFSET;
6261ede7 3917
ae9d61ab 3918 actual_fsize = pa_compute_frame_size (size, &save_fregs);
a11e0df4 3919 if (flag_stack_usage_info)
d3c12306 3920 current_function_static_stack_size = actual_fsize;
188538df 3921
aadcdb45 3922 /* Compute a few things we will use often. */
690d4228 3923 tmpreg = gen_rtx_REG (word_mode, 1);
188538df 3924
23f6f34f 3925 /* Save RP first. The calling conventions manual states RP will
19ec6a36 3926 always be stored into the caller's frame at sp - 20 or sp - 16
520babc7 3927 depending on which ABI is in use. */
e3b5732b 3928 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
16c16a24
JDA
3929 {
3930 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3931 rp_saved = true;
3932 }
3933 else
3934 rp_saved = false;
23f6f34f 3935
aadcdb45 3936 /* Allocate the local frame and set up the frame pointer if needed. */
31d68947
AM
3937 if (actual_fsize != 0)
3938 {
3939 if (frame_pointer_needed)
3940 {
3941 /* Copy the old frame pointer temporarily into %r1. Set up the
3942 new stack pointer, then store away the saved old frame pointer
823fbbce
JDA
3943 into the stack at sp and at the same time update the stack
3944 pointer by actual_fsize bytes. Two versions, first
31d68947
AM
3945 handles small (<8k) frames. The second handles large (>=8k)
3946 frames. */
bc707992 3947 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
823fbbce 3948 if (DO_FRAME_NOTES)
77c4f044 3949 RTX_FRAME_RELATED_P (insn) = 1;
823fbbce 3950
bc707992 3951 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
823fbbce
JDA
3952 if (DO_FRAME_NOTES)
3953 RTX_FRAME_RELATED_P (insn) = 1;
3954
3955 if (VAL_14_BITS_P (actual_fsize))
3956 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
31d68947
AM
3957 else
3958 {
3959 /* It is incorrect to store the saved frame pointer at *sp,
3960 then increment sp (writes beyond the current stack boundary).
3961
3962 So instead use stwm to store at *sp and post-increment the
3963 stack pointer as an atomic operation. Then increment sp to
3964 finish allocating the new frame. */
a4295210
JDA
3965 HOST_WIDE_INT adjust1 = 8192 - 64;
3966 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
19ec6a36 3967
823fbbce 3968 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
f6bcf44c 3969 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
823fbbce 3970 adjust2, 1);
31d68947 3971 }
823fbbce 3972
685d0e07
JDA
3973 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3974 we need to store the previous stack pointer (frame pointer)
3975 into the frame marker on targets that use the HP unwind
3976 library. This allows the HP unwind library to be used to
3977 unwind GCC frames. However, we are not fully compatible
3978 with the HP library because our frame layout differs from
3979 that specified in the HP runtime specification.
3980
3981 We don't want a frame note on this instruction as the frame
3982 marker moves during dynamic stack allocation.
3983
3984 This instruction also serves as a blockage to prevent
3985 register spills from being scheduled before the stack
3986 pointer is raised. This is necessary as we store
3987 registers using the frame pointer as a base register,
3988 and the frame pointer is set before sp is raised. */
3989 if (TARGET_HPUX_UNWIND_LIBRARY)
3990 {
3991 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
3992 GEN_INT (TARGET_64BIT ? -8 : -4));
3993
3994 emit_move_insn (gen_rtx_MEM (word_mode, addr),
bc707992 3995 hard_frame_pointer_rtx);
685d0e07
JDA
3996 }
3997 else
3998 emit_insn (gen_blockage ());
31d68947
AM
3999 }
4000 /* no frame pointer needed. */
4001 else
4002 {
4003 /* In some cases we can perform the first callee register save
4004 and allocating the stack frame at the same time. If so, just
4005 make a note of it and defer allocating the frame until saving
4006 the callee registers. */
1c7a8112 4007 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
31d68947
AM
4008 merge_sp_adjust_with_store = 1;
4009 /* Can not optimize. Adjust the stack frame by actual_fsize
4010 bytes. */
4011 else
f6bcf44c 4012 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
823fbbce 4013 actual_fsize, 1);
31d68947 4014 }
a9d91d6f
RS
4015 }
4016
23f6f34f 4017 /* Normal register save.
aadcdb45
JL
4018
4019 Do not save the frame pointer in the frame_pointer_needed case. It
4020 was done earlier. */
188538df
TG
4021 if (frame_pointer_needed)
4022 {
823fbbce
JDA
4023 offset = local_fsize;
4024
4025 /* Saving the EH return data registers in the frame is the simplest
4026 way to get the frame unwind information emitted. We put them
4027 just before the general registers. */
e3b5732b 4028 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4029 {
4030 unsigned int i, regno;
4031
4032 for (i = 0; ; ++i)
4033 {
4034 regno = EH_RETURN_DATA_REGNO (i);
4035 if (regno == INVALID_REGNUM)
4036 break;
4037
bc707992 4038 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
823fbbce
JDA
4039 offset += UNITS_PER_WORD;
4040 }
4041 }
4042
4043 for (i = 18; i >= 4; i--)
6fb5fa3c 4044 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
188538df 4045 {
bc707992 4046 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
d7735a07 4047 offset += UNITS_PER_WORD;
68386e1e 4048 gr_saved++;
188538df 4049 }
e63ffc38 4050 /* Account for %r3 which is saved in a special place. */
e8cfae5c 4051 gr_saved++;
188538df 4052 }
aadcdb45 4053 /* No frame pointer needed. */
188538df
TG
4054 else
4055 {
823fbbce
JDA
4056 offset = local_fsize - actual_fsize;
4057
4058 /* Saving the EH return data registers in the frame is the simplest
4059 way to get the frame unwind information emitted. */
e3b5732b 4060 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4061 {
4062 unsigned int i, regno;
4063
4064 for (i = 0; ; ++i)
4065 {
4066 regno = EH_RETURN_DATA_REGNO (i);
4067 if (regno == INVALID_REGNUM)
4068 break;
4069
4070 /* If merge_sp_adjust_with_store is nonzero, then we can
4071 optimize the first save. */
4072 if (merge_sp_adjust_with_store)
4073 {
4074 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4075 merge_sp_adjust_with_store = 0;
4076 }
4077 else
4078 store_reg (regno, offset, STACK_POINTER_REGNUM);
4079 offset += UNITS_PER_WORD;
4080 }
4081 }
4082
4083 for (i = 18; i >= 3; i--)
6fb5fa3c 4084 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
188538df 4085 {
23f6f34f 4086 /* If merge_sp_adjust_with_store is nonzero, then we can
4971c587 4087 optimize the first GR save. */
f133af4c 4088 if (merge_sp_adjust_with_store)
4971c587 4089 {
823fbbce 4090 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4971c587 4091 merge_sp_adjust_with_store = 0;
4971c587
JL
4092 }
4093 else
f6bcf44c 4094 store_reg (i, offset, STACK_POINTER_REGNUM);
d7735a07 4095 offset += UNITS_PER_WORD;
68386e1e 4096 gr_saved++;
188538df 4097 }
aadcdb45 4098
4971c587 4099 /* If we wanted to merge the SP adjustment with a GR save, but we never
aadcdb45 4100 did any GR saves, then just emit the adjustment here. */
f133af4c 4101 if (merge_sp_adjust_with_store)
f6bcf44c 4102 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
823fbbce 4103 actual_fsize, 1);
188538df 4104 }
23f6f34f 4105
1c7a8112
AM
4106 /* The hppa calling conventions say that %r19, the pic offset
4107 register, is saved at sp - 32 (in this function's frame)
4108 when generating PIC code. FIXME: What is the correct thing
4109 to do for functions which make no calls and allocate no
4110 frame? Do we need to allocate a frame, or can we just omit
3ffa9dc1
JDA
4111 the save? For now we'll just omit the save.
4112
4113 We don't want a note on this insn as the frame marker can
4114 move if there is a dynamic stack allocation. */
1c7a8112 4115 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
3ffa9dc1
JDA
4116 {
4117 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4118
4119 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4120
4121 }
1c7a8112 4122
188538df
TG
4123 /* Align pointer properly (doubleword boundary). */
4124 offset = (offset + 7) & ~7;
4125
4126 /* Floating point register store. */
4127 if (save_fregs)
188538df 4128 {
823fbbce
JDA
4129 rtx base;
4130
aadcdb45
JL
4131 /* First get the frame or stack pointer to the start of the FP register
4132 save area. */
2b41935c 4133 if (frame_pointer_needed)
823fbbce 4134 {
bc707992
JDA
4135 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4136 base = hard_frame_pointer_rtx;
823fbbce 4137 }
2b41935c 4138 else
823fbbce
JDA
4139 {
4140 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4141 base = stack_pointer_rtx;
4142 }
aadcdb45
JL
4143
4144 /* Now actually save the FP registers. */
88624c0e 4145 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
e63ffc38 4146 {
6fb5fa3c
DB
4147 if (df_regs_ever_live_p (i)
4148 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
e63ffc38 4149 {
e0d80a58
JL
4150 rtx addr, reg;
4151 rtx_insn *insn;
403a3fb7
JDA
4152 addr = gen_rtx_MEM (DFmode,
4153 gen_rtx_POST_INC (word_mode, tmpreg));
19ec6a36 4154 reg = gen_rtx_REG (DFmode, i);
f6bcf44c
JDA
4155 insn = emit_move_insn (addr, reg);
4156 if (DO_FRAME_NOTES)
4157 {
4158 RTX_FRAME_RELATED_P (insn) = 1;
823fbbce
JDA
4159 if (TARGET_64BIT)
4160 {
4161 rtx mem = gen_rtx_MEM (DFmode,
0a81f074
RS
4162 plus_constant (Pmode, base,
4163 offset));
bbbbb16a 4164 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
f7df4a84 4165 gen_rtx_SET (mem, reg));
823fbbce
JDA
4166 }
4167 else
4168 {
4169 rtx meml = gen_rtx_MEM (SFmode,
0a81f074
RS
4170 plus_constant (Pmode, base,
4171 offset));
823fbbce 4172 rtx memr = gen_rtx_MEM (SFmode,
0a81f074
RS
4173 plus_constant (Pmode, base,
4174 offset + 4));
823fbbce
JDA
4175 rtx regl = gen_rtx_REG (SFmode, i);
4176 rtx regr = gen_rtx_REG (SFmode, i + 1);
f7df4a84
RS
4177 rtx setl = gen_rtx_SET (meml, regl);
4178 rtx setr = gen_rtx_SET (memr, regr);
823fbbce
JDA
4179 rtvec vec;
4180
4181 RTX_FRAME_RELATED_P (setl) = 1;
4182 RTX_FRAME_RELATED_P (setr) = 1;
4183 vec = gen_rtvec (2, setl, setr);
bbbbb16a
ILT
4184 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4185 gen_rtx_SEQUENCE (VOIDmode, vec));
823fbbce 4186 }
f6bcf44c
JDA
4187 }
4188 offset += GET_MODE_SIZE (DFmode);
e63ffc38
JL
4189 fr_saved++;
4190 }
4191 }
188538df
TG
4192 }
4193}
4194
19ec6a36
AM
4195/* Emit RTL to load REG from the memory location specified by BASE+DISP.
4196 Handle case where DISP > 8k by using the add_high_const patterns. */
4197
f6bcf44c 4198static void
a4295210 4199load_reg (int reg, HOST_WIDE_INT disp, int base)
19ec6a36 4200{
a4295210
JDA
4201 rtx dest = gen_rtx_REG (word_mode, reg);
4202 rtx basereg = gen_rtx_REG (Pmode, base);
4203 rtx src;
19ec6a36 4204
19ec6a36 4205 if (VAL_14_BITS_P (disp))
0a81f074 4206 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
a4295210 4207 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
19ec6a36 4208 {
a4295210
JDA
4209 rtx delta = GEN_INT (disp);
4210 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4211
4212 emit_move_insn (tmpreg, delta);
4213 if (TARGET_DISABLE_INDEXING)
4214 {
4215 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4216 src = gen_rtx_MEM (word_mode, tmpreg);
4217 }
4218 else
4219 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
19ec6a36
AM
4220 }
4221 else
4222 {
4223 rtx delta = GEN_INT (disp);
4224 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4225 rtx tmpreg = gen_rtx_REG (Pmode, 1);
a4295210 4226
19ec6a36
AM
4227 emit_move_insn (tmpreg, high);
4228 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
19ec6a36 4229 }
a4295210
JDA
4230
4231 emit_move_insn (dest, src);
19ec6a36 4232}
aadcdb45 4233
5fad1c24
JDA
4234/* Update the total code bytes output to the text section. */
4235
4236static void
67b846fa 4237update_total_code_bytes (unsigned int nbytes)
5fad1c24
JDA
4238{
4239 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
62910663 4240 && !IN_NAMED_SECTION_P (cfun->decl))
5fad1c24 4241 {
67b846fa 4242 unsigned int old_total = total_code_bytes;
5fad1c24 4243
67b846fa 4244 total_code_bytes += nbytes;
5fad1c24 4245
67b846fa
JDA
4246 /* Be prepared to handle overflows. */
4247 if (old_total > total_code_bytes)
4248 total_code_bytes = UINT_MAX;
5fad1c24
JDA
4249 }
4250}
4251
08c148a8
NB
4252/* This function generates the assembly code for function exit.
4253 Args are as for output_function_prologue ().
4254
4255 The function epilogue should not depend on the current stack
4256 pointer! It should use the frame pointer only. This is mandatory
4257 because of alloca; we also take advantage of it to omit stack
fe19a83d 4258 adjustments before returning. */
08c148a8
NB
4259
4260static void
42776416 4261pa_output_function_epilogue (FILE *file)
188538df 4262{
84034c69 4263 rtx_insn *insn = get_last_insn ();
5dba8769 4264 bool extra_nop;
5fad1c24 4265
ae9d61ab 4266 /* pa_expand_epilogue does the dirty work now. We just need
aadcdb45 4267 to output the assembler directives which denote the end
08a2b118
RS
4268 of a function.
4269
4270 To make debuggers happy, emit a nop if the epilogue was completely
4271 eliminated due to a volatile call as the last insn in the
23f6f34f 4272 current function. That way the return address (in %r2) will
08a2b118
RS
4273 always point to a valid instruction in the current function. */
4274
4275 /* Get the last real insn. */
b64925dc 4276 if (NOTE_P (insn))
08a2b118
RS
4277 insn = prev_real_insn (insn);
4278
4279 /* If it is a sequence, then look inside. */
b64925dc 4280 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
84034c69 4281 insn = as_a <rtx_sequence *> (PATTERN (insn))-> insn (0);
08a2b118 4282
23f6f34f 4283 /* If insn is a CALL_INSN, then it must be a call to a volatile
08a2b118 4284 function (otherwise there would be epilogue insns). */
b64925dc 4285 if (insn && CALL_P (insn))
17e6098e
JDA
4286 {
4287 fputs ("\tnop\n", file);
5dba8769 4288 extra_nop = true;
17e6098e 4289 }
5dba8769
JDA
4290 else
4291 extra_nop = false;
23f6f34f 4292
e236a9ff 4293 fputs ("\t.EXIT\n\t.PROCEND\n", file);
17e6098e 4294
9a55eab3
JDA
4295 if (TARGET_SOM && TARGET_GAS)
4296 {
a9a302d9 4297 /* We are done with this subspace except possibly for some additional
9a55eab3
JDA
4298 debug information. Forget that we are in this subspace to ensure
4299 that the next function is output in its own subspace. */
d6b5193b 4300 in_section = NULL;
1a83bfc3 4301 cfun->machine->in_nsubspa = 2;
9a55eab3
JDA
4302 }
4303
5dba8769 4304 /* Thunks do their own insn accounting. */
a9a302d9
JDA
4305 if (cfun->is_thunk)
4306 return;
4307
5fad1c24 4308 if (INSN_ADDRESSES_SET_P ())
17e6098e 4309 {
5dba8769 4310 last_address = extra_nop ? 4 : 0;
5fad1c24 4311 insn = get_last_nonnote_insn ();
501fcaf5
JDA
4312 if (insn)
4313 {
4314 last_address += INSN_ADDRESSES (INSN_UID (insn));
4315 if (INSN_P (insn))
4316 last_address += insn_default_length (insn);
4317 }
5fad1c24
JDA
4318 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4319 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
17e6098e 4320 }
67b846fa
JDA
4321 else
4322 last_address = UINT_MAX;
5fad1c24
JDA
4323
4324 /* Finally, update the total number of code bytes output so far. */
4325 update_total_code_bytes (last_address);
aadcdb45 4326}
4971c587 4327
aadcdb45 4328void
ae9d61ab 4329pa_expand_epilogue (void)
aadcdb45 4330{
23f6f34f 4331 rtx tmpreg;
a4295210
JDA
4332 HOST_WIDE_INT offset;
4333 HOST_WIDE_INT ret_off = 0;
4334 int i;
31d68947 4335 int merge_sp_adjust_with_load = 0;
aadcdb45
JL
4336
4337 /* We will use this often. */
690d4228 4338 tmpreg = gen_rtx_REG (word_mode, 1);
aadcdb45
JL
4339
4340 /* Try to restore RP early to avoid load/use interlocks when
4341 RP gets used in the return (bv) instruction. This appears to still
fe19a83d 4342 be necessary even when we schedule the prologue and epilogue. */
16c16a24 4343 if (rp_saved)
31d68947
AM
4344 {
4345 ret_off = TARGET_64BIT ? -16 : -20;
4346 if (frame_pointer_needed)
4347 {
bc707992 4348 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
31d68947
AM
4349 ret_off = 0;
4350 }
4351 else
4352 {
4353 /* No frame pointer, and stack is smaller than 8k. */
4354 if (VAL_14_BITS_P (ret_off - actual_fsize))
4355 {
f6bcf44c 4356 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
31d68947
AM
4357 ret_off = 0;
4358 }
4359 }
4360 }
aadcdb45
JL
4361
4362 /* General register restores. */
188538df
TG
4363 if (frame_pointer_needed)
4364 {
823fbbce
JDA
4365 offset = local_fsize;
4366
4367 /* If the current function calls __builtin_eh_return, then we need
4368 to restore the saved EH data registers. */
e3b5732b 4369 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4370 {
4371 unsigned int i, regno;
4372
4373 for (i = 0; ; ++i)
4374 {
4375 regno = EH_RETURN_DATA_REGNO (i);
4376 if (regno == INVALID_REGNUM)
4377 break;
4378
bc707992 4379 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
823fbbce
JDA
4380 offset += UNITS_PER_WORD;
4381 }
4382 }
4383
4384 for (i = 18; i >= 4; i--)
6fb5fa3c 4385 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
188538df 4386 {
bc707992 4387 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
d7735a07 4388 offset += UNITS_PER_WORD;
188538df 4389 }
188538df
TG
4390 }
4391 else
4392 {
823fbbce
JDA
4393 offset = local_fsize - actual_fsize;
4394
4395 /* If the current function calls __builtin_eh_return, then we need
4396 to restore the saved EH data registers. */
e3b5732b 4397 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4398 {
4399 unsigned int i, regno;
4400
4401 for (i = 0; ; ++i)
4402 {
4403 regno = EH_RETURN_DATA_REGNO (i);
4404 if (regno == INVALID_REGNUM)
4405 break;
4406
4407 /* Only for the first load.
4408 merge_sp_adjust_with_load holds the register load
4409 with which we will merge the sp adjustment. */
4410 if (merge_sp_adjust_with_load == 0
4411 && local_fsize == 0
4412 && VAL_14_BITS_P (-actual_fsize))
4413 merge_sp_adjust_with_load = regno;
4414 else
4415 load_reg (regno, offset, STACK_POINTER_REGNUM);
4416 offset += UNITS_PER_WORD;
4417 }
4418 }
4419
4420 for (i = 18; i >= 3; i--)
e63ffc38 4421 {
6fb5fa3c 4422 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
e63ffc38 4423 {
e63ffc38
JL
4424 /* Only for the first load.
4425 merge_sp_adjust_with_load holds the register load
4426 with which we will merge the sp adjustment. */
31d68947 4427 if (merge_sp_adjust_with_load == 0
e63ffc38 4428 && local_fsize == 0
31d68947 4429 && VAL_14_BITS_P (-actual_fsize))
e63ffc38
JL
4430 merge_sp_adjust_with_load = i;
4431 else
f6bcf44c 4432 load_reg (i, offset, STACK_POINTER_REGNUM);
d7735a07 4433 offset += UNITS_PER_WORD;
e63ffc38
JL
4434 }
4435 }
188538df 4436 }
aadcdb45 4437
188538df
TG
4438 /* Align pointer properly (doubleword boundary). */
4439 offset = (offset + 7) & ~7;
4440
aadcdb45 4441 /* FP register restores. */
188538df 4442 if (save_fregs)
188538df 4443 {
aadcdb45 4444 /* Adjust the register to index off of. */
2b41935c 4445 if (frame_pointer_needed)
bc707992 4446 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
2b41935c 4447 else
823fbbce 4448 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
aadcdb45
JL
4449
4450 /* Actually do the restores now. */
88624c0e 4451 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
6fb5fa3c
DB
4452 if (df_regs_ever_live_p (i)
4453 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
19ec6a36 4454 {
403a3fb7
JDA
4455 rtx src = gen_rtx_MEM (DFmode,
4456 gen_rtx_POST_INC (word_mode, tmpreg));
19ec6a36 4457 rtx dest = gen_rtx_REG (DFmode, i);
f6bcf44c 4458 emit_move_insn (dest, src);
19ec6a36 4459 }
188538df 4460 }
aadcdb45 4461
1144563f
JL
4462 /* Emit a blockage insn here to keep these insns from being moved to
4463 an earlier spot in the epilogue, or into the main instruction stream.
4464
4465 This is necessary as we must not cut the stack back before all the
4466 restores are finished. */
4467 emit_insn (gen_blockage ());
aadcdb45 4468
6619e96c 4469 /* Reset stack pointer (and possibly frame pointer). The stack
68944452 4470 pointer is initially set to fp + 64 to avoid a race condition. */
31d68947 4471 if (frame_pointer_needed)
188538df 4472 {
19ec6a36 4473 rtx delta = GEN_INT (-64);
823fbbce 4474
bc707992
JDA
4475 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4476 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4477 stack_pointer_rtx, delta));
188538df 4478 }
aadcdb45 4479 /* If we were deferring a callee register restore, do it now. */
31d68947
AM
4480 else if (merge_sp_adjust_with_load)
4481 {
4482 rtx delta = GEN_INT (-actual_fsize);
19ec6a36 4483 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
823fbbce
JDA
4484
4485 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
31d68947 4486 }
aadcdb45 4487 else if (actual_fsize != 0)
823fbbce
JDA
4488 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4489 - actual_fsize, 0);
31d68947
AM
4490
4491 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4492 frame greater than 8k), do so now. */
4493 if (ret_off != 0)
f6bcf44c 4494 load_reg (2, ret_off, STACK_POINTER_REGNUM);
823fbbce 4495
e3b5732b 4496 if (DO_FRAME_NOTES && crtl->calls_eh_return)
823fbbce
JDA
4497 {
4498 rtx sa = EH_RETURN_STACKADJ_RTX;
4499
4500 emit_insn (gen_blockage ());
4501 emit_insn (TARGET_64BIT
4502 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4503 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4504 }
15768583
JDA
4505}
4506
4507bool
4508pa_can_use_return_insn (void)
4509{
4510 if (!reload_completed)
4511 return false;
4512
4513 if (frame_pointer_needed)
4514 return false;
4515
4516 if (df_regs_ever_live_p (2))
4517 return false;
4518
4519 if (crtl->profile)
4520 return false;
4521
ae9d61ab 4522 return pa_compute_frame_size (get_frame_size (), 0) == 0;
188538df
TG
4523}
4524
d777856d 4525rtx
b7849684 4526hppa_pic_save_rtx (void)
824e7605 4527{
d777856d 4528 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
1c7a8112
AM
4529}
4530
3674b34d
JDA
4531#ifndef NO_DEFERRED_PROFILE_COUNTERS
4532#define NO_DEFERRED_PROFILE_COUNTERS 0
4533#endif
4534
3674b34d
JDA
4535
4536/* Vector of funcdef numbers. */
9771b263 4537static vec<int> funcdef_nos;
3674b34d
JDA
4538
4539/* Output deferred profile counters. */
4540static void
4541output_deferred_profile_counters (void)
4542{
4543 unsigned int i;
4544 int align, n;
4545
9771b263 4546 if (funcdef_nos.is_empty ())
3674b34d
JDA
4547 return;
4548
d6b5193b 4549 switch_to_section (data_section);
3674b34d
JDA
4550 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4551 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4552
9771b263 4553 for (i = 0; funcdef_nos.iterate (i, &n); i++)
3674b34d
JDA
4554 {
4555 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4556 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4557 }
4558
9771b263 4559 funcdef_nos.release ();
3674b34d
JDA
4560}
4561
1c7a8112 4562void
b7849684 4563hppa_profile_hook (int label_no)
1c7a8112 4564{
a3d4c92f
RC
4565 /* We use SImode for the address of the function in both 32 and
4566 64-bit code to avoid having to provide DImode versions of the
4567 lcla2 and load_offset_label_address insn patterns. */
4568 rtx reg = gen_reg_rtx (SImode);
19f8b229 4569 rtx_code_label *label_rtx = gen_label_rtx ();
730a27a2
JDA
4570 rtx mcount = gen_rtx_MEM (Pmode, gen_rtx_SYMBOL_REF (Pmode, "_mcount"));
4571 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4572 rtx arg_bytes, begin_label_rtx;
e0d80a58 4573 rtx_insn *call_insn;
8f949e7e 4574 char begin_label_name[16];
730a27a2
JDA
4575 bool use_mcount_pcrel_call;
4576
4577 /* If we can reach _mcount with a pc-relative call, we can optimize
4578 loading the address of the current function. This requires linker
4579 long branch stub support. */
4580 if (!TARGET_PORTABLE_RUNTIME
4581 && !TARGET_LONG_CALLS
4582 && (TARGET_SOM || flag_function_sections))
4583 use_mcount_pcrel_call = TRUE;
4584 else
4585 use_mcount_pcrel_call = FALSE;
1c7a8112 4586
8f949e7e 4587 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
f6f315fe 4588 label_no);
a3d4c92f 4589 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
1c7a8112 4590
1c7a8112
AM
4591 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4592
730a27a2
JDA
4593 if (!use_mcount_pcrel_call)
4594 {
4595 /* The address of the function is loaded into %r25 with an instruction-
4596 relative sequence that avoids the use of relocations. The sequence
4597 is split so that the load_offset_label_address instruction can
4598 occupy the delay slot of the call to _mcount. */
4599 if (TARGET_PA_20)
4600 emit_insn (gen_lcla2 (reg, label_rtx));
4601 else
4602 emit_insn (gen_lcla1 (reg, label_rtx));
1c7a8112 4603
730a27a2
JDA
4604 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode, 25),
4605 reg,
4606 begin_label_rtx,
4607 label_rtx));
4608 }
a3d4c92f 4609
730a27a2
JDA
4610 if (!NO_DEFERRED_PROFILE_COUNTERS)
4611 {
4612 rtx count_label_rtx, addr, r24;
4613 char count_label_name[16];
4614
4615 funcdef_nos.safe_push (label_no);
4616 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4617 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4618 ggc_strdup (count_label_name));
4619
4620 addr = force_reg (Pmode, count_label_rtx);
4621 r24 = gen_rtx_REG (Pmode, 24);
4622 emit_move_insn (r24, addr);
4623
4624 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4625 if (use_mcount_pcrel_call)
4626 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4627 begin_label_rtx));
4628 else
4629 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
a3d4c92f 4630
730a27a2
JDA
4631 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4632 }
4633 else
4634 {
4635 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4636 if (use_mcount_pcrel_call)
4637 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4638 begin_label_rtx));
4639 else
4640 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4641 }
1c7a8112 4642
a3d4c92f
RC
4643 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4644 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4645
1c7a8112
AM
4646 /* Indicate the _mcount call cannot throw, nor will it execute a
4647 non-local goto. */
062a5fd1 4648 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
730a27a2
JDA
4649
4650 /* Allocate space for fixed arguments. */
4651 if (reg_parm_stack_space > crtl->outgoing_args_size)
4652 crtl->outgoing_args_size = reg_parm_stack_space;
824e7605
AM
4653}
4654
e99d6592
MS
4655/* Fetch the return address for the frame COUNT steps up from
4656 the current frame, after the prologue. FRAMEADDR is the
4657 frame pointer of the COUNT frame.
4658
cf3735b8
JDA
4659 We want to ignore any export stub remnants here. To handle this,
4660 we examine the code at the return address, and if it is an export
4661 stub, we return a memory rtx for the stub return address stored
4662 at frame-24.
c28eb6c2
JL
4663
4664 The value returned is used in two different ways:
4665
4666 1. To find a function's caller.
4667
4668 2. To change the return address for a function.
4669
4670 This function handles most instances of case 1; however, it will
4671 fail if there are two levels of stubs to execute on the return
4672 path. The only way I believe that can happen is if the return value
4673 needs a parameter relocation, which never happens for C code.
4674
4675 This function handles most instances of case 2; however, it will
4676 fail if we did not originally have stub code on the return path
cf3735b8 4677 but will need stub code on the new return path. This can happen if
c28eb6c2 4678 the caller & callee are both in the main program, but the new
cf3735b8 4679 return location is in a shared library. */
e99d6592
MS
4680
4681rtx
ae9d61ab 4682pa_return_addr_rtx (int count, rtx frameaddr)
e99d6592
MS
4683{
4684 rtx label;
cf3735b8 4685 rtx rp;
e99d6592
MS
4686 rtx saved_rp;
4687 rtx ins;
4688
df8b5535 4689 /* The instruction stream at the return address of a PA1.X export stub is:
f90b7a5a
PB
4690
4691 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4692 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4693 0x00011820 | stub+16: mtsp r1,sr0
4694 0xe0400002 | stub+20: be,n 0(sr0,rp)
4695
4696 0xe0400002 must be specified as -532676606 so that it won't be
df8b5535 4697 rejected as an invalid immediate operand on 64-bit hosts.
f90b7a5a 4698
df8b5535
JDA
4699 The instruction stream at the return address of a PA2.0 export stub is:
4700
4701 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4702 0xe840d002 | stub+12: bve,n (rp)
4703 */
4704
4705 HOST_WIDE_INT insns[4];
4706 int i, len;
f90b7a5a 4707
cf3735b8
JDA
4708 if (count != 0)
4709 return NULL_RTX;
a7721dc0 4710
cf3735b8 4711 rp = get_hard_reg_initial_val (Pmode, 2);
e99d6592 4712
cf3735b8
JDA
4713 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4714 return rp;
e99d6592 4715
f90b7a5a
PB
4716 /* If there is no export stub then just use the value saved from
4717 the return pointer register. */
4718
a7721dc0 4719 saved_rp = gen_reg_rtx (Pmode);
cf3735b8 4720 emit_move_insn (saved_rp, rp);
e99d6592
MS
4721
4722 /* Get pointer to the instruction stream. We have to mask out the
4723 privilege level from the two low order bits of the return address
4724 pointer here so that ins will point to the start of the first
4725 instruction that would have been executed if we returned. */
cf3735b8 4726 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
e99d6592
MS
4727 label = gen_label_rtx ();
4728
df8b5535
JDA
4729 if (TARGET_PA_20)
4730 {
4731 insns[0] = 0x4bc23fd1;
4732 insns[1] = -398405630;
4733 len = 2;
4734 }
4735 else
4736 {
4737 insns[0] = 0x4bc23fd1;
4738 insns[1] = 0x004010a1;
4739 insns[2] = 0x00011820;
4740 insns[3] = -532676606;
4741 len = 4;
4742 }
4743
e99d6592 4744 /* Check the instruction stream at the normal return address for the
f90b7a5a
PB
4745 export stub. If it is an export stub, than our return address is
4746 really in -24[frameaddr]. */
e99d6592 4747
df8b5535 4748 for (i = 0; i < len; i++)
f90b7a5a 4749 {
0a81f074 4750 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
f90b7a5a
PB
4751 rtx op1 = GEN_INT (insns[i]);
4752 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4753 }
e99d6592 4754
cf3735b8 4755 /* Here we know that our return address points to an export
e99d6592 4756 stub. We don't want to return the address of the export stub,
cf3735b8
JDA
4757 but rather the return address of the export stub. That return
4758 address is stored at -24[frameaddr]. */
e99d6592 4759
cf3735b8
JDA
4760 emit_move_insn (saved_rp,
4761 gen_rtx_MEM (Pmode,
4762 memory_address (Pmode,
0a81f074 4763 plus_constant (Pmode, frameaddr,
cf3735b8 4764 -24))));
e99d6592
MS
4765
4766 emit_label (label);
f90b7a5a 4767
cf3735b8 4768 return saved_rp;
e99d6592
MS
4769}
4770
188538df 4771void
ae9d61ab 4772pa_emit_bcond_fp (rtx operands[])
188538df 4773{
f90b7a5a
PB
4774 enum rtx_code code = GET_CODE (operands[0]);
4775 rtx operand0 = operands[1];
4776 rtx operand1 = operands[2];
4777 rtx label = operands[3];
4778
f7df4a84 4779 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
f90b7a5a
PB
4780 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4781
f7df4a84 4782 emit_jump_insn (gen_rtx_SET (pc_rtx,
ad2c71b7 4783 gen_rtx_IF_THEN_ELSE (VOIDmode,
f90b7a5a 4784 gen_rtx_fmt_ee (NE,
ad2c71b7
JL
4785 VOIDmode,
4786 gen_rtx_REG (CCFPmode, 0),
4787 const0_rtx),
f90b7a5a 4788 gen_rtx_LABEL_REF (VOIDmode, label),
ad2c71b7 4789 pc_rtx)));
188538df
TG
4790
4791}
4792
780f491f
TG
4793/* Adjust the cost of a scheduling dependency. Return the new cost of
4794 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4795
c237e94a 4796static int
b505225b
TS
4797pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4798 unsigned int)
780f491f 4799{
b09fa787
JL
4800 enum attr_type attr_type;
4801
5d50fab3
JL
4802 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4803 true dependencies as they are described with bypasses now. */
b505225b 4804 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
86001391
JQ
4805 return cost;
4806
e150ae4f
TG
4807 if (! recog_memoized (insn))
4808 return 0;
780f491f 4809
b09fa787
JL
4810 attr_type = get_attr_type (insn);
4811
b505225b 4812 switch (dep_type)
780f491f 4813 {
144d51f9 4814 case REG_DEP_ANTI:
780f491f
TG
4815 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4816 cycles later. */
4817
b09fa787 4818 if (attr_type == TYPE_FPLOAD)
780f491f 4819 {
e150ae4f
TG
4820 rtx pat = PATTERN (insn);
4821 rtx dep_pat = PATTERN (dep_insn);
4822 if (GET_CODE (pat) == PARALLEL)
4823 {
4824 /* This happens for the fldXs,mb patterns. */
4825 pat = XVECEXP (pat, 0, 0);
4826 }
4827 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
780f491f 4828 /* If this happens, we have to extend this to schedule
e150ae4f
TG
4829 optimally. Return 0 for now. */
4830 return 0;
780f491f 4831
e150ae4f 4832 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
780f491f 4833 {
e150ae4f
TG
4834 if (! recog_memoized (dep_insn))
4835 return 0;
780f491f
TG
4836 switch (get_attr_type (dep_insn))
4837 {
4838 case TYPE_FPALU:
c47decad
JL
4839 case TYPE_FPMULSGL:
4840 case TYPE_FPMULDBL:
780f491f
TG
4841 case TYPE_FPDIVSGL:
4842 case TYPE_FPDIVDBL:
4843 case TYPE_FPSQRTSGL:
4844 case TYPE_FPSQRTDBL:
e150ae4f 4845 /* A fpload can't be issued until one cycle before a
ddd5a7c1 4846 preceding arithmetic operation has finished if
e150ae4f
TG
4847 the target of the fpload is any of the sources
4848 (or destination) of the arithmetic operation. */
5d50fab3 4849 return insn_default_latency (dep_insn) - 1;
c47decad
JL
4850
4851 default:
4852 return 0;
4853 }
4854 }
4855 }
b09fa787 4856 else if (attr_type == TYPE_FPALU)
c47decad
JL
4857 {
4858 rtx pat = PATTERN (insn);
4859 rtx dep_pat = PATTERN (dep_insn);
4860 if (GET_CODE (pat) == PARALLEL)
4861 {
4862 /* This happens for the fldXs,mb patterns. */
4863 pat = XVECEXP (pat, 0, 0);
4864 }
4865 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4866 /* If this happens, we have to extend this to schedule
4867 optimally. Return 0 for now. */
4868 return 0;
4869
4870 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4871 {
4872 if (! recog_memoized (dep_insn))
4873 return 0;
4874 switch (get_attr_type (dep_insn))
4875 {
4876 case TYPE_FPDIVSGL:
4877 case TYPE_FPDIVDBL:
4878 case TYPE_FPSQRTSGL:
4879 case TYPE_FPSQRTDBL:
4880 /* An ALU flop can't be issued until two cycles before a
ddd5a7c1 4881 preceding divide or sqrt operation has finished if
c47decad
JL
4882 the target of the ALU flop is any of the sources
4883 (or destination) of the divide or sqrt operation. */
5d50fab3 4884 return insn_default_latency (dep_insn) - 2;
780f491f
TG
4885
4886 default:
4887 return 0;
4888 }
4889 }
4890 }
4891
4892 /* For other anti dependencies, the cost is 0. */
4893 return 0;
144d51f9
NS
4894
4895 case REG_DEP_OUTPUT:
c47decad
JL
4896 /* Output dependency; DEP_INSN writes a register that INSN writes some
4897 cycles later. */
b09fa787 4898 if (attr_type == TYPE_FPLOAD)
c47decad
JL
4899 {
4900 rtx pat = PATTERN (insn);
4901 rtx dep_pat = PATTERN (dep_insn);
4902 if (GET_CODE (pat) == PARALLEL)
4903 {
4904 /* This happens for the fldXs,mb patterns. */
4905 pat = XVECEXP (pat, 0, 0);
4906 }
4907 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4908 /* If this happens, we have to extend this to schedule
4909 optimally. Return 0 for now. */
4910 return 0;
4911
4912 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4913 {
4914 if (! recog_memoized (dep_insn))
4915 return 0;
4916 switch (get_attr_type (dep_insn))
4917 {
4918 case TYPE_FPALU:
4919 case TYPE_FPMULSGL:
4920 case TYPE_FPMULDBL:
4921 case TYPE_FPDIVSGL:
4922 case TYPE_FPDIVDBL:
4923 case TYPE_FPSQRTSGL:
4924 case TYPE_FPSQRTDBL:
4925 /* A fpload can't be issued until one cycle before a
ddd5a7c1 4926 preceding arithmetic operation has finished if
c47decad 4927 the target of the fpload is the destination of the
fae15c93
VM
4928 arithmetic operation.
4929
4930 Exception: For PA7100LC, PA7200 and PA7300, the cost
4931 is 3 cycles, unless they bundle together. We also
4932 pay the penalty if the second insn is a fpload. */
5d50fab3 4933 return insn_default_latency (dep_insn) - 1;
780f491f 4934
c47decad
JL
4935 default:
4936 return 0;
4937 }
4938 }
4939 }
b09fa787 4940 else if (attr_type == TYPE_FPALU)
c47decad
JL
4941 {
4942 rtx pat = PATTERN (insn);
4943 rtx dep_pat = PATTERN (dep_insn);
4944 if (GET_CODE (pat) == PARALLEL)
4945 {
4946 /* This happens for the fldXs,mb patterns. */
4947 pat = XVECEXP (pat, 0, 0);
4948 }
4949 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4950 /* If this happens, we have to extend this to schedule
4951 optimally. Return 0 for now. */
4952 return 0;
4953
4954 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4955 {
4956 if (! recog_memoized (dep_insn))
4957 return 0;
4958 switch (get_attr_type (dep_insn))
4959 {
4960 case TYPE_FPDIVSGL:
4961 case TYPE_FPDIVDBL:
4962 case TYPE_FPSQRTSGL:
4963 case TYPE_FPSQRTDBL:
4964 /* An ALU flop can't be issued until two cycles before a
ddd5a7c1 4965 preceding divide or sqrt operation has finished if
c47decad 4966 the target of the ALU flop is also the target of
38e01259 4967 the divide or sqrt operation. */
5d50fab3 4968 return insn_default_latency (dep_insn) - 2;
c47decad
JL
4969
4970 default:
4971 return 0;
4972 }
4973 }
4974 }
4975
4976 /* For other output dependencies, the cost is 0. */
4977 return 0;
144d51f9
NS
4978
4979 default:
4980 gcc_unreachable ();
c47decad 4981 }
780f491f 4982}
188538df 4983
c237e94a
ZW
4984/* Adjust scheduling priorities. We use this to try and keep addil
4985 and the next use of %r1 close together. */
4986static int
ac44248e 4987pa_adjust_priority (rtx_insn *insn, int priority)
c237e94a
ZW
4988{
4989 rtx set = single_set (insn);
4990 rtx src, dest;
4991 if (set)
4992 {
4993 src = SET_SRC (set);
4994 dest = SET_DEST (set);
4995 if (GET_CODE (src) == LO_SUM
4996 && symbolic_operand (XEXP (src, 1), VOIDmode)
4997 && ! read_only_operand (XEXP (src, 1), VOIDmode))
4998 priority >>= 3;
4999
5000 else if (GET_CODE (src) == MEM
5001 && GET_CODE (XEXP (src, 0)) == LO_SUM
5002 && symbolic_operand (XEXP (XEXP (src, 0), 1), VOIDmode)
5003 && ! read_only_operand (XEXP (XEXP (src, 0), 1), VOIDmode))
5004 priority >>= 1;
5005
5006 else if (GET_CODE (dest) == MEM
5007 && GET_CODE (XEXP (dest, 0)) == LO_SUM
5008 && symbolic_operand (XEXP (XEXP (dest, 0), 1), VOIDmode)
5009 && ! read_only_operand (XEXP (XEXP (dest, 0), 1), VOIDmode))
5010 priority >>= 3;
5011 }
5012 return priority;
5013}
5014
5015/* The 700 can only issue a single insn at a time.
5016 The 7XXX processors can issue two insns at a time.
5017 The 8000 can issue 4 insns at a time. */
5018static int
b7849684 5019pa_issue_rate (void)
c237e94a
ZW
5020{
5021 switch (pa_cpu)
5022 {
5023 case PROCESSOR_700: return 1;
5024 case PROCESSOR_7100: return 2;
5025 case PROCESSOR_7100LC: return 2;
5026 case PROCESSOR_7200: return 2;
fae15c93 5027 case PROCESSOR_7300: return 2;
c237e94a
ZW
5028 case PROCESSOR_8000: return 4;
5029
5030 default:
144d51f9 5031 gcc_unreachable ();
c237e94a
ZW
5032 }
5033}
5034
5035
5036
ab11fb42
JDA
5037/* Return any length plus adjustment needed by INSN which already has
5038 its length computed as LENGTH. Return LENGTH if no adjustment is
5039 necessary.
3673e996
RS
5040
5041 Also compute the length of an inline block move here as it is too
b9821af8 5042 complicated to express as a length attribute in pa.md. */
3673e996 5043int
432d483a 5044pa_adjust_insn_length (rtx_insn *insn, int length)
3673e996
RS
5045{
5046 rtx pat = PATTERN (insn);
5047
ab11fb42
JDA
5048 /* If length is negative or undefined, provide initial length. */
5049 if ((unsigned int) length >= INT_MAX)
5050 {
5051 if (GET_CODE (pat) == SEQUENCE)
432d483a 5052 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
ab11fb42
JDA
5053
5054 switch (get_attr_type (insn))
5055 {
5056 case TYPE_MILLI:
5057 length = pa_attr_length_millicode_call (insn);
5058 break;
5059 case TYPE_CALL:
5060 length = pa_attr_length_call (insn, 0);
5061 break;
5062 case TYPE_SIBCALL:
5063 length = pa_attr_length_call (insn, 1);
5064 break;
5065 case TYPE_DYNCALL:
5066 length = pa_attr_length_indirect_call (insn);
5067 break;
5068 case TYPE_SH_FUNC_ADRS:
5069 length = pa_attr_length_millicode_call (insn) + 20;
5070 break;
5071 default:
5072 gcc_unreachable ();
5073 }
5074 }
5075
3673e996 5076 /* Block move pattern. */
33e67557
SB
5077 if (NONJUMP_INSN_P (insn)
5078 && GET_CODE (pat) == PARALLEL
5079 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5080 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5081 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5082 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5083 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
ab11fb42 5084 length += compute_movmem_length (insn) - 4;
cdc9103c 5085 /* Block clear pattern. */
b64925dc 5086 else if (NONJUMP_INSN_P (insn)
cdc9103c
JDA
5087 && GET_CODE (pat) == PARALLEL
5088 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5089 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5090 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5091 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
ab11fb42 5092 length += compute_clrmem_length (insn) - 4;
3673e996 5093 /* Conditional branch with an unfilled delay slot. */
b64925dc 5094 else if (JUMP_P (insn) && ! simplejump_p (insn))
b9821af8
JL
5095 {
5096 /* Adjust a short backwards conditional with an unfilled delay slot. */
5097 if (GET_CODE (pat) == SET
a1b36964 5098 && length == 4
3232e9d8 5099 && JUMP_LABEL (insn) != NULL_RTX
b9821af8 5100 && ! forward_branch_p (insn))
ab11fb42 5101 length += 4;
b1092901
JL
5102 else if (GET_CODE (pat) == PARALLEL
5103 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5104 && length == 4)
ab11fb42 5105 length += 4;
b9821af8 5106 /* Adjust dbra insn with short backwards conditional branch with
23f6f34f 5107 unfilled delay slot -- only for case where counter is in a
fe19a83d 5108 general register register. */
b9821af8
JL
5109 else if (GET_CODE (pat) == PARALLEL
5110 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5111 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
23f6f34f 5112 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
a1b36964 5113 && length == 4
b9821af8 5114 && ! forward_branch_p (insn))
ab11fb42 5115 length += 4;
b9821af8 5116 }
ab11fb42 5117 return length;
3673e996
RS
5118}
5119
8a5b8538
AS
/* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook.  Return true
   if CODE is one of the punctuation characters '@', '#', '*' or '^',
   and false for any other character.  */

static bool
pa_print_operand_punct_valid_p (unsigned char code)
{
  switch (code)
    {
    case '@':
    case '#':
    case '*':
    case '^':
      return true;

    default:
      return false;
    }
}
5133
188538df
TG
5134/* Print operand X (an rtx) in assembler syntax to file FILE.
5135 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5136 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5137
5138void
ae9d61ab 5139pa_print_operand (FILE *file, rtx x, int code)
188538df
TG
5140{
5141 switch (code)
5142 {
5143 case '#':
5144 /* Output a 'nop' if there's nothing for the delay slot. */
5145 if (dbr_sequence_length () == 0)
5146 fputs ("\n\tnop", file);
5147 return;
5148 case '*':
5bdc5878 5149 /* Output a nullification completer if there's nothing for the */
23f6f34f 5150 /* delay slot or nullification is requested. */
188538df
TG
5151 if (dbr_sequence_length () == 0 ||
5152 (final_sequence &&
5153 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5154 fputs (",n", file);
5155 return;
5156 case 'R':
5157 /* Print out the second register name of a register pair.
5158 I.e., R (6) => 7. */
831c1763 5159 fputs (reg_names[REGNO (x) + 1], file);
188538df
TG
5160 return;
5161 case 'r':
fe19a83d 5162 /* A register or zero. */
f048ca47
JL
5163 if (x == const0_rtx
5164 || (x == CONST0_RTX (DFmode))
5165 || (x == CONST0_RTX (SFmode)))
188538df 5166 {
55abf18a
JL
5167 fputs ("%r0", file);
5168 return;
5169 }
5170 else
5171 break;
5172 case 'f':
fe19a83d 5173 /* A register or zero (floating point). */
55abf18a
JL
5174 if (x == const0_rtx
5175 || (x == CONST0_RTX (DFmode))
5176 || (x == CONST0_RTX (SFmode)))
5177 {
5178 fputs ("%fr0", file);
188538df
TG
5179 return;
5180 }
5181 else
5182 break;
f8eb41cc
JL
5183 case 'A':
5184 {
5185 rtx xoperands[2];
5186
5187 xoperands[0] = XEXP (XEXP (x, 0), 0);
5188 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
ae9d61ab 5189 pa_output_global_address (file, xoperands[1], 0);
f8eb41cc
JL
5190 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5191 return;
5192 }
5193
c85b8963 5194 case 'C': /* Plain (C)ondition */
188538df
TG
5195 case 'X':
5196 switch (GET_CODE (x))
23f6f34f 5197 {
188538df 5198 case EQ:
e236a9ff 5199 fputs ("=", file); break;
188538df 5200 case NE:
e236a9ff 5201 fputs ("<>", file); break;
188538df 5202 case GT:
e236a9ff 5203 fputs (">", file); break;
188538df 5204 case GE:
e236a9ff 5205 fputs (">=", file); break;
188538df 5206 case GEU:
e236a9ff 5207 fputs (">>=", file); break;
188538df 5208 case GTU:
e236a9ff 5209 fputs (">>", file); break;
188538df 5210 case LT:
e236a9ff 5211 fputs ("<", file); break;
188538df 5212 case LE:
e236a9ff 5213 fputs ("<=", file); break;
188538df 5214 case LEU:
e236a9ff 5215 fputs ("<<=", file); break;
188538df 5216 case LTU:
e236a9ff 5217 fputs ("<<", file); break;
188538df 5218 default:
144d51f9 5219 gcc_unreachable ();
188538df
TG
5220 }
5221 return;
c85b8963 5222 case 'N': /* Condition, (N)egated */
188538df
TG
5223 switch (GET_CODE (x))
5224 {
5225 case EQ:
e236a9ff 5226 fputs ("<>", file); break;
188538df 5227 case NE:
e236a9ff 5228 fputs ("=", file); break;
188538df 5229 case GT:
e236a9ff 5230 fputs ("<=", file); break;
188538df 5231 case GE:
e236a9ff 5232 fputs ("<", file); break;
188538df 5233 case GEU:
e236a9ff 5234 fputs ("<<", file); break;
188538df 5235 case GTU:
e236a9ff 5236 fputs ("<<=", file); break;
188538df 5237 case LT:
e236a9ff 5238 fputs (">=", file); break;
188538df 5239 case LE:
e236a9ff 5240 fputs (">", file); break;
188538df 5241 case LEU:
e236a9ff 5242 fputs (">>", file); break;
188538df 5243 case LTU:
e236a9ff 5244 fputs (">>=", file); break;
188538df 5245 default:
144d51f9 5246 gcc_unreachable ();
188538df
TG
5247 }
5248 return;
831c1763 5249 /* For floating point comparisons. Note that the output
69049ba0
JDA
5250 predicates are the complement of the desired mode. The
5251 conditions for GT, GE, LT, LE and LTGT cause an invalid
5252 operation exception if the result is unordered and this
5253 exception is enabled in the floating-point status register. */
d6c0d377
JL
5254 case 'Y':
5255 switch (GET_CODE (x))
5256 {
5257 case EQ:
e236a9ff 5258 fputs ("!=", file); break;
d6c0d377 5259 case NE:
e236a9ff 5260 fputs ("=", file); break;
d6c0d377 5261 case GT:
becf1647 5262 fputs ("!>", file); break;
d6c0d377 5263 case GE:
becf1647 5264 fputs ("!>=", file); break;
d6c0d377 5265 case LT:
becf1647 5266 fputs ("!<", file); break;
d6c0d377 5267 case LE:
becf1647
DA
5268 fputs ("!<=", file); break;
5269 case LTGT:
5270 fputs ("!<>", file); break;
5271 case UNLE:
69049ba0 5272 fputs ("!?<=", file); break;
becf1647 5273 case UNLT:
69049ba0 5274 fputs ("!?<", file); break;
becf1647 5275 case UNGE:
69049ba0 5276 fputs ("!?>=", file); break;
becf1647 5277 case UNGT:
69049ba0 5278 fputs ("!?>", file); break;
becf1647 5279 case UNEQ:
69049ba0 5280 fputs ("!?=", file); break;
becf1647 5281 case UNORDERED:
69049ba0 5282 fputs ("!?", file); break;
becf1647 5283 case ORDERED:
69049ba0 5284 fputs ("?", file); break;
d6c0d377 5285 default:
144d51f9 5286 gcc_unreachable ();
d6c0d377
JL
5287 }
5288 return;
c85b8963
TG
5289 case 'S': /* Condition, operands are (S)wapped. */
5290 switch (GET_CODE (x))
5291 {
5292 case EQ:
e236a9ff 5293 fputs ("=", file); break;
c85b8963 5294 case NE:
e236a9ff 5295 fputs ("<>", file); break;
c85b8963 5296 case GT:
e236a9ff 5297 fputs ("<", file); break;
c85b8963 5298 case GE:
e236a9ff 5299 fputs ("<=", file); break;
c85b8963 5300 case GEU:
e236a9ff 5301 fputs ("<<=", file); break;
c85b8963 5302 case GTU:
e236a9ff 5303 fputs ("<<", file); break;
c85b8963 5304 case LT:
e236a9ff 5305 fputs (">", file); break;
c85b8963 5306 case LE:
e236a9ff 5307 fputs (">=", file); break;
c85b8963 5308 case LEU:
e236a9ff 5309 fputs (">>=", file); break;
c85b8963 5310 case LTU:
e236a9ff 5311 fputs (">>", file); break;
c85b8963 5312 default:
144d51f9 5313 gcc_unreachable ();
23f6f34f 5314 }
c85b8963
TG
5315 return;
5316 case 'B': /* Condition, (B)oth swapped and negate. */
5317 switch (GET_CODE (x))
5318 {
5319 case EQ:
e236a9ff 5320 fputs ("<>", file); break;
c85b8963 5321 case NE:
e236a9ff 5322 fputs ("=", file); break;
c85b8963 5323 case GT:
e236a9ff 5324 fputs (">=", file); break;
c85b8963 5325 case GE:
e236a9ff 5326 fputs (">", file); break;
c85b8963 5327 case GEU:
e236a9ff 5328 fputs (">>", file); break;
c85b8963 5329 case GTU:
e236a9ff 5330 fputs (">>=", file); break;
c85b8963 5331 case LT:
e236a9ff 5332 fputs ("<=", file); break;
c85b8963 5333 case LE:
e236a9ff 5334 fputs ("<", file); break;
c85b8963 5335 case LEU:
e236a9ff 5336 fputs ("<<", file); break;
c85b8963 5337 case LTU:
e236a9ff 5338 fputs ("<<=", file); break;
c85b8963 5339 default:
144d51f9 5340 gcc_unreachable ();
23f6f34f 5341 }
c85b8963
TG
5342 return;
5343 case 'k':
144d51f9
NS
5344 gcc_assert (GET_CODE (x) == CONST_INT);
5345 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5346 return;
520babc7 5347 case 'Q':
144d51f9
NS
5348 gcc_assert (GET_CODE (x) == CONST_INT);
5349 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5350 return;
c8d6697c 5351 case 'L':
144d51f9
NS
5352 gcc_assert (GET_CODE (x) == CONST_INT);
5353 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5354 return;
b47fbc53
JL
5355 case 'o':
5356 gcc_assert (GET_CODE (x) == CONST_INT
5357 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
34319f9a 5358 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
b47fbc53 5359 return;
4802a0d6 5360 case 'O':
144d51f9
NS
5361 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5362 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5363 return;
520babc7 5364 case 'p':
144d51f9
NS
5365 gcc_assert (GET_CODE (x) == CONST_INT);
5366 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5367 return;
c8d6697c 5368 case 'P':
144d51f9
NS
5369 gcc_assert (GET_CODE (x) == CONST_INT);
5370 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5371 return;
c85b8963
TG
5372 case 'I':
5373 if (GET_CODE (x) == CONST_INT)
5374 fputs ("i", file);
5375 return;
188538df 5376 case 'M':
2414e0e2 5377 case 'F':
188538df
TG
5378 switch (GET_CODE (XEXP (x, 0)))
5379 {
5380 case PRE_DEC:
5381 case PRE_INC:
f38b27c7
JL
5382 if (ASSEMBLER_DIALECT == 0)
5383 fputs ("s,mb", file);
5384 else
5385 fputs (",mb", file);
188538df
TG
5386 break;
5387 case POST_DEC:
5388 case POST_INC:
f38b27c7
JL
5389 if (ASSEMBLER_DIALECT == 0)
5390 fputs ("s,ma", file);
5391 else
5392 fputs (",ma", file);
188538df 5393 break;
2414e0e2 5394 case PLUS:
d8f95bed
JDA
5395 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5396 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5397 {
5398 if (ASSEMBLER_DIALECT == 0)
5399 fputs ("x", file);
5400 }
5401 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5402 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
f38b27c7
JL
5403 {
5404 if (ASSEMBLER_DIALECT == 0)
5405 fputs ("x,s", file);
5406 else
5407 fputs (",s", file);
5408 }
5409 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
2414e0e2 5410 fputs ("s", file);
188538df
TG
5411 break;
5412 default:
f38b27c7 5413 if (code == 'F' && ASSEMBLER_DIALECT == 0)
2414e0e2 5414 fputs ("s", file);
188538df
TG
5415 break;
5416 }
5417 return;
5418 case 'G':
ae9d61ab 5419 pa_output_global_address (file, x, 0);
ad238e4b
JL
5420 return;
5421 case 'H':
ae9d61ab 5422 pa_output_global_address (file, x, 1);
188538df
TG
5423 return;
5424 case 0: /* Don't do anything special */
5425 break;
a1747d2c
TG
5426 case 'Z':
5427 {
5428 unsigned op[3];
6fda0f5b 5429 compute_zdepwi_operands (INTVAL (x), op);
a1747d2c
TG
5430 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5431 return;
5432 }
520babc7
JL
5433 case 'z':
5434 {
5435 unsigned op[3];
5436 compute_zdepdi_operands (INTVAL (x), op);
5437 fprintf (file, "%d,%d,%d", op[0], op[1], op[2]);
5438 return;
5439 }
11881f37
AM
5440 case 'c':
5441 /* We can get here from a .vtable_inherit due to our
5442 CONSTANT_ADDRESS_P rejecting perfectly good constant
5443 addresses. */
5444 break;
188538df 5445 default:
144d51f9 5446 gcc_unreachable ();
188538df
TG
5447 }
5448 if (GET_CODE (x) == REG)
80225b66 5449 {
3ba1236f 5450 fputs (reg_names [REGNO (x)], file);
520babc7
JL
5451 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5452 {
5453 fputs ("R", file);
5454 return;
5455 }
5456 if (FP_REG_P (x)
5457 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5458 && (REGNO (x) & 1) == 0)
3ba1236f 5459 fputs ("L", file);
80225b66 5460 }
188538df
TG
5461 else if (GET_CODE (x) == MEM)
5462 {
5463 int size = GET_MODE_SIZE (GET_MODE (x));
478a4495 5464 rtx base = NULL_RTX;
188538df
TG
5465 switch (GET_CODE (XEXP (x, 0)))
5466 {
5467 case PRE_DEC:
5468 case POST_DEC:
520babc7 5469 base = XEXP (XEXP (x, 0), 0);
d2d28085 5470 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
188538df
TG
5471 break;
5472 case PRE_INC:
5473 case POST_INC:
520babc7 5474 base = XEXP (XEXP (x, 0), 0);
d2d28085 5475 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
188538df 5476 break;
d8f95bed
JDA
5477 case PLUS:
5478 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
d2d28085 5479 fprintf (file, "%s(%s)",
2414e0e2
JL
5480 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5481 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
d8f95bed 5482 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
d2d28085 5483 fprintf (file, "%s(%s)",
2414e0e2
JL
5484 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5485 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
d8f95bed
JDA
5486 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5487 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5488 {
5489 /* Because the REG_POINTER flag can get lost during reload,
1a04ac2b 5490 pa_legitimate_address_p canonicalizes the order of the
d8f95bed
JDA
5491 index and base registers in the combined move patterns. */
5492 rtx base = XEXP (XEXP (x, 0), 1);
5493 rtx index = XEXP (XEXP (x, 0), 0);
5494
5495 fprintf (file, "%s(%s)",
5496 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5497 }
2414e0e2 5498 else
cc8ca59e 5499 output_address (GET_MODE (x), XEXP (x, 0));
188538df 5500 break;
d8f95bed 5501 default:
cc8ca59e 5502 output_address (GET_MODE (x), XEXP (x, 0));
d8f95bed 5503 break;
188538df
TG
5504 }
5505 }
188538df
TG
5506 else
5507 output_addr_const (file, x);
5508}
5509
fe19a83d 5510/* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
188538df
TG
5511
5512void
ae9d61ab 5513pa_output_global_address (FILE *file, rtx x, int round_constant)
188538df 5514{
43940f6b
JL
5515
5516 /* Imagine (high (const (plus ...))). */
5517 if (GET_CODE (x) == HIGH)
5518 x = XEXP (x, 0);
5519
519104fe 5520 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
744b2d61 5521 output_addr_const (file, x);
6bb36601 5522 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
188538df 5523 {
744b2d61 5524 output_addr_const (file, x);
e236a9ff 5525 fputs ("-$global$", file);
188538df
TG
5526 }
5527 else if (GET_CODE (x) == CONST)
5528 {
519104fe 5529 const char *sep = "";
188538df 5530 int offset = 0; /* assembler wants -$global$ at end */
516c2342 5531 rtx base = NULL_RTX;
23f6f34f 5532
144d51f9 5533 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
188538df 5534 {
3ab604d5 5535 case LABEL_REF:
144d51f9 5536 case SYMBOL_REF:
188538df
TG
5537 base = XEXP (XEXP (x, 0), 0);
5538 output_addr_const (file, base);
144d51f9
NS
5539 break;
5540 case CONST_INT:
5541 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5542 break;
5543 default:
5544 gcc_unreachable ();
188538df 5545 }
188538df 5546
144d51f9 5547 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
188538df 5548 {
3ab604d5 5549 case LABEL_REF:
144d51f9 5550 case SYMBOL_REF:
188538df
TG
5551 base = XEXP (XEXP (x, 0), 1);
5552 output_addr_const (file, base);
144d51f9
NS
5553 break;
5554 case CONST_INT:
5555 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5556 break;
5557 default:
5558 gcc_unreachable ();
188538df 5559 }
188538df 5560
ad238e4b
JL
5561 /* How bogus. The compiler is apparently responsible for
5562 rounding the constant if it uses an LR field selector.
5563
5564 The linker and/or assembler seem a better place since
5565 they have to do this kind of thing already.
5566
5567 If we fail to do this, HP's optimizing linker may eliminate
5568 an addil, but not update the ldw/stw/ldo instruction that
5569 uses the result of the addil. */
5570 if (round_constant)
5571 offset = ((offset + 0x1000) & ~0x1fff);
5572
144d51f9 5573 switch (GET_CODE (XEXP (x, 0)))
188538df 5574 {
144d51f9 5575 case PLUS:
188538df
TG
5576 if (offset < 0)
5577 {
5578 offset = -offset;
5579 sep = "-";
5580 }
5581 else
5582 sep = "+";
144d51f9
NS
5583 break;
5584
5585 case MINUS:
5586 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5587 sep = "-";
5588 break;
188538df 5589
144d51f9
NS
5590 default:
5591 gcc_unreachable ();
5592 }
5593
519104fe 5594 if (!read_only_operand (base, VOIDmode) && !flag_pic)
e236a9ff 5595 fputs ("-$global$", file);
ad238e4b 5596 if (offset)
831c1763 5597 fprintf (file, "%s%d", sep, offset);
188538df
TG
5598 }
5599 else
5600 output_addr_const (file, x);
5601}
5602
1bc7c5b6
ZW
5603/* Output boilerplate text to appear at the beginning of the file.
5604 There are several possible versions. */
5605#define aputs(x) fputs(x, asm_out_file)
5606static inline void
b7849684 5607pa_file_start_level (void)
1bc7c5b6
ZW
5608{
5609 if (TARGET_64BIT)
5610 aputs ("\t.LEVEL 2.0w\n");
5611 else if (TARGET_PA_20)
5612 aputs ("\t.LEVEL 2.0\n");
5613 else if (TARGET_PA_11)
5614 aputs ("\t.LEVEL 1.1\n");
5615 else
5616 aputs ("\t.LEVEL 1.0\n");
5617}
5618
5619static inline void
b7849684 5620pa_file_start_space (int sortspace)
1bc7c5b6
ZW
5621{
5622 aputs ("\t.SPACE $PRIVATE$");
5623 if (sortspace)
5624 aputs (",SORT=16");
57d138a9
JDA
5625 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5626 if (flag_tm)
5627 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5628 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5629 "\n\t.SPACE $TEXT$");
1bc7c5b6
ZW
5630 if (sortspace)
5631 aputs (",SORT=8");
5632 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
57d138a9 5633 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
1bc7c5b6
ZW
5634}
5635
5636static inline void
b7849684 5637pa_file_start_file (int want_version)
1bc7c5b6
ZW
5638{
5639 if (write_symbols != NO_DEBUG)
5640 {
5641 output_file_directive (asm_out_file, main_input_filename);
5642 if (want_version)
5643 aputs ("\t.version\t\"01.01\"\n");
5644 }
5645}
5646
5647static inline void
b7849684 5648pa_file_start_mcount (const char *aswhat)
1bc7c5b6
ZW
5649{
5650 if (profile_flag)
5651 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5652}
5653
/* File prologue for ELF: .LEVEL, the _mcount import (as ENTRY) and the
   file directive without a .version line.  */
static void
pa_elf_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_mcount ("ENTRY");
  pa_file_start_file (0);
}
5661
5662static void
b7849684 5663pa_som_file_start (void)
1bc7c5b6
ZW
5664{
5665 pa_file_start_level ();
5666 pa_file_start_space (0);
5667 aputs ("\t.IMPORT $global$,DATA\n"
5668 "\t.IMPORT $$dyncall,MILLICODE\n");
5669 pa_file_start_mcount ("CODE");
5670 pa_file_start_file (0);
5671}
5672
/* File prologue for Linux: the file directive (with a .version line)
   comes first, then .LEVEL and the _mcount import (as CODE).  */
static void
pa_linux_file_start (void)
{
  pa_file_start_file (1);
  pa_file_start_level ();
  pa_file_start_mcount ("CODE");
}
5680
/* File prologue for 64-bit HP-UX with GAS: .LEVEL, a type directive
   marking _mcount as a function when profiling (if the target defines
   ASM_OUTPUT_TYPE_DIRECTIVE), then the file directive with a .version
   line.  */
static void
pa_hpux64_gas_file_start (void)
{
  pa_file_start_level ();

#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  if (profile_flag)
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
#endif

  pa_file_start_file (1);
}
5691
/* File prologue for 64-bit HP-UX with the HP assembler: .LEVEL, sorted
   space declarations, the _mcount import (as CODE) and the file
   directive without a .version line.  */
static void
pa_hpux64_hpas_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (1);
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
5700#undef aputs
5701
7aaf280e
JDA
5702/* Search the deferred plabel list for SYMBOL and return its internal
5703 label. If an entry for SYMBOL is not found, a new entry is created. */
5704
5705rtx
ae9d61ab 5706pa_get_deferred_plabel (rtx symbol)
a02aa5b0 5707{
744b2d61 5708 const char *fname = XSTR (symbol, 0);
a02aa5b0
JDA
5709 size_t i;
5710
5711 /* See if we have already put this function on the list of deferred
5712 plabels. This list is generally small, so a liner search is not
5713 too ugly. If it proves too slow replace it with something faster. */
5714 for (i = 0; i < n_deferred_plabels; i++)
744b2d61 5715 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
a02aa5b0
JDA
5716 break;
5717
5718 /* If the deferred plabel list is empty, or this entry was not found
5719 on the list, create a new entry on the list. */
5720 if (deferred_plabels == NULL || i == n_deferred_plabels)
5721 {
744b2d61
JDA
5722 tree id;
5723
a02aa5b0 5724 if (deferred_plabels == 0)
766090c2 5725 deferred_plabels = ggc_alloc<deferred_plabel> ();
a02aa5b0 5726 else
a9429e29
LB
5727 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5728 deferred_plabels,
5729 n_deferred_plabels + 1);
a02aa5b0
JDA
5730
5731 i = n_deferred_plabels++;
5732 deferred_plabels[i].internal_label = gen_label_rtx ();
744b2d61 5733 deferred_plabels[i].symbol = symbol;
a02aa5b0 5734
744b2d61
JDA
5735 /* Gross. We have just implicitly taken the address of this
5736 function. Mark it in the same manner as assemble_name. */
5737 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5738 if (id)
5739 mark_referenced (id);
a02aa5b0
JDA
5740 }
5741
7aaf280e 5742 return deferred_plabels[i].internal_label;
a02aa5b0
JDA
5743}
5744
a5fe455b 5745static void
b7849684 5746output_deferred_plabels (void)
359255a9 5747{
0f8e3849 5748 size_t i;
1a83bfc3
JDA
5749
5750 /* If we have some deferred plabels, then we need to switch into the
5751 data or readonly data section, and align it to a 4 byte boundary
6416ae7f 5752 before outputting the deferred plabels. */
359255a9
JL
5753 if (n_deferred_plabels)
5754 {
1a83bfc3 5755 switch_to_section (flag_pic ? data_section : readonly_data_section);
a5fe455b 5756 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
359255a9
JL
5757 }
5758
5759 /* Now output the deferred plabels. */
5760 for (i = 0; i < n_deferred_plabels; i++)
5761 {
ecc418c4 5762 targetm.asm_out.internal_label (asm_out_file, "L",
a5fe455b 5763 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
744b2d61 5764 assemble_integer (deferred_plabels[i].symbol,
3d9268b6 5765 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
359255a9
JL
5766 }
5767}
5768
50bbeefb
JDA
5769/* Initialize optabs to point to emulation routines. */
5770
c15c90bb 5771static void
50bbeefb 5772pa_init_libfuncs (void)
c15c90bb 5773{
50bbeefb
JDA
5774 if (HPUX_LONG_DOUBLE_LIBRARY)
5775 {
5776 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5777 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5778 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5779 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5780 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5781 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5782 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5783 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5784 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5785
5786 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5787 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5788 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5789 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5790 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5791 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5792 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5793
5794 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5795 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5796 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5797 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5798
5799 set_conv_libfunc (sfix_optab, SImode, TFmode,
5800 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5801 : "_U_Qfcnvfxt_quad_to_sgl");
5802 set_conv_libfunc (sfix_optab, DImode, TFmode,
5803 "_U_Qfcnvfxt_quad_to_dbl");
5804 set_conv_libfunc (ufix_optab, SImode, TFmode,
5805 "_U_Qfcnvfxt_quad_to_usgl");
5806 set_conv_libfunc (ufix_optab, DImode, TFmode,
5807 "_U_Qfcnvfxt_quad_to_udbl");
5808
5809 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5810 "_U_Qfcnvxf_sgl_to_quad");
5811 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5812 "_U_Qfcnvxf_dbl_to_quad");
5813 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5814 "_U_Qfcnvxf_usgl_to_quad");
5815 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5816 "_U_Qfcnvxf_udbl_to_quad");
5817 }
33a55f29
RH
5818
5819 if (TARGET_SYNC_LIBCALL)
7e7c9d40 5820 init_sync_libfuncs (8);
c15c90bb 5821}
c15c90bb 5822
188538df
TG
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli (enum millicodes);
static char imported[(int) end1000];
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
static const char import_string[] = ".IMPORT $$....,MILLICODE";
/* Index in import_string of the four-character "...." placeholder.  */
#define MILLI_START 10

/* Emit the .IMPORT directive for millicode routine CODE the first time
   it is requested; later requests for the same routine do nothing.  */

static void
import_milli (enum millicodes code)
{
  char str[sizeof (import_string)];

  if (imported[(int) code])
    return;

  imported[(int) code] = 1;
  strcpy (str, import_string);
  /* Overwrite the placeholder in place.  Exactly four bytes are copied
     and no terminator is wanted, which is what memcpy expresses;
     strncpy here would trigger truncation diagnostics.  */
  memcpy (str + MILLI_START, milli_names[(int) code], 4);
  output_asm_insn (str, 0);
}
5846
23f6f34f 5847/* The register constraints have put the operands and return value in
fe19a83d 5848 the proper registers. */
188538df 5849
519104fe 5850const char *
b32d5189 5851pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
188538df 5852{
9b38c2fa 5853 import_milli (mulI);
ae9d61ab 5854 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
188538df
TG
5855}
5856
fe19a83d 5857/* Emit the rtl for doing a division by a constant. */
188538df 5858
9b38c2fa 5859/* Do magic division millicodes exist for this value? */
ae9d61ab 5860const int pa_magic_milli[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
188538df 5861
23f6f34f 5862/* We'll use an array to keep track of the magic millicodes and
188538df 5863 whether or not we've used them already. [n][0] is signed, [n][1] is
fe19a83d 5864 unsigned. */
188538df 5865
188538df
TG
5866static int div_milli[16][2];
5867
188538df 5868int
ae9d61ab 5869pa_emit_hpdiv_const (rtx *operands, int unsignedp)
188538df
TG
5870{
5871 if (GET_CODE (operands[2]) == CONST_INT
5872 && INTVAL (operands[2]) > 0
5873 && INTVAL (operands[2]) < 16
ae9d61ab 5874 && pa_magic_milli[INTVAL (operands[2])])
188538df 5875 {
7d8b1412
AM
5876 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5877
ad2c71b7 5878 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
188538df 5879 emit
92fd5e41
KH
5880 (gen_rtx_PARALLEL
5881 (VOIDmode,
f7df4a84 5882 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
ad2c71b7
JL
5883 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5884 SImode,
5885 gen_rtx_REG (SImode, 26),
5886 operands[2])),
bd83f9a5 5887 gen_rtx_CLOBBER (VOIDmode, operands[4]),
ad2c71b7
JL
5888 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5889 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5890 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
7d8b1412 5891 gen_rtx_CLOBBER (VOIDmode, ret))));
ad2c71b7 5892 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
188538df
TG
5893 return 1;
5894 }
5895 return 0;
5896}
5897
519104fe 5898const char *
b32d5189 5899pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
188538df
TG
5900{
5901 int divisor;
23f6f34f
TG
5902
5903 /* If the divisor is a constant, try to use one of the special
188538df
TG
5904 opcodes .*/
5905 if (GET_CODE (operands[0]) == CONST_INT)
5906 {
2c4ff308 5907 static char buf[100];
188538df
TG
5908 divisor = INTVAL (operands[0]);
5909 if (!div_milli[divisor][unsignedp])
5910 {
2c4ff308 5911 div_milli[divisor][unsignedp] = 1;
188538df
TG
5912 if (unsignedp)
5913 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5914 else
5915 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
188538df
TG
5916 }
5917 if (unsignedp)
2c4ff308 5918 {
4a0a75dd
KG
5919 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5920 INTVAL (operands[0]));
ae9d61ab
JDA
5921 return pa_output_millicode_call (insn,
5922 gen_rtx_SYMBOL_REF (SImode, buf));
2c4ff308
JL
5923 }
5924 else
5925 {
4a0a75dd
KG
5926 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5927 INTVAL (operands[0]));
ae9d61ab
JDA
5928 return pa_output_millicode_call (insn,
5929 gen_rtx_SYMBOL_REF (SImode, buf));
2c4ff308 5930 }
188538df 5931 }
fe19a83d 5932 /* Divisor isn't a special constant. */
188538df
TG
5933 else
5934 {
5935 if (unsignedp)
5936 {
5937 import_milli (divU);
ae9d61ab 5938 return pa_output_millicode_call (insn,
ad2c71b7 5939 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
188538df
TG
5940 }
5941 else
5942 {
5943 import_milli (divI);
ae9d61ab 5944 return pa_output_millicode_call (insn,
ad2c71b7 5945 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
188538df
TG
5946 }
5947 }
5948}
5949
fe19a83d 5950/* Output a $$rem millicode to do mod. */
188538df 5951
519104fe 5952const char *
b32d5189 5953pa_output_mod_insn (int unsignedp, rtx_insn *insn)
188538df
TG
5954{
5955 if (unsignedp)
5956 {
5957 import_milli (remU);
ae9d61ab
JDA
5958 return pa_output_millicode_call (insn,
5959 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
188538df
TG
5960 }
5961 else
5962 {
5963 import_milli (remI);
ae9d61ab
JDA
5964 return pa_output_millicode_call (insn,
5965 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
188538df
TG
5966 }
5967}
5968
5969void
e0d80a58 5970pa_output_arg_descriptor (rtx_insn *call_insn)
188538df 5971{
519104fe 5972 const char *arg_regs[4];
ef4bddc2 5973 machine_mode arg_mode;
80225b66 5974 rtx link;
188538df
TG
5975 int i, output_flag = 0;
5976 int regno;
23f6f34f 5977
520babc7 5978 /* We neither need nor want argument location descriptors for the
e25724d8
AM
5979 64bit runtime environment or the ELF32 environment. */
5980 if (TARGET_64BIT || TARGET_ELF32)
520babc7
JL
5981 return;
5982
188538df
TG
5983 for (i = 0; i < 4; i++)
5984 arg_regs[i] = 0;
5985
2822d96e
JL
5986 /* Specify explicitly that no argument relocations should take place
5987 if using the portable runtime calling conventions. */
5988 if (TARGET_PORTABLE_RUNTIME)
5989 {
e236a9ff
JL
5990 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5991 asm_out_file);
2822d96e
JL
5992 return;
5993 }
5994
b64925dc 5995 gcc_assert (CALL_P (call_insn));
144d51f9
NS
5996 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5997 link; link = XEXP (link, 1))
188538df 5998 {
80225b66 5999 rtx use = XEXP (link, 0);
3529be83 6000
80225b66
TG
6001 if (! (GET_CODE (use) == USE
6002 && GET_CODE (XEXP (use, 0)) == REG
6003 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
3529be83
RS
6004 continue;
6005
80225b66
TG
6006 arg_mode = GET_MODE (XEXP (use, 0));
6007 regno = REGNO (XEXP (use, 0));
188538df 6008 if (regno >= 23 && regno <= 26)
a9d91d6f
RS
6009 {
6010 arg_regs[26 - regno] = "GR";
6011 if (arg_mode == DImode)
6012 arg_regs[25 - regno] = "GR";
6013 }
80225b66 6014 else if (regno >= 32 && regno <= 39)
188538df
TG
6015 {
6016 if (arg_mode == SFmode)
80225b66 6017 arg_regs[(regno - 32) / 2] = "FR";
d0616842 6018 else
188538df 6019 {
22d6e660 6020#ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
80225b66
TG
6021 arg_regs[(regno - 34) / 2] = "FR";
6022 arg_regs[(regno - 34) / 2 + 1] = "FU";
188538df 6023#else
80225b66
TG
6024 arg_regs[(regno - 34) / 2] = "FU";
6025 arg_regs[(regno - 34) / 2 + 1] = "FR";
188538df
TG
6026#endif
6027 }
188538df
TG
6028 }
6029 }
6030 fputs ("\t.CALL ", asm_out_file);
6031 for (i = 0; i < 4; i++)
6032 {
6033 if (arg_regs[i])
6034 {
6035 if (output_flag++)
6036 fputc (',', asm_out_file);
6037 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6038 }
6039 }
6040 fputc ('\n', asm_out_file);
6041}
6042\f
1a04ac2b
JDA
6043/* Inform reload about cases where moving X with a mode MODE to or from
6044 a register in RCLASS requires an extra scratch or immediate register.
6045 Return the class needed for the immediate register. */
483d7ad3 6046
a87cf97e
JR
6047static reg_class_t
6048pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
ef4bddc2 6049 machine_mode mode, secondary_reload_info *sri)
ec963611 6050{
715a567d 6051 int regno;
a87cf97e 6052 enum reg_class rclass = (enum reg_class) rclass_i;
e236a9ff 6053
ec963611 6054 /* Handle the easy stuff first. */
0a2aaacc 6055 if (rclass == R1_REGS)
ec963611 6056 return NO_REGS;
e236a9ff 6057
ec963611
JDA
6058 if (REG_P (x))
6059 {
6060 regno = REGNO (x);
0a2aaacc 6061 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
ec963611
JDA
6062 return NO_REGS;
6063 }
69f8a2d6
JDA
6064 else
6065 regno = -1;
188538df 6066
ec963611
JDA
6067 /* If we have something like (mem (mem (...)), we can safely assume the
6068 inner MEM will end up in a general register after reloading, so there's
6069 no need for a secondary reload. */
6070 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6071 return NO_REGS;
188538df 6072
6bb36601 6073 /* Trying to load a constant into a FP register during PIC code
1a04ac2b
JDA
6074 generation requires %r1 as a scratch register. For float modes,
6075 the only legitimate constant is CONST0_RTX. However, there are
6076 a few patterns that accept constant double operands. */
7ee72796 6077 if (flag_pic
0a2aaacc 6078 && FP_REG_CLASS_P (rclass)
ec963611 6079 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
e236a9ff 6080 {
1a04ac2b
JDA
6081 switch (mode)
6082 {
4e10a5a7 6083 case E_SImode:
1a04ac2b
JDA
6084 sri->icode = CODE_FOR_reload_insi_r1;
6085 break;
6086
4e10a5a7 6087 case E_DImode:
1a04ac2b
JDA
6088 sri->icode = CODE_FOR_reload_indi_r1;
6089 break;
6090
4e10a5a7 6091 case E_SFmode:
1a04ac2b
JDA
6092 sri->icode = CODE_FOR_reload_insf_r1;
6093 break;
6094
4e10a5a7 6095 case E_DFmode:
1a04ac2b
JDA
6096 sri->icode = CODE_FOR_reload_indf_r1;
6097 break;
6098
6099 default:
6100 gcc_unreachable ();
6101 }
ec963611 6102 return NO_REGS;
e236a9ff 6103 }
e236a9ff 6104
1a04ac2b
JDA
6105 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6106 register when we're generating PIC code or when the operand isn't
715a567d 6107 readonly. */
ae9d61ab 6108 if (pa_symbolic_expression_p (x))
715a567d
JDA
6109 {
6110 if (GET_CODE (x) == HIGH)
6111 x = XEXP (x, 0);
6112
6113 if (flag_pic || !read_only_operand (x, VOIDmode))
6114 {
1a04ac2b
JDA
6115 switch (mode)
6116 {
4e10a5a7 6117 case E_SImode:
1a04ac2b
JDA
6118 sri->icode = CODE_FOR_reload_insi_r1;
6119 break;
6120
4e10a5a7 6121 case E_DImode:
1a04ac2b
JDA
6122 sri->icode = CODE_FOR_reload_indi_r1;
6123 break;
6124
6125 default:
6126 gcc_unreachable ();
6127 }
715a567d
JDA
6128 return NO_REGS;
6129 }
6130 }
6131
ec963611
JDA
6132 /* Profiling showed the PA port spends about 1.3% of its compilation
6133 time in true_regnum from calls inside pa_secondary_reload_class. */
6134 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6135 regno = true_regnum (x);
39dfb55a 6136
1a04ac2b 6137 /* Handle reloads for floating point loads and stores. */
6982c5d4 6138 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
0a2aaacc 6139 && FP_REG_CLASS_P (rclass))
6982c5d4 6140 {
1a04ac2b 6141 if (MEM_P (x))
6982c5d4
JDA
6142 {
6143 x = XEXP (x, 0);
6144
feb675e4
JDA
6145 /* We don't need a secondary reload for indexed memory addresses.
6146
6147 When INT14_OK_STRICT is true, it might appear that we could
6148 directly allow register indirect memory addresses. However,
6149 this doesn't work because we don't support SUBREGs in
6150 floating-point register copies and reload doesn't tell us
6151 when it's going to use a SUBREG. */
6152 if (IS_INDEX_ADDR_P (x))
6982c5d4 6153 return NO_REGS;
6982c5d4
JDA
6154 }
6155
6156 /* Request a secondary reload with a general scratch register
073a8998 6157 for everything else. ??? Could symbolic operands be handled
6982c5d4 6158 directly when generating non-pic PA 2.0 code? */
f9621cc4
RS
6159 sri->icode = (in_p
6160 ? direct_optab_handler (reload_in_optab, mode)
6161 : direct_optab_handler (reload_out_optab, mode));
6982c5d4
JDA
6162 return NO_REGS;
6163 }
6164
483d7ad3
JDA
6165 /* A SAR<->FP register copy requires an intermediate general register
6166 and secondary memory. We need a secondary reload with a general
6167 scratch register for spills. */
6168 if (rclass == SHIFT_REGS)
ec963611 6169 {
483d7ad3
JDA
6170 /* Handle spill. */
6171 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6172 {
6173 sri->icode = (in_p
6174 ? direct_optab_handler (reload_in_optab, mode)
6175 : direct_optab_handler (reload_out_optab, mode));
6176 return NO_REGS;
6177 }
6178
6179 /* Handle FP copy. */
6180 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6181 return GENERAL_REGS;
ec963611 6182 }
fa5e5c1e 6183
26ee120d 6184 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
483d7ad3
JDA
6185 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6186 && FP_REG_CLASS_P (rclass))
6187 return GENERAL_REGS;
43940f6b 6188
fa5e5c1e 6189 return NO_REGS;
188538df
TG
6190}
6191
16c16a24
JDA
6192/* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6193 is only marked as live on entry by df-scan when it is a fixed
6194 register. It isn't a fixed register in the 64-bit runtime,
6195 so we need to mark it here. */
6196
6197static void
6198pa_extra_live_on_entry (bitmap regs)
6199{
6200 if (TARGET_64BIT)
6201 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6202}
6203
6204/* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6205 to prevent it from being deleted. */
6206
6207rtx
6208pa_eh_return_handler_rtx (void)
6209{
6210 rtx tmp;
6211
bc707992 6212 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
16c16a24
JDA
6213 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6214 tmp = gen_rtx_MEM (word_mode, tmp);
6215 tmp->volatil = 1;
6216 return tmp;
6217}
6218
8cd5a4e0
RH
6219/* In the 32-bit runtime, arguments larger than eight bytes are passed
6220 by invisible reference. As a GCC extension, we also pass anything
6221 with a zero or variable size by reference.
6222
6223 The 64-bit runtime does not describe passing any types by invisible
6224 reference. The internals of GCC can't currently handle passing
6225 empty structures, and zero or variable length arrays when they are
6226 not passed entirely on the stack or by reference. Thus, as a GCC
6227 extension, we pass these types by reference. The HP compiler doesn't
6228 support these types, so hopefully there shouldn't be any compatibility
6229 issues. This may have to be revisited when HP releases a C99 compiler
6230 or updates the ABI. */
6231
6232static bool
d5cc9181 6233pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
ef4bddc2 6234 machine_mode mode, const_tree type,
8cd5a4e0
RH
6235 bool named ATTRIBUTE_UNUSED)
6236{
6237 HOST_WIDE_INT size;
6238
6239 if (type)
6240 size = int_size_in_bytes (type);
6241 else
6242 size = GET_MODE_SIZE (mode);
6243
6244 if (TARGET_64BIT)
6245 return size <= 0;
6246 else
6247 return size <= 0 || size > 8;
6248}
6249
76b0cbf8
RS
6250/* Implement TARGET_FUNCTION_ARG_PADDING. */
6251
6252static pad_direction
ef4bddc2 6253pa_function_arg_padding (machine_mode mode, const_tree type)
188538df 6254{
9dff28ab 6255 if (mode == BLKmode
c3e39a47
JDA
6256 || (TARGET_64BIT
6257 && type
6258 && (AGGREGATE_TYPE_P (type)
6259 || TREE_CODE (type) == COMPLEX_TYPE
6260 || TREE_CODE (type) == VECTOR_TYPE)))
9dff28ab 6261 {
76b0cbf8 6262 /* Return PAD_NONE if justification is not required. */
9dff28ab
JDA
6263 if (type
6264 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6265 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
76b0cbf8 6266 return PAD_NONE;
9dff28ab
JDA
6267
6268 /* The directions set here are ignored when a BLKmode argument larger
6269 than a word is placed in a register. Different code is used for
6270 the stack and registers. This makes it difficult to have a
6271 consistent data representation for both the stack and registers.
6272 For both runtimes, the justification and padding for arguments on
6273 the stack and in registers should be identical. */
6274 if (TARGET_64BIT)
6275 /* The 64-bit runtime specifies left justification for aggregates. */
76b0cbf8 6276 return PAD_UPWARD;
188538df 6277 else
9dff28ab
JDA
6278 /* The 32-bit runtime architecture specifies right justification.
6279 When the argument is passed on the stack, the argument is padded
6280 with garbage on the left. The HP compiler pads with zeros. */
76b0cbf8 6281 return PAD_DOWNWARD;
188538df 6282 }
9dff28ab
JDA
6283
6284 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
76b0cbf8 6285 return PAD_DOWNWARD;
188538df 6286 else
76b0cbf8 6287 return PAD_NONE;
188538df
TG
6288}
6289
188538df 6290\f
648d2ffc
RH
6291/* Do what is necessary for `va_start'. We look at the current function
6292 to determine if stdargs or varargs is used and fill in an initial
6293 va_list. A pointer to this constructor is returned. */
188538df 6294
3f12cd9b 6295static rtx
b7849684 6296hppa_builtin_saveregs (void)
188538df 6297{
5e32727c 6298 rtx offset, dest;
188538df 6299 tree fntype = TREE_TYPE (current_function_decl);
f38958e8 6300 int argadj = ((!stdarg_p (fntype))
188538df
TG
6301 ? UNITS_PER_WORD : 0);
6302
6303 if (argadj)
0a81f074 6304 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
188538df 6305 else
38173d38 6306 offset = crtl->args.arg_offset_rtx;
17e1dfa2 6307
520babc7
JL
6308 if (TARGET_64BIT)
6309 {
6310 int i, off;
6619e96c 6311
520babc7
JL
6312 /* Adjust for varargs/stdarg differences. */
6313 if (argadj)
0a81f074 6314 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
520babc7 6315 else
38173d38 6316 offset = crtl->args.arg_offset_rtx;
520babc7
JL
6317
6318 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6319 from the incoming arg pointer and growing to larger addresses. */
6320 for (i = 26, off = -64; i >= 19; i--, off += 8)
6321 emit_move_insn (gen_rtx_MEM (word_mode,
0a81f074
RS
6322 plus_constant (Pmode,
6323 arg_pointer_rtx, off)),
520babc7
JL
6324 gen_rtx_REG (word_mode, i));
6325
6326 /* The incoming args pointer points just beyond the flushback area;
f710504c 6327 normally this is not a serious concern. However, when we are doing
520babc7
JL
6328 varargs/stdargs we want to make the arg pointer point to the start
6329 of the incoming argument area. */
6330 emit_move_insn (virtual_incoming_args_rtx,
0a81f074 6331 plus_constant (Pmode, arg_pointer_rtx, -64));
520babc7
JL
6332
6333 /* Now return a pointer to the first anonymous argument. */
6334 return copy_to_reg (expand_binop (Pmode, add_optab,
6335 virtual_incoming_args_rtx,
6336 offset, 0, 0, OPTAB_LIB_WIDEN));
6337 }
6338
fe19a83d 6339 /* Store general registers on the stack. */
ad2c71b7 6340 dest = gen_rtx_MEM (BLKmode,
0a81f074 6341 plus_constant (Pmode, crtl->args.internal_arg_pointer,
ad2c71b7 6342 -16));
ba4828e0 6343 set_mem_alias_set (dest, get_varargs_alias_set ());
8ac61af7 6344 set_mem_align (dest, BITS_PER_WORD);
c6b97fac 6345 move_block_from_reg (23, dest, 4);
5e32727c 6346
39dfb55a
JL
6347 /* move_block_from_reg will emit code to store the argument registers
6348 individually as scalar stores.
6349
6350 However, other insns may later load from the same addresses for
956d6950 6351 a structure load (passing a struct to a varargs routine).
39dfb55a
JL
6352
6353 The alias code assumes that such aliasing can never happen, so we
6354 have to keep memory referencing insns from moving up beyond the
6355 last argument register store. So we emit a blockage insn here. */
6356 emit_insn (gen_blockage ());
6357
17e1dfa2 6358 return copy_to_reg (expand_binop (Pmode, add_optab,
38173d38 6359 crtl->args.internal_arg_pointer,
17e1dfa2 6360 offset, 0, 0, OPTAB_LIB_WIDEN));
188538df 6361}
d2a94ec0 6362
d7bd8aeb 6363static void
b7849684 6364hppa_va_start (tree valist, rtx nextarg)
ca5f4364
RH
6365{
6366 nextarg = expand_builtin_saveregs ();
e5faf155 6367 std_expand_builtin_va_start (valist, nextarg);
ca5f4364
RH
6368}
6369
8101c928 6370static tree
726a989a
RB
6371hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6372 gimple_seq *post_p)
ca5f4364 6373{
520babc7
JL
6374 if (TARGET_64BIT)
6375 {
8101c928 6376 /* Args grow upward. We can use the generic routines. */
af064de5 6377 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
ca5f4364 6378 }
9dff28ab 6379 else /* !TARGET_64BIT */
ca5f4364 6380 {
8101c928
RH
6381 tree ptr = build_pointer_type (type);
6382 tree valist_type;
6383 tree t, u;
6384 unsigned int size, ofs;
af064de5 6385 bool indirect;
ca5f4364 6386
af064de5 6387 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
8101c928 6388 if (indirect)
9dff28ab 6389 {
8101c928
RH
6390 type = ptr;
6391 ptr = build_pointer_type (type);
ca5f4364 6392 }
8101c928
RH
6393 size = int_size_in_bytes (type);
6394 valist_type = TREE_TYPE (valist);
9dff28ab 6395
8101c928 6396 /* Args grow down. Not handled by generic routines. */
9dff28ab 6397
5be014d5
AP
6398 u = fold_convert (sizetype, size_in_bytes (type));
6399 u = fold_build1 (NEGATE_EXPR, sizetype, u);
5d49b6a7 6400 t = fold_build_pointer_plus (valist, u);
9dff28ab 6401
e4f1aef1
RG
6402 /* Align to 4 or 8 byte boundary depending on argument size. */
6403
6404 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6405 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
5be014d5 6406 t = fold_convert (valist_type, t);
8101c928 6407
66863d89 6408 t = build2 (MODIFY_EXPR, valist_type, valist, t);
ca5f4364 6409
8101c928
RH
6410 ofs = (8 - size) % 4;
6411 if (ofs != 0)
5d49b6a7 6412 t = fold_build_pointer_plus_hwi (t, ofs);
ca5f4364 6413
8101c928 6414 t = fold_convert (ptr, t);
d6e9821f 6415 t = build_va_arg_indirect_ref (t);
ca5f4364 6416
8101c928 6417 if (indirect)
d6e9821f 6418 t = build_va_arg_indirect_ref (t);
ca5f4364 6419
8101c928
RH
6420 return t;
6421 }
6422}
ca5f4364 6423
83c32f2e
JDA
6424/* True if MODE is valid for the target. By "valid", we mean able to
6425 be manipulated in non-trivial ways. In particular, this means all
6426 the arithmetic is supported.
6427
6428 Currently, TImode is not valid as the HP 64-bit runtime documentation
6429 doesn't document the alignment and calling conventions for this type.
6430 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6431 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6432
6433static bool
18e2a8b8 6434pa_scalar_mode_supported_p (scalar_mode mode)
83c32f2e
JDA
6435{
6436 int precision = GET_MODE_PRECISION (mode);
6437
6438 switch (GET_MODE_CLASS (mode))
6439 {
6440 case MODE_PARTIAL_INT:
6441 case MODE_INT:
6442 if (precision == CHAR_TYPE_SIZE)
6443 return true;
6444 if (precision == SHORT_TYPE_SIZE)
6445 return true;
6446 if (precision == INT_TYPE_SIZE)
6447 return true;
6448 if (precision == LONG_TYPE_SIZE)
6449 return true;
6450 if (precision == LONG_LONG_TYPE_SIZE)
6451 return true;
6452 return false;
6453
6454 case MODE_FLOAT:
6455 if (precision == FLOAT_TYPE_SIZE)
6456 return true;
6457 if (precision == DOUBLE_TYPE_SIZE)
6458 return true;
6459 if (precision == LONG_DOUBLE_TYPE_SIZE)
6460 return true;
6461 return false;
6462
70c1d012
JDA
6463 case MODE_DECIMAL_FLOAT:
6464 return false;
6465
83c32f2e
JDA
6466 default:
6467 gcc_unreachable ();
6468 }
6469}
6470
f5e66865 6471/* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
16923e7b 6472 it branches into the delay slot. Otherwise, return FALSE. */
f5e66865
JDA
6473
6474static bool
84034c69 6475branch_to_delay_slot_p (rtx_insn *insn)
f5e66865 6476{
e0d80a58 6477 rtx_insn *jump_insn;
16923e7b 6478
f5e66865
JDA
6479 if (dbr_sequence_length ())
6480 return FALSE;
6481
7c9796ed 6482 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
16923e7b
JDA
6483 while (insn)
6484 {
6485 insn = next_active_insn (insn);
6486 if (jump_insn == insn)
6487 return TRUE;
6488
6489 /* We can't rely on the length of asms. So, we return FALSE when
6490 the branch is followed by an asm. */
6491 if (!insn
6492 || GET_CODE (PATTERN (insn)) == ASM_INPUT
93671519 6493 || asm_noperands (PATTERN (insn)) >= 0
16923e7b
JDA
6494 || get_attr_length (insn) > 0)
6495 break;
6496 }
6497
6498 return FALSE;
f5e66865
JDA
6499}
6500
16923e7b 6501/* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
f5e66865
JDA
6502
6503 This occurs when INSN has an unfilled delay slot and is followed
16923e7b
JDA
6504 by an asm. Disaster can occur if the asm is empty and the jump
6505 branches into the delay slot. So, we add a nop in the delay slot
6506 when this occurs. */
f5e66865
JDA
6507
6508static bool
84034c69 6509branch_needs_nop_p (rtx_insn *insn)
f5e66865 6510{
e0d80a58 6511 rtx_insn *jump_insn;
f5e66865
JDA
6512
6513 if (dbr_sequence_length ())
6514 return FALSE;
6515
7c9796ed 6516 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
16923e7b
JDA
6517 while (insn)
6518 {
6519 insn = next_active_insn (insn);
6520 if (!insn || jump_insn == insn)
6521 return TRUE;
6522
6523 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
93671519 6524 || asm_noperands (PATTERN (insn)) >= 0)
16923e7b
JDA
6525 && get_attr_length (insn) > 0)
6526 break;
6527 }
6528
6529 return FALSE;
6530}
6531
6532/* Return TRUE if INSN, a forward jump insn, can use nullification
6533 to skip the following instruction. This avoids an extra cycle due
6534 to a mis-predicted branch when we fall through. */
6535
6536static bool
84034c69 6537use_skip_p (rtx_insn *insn)
16923e7b 6538{
7c9796ed 6539 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
16923e7b
JDA
6540
6541 while (insn)
6542 {
6543 insn = next_active_insn (insn);
6544
6545 /* We can't rely on the length of asms, so we can't skip asms. */
6546 if (!insn
6547 || GET_CODE (PATTERN (insn)) == ASM_INPUT
93671519 6548 || asm_noperands (PATTERN (insn)) >= 0)
16923e7b
JDA
6549 break;
6550 if (get_attr_length (insn) == 4
6551 && jump_insn == next_active_insn (insn))
6552 return TRUE;
6553 if (get_attr_length (insn) > 0)
6554 break;
6555 }
6556
6557 return FALSE;
f5e66865
JDA
6558}
6559
23f6f34f
TG
6560/* This routine handles all the normal conditional branch sequences we
6561 might need to generate. It handles compare immediate vs compare
6562 register, nullification of delay slots, varying length branches,
d2364a74 6563 negated branches, and all combinations of the above. It returns the
23f6f34f 6564 output appropriate to emit the branch corresponding to all given
d2364a74
JL
6565 parameters. */
6566
519104fe 6567const char *
b32d5189 6568pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
b1a275e1 6569{
d2364a74 6570 static char buf[100];
16923e7b 6571 bool useskip;
16d74a3c
JDA
6572 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6573 int length = get_attr_length (insn);
6574 int xdelay;
d2364a74 6575
112cdef5 6576 /* A conditional branch to the following instruction (e.g. the delay slot)
02a57c73
JDA
6577 is asking for a disaster. This can happen when not optimizing and
6578 when jump optimization fails.
b1a275e1 6579
7772f0a9
JDA
6580 While it is usually safe to emit nothing, this can fail if the
6581 preceding instruction is a nullified branch with an empty delay
6582 slot and the same branch target as this branch. We could check
6583 for this but jump optimization should eliminate nop jumps. It
6584 is always safe to emit a nop. */
f5e66865 6585 if (branch_to_delay_slot_p (insn))
02a57c73 6586 return "nop";
23f6f34f 6587
ae2ea719
JDA
6588 /* The doubleword form of the cmpib instruction doesn't have the LEU
6589 and GTU conditions while the cmpb instruction does. Since we accept
6590 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6591 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6592 operands[2] = gen_rtx_REG (DImode, 0);
9972f30d
SE
6593 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6594 operands[1] = gen_rtx_REG (DImode, 0);
ae2ea719 6595
b9821af8
JL
6596 /* If this is a long branch with its delay slot unfilled, set `nullify'
6597 as it can nullify the delay slot and save a nop. */
a1b36964 6598 if (length == 8 && dbr_sequence_length () == 0)
b9821af8
JL
6599 nullify = 1;
6600
6601 /* If this is a short forward conditional branch which did not get
6602 its delay slot filled, the delay slot can still be nullified. */
a1b36964 6603 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b9821af8
JL
6604 nullify = forward_branch_p (insn);
6605
23f6f34f 6606 /* A forward branch over a single nullified insn can be done with a
d2364a74
JL
6607 comclr instruction. This avoids a single cycle penalty due to
6608 mis-predicted branch if we fall through (branch not taken). */
16923e7b 6609 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
d2364a74
JL
6610
6611 switch (length)
6612 {
b9821af8
JL
6613 /* All short conditional branches except backwards with an unfilled
6614 delay slot. */
a1b36964 6615 case 4:
d2364a74 6616 if (useskip)
f38b27c7 6617 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
d2364a74 6618 else
f38b27c7 6619 strcpy (buf, "{com%I2b,|cmp%I2b,}");
520babc7
JL
6620 if (GET_MODE (operands[1]) == DImode)
6621 strcat (buf, "*");
d2364a74
JL
6622 if (negated)
6623 strcat (buf, "%B3");
6624 else
6625 strcat (buf, "%S3");
6626 if (useskip)
3b5e5fb3 6627 strcat (buf, " %2,%r1,%%r0");
d2364a74 6628 else if (nullify)
f5e66865
JDA
6629 {
6630 if (branch_needs_nop_p (insn))
6631 strcat (buf, ",n %2,%r1,%0%#");
6632 else
6633 strcat (buf, ",n %2,%r1,%0");
6634 }
23f6f34f 6635 else
dcaeffef 6636 strcat (buf, " %2,%r1,%0");
d2364a74
JL
6637 break;
6638
5bdc5878 6639 /* All long conditionals. Note a short backward branch with an
b9821af8
JL
6640 unfilled delay slot is treated just like a long backward branch
6641 with an unfilled delay slot. */
a1b36964 6642 case 8:
b9821af8 6643 /* Handle weird backwards branch with a filled delay slot
16d74a3c 6644 which is nullified. */
b9821af8
JL
6645 if (dbr_sequence_length () != 0
6646 && ! forward_branch_p (insn)
6647 && nullify)
6648 {
f38b27c7 6649 strcpy (buf, "{com%I2b,|cmp%I2b,}");
520babc7
JL
6650 if (GET_MODE (operands[1]) == DImode)
6651 strcat (buf, "*");
b9821af8
JL
6652 if (negated)
6653 strcat (buf, "%S3");
6654 else
6655 strcat (buf, "%B3");
3b5e5fb3 6656 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
b9821af8 6657 }
923f781d
JL
6658 /* Handle short backwards branch with an unfilled delay slot.
6659 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6660 taken and untaken branches. */
6661 else if (dbr_sequence_length () == 0
6662 && ! forward_branch_p (insn)
9d98a694
AO
6663 && INSN_ADDRESSES_SET_P ()
6664 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6665 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d 6666 {
f38b27c7 6667 strcpy (buf, "{com%I2b,|cmp%I2b,}");
520babc7
JL
6668 if (GET_MODE (operands[1]) == DImode)
6669 strcat (buf, "*");
923f781d 6670 if (negated)
dcaeffef 6671 strcat (buf, "%B3 %2,%r1,%0%#");
923f781d 6672 else
dcaeffef 6673 strcat (buf, "%S3 %2,%r1,%0%#");
923f781d 6674 }
d2364a74 6675 else
b9821af8 6676 {
f38b27c7 6677 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
520babc7
JL
6678 if (GET_MODE (operands[1]) == DImode)
6679 strcat (buf, "*");
b9821af8
JL
6680 if (negated)
6681 strcat (buf, "%S3");
6682 else
6683 strcat (buf, "%B3");
6684 if (nullify)
3b5e5fb3 6685 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
b9821af8 6686 else
3b5e5fb3 6687 strcat (buf, " %2,%r1,%%r0\n\tb %0");
b9821af8 6688 }
d2364a74
JL
6689 break;
6690
16d74a3c 6691 default:
685d0e07 6692 /* The reversed conditional branch must branch over one additional
16d74a3c 6693 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 6694 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
6695 nullified forward branch, the instruction after the reversed
6696 condition branch must be nullified. */
6697 if (dbr_sequence_length () == 0
6698 || (nullify && forward_branch_p (insn)))
6699 {
6700 nullify = 1;
6701 xdelay = 0;
6702 operands[4] = GEN_INT (length);
6703 }
6704 else
6705 {
6706 xdelay = 1;
6707 operands[4] = GEN_INT (length + 4);
6708 }
4bcb9e3f
JL
6709
6710 /* Create a reversed conditional branch which branches around
6711 the following insns. */
685d0e07
JDA
6712 if (GET_MODE (operands[1]) != DImode)
6713 {
6714 if (nullify)
6715 {
6716 if (negated)
6717 strcpy (buf,
6718 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6719 else
6720 strcpy (buf,
6721 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6722 }
6723 else
6724 {
6725 if (negated)
6726 strcpy (buf,
6727 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6728 else
6729 strcpy (buf,
6730 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6731 }
6732 }
4bcb9e3f 6733 else
520babc7 6734 {
685d0e07
JDA
6735 if (nullify)
6736 {
6737 if (negated)
6738 strcpy (buf,
6739 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6740 else
6741 strcpy (buf,
6742 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6743 }
520babc7 6744 else
685d0e07
JDA
6745 {
6746 if (negated)
6747 strcpy (buf,
6748 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6749 else
6750 strcpy (buf,
6751 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6752 }
520babc7 6753 }
4bcb9e3f 6754
16d74a3c 6755 output_asm_insn (buf, operands);
ae9d61ab 6756 return pa_output_lbranch (operands[0], insn, xdelay);
685d0e07
JDA
6757 }
6758 return buf;
6759}
4bcb9e3f 6760
568de9bb
JDA
6761/* Output a PIC pc-relative instruction sequence to load the address of
6762 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
6763 or a code label. OPERANDS[1] specifies the register to use to load
6764 the program counter. OPERANDS[3] may be used for label generation.
6765 The sequence is always three instructions in length. The program
6766 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6767 Register %r1 is clobbered. */
6768
6769static void
6770pa_output_pic_pcrel_sequence (rtx *operands)
6771{
6772 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6773 if (TARGET_PA_20)
6774 {
6775 /* We can use mfia to determine the current program counter. */
6776 if (TARGET_SOM || !TARGET_GAS)
6777 {
6778 operands[3] = gen_label_rtx ();
6779 targetm.asm_out.internal_label (asm_out_file, "L",
6780 CODE_LABEL_NUMBER (operands[3]));
6781 output_asm_insn ("mfia %1", operands);
6782 output_asm_insn ("addil L'%0-%l3,%1", operands);
6783 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6784 }
6785 else
6786 {
6787 output_asm_insn ("mfia %1", operands);
6788 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6789 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6790 }
6791 }
6792 else
6793 {
6794 /* We need to use a branch to determine the current program counter. */
6795 output_asm_insn ("{bl|b,l} .+8,%1", operands);
6796 if (TARGET_SOM || !TARGET_GAS)
6797 {
6798 operands[3] = gen_label_rtx ();
6799 output_asm_insn ("addil L'%0-%l3,%1", operands);
6800 targetm.asm_out.internal_label (asm_out_file, "L",
6801 CODE_LABEL_NUMBER (operands[3]));
6802 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6803 }
6804 else
6805 {
6806 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6807 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
6808 }
6809 }
6810}
6811
16d74a3c
JDA
6812/* This routine handles output of long unconditional branches that
6813 exceed the maximum range of a simple branch instruction. Since
6814 we don't have a register available for the branch, we save register
6815 %r1 in the frame marker, load the branch destination DEST into %r1,
6816 execute the branch, and restore %r1 in the delay slot of the branch.
6817
6818 Since long branches may have an insn in the delay slot and the
6819 delay slot is used to restore %r1, we in general need to extract
6820 this insn and execute it before the branch. However, to facilitate
6821 use of this function by conditional branches, we also provide an
6822 option to not extract the delay insn so that it will be emitted
6823 after the long branch. So, if there is an insn in the delay slot,
6824 it is extracted if XDELAY is nonzero.
6825
6826 The lengths of the various long-branch sequences are 20, 16 and 24
6827 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
4bcb9e3f 6828
685d0e07 6829const char *
b32d5189 6830pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
685d0e07 6831{
568de9bb 6832 rtx xoperands[4];
685d0e07
JDA
6833
6834 xoperands[0] = dest;
4bcb9e3f 6835
685d0e07 6836 /* First, free up the delay slot. */
16d74a3c 6837 if (xdelay && dbr_sequence_length () != 0)
685d0e07
JDA
6838 {
6839 /* We can't handle a jump in the delay slot. */
b64925dc 6840 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
4bcb9e3f 6841
685d0e07 6842 final_scan_insn (NEXT_INSN (insn), asm_out_file,
c9d691e9 6843 optimize, 0, NULL);
4bcb9e3f 6844
685d0e07 6845 /* Now delete the delay insn. */
a38e7aa5 6846 SET_INSN_DELETED (NEXT_INSN (insn));
685d0e07 6847 }
4bcb9e3f 6848
685d0e07
JDA
6849 /* Output an insn to save %r1. The runtime documentation doesn't
6850 specify whether the "Clean Up" slot in the callers frame can
6851 be clobbered by the callee. It isn't copied by HP's builtin
6852 alloca, so this suggests that it can be clobbered if necessary.
6853 The "Static Link" location is copied by HP builtin alloca, so
6854 we avoid using it. Using the cleanup slot might be a problem
6855 if we have to interoperate with languages that pass cleanup
6856 information. However, it should be possible to handle these
6857 situations with GCC's asm feature.
6858
6859 The "Current RP" slot is reserved for the called procedure, so
6860 we try to use it when we don't have a frame of our own. It's
6861 rather unlikely that we won't have a frame when we need to emit
6862 a very long branch.
6863
6864 Really the way to go long term is a register scavenger; goto
6865 the target of the jump and find a register which we can use
6866 as a scratch to hold the value in %r1. Then, we wouldn't have
6867 to free up the delay slot or clobber a slot that may be needed
6868 for other purposes. */
6869 if (TARGET_64BIT)
6870 {
6fb5fa3c 6871 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6872 /* Use the return pointer slot in the frame marker. */
6873 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6874 else
6875 /* Use the slot at -40 in the frame marker since HP builtin
6876 alloca doesn't copy it. */
6877 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6878 }
6879 else
6880 {
6fb5fa3c 6881 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6882 /* Use the return pointer slot in the frame marker. */
6883 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6884 else
6885 /* Use the "Clean Up" slot in the frame marker. In GCC,
6886 the only other use of this location is for copying a
6887 floating point double argument from a floating-point
6888 register to two general registers. The copy is done
aa7f1eb1 6889 as an "atomic" operation when outputting a call, so it
685d0e07
JDA
6890 won't interfere with our using the location here. */
6891 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6892 }
3d9268b6 6893
5fad1c24
JDA
6894 if (TARGET_PORTABLE_RUNTIME)
6895 {
6896 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6897 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6898 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6899 }
6900 else if (flag_pic)
685d0e07 6901 {
568de9bb
JDA
6902 xoperands[1] = gen_rtx_REG (Pmode, 1);
6903 xoperands[2] = xoperands[1];
6904 pa_output_pic_pcrel_sequence (xoperands);
685d0e07
JDA
6905 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6906 }
6907 else
6908 /* Now output a very long branch to the original target. */
6909 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
4bcb9e3f 6910
685d0e07
JDA
6911 /* Now restore the value of %r1 in the delay slot. */
6912 if (TARGET_64BIT)
6913 {
6fb5fa3c 6914 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6915 return "ldd -16(%%r30),%%r1";
6916 else
6917 return "ldd -40(%%r30),%%r1";
6918 }
6919 else
6920 {
6fb5fa3c 6921 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
685d0e07
JDA
6922 return "ldw -20(%%r30),%%r1";
6923 else
6924 return "ldw -12(%%r30),%%r1";
b9821af8 6925 }
d2364a74
JL
6926}
6927
23f6f34f 6928/* This routine handles all the branch-on-bit conditional branch sequences we
d2364a74
JL
6929 might need to generate. It handles nullification of delay slots,
6930 varying length branches, negated branches and all combinations of the
6931 above. It returns the appropriate output template to emit the branch. */
6932
519104fe 6933const char *
b32d5189 6934pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
b1a275e1 6935{
d2364a74 6936 static char buf[100];
16923e7b 6937 bool useskip;
16d74a3c
JDA
6938 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6939 int length = get_attr_length (insn);
6940 int xdelay;
d2364a74 6941
112cdef5 6942 /* A conditional branch to the following instruction (e.g. the delay slot) is
b1a275e1 6943 asking for a disaster. I do not think this can happen as this pattern
23f6f34f 6944 is only used when optimizing; jump optimization should eliminate the
b1a275e1 6945 jump. But be prepared just in case. */
23f6f34f 6946
f5e66865 6947 if (branch_to_delay_slot_p (insn))
02a57c73 6948 return "nop";
23f6f34f 6949
b9821af8
JL
6950 /* If this is a long branch with its delay slot unfilled, set `nullify'
6951 as it can nullify the delay slot and save a nop. */
a1b36964 6952 if (length == 8 && dbr_sequence_length () == 0)
b9821af8
JL
6953 nullify = 1;
6954
6955 /* If this is a short forward conditional branch which did not get
6956 its delay slot filled, the delay slot can still be nullified. */
a1b36964 6957 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b9821af8
JL
6958 nullify = forward_branch_p (insn);
6959
23f6f34f 6960 /* A forward branch over a single nullified insn can be done with a
d2364a74
JL
6961 extrs instruction. This avoids a single cycle penalty due to
6962 mis-predicted branch if we fall through (branch not taken). */
16923e7b 6963 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
d2364a74
JL
6964
6965 switch (length)
6966 {
6967
b9821af8
JL
6968 /* All short conditional branches except backwards with an unfilled
6969 delay slot. */
a1b36964 6970 case 4:
d2364a74 6971 if (useskip)
f38b27c7 6972 strcpy (buf, "{extrs,|extrw,s,}");
23f6f34f 6973 else
d2364a74 6974 strcpy (buf, "bb,");
520babc7
JL
6975 if (useskip && GET_MODE (operands[0]) == DImode)
6976 strcpy (buf, "extrd,s,*");
6977 else if (GET_MODE (operands[0]) == DImode)
6978 strcpy (buf, "bb,*");
d2364a74
JL
6979 if ((which == 0 && negated)
6980 || (which == 1 && ! negated))
6981 strcat (buf, ">=");
6982 else
6983 strcat (buf, "<");
6984 if (useskip)
3b5e5fb3 6985 strcat (buf, " %0,%1,1,%%r0");
d2364a74 6986 else if (nullify && negated)
f5e66865
JDA
6987 {
6988 if (branch_needs_nop_p (insn))
6989 strcat (buf, ",n %0,%1,%3%#");
6990 else
6991 strcat (buf, ",n %0,%1,%3");
6992 }
d2364a74 6993 else if (nullify && ! negated)
f5e66865
JDA
6994 {
6995 if (branch_needs_nop_p (insn))
6996 strcat (buf, ",n %0,%1,%2%#");
6997 else
6998 strcat (buf, ",n %0,%1,%2");
6999 }
d2364a74 7000 else if (! nullify && negated)
f5e66865 7001 strcat (buf, " %0,%1,%3");
d2364a74 7002 else if (! nullify && ! negated)
b9821af8 7003 strcat (buf, " %0,%1,%2");
d2364a74
JL
7004 break;
7005
5bdc5878 7006 /* All long conditionals. Note a short backward branch with an
b9821af8
JL
7007 unfilled delay slot is treated just like a long backward branch
7008 with an unfilled delay slot. */
a1b36964 7009 case 8:
b9821af8 7010 /* Handle weird backwards branch with a filled delay slot
16d74a3c 7011 which is nullified. */
b9821af8
JL
7012 if (dbr_sequence_length () != 0
7013 && ! forward_branch_p (insn)
7014 && nullify)
7015 {
7016 strcpy (buf, "bb,");
520babc7
JL
7017 if (GET_MODE (operands[0]) == DImode)
7018 strcat (buf, "*");
b9821af8
JL
7019 if ((which == 0 && negated)
7020 || (which == 1 && ! negated))
7021 strcat (buf, "<");
7022 else
7023 strcat (buf, ">=");
7024 if (negated)
3b5e5fb3 7025 strcat (buf, ",n %0,%1,.+12\n\tb %3");
b9821af8 7026 else
3b5e5fb3 7027 strcat (buf, ",n %0,%1,.+12\n\tb %2");
b9821af8 7028 }
923f781d
JL
7029 /* Handle short backwards branch with an unfilled delay slot.
7030 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7031 taken and untaken branches. */
7032 else if (dbr_sequence_length () == 0
7033 && ! forward_branch_p (insn)
9d98a694
AO
7034 && INSN_ADDRESSES_SET_P ()
7035 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7036 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d
JL
7037 {
7038 strcpy (buf, "bb,");
520babc7
JL
7039 if (GET_MODE (operands[0]) == DImode)
7040 strcat (buf, "*");
923f781d
JL
7041 if ((which == 0 && negated)
7042 || (which == 1 && ! negated))
7043 strcat (buf, ">=");
7044 else
7045 strcat (buf, "<");
7046 if (negated)
7047 strcat (buf, " %0,%1,%3%#");
7048 else
7049 strcat (buf, " %0,%1,%2%#");
7050 }
d2364a74 7051 else
b9821af8 7052 {
520babc7
JL
7053 if (GET_MODE (operands[0]) == DImode)
7054 strcpy (buf, "extrd,s,*");
16d74a3c
JDA
7055 else
7056 strcpy (buf, "{extrs,|extrw,s,}");
b9821af8
JL
7057 if ((which == 0 && negated)
7058 || (which == 1 && ! negated))
7059 strcat (buf, "<");
7060 else
7061 strcat (buf, ">=");
7062 if (nullify && negated)
55abf18a 7063 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
b9821af8 7064 else if (nullify && ! negated)
55abf18a 7065 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
b9821af8 7066 else if (negated)
3b5e5fb3 7067 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
23f6f34f 7068 else
3b5e5fb3 7069 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
b9821af8 7070 }
d2364a74
JL
7071 break;
7072
7073 default:
16d74a3c
JDA
7074 /* The reversed conditional branch must branch over one additional
7075 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 7076 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
7077 nullified forward branch, the instruction after the reversed
7078 condition branch must be nullified. */
7079 if (dbr_sequence_length () == 0
7080 || (nullify && forward_branch_p (insn)))
7081 {
7082 nullify = 1;
7083 xdelay = 0;
8370f6fa 7084 operands[4] = GEN_INT (length);
16d74a3c
JDA
7085 }
7086 else
7087 {
7088 xdelay = 1;
8370f6fa 7089 operands[4] = GEN_INT (length + 4);
16d74a3c
JDA
7090 }
7091
7092 if (GET_MODE (operands[0]) == DImode)
8370f6fa 7093 strcpy (buf, "bb,*");
16d74a3c 7094 else
8370f6fa 7095 strcpy (buf, "bb,");
16d74a3c
JDA
7096 if ((which == 0 && negated)
7097 || (which == 1 && !negated))
8370f6fa 7098 strcat (buf, "<");
16d74a3c 7099 else
8370f6fa 7100 strcat (buf, ">=");
16d74a3c 7101 if (nullify)
8370f6fa 7102 strcat (buf, ",n %0,%1,.+%4");
16d74a3c 7103 else
8370f6fa 7104 strcat (buf, " %0,%1,.+%4");
16d74a3c 7105 output_asm_insn (buf, operands);
ae9d61ab
JDA
7106 return pa_output_lbranch (negated ? operands[3] : operands[2],
7107 insn, xdelay);
b9821af8 7108 }
d2364a74
JL
7109 return buf;
7110}
7111
6a73009d
JL
7112/* This routine handles all the branch-on-variable-bit conditional branch
7113 sequences we might need to generate. It handles nullification of delay
7114 slots, varying length branches, negated branches and all combinations
7115 of the above. It returns the appropriate output template to emit the
7116 branch. */
7117
519104fe 7118const char *
b32d5189 7119pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
ae9d61ab 7120 int which)
6a73009d
JL
7121{
7122 static char buf[100];
16923e7b 7123 bool useskip;
16d74a3c
JDA
7124 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7125 int length = get_attr_length (insn);
7126 int xdelay;
6a73009d 7127
112cdef5 7128 /* A conditional branch to the following instruction (e.g. the delay slot) is
6a73009d
JL
7129 asking for a disaster. I do not think this can happen as this pattern
7130 is only used when optimizing; jump optimization should eliminate the
7131 jump. But be prepared just in case. */
7132
f5e66865 7133 if (branch_to_delay_slot_p (insn))
02a57c73 7134 return "nop";
6a73009d
JL
7135
7136 /* If this is a long branch with its delay slot unfilled, set `nullify'
7137 as it can nullify the delay slot and save a nop. */
7138 if (length == 8 && dbr_sequence_length () == 0)
7139 nullify = 1;
7140
7141 /* If this is a short forward conditional branch which did not get
7142 its delay slot filled, the delay slot can still be nullified. */
7143 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7144 nullify = forward_branch_p (insn);
7145
7146 /* A forward branch over a single nullified insn can be done with a
7147 extrs instruction. This avoids a single cycle penalty due to
7148 mis-predicted branch if we fall through (branch not taken). */
16923e7b 7149 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6a73009d
JL
7150
7151 switch (length)
7152 {
7153
7154 /* All short conditional branches except backwards with an unfilled
7155 delay slot. */
7156 case 4:
7157 if (useskip)
f38b27c7 7158 strcpy (buf, "{vextrs,|extrw,s,}");
6a73009d 7159 else
f38b27c7 7160 strcpy (buf, "{bvb,|bb,}");
520babc7 7161 if (useskip && GET_MODE (operands[0]) == DImode)
e72ed000 7162 strcpy (buf, "extrd,s,*");
520babc7
JL
7163 else if (GET_MODE (operands[0]) == DImode)
7164 strcpy (buf, "bb,*");
6a73009d
JL
7165 if ((which == 0 && negated)
7166 || (which == 1 && ! negated))
7167 strcat (buf, ">=");
7168 else
7169 strcat (buf, "<");
7170 if (useskip)
f38b27c7 7171 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6a73009d 7172 else if (nullify && negated)
f5e66865
JDA
7173 {
7174 if (branch_needs_nop_p (insn))
7175 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7176 else
7177 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7178 }
6a73009d 7179 else if (nullify && ! negated)
f5e66865
JDA
7180 {
7181 if (branch_needs_nop_p (insn))
7182 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7183 else
7184 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7185 }
6a73009d 7186 else if (! nullify && negated)
f5e66865 7187 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
6a73009d 7188 else if (! nullify && ! negated)
f38b27c7 7189 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
6a73009d
JL
7190 break;
7191
5bdc5878 7192 /* All long conditionals. Note a short backward branch with an
6a73009d
JL
7193 unfilled delay slot is treated just like a long backward branch
7194 with an unfilled delay slot. */
7195 case 8:
7196 /* Handle weird backwards branch with a filled delay slot
16d74a3c 7197 which is nullified. */
6a73009d
JL
7198 if (dbr_sequence_length () != 0
7199 && ! forward_branch_p (insn)
7200 && nullify)
7201 {
f38b27c7 7202 strcpy (buf, "{bvb,|bb,}");
520babc7
JL
7203 if (GET_MODE (operands[0]) == DImode)
7204 strcat (buf, "*");
6a73009d
JL
7205 if ((which == 0 && negated)
7206 || (which == 1 && ! negated))
7207 strcat (buf, "<");
7208 else
7209 strcat (buf, ">=");
7210 if (negated)
f38b27c7 7211 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6a73009d 7212 else
f38b27c7 7213 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6a73009d
JL
7214 }
7215 /* Handle short backwards branch with an unfilled delay slot.
7216 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7217 taken and untaken branches. */
7218 else if (dbr_sequence_length () == 0
7219 && ! forward_branch_p (insn)
9d98a694
AO
7220 && INSN_ADDRESSES_SET_P ()
7221 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7222 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6a73009d 7223 {
f38b27c7 7224 strcpy (buf, "{bvb,|bb,}");
520babc7
JL
7225 if (GET_MODE (operands[0]) == DImode)
7226 strcat (buf, "*");
6a73009d
JL
7227 if ((which == 0 && negated)
7228 || (which == 1 && ! negated))
7229 strcat (buf, ">=");
7230 else
7231 strcat (buf, "<");
7232 if (negated)
f38b27c7 7233 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
6a73009d 7234 else
f38b27c7 7235 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
6a73009d
JL
7236 }
7237 else
7238 {
f38b27c7 7239 strcpy (buf, "{vextrs,|extrw,s,}");
520babc7
JL
7240 if (GET_MODE (operands[0]) == DImode)
7241 strcpy (buf, "extrd,s,*");
6a73009d
JL
7242 if ((which == 0 && negated)
7243 || (which == 1 && ! negated))
7244 strcat (buf, "<");
7245 else
7246 strcat (buf, ">=");
7247 if (nullify && negated)
f38b27c7 7248 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6a73009d 7249 else if (nullify && ! negated)
f38b27c7 7250 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6a73009d 7251 else if (negated)
f38b27c7 7252 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6a73009d 7253 else
f38b27c7 7254 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6a73009d
JL
7255 }
7256 break;
7257
7258 default:
16d74a3c
JDA
7259 /* The reversed conditional branch must branch over one additional
7260 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 7261 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
7262 nullified forward branch, the instruction after the reversed
7263 condition branch must be nullified. */
7264 if (dbr_sequence_length () == 0
7265 || (nullify && forward_branch_p (insn)))
7266 {
7267 nullify = 1;
7268 xdelay = 0;
8370f6fa 7269 operands[4] = GEN_INT (length);
16d74a3c
JDA
7270 }
7271 else
7272 {
7273 xdelay = 1;
8370f6fa 7274 operands[4] = GEN_INT (length + 4);
16d74a3c
JDA
7275 }
7276
7277 if (GET_MODE (operands[0]) == DImode)
8370f6fa 7278 strcpy (buf, "bb,*");
16d74a3c 7279 else
8370f6fa 7280 strcpy (buf, "{bvb,|bb,}");
16d74a3c
JDA
7281 if ((which == 0 && negated)
7282 || (which == 1 && !negated))
8370f6fa 7283 strcat (buf, "<");
16d74a3c 7284 else
8370f6fa 7285 strcat (buf, ">=");
16d74a3c 7286 if (nullify)
8370f6fa 7287 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
16d74a3c 7288 else
8370f6fa 7289 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
16d74a3c 7290 output_asm_insn (buf, operands);
ae9d61ab
JDA
7291 return pa_output_lbranch (negated ? operands[3] : operands[2],
7292 insn, xdelay);
6a73009d
JL
7293 }
7294 return buf;
7295}
7296
b1a275e1
JL
7297/* Return the output template for emitting a dbra type insn.
7298
7299 Note it may perform some output operations on its own before
7300 returning the final output string. */
519104fe 7301const char *
b32d5189 7302pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
b1a275e1 7303{
16d74a3c 7304 int length = get_attr_length (insn);
b1a275e1 7305
112cdef5 7306 /* A conditional branch to the following instruction (e.g. the delay slot) is
b1a275e1
JL
7307 asking for a disaster. Be prepared! */
7308
f5e66865 7309 if (branch_to_delay_slot_p (insn))
b1a275e1
JL
7310 {
7311 if (which_alternative == 0)
7312 return "ldo %1(%0),%0";
7313 else if (which_alternative == 1)
7314 {
831c1763
AM
7315 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7316 output_asm_insn ("ldw -16(%%r30),%4", operands);
d2d28085 7317 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
f38b27c7 7318 return "{fldws|fldw} -16(%%r30),%0";
b1a275e1
JL
7319 }
7320 else
7321 {
7322 output_asm_insn ("ldw %0,%4", operands);
7323 return "ldo %1(%4),%4\n\tstw %4,%0";
7324 }
7325 }
7326
7327 if (which_alternative == 0)
7328 {
7329 int nullify = INSN_ANNULLED_BRANCH_P (insn);
16d74a3c 7330 int xdelay;
b1a275e1
JL
7331
7332 /* If this is a long branch with its delay slot unfilled, set `nullify'
7333 as it can nullify the delay slot and save a nop. */
a1b36964 7334 if (length == 8 && dbr_sequence_length () == 0)
b1a275e1
JL
7335 nullify = 1;
7336
7337 /* If this is a short forward conditional branch which did not get
7338 its delay slot filled, the delay slot can still be nullified. */
a1b36964 7339 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b1a275e1
JL
7340 nullify = forward_branch_p (insn);
7341
144d51f9 7342 switch (length)
b1a275e1 7343 {
144d51f9
NS
7344 case 4:
7345 if (nullify)
f5e66865
JDA
7346 {
7347 if (branch_needs_nop_p (insn))
7348 return "addib,%C2,n %1,%0,%3%#";
7349 else
7350 return "addib,%C2,n %1,%0,%3";
7351 }
144d51f9
NS
7352 else
7353 return "addib,%C2 %1,%0,%3";
7354
7355 case 8:
23f6f34f 7356 /* Handle weird backwards branch with a fulled delay slot
b1a275e1
JL
7357 which is nullified. */
7358 if (dbr_sequence_length () != 0
7359 && ! forward_branch_p (insn)
7360 && nullify)
3b5e5fb3 7361 return "addib,%N2,n %1,%0,.+12\n\tb %3";
923f781d
JL
7362 /* Handle short backwards branch with an unfilled delay slot.
7363 Using a addb;nop rather than addi;bl saves 1 cycle for both
7364 taken and untaken branches. */
7365 else if (dbr_sequence_length () == 0
7366 && ! forward_branch_p (insn)
9d98a694
AO
7367 && INSN_ADDRESSES_SET_P ()
7368 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7369 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d 7370 return "addib,%C2 %1,%0,%3%#";
23f6f34f
TG
7371
7372 /* Handle normal cases. */
b1a275e1 7373 if (nullify)
3b5e5fb3 7374 return "addi,%N2 %1,%0,%0\n\tb,n %3";
b1a275e1 7375 else
3b5e5fb3 7376 return "addi,%N2 %1,%0,%0\n\tb %3";
144d51f9
NS
7377
7378 default:
16d74a3c
JDA
7379 /* The reversed conditional branch must branch over one additional
7380 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 7381 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
7382 nullified forward branch, the instruction after the reversed
7383 condition branch must be nullified. */
7384 if (dbr_sequence_length () == 0
7385 || (nullify && forward_branch_p (insn)))
7386 {
7387 nullify = 1;
7388 xdelay = 0;
7389 operands[4] = GEN_INT (length);
7390 }
7391 else
7392 {
7393 xdelay = 1;
7394 operands[4] = GEN_INT (length + 4);
7395 }
7396
7397 if (nullify)
7398 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7399 else
7400 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7401
ae9d61ab 7402 return pa_output_lbranch (operands[3], insn, xdelay);
b1a275e1 7403 }
144d51f9 7404
b1a275e1
JL
7405 }
7406 /* Deal with gross reload from FP register case. */
7407 else if (which_alternative == 1)
7408 {
7409 /* Move loop counter from FP register to MEM then into a GR,
7410 increment the GR, store the GR into MEM, and finally reload
23f6f34f 7411 the FP register from MEM from within the branch's delay slot. */
831c1763
AM
7412 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7413 operands);
d2d28085 7414 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
16d74a3c 7415 if (length == 24)
f38b27c7 7416 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c 7417 else if (length == 28)
f38b27c7 7418 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c
JDA
7419 else
7420 {
8370f6fa
JDA
7421 operands[5] = GEN_INT (length - 16);
7422 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
16d74a3c 7423 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
ae9d61ab 7424 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7425 }
b1a275e1
JL
7426 }
7427 /* Deal with gross reload from memory case. */
7428 else
7429 {
7430 /* Reload loop counter from memory, the store back to memory
71cc389b 7431 happens in the branch's delay slot. */
b1a275e1 7432 output_asm_insn ("ldw %0,%4", operands);
16d74a3c 7433 if (length == 12)
b1a275e1 7434 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
16d74a3c 7435 else if (length == 16)
3b5e5fb3 7436 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
16d74a3c
JDA
7437 else
7438 {
8370f6fa
JDA
7439 operands[5] = GEN_INT (length - 4);
7440 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
ae9d61ab 7441 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7442 }
b1a275e1
JL
7443 }
7444}
7445
16d74a3c 7446/* Return the output template for emitting a movb type insn.
b1a275e1
JL
7447
7448 Note it may perform some output operations on its own before
7449 returning the final output string. */
519104fe 7450const char *
b32d5189 7451pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
b7849684 7452 int reverse_comparison)
b1a275e1 7453{
16d74a3c 7454 int length = get_attr_length (insn);
b1a275e1 7455
112cdef5 7456 /* A conditional branch to the following instruction (e.g. the delay slot) is
b1a275e1
JL
7457 asking for a disaster. Be prepared! */
7458
f5e66865 7459 if (branch_to_delay_slot_p (insn))
b1a275e1
JL
7460 {
7461 if (which_alternative == 0)
7462 return "copy %1,%0";
7463 else if (which_alternative == 1)
7464 {
831c1763 7465 output_asm_insn ("stw %1,-16(%%r30)", operands);
f38b27c7 7466 return "{fldws|fldw} -16(%%r30),%0";
b1a275e1 7467 }
b1092901 7468 else if (which_alternative == 2)
b1a275e1 7469 return "stw %1,%0";
b1092901
JL
7470 else
7471 return "mtsar %r1";
b1a275e1
JL
7472 }
7473
7474 /* Support the second variant. */
7475 if (reverse_comparison)
7476 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7477
7478 if (which_alternative == 0)
7479 {
7480 int nullify = INSN_ANNULLED_BRANCH_P (insn);
16d74a3c 7481 int xdelay;
b1a275e1
JL
7482
7483 /* If this is a long branch with its delay slot unfilled, set `nullify'
7484 as it can nullify the delay slot and save a nop. */
a1b36964 7485 if (length == 8 && dbr_sequence_length () == 0)
b1a275e1
JL
7486 nullify = 1;
7487
7488 /* If this is a short forward conditional branch which did not get
7489 its delay slot filled, the delay slot can still be nullified. */
a1b36964 7490 if (! nullify && length == 4 && dbr_sequence_length () == 0)
b1a275e1
JL
7491 nullify = forward_branch_p (insn);
7492
144d51f9 7493 switch (length)
b1a275e1 7494 {
144d51f9
NS
7495 case 4:
7496 if (nullify)
f5e66865
JDA
7497 {
7498 if (branch_needs_nop_p (insn))
7499 return "movb,%C2,n %1,%0,%3%#";
7500 else
7501 return "movb,%C2,n %1,%0,%3";
7502 }
144d51f9
NS
7503 else
7504 return "movb,%C2 %1,%0,%3";
7505
7506 case 8:
23f6f34f 7507 /* Handle weird backwards branch with a filled delay slot
b1a275e1
JL
7508 which is nullified. */
7509 if (dbr_sequence_length () != 0
7510 && ! forward_branch_p (insn)
7511 && nullify)
3b5e5fb3 7512 return "movb,%N2,n %1,%0,.+12\n\tb %3";
23f6f34f 7513
923f781d
JL
7514 /* Handle short backwards branch with an unfilled delay slot.
7515 Using a movb;nop rather than or;bl saves 1 cycle for both
7516 taken and untaken branches. */
7517 else if (dbr_sequence_length () == 0
7518 && ! forward_branch_p (insn)
9d98a694
AO
7519 && INSN_ADDRESSES_SET_P ()
7520 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7521 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
923f781d 7522 return "movb,%C2 %1,%0,%3%#";
23f6f34f 7523 /* Handle normal cases. */
b1a275e1 7524 if (nullify)
3b5e5fb3 7525 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
b1a275e1 7526 else
3b5e5fb3 7527 return "or,%N2 %1,%%r0,%0\n\tb %3";
144d51f9
NS
7528
7529 default:
16d74a3c
JDA
7530 /* The reversed conditional branch must branch over one additional
7531 instruction if the delay slot is filled and needs to be extracted
ae9d61ab 7532 by pa_output_lbranch. If the delay slot is empty or this is a
16d74a3c
JDA
7533 nullified forward branch, the instruction after the reversed
7534 condition branch must be nullified. */
7535 if (dbr_sequence_length () == 0
7536 || (nullify && forward_branch_p (insn)))
7537 {
7538 nullify = 1;
7539 xdelay = 0;
7540 operands[4] = GEN_INT (length);
7541 }
7542 else
7543 {
7544 xdelay = 1;
7545 operands[4] = GEN_INT (length + 4);
7546 }
7547
7548 if (nullify)
7549 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7550 else
7551 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7552
ae9d61ab 7553 return pa_output_lbranch (operands[3], insn, xdelay);
b1a275e1 7554 }
b1a275e1 7555 }
16d74a3c 7556 /* Deal with gross reload for FP destination register case. */
b1a275e1
JL
7557 else if (which_alternative == 1)
7558 {
16d74a3c
JDA
7559 /* Move source register to MEM, perform the branch test, then
7560 finally load the FP register from MEM from within the branch's
7561 delay slot. */
831c1763 7562 output_asm_insn ("stw %1,-16(%%r30)", operands);
16d74a3c 7563 if (length == 12)
f38b27c7 7564 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c 7565 else if (length == 16)
f38b27c7 7566 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
16d74a3c
JDA
7567 else
7568 {
8370f6fa
JDA
7569 operands[4] = GEN_INT (length - 4);
7570 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
16d74a3c 7571 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
ae9d61ab 7572 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7573 }
b1a275e1
JL
7574 }
7575 /* Deal with gross reload from memory case. */
b1092901 7576 else if (which_alternative == 2)
b1a275e1
JL
7577 {
7578 /* Reload loop counter from memory, the store back to memory
71cc389b 7579 happens in the branch's delay slot. */
16d74a3c 7580 if (length == 8)
f38b27c7 7581 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
16d74a3c 7582 else if (length == 12)
f38b27c7 7583 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
16d74a3c
JDA
7584 else
7585 {
8370f6fa
JDA
7586 operands[4] = GEN_INT (length);
7587 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7588 operands);
ae9d61ab 7589 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7590 }
b1a275e1 7591 }
b1092901
JL
7592 /* Handle SAR as a destination. */
7593 else
7594 {
16d74a3c 7595 if (length == 8)
f38b27c7 7596 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
16d74a3c 7597 else if (length == 12)
715ab8c3 7598 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
16d74a3c
JDA
7599 else
7600 {
8370f6fa
JDA
7601 operands[4] = GEN_INT (length);
7602 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7603 operands);
ae9d61ab 7604 return pa_output_lbranch (operands[3], insn, 0);
16d74a3c 7605 }
b1092901 7606 }
b1a275e1
JL
7607}
7608
a02aa5b0
JDA
7609/* Copy any FP arguments in INSN into integer registers. */
7610static void
e0d80a58 7611copy_fp_args (rtx_insn *insn)
a02aa5b0
JDA
7612{
7613 rtx link;
7614 rtx xoperands[2];
b1a275e1 7615
a02aa5b0
JDA
7616 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7617 {
7618 int arg_mode, regno;
7619 rtx use = XEXP (link, 0);
f726ea7d 7620
a02aa5b0
JDA
7621 if (! (GET_CODE (use) == USE
7622 && GET_CODE (XEXP (use, 0)) == REG
7623 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7624 continue;
2c4ff308 7625
a02aa5b0
JDA
7626 arg_mode = GET_MODE (XEXP (use, 0));
7627 regno = REGNO (XEXP (use, 0));
520babc7 7628
a02aa5b0
JDA
7629 /* Is it a floating point register? */
7630 if (regno >= 32 && regno <= 39)
7631 {
7632 /* Copy the FP register into an integer register via memory. */
7633 if (arg_mode == SFmode)
7634 {
7635 xoperands[0] = XEXP (use, 0);
7636 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7637 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7638 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7639 }
7640 else
7641 {
7642 xoperands[0] = XEXP (use, 0);
7643 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7644 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7645 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7646 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7647 }
7648 }
279c9bde 7649 }
a02aa5b0
JDA
7650}
7651
7652/* Compute length of the FP argument copy sequence for INSN. */
7653static int
e0d80a58 7654length_fp_args (rtx_insn *insn)
a02aa5b0
JDA
7655{
7656 int length = 0;
7657 rtx link;
279c9bde 7658
a02aa5b0 7659 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
6a73009d 7660 {
a02aa5b0
JDA
7661 int arg_mode, regno;
7662 rtx use = XEXP (link, 0);
7663
7664 if (! (GET_CODE (use) == USE
7665 && GET_CODE (XEXP (use, 0)) == REG
7666 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7667 continue;
6a73009d 7668
a02aa5b0
JDA
7669 arg_mode = GET_MODE (XEXP (use, 0));
7670 regno = REGNO (XEXP (use, 0));
7671
7672 /* Is it a floating point register? */
7673 if (regno >= 32 && regno <= 39)
6a73009d 7674 {
a02aa5b0
JDA
7675 if (arg_mode == SFmode)
7676 length += 8;
7677 else
7678 length += 12;
6a73009d 7679 }
a02aa5b0 7680 }
6a73009d 7681
a02aa5b0
JDA
7682 return length;
7683}
3d9268b6 7684
611ad29e 7685/* Return the attribute length for the millicode call instruction INSN.
ae9d61ab 7686 The length must match the code generated by pa_output_millicode_call.
611ad29e 7687 We include the delay slot in the returned length as it is better to
a02aa5b0 7688 over estimate the length than to under estimate it. */
a7721dc0 7689
a02aa5b0 7690int
432d483a 7691pa_attr_length_millicode_call (rtx_insn *insn)
a02aa5b0 7692{
611ad29e 7693 unsigned long distance = -1;
62910663 7694 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
a02aa5b0 7695
611ad29e
JDA
7696 if (INSN_ADDRESSES_SET_P ())
7697 {
5fad1c24
JDA
7698 distance = (total + insn_current_reference_address (insn));
7699 if (distance < total)
611ad29e
JDA
7700 distance = -1;
7701 }
a02aa5b0
JDA
7702
7703 if (TARGET_64BIT)
7704 {
7705 if (!TARGET_LONG_CALLS && distance < 7600000)
611ad29e 7706 return 8;
a02aa5b0 7707
611ad29e 7708 return 20;
a02aa5b0
JDA
7709 }
7710 else if (TARGET_PORTABLE_RUNTIME)
611ad29e 7711 return 24;
a02aa5b0
JDA
7712 else
7713 {
a43434ff 7714 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
611ad29e 7715 return 8;
a02aa5b0 7716
925cb97d 7717 if (!flag_pic)
611ad29e 7718 return 12;
a02aa5b0 7719
611ad29e 7720 return 24;
a02aa5b0
JDA
7721 }
7722}
7723
bf95e88b 7724/* INSN is a function call.
a7721dc0 7725
a02aa5b0 7726 CALL_DEST is the routine we are calling. */
a7721dc0 7727
a02aa5b0 7728const char *
b32d5189 7729pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
a02aa5b0
JDA
7730{
7731 int attr_length = get_attr_length (insn);
7732 int seq_length = dbr_sequence_length ();
568de9bb 7733 rtx xoperands[4];
a7721dc0 7734
a02aa5b0 7735 xoperands[0] = call_dest;
a02aa5b0
JDA
7736
7737 /* Handle the common case where we are sure that the branch will
7738 reach the beginning of the $CODE$ subspace. The within reach
ab11fb42
JDA
7739 form of the $$sh_func_adrs call has a length of 28. Because it
7740 has an attribute type of sh_func_adrs, it never has a nonzero
7741 sequence length (i.e., the delay slot is never filled). */
a02aa5b0 7742 if (!TARGET_LONG_CALLS
ab11fb42
JDA
7743 && (attr_length == 8
7744 || (attr_length == 28
7745 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
a02aa5b0 7746 {
568de9bb
JDA
7747 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7748 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
a02aa5b0
JDA
7749 }
7750 else
7751 {
7752 if (TARGET_64BIT)
7753 {
7754 /* It might seem that one insn could be saved by accessing
7755 the millicode function using the linkage table. However,
7756 this doesn't work in shared libraries and other dynamically
7757 loaded objects. Using a pc-relative sequence also avoids
7758 problems related to the implicit use of the gp register. */
568de9bb
JDA
7759 xoperands[1] = gen_rtx_REG (Pmode, 1);
7760 xoperands[2] = xoperands[1];
7761 pa_output_pic_pcrel_sequence (xoperands);
a02aa5b0 7762 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
6a73009d 7763 }
6a73009d
JL
7764 else if (TARGET_PORTABLE_RUNTIME)
7765 {
a02aa5b0
JDA
7766 /* Pure portable runtime doesn't allow be/ble; we also don't
7767 have PIC support in the assembler/linker, so this sequence
7768 is needed. */
6a73009d 7769
a02aa5b0
JDA
7770 /* Get the address of our target into %r1. */
7771 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7772 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6a73009d 7773
a02aa5b0
JDA
7774 /* Get our return address into %r31. */
7775 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7776 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
6a73009d 7777
a02aa5b0
JDA
7778 /* Jump to our target address in %r1. */
7779 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6a73009d 7780 }
a02aa5b0 7781 else if (!flag_pic)
6a73009d 7782 {
a02aa5b0 7783 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6248c4dd 7784 if (TARGET_PA_20)
a02aa5b0 7785 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
6248c4dd 7786 else
a02aa5b0 7787 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6a73009d 7788 }
a02aa5b0 7789 else
6a73009d 7790 {
568de9bb
JDA
7791 xoperands[1] = gen_rtx_REG (Pmode, 31);
7792 xoperands[2] = gen_rtx_REG (Pmode, 1);
7793 pa_output_pic_pcrel_sequence (xoperands);
581d9404 7794
568de9bb
JDA
7795 /* Adjust return address. */
7796 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
6a73009d 7797
a02aa5b0
JDA
7798 /* Jump to our target address in %r1. */
7799 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6a73009d 7800 }
6a73009d
JL
7801 }
7802
a02aa5b0
JDA
7803 if (seq_length == 0)
7804 output_asm_insn ("nop", xoperands);
6a73009d 7805
6a73009d
JL
7806 return "";
7807}
7808
611ad29e
JDA
7809/* Return the attribute length of the call instruction INSN. The SIBCALL
7810 flag indicates whether INSN is a regular call or a sibling call. The
32562302 7811 length returned must be longer than the code actually generated by
ae9d61ab 7812 pa_output_call. Since branch shortening is done before delay branch
32562302
JDA
7813 sequencing, there is no way to determine whether or not the delay
7814 slot will be filled during branch shortening. Even when the delay
7815 slot is filled, we may have to add a nop if the delay slot contains
7816 a branch that can't reach its target. Thus, we always have to include
7817 the delay slot in the length estimate. This used to be done in
7818 pa_adjust_insn_length but we do it here now as some sequences always
7819 fill the delay slot and we can save four bytes in the estimate for
7820 these sequences. */
a02aa5b0
JDA
7821
7822int
432d483a 7823pa_attr_length_call (rtx_insn *insn, int sibcall)
a02aa5b0 7824{
32562302 7825 int local_call;
e40375e0 7826 rtx call, call_dest;
32562302
JDA
7827 tree call_decl;
7828 int length = 0;
7829 rtx pat = PATTERN (insn);
611ad29e 7830 unsigned long distance = -1;
a02aa5b0 7831
b64925dc 7832 gcc_assert (CALL_P (insn));
e40375e0 7833
611ad29e
JDA
7834 if (INSN_ADDRESSES_SET_P ())
7835 {
32562302
JDA
7836 unsigned long total;
7837
7838 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
5fad1c24
JDA
7839 distance = (total + insn_current_reference_address (insn));
7840 if (distance < total)
611ad29e
JDA
7841 distance = -1;
7842 }
a02aa5b0 7843
e40375e0 7844 gcc_assert (GET_CODE (pat) == PARALLEL);
a02aa5b0 7845
e40375e0
JDA
7846 /* Get the call rtx. */
7847 call = XVECEXP (pat, 0, 0);
7848 if (GET_CODE (call) == SET)
7849 call = SET_SRC (call);
7850
7851 gcc_assert (GET_CODE (call) == CALL);
7852
7853 /* Determine if this is a local call. */
7854 call_dest = XEXP (XEXP (call, 0), 0);
32562302 7855 call_decl = SYMBOL_REF_DECL (call_dest);
ecc418c4 7856 local_call = call_decl && targetm.binds_local_p (call_decl);
a02aa5b0 7857
32562302
JDA
7858 /* pc-relative branch. */
7859 if (!TARGET_LONG_CALLS
7860 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
a43434ff 7861 || distance < MAX_PCREL17F_OFFSET))
32562302 7862 length += 8;
a02aa5b0 7863
32562302
JDA
7864 /* 64-bit plabel sequence. */
7865 else if (TARGET_64BIT && !local_call)
7866 length += sibcall ? 28 : 24;
a02aa5b0 7867
32562302
JDA
7868 /* non-pic long absolute branch sequence. */
7869 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7870 length += 12;
a02aa5b0 7871
32562302 7872 /* long pc-relative branch sequence. */
9dbd54be 7873 else if (TARGET_LONG_PIC_SDIFF_CALL
568de9bb 7874 || (TARGET_GAS && !TARGET_SOM && local_call))
32562302
JDA
7875 {
7876 length += 20;
a02aa5b0 7877
0831e1d1 7878 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
32562302
JDA
7879 length += 8;
7880 }
62910663 7881
32562302
JDA
7882 /* 32-bit plabel sequence. */
7883 else
7884 {
7885 length += 32;
a02aa5b0 7886
32562302
JDA
7887 if (TARGET_SOM)
7888 length += length_fp_args (insn);
7889
7890 if (flag_pic)
7891 length += 4;
90330d31 7892
32562302
JDA
7893 if (!TARGET_PA_20)
7894 {
a02aa5b0
JDA
7895 if (!sibcall)
7896 length += 8;
7897
0831e1d1 7898 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
32562302 7899 length += 8;
a02aa5b0
JDA
7900 }
7901 }
32562302
JDA
7902
7903 return length;
a02aa5b0
JDA
7904}
7905
bf95e88b 7906/* INSN is a function call.
6a73009d
JL
7907
7908 CALL_DEST is the routine we are calling. */
7909
519104fe 7910const char *
432d483a 7911pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
6a73009d 7912{
3d9268b6 7913 int seq_length = dbr_sequence_length ();
5fad1c24 7914 tree call_decl = SYMBOL_REF_DECL (call_dest);
ecc418c4 7915 int local_call = call_decl && targetm.binds_local_p (call_decl);
568de9bb 7916 rtx xoperands[4];
a02aa5b0
JDA
7917
7918 xoperands[0] = call_dest;
6a73009d 7919
a02aa5b0 7920 /* Handle the common case where we're sure that the branch will reach
5fad1c24
JDA
7921 the beginning of the "$CODE$" subspace. This is the beginning of
7922 the current function if we are in a named section. */
ae9d61ab 7923 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
2c4ff308 7924 {
520babc7 7925 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
a02aa5b0 7926 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
279c9bde 7927 }
a02aa5b0 7928 else
279c9bde 7929 {
5fad1c24 7930 if (TARGET_64BIT && !local_call)
f726ea7d 7931 {
a02aa5b0
JDA
7932 /* ??? As far as I can tell, the HP linker doesn't support the
7933 long pc-relative sequence described in the 64-bit runtime
7934 architecture. So, we use a slightly longer indirect call. */
ae9d61ab 7935 xoperands[0] = pa_get_deferred_plabel (call_dest);
a02aa5b0
JDA
7936 xoperands[1] = gen_label_rtx ();
7937
7938 /* If this isn't a sibcall, we put the load of %r27 into the
7939 delay slot. We can't do this in a sibcall as we don't
bf95e88b
JDA
7940 have a second call-clobbered scratch register available.
7941 We don't need to do anything when generating fast indirect
7942 calls. */
7943 if (seq_length != 0 && !sibcall)
a02aa5b0
JDA
7944 {
7945 final_scan_insn (NEXT_INSN (insn), asm_out_file,
c9d691e9 7946 optimize, 0, NULL);
a02aa5b0
JDA
7947
7948 /* Now delete the delay insn. */
a38e7aa5 7949 SET_INSN_DELETED (NEXT_INSN (insn));
bf95e88b 7950 seq_length = 0;
a02aa5b0 7951 }
279c9bde 7952
a02aa5b0
JDA
7953 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7954 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7955 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
279c9bde 7956
a02aa5b0 7957 if (sibcall)
279c9bde 7958 {
a02aa5b0
JDA
7959 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7960 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7961 output_asm_insn ("bve (%%r1)", xoperands);
7962 }
7963 else
7964 {
7965 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7966 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7967 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
bf95e88b 7968 seq_length = 1;
279c9bde
JL
7969 }
7970 }
a02aa5b0 7971 else
93ae92c1 7972 {
a02aa5b0
JDA
7973 int indirect_call = 0;
7974
7975 /* Emit a long call. There are several different sequences
7976 of increasing length and complexity. In most cases,
7977 they don't allow an instruction in the delay slot. */
5fad1c24 7978 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
9dbd54be 7979 && !TARGET_LONG_PIC_SDIFF_CALL
568de9bb 7980 && !(TARGET_GAS && !TARGET_SOM && local_call)
5fad1c24 7981 && !TARGET_64BIT)
a02aa5b0
JDA
7982 indirect_call = 1;
7983
7984 if (seq_length != 0
a02aa5b0 7985 && !sibcall
44b86471
JDA
7986 && (!TARGET_PA_20
7987 || indirect_call
7988 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
359255a9 7989 {
a02aa5b0
JDA
7990 /* A non-jump insn in the delay slot. By definition we can
7991 emit this insn before the call (and in fact before argument
7992 relocating. */
c9d691e9 7993 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
5cfc5f84 7994 NULL);
a02aa5b0
JDA
7995
7996 /* Now delete the delay insn. */
a38e7aa5 7997 SET_INSN_DELETED (NEXT_INSN (insn));
bf95e88b 7998 seq_length = 0;
359255a9 7999 }
93ae92c1 8000
5fad1c24 8001 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
359255a9 8002 {
a02aa5b0
JDA
8003 /* This is the best sequence for making long calls in
8004 non-pic code. Unfortunately, GNU ld doesn't provide
8005 the stub needed for external calls, and GAS's support
5fad1c24
JDA
8006 for this with the SOM linker is buggy. It is safe
8007 to use this for local calls. */
a02aa5b0
JDA
8008 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8009 if (sibcall)
8010 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8011 else
8012 {
8013 if (TARGET_PA_20)
8014 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8015 xoperands);
8016 else
8017 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
6a73009d 8018
a02aa5b0 8019 output_asm_insn ("copy %%r31,%%r2", xoperands);
bf95e88b 8020 seq_length = 1;
a02aa5b0
JDA
8021 }
8022 }
8023 else
8024 {
568de9bb
JDA
8025 /* The HP assembler and linker can handle relocations for
8026 the difference of two symbols. The HP assembler
8027 recognizes the sequence as a pc-relative call and
8028 the linker provides stubs when needed. */
8029
8030 /* GAS currently can't generate the relocations that
8031 are needed for the SOM linker under HP-UX using this
8032 sequence. The GNU linker doesn't generate the stubs
8033 that are needed for external calls on TARGET_ELF32
8034 with this sequence. For now, we have to use a longer
8035 plabel sequence when using GAS for non local calls. */
8036 if (TARGET_LONG_PIC_SDIFF_CALL
8037 || (TARGET_GAS && !TARGET_SOM && local_call))
3d9268b6 8038 {
568de9bb
JDA
8039 xoperands[1] = gen_rtx_REG (Pmode, 1);
8040 xoperands[2] = xoperands[1];
8041 pa_output_pic_pcrel_sequence (xoperands);
3d9268b6 8042 }
520babc7
JL
8043 else
8044 {
a02aa5b0
JDA
8045 /* Emit a long plabel-based call sequence. This is
8046 essentially an inline implementation of $$dyncall.
8047 We don't actually try to call $$dyncall as this is
8048 as difficult as calling the function itself. */
ae9d61ab 8049 xoperands[0] = pa_get_deferred_plabel (call_dest);
a02aa5b0
JDA
8050 xoperands[1] = gen_label_rtx ();
8051
8052 /* Since the call is indirect, FP arguments in registers
8053 need to be copied to the general registers. Then, the
8054 argument relocation stub will copy them back. */
8055 if (TARGET_SOM)
8056 copy_fp_args (insn);
8057
8058 if (flag_pic)
8059 {
8060 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8061 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8062 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
8063 }
8064 else
8065 {
8066 output_asm_insn ("addil LR'%0-$global$,%%r27",
8067 xoperands);
8068 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
8069 xoperands);
8070 }
279c9bde 8071
a02aa5b0
JDA
8072 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
8073 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
8074 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
8075 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
6a73009d 8076
a02aa5b0
JDA
8077 if (!sibcall && !TARGET_PA_20)
8078 {
8079 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
0831e1d1 8080 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
90330d31
JDA
8081 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8082 else
8083 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
a02aa5b0
JDA
8084 }
8085 }
6a73009d 8086
a02aa5b0 8087 if (TARGET_PA_20)
520babc7 8088 {
a02aa5b0
JDA
8089 if (sibcall)
8090 output_asm_insn ("bve (%%r1)", xoperands);
8091 else
8092 {
8093 if (indirect_call)
8094 {
8095 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8096 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
bf95e88b 8097 seq_length = 1;
a02aa5b0
JDA
8098 }
8099 else
8100 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8101 }
520babc7
JL
8102 }
8103 else
8104 {
0831e1d1 8105 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
90330d31
JDA
8106 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8107 xoperands);
279c9bde 8108
a02aa5b0 8109 if (sibcall)
90330d31 8110 {
0831e1d1 8111 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
90330d31
JDA
8112 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8113 else
8114 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8115 }
a02aa5b0
JDA
8116 else
8117 {
0831e1d1 8118 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
90330d31
JDA
8119 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8120 else
8121 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
279c9bde 8122
a02aa5b0
JDA
8123 if (indirect_call)
8124 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8125 else
8126 output_asm_insn ("copy %%r31,%%r2", xoperands);
bf95e88b 8127 seq_length = 1;
a02aa5b0
JDA
8128 }
8129 }
8130 }
279c9bde 8131 }
2c4ff308 8132 }
23f6f34f 8133
bf95e88b 8134 if (seq_length == 0)
a02aa5b0 8135 output_asm_insn ("nop", xoperands);
2c4ff308 8136
2c4ff308
JL
8137 return "";
8138}
8139
611ad29e
JDA
8140/* Return the attribute length of the indirect call instruction INSN.
8141 The length must match the code generated by output_indirect call.
8142 The returned length includes the delay slot. Currently, the delay
8143 slot of an indirect call sequence is not exposed and it is used by
8144 the sequence itself. */
8145
8146int
432d483a 8147pa_attr_length_indirect_call (rtx_insn *insn)
611ad29e
JDA
8148{
8149 unsigned long distance = -1;
62910663 8150 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
611ad29e
JDA
8151
8152 if (INSN_ADDRESSES_SET_P ())
8153 {
5fad1c24
JDA
8154 distance = (total + insn_current_reference_address (insn));
8155 if (distance < total)
611ad29e
JDA
8156 distance = -1;
8157 }
8158
8159 if (TARGET_64BIT)
8160 return 12;
8161
445f9a50 8162 if (TARGET_FAST_INDIRECT_CALLS)
611ad29e
JDA
8163 return 8;
8164
611ad29e 8165 if (TARGET_PORTABLE_RUNTIME)
cc5cec10 8166 return 16;
611ad29e 8167
445f9a50
JDA
8168 /* Inline version of $$dyncall. */
8169 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8170 return 20;
8171
8172 if (!TARGET_LONG_CALLS
8173 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8174 || distance < MAX_PCREL17F_OFFSET))
8175 return 8;
8176
611ad29e 8177 /* Out of reach, can use ble. */
445f9a50
JDA
8178 if (!flag_pic)
8179 return 12;
8180
8181 /* Inline version of $$dyncall. */
8182 if (TARGET_NO_SPACE_REGS || TARGET_PA_20)
8183 return 20;
8184
8185 if (!optimize_size)
8186 return 36;
8187
8188 /* Long PIC pc-relative call. */
8189 return 20;
611ad29e
JDA
8190}
8191
8192const char *
432d483a 8193pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
611ad29e 8194{
568de9bb 8195 rtx xoperands[4];
445f9a50 8196 int length;
611ad29e
JDA
8197
8198 if (TARGET_64BIT)
8199 {
8200 xoperands[0] = call_dest;
445f9a50
JDA
8201 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8202 "bve,l (%%r2),%%r2\n\t"
8203 "ldd 24(%0),%%r27", xoperands);
611ad29e
JDA
8204 return "";
8205 }
8206
8207 /* First the special case for kernels, level 0 systems, etc. */
8208 if (TARGET_FAST_INDIRECT_CALLS)
445f9a50
JDA
8209 {
8210 pa_output_arg_descriptor (insn);
8211 if (TARGET_PA_20)
8212 return "bve,l,n (%%r22),%%r2\n\tnop";
8213 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8214 }
8215
8216 if (TARGET_PORTABLE_RUNTIME)
8217 {
8218 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8219 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8220 pa_output_arg_descriptor (insn);
8221 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8222 }
8223
8224 /* Maybe emit a fast inline version of $$dyncall. */
8225 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8226 {
8227 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8228 "ldw 2(%%r22),%%r19\n\t"
8229 "ldw -2(%%r22),%%r22", xoperands);
8230 pa_output_arg_descriptor (insn);
8231 if (TARGET_NO_SPACE_REGS)
8232 {
8233 if (TARGET_PA_20)
8234 return "bve,l,n (%%r22),%%r2\n\tnop";
8235 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8236 }
8237 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8238 }
611ad29e
JDA
8239
8240 /* Now the normal case -- we can reach $$dyncall directly or
8241 we're sure that we can get there via a long-branch stub.
8242
8243 No need to check target flags as the length uniquely identifies
8244 the remaining cases. */
445f9a50
JDA
8245 length = pa_attr_length_indirect_call (insn);
8246 if (length == 8)
2c774817 8247 {
445f9a50
JDA
8248 pa_output_arg_descriptor (insn);
8249
40fc2e0b
JDA
8250 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8251 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8252 variant of the B,L instruction can't be used on the SOM target. */
8253 if (TARGET_PA_20 && !TARGET_SOM)
445f9a50 8254 return "b,l,n $$dyncall,%%r2\n\tnop";
2c774817 8255 else
445f9a50 8256 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
2c774817 8257 }
611ad29e
JDA
8258
8259 /* Long millicode call, but we are not generating PIC or portable runtime
8260 code. */
445f9a50
JDA
8261 if (length == 12)
8262 {
8263 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8264 pa_output_arg_descriptor (insn);
8265 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8266 }
611ad29e 8267
445f9a50
JDA
8268 /* Maybe emit a fast inline version of $$dyncall. The long PIC
8269 pc-relative call sequence is five instructions. The inline PA 2.0
8270 version of $$dyncall is also five instructions. The PA 1.X versions
8271 are longer but still an overall win. */
8272 if (TARGET_NO_SPACE_REGS || TARGET_PA_20 || !optimize_size)
8273 {
8274 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8275 "ldw 2(%%r22),%%r19\n\t"
8276 "ldw -2(%%r22),%%r22", xoperands);
8277 if (TARGET_NO_SPACE_REGS)
8278 {
8279 pa_output_arg_descriptor (insn);
8280 if (TARGET_PA_20)
8281 return "bve,l,n (%%r22),%%r2\n\tnop";
8282 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8283 }
8284 if (TARGET_PA_20)
8285 {
8286 pa_output_arg_descriptor (insn);
8287 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8288 }
8289 output_asm_insn ("bl .+8,%%r2\n\t"
8290 "ldo 16(%%r2),%%r2\n\t"
8291 "ldsid (%%r22),%%r1\n\t"
8292 "mtsp %%r1,%%sr0", xoperands);
8293 pa_output_arg_descriptor (insn);
8294 return "be 0(%%sr0,%%r22)\n\tstw %%r2,-24(%%sp)";
8295 }
8296
611ad29e 8297 /* We need a long PIC call to $$dyncall. */
568de9bb
JDA
8298 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8299 xoperands[1] = gen_rtx_REG (Pmode, 2);
8300 xoperands[2] = gen_rtx_REG (Pmode, 1);
8301 pa_output_pic_pcrel_sequence (xoperands);
445f9a50
JDA
8302 pa_output_arg_descriptor (insn);
8303 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
611ad29e
JDA
8304}
8305
d2a94ec0 8306/* In HPUX 8.0's shared library scheme, special relocations are needed
23f6f34f 8307 for function labels if they might be passed to a function
d2a94ec0 8308 in a shared library (because shared libraries don't live in code
520a57c8 8309 space), and special magic is needed to construct their address. */
d2a94ec0
TM
8310
8311void
ae9d61ab 8312pa_encode_label (rtx sym)
d2a94ec0 8313{
519104fe 8314 const char *str = XSTR (sym, 0);
10d17cb7
AM
8315 int len = strlen (str) + 1;
8316 char *newstr, *p;
d2a94ec0 8317
5ead67f6 8318 p = newstr = XALLOCAVEC (char, len + 1);
10d17cb7
AM
8319 *p++ = '@';
8320 strcpy (p, str);
67d6f2fc 8321
831c1763 8322 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
d2a94ec0 8323}
23f6f34f 8324
fb49053f 8325static void
b7849684 8326pa_encode_section_info (tree decl, rtx rtl, int first)
fb49053f 8327{
9a60b229
JJ
8328 int old_referenced = 0;
8329
8330 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8331 old_referenced
8332 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8333
51076f96
RC
8334 default_encode_section_info (decl, rtl, first);
8335
fb49053f
RH
8336 if (first && TEXT_SPACE_P (decl))
8337 {
fb49053f
RH
8338 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8339 if (TREE_CODE (decl) == FUNCTION_DECL)
ae9d61ab 8340 pa_encode_label (XEXP (rtl, 0));
fb49053f 8341 }
9a60b229
JJ
8342 else if (old_referenced)
8343 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
fb49053f
RH
8344}
8345
772c5265
RH
/* This is sort of inverse to pa_encode_section_info.  Return a pointer
   into STR that skips one leading '@', if present, and then one
   leading '*', if present.  */

static const char *
pa_strip_name_encoding (const char *str)
{
  if (*str == '@')
    str++;

  if (*str == '*')
    str++;

  return str;
}
8355
326bc2de
JL
8356/* Returns 1 if OP is a function label involved in a simple addition
8357 with a constant. Used to keep certain patterns from matching
8358 during instruction combination. */
8359int
ae9d61ab 8360pa_is_function_label_plus_const (rtx op)
326bc2de
JL
8361{
8362 /* Strip off any CONST. */
8363 if (GET_CODE (op) == CONST)
8364 op = XEXP (op, 0);
8365
8366 return (GET_CODE (op) == PLUS
9c575e20 8367 && function_label_operand (XEXP (op, 0), VOIDmode)
326bc2de
JL
8368 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8369}
8370
54374491
JL
8371/* Output assembly code for a thunk to FUNCTION. */
8372
c590b625 8373static void
b7849684
JE
8374pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8375 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8376 tree function)
54374491 8377{
cdcb88d7 8378 static unsigned int current_thunk_number;
5fad1c24 8379 int val_14 = VAL_14_BITS_P (delta);
67b846fa 8380 unsigned int old_last_address = last_address, nbytes = 0;
b2029ad6 8381 char label[17];
cdcb88d7 8382 rtx xoperands[4];
5fad1c24 8383
cdcb88d7
JDA
8384 xoperands[0] = XEXP (DECL_RTL (function), 0);
8385 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8386 xoperands[2] = GEN_INT (delta);
5fad1c24 8387
a9a302d9 8388 final_start_function (emit_barrier (), file, 1);
5fad1c24
JDA
8389
8390 /* Output the thunk. We know that the function is in the same
8391 translation unit (i.e., the same space) as the thunk, and that
8392 thunks are output after their method. Thus, we don't need an
8393 external branch to reach the function. With SOM and GAS,
8394 functions and thunks are effectively in different sections.
8395 Thus, we can always use a IA-relative branch and the linker
8396 will add a long branch stub if necessary.
8397
8398 However, we have to be careful when generating PIC code on the
8399 SOM port to ensure that the sequence does not transfer to an
8400 import stub for the target function as this could clobber the
8401 return value saved at SP-24. This would also apply to the
8402 32-bit linux port if the multi-space model is implemented. */
8403 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8404 && !(flag_pic && TREE_PUBLIC (function))
8405 && (TARGET_GAS || last_address < 262132))
8406 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
677f3fa8 8407 && ((targetm_common.have_named_sections
5fad1c24
JDA
8408 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8409 /* The GNU 64-bit linker has rather poor stub management.
8410 So, we use a long branch from thunks that aren't in
8411 the same section as the target function. */
8412 && ((!TARGET_64BIT
8413 && (DECL_SECTION_NAME (thunk_fndecl)
8414 != DECL_SECTION_NAME (function)))
8415 || ((DECL_SECTION_NAME (thunk_fndecl)
8416 == DECL_SECTION_NAME (function))
8417 && last_address < 262132)))
5dba8769
JDA
8418 /* In this case, we need to be able to reach the start of
8419 the stub table even though the function is likely closer
8420 and can be jumped to directly. */
677f3fa8 8421 || (targetm_common.have_named_sections
2842bb86
JDA
8422 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8423 && DECL_SECTION_NAME (function) == NULL
5dba8769
JDA
8424 && total_code_bytes < MAX_PCREL17F_OFFSET)
8425 /* Likewise. */
677f3fa8 8426 || (!targetm_common.have_named_sections
5dba8769 8427 && total_code_bytes < MAX_PCREL17F_OFFSET))))
5fad1c24 8428 {
cdcb88d7
JDA
8429 if (!val_14)
8430 output_asm_insn ("addil L'%2,%%r26", xoperands);
8431
31fd809b 8432 output_asm_insn ("b %0", xoperands);
cdcb88d7 8433
5fad1c24
JDA
8434 if (val_14)
8435 {
cdcb88d7 8436 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24
JDA
8437 nbytes += 8;
8438 }
8439 else
8440 {
cdcb88d7 8441 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8442 nbytes += 12;
8443 }
8444 }
8445 else if (TARGET_64BIT)
8446 {
568de9bb
JDA
8447 rtx xop[4];
8448
5fad1c24
JDA
8449 /* We only have one call-clobbered scratch register, so we can't
8450 make use of the delay slot if delta doesn't fit in 14 bits. */
8451 if (!val_14)
cdcb88d7
JDA
8452 {
8453 output_asm_insn ("addil L'%2,%%r26", xoperands);
8454 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8455 }
5fad1c24 8456
568de9bb
JDA
8457 /* Load function address into %r1. */
8458 xop[0] = xoperands[0];
8459 xop[1] = gen_rtx_REG (Pmode, 1);
8460 xop[2] = xop[1];
8461 pa_output_pic_pcrel_sequence (xop);
5fad1c24
JDA
8462
8463 if (val_14)
8464 {
cdcb88d7
JDA
8465 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8466 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24
JDA
8467 nbytes += 20;
8468 }
8469 else
8470 {
cdcb88d7 8471 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
5fad1c24
JDA
8472 nbytes += 24;
8473 }
8474 }
8475 else if (TARGET_PORTABLE_RUNTIME)
8476 {
cdcb88d7
JDA
8477 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8478 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8479
8480 if (!val_14)
568de9bb 8481 output_asm_insn ("ldil L'%2,%%r26", xoperands);
cdcb88d7
JDA
8482
8483 output_asm_insn ("bv %%r0(%%r22)", xoperands);
5fad1c24
JDA
8484
8485 if (val_14)
8486 {
cdcb88d7 8487 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24
JDA
8488 nbytes += 16;
8489 }
8490 else
8491 {
568de9bb 8492 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
5fad1c24
JDA
8493 nbytes += 20;
8494 }
8495 }
8496 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8497 {
8498 /* The function is accessible from outside this module. The only
8499 way to avoid an import stub between the thunk and function is to
8500 call the function directly with an indirect sequence similar to
8501 that used by $$dyncall. This is possible because $$dyncall acts
8502 as the import stub in an indirect call. */
5fad1c24 8503 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
cdcb88d7
JDA
8504 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8505 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8506 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8507 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8508 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8509 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8510 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8511 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8512
5fad1c24
JDA
8513 if (!val_14)
8514 {
cdcb88d7 8515 output_asm_insn ("addil L'%2,%%r26", xoperands);
5fad1c24
JDA
8516 nbytes += 4;
8517 }
cdcb88d7 8518
5fad1c24
JDA
8519 if (TARGET_PA_20)
8520 {
cdcb88d7
JDA
8521 output_asm_insn ("bve (%%r22)", xoperands);
8522 nbytes += 36;
8523 }
8524 else if (TARGET_NO_SPACE_REGS)
8525 {
8526 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
5fad1c24
JDA
8527 nbytes += 36;
8528 }
8529 else
54374491 8530 {
cdcb88d7
JDA
8531 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8532 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8533 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8534 nbytes += 44;
5fad1c24
JDA
8535 }
8536
8537 if (val_14)
cdcb88d7 8538 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24 8539 else
cdcb88d7 8540 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8541 }
8542 else if (flag_pic)
8543 {
568de9bb 8544 rtx xop[4];
5fad1c24 8545
568de9bb
JDA
8546 /* Load function address into %r22. */
8547 xop[0] = xoperands[0];
8548 xop[1] = gen_rtx_REG (Pmode, 1);
8549 xop[2] = gen_rtx_REG (Pmode, 22);
8550 pa_output_pic_pcrel_sequence (xop);
5fad1c24 8551
cdcb88d7
JDA
8552 if (!val_14)
8553 output_asm_insn ("addil L'%2,%%r26", xoperands);
8554
8555 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8556
5fad1c24
JDA
8557 if (val_14)
8558 {
cdcb88d7 8559 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24 8560 nbytes += 20;
54374491
JL
8561 }
8562 else
5fad1c24 8563 {
cdcb88d7 8564 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8565 nbytes += 24;
8566 }
54374491
JL
8567 }
8568 else
8569 {
5fad1c24 8570 if (!val_14)
cdcb88d7 8571 output_asm_insn ("addil L'%2,%%r26", xoperands);
5fad1c24 8572
cdcb88d7
JDA
8573 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8574 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
5fad1c24
JDA
8575
8576 if (val_14)
54374491 8577 {
cdcb88d7 8578 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
5fad1c24 8579 nbytes += 12;
54374491
JL
8580 }
8581 else
5fad1c24 8582 {
cdcb88d7 8583 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
5fad1c24
JDA
8584 nbytes += 16;
8585 }
54374491 8586 }
5fad1c24 8587
a9a302d9 8588 final_end_function ();
1a83bfc3 8589
5fad1c24 8590 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
54374491 8591 {
d6b5193b 8592 switch_to_section (data_section);
cdcb88d7 8593 output_asm_insn (".align 4", xoperands);
5fad1c24 8594 ASM_OUTPUT_LABEL (file, label);
cdcb88d7 8595 output_asm_insn (".word P'%0", xoperands);
54374491 8596 }
5fad1c24 8597
54374491 8598 current_thunk_number++;
5fad1c24
JDA
8599 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8600 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8601 last_address += nbytes;
67b846fa
JDA
8602 if (old_last_address > last_address)
8603 last_address = UINT_MAX;
5fad1c24 8604 update_total_code_bytes (nbytes);
54374491
JL
8605}
8606
4977bab6
ZW
8607/* Only direct calls to static functions are allowed to be sibling (tail)
8608 call optimized.
8609
8610 This restriction is necessary because some linker generated stubs will
8611 store return pointers into rp' in some cases which might clobber a
8612 live value already in rp'.
8613
8614 In a sibcall the current function and the target function share stack
8615 space. Thus if the path to the current function and the path to the
8616 target function save a value in rp', they save the value into the
8617 same stack slot, which has undesirable consequences.
8618
8619 Because of the deferred binding nature of shared libraries any function
8620 with external scope could be in a different load module and thus require
8621 rp' to be saved when calling that function. So sibcall optimizations
8622 can only be safe for static function.
8623
8624 Note that GCC never needs return value relocations, so we don't have to
8625 worry about static calls with return value relocations (which require
8626 saving rp').
8627
8628 It is safe to perform a sibcall optimization when the target function
8629 will never return. */
8630static bool
b7849684 8631pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4977bab6 8632{
73096ba9
JDA
8633 if (TARGET_PORTABLE_RUNTIME)
8634 return false;
8635
11f43127 8636 /* Sibcalls are not ok because the arg pointer register is not a fixed
c1207243 8637 register. This prevents the sibcall optimization from occurring. In
11f43127
JDA
8638 addition, there are problems with stub placement using GNU ld. This
8639 is because a normal sibcall branch uses a 17-bit relocation while
8640 a regular call branch uses a 22-bit relocation. As a result, more
8641 care needs to be taken in the placement of long-branch stubs. */
8642 if (TARGET_64BIT)
8643 return false;
8644
73096ba9
JDA
8645 /* Sibcalls are only ok within a translation unit. */
8646 return (decl && !TREE_PUBLIC (decl));
4977bab6
ZW
8647}
8648
8ddf681a
R
8649/* ??? Addition is not commutative on the PA due to the weird implicit
8650 space register selection rules for memory addresses. Therefore, we
8651 don't consider a + b == b + a, as this might be inside a MEM. */
8652static bool
3101faab 8653pa_commutative_p (const_rtx x, int outer_code)
8ddf681a
R
8654{
8655 return (COMMUTATIVE_P (x)
bd7d5043
JDA
8656 && (TARGET_NO_SPACE_REGS
8657 || (outer_code != UNKNOWN && outer_code != MEM)
8ddf681a
R
8658 || GET_CODE (x) != PLUS));
8659}
8660
88e5c029
JL
8661/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8662 use in fmpyadd instructions. */
2fe24884 8663int
ae9d61ab 8664pa_fmpyaddoperands (rtx *operands)
2fe24884 8665{
ef4bddc2 8666 machine_mode mode = GET_MODE (operands[0]);
2fe24884 8667
d85ab966
JL
8668 /* Must be a floating point mode. */
8669 if (mode != SFmode && mode != DFmode)
8670 return 0;
8671
2fe24884 8672 /* All modes must be the same. */
f133af4c
TG
8673 if (! (mode == GET_MODE (operands[1])
8674 && mode == GET_MODE (operands[2])
8675 && mode == GET_MODE (operands[3])
8676 && mode == GET_MODE (operands[4])
8677 && mode == GET_MODE (operands[5])))
2fe24884
JL
8678 return 0;
8679
d85ab966
JL
8680 /* All operands must be registers. */
8681 if (! (GET_CODE (operands[1]) == REG
8682 && GET_CODE (operands[2]) == REG
8683 && GET_CODE (operands[3]) == REG
8684 && GET_CODE (operands[4]) == REG
8685 && GET_CODE (operands[5]) == REG))
2fe24884
JL
8686 return 0;
8687
88e5c029
JL
8688 /* Only 2 real operands to the addition. One of the input operands must
8689 be the same as the output operand. */
2fe24884
JL
8690 if (! rtx_equal_p (operands[3], operands[4])
8691 && ! rtx_equal_p (operands[3], operands[5]))
8692 return 0;
8693
1e5f1716 8694 /* Inout operand of add cannot conflict with any operands from multiply. */
2fe24884
JL
8695 if (rtx_equal_p (operands[3], operands[0])
8696 || rtx_equal_p (operands[3], operands[1])
8697 || rtx_equal_p (operands[3], operands[2]))
8698 return 0;
8699
1e5f1716 8700 /* multiply cannot feed into addition operands. */
2fe24884
JL
8701 if (rtx_equal_p (operands[4], operands[0])
8702 || rtx_equal_p (operands[5], operands[0]))
8703 return 0;
8704
d85ab966
JL
8705 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8706 if (mode == SFmode
88624c0e
JL
8707 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8708 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8709 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8710 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8711 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8712 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
d85ab966
JL
8713 return 0;
8714
2fe24884
JL
8715 /* Passed. Operands are suitable for fmpyadd. */
8716 return 1;
8717}
8718
35d434ed
JDA
8719#if !defined(USE_COLLECT2)
8720static void
b7849684 8721pa_asm_out_constructor (rtx symbol, int priority)
35d434ed
JDA
8722{
8723 if (!function_label_operand (symbol, VOIDmode))
ae9d61ab 8724 pa_encode_label (symbol);
35d434ed
JDA
8725
8726#ifdef CTORS_SECTION_ASM_OP
8727 default_ctor_section_asm_out_constructor (symbol, priority);
8728#else
8729# ifdef TARGET_ASM_NAMED_SECTION
8730 default_named_section_asm_out_constructor (symbol, priority);
8731# else
8732 default_stabs_asm_out_constructor (symbol, priority);
8733# endif
8734#endif
8735}
8736
8737static void
b7849684 8738pa_asm_out_destructor (rtx symbol, int priority)
35d434ed
JDA
8739{
8740 if (!function_label_operand (symbol, VOIDmode))
ae9d61ab 8741 pa_encode_label (symbol);
35d434ed
JDA
8742
8743#ifdef DTORS_SECTION_ASM_OP
8744 default_dtor_section_asm_out_destructor (symbol, priority);
8745#else
8746# ifdef TARGET_ASM_NAMED_SECTION
8747 default_named_section_asm_out_destructor (symbol, priority);
8748# else
8749 default_stabs_asm_out_destructor (symbol, priority);
8750# endif
8751#endif
8752}
8753#endif
8754
d4482715
JDA
8755/* This function places uninitialized global data in the bss section.
8756 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8757 function on the SOM port to prevent uninitialized global data from
8758 being placed in the data section. */
8759
8760void
8761pa_asm_output_aligned_bss (FILE *stream,
8762 const char *name,
8763 unsigned HOST_WIDE_INT size,
8764 unsigned int align)
8765{
d6b5193b 8766 switch_to_section (bss_section);
d4482715
JDA
8767 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8768
8769#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8770 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8771#endif
8772
8773#ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8774 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8775#endif
8776
8777 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8778 ASM_OUTPUT_LABEL (stream, name);
16998094 8779 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
d4482715
JDA
8780}
8781
8782/* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8783 that doesn't allow the alignment of global common storage to be directly
8784 specified. The SOM linker aligns common storage based on the rounded
8785 value of the NUM_BYTES parameter in the .comm directive. It's not
8786 possible to use the .align directive as it doesn't affect the alignment
8787 of the label associated with a .comm directive. */
8788
8789void
8790pa_asm_output_aligned_common (FILE *stream,
8791 const char *name,
8792 unsigned HOST_WIDE_INT size,
8793 unsigned int align)
8794{
22f549fd
JDA
8795 unsigned int max_common_align;
8796
8797 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8798 if (align > max_common_align)
8799 {
d4ee4d25 8800 warning (0, "alignment (%u) for %s exceeds maximum alignment "
22f549fd
JDA
8801 "for global common data. Using %u",
8802 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8803 align = max_common_align;
8804 }
8805
d6b5193b 8806 switch_to_section (bss_section);
d4482715
JDA
8807
8808 assemble_name (stream, name);
16998094 8809 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
d4482715
JDA
8810 MAX (size, align / BITS_PER_UNIT));
8811}
8812
8813/* We can't use .comm for local common storage as the SOM linker effectively
8814 treats the symbol as universal and uses the same storage for local symbols
8815 with the same name in different object files. The .block directive
8816 reserves an uninitialized block of storage. However, it's not common
8817 storage. Fortunately, GCC never requests common storage with the same
8818 name in any given translation unit. */
8819
8820void
8821pa_asm_output_aligned_local (FILE *stream,
8822 const char *name,
8823 unsigned HOST_WIDE_INT size,
8824 unsigned int align)
8825{
d6b5193b 8826 switch_to_section (bss_section);
d4482715
JDA
8827 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8828
8829#ifdef LOCAL_ASM_OP
8830 fprintf (stream, "%s", LOCAL_ASM_OP);
8831 assemble_name (stream, name);
8832 fprintf (stream, "\n");
8833#endif
8834
8835 ASM_OUTPUT_LABEL (stream, name);
16998094 8836 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
d4482715
JDA
8837}
8838
88e5c029
JL
8839/* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8840 use in fmpysub instructions. */
2fe24884 8841int
ae9d61ab 8842pa_fmpysuboperands (rtx *operands)
2fe24884 8843{
ef4bddc2 8844 machine_mode mode = GET_MODE (operands[0]);
2fe24884 8845
d85ab966
JL
8846 /* Must be a floating point mode. */
8847 if (mode != SFmode && mode != DFmode)
8848 return 0;
8849
2fe24884 8850 /* All modes must be the same. */
f133af4c
TG
8851 if (! (mode == GET_MODE (operands[1])
8852 && mode == GET_MODE (operands[2])
8853 && mode == GET_MODE (operands[3])
8854 && mode == GET_MODE (operands[4])
8855 && mode == GET_MODE (operands[5])))
2fe24884
JL
8856 return 0;
8857
d85ab966
JL
8858 /* All operands must be registers. */
8859 if (! (GET_CODE (operands[1]) == REG
8860 && GET_CODE (operands[2]) == REG
8861 && GET_CODE (operands[3]) == REG
8862 && GET_CODE (operands[4]) == REG
8863 && GET_CODE (operands[5]) == REG))
2fe24884
JL
8864 return 0;
8865
88e5c029
JL
8866 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8867 operation, so operands[4] must be the same as operand[3]. */
2fe24884
JL
8868 if (! rtx_equal_p (operands[3], operands[4]))
8869 return 0;
8870
1e5f1716 8871 /* multiply cannot feed into subtraction. */
88e5c029 8872 if (rtx_equal_p (operands[5], operands[0]))
2fe24884
JL
8873 return 0;
8874
1e5f1716 8875 /* Inout operand of sub cannot conflict with any operands from multiply. */
2fe24884
JL
8876 if (rtx_equal_p (operands[3], operands[0])
8877 || rtx_equal_p (operands[3], operands[1])
8878 || rtx_equal_p (operands[3], operands[2]))
8879 return 0;
8880
d85ab966
JL
8881 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8882 if (mode == SFmode
88624c0e
JL
8883 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8884 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8885 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8886 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8887 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8888 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
d85ab966
JL
8889 return 0;
8890
2fe24884
JL
8891 /* Passed. Operands are suitable for fmpysub. */
8892 return 1;
8893}
8894
/* Return 1 if the given constant is 2, 4, or 8, and 0 otherwise.  These
   are the valid constants for a MULT embedded inside a memory
   address.  */
int
pa_mem_shadd_constant_p (int val)
{
  switch (val)
    {
    case 2:
    case 4:
    case 8:
      return 1;

    default:
      return 0;
    }
}
8905
/* Return 1 if the given constant is 1, 2, or 3, and 0 otherwise.  These
   are the valid constants for shadd instructions.  */
int
pa_shadd_constant_p (int val)
{
  return val >= 1 && val <= 3;
}
4802a0d6 8916
3232e9d8
JDA
8917/* Return TRUE if INSN branches forward. */
8918
8919static bool
b32d5189 8920forward_branch_p (rtx_insn *insn)
b9821af8 8921{
3232e9d8
JDA
8922 rtx lab = JUMP_LABEL (insn);
8923
8924 /* The INSN must have a jump label. */
8925 gcc_assert (lab != NULL_RTX);
8926
8927 if (INSN_ADDRESSES_SET_P ())
8928 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
b9821af8
JL
8929
8930 while (insn)
8931 {
3232e9d8
JDA
8932 if (insn == lab)
8933 return true;
b9821af8
JL
8934 else
8935 insn = NEXT_INSN (insn);
8936 }
8937
3232e9d8 8938 return false;
b9821af8
JL
8939}
8940
b1092901
JL
8941/* Output an unconditional move and branch insn. */
8942
519104fe 8943const char *
b32d5189 8944pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
b1092901 8945{
16d74a3c
JDA
8946 int length = get_attr_length (insn);
8947
b1092901
JL
8948 /* These are the cases in which we win. */
8949 if (length == 4)
8950 return "mov%I1b,tr %1,%0,%2";
8951
16d74a3c
JDA
8952 /* None of the following cases win, but they don't lose either. */
8953 if (length == 8)
b1092901 8954 {
16d74a3c
JDA
8955 if (dbr_sequence_length () == 0)
8956 {
8957 /* Nothing in the delay slot, fake it by putting the combined
8958 insn (the copy or add) in the delay slot of a bl. */
8959 if (GET_CODE (operands[1]) == CONST_INT)
8960 return "b %2\n\tldi %1,%0";
8961 else
8962 return "b %2\n\tcopy %1,%0";
8963 }
b1092901 8964 else
16d74a3c
JDA
8965 {
8966 /* Something in the delay slot, but we've got a long branch. */
8967 if (GET_CODE (operands[1]) == CONST_INT)
8968 return "ldi %1,%0\n\tb %2";
8969 else
8970 return "copy %1,%0\n\tb %2";
8971 }
b1092901 8972 }
16d74a3c
JDA
8973
8974 if (GET_CODE (operands[1]) == CONST_INT)
8975 output_asm_insn ("ldi %1,%0", operands);
b1092901 8976 else
16d74a3c 8977 output_asm_insn ("copy %1,%0", operands);
ae9d61ab 8978 return pa_output_lbranch (operands[2], insn, 1);
b1092901
JL
8979}
8980
8981/* Output an unconditional add and branch insn. */
8982
519104fe 8983const char *
b32d5189 8984pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
b1092901 8985{
16d74a3c
JDA
8986 int length = get_attr_length (insn);
8987
b1092901
JL
8988 /* To make life easy we want operand0 to be the shared input/output
8989 operand and operand1 to be the readonly operand. */
8990 if (operands[0] == operands[1])
8991 operands[1] = operands[2];
8992
8993 /* These are the cases in which we win. */
8994 if (length == 4)
8995 return "add%I1b,tr %1,%0,%3";
8996
16d74a3c
JDA
8997 /* None of the following cases win, but they don't lose either. */
8998 if (length == 8)
b1092901 8999 {
16d74a3c
JDA
9000 if (dbr_sequence_length () == 0)
9001 /* Nothing in the delay slot, fake it by putting the combined
9002 insn (the copy or add) in the delay slot of a bl. */
9003 return "b %3\n\tadd%I1 %1,%0,%0";
9004 else
9005 /* Something in the delay slot, but we've got a long branch. */
9006 return "add%I1 %1,%0,%0\n\tb %3";
b1092901 9007 }
16d74a3c
JDA
9008
9009 output_asm_insn ("add%I1 %1,%0,%0", operands);
ae9d61ab 9010 return pa_output_lbranch (operands[3], insn, 1);
b1092901
JL
9011}
9012
746a9efa 9013/* We use this hook to perform a PA specific optimization which is difficult
3ba07ad3 9014 to do in earlier passes. */
746a9efa 9015
18dbd950 9016static void
b7849684 9017pa_reorg (void)
746a9efa 9018{
18dbd950 9019 remove_useless_addtr_insns (1);
d8b79470 9020
86001391 9021 if (pa_cpu < PROCESSOR_8000)
18dbd950 9022 pa_combine_instructions ();
aba892c4 9023}
c4bb6b38
JL
9024
9025/* The PA has a number of odd instructions which can perform multiple
9026 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9027 it may be profitable to combine two instructions into one instruction
9028 with two outputs. It's not profitable PA2.0 machines because the
9029 two outputs would take two slots in the reorder buffers.
9030
9031 This routine finds instructions which can be combined and combines
9032 them. We only support some of the potential combinations, and we
9033 only try common ways to find suitable instructions.
9034
9035 * addb can add two registers or a register and a small integer
9036 and jump to a nearby (+-8k) location. Normally the jump to the
9037 nearby location is conditional on the result of the add, but by
9038 using the "true" condition we can make the jump unconditional.
9039 Thus addb can perform two independent operations in one insn.
9040
9041 * movb is similar to addb in that it can perform a reg->reg
9042 or small immediate->reg copy and jump to a nearby (+-8k location).
9043
9044 * fmpyadd and fmpysub can perform a FP multiply and either an
9045 FP add or FP sub if the operands of the multiply and add/sub are
9046 independent (there are other minor restrictions). Note both
9047 the fmpy and fadd/fsub can in theory move to better spots according
9048 to data dependencies, but for now we require the fmpy stay at a
9049 fixed location.
9050
9051 * Many of the memory operations can perform pre & post updates
9052 of index registers. GCC's pre/post increment/decrement addressing
9053 is far too simple to take advantage of all the possibilities. This
9054 pass may not be suitable since those insns may not be independent.
9055
9056 * comclr can compare two ints or an int and a register, nullify
9057 the following instruction and zero some other register. This
9058 is more difficult to use as it's harder to find an insn which
9059 will generate a comclr than finding something like an unconditional
9060 branch. (conditional moves & long branches create comclr insns).
9061
9062 * Most arithmetic operations can conditionally skip the next
9063 instruction. They can be viewed as "perform this operation
9064 and conditionally jump to this nearby location" (where nearby
9065 is an insns away). These are difficult to use due to the
9066 branch length restrictions. */
9067
51723711 9068static void
b7849684 9069pa_combine_instructions (void)
c4bb6b38 9070{
b32d5189 9071 rtx_insn *anchor;
c4bb6b38
JL
9072
9073 /* This can get expensive since the basic algorithm is on the
9074 order of O(n^2) (or worse). Only do it for -O2 or higher
956d6950 9075 levels of optimization. */
c4bb6b38
JL
9076 if (optimize < 2)
9077 return;
9078
9079 /* Walk down the list of insns looking for "anchor" insns which
9080 may be combined with "floating" insns. As the name implies,
9081 "anchor" instructions don't move, while "floating" insns may
9082 move around. */
647d790d
DM
9083 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9084 rtx_insn *new_rtx = make_insn_raw (par);
c4bb6b38
JL
9085
9086 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9087 {
9088 enum attr_pa_combine_type anchor_attr;
9089 enum attr_pa_combine_type floater_attr;
9090
9091 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9092 Also ignore any special USE insns. */
b64925dc 9093 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
c4bb6b38 9094 || GET_CODE (PATTERN (anchor)) == USE
34f0d87a 9095 || GET_CODE (PATTERN (anchor)) == CLOBBER)
c4bb6b38
JL
9096 continue;
9097
9098 anchor_attr = get_attr_pa_combine_type (anchor);
9099 /* See if anchor is an insn suitable for combination. */
9100 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9101 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9102 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9103 && ! forward_branch_p (anchor)))
9104 {
b32d5189 9105 rtx_insn *floater;
c4bb6b38
JL
9106
9107 for (floater = PREV_INSN (anchor);
9108 floater;
9109 floater = PREV_INSN (floater))
9110 {
b64925dc
SB
9111 if (NOTE_P (floater)
9112 || (NONJUMP_INSN_P (floater)
c4bb6b38
JL
9113 && (GET_CODE (PATTERN (floater)) == USE
9114 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9115 continue;
9116
9117 /* Anything except a regular INSN will stop our search. */
39718607 9118 if (! NONJUMP_INSN_P (floater))
c4bb6b38 9119 {
b32d5189 9120 floater = NULL;
c4bb6b38
JL
9121 break;
9122 }
9123
9124 /* See if FLOATER is suitable for combination with the
9125 anchor. */
9126 floater_attr = get_attr_pa_combine_type (floater);
9127 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9128 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9129 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9130 && floater_attr == PA_COMBINE_TYPE_FMPY))
9131 {
9132 /* If ANCHOR and FLOATER can be combined, then we're
9133 done with this pass. */
0a2aaacc 9134 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
c4bb6b38
JL
9135 SET_DEST (PATTERN (floater)),
9136 XEXP (SET_SRC (PATTERN (floater)), 0),
9137 XEXP (SET_SRC (PATTERN (floater)), 1)))
9138 break;
9139 }
9140
9141 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9142 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9143 {
9144 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9145 {
0a2aaacc 9146 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
c4bb6b38
JL
9147 SET_DEST (PATTERN (floater)),
9148 XEXP (SET_SRC (PATTERN (floater)), 0),
9149 XEXP (SET_SRC (PATTERN (floater)), 1)))
9150 break;
9151 }
9152 else
9153 {
0a2aaacc 9154 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
c4bb6b38
JL
9155 SET_DEST (PATTERN (floater)),
9156 SET_SRC (PATTERN (floater)),
9157 SET_SRC (PATTERN (floater))))
9158 break;
9159 }
9160 }
9161 }
9162
9163 /* If we didn't find anything on the backwards scan try forwards. */
9164 if (!floater
9165 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9166 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9167 {
9168 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9169 {
b64925dc
SB
9170 if (NOTE_P (floater)
9171 || (NONJUMP_INSN_P (floater)
c4bb6b38
JL
9172 && (GET_CODE (PATTERN (floater)) == USE
9173 || GET_CODE (PATTERN (floater)) == CLOBBER)))
6619e96c 9174
c4bb6b38
JL
9175 continue;
9176
9177 /* Anything except a regular INSN will stop our search. */
39718607 9178 if (! NONJUMP_INSN_P (floater))
c4bb6b38 9179 {
b32d5189 9180 floater = NULL;
c4bb6b38
JL
9181 break;
9182 }
9183
9184 /* See if FLOATER is suitable for combination with the
9185 anchor. */
9186 floater_attr = get_attr_pa_combine_type (floater);
9187 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9188 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9189 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9190 && floater_attr == PA_COMBINE_TYPE_FMPY))
9191 {
9192 /* If ANCHOR and FLOATER can be combined, then we're
9193 done with this pass. */
0a2aaacc 9194 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
c4bb6b38 9195 SET_DEST (PATTERN (floater)),
831c1763
AM
9196 XEXP (SET_SRC (PATTERN (floater)),
9197 0),
9198 XEXP (SET_SRC (PATTERN (floater)),
9199 1)))
c4bb6b38
JL
9200 break;
9201 }
9202 }
9203 }
9204
9205 /* FLOATER will be nonzero if we found a suitable floating
9206 insn for combination with ANCHOR. */
9207 if (floater
9208 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9209 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9210 {
9211 /* Emit the new instruction and delete the old anchor. */
8faa8118
SB
9212 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9213 copy_rtx (PATTERN (floater)));
9214 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9215 emit_insn_before (temp, anchor);
c5c76735 9216
a38e7aa5 9217 SET_INSN_DELETED (anchor);
c4bb6b38
JL
9218
9219 /* Emit a special USE insn for FLOATER, then delete
9220 the floating insn. */
8faa8118
SB
9221 temp = copy_rtx (PATTERN (floater));
9222 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
c4bb6b38
JL
9223 delete_insn (floater);
9224
9225 continue;
9226 }
9227 else if (floater
9228 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9229 {
c4bb6b38 9230 /* Emit the new_jump instruction and delete the old anchor. */
8faa8118
SB
9231 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9232 copy_rtx (PATTERN (floater)));
9233 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9234 temp = emit_jump_insn_before (temp, anchor);
c5c76735 9235
c4bb6b38 9236 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
a38e7aa5 9237 SET_INSN_DELETED (anchor);
c4bb6b38
JL
9238
9239 /* Emit a special USE insn for FLOATER, then delete
9240 the floating insn. */
8faa8118
SB
9241 temp = copy_rtx (PATTERN (floater));
9242 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
c4bb6b38
JL
9243 delete_insn (floater);
9244 continue;
9245 }
9246 }
9247 }
9248}
9249
0952f89b 9250static int
647d790d 9251pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
b32d5189 9252 int reversed, rtx dest,
b7849684 9253 rtx src1, rtx src2)
c4bb6b38
JL
9254{
9255 int insn_code_number;
b32d5189 9256 rtx_insn *start, *end;
c4bb6b38
JL
9257
9258 /* Create a PARALLEL with the patterns of ANCHOR and
9259 FLOATER, try to recognize it, then test constraints
9260 for the resulting pattern.
9261
9262 If the pattern doesn't match or the constraints
9263 aren't met keep searching for a suitable floater
9264 insn. */
0a2aaacc
KG
9265 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9266 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9267 INSN_CODE (new_rtx) = -1;
9268 insn_code_number = recog_memoized (new_rtx);
daca1a96 9269 basic_block bb = BLOCK_FOR_INSN (anchor);
c4bb6b38 9270 if (insn_code_number < 0
daca1a96 9271 || (extract_insn (new_rtx),
63e7e854 9272 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
c4bb6b38
JL
9273 return 0;
9274
9275 if (reversed)
9276 {
9277 start = anchor;
9278 end = floater;
9279 }
9280 else
9281 {
9282 start = floater;
9283 end = anchor;
9284 }
9285
9286 /* There's up to three operands to consider. One
9287 output and two inputs.
9288
9289 The output must not be used between FLOATER & ANCHOR
9290 exclusive. The inputs must not be set between
9291 FLOATER and ANCHOR exclusive. */
9292
9293 if (reg_used_between_p (dest, start, end))
9294 return 0;
9295
9296 if (reg_set_between_p (src1, start, end))
9297 return 0;
9298
9299 if (reg_set_between_p (src2, start, end))
9300 return 0;
9301
9302 /* If we get here, then everything is good. */
9303 return 1;
9304}
b9cd54d2 9305
2561a923 9306/* Return nonzero if references for INSN are delayed.
b9cd54d2
JL
9307
9308 Millicode insns are actually function calls with some special
9309 constraints on arguments and register usage.
9310
9311 Millicode calls always expect their arguments in the integer argument
9312 registers, and always return their result in %r29 (ret1). They
7d8b1412
AM
9313 are expected to clobber their arguments, %r1, %r29, and the return
9314 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9315
9316 This function tells reorg that the references to arguments and
9317 millicode calls do not appear to happen until after the millicode call.
9318 This allows reorg to put insns which set the argument registers into the
9319 delay slot of the millicode call -- thus they act more like traditional
9320 CALL_INSNs.
9321
1e5f1716 9322 Note we cannot consider side effects of the insn to be delayed because
7d8b1412
AM
9323 the branch and link insn will clobber the return pointer. If we happened
9324 to use the return pointer in the delay slot of the call, then we lose.
b9cd54d2
JL
9325
9326 get_attr_type will try to recognize the given insn, so make sure to
9327 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9328 in particular. */
9329int
84034c69 9330pa_insn_refs_are_delayed (rtx_insn *insn)
b9cd54d2 9331{
b64925dc 9332 return ((NONJUMP_INSN_P (insn)
b9cd54d2
JL
9333 && GET_CODE (PATTERN (insn)) != SEQUENCE
9334 && GET_CODE (PATTERN (insn)) != USE
9335 && GET_CODE (PATTERN (insn)) != CLOBBER
9336 && get_attr_type (insn) == TYPE_MILLI));
9337}
d07d525a 9338
cde0f3fd
PB
9339/* Promote the return value, but not the arguments. */
9340
ef4bddc2 9341static machine_mode
cde0f3fd 9342pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
ef4bddc2 9343 machine_mode mode,
cde0f3fd
PB
9344 int *punsignedp ATTRIBUTE_UNUSED,
9345 const_tree fntype ATTRIBUTE_UNUSED,
9346 int for_return)
9347{
666e3ceb 9348 if (for_return == 0)
cde0f3fd 9349 return mode;
02095c50 9350 return promote_mode (type, mode, punsignedp);
cde0f3fd
PB
9351}
9352
44571d6e
JDA
9353/* On the HP-PA the value is found in register(s) 28(-29), unless
9354 the mode is SF or DF. Then the value is returned in fr4 (32).
9355
cde0f3fd
PB
9356 This must perform the same promotions as PROMOTE_MODE, else promoting
9357 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
44571d6e
JDA
9358
9359 Small structures must be returned in a PARALLEL on PA64 in order
9360 to match the HP Compiler ABI. */
9361
8a5b8538 9362static rtx
9193312a
AS
9363pa_function_value (const_tree valtype,
9364 const_tree func ATTRIBUTE_UNUSED,
9365 bool outgoing ATTRIBUTE_UNUSED)
44571d6e 9366{
ef4bddc2 9367 machine_mode valmode;
44571d6e 9368
4720d5ca
JDA
9369 if (AGGREGATE_TYPE_P (valtype)
9370 || TREE_CODE (valtype) == COMPLEX_TYPE
9371 || TREE_CODE (valtype) == VECTOR_TYPE)
44571d6e 9372 {
cbf6bcbe
JDA
9373 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9374
9375 /* Handle aggregates that fit exactly in a word or double word. */
9376 if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9377 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9378
2a04824b
JDA
9379 if (TARGET_64BIT)
9380 {
9381 /* Aggregates with a size less than or equal to 128 bits are
9382 returned in GR 28(-29). They are left justified. The pad
9383 bits are undefined. Larger aggregates are returned in
9384 memory. */
9385 rtx loc[2];
9386 int i, offset = 0;
cbf6bcbe 9387 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
2a04824b
JDA
9388
9389 for (i = 0; i < ub; i++)
9390 {
9391 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9392 gen_rtx_REG (DImode, 28 + i),
9393 GEN_INT (offset));
9394 offset += 8;
9395 }
44571d6e 9396
2a04824b
JDA
9397 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9398 }
cbf6bcbe 9399 else if (valsize > UNITS_PER_WORD)
44571d6e 9400 {
2a04824b
JDA
9401 /* Aggregates 5 to 8 bytes in size are returned in general
9402 registers r28-r29 in the same manner as other non
9403 floating-point objects. The data is right-justified and
9404 zero-extended to 64 bits. This is opposite to the normal
9405 justification used on big endian targets and requires
9406 special treatment. */
9407 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9408 gen_rtx_REG (DImode, 28), const0_rtx);
9409 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
44571d6e 9410 }
44571d6e
JDA
9411 }
9412
9413 if ((INTEGRAL_TYPE_P (valtype)
2ae88ecd 9414 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
44571d6e
JDA
9415 || POINTER_TYPE_P (valtype))
9416 valmode = word_mode;
9417 else
9418 valmode = TYPE_MODE (valtype);
9419
9420 if (TREE_CODE (valtype) == REAL_TYPE
2a04824b 9421 && !AGGREGATE_TYPE_P (valtype)
44571d6e
JDA
9422 && TYPE_MODE (valtype) != TFmode
9423 && !TARGET_SOFT_FLOAT)
9424 return gen_rtx_REG (valmode, 32);
9425
9426 return gen_rtx_REG (valmode, 28);
9427}
9428
8a5b8538
AS
9429/* Implement the TARGET_LIBCALL_VALUE hook. */
9430
9431static rtx
ef4bddc2 9432pa_libcall_value (machine_mode mode,
8a5b8538
AS
9433 const_rtx fun ATTRIBUTE_UNUSED)
9434{
9435 if (! TARGET_SOFT_FLOAT
9436 && (mode == SFmode || mode == DFmode))
9437 return gen_rtx_REG (mode, 32);
9438 else
9439 return gen_rtx_REG (mode, 28);
9440}
9441
9442/* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9443
9444static bool
9445pa_function_value_regno_p (const unsigned int regno)
9446{
9447 if (regno == 28
9448 || (! TARGET_SOFT_FLOAT && regno == 32))
9449 return true;
9450
9451 return false;
9452}
9453
fd29bdaf
NF
9454/* Update the data in CUM to advance over an argument
9455 of mode MODE and data type TYPE.
9456 (TYPE is null for libcalls where that information may not be available.) */
9457
9458static void
ef4bddc2 9459pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
fd29bdaf
NF
9460 const_tree type, bool named ATTRIBUTE_UNUSED)
9461{
d5cc9181 9462 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
fd29bdaf
NF
9463 int arg_size = FUNCTION_ARG_SIZE (mode, type);
9464
9465 cum->nargs_prototype--;
9466 cum->words += (arg_size
9467 + ((cum->words & 01)
9468 && type != NULL_TREE
9469 && arg_size > 1));
9470}
9471
520babc7
JL
9472/* Return the location of a parameter that is passed in a register or NULL
9473 if the parameter has any component that is passed in memory.
9474
9475 This is new code and will be pushed to into the net sources after
6619e96c 9476 further testing.
520babc7
JL
9477
9478 ??? We might want to restructure this so that it looks more like other
9479 ports. */
fd29bdaf 9480static rtx
ef4bddc2 9481pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
fd29bdaf 9482 const_tree type, bool named ATTRIBUTE_UNUSED)
520babc7 9483{
d5cc9181 9484 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
520babc7 9485 int max_arg_words = (TARGET_64BIT ? 8 : 4);
015b1ad1 9486 int alignment = 0;
9dff28ab 9487 int arg_size;
520babc7
JL
9488 int fpr_reg_base;
9489 int gpr_reg_base;
9490 rtx retval;
9491
9dff28ab
JDA
9492 if (mode == VOIDmode)
9493 return NULL_RTX;
9494
9495 arg_size = FUNCTION_ARG_SIZE (mode, type);
9496
9497 /* If this arg would be passed partially or totally on the stack, then
78a52f11 9498 this routine should return zero. pa_arg_partial_bytes will
9dff28ab
JDA
9499 handle arguments which are split between regs and stack slots if
9500 the ABI mandates split arguments. */
4720d5ca 9501 if (!TARGET_64BIT)
520babc7 9502 {
9dff28ab
JDA
9503 /* The 32-bit ABI does not split arguments. */
9504 if (cum->words + arg_size > max_arg_words)
520babc7
JL
9505 return NULL_RTX;
9506 }
9507 else
9508 {
015b1ad1
JDA
9509 if (arg_size > 1)
9510 alignment = cum->words & 1;
9dff28ab 9511 if (cum->words + alignment >= max_arg_words)
520babc7
JL
9512 return NULL_RTX;
9513 }
9514
9515 /* The 32bit ABIs and the 64bit ABIs are rather different,
9516 particularly in their handling of FP registers. We might
9517 be able to cleverly share code between them, but I'm not
0952f89b 9518 going to bother in the hope that splitting them up results
015b1ad1 9519 in code that is more easily understood. */
520babc7 9520
520babc7
JL
9521 if (TARGET_64BIT)
9522 {
9523 /* Advance the base registers to their current locations.
9524
9525 Remember, gprs grow towards smaller register numbers while
015b1ad1
JDA
9526 fprs grow to higher register numbers. Also remember that
9527 although FP regs are 32-bit addressable, we pretend that
9528 the registers are 64-bits wide. */
520babc7
JL
9529 gpr_reg_base = 26 - cum->words;
9530 fpr_reg_base = 32 + cum->words;
6619e96c 9531
9dff28ab
JDA
9532 /* Arguments wider than one word and small aggregates need special
9533 treatment. */
9534 if (arg_size > 1
9535 || mode == BLKmode
4720d5ca
JDA
9536 || (type && (AGGREGATE_TYPE_P (type)
9537 || TREE_CODE (type) == COMPLEX_TYPE
9538 || TREE_CODE (type) == VECTOR_TYPE)))
520babc7 9539 {
015b1ad1
JDA
9540 /* Double-extended precision (80-bit), quad-precision (128-bit)
9541 and aggregates including complex numbers are aligned on
9542 128-bit boundaries. The first eight 64-bit argument slots
9543 are associated one-to-one, with general registers r26
9544 through r19, and also with floating-point registers fr4
9545 through fr11. Arguments larger than one word are always
9dff28ab
JDA
9546 passed in general registers.
9547
9548 Using a PARALLEL with a word mode register results in left
9549 justified data on a big-endian target. */
015b1ad1
JDA
9550
9551 rtx loc[8];
9552 int i, offset = 0, ub = arg_size;
9553
9554 /* Align the base register. */
9555 gpr_reg_base -= alignment;
9556
9557 ub = MIN (ub, max_arg_words - cum->words - alignment);
9558 for (i = 0; i < ub; i++)
520babc7 9559 {
015b1ad1
JDA
9560 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9561 gen_rtx_REG (DImode, gpr_reg_base),
9562 GEN_INT (offset));
9563 gpr_reg_base -= 1;
9564 offset += 8;
520babc7 9565 }
015b1ad1 9566
e4b95210 9567 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
520babc7 9568 }
9dff28ab 9569 }
520babc7
JL
9570 else
9571 {
9572 /* If the argument is larger than a word, then we know precisely
9573 which registers we must use. */
015b1ad1 9574 if (arg_size > 1)
520babc7
JL
9575 {
9576 if (cum->words)
9577 {
9578 gpr_reg_base = 23;
9579 fpr_reg_base = 38;
9580 }
9581 else
9582 {
9583 gpr_reg_base = 25;
9584 fpr_reg_base = 34;
9585 }
9dff28ab
JDA
9586
9587 /* Structures 5 to 8 bytes in size are passed in the general
9588 registers in the same manner as other non floating-point
9589 objects. The data is right-justified and zero-extended
7ea18c08
JDA
9590 to 64 bits. This is opposite to the normal justification
9591 used on big endian targets and requires special treatment.
4720d5ca
JDA
9592 We now define BLOCK_REG_PADDING to pad these objects.
9593 Aggregates, complex and vector types are passed in the same
9594 manner as structures. */
9595 if (mode == BLKmode
9596 || (type && (AGGREGATE_TYPE_P (type)
9597 || TREE_CODE (type) == COMPLEX_TYPE
9598 || TREE_CODE (type) == VECTOR_TYPE)))
9dff28ab 9599 {
44571d6e
JDA
9600 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9601 gen_rtx_REG (DImode, gpr_reg_base),
9602 const0_rtx);
2a04824b 9603 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9dff28ab 9604 }
520babc7
JL
9605 }
9606 else
9607 {
9608 /* We have a single word (32 bits). A simple computation
9609 will get us the register #s we need. */
9610 gpr_reg_base = 26 - cum->words;
9611 fpr_reg_base = 32 + 2 * cum->words;
9612 }
9613 }
9614
b848dc65 9615 /* Determine if the argument needs to be passed in both general and
520babc7 9616 floating point registers. */
b848dc65
JDA
9617 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9618 /* If we are doing soft-float with portable runtime, then there
9619 is no need to worry about FP regs. */
c328adfa 9620 && !TARGET_SOFT_FLOAT
4720d5ca 9621 /* The parameter must be some kind of scalar float, else we just
b848dc65 9622 pass it in integer registers. */
4720d5ca 9623 && GET_MODE_CLASS (mode) == MODE_FLOAT
b848dc65
JDA
9624 /* The target function must not have a prototype. */
9625 && cum->nargs_prototype <= 0
9626 /* libcalls do not need to pass items in both FP and general
9627 registers. */
9628 && type != NULL_TREE
c328adfa
JDA
9629 /* All this hair applies to "outgoing" args only. This includes
9630 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9631 && !cum->incoming)
b848dc65
JDA
9632 /* Also pass outgoing floating arguments in both registers in indirect
9633 calls with the 32 bit ABI and the HP assembler since there is no
9634 way to the specify argument locations in static functions. */
c328adfa
JDA
9635 || (!TARGET_64BIT
9636 && !TARGET_GAS
9637 && !cum->incoming
b848dc65 9638 && cum->indirect
4720d5ca 9639 && GET_MODE_CLASS (mode) == MODE_FLOAT))
520babc7
JL
9640 {
9641 retval
9642 = gen_rtx_PARALLEL
9643 (mode,
9644 gen_rtvec (2,
9645 gen_rtx_EXPR_LIST (VOIDmode,
9646 gen_rtx_REG (mode, fpr_reg_base),
9647 const0_rtx),
9648 gen_rtx_EXPR_LIST (VOIDmode,
9649 gen_rtx_REG (mode, gpr_reg_base),
9650 const0_rtx)));
9651 }
9652 else
9653 {
9654 /* See if we should pass this parameter in a general register. */
9655 if (TARGET_SOFT_FLOAT
9656 /* Indirect calls in the normal 32bit ABI require all arguments
9657 to be passed in general registers. */
9658 || (!TARGET_PORTABLE_RUNTIME
9659 && !TARGET_64BIT
50b424a9 9660 && !TARGET_ELF32
520babc7 9661 && cum->indirect)
4720d5ca
JDA
9662 /* If the parameter is not a scalar floating-point parameter,
9663 then it belongs in GPRs. */
9664 || GET_MODE_CLASS (mode) != MODE_FLOAT
2a04824b
JDA
9665 /* Structure with single SFmode field belongs in GPR. */
9666 || (type && AGGREGATE_TYPE_P (type)))
520babc7
JL
9667 retval = gen_rtx_REG (mode, gpr_reg_base);
9668 else
9669 retval = gen_rtx_REG (mode, fpr_reg_base);
9670 }
9671 return retval;
9672}
9673
c2ed6cf8
NF
9674/* Arguments larger than one word are double word aligned. */
9675
9676static unsigned int
ef4bddc2 9677pa_function_arg_boundary (machine_mode mode, const_tree type)
c2ed6cf8 9678{
c2ed6cf8 9679 bool singleword = (type
b58a864d
NF
9680 ? (integer_zerop (TYPE_SIZE (type))
9681 || !TREE_CONSTANT (TYPE_SIZE (type))
c2ed6cf8 9682 || int_size_in_bytes (type) <= UNITS_PER_WORD)
19cf9bde 9683 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
c2ed6cf8
NF
9684
9685 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9686}
520babc7
JL
9687
9688/* If this arg would be passed totally in registers or totally on the stack,
78a52f11
RH
9689 then this routine should return zero. */
9690
9691static int
ef4bddc2 9692pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
78a52f11 9693 tree type, bool named ATTRIBUTE_UNUSED)
520babc7 9694{
d5cc9181 9695 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
e0c556d3
AM
9696 unsigned int max_arg_words = 8;
9697 unsigned int offset = 0;
520babc7 9698
78a52f11
RH
9699 if (!TARGET_64BIT)
9700 return 0;
9701
e0c556d3 9702 if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
520babc7
JL
9703 offset = 1;
9704
e0c556d3 9705 if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
fe19a83d 9706 /* Arg fits fully into registers. */
520babc7 9707 return 0;
6619e96c 9708 else if (cum->words + offset >= max_arg_words)
fe19a83d 9709 /* Arg fully on the stack. */
520babc7
JL
9710 return 0;
9711 else
fe19a83d 9712 /* Arg is split. */
78a52f11 9713 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
520babc7
JL
9714}
9715
9716
d6b5193b 9717/* A get_unnamed_section callback for switching to the text section.
9a55eab3
JDA
9718
9719 This function is only used with SOM. Because we don't support
9720 named subspaces, we can only create a new subspace or switch back
774acadf 9721 to the default text subspace. */
774acadf 9722
d6b5193b
RS
9723static void
9724som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9725{
9726 gcc_assert (TARGET_SOM);
774acadf 9727 if (TARGET_GAS)
9a55eab3 9728 {
8c5e065b 9729 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9a55eab3
JDA
9730 {
9731 /* We only want to emit a .nsubspa directive once at the
9732 start of the function. */
9733 cfun->machine->in_nsubspa = 1;
9734
9735 /* Create a new subspace for the text. This provides
9736 better stub placement and one-only functions. */
9737 if (cfun->decl
9738 && DECL_ONE_ONLY (cfun->decl)
9739 && !DECL_WEAK (cfun->decl))
1a83bfc3
JDA
9740 {
9741 output_section_asm_op ("\t.SPACE $TEXT$\n"
9742 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9743 "ACCESS=44,SORT=24,COMDAT");
9744 return;
9745 }
9a55eab3
JDA
9746 }
9747 else
9748 {
9749 /* There isn't a current function or the body of the current
9750 function has been completed. So, we are changing to the
1a83bfc3
JDA
9751 text section to output debugging information. Thus, we
9752 need to forget that we are in the text section so that
9753 varasm.c will call us when text_section is selected again. */
8c5e065b
JDA
9754 gcc_assert (!cfun || !cfun->machine
9755 || cfun->machine->in_nsubspa == 2);
d6b5193b 9756 in_section = NULL;
9a55eab3 9757 }
1a83bfc3
JDA
9758 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9759 return;
9a55eab3 9760 }
d6b5193b
RS
9761 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9762}
9763
1a83bfc3
JDA
9764/* A get_unnamed_section callback for switching to comdat data
9765 sections. This function is only used with SOM. */
9766
9767static void
9768som_output_comdat_data_section_asm_op (const void *data)
9769{
9770 in_section = NULL;
9771 output_section_asm_op (data);
9772}
9773
d6b5193b 9774/* Implement TARGET_ASM_INITIALIZE_SECTIONS */
9a55eab3 9775
d6b5193b
RS
9776static void
9777pa_som_asm_init_sections (void)
9778{
9779 text_section
9780 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9781
9782 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9783 is not being generated. */
9784 som_readonly_data_section
9785 = get_unnamed_section (0, output_section_asm_op,
9786 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9787
9788 /* When secondary definitions are not supported, SOM makes readonly
9789 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9790 the comdat flag. */
9791 som_one_only_readonly_data_section
1a83bfc3 9792 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
d6b5193b
RS
9793 "\t.SPACE $TEXT$\n"
9794 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9795 "ACCESS=0x2c,SORT=16,COMDAT");
9796
9797
9798 /* When secondary definitions are not supported, SOM makes data one-only
9799 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9800 som_one_only_data_section
1a83bfc3
JDA
9801 = get_unnamed_section (SECTION_WRITE,
9802 som_output_comdat_data_section_asm_op,
d6b5193b
RS
9803 "\t.SPACE $PRIVATE$\n"
9804 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9805 "ACCESS=31,SORT=24,COMDAT");
9806
57d138a9
JDA
9807 if (flag_tm)
9808 som_tm_clone_table_section
9809 = get_unnamed_section (0, output_section_asm_op,
9810 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9811
d6b5193b
RS
9812 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9813 which reference data within the $TEXT$ space (for example constant
9814 strings in the $LIT$ subspace).
9815
9816 The assemblers (GAS and HP as) both have problems with handling
9817 the difference of two symbols which is the other correct way to
9818 reference constant data during PIC code generation.
9819
9820 So, there's no way to reference constant data which is in the
9821 $TEXT$ space during PIC generation. Instead place all constant
9822 data into the $PRIVATE$ subspace (this reduces sharing, but it
9823 works correctly). */
9824 readonly_data_section = flag_pic ? data_section : som_readonly_data_section;
9825
9826 /* We must not have a reference to an external symbol defined in a
9827 shared library in a readonly section, else the SOM linker will
9828 complain.
9829
9830 So, we force exception information into the data section. */
9831 exception_section = data_section;
9a55eab3
JDA
9832}
9833
57d138a9
JDA
9834/* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9835
9836static section *
9837pa_som_tm_clone_table_section (void)
9838{
9839 return som_tm_clone_table_section;
9840}
9841
ae46c4e0
RH
9842/* On hpux10, the linker will give an error if we have a reference
9843 in the read-only data section to a symbol defined in a shared
9844 library. Therefore, expressions that might require a reloc can
9845 not be placed in the read-only data section. */
9846
d6b5193b 9847static section *
24a52160
JDA
9848pa_select_section (tree exp, int reloc,
9849 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
ae46c4e0
RH
9850{
9851 if (TREE_CODE (exp) == VAR_DECL
9852 && TREE_READONLY (exp)
9853 && !TREE_THIS_VOLATILE (exp)
9854 && DECL_INITIAL (exp)
9855 && (DECL_INITIAL (exp) == error_mark_node
9856 || TREE_CONSTANT (DECL_INITIAL (exp)))
9857 && !reloc)
9a55eab3
JDA
9858 {
9859 if (TARGET_SOM
9860 && DECL_ONE_ONLY (exp)
9861 && !DECL_WEAK (exp))
d6b5193b 9862 return som_one_only_readonly_data_section;
9a55eab3 9863 else
d6b5193b 9864 return readonly_data_section;
9a55eab3 9865 }
6615c446 9866 else if (CONSTANT_CLASS_P (exp) && !reloc)
d6b5193b 9867 return readonly_data_section;
9a55eab3
JDA
9868 else if (TARGET_SOM
9869 && TREE_CODE (exp) == VAR_DECL
9870 && DECL_ONE_ONLY (exp)
e41f3691 9871 && !DECL_WEAK (exp))
d6b5193b 9872 return som_one_only_data_section;
ae46c4e0 9873 else
d6b5193b 9874 return data_section;
ae46c4e0 9875}
e2500fed 9876
f258111a
JDA
/* Implement TARGET_ASM_RELOC_RW_MASK.  Always returns 3.  */

static int
pa_reloc_rw_mask (void)
{
  /* We force (const (plus (symbol) (const_int))) to memory when the
     const_int doesn't fit in a 14-bit integer.  The SOM linker can't
     handle this construct in read-only memory and we want to avoid
     this for ELF.  So, an RTX needing relocation is always forced to
     the data section.  */
  const int rw_mask = 3;

  return rw_mask;
}
9889
5eb99654 9890static void
b7849684 9891pa_globalize_label (FILE *stream, const char *name)
5eb99654
KG
9892{
9893 /* We only handle DATA objects here, functions are globalized in
9894 ASM_DECLARE_FUNCTION_NAME. */
9895 if (! FUNCTION_NAME_P (name))
9896 {
9897 fputs ("\t.EXPORT ", stream);
9898 assemble_name (stream, name);
9899 fputs (",DATA\n", stream);
9900 }
9901}
3f12cd9b 9902
bd5bd7ac
KH
9903/* Worker function for TARGET_STRUCT_VALUE_RTX. */
9904
3f12cd9b
KH
9905static rtx
9906pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9907 int incoming ATTRIBUTE_UNUSED)
9908{
9909 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9910}
9911
bd5bd7ac
KH
9912/* Worker function for TARGET_RETURN_IN_MEMORY. */
9913
3f12cd9b 9914bool
586de218 9915pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
3f12cd9b
KH
9916{
9917 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9918 PA64 ABI says that objects larger than 128 bits are returned in memory.
9919 Note, int_size_in_bytes can return -1 if the size of the object is
9920 variable or larger than the maximum value that can be expressed as
9921 a HOST_WIDE_INT. It can also return zero for an empty type. The
9922 simplest way to handle variable and empty types is to pass them in
9923 memory. This avoids problems in defining the boundaries of argument
9924 slots, allocating registers, etc. */
9925 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9926 || int_size_in_bytes (type) <= 0);
9927}
9928
744b2d61
JDA
9929/* Structure to hold declaration and name of external symbols that are
9930 emitted by GCC. We generate a vector of these symbols and output them
9931 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9932 This avoids putting out names that are never really used. */
9933
d1b38208 9934typedef struct GTY(()) extern_symbol
744b2d61
JDA
9935{
9936 tree decl; /* Declaration handed to pa_hpux_asm_output_external. */
9937 const char *name; /* Name handed over together with DECL. */
d4e6fecb 9938} extern_symbol;
744b2d61
JDA
9939
9940/* Define gc'd vector type for extern_symbol. */
744b2d61
JDA
9941
9942/* Vector of extern_symbol entries, stored by value. Entries are pushed
   by pa_hpux_asm_output_external and the vector is freed by
   pa_hpux_file_end. */
9771b263 9943static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
744b2d61
JDA
9944
9945#ifdef ASM_OUTPUT_EXTERNAL_REAL
9946/* Mark DECL (name NAME) as an external reference (assembler output
9947 file FILE). This saves the names to output at the end of the file
9948 if actually referenced. */
9949
9950void
9951pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9952{
744b2d61 9953 gcc_assert (file == asm_out_file);
f32682ca 9954 extern_symbol p = {decl, name};
9771b263 9955 vec_safe_push (extern_symbols, p);
744b2d61
JDA
9956}
9957
9958/* Output text required at the end of an assembler file.
9959 This includes deferred plabels and .import directives for
9960 all external symbols that were actually referenced. */
9961
9962static void
9963pa_hpux_file_end (void)
9964{
9965 unsigned int i;
d4e6fecb 9966 extern_symbol *p;
744b2d61 9967
3674b34d
JDA
9968 if (!NO_DEFERRED_PROFILE_COUNTERS)
9969 output_deferred_profile_counters ();
9970
744b2d61
JDA
9971 output_deferred_plabels ();
9972
9771b263 9973 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
744b2d61
JDA
9974 {
9975 tree decl = p->decl;
9976
9977 if (!TREE_ASM_WRITTEN (decl)
9978 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9979 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9980 }
9981
9771b263 9982 vec_free (extern_symbols);
744b2d61
JDA
9983}
9984#endif
9985
6982c5d4 9986/* Return true if a change from mode FROM to mode TO for a register
0a2aaacc 9987 in register class RCLASS is invalid. */
6982c5d4
JDA
9988
9989bool
ef4bddc2 9990pa_cannot_change_mode_class (machine_mode from, machine_mode to,
0a2aaacc 9991 enum reg_class rclass)
6982c5d4
JDA
9992{
9993 if (from == to)
9994 return false;
9995
212b7e9c
JDA
9996 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
9997 return false;
9998
9999 /* Reject changes to/from modes with zero size. */
10000 if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
10001 return true;
10002
6982c5d4
JDA
10003 /* Reject changes to/from complex and vector modes. */
10004 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10005 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10006 return true;
10007
212b7e9c
JDA
10008 /* There is no way to load QImode or HImode values directly from memory
10009 to a FP register. SImode loads to the FP registers are not zero
10010 extended. On the 64-bit target, this conflicts with the definition
10011 of LOAD_EXTEND_OP. Thus, we can't allow changing between modes with
10012 different sizes in the floating-point registers. */
0a2aaacc 10013 if (MAYBE_FP_REG_CLASS_P (rclass))
6982c5d4
JDA
10014 return true;
10015
f939c3e6 10016 /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
6982c5d4
JDA
10017 in specific sets of registers. Thus, we cannot allow changing
10018 to a larger mode when it's larger than a word. */
10019 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10020 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10021 return true;
10022
10023 return false;
10024}
10025
99e1629f 10026/* Implement TARGET_MODES_TIEABLE_P.
6982c5d4
JDA
10027
10028 We should return FALSE for QImode and HImode because these modes
10029 are not ok in the floating-point registers. However, this prevents
10030 tieing these modes to SImode and DImode in the general registers.
f939c3e6 10031 So, this isn't a good idea. We rely on TARGET_HARD_REGNO_MODE_OK and
6982c5d4
JDA
10032 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
10033 in the floating-point registers. */
10034
99e1629f 10035static bool
ef4bddc2 10036pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
6982c5d4
JDA
10037{
10038 /* Don't tie modes in different classes. */
10039 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10040 return false;
10041
10042 return true;
10043}
10044
859c146c
RH
10045\f
10046/* Length in units of the trampoline instruction code. */
10047
10048#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
10049
10050
10051/* Output assembler code for a block containing the constant parts
10052 of a trampoline, leaving space for the variable parts.\
10053
10054 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10055 and then branches to the specified routine.
10056
10057 This code template is copied from text segment to stack location
10058 and then patched with pa_trampoline_init to contain valid values,
10059 and then entered as a subroutine.
10060
10061 It is best to keep this as small as possible to avoid having to
10062 flush multiple lines in the cache. */
10063
10064static void
10065pa_asm_trampoline_template (FILE *f)
10066{
10067 if (!TARGET_64BIT)
10068 {
10069 fputs ("\tldw 36(%r22),%r21\n", f);
10070 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
10071 if (ASSEMBLER_DIALECT == 0)
10072 fputs ("\tdepi 0,31,2,%r21\n", f);
10073 else
10074 fputs ("\tdepwi 0,31,2,%r21\n", f);
10075 fputs ("\tldw 4(%r21),%r19\n", f);
10076 fputs ("\tldw 0(%r21),%r21\n", f);
10077 if (TARGET_PA_20)
10078 {
10079 fputs ("\tbve (%r21)\n", f);
10080 fputs ("\tldw 40(%r22),%r29\n", f);
10081 fputs ("\t.word 0\n", f);
10082 fputs ("\t.word 0\n", f);
10083 }
10084 else
10085 {
10086 fputs ("\tldsid (%r21),%r1\n", f);
10087 fputs ("\tmtsp %r1,%sr0\n", f);
10088 fputs ("\tbe 0(%sr0,%r21)\n", f);
10089 fputs ("\tldw 40(%r22),%r29\n", f);
10090 }
10091 fputs ("\t.word 0\n", f);
10092 fputs ("\t.word 0\n", f);
10093 fputs ("\t.word 0\n", f);
10094 fputs ("\t.word 0\n", f);
10095 }
10096 else
10097 {
10098 fputs ("\t.dword 0\n", f);
10099 fputs ("\t.dword 0\n", f);
10100 fputs ("\t.dword 0\n", f);
10101 fputs ("\t.dword 0\n", f);
10102 fputs ("\tmfia %r31\n", f);
10103 fputs ("\tldd 24(%r31),%r1\n", f);
10104 fputs ("\tldd 24(%r1),%r27\n", f);
10105 fputs ("\tldd 16(%r1),%r1\n", f);
10106 fputs ("\tbve (%r1)\n", f);
10107 fputs ("\tldd 32(%r31),%r31\n", f);
10108 fputs ("\t.dword 0 ; fptr\n", f);
10109 fputs ("\t.dword 0 ; static link\n", f);
10110 }
10111}
10112
10113/* Emit RTL insns to initialize the variable parts of a trampoline.
10114 FNADDR is an RTX for the address of the function's pure code.
10115 CXT is an RTX for the static chain value for the function.
10116
10117 Move the function address to the trampoline template at offset 36.
10118 Move the static chain value to trampoline template at offset 40.
10119 Move the trampoline address to trampoline template at offset 44.
10120 Move r19 to trampoline template at offset 48. The latter two
10121 words create a plabel for the indirect call to the trampoline.
10122
10123 A similar sequence is used for the 64-bit port but the plabel is
10124 at the beginning of the trampoline.
10125
10126 Finally, the cache entries for the trampoline code are flushed.
10127 This is necessary to ensure that the trampoline instruction sequence
10128 is written to memory prior to any attempts at prefetching the code
10129 sequence. */
10130
10131static void
10132pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10133{
10134 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10135 rtx start_addr = gen_reg_rtx (Pmode);
10136 rtx end_addr = gen_reg_rtx (Pmode);
10137 rtx line_length = gen_reg_rtx (Pmode);
10138 rtx r_tramp, tmp;
10139
10140 emit_block_move (m_tramp, assemble_trampoline_template (),
10141 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10142 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10143
10144 if (!TARGET_64BIT)
10145 {
10146 tmp = adjust_address (m_tramp, Pmode, 36);
10147 emit_move_insn (tmp, fnaddr);
10148 tmp = adjust_address (m_tramp, Pmode, 40);
10149 emit_move_insn (tmp, chain_value);
10150
10151 /* Create a fat pointer for the trampoline. */
10152 tmp = adjust_address (m_tramp, Pmode, 44);
10153 emit_move_insn (tmp, r_tramp);
10154 tmp = adjust_address (m_tramp, Pmode, 48);
10155 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10156
10157 /* fdc and fic only use registers for the address to flush,
10158 they do not accept integer displacements. We align the
10159 start and end addresses to the beginning of their respective
10160 cache lines to minimize the number of lines flushed. */
10161 emit_insn (gen_andsi3 (start_addr, r_tramp,
10162 GEN_INT (-MIN_CACHELINE_SIZE)));
0a81f074
RS
10163 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10164 TRAMPOLINE_CODE_SIZE-1));
859c146c
RH
10165 emit_insn (gen_andsi3 (end_addr, tmp,
10166 GEN_INT (-MIN_CACHELINE_SIZE)));
10167 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10168 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10169 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10170 gen_reg_rtx (Pmode),
10171 gen_reg_rtx (Pmode)));
10172 }
10173 else
10174 {
10175 tmp = adjust_address (m_tramp, Pmode, 56);
10176 emit_move_insn (tmp, fnaddr);
10177 tmp = adjust_address (m_tramp, Pmode, 64);
10178 emit_move_insn (tmp, chain_value);
10179
10180 /* Create a fat pointer for the trampoline. */
10181 tmp = adjust_address (m_tramp, Pmode, 16);
0a81f074
RS
10182 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10183 r_tramp, 32)));
859c146c
RH
10184 tmp = adjust_address (m_tramp, Pmode, 24);
10185 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10186
10187 /* fdc and fic only use registers for the address to flush,
10188 they do not accept integer displacements. We align the
10189 start and end addresses to the beginning of their respective
10190 cache lines to minimize the number of lines flushed. */
0a81f074 10191 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
859c146c
RH
10192 emit_insn (gen_anddi3 (start_addr, tmp,
10193 GEN_INT (-MIN_CACHELINE_SIZE)));
0a81f074
RS
10194 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10195 TRAMPOLINE_CODE_SIZE - 1));
859c146c
RH
10196 emit_insn (gen_anddi3 (end_addr, tmp,
10197 GEN_INT (-MIN_CACHELINE_SIZE)));
10198 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10199 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10200 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10201 gen_reg_rtx (Pmode),
10202 gen_reg_rtx (Pmode)));
10203 }
017d38f5
MK
10204
10205#ifdef HAVE_ENABLE_EXECUTE_STACK
10206  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
db69559b 10207 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
017d38f5 10208#endif
859c146c
RH
10209}
10210
10211/* Perform any machine-specific adjustment in the address of the trampoline.
10212 ADDR contains the address that was passed to pa_trampoline_init.
10213 Adjust the trampoline address to point to the plabel at offset 44. */
10214
10215static rtx
10216pa_trampoline_adjust_address (rtx addr)
10217{
10218 if (!TARGET_64BIT)
0a81f074 10219 addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
859c146c
RH
10220 return addr;
10221}
1f65437d
SE
10222
10223static rtx
10224pa_delegitimize_address (rtx orig_x)
10225{
10226 rtx x = delegitimize_mem_from_attrs (orig_x);
10227
10228 if (GET_CODE (x) == LO_SUM
10229 && GET_CODE (XEXP (x, 1)) == UNSPEC
10230 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10231 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10232 return x;
10233}
859c146c 10234\f
bc707992
JDA
10235static rtx
10236pa_internal_arg_pointer (void)
10237{
10238 /* The argument pointer and the hard frame pointer are the same in
10239 the 32-bit runtime, so we don't need a copy. */
10240 if (TARGET_64BIT)
10241 return copy_to_reg (virtual_incoming_args_rtx);
10242 else
10243 return virtual_incoming_args_rtx;
10244}
10245
10246/* Given FROM and TO register numbers, say whether this elimination is allowed.
10247 Frame pointer elimination is automatically handled. */
10248
10249static bool
10250pa_can_eliminate (const int from, const int to)
10251{
10252 /* The argument cannot be eliminated in the 64-bit runtime. */
10253 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10254 return false;
10255
10256 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10257 ? ! frame_pointer_needed
10258 : true);
10259}
10260
10261/* Define the offset between two registers, FROM to be eliminated and its
10262 replacement TO, at the start of a routine. */
10263HOST_WIDE_INT
10264pa_initial_elimination_offset (int from, int to)
10265{
10266 HOST_WIDE_INT offset;
10267
10268 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10269 && to == STACK_POINTER_REGNUM)
ae9d61ab 10270 offset = -pa_compute_frame_size (get_frame_size (), 0);
bc707992
JDA
10271 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10272 offset = 0;
10273 else
10274 gcc_unreachable ();
10275
10276 return offset;
10277}
10278
5efd84c5
NF
10279static void
10280pa_conditional_register_usage (void)
10281{
10282 int i;
10283
10284 if (!TARGET_64BIT && !TARGET_PA_11)
10285 {
10286 for (i = 56; i <= FP_REG_LAST; i++)
10287 fixed_regs[i] = call_used_regs[i] = 1;
10288 for (i = 33; i < 56; i += 2)
10289 fixed_regs[i] = call_used_regs[i] = 1;
10290 }
10291 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10292 {
10293 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10294 fixed_regs[i] = call_used_regs[i] = 1;
10295 }
10296 if (flag_pic)
10297 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10298}
10299
41a1208a
JDA
10300/* Target hook for c_mode_for_suffix. */
10301
ef4bddc2 10302static machine_mode
41a1208a
JDA
10303pa_c_mode_for_suffix (char suffix)
10304{
10305 if (HPUX_LONG_DOUBLE_LIBRARY)
10306 {
10307 if (suffix == 'q')
10308 return TFmode;
10309 }
10310
10311 return VOIDmode;
10312}
10313
7550cb35
JDA
10314/* Target hook for function_section. */
10315
10316static section *
10317pa_function_section (tree decl, enum node_frequency freq,
10318 bool startup, bool exit)
10319{
10320 /* Put functions in text section if target doesn't have named sections. */
677f3fa8 10321 if (!targetm_common.have_named_sections)
7550cb35
JDA
10322 return text_section;
10323
10324 /* Force nested functions into the same section as the containing
10325 function. */
10326 if (decl
f961457f 10327 && DECL_SECTION_NAME (decl) == NULL
7550cb35
JDA
10328 && DECL_CONTEXT (decl) != NULL_TREE
10329 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
f961457f 10330 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
7550cb35
JDA
10331 return function_section (DECL_CONTEXT (decl));
10332
10333 /* Otherwise, use the default function section. */
10334 return default_function_section (decl, freq, startup, exit);
10335}
10336
1a627b35
RS
10337/* Implement TARGET_LEGITIMATE_CONSTANT_P.
10338
10339 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10340 that need more than three instructions to load prior to reload. This
10341 limit is somewhat arbitrary. It takes three instructions to load a
10342 CONST_INT from memory but two are memory accesses. It may be better
10343 to increase the allowed range for CONST_INTS. We may also be able
10344 to handle CONST_DOUBLES. */
10345
10346static bool
ef4bddc2 10347pa_legitimate_constant_p (machine_mode mode, rtx x)
1a627b35
RS
10348{
10349 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10350 return false;
10351
10352 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10353 return false;
10354
9a201645 10355 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
0e4ae794
JDA
10356 legitimate constants. The other variants can't be handled by
10357 the move patterns after reload starts. */
093a6c99 10358 if (tls_referenced_p (x))
0e4ae794 10359 return false;
9a201645 10360
1a627b35
RS
10361 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10362 return false;
10363
10364 if (TARGET_64BIT
10365 && HOST_BITS_PER_WIDE_INT > 32
10366 && GET_CODE (x) == CONST_INT
10367 && !reload_in_progress
10368 && !reload_completed
10369 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
5877e54e 10370 && !pa_cint_ok_for_move (UINTVAL (x)))
1a627b35
RS
10371 return false;
10372
fda33f15
JDA
10373 if (function_label_operand (x, mode))
10374 return false;
10375
1a627b35
RS
10376 return true;
10377}
10378
fda33f15
JDA
10379/* Implement TARGET_SECTION_TYPE_FLAGS. */
10380
10381static unsigned int
10382pa_section_type_flags (tree decl, const char *name, int reloc)
10383{
10384 unsigned int flags;
10385
10386 flags = default_section_type_flags (decl, name, reloc);
10387
10388 /* Function labels are placed in the constant pool. This can
10389 cause a section conflict if decls are put in ".data.rel.ro"
10390 or ".data.rel.ro.local" using the __attribute__ construct. */
10391 if (strcmp (name, ".data.rel.ro") == 0
10392 || strcmp (name, ".data.rel.ro.local") == 0)
10393 flags |= SECTION_WRITE | SECTION_RELRO;
10394
10395 return flags;
10396}
10397
1a04ac2b
JDA
10398/* pa_legitimate_address_p recognizes an RTL expression that is a
10399 valid memory address for an instruction. The MODE argument is the
10400 machine mode for the MEM expression that wants to use this address.
10401
10402 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10403 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10404 available with floating point loads and stores, and integer loads.
10405 We get better code by allowing indexed addresses in the initial
10406 RTL generation.
10407
10408 The acceptance of indexed addresses as legitimate implies that we
10409 must provide patterns for doing indexed integer stores, or the move
10410 expanders must force the address of an indexed store to a register.
10411 We have adopted the latter approach.
10412
10413 Another function of pa_legitimate_address_p is to ensure that
10414 the base register is a valid pointer for indexed instructions.
10415 On targets that have non-equivalent space registers, we have to
10416 know at the time of assembler output which register in a REG+REG
10417 pair is the base register. The REG_POINTER flag is sometimes lost
10418 in reload and the following passes, so it can't be relied on during
10419 code generation. Thus, we either have to canonicalize the order
10420 of the registers in REG+REG indexed addresses, or treat REG+REG
10421 addresses separately and provide patterns for both permutations.
10422
10423 The latter approach requires several hundred additional lines of
10424 code in pa.md. The downside to canonicalizing is that a PLUS
10425 in the wrong order can't combine to form to make a scaled indexed
10426 memory operand. As we won't need to canonicalize the operands if
10427 the REG_POINTER lossage can be fixed, it seems better canonicalize.
10428
10429 We initially break out scaled indexed addresses in canonical order
10430 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10431 scaled indexed addresses during RTL generation. However, fold_rtx
10432 has its own opinion on how the operands of a PLUS should be ordered.
10433 If one of the operands is equivalent to a constant, it will make
10434 that operand the second operand. As the base register is likely to
10435 be equivalent to a SYMBOL_REF, we have made it the second operand.
10436
10437 pa_legitimate_address_p accepts REG+REG as legitimate when the
10438 operands are in the order INDEX+BASE on targets with non-equivalent
10439 space registers, and in any order on targets with equivalent space
10440 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10441
10442 We treat a SYMBOL_REF as legitimate if it is part of the current
10443 function's constant-pool, because such addresses can actually be
10444 output as REG+SMALLINT. */
10445
10446static bool
ef4bddc2 10447pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
1a04ac2b
JDA
10448{
10449 if ((REG_P (x)
10450 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10451 : REG_OK_FOR_BASE_P (x)))
10452 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10453 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10454 && REG_P (XEXP (x, 0))
10455 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10456 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10457 return true;
10458
10459 if (GET_CODE (x) == PLUS)
10460 {
10461 rtx base, index;
10462
10463 /* For REG+REG, the base register should be in XEXP (x, 1),
10464 so check it first. */
10465 if (REG_P (XEXP (x, 1))
10466 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10467 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10468 base = XEXP (x, 1), index = XEXP (x, 0);
10469 else if (REG_P (XEXP (x, 0))
10470 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10471 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10472 base = XEXP (x, 0), index = XEXP (x, 1);
10473 else
10474 return false;
10475
10476 if (GET_CODE (index) == CONST_INT)
10477 {
10478 if (INT_5_BITS (index))
10479 return true;
10480
10481 /* When INT14_OK_STRICT is false, a secondary reload is needed
10482 to adjust the displacement of SImode and DImode floating point
55ada375
JDA
10483 instructions but this may fail when the register also needs
10484 reloading. So, we return false when STRICT is true. We
1a04ac2b
JDA
10485 also reject long displacements for float mode addresses since
10486 the majority of accesses will use floating point instructions
10487 that don't support 14-bit offsets. */
10488 if (!INT14_OK_STRICT
55ada375 10489 && (strict || !(reload_in_progress || reload_completed))
ceaca33e
JDA
10490 && mode != QImode
10491 && mode != HImode)
10492 return false;
1a04ac2b 10493
ceaca33e 10494 return base14_operand (index, mode);
1a04ac2b
JDA
10495 }
10496
10497 if (!TARGET_DISABLE_INDEXING
10498 /* Only accept the "canonical" INDEX+BASE operand order
10499 on targets with non-equivalent space registers. */
10500 && (TARGET_NO_SPACE_REGS
10501 ? REG_P (index)
10502 : (base == XEXP (x, 1) && REG_P (index)
10503 && (reload_completed
10504 || (reload_in_progress && HARD_REGISTER_P (base))
10505 || REG_POINTER (base))
10506 && (reload_completed
10507 || (reload_in_progress && HARD_REGISTER_P (index))
10508 || !REG_POINTER (index))))
10509 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10510 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10511 : REG_OK_FOR_INDEX_P (index))
10512 && borx_reg_operand (base, Pmode)
10513 && borx_reg_operand (index, Pmode))
10514 return true;
10515
10516 if (!TARGET_DISABLE_INDEXING
10517 && GET_CODE (index) == MULT
10518 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10519 && REG_P (XEXP (index, 0))
10520 && GET_MODE (XEXP (index, 0)) == Pmode
10521 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10522 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10523 && GET_CODE (XEXP (index, 1)) == CONST_INT
10524 && INTVAL (XEXP (index, 1))
10525 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10526 && borx_reg_operand (base, Pmode))
10527 return true;
10528
10529 return false;
10530 }
10531
10532 if (GET_CODE (x) == LO_SUM)
10533 {
10534 rtx y = XEXP (x, 0);
10535
10536 if (GET_CODE (y) == SUBREG)
10537 y = SUBREG_REG (y);
10538
10539 if (REG_P (y)
10540 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10541 : REG_OK_FOR_BASE_P (y)))
10542 {
10543 /* Needed for -fPIC */
10544 if (mode == Pmode
10545 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10546 return true;
10547
10548 if (!INT14_OK_STRICT
55ada375 10549 && (strict || !(reload_in_progress || reload_completed))
ceaca33e
JDA
10550 && mode != QImode
10551 && mode != HImode)
10552 return false;
1a04ac2b
JDA
10553
10554 if (CONSTANT_P (XEXP (x, 1)))
10555 return true;
10556 }
10557 return false;
10558 }
10559
10560 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10561 return true;
10562
10563 return false;
10564}
10565
10566/* Look for machine dependent ways to make the invalid address AD a
10567 valid address.
10568
10569 For the PA, transform:
10570
10571 memory(X + <large int>)
10572
10573 into:
10574
10575 if (<large int> & mask) >= 16
10576 Y = (<large int> & ~mask) + mask + 1 Round up.
10577 else
10578 Y = (<large int> & ~mask) Round down.
10579 Z = X + Y
10580 memory (Z + (<large int> - Y));
10581
10582 This makes reload inheritance and reload_cse work better since Z
10583 can be reused.
10584
10585 There may be more opportunities to improve code with this hook. */
10586
10587rtx
ef4bddc2 10588pa_legitimize_reload_address (rtx ad, machine_mode mode,
1a04ac2b
JDA
10589 int opnum, int type,
10590 int ind_levels ATTRIBUTE_UNUSED)
10591{
10592 long offset, newoffset, mask;
10593 rtx new_rtx, temp = NULL_RTX;
10594
10595 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10596 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10597
10598 if (optimize && GET_CODE (ad) == PLUS)
10599 temp = simplify_binary_operation (PLUS, Pmode,
10600 XEXP (ad, 0), XEXP (ad, 1));
10601
10602 new_rtx = temp ? temp : ad;
10603
10604 if (optimize
10605 && GET_CODE (new_rtx) == PLUS
10606 && GET_CODE (XEXP (new_rtx, 0)) == REG
10607 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10608 {
10609 offset = INTVAL (XEXP ((new_rtx), 1));
10610
10611 /* Choose rounding direction. Round up if we are >= halfway. */
10612 if ((offset & mask) >= ((mask + 1) / 2))
10613 newoffset = (offset & ~mask) + mask + 1;
10614 else
10615 newoffset = offset & ~mask;
10616
10617 /* Ensure that long displacements are aligned. */
10618 if (mask == 0x3fff
10619 && (GET_MODE_CLASS (mode) == MODE_FLOAT
10620 || (TARGET_64BIT && (mode) == DImode)))
10621 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10622
10623 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10624 {
10625 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10626 GEN_INT (newoffset));
10627 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10628 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10629 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10630 opnum, (enum reload_type) type);
10631 return ad;
10632 }
10633 }
10634
10635 return NULL_RTX;
10636}
10637
3ba07ad3
JDA
10638/* Output address vector. */
10639
10640void
10641pa_output_addr_vec (rtx lab, rtx body)
10642{
10643 int idx, vlen = XVECLEN (body, 0);
10644
10645 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10646 if (TARGET_GAS)
10647 fputs ("\t.begin_brtab\n", asm_out_file);
10648 for (idx = 0; idx < vlen; idx++)
10649 {
10650 ASM_OUTPUT_ADDR_VEC_ELT
10651 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10652 }
10653 if (TARGET_GAS)
10654 fputs ("\t.end_brtab\n", asm_out_file);
10655}
10656
10657/* Output address difference vector. */
10658
10659void
10660pa_output_addr_diff_vec (rtx lab, rtx body)
10661{
10662 rtx base = XEXP (XEXP (body, 0), 0);
10663 int idx, vlen = XVECLEN (body, 1);
10664
10665 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10666 if (TARGET_GAS)
10667 fputs ("\t.begin_brtab\n", asm_out_file);
10668 for (idx = 0; idx < vlen; idx++)
10669 {
10670 ASM_OUTPUT_ADDR_DIFF_ELT
10671 (asm_out_file,
10672 body,
10673 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10674 CODE_LABEL_NUMBER (base));
10675 }
10676 if (TARGET_GAS)
10677 fputs ("\t.end_brtab\n", asm_out_file);
10678}
10679
7e7c9d40
JDA
10680/* This is a helper function for the other atomic operations. This function
10681 emits a loop that contains SEQ that iterates until a compare-and-swap
10682 operation at the end succeeds. MEM is the memory to be modified. SEQ is
10683 a set of instructions that takes a value from OLD_REG as an input and
10684 produces a value in NEW_REG as an output. Before SEQ, OLD_REG will be
10685 set to the current contents of MEM. After SEQ, a compare-and-swap will
10686 attempt to update MEM with NEW_REG. The function returns true when the
10687 loop was generated successfully. */
10688
10689static bool
10690pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
10691{
10692 machine_mode mode = GET_MODE (mem);
10693 rtx_code_label *label;
10694 rtx cmp_reg, success, oldval;
10695
10696 /* The loop we want to generate looks like
10697
10698 cmp_reg = mem;
10699 label:
10700 old_reg = cmp_reg;
10701 seq;
10702 (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10703 if (success)
10704 goto label;
10705
10706 Note that we only do the plain load from memory once. Subsequent
10707 iterations use the value loaded by the compare-and-swap pattern. */
10708
10709 label = gen_label_rtx ();
10710 cmp_reg = gen_reg_rtx (mode);
10711
10712 emit_move_insn (cmp_reg, mem);
10713 emit_label (label);
10714 emit_move_insn (old_reg, cmp_reg);
10715 if (seq)
10716 emit_insn (seq);
10717
10718 success = NULL_RTX;
10719 oldval = cmp_reg;
10720 if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
10721 new_reg, false, MEMMODEL_SYNC_SEQ_CST,
10722 MEMMODEL_RELAXED))
10723 return false;
10724
10725 if (oldval != cmp_reg)
10726 emit_move_insn (cmp_reg, oldval);
10727
10728 /* Mark this jump predicted not taken. */
10729 emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
c0c46dc6
JH
10730 GET_MODE (success), 1, label,
10731 profile_probability::guessed_never ());
7e7c9d40
JDA
10732 return true;
10733}
10734
10735/* This function tries to implement an atomic exchange operation using a
10736 compare_and_swap loop. VAL is written to *MEM. The previous contents of
10737 *MEM are returned, using TARGET if possible. No memory model is required
10738 since a compare_and_swap loop is seq-cst. */
10739
10740rtx
10741pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
10742{
10743 machine_mode mode = GET_MODE (mem);
10744
10745 if (can_compare_and_swap_p (mode, true))
10746 {
10747 if (!target || !register_operand (target, mode))
10748 target = gen_reg_rtx (mode);
10749 if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
10750 return target;
10751 }
10752
10753 return NULL_RTX;
10754}
10755
84c9e5ff
JDA
10756/* Implement TARGET_CALLEE_COPIES. The callee is responsible for copying
10757 arguments passed by hidden reference in the 32-bit HP runtime. Users
10758 can override this behavior for better compatibility with openmp at the
10759 risk of library incompatibilities. Arguments are always passed by value
10760 in the 64-bit HP runtime. */
10761
10762static bool
10763pa_callee_copies (cumulative_args_t cum ATTRIBUTE_UNUSED,
10764 machine_mode mode ATTRIBUTE_UNUSED,
10765 const_tree type ATTRIBUTE_UNUSED,
10766 bool named ATTRIBUTE_UNUSED)
10767{
10768 return !TARGET_CALLER_COPIES;
10769}
10770
c43f4279
RS
10771/* Implement TARGET_HARD_REGNO_NREGS. */
10772
10773static unsigned int
10774pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
10775{
10776 return PA_HARD_REGNO_NREGS (regno, mode);
10777}
10778
f939c3e6
RS
10779/* Implement TARGET_HARD_REGNO_MODE_OK. */
10780
10781static bool
10782pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10783{
10784 return PA_HARD_REGNO_MODE_OK (regno, mode);
10785}
10786
e2500fed 10787#include "gt-pa.h"