/* Subroutines for insn-output.c for HPPA.
   Copyright (C) 1992-2019 Free Software Foundation, Inc.
   Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "opts.h"
#include "builtins.h"

/* This file should be included last.  */
#include "target-def.h"

/* Return nonzero if there is a bypass for the output of
   OUT_INSN and the fp store IN_INSN.  */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  machine_mode store_mode;
  machine_mode other_mode;
  rtx set;

  if (recog_memoized (in_insn) < 0
      || (get_attr_type (in_insn) != TYPE_FPSTORE
          && get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
      || recog_memoized (out_insn) < 0)
    return 0;

  store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));

  set = single_set (out_insn);
  if (!set)
    return 0;

  other_mode = GET_MODE (SET_SRC (set));

  return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}


#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif

static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
                                    reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
                             rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_cpymem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void pa_linux_output_function_prologue (FILE *) ATTRIBUTE_UNUSED;
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *);
static int pa_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
     ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                    HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (scalar_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
static void pa_file_end (void);
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
                                  const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
                                 tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
                                     const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
                            const_tree, bool);
static pad_direction pa_function_arg_padding (machine_mode, const_tree);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
                                        machine_mode,
                                        secondary_reload_info *);
static bool pa_secondary_memory_needed (machine_mode,
                                        reg_class_t, reg_class_t);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
                                              machine_mode, int *,
                                              const_tree, int);

static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
static bool pa_callee_copies (cumulative_args_t, machine_mode,
                              const_tree, bool);
static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);

/* The following extra sections are only used for SOM.  */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;

/* Counts for the number of callee-saved general and floating point
   registers which were saved by the current function's prologue.  */
static int gr_saved, fr_saved;

/* Boolean indicating whether the return pointer was saved by the
   current function's prologue.  */
static bool rp_saved;

static rtx find_addr_reg (rtx);

/* Keep track of the number of bytes we have output in the CODE subspace
   during this compilation so we'll know when to emit inline long-calls.  */
unsigned long total_code_bytes;

/* The last address of the previous function plus the number of bytes in
   associated thunks that have been output.  This is used to determine if
   a thunk can use an IA-relative branch to reach its target function.  */
static unsigned int last_address;

/* Variables to handle plabels that we discover are necessary at assembly
   output time.  They are output after the current function.  */
struct GTY(()) deferred_plabel
{
  rtx internal_label;
  rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
  deferred_plabels;
static size_t n_deferred_plabels = 0;

/* Initialize the GCC target structure.  */

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall

#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END pa_file_end

#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p

#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES pa_callee_copies
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING pa_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED pa_secondary_memory_needed

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK pa_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P pa_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS pa_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed

struct gcc_target targetm = TARGET_INITIALIZER;

/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use fr4-fr31.  */
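  /* For example, an illustrative invocation such as
     "-mfixed-range=fr20-fr23" would fix fr20 through fr23; each
     comma-separated range is handled by one iteration of the loop
     below.  */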

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
          return;
        }
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }

  /* Check if all floating point registers have been fixed.  */
  for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
    if (!fixed_regs[i])
      break;

  if (i > FP_REG_LAST)
    target_flags |= MASK_DISABLE_FPREGS;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
pa_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) pa_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
        switch (opt->opt_index)
          {
          case OPT_mfixed_range_:
            fix_range (opt->arg);
            break;

          default:
            gcc_unreachable ();
          }
      }

  if (flag_pic && TARGET_PORTABLE_RUNTIME)
    {
      warning (0, "PIC code generation is not supported in the portable runtime model");
    }

  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
      warning (0, "PIC code generation is not compatible with fast indirect calls");
    }

  if (! TARGET_GAS && write_symbols != NO_DEBUG)
    {
      warning (0, "%<-g%> is only supported when using GAS on this processor,");
      warning (0, "%<-g%> option disabled");
      write_symbols = NO_DEBUG;
    }

  /* We only support the "big PIC" model now.  And we always generate PIC
     code when in 64bit mode.  */
  if (flag_pic == 1 || TARGET_64BIT)
    flag_pic = 2;

  /* Disable -freorder-blocks-and-partition as we don't support hot and
     cold partitioning.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "%<-freorder-blocks-and-partition%> does not work "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* We can't guarantee that .dword is available for 32-bit targets.  */
  if (UNITS_PER_WORD == 4)
    targetm.asm_out.aligned_op.di = NULL;

  /* The unaligned ops are only available when using GAS.  */
  if (!TARGET_GAS)
    {
      targetm.asm_out.unaligned_op.hi = NULL;
      targetm.asm_out.unaligned_op.si = NULL;
      targetm.asm_out.unaligned_op.di = NULL;
    }

  init_machine_status = pa_init_machine_status;
}

enum pa_builtins
{
  PA_BUILTIN_COPYSIGNQ,
  PA_BUILTIN_FABSQ,
  PA_BUILTIN_INFQ,
  PA_BUILTIN_HUGE_VALQ,
  PA_BUILTIN_max
};

static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];

static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  {
    tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
    set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
                      builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
  }
#endif
#if TARGET_HPUX_11
  {
    tree decl;

    if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinite");
    if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
      set_user_assembler_name (decl, "_Isfinitef");
  }
#endif

  if (HPUX_LONG_DOUBLE_LIBRARY)
    {
      tree decl, ftype;

      /* Under HPUX, the __float128 type is a synonym for "long double".  */
      (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                                 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
                                   PA_BUILTIN_FABSQ, BUILT_IN_MD,
                                   "_U_Qfabs", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (long_double_type_node,
                                        long_double_type_node,
                                        long_double_type_node,
                                        NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
                                   PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
                                   "_U_Qfcopysign", NULL_TREE);
      TREE_READONLY (decl) = 1;
      pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;

      ftype = build_function_type_list (long_double_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
                                   PA_BUILTIN_INFQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
                                   PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
                                   NULL, NULL_TREE);
      pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
    }
}

static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                   machine_mode mode ATTRIBUTE_UNUSED,
                   int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case PA_BUILTIN_FABSQ:
    case PA_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    case PA_BUILTIN_INFQ:
    case PA_BUILTIN_HUGE_VALQ:
      {
        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = const_double_from_real_value (inf, target_mode);

        tmp = validize_mem (force_const_mem (target_mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (target_mode);

        emit_move_insn (target, tmp);
        return target;
      }

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */

static struct machine_function *
pa_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* If FROM is a probable pointer register, mark TO as a probable
   pointer register with the same pointer alignment as FROM.  */

static void
copy_reg_pointer (rtx to, rtx from)
{
  if (REG_POINTER (from))
    mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}

/* Return 1 if X contains a symbolic expression.  We know these
   expressions will have one of a few well defined forms, so
   we need only check those forms.  */
int
pa_symbolic_expression_p (rtx x)
{
  /* Strip off any HIGH.  */
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return symbolic_operand (x, VOIDmode);
}

/* Accept any constant that can be moved in one instruction into a
   general register.  */
int
pa_cint_ok_for_move (unsigned HOST_WIDE_INT ival)
{
  /* OK if ldo, ldil, or zdepi, can be used.  */
  return (VAL_14_BITS_P (ival)
          || pa_ldil_cint_p (ival)
          || pa_zdepi_cint_p (ival));
}

/* True iff ldil can be used to load this CONST_INT.  The least
   significant 11 bits of the value must be zero and the value must
   not change sign when extended from 32 to 64 bits.  */
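/* Illustrative example: IVAL = 0x7ffff800 passes (its low 11 bits are
   zero and bit 31 is clear, so X below is zero), while IVAL =
   0x7ffff801 fails because bit 0 is set.  */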
int
pa_ldil_cint_p (unsigned HOST_WIDE_INT ival)
{
  unsigned HOST_WIDE_INT x;

  x = ival & (((unsigned HOST_WIDE_INT) -1 << 31) | 0x7ff);
  return x == 0 || x == ((unsigned HOST_WIDE_INT) -1 << 31);
}

/* True iff zdepi can be used to generate this CONST_INT.
   zdepi first sign extends a 5-bit signed number to a given field
   length, then places this field anywhere in a zero.  */
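/* Illustrative example: X = 0x00ff0000 is accepted; LSB_MASK below is
   0x00010000 and T works out to 0x00100000, a power of two, so the
   contiguous run of ones can be deposited with zdepi.  */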
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT lsb_mask, t;

  /* This might not be obvious, but it's at least fast.
     This function is critical; we don't have the time loops would take.  */
  lsb_mask = x & -x;
  t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
  /* Return true iff t is a power of two.  */
  return ((t & (t - 1)) == 0);
}

/* True iff depi or extru can be used to compute (reg & mask).
   Accept bit patterns like these:
   0....01....1
   1....10....0
   1..10..01..1  */
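/* Illustrative example: MASK = 0xfffffc0f matches the third pattern;
   ~MASK is 0x3f0, and adding its least significant bit gives 0x400, a
   power of two, so the single run of zeros can be handled.  */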
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask = ~mask;
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* True iff depi can be used to compute (reg | MASK).  */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
  mask += mask & -mask;
  return (mask & (mask - 1)) == 0;
}

/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go to REG.  If we need more
   than one register, we lose.  */

static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
  rtx pic_ref = orig;

  gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));

  /* Labels need special handling.  */
  if (pic_label_operand (orig, mode))
    {
      rtx_insn *insn;

      /* We do not want to go through the movXX expanders here since that
         would create recursion.

         Nor do we really want to call a generator for a named pattern
         since that requires multiple patterns if we want to support
         multiple word sizes.

         So instead we just emit the raw set, which avoids the movXX
         expanders completely.  */
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_insn (gen_rtx_SET (reg, orig));

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      add_reg_note (insn, REG_EQUAL, orig);

      /* During and after reload, we need to generate a REG_LABEL_OPERAND note
         and update LABEL_NUSES because this is not done automatically.  */
      if (reload_in_progress || reload_completed)
        {
          /* Extract LABEL_REF.  */
          if (GET_CODE (orig) == CONST)
            orig = XEXP (XEXP (orig, 0), 0);
          /* Extract CODE_LABEL.  */
          orig = XEXP (orig, 0);
          add_reg_note (insn, REG_LABEL_OPERAND, orig);
          /* Make sure we have a label and not a note.  */
          if (LABEL_P (orig))
            LABEL_NUSES (orig)++;
        }
      crtl->uses_pic_offset_table = 1;
      return reg;
    }
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx_insn *insn;
      rtx tmp_reg;

      gcc_assert (reg);

      /* Before reload, allocate a temporary register for the intermediate
         result.  This allows the sequence to be deleted when the final
         result is unused and the insns are trivially dead.  */
      tmp_reg = ((reload_in_progress || reload_completed)
                 ? reg : gen_reg_rtx (Pmode));

      if (function_label_operand (orig, VOIDmode))
        {
          /* Force function label into memory in word mode.  */
          orig = XEXP (force_const_mem (word_mode, orig), 0);
          /* Load plabel address from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
          emit_move_insn (reg, pic_ref);
          /* Now load address of function descriptor.  */
          pic_ref = gen_rtx_MEM (Pmode, reg);
        }
      else
        {
          /* Load symbol reference from DLT.  */
          emit_move_insn (tmp_reg,
                          gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
                                        gen_rtx_HIGH (word_mode, orig)));
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_LO_SUM (Pmode, tmp_reg,
                                             gen_rtx_UNSPEC (Pmode,
                                                             gen_rtvec (1, orig),
                                                             UNSPEC_DLTIND14R)));
        }

      crtl->uses_pic_offset_table = 1;
      mark_reg_pointer (reg, BITS_PER_UNIT);
      insn = emit_move_insn (reg, pic_ref);

      /* Put a REG_EQUAL note on this insn, so that it can be optimized.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (reg);
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                     base == reg ? 0 : reg);

      if (GET_CODE (orig) == CONST_INT)
        {
          if (INT_14_BITS (orig))
            return plus_constant (Pmode, base, INTVAL (orig));
          orig = force_reg (Pmode, orig);
        }
      pic_ref = gen_rtx_PLUS (Pmode, base, orig);
      /* Likewise, should we set special REG_NOTEs here?  */
    }

  return pic_ref;
}

static GTY(()) rtx gen_tls_tga;

static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static rtx
hppa_tls_call (rtx arg)
{
  rtx ret;

  ret = gen_reg_rtx (Pmode);
  emit_library_call_value (gen_tls_get_addr (), ret,
                           LCT_CONST, Pmode, arg, Pmode);

  return ret;
}

static rtx
legitimize_tls_address (rtx addr)
{
  rtx ret, tmp, t1, t2, tp;
  rtx_insn *insn;

  /* Currently, we can't handle anything but a SYMBOL_REF.  */
  if (GET_CODE (addr) != SYMBOL_REF)
    return addr;

  switch (SYMBOL_REF_TLS_MODEL (addr))
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_reg_rtx (Pmode);
      if (flag_pic)
        emit_insn (gen_tgd_load_pic (tmp, addr));
      else
        emit_insn (gen_tgd_load (tmp, addr));
      ret = hppa_tls_call (tmp);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      ret = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      start_sequence ();
      if (flag_pic)
        emit_insn (gen_tld_load_pic (tmp, addr));
      else
        emit_insn (gen_tld_load (tmp, addr));
      t1 = hppa_tls_call (tmp);
      insn = get_insns ();
      end_sequence ();
      t2 = gen_reg_rtx (Pmode);
      emit_libcall_block (insn, t2, t1,
                          gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                          UNSPEC_TLSLDBASE));
      emit_insn (gen_tld_offset_load (ret, addr, t2));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      tmp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      if (flag_pic)
        emit_insn (gen_tie_load_pic (tmp, addr));
      else
        emit_insn (gen_tie_load (tmp, addr));
      emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tp = gen_reg_rtx (Pmode);
      ret = gen_reg_rtx (Pmode);
      emit_insn (gen_tp_load (tp));
      emit_insn (gen_tle_load (ret, addr, tp));
      break;

    default:
      gcc_unreachable ();
    }

  return ret;
}

/* Helper for hppa_legitimize_address.  Given X, return true if it
   is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.

   These respectively represent canonical shift-add rtxs or scaled
   memory addresses.  */
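/* For instance, (mult (reg) (const_int 8)) and (ashift (reg)
   (const_int 3)) both satisfy this predicate; they are the scaled
   memory address and shift-add spellings of the same multiply.  */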
static bool
mem_shadd_or_shadd_rtx_p (rtx x)
{
  return ((GET_CODE (x) == ASHIFT
           || GET_CODE (x) == MULT)
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && ((GET_CODE (x) == ASHIFT
               && pa_shadd_constant_p (INTVAL (XEXP (x, 1))))
              || (GET_CODE (x) == MULT
                  && pa_mem_shadd_constant_p (INTVAL (XEXP (x, 1))))));
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the PA, transform:

        memory(X + <large int>)

   into:

        if (<large int> & mask) >= 16
          Y = (<large int> & ~mask) + mask + 1    Round up.
        else
          Y = (<large int> & ~mask)               Round down.
        Z = X + Y
        memory (Z + (<large int> - Y));

   This is for CSE to find several similar references, and only use one Z.

   X can either be a SYMBOL_REF or REG, but because combine cannot
   perform a 4->2 combination we do nothing for SYMBOL_REF + D where
   D will not fit in 14 bits.

   MODE_FLOAT references allow displacements which fit in 5 bits, so use
   0x1f as the mask.

   MODE_INT references allow displacements which fit in 14 bits, so use
   0x3fff as the mask.

   This relies on the fact that most mode MODE_FLOAT references will use FP
   registers and most mode MODE_INT references will use integer registers.
   (In the rare case of an FP register used in an integer MODE, we depend
   on secondary reloads to clean things up.)


   It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
   manner if Y is 2, 4, or 8.  (allows more shadd insns and shifted indexed
   addressing modes to be used).

   Note that the addresses passed into hppa_legitimize_address always
   come from a MEM, so we only have to match the MULT form on incoming
   addresses.  But to be future proof we also match the ASHIFT form.

   However, this routine always places those shift-add sequences into
   registers, so we have to generate the ASHIFT form as our output.

   Put X and Z into registers.  Then put the entire expression into
   a register.  */
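/* Worked example with illustrative numbers: for an integer-mode
   reference to X + 0x12345, the mask is 0x3fff and 0x12345 & 0x3fff
   = 0x2345, which is at least halfway to the next boundary, so
   Y = 0x14000.  Then Z = X + 0x14000 and the reference becomes
   memory (Z + -0x1cbb), whose displacement fits in 14 bits.  */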

rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                         machine_mode mode)
{
  rtx orig = x;

  /* We need to canonicalize the order of operands in unscaled indexed
     addresses since the code that checks if an address is valid doesn't
     always try both orders.  */
  if (!TARGET_NO_SPACE_REGS
      && GET_CODE (x) == PLUS
      && GET_MODE (x) == Pmode
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && REG_POINTER (XEXP (x, 0))
      && !REG_POINTER (XEXP (x, 1)))
    return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));

  if (tls_referenced_p (x))
    return legitimize_tls_address (x);
  else if (flag_pic)
    return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));

  /* Strip off CONST.  */
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* Special case.  Get the SYMBOL_REF into a register and use indexing.
     That should always be safe.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
    {
      rtx reg = force_reg (Pmode, XEXP (x, 1));
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
    }

  /* Note we must reject symbols which represent function addresses
     since the assembler/linker can't handle arithmetic on plabels.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
           && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
          || GET_CODE (XEXP (x, 0)) == REG))
    {
      rtx int_part, ptr_reg;
      int newoffset;
      int offset = INTVAL (XEXP (x, 1));
      int mask;

      mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
              && !INT14_OK_STRICT ? 0x1f : 0x3fff);

      /* Choose which way to round the offset.  Round up if we
         are >= halfway to the next boundary.  */
      if ((offset & mask) >= ((mask + 1) / 2))
        newoffset = (offset & ~ mask) + mask + 1;
      else
        newoffset = (offset & ~ mask);

      /* If the newoffset will not fit in 14 bits (ldo), then
         handling this would take 4 or 5 instructions (2 to load
         the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
         add the new offset and the SYMBOL_REF.)  Combine cannot
         handle 4->2 or 5->2 combinations, so do not create
         them.  */
      if (! VAL_14_BITS_P (newoffset)
          && GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
        {
          rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
          rtx tmp_reg
            = force_reg (Pmode,
                         gen_rtx_HIGH (Pmode, const_part));
          ptr_reg
            = force_reg (Pmode,
                         gen_rtx_LO_SUM (Pmode,
                                         tmp_reg, const_part));
        }
      else
        {
          if (! VAL_14_BITS_P (newoffset))
            int_part = force_reg (Pmode, GEN_INT (newoffset));
          else
            int_part = GEN_INT (newoffset);

          ptr_reg = force_reg (Pmode,
                               gen_rtx_PLUS (Pmode,
                                             force_reg (Pmode, XEXP (x, 0)),
                                             int_part));
        }
      return plus_constant (Pmode, ptr_reg, offset - newoffset);
    }

  /* Handle (plus (mult (a) (mem_shadd_constant)) (b)).  */

  if (GET_CODE (x) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
      && (OBJECT_P (XEXP (x, 1))
          || GET_CODE (XEXP (x, 1)) == SUBREG)
      && GET_CODE (XEXP (x, 1)) != CONST)
    {
      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));
      if (GET_CODE (XEXP (x, 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      rtx reg1, reg2;
      reg1 = XEXP (x, 1);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      reg2 = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      return force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg2,
                                                      GEN_INT (shift_val)),
                                      reg1));
    }

  /* Similarly for (plus (plus (mult (a) (mem_shadd_constant)) (b)) (c)).

     Only do so for floating point modes since this is more speculative
     and we lose if it's an integer store.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && mem_shadd_or_shadd_rtx_p (XEXP (XEXP (x, 0), 0))
      && (mode == SFmode || mode == DFmode))
    {
      int shift_val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

      /* If we were given a MULT, we must fix the constant
         as we're going to create the ASHIFT form.  */
      if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
        shift_val = exact_log2 (shift_val);

      /* Try and figure out what to use as a base register.  */
      rtx reg1, reg2, base, idx;

      reg1 = XEXP (XEXP (x, 0), 1);
      reg2 = XEXP (x, 1);
      base = NULL_RTX;
      idx = NULL_RTX;

      /* Make sure they're both regs.  If one was a SYMBOL_REF [+ const],
         then pa_emit_move_sequence will turn on REG_POINTER so we'll know
         it's a base register below.  */
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      /* Figure out what the base and index are.  */

      if (GET_CODE (reg1) == REG
          && REG_POINTER (reg1))
        {
          base = reg1;
          idx = gen_rtx_PLUS (Pmode,
                              gen_rtx_ASHIFT (Pmode,
                                              XEXP (XEXP (XEXP (x, 0), 0), 0),
                                              GEN_INT (shift_val)),
                              XEXP (x, 1));
        }
      else if (GET_CODE (reg2) == REG
               && REG_POINTER (reg2))
        {
          base = reg2;
          idx = XEXP (x, 0);
        }

      if (base == 0)
        return orig;

      /* If the index adds a large constant, try to scale the
         constant so that it can be loaded with only one insn.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
                            / INTVAL (XEXP (XEXP (idx, 0), 1)))
          && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
        {
          /* Divide the CONST_INT by the scale factor, then add it to A.  */
          int val = INTVAL (XEXP (idx, 1));
          val /= (1 << shift_val);

          reg1 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg1) != REG)
            reg1 = force_reg (Pmode, force_operand (reg1, 0));

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));

          /* We can now generate a simple scaled indexed address.  */
          return
            force_reg
              (Pmode, gen_rtx_PLUS (Pmode,
                                    gen_rtx_ASHIFT (Pmode, reg1,
                                                    GEN_INT (shift_val)),
                                    base));
        }

      /* If B + C is still a valid base register, then add them.  */
      if (GET_CODE (XEXP (idx, 1)) == CONST_INT
          && INTVAL (XEXP (idx, 1)) <= 4096
          && INTVAL (XEXP (idx, 1)) >= -4096)
        {
          rtx reg1, reg2;

          reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));

          reg2 = XEXP (XEXP (idx, 0), 0);
          if (GET_CODE (reg2) != CONST_INT)
            reg2 = force_reg (Pmode, force_operand (reg2, 0));

          return force_reg (Pmode,
                            gen_rtx_PLUS (Pmode,
                                          gen_rtx_ASHIFT (Pmode, reg2,
                                                          GEN_INT (shift_val)),
                                          reg1));
        }

      /* Get the index into a register, then add the base + index and
         return a register holding the result.  */

      /* First get A into a register.  */
      reg1 = XEXP (XEXP (idx, 0), 0);
      if (GET_CODE (reg1) != REG)
        reg1 = force_reg (Pmode, force_operand (reg1, 0));

      /* And get B into a register.  */
      reg2 = XEXP (idx, 1);
      if (GET_CODE (reg2) != REG)
        reg2 = force_reg (Pmode, force_operand (reg2, 0));

      reg1 = force_reg (Pmode,
                        gen_rtx_PLUS (Pmode,
                                      gen_rtx_ASHIFT (Pmode, reg1,
                                                      GEN_INT (shift_val)),
                                      reg2));

      /* Add the result to our base register and return.  */
      return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));

    }

  /* Uh-oh.  We might have an address for x[n-100000].  This needs
     special handling to avoid creating an indexed memory address
     with x-100000 as the base.

     If the constant part is small enough, then it's still safe because
     there is a guard page at the beginning and end of the data segment.

     Scaled references are common enough that we want to try and rearrange the
     terms so that we can use indexing for these addresses too.  Only
     do the optimization for floating point modes.  */

  if (GET_CODE (x) == PLUS
      && pa_symbolic_expression_p (XEXP (x, 1)))
    {
      /* Ugly.  We modify things here so that the address offset specified
         by the index expression is computed first, then added to x to form
         the entire address.  */

      rtx regx1, regx2, regy1, regy2, y;

      /* Strip off any CONST.  */
      y = XEXP (x, 1);
      if (GET_CODE (y) == CONST)
        y = XEXP (y, 0);

      if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
        {
          /* See if this looks like
                (plus (mult (reg) (mem_shadd_const))
                      (const (plus (symbol_ref) (const_int))))

             Where const_int is small.  In that case the const
             expression is a valid pointer for indexing.

             If const_int is big, but can be divided evenly by shadd_const
             and added to (reg).  This allows more scaled indexed addresses.  */
          if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
              && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
              && GET_CODE (XEXP (y, 1)) == CONST_INT
              && INTVAL (XEXP (y, 1)) >= -4096
              && INTVAL (XEXP (y, 1)) <= 4095)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              rtx reg1, reg2;

              reg1 = XEXP (x, 1);
              if (GET_CODE (reg1) != REG)
                reg1 = force_reg (Pmode, force_operand (reg1, 0));

              reg2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (reg2) != REG)
                reg2 = force_reg (Pmode, force_operand (reg2, 0));

              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode,
                                                         reg2,
                                                         GEN_INT (shift_val)),
                                         reg1));
            }
          else if ((mode == DFmode || mode == SFmode)
                   && GET_CODE (XEXP (y, 0)) == SYMBOL_REF
                   && mem_shadd_or_shadd_rtx_p (XEXP (x, 0))
                   && GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) % (1 << INTVAL (XEXP (XEXP (x, 0), 1))) == 0)
            {
              int shift_val = INTVAL (XEXP (XEXP (x, 0), 1));

              /* If we were given a MULT, we must fix the constant
                 as we're going to create the ASHIFT form.  */
              if (GET_CODE (XEXP (x, 0)) == MULT)
                shift_val = exact_log2 (shift_val);

              regx1
                = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
                                             / INTVAL (XEXP (XEXP (x, 0), 1))));
              regx2 = XEXP (XEXP (x, 0), 0);
              if (GET_CODE (regx2) != REG)
                regx2 = force_reg (Pmode, force_operand (regx2, 0));
              regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                        regx2, regx1));
              return
                force_reg (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_ASHIFT (Pmode, regx2,
                                                         GEN_INT (shift_val)),
                                         force_reg (Pmode, XEXP (y, 0))));
            }
          else if (GET_CODE (XEXP (y, 1)) == CONST_INT
                   && INTVAL (XEXP (y, 1)) >= -4096
                   && INTVAL (XEXP (y, 1)) <= 4095)
            {
              /* This is safe because of the guard page at the
                 beginning and end of the data space.  Just
                 return the original address.  */
              return orig;
            }
          else
            {
              /* Doesn't look like one we can optimize.  */
              regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
              regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
              regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
              regx1 = force_reg (Pmode,
                                 gen_rtx_fmt_ee (GET_CODE (y), Pmode,
                                                 regx1, regy2));
              return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
            }
        }
    }

  return orig;
}

/* Implement the TARGET_REGISTER_MOVE_COST hook.

   Compute extra cost of moving data between one register class
   and another.

   Make moves from SAR so expensive they should never happen.  We used to
   have 0xffff here, but that generates overflow in rare cases.

   Copies involving a FP register and a non-FP register are relatively
   expensive because they must go through memory.

   Other copies are reasonably cheap.  */

static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                         reg_class_t from, reg_class_t to)
{
  if (from == SHIFT_REGS)
    return 0x100;
  else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
    return 18;
  else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
           || (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
    return 16;
  else
    return 2;
}

/* For the HPPA, REG and REG+CONST is cost 0
   and addresses involving symbolic constants are cost 2.

   PIC addresses are very expensive.

   It is no coincidence that this has the same structure
   as pa_legitimate_address_p.  */

static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
                   addr_space_t as ATTRIBUTE_UNUSED,
                   bool speed ATTRIBUTE_UNUSED)
{
  switch (GET_CODE (X))
    {
    case REG:
    case PLUS:
    case LO_SUM:
      return 1;
    case HIGH:
      return 2;
    default:
      return 4;
    }
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
hppa_rtx_costs (rtx x, machine_mode mode, int outer_code,
                int opno ATTRIBUTE_UNUSED,
                int *total, bool speed ATTRIBUTE_UNUSED)
{
  int factor;
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      if (INTVAL (x) == 0)
        *total = 0;
      else if (INT_14_BITS (x))
        *total = 1;
      else
        *total = 2;
      return true;

    case HIGH:
      *total = 2;
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = 4;
      return true;

    case CONST_DOUBLE:
      if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
          && outer_code != SET)
        *total = 0;
      else
        *total = 8;
      return true;

    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
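      /* E.g. with illustrative numbers, a DImode multiply on a 32-bit
         target gets FACTOR = 2 below and so is costed at four times a
         SImode multiply.  */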
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
        factor = 1;

      if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
        *total = factor * factor * COSTS_N_INSNS (8);
      else
        *total = factor * factor * COSTS_N_INSNS (20);
      return true;

    case DIV:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (14);
          return true;
        }
      /* FALLTHRU */

    case UDIV:
    case MOD:
    case UMOD:
      /* A mode size N times larger than SImode needs O(N*N) more insns.  */
      factor = GET_MODE_SIZE (mode) / 4;
      if (factor == 0)
        factor = 1;

      *total = factor * factor * COSTS_N_INSNS (60);
      return true;

    case PLUS: /* this includes shNadd insns */
    case MINUS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      /* A size N times larger than UNITS_PER_WORD needs N times as
         many insns, taking N times as long.  */
      factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor == 0)
        factor = 1;
      *total = factor * COSTS_N_INSNS (1);
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}

/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
   new rtx with the correct mode.  */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
  if (mode == GET_MODE (orig))
    return orig;

  gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);

  return gen_rtx_REG (mode, REGNO (orig));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}

/* Emit insns to move operands[1] into operands[0].

   Return 1 if we have written out everything that needs to be done to
   do the move.  Otherwise, return 0 and the caller will emit the move
   normally.

   Note SCRATCH_REG may not be in the proper mode depending on how it
   will be used.  This routine is responsible for creating a new copy
   of SCRATCH_REG in the proper mode.  */

int
pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
{
  register rtx operand0 = operands[0];
  register rtx operand1 = operands[1];
  register rtx tem;

  /* We can only handle indexed addresses in the destination operand
     of floating point stores.  Thus, we need to break out indexed
     addresses from the destination operand.  */
  if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
    {
      gcc_assert (can_create_pseudo_p ());

      tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
      operand0 = replace_equiv_address (operand0, tem);
    }

  /* On targets with non-equivalent space registers, break out unscaled
     indexed addresses from the source operand before the final CSE.
     We have to do this because the REG_POINTER flag is not correctly
     carried through various optimization passes and CSE may substitute
     a pseudo without the pointer set for one with the pointer set.  As
     a result, we lose various opportunities to create insns with
     unscaled indexed addresses.  */
  if (!TARGET_NO_SPACE_REGS
      && !cse_not_expected
      && GET_CODE (operand1) == MEM
      && GET_CODE (XEXP (operand1, 0)) == PLUS
      && REG_P (XEXP (XEXP (operand1, 0), 0))
      && REG_P (XEXP (XEXP (operand1, 0), 1)))
    operand1
      = replace_equiv_address (operand1,
                               copy_to_mode_reg (Pmode, XEXP (operand1, 0)));

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand0) == REG
      && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
    operand0 = reg_equiv_mem (REGNO (operand0));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand0) == SUBREG
           && GET_CODE (SUBREG_REG (operand0)) == REG
           && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand0) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
      rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
                                 reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
                                 SUBREG_BYTE (operand0));
      operand0 = alter_subreg (&temp, true);
    }

  if (scratch_reg
      && reload_in_progress && GET_CODE (operand1) == REG
      && REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
    operand1 = reg_equiv_mem (REGNO (operand1));
  else if (scratch_reg
           && reload_in_progress && GET_CODE (operand1) == SUBREG
           && GET_CODE (SUBREG_REG (operand1)) == REG
           && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
    {
      /* We must not alter SUBREG_BYTE (operand1) since that would confuse
         the code which tracks sets/uses for delete_output_reload.  */
1696 rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
1697 reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
1698 SUBREG_BYTE (operand1));
1699 operand1 = alter_subreg (&temp, true);
1700 }
1701
1702 if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
1703 && ((tem = find_replacement (&XEXP (operand0, 0)))
1704 != XEXP (operand0, 0)))
1705 operand0 = replace_equiv_address (operand0, tem);
1706
1707 if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
1708 && ((tem = find_replacement (&XEXP (operand1, 0)))
1709 != XEXP (operand1, 0)))
1710 operand1 = replace_equiv_address (operand1, tem);
1711
1712 /* Handle secondary reloads for loads/stores of FP registers from
1713 REG+D addresses where D does not fit in 5 or 14 bits, including
1714 (subreg (mem (addr))) cases, and reloads for other unsupported
1715 memory operands. */
1716 if (scratch_reg
1717 && FP_REG_P (operand0)
1718 && (MEM_P (operand1)
1719 || (GET_CODE (operand1) == SUBREG
1720 && MEM_P (XEXP (operand1, 0)))))
1721 {
1722 rtx op1 = operand1;
1723
1724 if (GET_CODE (op1) == SUBREG)
1725 op1 = XEXP (op1, 0);
1726
1727 if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
1728 {
1729 if (!(TARGET_PA_20
1730 && !TARGET_ELF32
1731 && INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1732 && !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
1733 {
1734 /* SCRATCH_REG will hold an address and maybe the actual data.
1735 We want it in WORD_MODE regardless of what mode it was
1736 originally given to us. */
1737 scratch_reg = force_mode (word_mode, scratch_reg);
1738
1739 /* D might not fit in 14 bits either; for such cases load D
1740 into scratch reg. */
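/* As an illustrative sketch (symbolic register names, assuming a
   DFmode load), a displacement of 0x5000 would be handled roughly as
       ldil L'0x5000,scratch
       {addl|add,l} base,scratch,scratch
       fldd 0(scratch),dest
   since 0x5000 is not a valid 14-bit signed displacement. */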
1741 if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
1742 {
1743 emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
1744 emit_move_insn (scratch_reg,
1745 gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
1746 Pmode,
1747 XEXP (XEXP (op1, 0), 0),
1748 scratch_reg));
1749 }
1750 else
1751 emit_move_insn (scratch_reg, XEXP (op1, 0));
1752 op1 = replace_equiv_address (op1, scratch_reg);
1753 }
1754 }
1755 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
1756 || IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
1757 || IS_INDEX_ADDR_P (XEXP (op1, 0)))
1758 {
1759 /* Load memory address into SCRATCH_REG. */
1760 scratch_reg = force_mode (word_mode, scratch_reg);
1761 emit_move_insn (scratch_reg, XEXP (op1, 0));
1762 op1 = replace_equiv_address (op1, scratch_reg);
1763 }
1764 emit_insn (gen_rtx_SET (operand0, op1));
1765 return 1;
1766 }
1767 else if (scratch_reg
1768 && FP_REG_P (operand1)
1769 && (MEM_P (operand0)
1770 || (GET_CODE (operand0) == SUBREG
1771 && MEM_P (XEXP (operand0, 0)))))
1772 {
1773 rtx op0 = operand0;
1774
1775 if (GET_CODE (op0) == SUBREG)
1776 op0 = XEXP (op0, 0);
1777
1778 if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
1779 {
1780 if (!(TARGET_PA_20
1781 && !TARGET_ELF32
1782 && INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1783 && !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
1784 {
1785 /* SCRATCH_REG will hold an address and maybe the actual data.
1786 We want it in WORD_MODE regardless of what mode it was
1787 originally given to us. */
1788 scratch_reg = force_mode (word_mode, scratch_reg);
1789
1790 /* D might not fit in 14 bits either; for such cases load D
1791 into scratch reg. */
1792 if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
1793 {
1794 emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
1795 emit_move_insn (scratch_reg,
1796 gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
1797 Pmode,
1798 XEXP (XEXP (op0, 0), 0),
1799 scratch_reg));
1800 }
1801 else
1802 emit_move_insn (scratch_reg, XEXP (op0, 0));
1803 op0 = replace_equiv_address (op0, scratch_reg);
1804 }
1805 }
1806 else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
1807 || IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
1808 || IS_INDEX_ADDR_P (XEXP (op0, 0)))
1809 {
1810 /* Load memory address into SCRATCH_REG. */
1811 scratch_reg = force_mode (word_mode, scratch_reg);
1812 emit_move_insn (scratch_reg, XEXP (op0, 0));
1813 op0 = replace_equiv_address (op0, scratch_reg);
1814 }
1815 emit_insn (gen_rtx_SET (op0, operand1));
1816 return 1;
1817 }
1818 /* Handle secondary reloads for loads of FP registers from constant
1819 expressions by forcing the constant into memory. For the most part,
1820 this is only necessary for SImode and DImode.
1821
1822 Use scratch_reg to hold the address of the memory location. */
1823 else if (scratch_reg
1824 && CONSTANT_P (operand1)
1825 && FP_REG_P (operand0))
1826 {
1827 rtx const_mem, xoperands[2];
1828
1829 if (operand1 == CONST0_RTX (mode))
1830 {
1831 emit_insn (gen_rtx_SET (operand0, operand1));
1832 return 1;
1833 }
1834
1835 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1836 it in WORD_MODE regardless of what mode it was originally given
1837 to us. */
1838 scratch_reg = force_mode (word_mode, scratch_reg);
1839
1840 /* Force the constant into memory and put the address of the
1841 memory location into scratch_reg. */
1842 const_mem = force_const_mem (mode, operand1);
1843 xoperands[0] = scratch_reg;
1844 xoperands[1] = XEXP (const_mem, 0);
1845 pa_emit_move_sequence (xoperands, Pmode, 0);
1846
1847 /* Now load the destination register. */
1848 emit_insn (gen_rtx_SET (operand0,
1849 replace_equiv_address (const_mem, scratch_reg)));
1850 return 1;
1851 }
1852 /* Handle secondary reloads for SAR. These occur when trying to load
1853 the SAR from memory or a constant. */
1854 else if (scratch_reg
1855 && GET_CODE (operand0) == REG
1856 && REGNO (operand0) < FIRST_PSEUDO_REGISTER
1857 && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
1858 && (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
1859 {
1860 /* D might not fit in 14 bits either; for such cases load D into
1861 scratch reg. */
1862 if (GET_CODE (operand1) == MEM
1863 && !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
1864 {
1865 /* We are reloading the address into the scratch register, so we
1866 want to make sure the scratch register is a full register. */
1867 scratch_reg = force_mode (word_mode, scratch_reg);
1868
1869 emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
1870 emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
1871 0)),
1872 Pmode,
1873 XEXP (XEXP (operand1, 0),
1874 0),
1875 scratch_reg));
1876
1877 /* Now we are going to load the scratch register from memory,
1878 we want to load it in the same width as the original MEM,
1879 which must be the same as the width of the ultimate destination,
1880 OPERAND0. */
1881 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1882
1883 emit_move_insn (scratch_reg,
1884 replace_equiv_address (operand1, scratch_reg));
1885 }
1886 else
1887 {
1888 /* We want to load the scratch register using the same mode as
1889 the ultimate destination. */
1890 scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
1891
1892 emit_move_insn (scratch_reg, operand1);
1893 }
1894
1895 /* And emit the insn to set the ultimate destination. We know that
1896 the scratch register has the same mode as the destination at this
1897 point. */
1898 emit_move_insn (operand0, scratch_reg);
1899 return 1;
1900 }
1901
1902 /* Handle the most common case: storing into a register. */
1903 if (register_operand (operand0, mode))
1904 {
1905 /* Legitimize TLS symbol references. This happens for references
1906 that aren't a legitimate constant. */
1907 if (PA_SYMBOL_REF_TLS_P (operand1))
1908 operand1 = legitimize_tls_address (operand1);
1909
1910 if (register_operand (operand1, mode)
1911 || (GET_CODE (operand1) == CONST_INT
1912 && pa_cint_ok_for_move (UINTVAL (operand1)))
1913 || (operand1 == CONST0_RTX (mode))
1914 || (GET_CODE (operand1) == HIGH
1915 && !symbolic_operand (XEXP (operand1, 0), VOIDmode))
1916 /* Only `general_operands' can come here, so MEM is ok. */
1917 || GET_CODE (operand1) == MEM)
1918 {
1919 /* Various sets are created during RTL generation which don't
1920 have the REG_POINTER flag correctly set. After the CSE pass,
1921 instruction recognition can fail if we don't consistently
1922 set this flag when performing register copies. This should
1923 also improve the opportunities for creating insns that use
1924 unscaled indexing. */
1925 if (REG_P (operand0) && REG_P (operand1))
1926 {
1927 if (REG_POINTER (operand1)
1928 && !REG_POINTER (operand0)
1929 && !HARD_REGISTER_P (operand0))
1930 copy_reg_pointer (operand0, operand1);
1931 }
1932
1933 /* When MEMs are broken out, the REG_POINTER flag doesn't
1934 get set. In some cases, we can set the REG_POINTER flag
1935 from the declaration for the MEM. */
1936 if (REG_P (operand0)
1937 && GET_CODE (operand1) == MEM
1938 && !REG_POINTER (operand0))
1939 {
1940 tree decl = MEM_EXPR (operand1);
1941
1942 /* Set the register pointer flag and register alignment
1943 if the declaration for this memory reference is a
1944 pointer type. */
1945 if (decl)
1946 {
1947 tree type;
1948
1949 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1950 tree operand 1. */
1951 if (TREE_CODE (decl) == COMPONENT_REF)
1952 decl = TREE_OPERAND (decl, 1);
1953
1954 type = TREE_TYPE (decl);
1955 type = strip_array_types (type);
1956
1957 if (POINTER_TYPE_P (type))
1958 mark_reg_pointer (operand0, BITS_PER_UNIT);
1959 }
1960 }
1961
1962 emit_insn (gen_rtx_SET (operand0, operand1));
1963 return 1;
1964 }
1965 }
1966 else if (GET_CODE (operand0) == MEM)
1967 {
1968 if (mode == DFmode && operand1 == CONST0_RTX (mode)
1969 && !(reload_in_progress || reload_completed))
1970 {
1971 rtx temp = gen_reg_rtx (DFmode);
1972
1973 emit_insn (gen_rtx_SET (temp, operand1));
1974 emit_insn (gen_rtx_SET (operand0, temp));
1975 return 1;
1976 }
1977 if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
1978 {
1979 /* Run this case quickly. */
1980 emit_insn (gen_rtx_SET (operand0, operand1));
1981 return 1;
1982 }
1983 if (! (reload_in_progress || reload_completed))
1984 {
1985 operands[0] = validize_mem (operand0);
1986 operands[1] = operand1 = force_reg (mode, operand1);
1987 }
1988 }
1989
1990 /* Simplify the source if we need to.
1991 Note we do have to handle function labels here, even though we do
1992 not consider them legitimate constants. Loop optimizations can
1993 call emit_move_xxx with one as a source. */
1994 if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
1995 || (GET_CODE (operand1) == HIGH
1996 && symbolic_operand (XEXP (operand1, 0), mode))
1997 || function_label_operand (operand1, VOIDmode)
1998 || tls_referenced_p (operand1))
1999 {
2000 int ishighonly = 0;
2001
2002 if (GET_CODE (operand1) == HIGH)
2003 {
2004 ishighonly = 1;
2005 operand1 = XEXP (operand1, 0);
2006 }
2007 if (symbolic_operand (operand1, mode))
2008 {
2009 /* Argh. The assembler and linker can't handle arithmetic
2010 involving plabels.
2011
2012 So we force the plabel into memory, load operand0 from
2013 the memory location, then add in the constant part. */
2014 if ((GET_CODE (operand1) == CONST
2015 && GET_CODE (XEXP (operand1, 0)) == PLUS
2016 && function_label_operand (XEXP (XEXP (operand1, 0), 0),
2017 VOIDmode))
2018 || function_label_operand (operand1, VOIDmode))
2019 {
2020 rtx temp, const_part;
2021
2022 /* Figure out what (if any) scratch register to use. */
2023 if (reload_in_progress || reload_completed)
2024 {
2025 scratch_reg = scratch_reg ? scratch_reg : operand0;
2026 /* SCRATCH_REG will hold an address and maybe the actual
2027 data. We want it in WORD_MODE regardless of what mode it
2028 was originally given to us. */
2029 scratch_reg = force_mode (word_mode, scratch_reg);
2030 }
2031 else if (flag_pic)
2032 scratch_reg = gen_reg_rtx (Pmode);
2033
2034 if (GET_CODE (operand1) == CONST)
2035 {
2036 /* Save away the constant part of the expression. */
2037 const_part = XEXP (XEXP (operand1, 0), 1);
2038 gcc_assert (GET_CODE (const_part) == CONST_INT);
2039
2040 /* Force the function label into memory. */
2041 temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
2042 }
2043 else
2044 {
2045 /* No constant part. */
2046 const_part = NULL_RTX;
2047
2048 /* Force the function label into memory. */
2049 temp = force_const_mem (mode, operand1);
2050 }
2051
2052
2053 /* Get the address of the memory location. PIC-ify it if
2054 necessary. */
2055 temp = XEXP (temp, 0);
2056 if (flag_pic)
2057 temp = legitimize_pic_address (temp, mode, scratch_reg);
2058
2059 /* Put the address of the memory location into our destination
2060 register. */
2061 operands[1] = temp;
2062 pa_emit_move_sequence (operands, mode, scratch_reg);
2063
2064 /* Now load from the memory location into our destination
2065 register. */
2066 operands[1] = gen_rtx_MEM (Pmode, operands[0]);
2067 pa_emit_move_sequence (operands, mode, scratch_reg);
2068
2069 /* And add back in the constant part. */
2070 if (const_part != NULL_RTX)
2071 expand_inc (operand0, const_part);
2072
2073 return 1;
2074 }
2075
2076 if (flag_pic)
2077 {
2078 rtx_insn *insn;
2079 rtx temp;
2080
2081 if (reload_in_progress || reload_completed)
2082 {
2083 temp = scratch_reg ? scratch_reg : operand0;
2084 /* TEMP will hold an address and maybe the actual
2085 data. We want it in WORD_MODE regardless of what mode it
2086 was originally given to us. */
2087 temp = force_mode (word_mode, temp);
2088 }
2089 else
2090 temp = gen_reg_rtx (Pmode);
2091
2092 /* Force (const (plus (symbol) (const_int))) to memory
2093 if the const_int will not fit in 14 bits. Although
2094 this requires a relocation, the instruction sequence
2095 needed to load the value is shorter. */
2096 if (GET_CODE (operand1) == CONST
2097 && GET_CODE (XEXP (operand1, 0)) == PLUS
2098 && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
2099 && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
2100 {
2101 rtx x, m = force_const_mem (mode, operand1);
2102
2103 x = legitimize_pic_address (XEXP (m, 0), mode, temp);
2104 x = replace_equiv_address (m, x);
2105 insn = emit_move_insn (operand0, x);
2106 }
2107 else
2108 {
2109 operands[1] = legitimize_pic_address (operand1, mode, temp);
2110 if (REG_P (operand0) && REG_P (operands[1]))
2111 copy_reg_pointer (operand0, operands[1]);
2112 insn = emit_move_insn (operand0, operands[1]);
2113 }
2114
2115 /* Put a REG_EQUAL note on this insn. */
2116 set_unique_reg_note (insn, REG_EQUAL, operand1);
2117 }
2118 /* On the HPPA, references to data space are supposed to use dp,
2119 register 27, but showing it in the RTL inhibits various cse
2120 and loop optimizations. */
2121 else
2122 {
2123 rtx temp, set;
2124
2125 if (reload_in_progress || reload_completed)
2126 {
2127 temp = scratch_reg ? scratch_reg : operand0;
2128 /* TEMP will hold an address and maybe the actual
2129 data. We want it in WORD_MODE regardless of what mode it
2130 was originally given to us. */
2131 temp = force_mode (word_mode, temp);
2132 }
2133 else
2134 temp = gen_reg_rtx (mode);
2135
2136 /* Loading a SYMBOL_REF into a register makes that register
2137 safe to be used as the base in an indexed address.
2138
2139 Don't mark hard registers though. That loses. */
2140 if (GET_CODE (operand0) == REG
2141 && REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
2142 mark_reg_pointer (operand0, BITS_PER_UNIT);
2143 if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
2144 mark_reg_pointer (temp, BITS_PER_UNIT);
2145
2146 if (ishighonly)
2147 set = gen_rtx_SET (operand0, temp);
2148 else
2149 set = gen_rtx_SET (operand0,
2150 gen_rtx_LO_SUM (mode, temp, operand1));
2151
2152 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2153 emit_insn (set);
2154
2155 }
2156 return 1;
2157 }
2158 else if (tls_referenced_p (operand1))
2159 {
2160 rtx tmp = operand1;
2161 rtx addend = NULL;
2162
2163 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
2164 {
2165 addend = XEXP (XEXP (tmp, 0), 1);
2166 tmp = XEXP (XEXP (tmp, 0), 0);
2167 }
2168
2169 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
2170 tmp = legitimize_tls_address (tmp);
2171 if (addend)
2172 {
2173 tmp = gen_rtx_PLUS (mode, tmp, addend);
2174 tmp = force_operand (tmp, operands[0]);
2175 }
2176 operands[1] = tmp;
2177 }
2178 else if (GET_CODE (operand1) != CONST_INT
2179 || !pa_cint_ok_for_move (UINTVAL (operand1)))
2180 {
2181 rtx temp;
2182 rtx_insn *insn;
2183 rtx op1 = operand1;
2184 HOST_WIDE_INT value = 0;
2185 HOST_WIDE_INT insv = 0;
2186 int insert = 0;
2187
2188 if (GET_CODE (operand1) == CONST_INT)
2189 value = INTVAL (operand1);
2190
2191 if (TARGET_64BIT
2192 && GET_CODE (operand1) == CONST_INT
2193 && HOST_BITS_PER_WIDE_INT > 32
2194 && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
2195 {
2196 HOST_WIDE_INT nval;
2197
2198 /* Extract the low order 32 bits of the value and sign extend.
2199 If the new value is the same as the original value, we can
2200 can use the original value as-is. If the new value is
2201 different, we use it and insert the most-significant 32-bits
2202 of the original value into the final result. */
2203 nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
2204 ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
2205 if (value != nval)
2206 {
2207 #if HOST_BITS_PER_WIDE_INT > 32
2208 insv = value >= 0 ? value >> 32 : ~(~value >> 32);
2209 #endif
2210 insert = 1;
2211 value = nval;
2212 operand1 = GEN_INT (nval);
2213 }
2214 }
2215
2216 if (reload_in_progress || reload_completed)
2217 temp = scratch_reg ? scratch_reg : operand0;
2218 else
2219 temp = gen_reg_rtx (mode);
2220
2221 /* We don't directly split DImode constants on 32-bit targets
2222 because PLUS uses an 11-bit immediate and the insn sequence
2223 generated is not as efficient as the one using HIGH/LO_SUM. */
2224 if (GET_CODE (operand1) == CONST_INT
2225 && GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
2226 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
2227 && !insert)
2228 {
2229 /* Directly break constant into high and low parts. This
2230 provides better optimization opportunities because various
2231 passes recognize constants split with PLUS but not LO_SUM.
2232 We use a 14-bit signed low part except when the addition
2233 of 0x4000 to the high part might change the sign of the
2234 high part. */
2235 HOST_WIDE_INT low = value & 0x3fff;
2236 HOST_WIDE_INT high = value & ~ 0x3fff;
2237
2238 if (low >= 0x2000)
2239 {
2240 if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
2241 high += 0x2000;
2242 else
2243 high += 0x4000;
2244 }
2245
2246 low = value - high;
2247
2248 emit_insn (gen_rtx_SET (temp, GEN_INT (high)));
2249 operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
2250 }
2251 else
2252 {
2253 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, operand1)));
2254 operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
2255 }
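/* As a worked example of the direct split above: for the value
   0x12345, low = 0x2345 >= 0x2000, so high becomes 0x14000 and
   low becomes -7355; the emitted sequence is then roughly
       ldil L'0x14000,temp
       ldo -7355(temp),dest
   with both immediates directly encodable. */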
2256
2257 insn = emit_move_insn (operands[0], operands[1]);
2258
2259 /* Now insert the most significant 32 bits of the value
2260 into the register. When we don't have a second register
2261 available, it could take up to nine instructions to load
2262 a 64-bit integer constant. Prior to reload, we force
2263 constants that would take more than three instructions
2264 to load to the constant pool. During and after reload,
2265 we have to handle all possible values. */
2266 if (insert)
2267 {
2268 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2269 register and the value to be inserted is outside the
2270 range that can be loaded with three depdi instructions. */
2271 if (temp != operand0 && (insv >= 16384 || insv < -16384))
2272 {
2273 operand1 = GEN_INT (insv);
2274
2275 emit_insn (gen_rtx_SET (temp,
2276 gen_rtx_HIGH (mode, operand1)));
2277 emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
2278 if (mode == DImode)
2279 insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
2280 const0_rtx, temp));
2281 else
2282 insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
2283 const0_rtx, temp));
2284 }
2285 else
2286 {
2287 int len = 5, pos = 27;
2288
2289 /* Insert the bits using the depdi instruction. */
2290 while (pos >= 0)
2291 {
2292 HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
2293 HOST_WIDE_INT sign = v5 < 0;
2294
2295 /* Left extend the insertion. */
2296 insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
2297 while (pos > 0 && (insv & 1) == sign)
2298 {
2299 insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
2300 len += 1;
2301 pos -= 1;
2302 }
2303
2304 if (mode == DImode)
2305 insn = emit_insn (gen_insvdi (operand0,
2306 GEN_INT (len),
2307 GEN_INT (pos),
2308 GEN_INT (v5)));
2309 else
2310 insn = emit_insn (gen_insvsi (operand0,
2311 GEN_INT (len),
2312 GEN_INT (pos),
2313 GEN_INT (v5)));
2314
2315 len = pos > 0 && pos < 5 ? pos : 5;
2316 pos -= len;
2317 }
2318 }
2319 }
2320
2321 set_unique_reg_note (insn, REG_EQUAL, op1);
2322
2323 return 1;
2324 }
2325 }
2326 /* Now have insn-emit do whatever it normally does. */
2327 return 0;
2328 }
2329
2330 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2331 it will need a link/runtime reloc). */
2332
2333 int
2334 pa_reloc_needed (tree exp)
2335 {
2336 int reloc = 0;
2337
2338 switch (TREE_CODE (exp))
2339 {
2340 case ADDR_EXPR:
2341 return 1;
2342
2343 case POINTER_PLUS_EXPR:
2344 case PLUS_EXPR:
2345 case MINUS_EXPR:
2346 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2347 reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
2348 break;
2349
2350 CASE_CONVERT:
2351 case NON_LVALUE_EXPR:
2352 reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
2353 break;
2354
2355 case CONSTRUCTOR:
2356 {
2357 tree value;
2358 unsigned HOST_WIDE_INT ix;
2359
2360 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
2361 if (value)
2362 reloc |= pa_reloc_needed (value);
2363 }
2364 break;
2365
2366 case ERROR_MARK:
2367 break;
2368
2369 default:
2370 break;
2371 }
2372 return reloc;
2373 }
2374
2375 \f
2376 /* Return the best assembler insn template
2377 for moving operands[1] into operands[0] as a fullword. */
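/* For instance, a CONST_INT source of 3 yields "ldi 3,%0"; 0x12000,
   whose low-order eleven bits are all zero, yields "ldil L'0x12000,%0";
   and 0x12345 matches none of the special cases below, so it takes the
   two-insn "ldil L'%1,%0\n\tldo R'%1(%0),%0" sequence. */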
2378 const char *
2379 pa_singlemove_string (rtx *operands)
2380 {
2381 HOST_WIDE_INT intval;
2382
2383 if (GET_CODE (operands[0]) == MEM)
2384 return "stw %r1,%0";
2385 if (GET_CODE (operands[1]) == MEM)
2386 return "ldw %1,%0";
2387 if (GET_CODE (operands[1]) == CONST_DOUBLE)
2388 {
2389 long i;
2390
2391 gcc_assert (GET_MODE (operands[1]) == SFmode);
2392
2393 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2394 bit pattern. */
2395 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (operands[1]), i);
2396
2397 operands[1] = GEN_INT (i);
2398 /* Fall through to CONST_INT case. */
2399 }
2400 if (GET_CODE (operands[1]) == CONST_INT)
2401 {
2402 intval = INTVAL (operands[1]);
2403
2404 if (VAL_14_BITS_P (intval))
2405 return "ldi %1,%0";
2406 else if ((intval & 0x7ff) == 0)
2407 return "ldil L'%1,%0";
2408 else if (pa_zdepi_cint_p (intval))
2409 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2410 else
2411 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2412 }
2413 return "copy %1,%0";
2414 }
2415 \f
2416
2417 /* Compute position (in OP[1]) and width (in OP[2])
2418 useful for copying IMM to a register using the zdepi
2419 instructions. Store the immediate value to insert in OP[0]. */
2420 static void
2421 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2422 {
2423 int lsb, len;
2424
2425 /* Find the least significant set bit in IMM. */
2426 for (lsb = 0; lsb < 32; lsb++)
2427 {
2428 if ((imm & 1) != 0)
2429 break;
2430 imm >>= 1;
2431 }
2432
2433 /* Choose variants based on *sign* of the 5-bit field. */
2434 if ((imm & 0x10) == 0)
2435 len = (lsb <= 28) ? 4 : 32 - lsb;
2436 else
2437 {
2438 /* Find the width of the bitstring in IMM. */
2439 for (len = 5; len < 32 - lsb; len++)
2440 {
2441 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2442 break;
2443 }
2444
2445 /* Sign extend IMM as a 5-bit value. */
2446 imm = (imm & 0xf) - 0x10;
2447 }
2448
2449 op[0] = imm;
2450 op[1] = 31 - lsb;
2451 op[2] = len;
2452 }
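/* For example, IMM = 0x7f000 is a run of seven ones starting at bit 12,
   so LSB = 12, LEN = 7, and the 5-bit field sign extends to -1; the
   result is op = {-1, 19, 7}, and "zdepi -1,19,7" (aka "depwi,z")
   reconstructs 0x7f000. */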
2453
2454 /* Compute position (in OP[1]) and width (in OP[2])
2455 useful for copying IMM to a register using the depdi,z
2456 instructions. Store the immediate value to insert in OP[0]. */
2457
2458 static void
2459 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
2460 {
2461 int lsb, len, maxlen;
2462
2463 maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
2464
2465 /* Find the least significant set bit in IMM. */
2466 for (lsb = 0; lsb < maxlen; lsb++)
2467 {
2468 if ((imm & 1) != 0)
2469 break;
2470 imm >>= 1;
2471 }
2472
2473 /* Choose variants based on *sign* of the 5-bit field. */
2474 if ((imm & 0x10) == 0)
2475 len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
2476 else
2477 {
2478 /* Find the width of the bitstring in IMM. */
2479 for (len = 5; len < maxlen - lsb; len++)
2480 {
2481 if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
2482 break;
2483 }
2484
2485 /* Extend length if host is narrow and IMM is negative. */
2486 if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
2487 len += 32;
2488
2489 /* Sign extend IMM as a 5-bit value. */
2490 imm = (imm & 0xf) - 0x10;
2491 }
2492
2493 op[0] = imm;
2494 op[1] = 63 - lsb;
2495 op[2] = len;
2496 }
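/* For example, IMM = 0xf << 34 gives LSB = 34; the 5-bit field is 0xf
   with its sign bit clear, so LEN = 4 and op = {15, 29, 4}, and
   "depdi,z 15,29,4" reconstructs the value. */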
2497
2498 /* Output assembler code to perform a doubleword move insn
2499 with operands OPERANDS. */
2500
2501 const char *
2502 pa_output_move_double (rtx *operands)
2503 {
2504 enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
2505 rtx latehalf[2];
2506 rtx addreg0 = 0, addreg1 = 0;
2507 int highonly = 0;
2508
2509 /* First classify both operands. */
2510
2511 if (REG_P (operands[0]))
2512 optype0 = REGOP;
2513 else if (offsettable_memref_p (operands[0]))
2514 optype0 = OFFSOP;
2515 else if (GET_CODE (operands[0]) == MEM)
2516 optype0 = MEMOP;
2517 else
2518 optype0 = RNDOP;
2519
2520 if (REG_P (operands[1]))
2521 optype1 = REGOP;
2522 else if (CONSTANT_P (operands[1]))
2523 optype1 = CNSTOP;
2524 else if (offsettable_memref_p (operands[1]))
2525 optype1 = OFFSOP;
2526 else if (GET_CODE (operands[1]) == MEM)
2527 optype1 = MEMOP;
2528 else
2529 optype1 = RNDOP;
2530
2531 /* Check for the cases that the operand constraints are not
2532 supposed to allow. */
2533 gcc_assert (optype0 == REGOP || optype1 == REGOP);
2534
2535 /* Handle copies between general and floating registers. */
2536
2537 if (optype0 == REGOP && optype1 == REGOP
2538 && FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
2539 {
2540 if (FP_REG_P (operands[0]))
2541 {
2542 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
2543 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
2544 return "{fldds|fldd} -16(%%sp),%0";
2545 }
2546 else
2547 {
2548 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
2549 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
2550 return "{ldws|ldw} -12(%%sp),%R0";
2551 }
2552 }
2553
2554 /* Handle auto decrementing and incrementing loads and stores
2555 specifically, since the structure of the function doesn't work
2556 for them without major modification. Do it better when we teach
2557 this port about the general inc/dec addressing of PA.
2558 (This was written by tege. Chide him if it doesn't work.) */
2559
2560 if (optype0 == MEMOP)
2561 {
2562 /* We have to output the address syntax ourselves, since print_operand
2563 doesn't deal with the addresses we want to use. Fix this later. */
2564
2565 rtx addr = XEXP (operands[0], 0);
2566 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2567 {
2568 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2569
2570 operands[0] = XEXP (addr, 0);
2571 gcc_assert (GET_CODE (operands[1]) == REG
2572 && GET_CODE (operands[0]) == REG);
2573
2574 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2575
2576 /* No overlap between high target register and address
2577 register. (We do this in a non-obvious way to
2578 save a register file writeback) */
2579 if (GET_CODE (addr) == POST_INC)
2580 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2581 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2582 }
2583 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2584 {
2585 rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
2586
2587 operands[0] = XEXP (addr, 0);
2588 gcc_assert (GET_CODE (operands[1]) == REG
2589 && GET_CODE (operands[0]) == REG);
2590
2591 gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
2592 /* No overlap between high target register and address
2593 register. (We do this in a non-obvious way to save a
2594 register file writeback) */
2595 if (GET_CODE (addr) == PRE_INC)
2596 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2597 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2598 }
2599 }
2600 if (optype1 == MEMOP)
2601 {
2602 /* We have to output the address syntax ourselves, since print_operand
2603 doesn't deal with the addresses we want to use. Fix this later. */
2604
2605 rtx addr = XEXP (operands[1], 0);
2606 if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
2607 {
2608 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2609
2610 operands[1] = XEXP (addr, 0);
2611 gcc_assert (GET_CODE (operands[0]) == REG
2612 && GET_CODE (operands[1]) == REG);
2613
2614 if (!reg_overlap_mentioned_p (high_reg, addr))
2615 {
2616 /* No overlap between high target register and address
2617 register. (We do this in a non-obvious way to
2618 save a register file writeback) */
2619 if (GET_CODE (addr) == POST_INC)
2620 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2621 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2622 }
2623 else
2624 {
2625 /* This is an undefined situation. We should load into the
2626 address register *and* update that register. Probably
2627 we don't need to handle this at all. */
2628 if (GET_CODE (addr) == POST_INC)
2629 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2630 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2631 }
2632 }
2633 else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
2634 {
2635 rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
2636
2637 operands[1] = XEXP (addr, 0);
2638 gcc_assert (GET_CODE (operands[0]) == REG
2639 && GET_CODE (operands[1]) == REG);
2640
2641 if (!reg_overlap_mentioned_p (high_reg, addr))
2642 {
2643 /* No overlap between high target register and address
2644 register. (We do this in a non-obvious way to
2645 save a register file writeback) */
2646 if (GET_CODE (addr) == PRE_INC)
2647 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2648 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2649 }
2650 else
2651 {
2652 /* This is an undefined situation. We should load into the
2653 address register *and* update that register. Probably
2654 we don't need to handle this at all. */
2655 if (GET_CODE (addr) == PRE_INC)
2656 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2657 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2658 }
2659 }
2660 else if (GET_CODE (addr) == PLUS
2661 && GET_CODE (XEXP (addr, 0)) == MULT)
2662 {
2663 rtx xoperands[4];
2664
2665 /* Load address into left half of destination register. */
2666 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2667 xoperands[1] = XEXP (addr, 1);
2668 xoperands[2] = XEXP (XEXP (addr, 0), 0);
2669 xoperands[3] = XEXP (XEXP (addr, 0), 1);
2670 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2671 xoperands);
2672 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2673 }
2674 else if (GET_CODE (addr) == PLUS
2675 && REG_P (XEXP (addr, 0))
2676 && REG_P (XEXP (addr, 1)))
2677 {
2678 rtx xoperands[3];
2679
2680 /* Load address into left half of destination register. */
2681 xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
2682 xoperands[1] = XEXP (addr, 0);
2683 xoperands[2] = XEXP (addr, 1);
2684 output_asm_insn ("{addl|add,l} %1,%2,%0",
2685 xoperands);
2686 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2687 }
2688 }
2689
2690 /* If an operand is an unoffsettable memory ref, find a register
2691 we can increment temporarily to make it refer to the second word. */
2692
2693 if (optype0 == MEMOP)
2694 addreg0 = find_addr_reg (XEXP (operands[0], 0));
2695
2696 if (optype1 == MEMOP)
2697 addreg1 = find_addr_reg (XEXP (operands[1], 0));
2698
2699 /* Ok, we can do one word at a time.
2700 Normally we do the low-numbered word first.
2701
2702 In any case, set up in LATEHALF the operands to use
2703 for the high-numbered word and in some cases alter the
2704 operands in OPERANDS to be suitable for the low-numbered word. */
2705
2706 if (optype0 == REGOP)
2707 latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2708 else if (optype0 == OFFSOP)
2709 latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
2710 else
2711 latehalf[0] = operands[0];
2712
2713 if (optype1 == REGOP)
2714 latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
2715 else if (optype1 == OFFSOP)
2716 latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
2717 else if (optype1 == CNSTOP)
2718 {
2719 if (GET_CODE (operands[1]) == HIGH)
2720 {
2721 operands[1] = XEXP (operands[1], 0);
2722 highonly = 1;
2723 }
2724 split_double (operands[1], &operands[1], &latehalf[1]);
2725 }
2726 else
2727 latehalf[1] = operands[1];
2728
2729 /* If the first move would clobber the source of the second one,
2730 do them in the other order.
2731
2732 This can happen in two cases:
2733
2734 mem -> register where the first half of the destination register
2735 is the same register used in the memory's address. Reload
2736 can create such insns.
2737
2738 mem in this case will be either register indirect or register
2739 indirect plus a valid offset.
2740
2741 register -> register move where REGNO(dst) == REGNO(src) + 1.
2742 Someone (Tim/Tege?) claimed this can happen for parameter loads.
2743
2744 Handle mem -> register case first. */
2745 if (optype0 == REGOP
2746 && (optype1 == MEMOP || optype1 == OFFSOP)
2747 && refers_to_regno_p (REGNO (operands[0]), operands[1]))
2748 {
2749 /* Do the late half first. */
2750 if (addreg1)
2751 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2752 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2753
2754 /* Then clobber. */
2755 if (addreg1)
2756 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2757 return pa_singlemove_string (operands);
2758 }
2759
2760 /* Now handle register -> register case. */
2761 if (optype0 == REGOP && optype1 == REGOP
2762 && REGNO (operands[0]) == REGNO (operands[1]) + 1)
2763 {
2764 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2765 return pa_singlemove_string (operands);
2766 }
2767
2768 /* Normal case: do the two words, low-numbered first. */
2769
2770 output_asm_insn (pa_singlemove_string (operands), operands);
2771
2772 /* Make any unoffsettable addresses point at high-numbered word. */
2773 if (addreg0)
2774 output_asm_insn ("ldo 4(%0),%0", &addreg0);
2775 if (addreg1)
2776 output_asm_insn ("ldo 4(%0),%0", &addreg1);
2777
2778 /* Do high-numbered word. */
2779 if (highonly)
2780 output_asm_insn ("ldil L'%1,%0", latehalf);
2781 else
2782 output_asm_insn (pa_singlemove_string (latehalf), latehalf);
2783
2784 /* Undo the adds we just did. */
2785 if (addreg0)
2786 output_asm_insn ("ldo -4(%0),%0", &addreg0);
2787 if (addreg1)
2788 output_asm_insn ("ldo -4(%0),%0", &addreg1);
2789
2790 return "";
2791 }
2792 \f
2793 const char *
2794 pa_output_fp_move_double (rtx *operands)
2795 {
2796 if (FP_REG_P (operands[0]))
2797 {
2798 if (FP_REG_P (operands[1])
2799 || operands[1] == CONST0_RTX (GET_MODE (operands[0])))
2800 output_asm_insn ("fcpy,dbl %f1,%0", operands);
2801 else
2802 output_asm_insn ("fldd%F1 %1,%0", operands);
2803 }
2804 else if (FP_REG_P (operands[1]))
2805 {
2806 output_asm_insn ("fstd%F0 %1,%0", operands);
2807 }
2808 else
2809 {
2810 rtx xoperands[2];
2811
2812 gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
2813
2814 /* This is a pain. You have to be prepared to deal with an
2815 arbitrary address here including pre/post increment/decrement.
2816
2817 So we avoid this in the MD. */
2818 gcc_assert (GET_CODE (operands[0]) == REG);
2819
2820 xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
2821 xoperands[0] = operands[0];
2822 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
2823 }
2824 return "";
2825 }
2826 \f
2827 /* Return a REG that occurs in ADDR with coefficient 1.
2828 ADDR can be effectively incremented by incrementing REG. */
2829
2830 static rtx
2831 find_addr_reg (rtx addr)
2832 {
2833 while (GET_CODE (addr) == PLUS)
2834 {
2835 if (GET_CODE (XEXP (addr, 0)) == REG)
2836 addr = XEXP (addr, 0);
2837 else if (GET_CODE (XEXP (addr, 1)) == REG)
2838 addr = XEXP (addr, 1);
2839 else if (CONSTANT_P (XEXP (addr, 0)))
2840 addr = XEXP (addr, 1);
2841 else if (CONSTANT_P (XEXP (addr, 1)))
2842 addr = XEXP (addr, 0);
2843 else
2844 gcc_unreachable ();
2845 }
2846 gcc_assert (GET_CODE (addr) == REG);
2847 return addr;
2848 }
2849
2850 /* Emit code to perform a block move.
2851
2852 OPERANDS[0] is the destination pointer as a REG, clobbered.
2853 OPERANDS[1] is the source pointer as a REG, clobbered.
2854 OPERANDS[2] is a register for temporary storage.
2855 OPERANDS[3] is a register for temporary storage.
2856 OPERANDS[4] is the size as a CONST_INT.
2857 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2858 OPERANDS[6] is another temporary register. */
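/* As an illustrative sketch, a 19-byte copy with 4-byte alignment
   emits roughly

	ldi 11,%2
	ldw,ma 4(%1),%3
	ldw,ma 4(%1),%6
	stw,ma %3,4(%0)
	addib,>= -8,%2,.-12
	stw,ma %6,4(%0)
	ldw 0(%1),%6
	stby,e %6,3(%0)

   i.e., two trips through the copying loop plus a three-byte
   residual. */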
2859
2860 const char *
2861 pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
2862 {
2863 int align = INTVAL (operands[5]);
2864 unsigned long n_bytes = INTVAL (operands[4]);
2865
2866 /* We can't move more than a word at a time because the PA
2867 has no integer move insns longer than a word. (Could use fp mem ops?) */
2868 if (align > (TARGET_64BIT ? 8 : 4))
2869 align = (TARGET_64BIT ? 8 : 4);
2870
2871 /* Note that we know each loop below will execute at least twice
2872 (else we would have open-coded the copy). */
2873 switch (align)
2874 {
2875 case 8:
2876 /* Pre-adjust the loop counter. */
2877 operands[4] = GEN_INT (n_bytes - 16);
2878 output_asm_insn ("ldi %4,%2", operands);
2879
2880 /* Copying loop. */
2881 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2882 output_asm_insn ("ldd,ma 8(%1),%6", operands);
2883 output_asm_insn ("std,ma %3,8(%0)", operands);
2884 output_asm_insn ("addib,>= -16,%2,.-12", operands);
2885 output_asm_insn ("std,ma %6,8(%0)", operands);
2886
2887 /* Handle the residual. There could be up to 15 bytes of
2888 residual to copy! */
2889 if (n_bytes % 16 != 0)
2890 {
2891 operands[4] = GEN_INT (n_bytes % 8);
2892 if (n_bytes % 16 >= 8)
2893 output_asm_insn ("ldd,ma 8(%1),%3", operands);
2894 if (n_bytes % 8 != 0)
2895 output_asm_insn ("ldd 0(%1),%6", operands);
2896 if (n_bytes % 16 >= 8)
2897 output_asm_insn ("std,ma %3,8(%0)", operands);
2898 if (n_bytes % 8 != 0)
2899 output_asm_insn ("stdby,e %6,%4(%0)", operands);
2900 }
2901 return "";
2902
2903 case 4:
2904 /* Pre-adjust the loop counter. */
2905 operands[4] = GEN_INT (n_bytes - 8);
2906 output_asm_insn ("ldi %4,%2", operands);
2907
2908 /* Copying loop. */
2909 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2910 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
2911 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2912 output_asm_insn ("addib,>= -8,%2,.-12", operands);
2913 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
2914
2915 /* Handle the residual. There could be up to 7 bytes of
2916 residual to copy! */
2917 if (n_bytes % 8 != 0)
2918 {
2919 operands[4] = GEN_INT (n_bytes % 4);
2920 if (n_bytes % 8 >= 4)
2921 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
2922 if (n_bytes % 4 != 0)
2923 output_asm_insn ("ldw 0(%1),%6", operands);
2924 if (n_bytes % 8 >= 4)
2925 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
2926 if (n_bytes % 4 != 0)
2927 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
2928 }
2929 return "";
2930
2931 case 2:
2932 /* Pre-adjust the loop counter. */
2933 operands[4] = GEN_INT (n_bytes - 4);
2934 output_asm_insn ("ldi %4,%2", operands);
2935
2936 /* Copying loop. */
2937 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2938 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
2939 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2940 output_asm_insn ("addib,>= -4,%2,.-12", operands);
2941 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
2942
2943 /* Handle the residual. */
2944 if (n_bytes % 4 != 0)
2945 {
2946 if (n_bytes % 4 >= 2)
2947 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
2948 if (n_bytes % 2 != 0)
2949 output_asm_insn ("ldb 0(%1),%6", operands);
2950 if (n_bytes % 4 >= 2)
2951 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
2952 if (n_bytes % 2 != 0)
2953 output_asm_insn ("stb %6,0(%0)", operands);
2954 }
2955 return "";
2956
2957 case 1:
2958 /* Pre-adjust the loop counter. */
2959 operands[4] = GEN_INT (n_bytes - 2);
2960 output_asm_insn ("ldi %4,%2", operands);
2961
2962 /* Copying loop. */
2963 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
2964 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
2965 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
2966 output_asm_insn ("addib,>= -2,%2,.-12", operands);
2967 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
2968
2969 /* Handle the residual. */
2970 if (n_bytes % 2 != 0)
2971 {
2972 output_asm_insn ("ldb 0(%1),%3", operands);
2973 output_asm_insn ("stb %3,0(%0)", operands);
2974 }
2975 return "";
2976
2977 default:
2978 gcc_unreachable ();
2979 }
2980 }
2981
2982 /* Count the number of insns necessary to handle this block move.
2983
2984 Basic structure is the same as emit_block_move, except that we
2985 count insns rather than emit them. */
2986
2987 static int
2988 compute_cpymem_length (rtx_insn *insn)
2989 {
2990 rtx pat = PATTERN (insn);
2991 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
2992 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
2993 unsigned int n_insns = 0;
2994
2995 /* We can't move more than a word at a time because the PA
2996 has no integer move insns longer than a word. (Could use fp mem ops?) */
2997 if (align > (TARGET_64BIT ? 8 : 4))
2998 align = (TARGET_64BIT ? 8 : 4);
2999
3000 /* The basic copying loop. */
3001 n_insns = 6;
3002
3003 /* Residuals. */
3004 if (n_bytes % (2 * align) != 0)
3005 {
3006 if ((n_bytes % (2 * align)) >= align)
3007 n_insns += 2;
3008
3009 if ((n_bytes % align) != 0)
3010 n_insns += 2;
3011 }
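/* E.g., for the 19-byte, 4-byte-aligned copy sketched above
   pa_output_block_move, this gives n_insns = 6 + 2 = 8, i.e. a
   length of 32 bytes. */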
3012
3013 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3014 return n_insns * 4;
3015 }
3016
3017 /* Emit code to perform a block clear.
3018
3019 OPERANDS[0] is the destination pointer as a REG, clobbered.
3020 OPERANDS[1] is a register for temporary storage.
3021 OPERANDS[2] is the size as a CONST_INT.
3022 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
3023
3024 const char *
3025 pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
3026 {
3027 int align = INTVAL (operands[3]);
3028 unsigned long n_bytes = INTVAL (operands[2]);
3029
3030 /* We can't clear more than a word at a time because the PA
3031 has no integer move insns longer than a word. */
3032 if (align > (TARGET_64BIT ? 8 : 4))
3033 align = (TARGET_64BIT ? 8 : 4);
3034
3035 /* Note that we know each loop below will execute at least twice
3036 (else we would have open-coded the clear). */
3037 switch (align)
3038 {
3039 case 8:
3040 /* Pre-adjust the loop counter. */
3041 operands[2] = GEN_INT (n_bytes - 16);
3042 output_asm_insn ("ldi %2,%1", operands);
3043
3044 /* Loop. */
3045 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3046 output_asm_insn ("addib,>= -16,%1,.-4", operands);
3047 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3048
3049 /* Handle the residual. There could be up to 15 bytes of
3050 residual to clear! */
3051 if (n_bytes % 16 != 0)
3052 {
3053 operands[2] = GEN_INT (n_bytes % 8);
3054 if (n_bytes % 16 >= 8)
3055 output_asm_insn ("std,ma %%r0,8(%0)", operands);
3056 if (n_bytes % 8 != 0)
3057 output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
3058 }
3059 return "";
3060
3061 case 4:
3062 /* Pre-adjust the loop counter. */
3063 operands[2] = GEN_INT (n_bytes - 8);
3064 output_asm_insn ("ldi %2,%1", operands);
3065
3066 /* Loop. */
3067 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3068 output_asm_insn ("addib,>= -8,%1,.-4", operands);
3069 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3070
3071 /* Handle the residual. There could be up to 7 bytes of
3072 residual to clear! */
3073 if (n_bytes % 8 != 0)
3074 {
3075 operands[2] = GEN_INT (n_bytes % 4);
3076 if (n_bytes % 8 >= 4)
3077 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
3078 if (n_bytes % 4 != 0)
3079 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
3080 }
3081 return "";
3082
3083 case 2:
3084 /* Pre-adjust the loop counter. */
3085 operands[2] = GEN_INT (n_bytes - 4);
3086 output_asm_insn ("ldi %2,%1", operands);
3087
3088 /* Loop. */
3089 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3090 output_asm_insn ("addib,>= -4,%1,.-4", operands);
3091 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3092
3093 /* Handle the residual. */
3094 if (n_bytes % 4 != 0)
3095 {
3096 if (n_bytes % 4 >= 2)
3097 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
3098 if (n_bytes % 2 != 0)
3099 output_asm_insn ("stb %%r0,0(%0)", operands);
3100 }
3101 return "";
3102
3103 case 1:
3104 /* Pre-adjust the loop counter. */
3105 operands[2] = GEN_INT (n_bytes - 2);
3106 output_asm_insn ("ldi %2,%1", operands);
3107
3108 /* Loop. */
3109 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3110 output_asm_insn ("addib,>= -2,%1,.-4", operands);
3111 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands);
3112
3113 /* Handle the residual. */
3114 if (n_bytes % 2 != 0)
3115 output_asm_insn ("stb %%r0,0(%0)", operands);
3116
3117 return "";
3118
3119 default:
3120 gcc_unreachable ();
3121 }
3122 }
3123
3124 /* Count the number of insns necessary to handle this block clear.
3125
3126 Basic structure is the same as emit_block_move, except that we
3127 count insns rather than emit them. */
3128
3129 static int
3130 compute_clrmem_length (rtx_insn *insn)
3131 {
3132 rtx pat = PATTERN (insn);
3133 unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 4), 0));
3134 unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 3), 0));
3135 unsigned int n_insns = 0;
3136
3137 /* We can't clear more than a word at a time because the PA
3138 has no integer move insns longer than a word. */
3139 if (align > (TARGET_64BIT ? 8 : 4))
3140 align = (TARGET_64BIT ? 8 : 4);
3141
3142 /* The basic loop. */
3143 n_insns = 4;
3144
3145 /* Residuals. */
3146 if (n_bytes % (2 * align) != 0)
3147 {
3148 if ((n_bytes % (2 * align)) >= align)
3149 n_insns++;
3150
3151 if ((n_bytes % align) != 0)
3152 n_insns++;
3153 }
3154
3155 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
3156 return n_insns * 4;
3157 }
3158 \f
3159
3160 const char *
3161 pa_output_and (rtx *operands)
3162 {
3163 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3164 {
3165 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3166 int ls0, ls1, ms0, p, len;
3167
3168 for (ls0 = 0; ls0 < 32; ls0++)
3169 if ((mask & (1 << ls0)) == 0)
3170 break;
3171
3172 for (ls1 = ls0; ls1 < 32; ls1++)
3173 if ((mask & (1 << ls1)) != 0)
3174 break;
3175
3176 for (ms0 = ls1; ms0 < 32; ms0++)
3177 if ((mask & (1 << ms0)) == 0)
3178 break;
3179
3180 gcc_assert (ms0 == 32);
3181
3182 if (ls1 == 32)
3183 {
3184 len = ls0;
3185
3186 gcc_assert (len);
3187
3188 operands[2] = GEN_INT (len);
3189 return "{extru|extrw,u} %1,31,%2,%0";
3190 }
3191 else
3192 {
3193 /* We could use `depi' for the case above as well, but `depi'
3194 requires one more register file access than an `extru'. */
3195
3196 p = 31 - ls0;
3197 len = ls1 - ls0;
3198
3199 operands[2] = GEN_INT (p);
3200 operands[3] = GEN_INT (len);
3201 return "{depi|depwi} 0,%2,%3,%0";
3202 }
3203 }
3204 else
3205 return "and %1,%2,%0";
3206 }
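/* As examples for pa_output_and: the mask 0x1ff is nine low-order ones
   (ls1 == 32), which becomes "{extru|extrw,u} %1,31,9,%0"; the mask
   0xffff00ff has a zero field at bits 8..15, which becomes
   "{depi|depwi} 0,23,8,%0". */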
3207
3208 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3209 storing the result in operands[0]. */
3210 const char *
3211 pa_output_64bit_and (rtx *operands)
3212 {
3213 if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
3214 {
3215 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3216 int ls0, ls1, ms0, p, len;
3217
3218 for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
3219 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
3220 break;
3221
3222 for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
3223 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
3224 break;
3225
3226 for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
3227 if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
3228 break;
3229
3230 gcc_assert (ms0 == HOST_BITS_PER_WIDE_INT);
3231
3232 if (ls1 == HOST_BITS_PER_WIDE_INT)
3233 {
3234 len = ls0;
3235
3236 gcc_assert (len);
3237
3238 operands[2] = GEN_INT (len);
3239 return "extrd,u %1,63,%2,%0";
3240 }
3241 else
3242 {
3243 /* We could use `depdi' for the case above as well, but `depdi'
3244 requires one more register file access than an `extrd,u'. */
3245
3246 p = 63 - ls0;
3247 len = ls1 - ls0;
3248
3249 operands[2] = GEN_INT (p);
3250 operands[3] = GEN_INT (len);
3251 return "depdi 0,%2,%3,%0";
3252 }
3253 }
3254 else
3255 return "and %1,%2,%0";
3256 }
3257
3258 const char *
3259 pa_output_ior (rtx *operands)
3260 {
3261 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3262 int bs0, bs1, p, len;
3263
3264 if (INTVAL (operands[2]) == 0)
3265 return "copy %1,%0";
3266
3267 for (bs0 = 0; bs0 < 32; bs0++)
3268 if ((mask & (1 << bs0)) != 0)
3269 break;
3270
3271 for (bs1 = bs0; bs1 < 32; bs1++)
3272 if ((mask & (1 << bs1)) == 0)
3273 break;
3274
3275 gcc_assert (bs1 == 32 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3276
3277 p = 31 - bs0;
3278 len = bs1 - bs0;
3279
3280 operands[2] = GEN_INT (p);
3281 operands[3] = GEN_INT (len);
3282 return "{depi|depwi} -1,%2,%3,%0";
3283 }
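/* For example, in pa_output_ior the mask 0x00ff0000 is a run of eight
   ones starting at bit 16, giving p = 15 and len = 8, so the insn
   emitted is "{depi|depwi} -1,15,8,%0". */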
3284
3285 /* Return a string to perform a bitwise-or of operands[1] with operands[2]
3286 storing the result in operands[0]. */
3287 const char *
3288 pa_output_64bit_ior (rtx *operands)
3289 {
3290 unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
3291 int bs0, bs1, p, len;
3292
3293 if (INTVAL (operands[2]) == 0)
3294 return "copy %1,%0";
3295
3296 for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
3297 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
3298 break;
3299
3300 for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
3301 if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
3302 break;
3303
3304 gcc_assert (bs1 == HOST_BITS_PER_WIDE_INT
3305 || ((unsigned HOST_WIDE_INT) 1 << bs1) > mask);
3306
3307 p = 63 - bs0;
3308 len = bs1 - bs0;
3309
3310 operands[2] = GEN_INT (p);
3311 operands[3] = GEN_INT (len);
3312 return "depdi -1,%2,%3,%0";
3313 }
3314 \f
3315 /* Target hook for assembling integer objects. This code handles
3316 aligned SI and DI integers specially since function references
3317 must be preceded by P%. */
3318
3319 static bool
3320 pa_assemble_integer (rtx x, unsigned int size, int aligned_p)
3321 {
3322 bool result;
3323 tree decl = NULL;
3324
3325 /* When we have a SYMBOL_REF with a SYMBOL_REF_DECL, we need to
3326 call assemble_external and set the SYMBOL_REF_DECL to NULL before
3327 calling output_addr_const. Otherwise, it may call assemble_external
3328 in the midst of outputting the assembler code for the SYMBOL_REF.
3329 We restore the SYMBOL_REF_DECL after the output is done. */
3330 if (GET_CODE (x) == SYMBOL_REF)
3331 {
3332 decl = SYMBOL_REF_DECL (x);
3333 if (decl)
3334 {
3335 assemble_external (decl);
3336 SET_SYMBOL_REF_DECL (x, NULL);
3337 }
3338 }
3339
3340 if (size == UNITS_PER_WORD
3341 && aligned_p
3342 && function_label_operand (x, VOIDmode))
3343 {
3344 fputs (size == 8 ? "\t.dword\t" : "\t.word\t", asm_out_file);
3345
3346 /* We don't want an OPD when generating fast indirect calls. */
3347 if (!TARGET_FAST_INDIRECT_CALLS)
3348 fputs ("P%", asm_out_file);
3349
3350 output_addr_const (asm_out_file, x);
3351 fputc ('\n', asm_out_file);
3352 result = true;
3353 }
3354 else
3355 result = default_assemble_integer (x, size, aligned_p);
3356
3357 if (decl)
3358 SET_SYMBOL_REF_DECL (x, decl);
3359
3360 return result;
3361 }
3362 \f
3363 /* Output an ascii string. */
3364 void
3365 pa_output_ascii (FILE *file, const char *p, int size)
3366 {
3367 int i;
3368 int chars_output;
3369 unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
3370
3371 /* The HP assembler can only take strings of 256 characters at one
3372 time. This is a limitation on input line length, *not* the
3373 length of the string. Sigh. Even worse, it seems that the
3374 restriction is in number of input characters (see \xnn &
3375 \whatever). So we have to do this very carefully. */
3376
3377 fputs ("\t.STRING \"", file);
3378
3379 chars_output = 0;
3380 for (i = 0; i < size; i += 4)
3381 {
3382 int co = 0;
3383 int io = 0;
3384 for (io = 0, co = 0; io < MIN (4, size - i); io++)
3385 {
3386 register unsigned int c = (unsigned char) p[i + io];
3387
3388 if (c == '\"' || c == '\\')
3389 partial_output[co++] = '\\';
3390 if (c >= ' ' && c < 0177)
3391 partial_output[co++] = c;
3392 else
3393 {
3394 unsigned int hexd;
3395 partial_output[co++] = '\\';
3396 partial_output[co++] = 'x';
3397 hexd = c / 16 - 0 + '0';
3398 if (hexd > '9')
3399 hexd -= '9' - 'a' + 1;
3400 partial_output[co++] = hexd;
3401 hexd = c % 16 - 0 + '0';
3402 if (hexd > '9')
3403 hexd -= '9' - 'a' + 1;
3404 partial_output[co++] = hexd;
3405 }
3406 }
3407 if (chars_output + co > 243)
3408 {
3409 fputs ("\"\n\t.STRING \"", file);
3410 chars_output = 0;
3411 }
3412 fwrite (partial_output, 1, (size_t) co, file);
3413 chars_output += co;
3414 co = 0;
3415 }
3416 fputs ("\"\n", file);
3417 }
3418
3419 /* Try to rewrite floating point comparisons & branches to avoid
3420 useless add,tr insns.
3421
3422 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3423 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3424 first attempt to remove useless add,tr insns. It is zero
3425 for the second pass as reorg sometimes leaves bogus REG_DEAD
3426 notes lying around.
3427
3428 When CHECK_NOTES is zero we can only eliminate add,tr insns
3429 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3430 instructions. */
3431 static void
3432 remove_useless_addtr_insns (int check_notes)
3433 {
3434 rtx_insn *insn;
3435 static int pass = 0;
3436
3437 /* This is fairly cheap, so always run it when optimizing. */
3438 if (optimize > 0)
3439 {
3440 int fcmp_count = 0;
3441 int fbranch_count = 0;
3442
3443 /* Walk all the insns in this function looking for fcmp & fbranch
3444 instructions. Keep track of how many of each we find. */
3445 for (insn = get_insns (); insn; insn = next_insn (insn))
3446 {
3447 rtx tmp;
3448
3449 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3450 if (! NONJUMP_INSN_P (insn) && ! JUMP_P (insn))
3451 continue;
3452
3453 tmp = PATTERN (insn);
3454
3455 /* It must be a set. */
3456 if (GET_CODE (tmp) != SET)
3457 continue;
3458
3459 /* If the destination is CCFP, then we've found an fcmp insn. */
3460 tmp = SET_DEST (tmp);
3461 if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
3462 {
3463 fcmp_count++;
3464 continue;
3465 }
3466
3467 tmp = PATTERN (insn);
3468 /* If this is an fbranch instruction, bump the fbranch counter. */
3469 if (GET_CODE (tmp) == SET
3470 && SET_DEST (tmp) == pc_rtx
3471 && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
3472 && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
3473 && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
3474 && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
3475 {
3476 fbranch_count++;
3477 continue;
3478 }
3479 }
3480
3481
3482 /* Find all floating point compare + branch insns. If possible,
3483 reverse the comparison & the branch to avoid add,tr insns. */
3484 for (insn = get_insns (); insn; insn = next_insn (insn))
3485 {
3486 rtx tmp;
3487 rtx_insn *next;
3488
3489 /* Ignore anything that isn't an INSN. */
3490 if (! NONJUMP_INSN_P (insn))
3491 continue;
3492
3493 tmp = PATTERN (insn);
3494
3495 /* It must be a set. */
3496 if (GET_CODE (tmp) != SET)
3497 continue;
3498
3499 /* The destination must be CCFP, which is register zero. */
3500 tmp = SET_DEST (tmp);
3501 if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
3502 continue;
3503
3504 /* INSN should be a set of CCFP.
3505
3506 See if the result of this insn is used in a reversed FP
3507 conditional branch. If so, reverse our condition and
3508 the branch. Doing so avoids useless add,tr insns. */
3509 next = next_insn (insn);
3510 while (next)
3511 {
3512 /* Jumps, calls and labels stop our search. */
3513 if (JUMP_P (next) || CALL_P (next) || LABEL_P (next))
3514 break;
3515
3516 /* As does another fcmp insn. */
3517 if (NONJUMP_INSN_P (next)
3518 && GET_CODE (PATTERN (next)) == SET
3519 && GET_CODE (SET_DEST (PATTERN (next))) == REG
3520 && REGNO (SET_DEST (PATTERN (next))) == 0)
3521 break;
3522
3523 next = next_insn (next);
3524 }
3525
3526 /* Is NEXT a branch? */
3527 if (next && JUMP_P (next))
3528 {
3529 rtx pattern = PATTERN (next);
3530
3531 /* If it is a reversed fp conditional branch (e.g. uses add,tr)
3532 and CCFP dies, then reverse our conditional and the branch
3533 to avoid the add,tr. */
3534 if (GET_CODE (pattern) == SET
3535 && SET_DEST (pattern) == pc_rtx
3536 && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
3537 && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
3538 && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
3539 && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
3540 && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
3541 && (fcmp_count == fbranch_count
3542 || (check_notes
3543 && find_regno_note (next, REG_DEAD, 0))))
3544 {
3545 /* Reverse the branch. */
3546 tmp = XEXP (SET_SRC (pattern), 1);
3547 XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
3548 XEXP (SET_SRC (pattern), 2) = tmp;
3549 INSN_CODE (next) = -1;
3550
3551 /* Reverse our condition. */
3552 tmp = PATTERN (insn);
3553 PUT_CODE (XEXP (tmp, 1),
3554 (reverse_condition_maybe_unordered
3555 (GET_CODE (XEXP (tmp, 1)))));
3556 }
3557 }
3558 }
3559 }
3560
3561 pass = !pass;
3562
3563 }
3564 \f
3565 /* You may have trouble believing this, but this is the 32 bit HP-PA
3566 stack layout. Wow.
3567
3568 Offset Contents
3569
3570 Variable arguments (optional; any number may be allocated)
3571
3572 SP-(4*(N+9)) arg word N
3573 : :
3574 SP-56 arg word 5
3575 SP-52 arg word 4
3576
3577 Fixed arguments (must be allocated; may remain unused)
3578
3579 SP-48 arg word 3
3580 SP-44 arg word 2
3581 SP-40 arg word 1
3582 SP-36 arg word 0
3583
3584 Frame Marker
3585
3586 SP-32 External Data Pointer (DP)
3587 SP-28 External sr4
3588 SP-24 External/stub RP (RP')
3589 SP-20 Current RP
3590 SP-16 Static Link
3591 SP-12 Clean up
3592 SP-8 Calling Stub RP (RP'')
3593 SP-4 Previous SP
3594
3595 Top of Frame
3596
3597 SP-0 Stack Pointer (points to next available address)
3598
3599 */
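/* A worked illustration of the table above (not part of the ABI text):
   with the formula SP-(4*(N+9)), arg word 6 of a seven-word call lands
   at SP-60 and arg word 4 at SP-52, while arg words 0-3 always occupy
   the fixed slots at SP-36 through SP-48.  */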
3600
3601 /* This function saves registers as follows. Registers marked with ' are
3602 this function's registers (as opposed to the previous function's).
3603 If a frame_pointer isn't needed, r3 is saved as a general register;
3604 the space for the frame pointer is still allocated, though, to keep
3605 things simple.
3606
3607
3608 Top of Frame
3609
3610 SP (FP') Previous FP
3611 SP + 4 Alignment filler (sigh)
3612 SP + 8 Space for locals reserved here.
3613 .
3614 .
3615 .
3616 SP + n All call saved registers used.
3617 .
3618 .
3619 .
3620 SP + o All call saved fp registers used.
3621 .
3622 .
3623 .
3624 SP + p (SP') points to next available address.
3625
3626 */
3627
3628 /* Global variables set by pa_expand_prologue (). */
3629 /* Size of frame. Need to know this to emit return insns from
3630 leaf procedures. */
3631 static HOST_WIDE_INT actual_fsize, local_fsize;
3632 static int save_fregs;
3633
3634 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3635 Handle case where DISP > 8k by using the add_high_const patterns.
3636
3637 Note that in the DISP > 8k case, we will leave the high part of the
3638 address in %r1. There is code in pa_expand_{prologue,epilogue} that knows this. */
3639
3640 static void
3641 store_reg (int reg, HOST_WIDE_INT disp, int base)
3642 {
3643 rtx dest, src, basereg;
3644 rtx_insn *insn;
3645
3646 src = gen_rtx_REG (word_mode, reg);
3647 basereg = gen_rtx_REG (Pmode, base);
3648 if (VAL_14_BITS_P (disp))
3649 {
3650 dest = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
3651 insn = emit_move_insn (dest, src);
3652 }
3653 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3654 {
3655 rtx delta = GEN_INT (disp);
3656 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3657
3658 emit_move_insn (tmpreg, delta);
3659 insn = emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
3660 if (DO_FRAME_NOTES)
3661 {
3662 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3663 gen_rtx_SET (tmpreg,
3664 gen_rtx_PLUS (Pmode, basereg, delta)));
3665 RTX_FRAME_RELATED_P (insn) = 1;
3666 }
3667 dest = gen_rtx_MEM (word_mode, tmpreg);
3668 insn = emit_move_insn (dest, src);
3669 }
3670 else
3671 {
3672 rtx delta = GEN_INT (disp);
3673 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
3674 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3675
3676 emit_move_insn (tmpreg, high);
3677 dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3678 insn = emit_move_insn (dest, src);
3679 if (DO_FRAME_NOTES)
3680 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3681 gen_rtx_SET (gen_rtx_MEM (word_mode,
3682 gen_rtx_PLUS (word_mode,
3683 basereg,
3684 delta)),
3685 src));
3686 }
3687
3688 if (DO_FRAME_NOTES)
3689 RTX_FRAME_RELATED_P (insn) = 1;
3690 }
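/* Illustrative only: with BASE == %r30 (sp) and DISP == 0x12340, which
   does not fit in a 14-bit field, the final arm above emits the rough
   equivalent of

       addil L'0x12340,%r30        ; %r1 = %r30 + HIGH (disp)
       stw   %reg,R'0x12340(%r1)   ; store at LO_SUM (%r1, disp)

   leaving the high part of the address in %r1, as documented before
   the function.  */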
3691
3692 /* Emit RTL to store REG at the memory location specified by BASE and then
3693 add MOD to BASE. MOD must fit in a 14-bit field (|MOD| < 8k). */
3694
3695 static void
3696 store_reg_modify (int base, int reg, HOST_WIDE_INT mod)
3697 {
3698 rtx basereg, srcreg, delta;
3699 rtx_insn *insn;
3700
3701 gcc_assert (VAL_14_BITS_P (mod));
3702
3703 basereg = gen_rtx_REG (Pmode, base);
3704 srcreg = gen_rtx_REG (word_mode, reg);
3705 delta = GEN_INT (mod);
3706
3707 insn = emit_insn (gen_post_store (basereg, srcreg, delta));
3708 if (DO_FRAME_NOTES)
3709 {
3710 RTX_FRAME_RELATED_P (insn) = 1;
3711
3712 /* RTX_FRAME_RELATED_P must be set on each frame related set
3713 in a parallel with more than one element. */
3714 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 0)) = 1;
3715 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
3716 }
3717 }
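/* A sketch of the effect (assuming the 32-bit stwm form): the single
   instruction "stwm %reg,mod(%base)" stores at *base and then
   post-increments base by MOD, which is why pa_expand_prologue can use
   it to allocate the first chunk of a large frame atomically.  */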
3718
3719 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3720 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3721 whether to add a frame note or not.
3722
3723 In the DISP > 8k case, we leave the high part of the address in %r1.
3724 There is code in pa_expand_{prologue,epilogue} that knows about this. */
3725
3726 static void
3727 set_reg_plus_d (int reg, int base, HOST_WIDE_INT disp, int note)
3728 {
3729 rtx_insn *insn;
3730
3731 if (VAL_14_BITS_P (disp))
3732 {
3733 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3734 plus_constant (Pmode,
3735 gen_rtx_REG (Pmode, base), disp));
3736 }
3737 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
3738 {
3739 rtx basereg = gen_rtx_REG (Pmode, base);
3740 rtx delta = GEN_INT (disp);
3741 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3742
3743 emit_move_insn (tmpreg, delta);
3744 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3745 gen_rtx_PLUS (Pmode, tmpreg, basereg));
3746 if (DO_FRAME_NOTES)
3747 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3748 gen_rtx_SET (tmpreg,
3749 gen_rtx_PLUS (Pmode, basereg, delta)));
3750 }
3751 else
3752 {
3753 rtx basereg = gen_rtx_REG (Pmode, base);
3754 rtx delta = GEN_INT (disp);
3755 rtx tmpreg = gen_rtx_REG (Pmode, 1);
3756
3757 emit_move_insn (tmpreg,
3758 gen_rtx_PLUS (Pmode, basereg,
3759 gen_rtx_HIGH (Pmode, delta)));
3760 insn = emit_move_insn (gen_rtx_REG (Pmode, reg),
3761 gen_rtx_LO_SUM (Pmode, tmpreg, delta));
3762 }
3763
3764 if (DO_FRAME_NOTES && note)
3765 RTX_FRAME_RELATED_P (insn) = 1;
3766 }
3767
3768 HOST_WIDE_INT
3769 pa_compute_frame_size (poly_int64 size, int *fregs_live)
3770 {
3771 int freg_saved = 0;
3772 int i, j;
3773
3774 /* The code in pa_expand_prologue and pa_expand_epilogue must
3775 be consistent with the rounding and size calculation done here.
3776 Change them at the same time. */
3777
3778 /* We do our own stack alignment. First, round the size of the
3779 stack locals up to a word boundary. */
3780 size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3781
3782 /* Space for previous frame pointer + filler. If any frame is
3783 allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET. We
3784 waste some space here for the sake of HP compatibility. The
3785 first slot is only used when the frame pointer is needed. */
3786 if (size || frame_pointer_needed)
3787 size += pa_starting_frame_offset ();
3788
3789 /* If the current function calls __builtin_eh_return, then we need
3790 to allocate stack space for registers that will hold data for
3791 the exception handler. */
3792 if (DO_FRAME_NOTES && crtl->calls_eh_return)
3793 {
3794 unsigned int i;
3795
3796 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
3797 continue;
3798 size += i * UNITS_PER_WORD;
3799 }
3800
3801 /* Account for space used by the callee general register saves. */
3802 for (i = 18, j = frame_pointer_needed ? 4 : 3; i >= j; i--)
3803 if (df_regs_ever_live_p (i))
3804 size += UNITS_PER_WORD;
3805
3806 /* Account for space used by the callee floating point register saves. */
3807 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
3808 if (df_regs_ever_live_p (i)
3809 || (!TARGET_64BIT && df_regs_ever_live_p (i + 1)))
3810 {
3811 freg_saved = 1;
3812
3813 /* We always save both halves of the FP register, so always
3814 increment the frame size by 8 bytes. */
3815 size += 8;
3816 }
3817
3818 /* If any of the floating registers are saved, account for the
3819 alignment needed for the floating point register save block. */
3820 if (freg_saved)
3821 {
3822 size = (size + 7) & ~7;
3823 if (fregs_live)
3824 *fregs_live = 1;
3825 }
3826
3827 /* The various ABIs include space for the outgoing parameters in the
3828 size of the current function's stack frame. We don't need to align
3829 for the outgoing arguments as their alignment is set by the final
3830 rounding for the frame as a whole. */
3831 size += crtl->outgoing_args_size;
3832
3833 /* Allocate space for the fixed frame marker. This space must be
3834 allocated for any function that makes calls or allocates
3835 stack space. */
3836 if (!crtl->is_leaf || size)
3837 size += TARGET_64BIT ? 48 : 32;
3838
3839 /* Finally, round to the preferred stack boundary. */
3840 return ((size + PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1)
3841 & ~(PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT - 1));
3842 }
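/* A worked example (the numbers assume the 32-bit ABI, where
   pa_starting_frame_offset () is 8 and the preferred stack boundary is
   64 bytes): a leaf function with 100 bytes of locals, no saved
   registers and no outgoing arguments rounds 100 up to a word boundary
   (still 100), adds 8 for the frame pointer slot and filler (108),
   adds the 32-byte frame marker because it allocates stack space
   (140), and the final rounding returns a 192-byte frame.  */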
3843
3844 /* Output function label, and associated .PROC and .CALLINFO statements. */
3845
3846 void
3847 pa_output_function_label (FILE *file)
3848 {
3849 /* The function's label and associated .PROC must never be
3850 separated and must be output *after* any profiling declarations
3851 to avoid changing spaces/subspaces within a procedure. */
3852 ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
3853 fputs ("\t.PROC\n", file);
3854
3855 /* pa_expand_prologue does the dirty work now. We just need
3856 to output the assembler directives which denote the start
3857 of a function. */
3858 fprintf (file, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC, actual_fsize);
3859 if (crtl->is_leaf)
3860 fputs (",NO_CALLS", file);
3861 else
3862 fputs (",CALLS", file);
3863 if (rp_saved)
3864 fputs (",SAVE_RP", file);
3865
3866 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3867 at the beginning of the frame and that it is used as the frame
3868 pointer for the frame. We do this because our current frame
3869 layout doesn't conform to that specified in the HP runtime
3870 documentation and we need a way to indicate to programs such as
3871 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3872 isn't used by HP compilers but is supported by the assembler.
3873 However, SAVE_SP is supposed to indicate that the previous stack
3874 pointer has been saved in the frame marker. */
3875 if (frame_pointer_needed)
3876 fputs (",SAVE_SP", file);
3877
3878 /* Pass on information about the number of callee register saves
3879 performed in the prologue.
3880
3881 The compiler is supposed to pass the highest register number
3882 saved, the assembler then has to adjust that number before
3883 entering it into the unwind descriptor (to account for any
3884 caller saved registers with lower register numbers than the
3885 first callee saved register). */
3886 if (gr_saved)
3887 fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
3888
3889 if (fr_saved)
3890 fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
3891
3892 fputs ("\n\t.ENTRY\n", file);
3893 }
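/* Illustrative output for a non-leaf function saving %r2 and two
   callee general registers (label syntax depends on the target
   assembler, and FRAME varies with the frame size):

       foo
           .PROC
           .CALLINFO FRAME=192,CALLS,SAVE_RP,ENTRY_GR=4
           .ENTRY
*/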
3894
3895 /* Output function prologue. */
3896
3897 static void
3898 pa_output_function_prologue (FILE *file)
3899 {
3900 pa_output_function_label (file);
3901 remove_useless_addtr_insns (0);
3902 }
3903
3904 /* The label is output by ASM_DECLARE_FUNCTION_NAME on linux. */
3905
3906 static void
3907 pa_linux_output_function_prologue (FILE *file ATTRIBUTE_UNUSED)
3908 {
3909 remove_useless_addtr_insns (0);
3910 }
3911
3912 void
3913 pa_expand_prologue (void)
3914 {
3915 int merge_sp_adjust_with_store = 0;
3916 HOST_WIDE_INT size = get_frame_size ();
3917 HOST_WIDE_INT offset;
3918 int i;
3919 rtx tmpreg;
3920 rtx_insn *insn;
3921
3922 gr_saved = 0;
3923 fr_saved = 0;
3924 save_fregs = 0;
3925
3926 /* Compute total size for frame pointer, filler, locals and rounding to
3927 the next word boundary. Similar code appears in pa_compute_frame_size
3928 and must be changed in tandem with this code. */
3929 local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
3930 if (local_fsize || frame_pointer_needed)
3931 local_fsize += pa_starting_frame_offset ();
3932
3933 actual_fsize = pa_compute_frame_size (size, &save_fregs);
3934 if (flag_stack_usage_info)
3935 current_function_static_stack_size = actual_fsize;
3936
3937 /* Compute a few things we will use often. */
3938 tmpreg = gen_rtx_REG (word_mode, 1);
3939
3940 /* Save RP first. The calling conventions manual states RP will
3941 always be stored into the caller's frame at sp - 20 or sp - 16
3942 depending on which ABI is in use. */
3943 if (df_regs_ever_live_p (2) || crtl->calls_eh_return)
3944 {
3945 store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);
3946 rp_saved = true;
3947 }
3948 else
3949 rp_saved = false;
3950
3951 /* Allocate the local frame and set up the frame pointer if needed. */
3952 if (actual_fsize != 0)
3953 {
3954 if (frame_pointer_needed)
3955 {
3956 /* Copy the old frame pointer temporarily into %r1. Set up the
3957 new stack pointer, then store away the saved old frame pointer
3958 into the stack at sp and at the same time update the stack
3959 pointer by actual_fsize bytes. There are two versions: the
3960 first handles small (<8k) frames, the second handles large
3961 (>=8k) frames. */
3962 insn = emit_move_insn (tmpreg, hard_frame_pointer_rtx);
3963 if (DO_FRAME_NOTES)
3964 RTX_FRAME_RELATED_P (insn) = 1;
3965
3966 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3967 if (DO_FRAME_NOTES)
3968 RTX_FRAME_RELATED_P (insn) = 1;
3969
3970 if (VAL_14_BITS_P (actual_fsize))
3971 store_reg_modify (STACK_POINTER_REGNUM, 1, actual_fsize);
3972 else
3973 {
3974 /* It is incorrect to store the saved frame pointer at *sp,
3975 then increment sp (writes beyond the current stack boundary).
3976
3977 So instead use stwm to store at *sp and post-increment the
3978 stack pointer as an atomic operation. Then increment sp to
3979 finish allocating the new frame. */
3980 HOST_WIDE_INT adjust1 = 8192 - 64;
3981 HOST_WIDE_INT adjust2 = actual_fsize - adjust1;
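/* Illustration: for actual_fsize == 16384, adjust1 == 8128 and
   adjust2 == 8256; the stwm below allocates the first 8128 bytes
   while storing the saved frame pointer, and set_reg_plus_d then
   adds the remainder.  */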
3982
3983 store_reg_modify (STACK_POINTER_REGNUM, 1, adjust1);
3984 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
3985 adjust2, 1);
3986 }
3987
3988 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3989 we need to store the previous stack pointer (frame pointer)
3990 into the frame marker on targets that use the HP unwind
3991 library. This allows the HP unwind library to be used to
3992 unwind GCC frames. However, we are not fully compatible
3993 with the HP library because our frame layout differs from
3994 that specified in the HP runtime specification.
3995
3996 We don't want a frame note on this instruction as the frame
3997 marker moves during dynamic stack allocation.
3998
3999 This instruction also serves as a blockage to prevent
4000 register spills from being scheduled before the stack
4001 pointer is raised. This is necessary as we store
4002 registers using the frame pointer as a base register,
4003 and the frame pointer is set before sp is raised. */
4004 if (TARGET_HPUX_UNWIND_LIBRARY)
4005 {
4006 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
4007 GEN_INT (TARGET_64BIT ? -8 : -4));
4008
4009 emit_move_insn (gen_rtx_MEM (word_mode, addr),
4010 hard_frame_pointer_rtx);
4011 }
4012 else
4013 emit_insn (gen_blockage ());
4014 }
4015 /* No frame pointer needed. */
4016 else
4017 {
4018 /* In some cases we can perform the first callee register save
4019 and allocate the stack frame at the same time. If so, just
4020 make a note of it and defer allocating the frame until saving
4021 the callee registers. */
4022 if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
4023 merge_sp_adjust_with_store = 1;
4024 /* Cannot optimize. Adjust the stack frame by actual_fsize
4025 bytes. */
4026 else
4027 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4028 actual_fsize, 1);
4029 }
4030 }
4031
4032 /* Normal register save.
4033
4034 Do not save the frame pointer in the frame_pointer_needed case. It
4035 was done earlier. */
4036 if (frame_pointer_needed)
4037 {
4038 offset = local_fsize;
4039
4040 /* Saving the EH return data registers in the frame is the simplest
4041 way to get the frame unwind information emitted. We put them
4042 just before the general registers. */
4043 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4044 {
4045 unsigned int i, regno;
4046
4047 for (i = 0; ; ++i)
4048 {
4049 regno = EH_RETURN_DATA_REGNO (i);
4050 if (regno == INVALID_REGNUM)
4051 break;
4052
4053 store_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4054 offset += UNITS_PER_WORD;
4055 }
4056 }
4057
4058 for (i = 18; i >= 4; i--)
4059 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4060 {
4061 store_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4062 offset += UNITS_PER_WORD;
4063 gr_saved++;
4064 }
4065 /* Account for %r3 which is saved in a special place. */
4066 gr_saved++;
4067 }
4068 /* No frame pointer needed. */
4069 else
4070 {
4071 offset = local_fsize - actual_fsize;
4072
4073 /* Saving the EH return data registers in the frame is the simplest
4074 way to get the frame unwind information emitted. */
4075 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4076 {
4077 unsigned int i, regno;
4078
4079 for (i = 0; ; ++i)
4080 {
4081 regno = EH_RETURN_DATA_REGNO (i);
4082 if (regno == INVALID_REGNUM)
4083 break;
4084
4085 /* If merge_sp_adjust_with_store is nonzero, then we can
4086 optimize the first save. */
4087 if (merge_sp_adjust_with_store)
4088 {
4089 store_reg_modify (STACK_POINTER_REGNUM, regno, -offset);
4090 merge_sp_adjust_with_store = 0;
4091 }
4092 else
4093 store_reg (regno, offset, STACK_POINTER_REGNUM);
4094 offset += UNITS_PER_WORD;
4095 }
4096 }
4097
4098 for (i = 18; i >= 3; i--)
4099 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4100 {
4101 /* If merge_sp_adjust_with_store is nonzero, then we can
4102 optimize the first GR save. */
4103 if (merge_sp_adjust_with_store)
4104 {
4105 store_reg_modify (STACK_POINTER_REGNUM, i, -offset);
4106 merge_sp_adjust_with_store = 0;
4107 }
4108 else
4109 store_reg (i, offset, STACK_POINTER_REGNUM);
4110 offset += UNITS_PER_WORD;
4111 gr_saved++;
4112 }
4113
4114 /* If we wanted to merge the SP adjustment with a GR save, but we never
4115 did any GR saves, then just emit the adjustment here. */
4116 if (merge_sp_adjust_with_store)
4117 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4118 actual_fsize, 1);
4119 }
4120
4121 /* The hppa calling conventions say that %r19, the pic offset
4122 register, is saved at sp - 32 (in this function's frame)
4123 when generating PIC code. FIXME: What is the correct thing
4124 to do for functions which make no calls and allocate no
4125 frame? Do we need to allocate a frame, or can we just omit
4126 the save? For now we'll just omit the save.
4127
4128 We don't want a note on this insn as the frame marker can
4129 move if there is a dynamic stack allocation. */
4130 if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
4131 {
4132 rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx, GEN_INT (-32));
4133
4134 emit_move_insn (gen_rtx_MEM (word_mode, addr), pic_offset_table_rtx);
4135
4136 }
4137
4138 /* Align pointer properly (doubleword boundary). */
4139 offset = (offset + 7) & ~7;
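/* E.g. an offset of 44 becomes 48; (x + 7) & ~7 rounds up to the next
   multiple of 8 so the DFmode saves below stay doubleword aligned.  */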
4140
4141 /* Floating point register store. */
4142 if (save_fregs)
4143 {
4144 rtx base;
4145
4146 /* First get the frame or stack pointer to the start of the FP register
4147 save area. */
4148 if (frame_pointer_needed)
4149 {
4150 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4151 base = hard_frame_pointer_rtx;
4152 }
4153 else
4154 {
4155 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4156 base = stack_pointer_rtx;
4157 }
4158
4159 /* Now actually save the FP registers. */
4160 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4161 {
4162 if (df_regs_ever_live_p (i)
4163 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4164 {
4165 rtx addr, reg;
4166 rtx_insn *insn;
4167 addr = gen_rtx_MEM (DFmode,
4168 gen_rtx_POST_INC (word_mode, tmpreg));
4169 reg = gen_rtx_REG (DFmode, i);
4170 insn = emit_move_insn (addr, reg);
4171 if (DO_FRAME_NOTES)
4172 {
4173 RTX_FRAME_RELATED_P (insn) = 1;
4174 if (TARGET_64BIT)
4175 {
4176 rtx mem = gen_rtx_MEM (DFmode,
4177 plus_constant (Pmode, base,
4178 offset));
4179 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4180 gen_rtx_SET (mem, reg));
4181 }
4182 else
4183 {
4184 rtx meml = gen_rtx_MEM (SFmode,
4185 plus_constant (Pmode, base,
4186 offset));
4187 rtx memr = gen_rtx_MEM (SFmode,
4188 plus_constant (Pmode, base,
4189 offset + 4));
4190 rtx regl = gen_rtx_REG (SFmode, i);
4191 rtx regr = gen_rtx_REG (SFmode, i + 1);
4192 rtx setl = gen_rtx_SET (meml, regl);
4193 rtx setr = gen_rtx_SET (memr, regr);
4194 rtvec vec;
4195
4196 RTX_FRAME_RELATED_P (setl) = 1;
4197 RTX_FRAME_RELATED_P (setr) = 1;
4198 vec = gen_rtvec (2, setl, setr);
4199 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4200 gen_rtx_SEQUENCE (VOIDmode, vec));
4201 }
4202 }
4203 offset += GET_MODE_SIZE (DFmode);
4204 fr_saved++;
4205 }
4206 }
4207 }
4208 }
4209
4210 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4211 Handle case where DISP > 8k by using the add_high_const patterns. */
4212
4213 static void
4214 load_reg (int reg, HOST_WIDE_INT disp, int base)
4215 {
4216 rtx dest = gen_rtx_REG (word_mode, reg);
4217 rtx basereg = gen_rtx_REG (Pmode, base);
4218 rtx src;
4219
4220 if (VAL_14_BITS_P (disp))
4221 src = gen_rtx_MEM (word_mode, plus_constant (Pmode, basereg, disp));
4222 else if (TARGET_64BIT && !VAL_32_BITS_P (disp))
4223 {
4224 rtx delta = GEN_INT (disp);
4225 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4226
4227 emit_move_insn (tmpreg, delta);
4228 if (TARGET_DISABLE_INDEXING)
4229 {
4230 emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4231 src = gen_rtx_MEM (word_mode, tmpreg);
4232 }
4233 else
4234 src = gen_rtx_MEM (word_mode, gen_rtx_PLUS (Pmode, tmpreg, basereg));
4235 }
4236 else
4237 {
4238 rtx delta = GEN_INT (disp);
4239 rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
4240 rtx tmpreg = gen_rtx_REG (Pmode, 1);
4241
4242 emit_move_insn (tmpreg, high);
4243 src = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
4244 }
4245
4246 emit_move_insn (dest, src);
4247 }
4248
4249 /* Update the total code bytes output to the text section. */
4250
4251 static void
4252 update_total_code_bytes (unsigned int nbytes)
4253 {
4254 if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
4255 && !IN_NAMED_SECTION_P (cfun->decl))
4256 {
4257 unsigned int old_total = total_code_bytes;
4258
4259 total_code_bytes += nbytes;
4260
4261 /* Be prepared to handle overflows. */
4262 if (old_total > total_code_bytes)
4263 total_code_bytes = UINT_MAX;
4264 }
4265 }
4266
4267 /* This function generates the assembly code for function exit.
4268 Args are as for output_function_prologue ().
4269
4270 The function epilogue should not depend on the current stack
4271 pointer! It should use the frame pointer only. This is mandatory
4272 because of alloca; we also take advantage of it to omit stack
4273 adjustments before returning. */
4274
4275 static void
4276 pa_output_function_epilogue (FILE *file)
4277 {
4278 rtx_insn *insn = get_last_insn ();
4279 bool extra_nop;
4280
4281 /* pa_expand_epilogue does the dirty work now. We just need
4282 to output the assembler directives which denote the end
4283 of a function.
4284
4285 To make debuggers happy, emit a nop if the epilogue was completely
4286 eliminated due to a volatile call as the last insn in the
4287 current function. That way the return address (in %r2) will
4288 always point to a valid instruction in the current function. */
4289
4290 /* Get the last real insn. */
4291 if (NOTE_P (insn))
4292 insn = prev_real_insn (insn);
4293
4294 /* If it is a sequence, then look inside. */
4295 if (insn && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
4296 insn = as_a <rtx_sequence *> (PATTERN (insn))->insn (0);
4297
4298 /* If insn is a CALL_INSN, then it must be a call to a volatile
4299 function (otherwise there would be epilogue insns). */
4300 if (insn && CALL_P (insn))
4301 {
4302 fputs ("\tnop\n", file);
4303 extra_nop = true;
4304 }
4305 else
4306 extra_nop = false;
4307
4308 fputs ("\t.EXIT\n\t.PROCEND\n", file);
4309
4310 if (TARGET_SOM && TARGET_GAS)
4311 {
4312 /* We are done with this subspace except possibly for some additional
4313 debug information. Forget that we are in this subspace to ensure
4314 that the next function is output in its own subspace. */
4315 in_section = NULL;
4316 cfun->machine->in_nsubspa = 2;
4317 }
4318
4319 /* Thunks do their own insn accounting. */
4320 if (cfun->is_thunk)
4321 return;
4322
4323 if (INSN_ADDRESSES_SET_P ())
4324 {
4325 last_address = extra_nop ? 4 : 0;
4326 insn = get_last_nonnote_insn ();
4327 if (insn)
4328 {
4329 last_address += INSN_ADDRESSES (INSN_UID (insn));
4330 if (INSN_P (insn))
4331 last_address += insn_default_length (insn);
4332 }
4333 last_address = ((last_address + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
4334 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
4335 }
4336 else
4337 last_address = UINT_MAX;
4338
4339 /* Finally, update the total number of code bytes output so far. */
4340 update_total_code_bytes (last_address);
4341 }
4342
4343 void
4344 pa_expand_epilogue (void)
4345 {
4346 rtx tmpreg;
4347 HOST_WIDE_INT offset;
4348 HOST_WIDE_INT ret_off = 0;
4349 int i;
4350 int merge_sp_adjust_with_load = 0;
4351
4352 /* We will use this often. */
4353 tmpreg = gen_rtx_REG (word_mode, 1);
4354
4355 /* Try to restore RP early to avoid load/use interlocks when
4356 RP gets used in the return (bv) instruction. This appears to still
4357 be necessary even when we schedule the prologue and epilogue. */
4358 if (rp_saved)
4359 {
4360 ret_off = TARGET_64BIT ? -16 : -20;
4361 if (frame_pointer_needed)
4362 {
4363 load_reg (2, ret_off, HARD_FRAME_POINTER_REGNUM);
4364 ret_off = 0;
4365 }
4366 else
4367 {
4368 /* No frame pointer, and stack is smaller than 8k. */
4369 if (VAL_14_BITS_P (ret_off - actual_fsize))
4370 {
4371 load_reg (2, ret_off - actual_fsize, STACK_POINTER_REGNUM);
4372 ret_off = 0;
4373 }
4374 }
4375 }
4376
4377 /* General register restores. */
4378 if (frame_pointer_needed)
4379 {
4380 offset = local_fsize;
4381
4382 /* If the current function calls __builtin_eh_return, then we need
4383 to restore the saved EH data registers. */
4384 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4385 {
4386 unsigned int i, regno;
4387
4388 for (i = 0; ; ++i)
4389 {
4390 regno = EH_RETURN_DATA_REGNO (i);
4391 if (regno == INVALID_REGNUM)
4392 break;
4393
4394 load_reg (regno, offset, HARD_FRAME_POINTER_REGNUM);
4395 offset += UNITS_PER_WORD;
4396 }
4397 }
4398
4399 for (i = 18; i >= 4; i--)
4400 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4401 {
4402 load_reg (i, offset, HARD_FRAME_POINTER_REGNUM);
4403 offset += UNITS_PER_WORD;
4404 }
4405 }
4406 else
4407 {
4408 offset = local_fsize - actual_fsize;
4409
4410 /* If the current function calls __builtin_eh_return, then we need
4411 to restore the saved EH data registers. */
4412 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4413 {
4414 unsigned int i, regno;
4415
4416 for (i = 0; ; ++i)
4417 {
4418 regno = EH_RETURN_DATA_REGNO (i);
4419 if (regno == INVALID_REGNUM)
4420 break;
4421
4422 /* Only for the first load.
4423 merge_sp_adjust_with_load holds the register load
4424 with which we will merge the sp adjustment. */
4425 if (merge_sp_adjust_with_load == 0
4426 && local_fsize == 0
4427 && VAL_14_BITS_P (-actual_fsize))
4428 merge_sp_adjust_with_load = regno;
4429 else
4430 load_reg (regno, offset, STACK_POINTER_REGNUM);
4431 offset += UNITS_PER_WORD;
4432 }
4433 }
4434
4435 for (i = 18; i >= 3; i--)
4436 {
4437 if (df_regs_ever_live_p (i) && ! call_used_regs[i])
4438 {
4439 /* Only for the first load.
4440 merge_sp_adjust_with_load holds the register load
4441 with which we will merge the sp adjustment. */
4442 if (merge_sp_adjust_with_load == 0
4443 && local_fsize == 0
4444 && VAL_14_BITS_P (-actual_fsize))
4445 merge_sp_adjust_with_load = i;
4446 else
4447 load_reg (i, offset, STACK_POINTER_REGNUM);
4448 offset += UNITS_PER_WORD;
4449 }
4450 }
4451 }
4452
4453 /* Align pointer properly (doubleword boundary). */
4454 offset = (offset + 7) & ~7;
4455
4456 /* FP register restores. */
4457 if (save_fregs)
4458 {
4459 /* Adjust the register to index off of. */
4460 if (frame_pointer_needed)
4461 set_reg_plus_d (1, HARD_FRAME_POINTER_REGNUM, offset, 0);
4462 else
4463 set_reg_plus_d (1, STACK_POINTER_REGNUM, offset, 0);
4464
4465 /* Actually do the restores now. */
4466 for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
4467 if (df_regs_ever_live_p (i)
4468 || (! TARGET_64BIT && df_regs_ever_live_p (i + 1)))
4469 {
4470 rtx src = gen_rtx_MEM (DFmode,
4471 gen_rtx_POST_INC (word_mode, tmpreg));
4472 rtx dest = gen_rtx_REG (DFmode, i);
4473 emit_move_insn (dest, src);
4474 }
4475 }
4476
4477 /* Emit a blockage insn here to keep these insns from being moved to
4478 an earlier spot in the epilogue, or into the main instruction stream.
4479
4480 This is necessary as we must not cut the stack back before all the
4481 restores are finished. */
4482 emit_insn (gen_blockage ());
4483
4484 /* Reset stack pointer (and possibly frame pointer). The stack
4485 pointer is initially set to fp + 64 to avoid a race condition. */
4486 if (frame_pointer_needed)
4487 {
4488 rtx delta = GEN_INT (-64);
4489
4490 set_reg_plus_d (STACK_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM, 64, 0);
4491 emit_insn (gen_pre_load (hard_frame_pointer_rtx,
4492 stack_pointer_rtx, delta));
4493 }
4494 /* If we were deferring a callee register restore, do it now. */
4495 else if (merge_sp_adjust_with_load)
4496 {
4497 rtx delta = GEN_INT (-actual_fsize);
4498 rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
4499
4500 emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
4501 }
4502 else if (actual_fsize != 0)
4503 set_reg_plus_d (STACK_POINTER_REGNUM, STACK_POINTER_REGNUM,
4504 - actual_fsize, 0);
4505
4506 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4507 frame greater than 8k), do so now. */
4508 if (ret_off != 0)
4509 load_reg (2, ret_off, STACK_POINTER_REGNUM);
4510
4511 if (DO_FRAME_NOTES && crtl->calls_eh_return)
4512 {
4513 rtx sa = EH_RETURN_STACKADJ_RTX;
4514
4515 emit_insn (gen_blockage ());
4516 emit_insn (TARGET_64BIT
4517 ? gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, sa)
4518 : gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, sa));
4519 }
4520 }
4521
4522 bool
4523 pa_can_use_return_insn (void)
4524 {
4525 if (!reload_completed)
4526 return false;
4527
4528 if (frame_pointer_needed)
4529 return false;
4530
4531 if (df_regs_ever_live_p (2))
4532 return false;
4533
4534 if (crtl->profile)
4535 return false;
4536
4537 return pa_compute_frame_size (get_frame_size (), 0) == 0;
4538 }
4539
4540 rtx
4541 hppa_pic_save_rtx (void)
4542 {
4543 return get_hard_reg_initial_val (word_mode, PIC_OFFSET_TABLE_REGNUM);
4544 }
4545
4546 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4547 #define NO_DEFERRED_PROFILE_COUNTERS 0
4548 #endif
4549
4550
4551 /* Vector of funcdef numbers. */
4552 static vec<int> funcdef_nos;
4553
4554 /* Output deferred profile counters. */
4555 static void
4556 output_deferred_profile_counters (void)
4557 {
4558 unsigned int i;
4559 int align, n;
4560
4561 if (funcdef_nos.is_empty ())
4562 return;
4563
4564 switch_to_section (data_section);
4565 align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE);
4566 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
4567
4568 for (i = 0; funcdef_nos.iterate (i, &n); i++)
4569 {
4570 targetm.asm_out.internal_label (asm_out_file, "LP", n);
4571 assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1);
4572 }
4573
4574 funcdef_nos.release ();
4575 }
4576
4577 void
4578 hppa_profile_hook (int label_no)
4579 {
4580 rtx_code_label *label_rtx = gen_label_rtx ();
4581 int reg_parm_stack_space = REG_PARM_STACK_SPACE (NULL_TREE);
4582 rtx arg_bytes, begin_label_rtx, mcount, sym;
4583 rtx_insn *call_insn;
4584 char begin_label_name[16];
4585 bool use_mcount_pcrel_call;
4586
4587 /* Set up call destination. */
4588 sym = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
4589 pa_encode_label (sym);
4590 mcount = gen_rtx_MEM (Pmode, sym);
4591
4592 /* If we can reach _mcount with a pc-relative call, we can optimize
4593 loading the address of the current function. This requires linker
4594 long branch stub support. */
4595 if (!TARGET_PORTABLE_RUNTIME
4596 && !TARGET_LONG_CALLS
4597 && (TARGET_SOM || flag_function_sections))
4598 use_mcount_pcrel_call = TRUE;
4599 else
4600 use_mcount_pcrel_call = FALSE;
4601
4602 ASM_GENERATE_INTERNAL_LABEL (begin_label_name, FUNC_BEGIN_PROLOG_LABEL,
4603 label_no);
4604 begin_label_rtx = gen_rtx_SYMBOL_REF (SImode, ggc_strdup (begin_label_name));
4605
4606 emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
4607
4608 if (!use_mcount_pcrel_call)
4609 {
4610 /* The address of the function is loaded into %r25 with an instruction-
4611 relative sequence that avoids the use of relocations. We use SImode
4612 for the address of the function in both 32 and 64-bit code to avoid
4613 having to provide DImode versions of the lcla2 pattern. */
4614 if (TARGET_PA_20)
4615 emit_insn (gen_lcla2 (gen_rtx_REG (SImode, 25), label_rtx));
4616 else
4617 emit_insn (gen_lcla1 (gen_rtx_REG (SImode, 25), label_rtx));
4618 }
4619
4620 if (!NO_DEFERRED_PROFILE_COUNTERS)
4621 {
4622 rtx count_label_rtx, addr, r24;
4623 char count_label_name[16];
4624
4625 funcdef_nos.safe_push (label_no);
4626 ASM_GENERATE_INTERNAL_LABEL (count_label_name, "LP", label_no);
4627 count_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
4628 ggc_strdup (count_label_name));
4629
4630 addr = force_reg (Pmode, count_label_rtx);
4631 r24 = gen_rtx_REG (Pmode, 24);
4632 emit_move_insn (r24, addr);
4633
4634 arg_bytes = GEN_INT (TARGET_64BIT ? 24 : 12);
4635 if (use_mcount_pcrel_call)
4636 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4637 begin_label_rtx));
4638 else
4639 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4640
4641 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
4642 }
4643 else
4644 {
4645 arg_bytes = GEN_INT (TARGET_64BIT ? 16 : 8);
4646 if (use_mcount_pcrel_call)
4647 call_insn = emit_call_insn (gen_call_mcount (mcount, arg_bytes,
4648 begin_label_rtx));
4649 else
4650 call_insn = emit_call_insn (gen_call (mcount, arg_bytes));
4651 }
4652
4653 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 25));
4654 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), gen_rtx_REG (SImode, 26));
4655
4656 /* Indicate the _mcount call cannot throw, nor will it execute a
4657 non-local goto. */
4658 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
4659
4660 /* Allocate space for fixed arguments. */
4661 if (reg_parm_stack_space > crtl->outgoing_args_size)
4662 crtl->outgoing_args_size = reg_parm_stack_space;
4663 }
4664
4665 /* Fetch the return address for the frame COUNT steps up from
4666 the current frame, after the prologue. FRAMEADDR is the
4667 frame pointer of the COUNT frame.
4668
4669 We want to ignore any export stub remnants here. To handle this,
4670 we examine the code at the return address, and if it is an export
4671 stub, we return a memory rtx for the stub return address stored
4672 at frame-24.
4673
4674 The value returned is used in two different ways:
4675
4676 1. To find a function's caller.
4677
4678 2. To change the return address for a function.
4679
4680 This function handles most instances of case 1; however, it will
4681 fail if there are two levels of stubs to execute on the return
4682 path. The only way I believe that can happen is if the return value
4683 needs a parameter relocation, which never happens for C code.
4684
4685 This function handles most instances of case 2; however, it will
4686 fail if we did not originally have stub code on the return path
4687 but will need stub code on the new return path. This can happen if
4688 the caller & callee are both in the main program, but the new
4689 return location is in a shared library. */
4690
4691 rtx
4692 pa_return_addr_rtx (int count, rtx frameaddr)
4693 {
4694 rtx label;
4695 rtx rp;
4696 rtx saved_rp;
4697 rtx ins;
4698
4699 /* The instruction stream at the return address of a PA1.X export stub is:
4700
4701 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4702 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4703 0x00011820 | stub+16: mtsp r1,sr0
4704 0xe0400002 | stub+20: be,n 0(sr0,rp)
4705
4706 0xe0400002 must be specified as -532676606 so that it won't be
4707 rejected as an invalid immediate operand on 64-bit hosts.
4708
4709 The instruction stream at the return address of a PA2.0 export stub is:
4710
4711 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4712 0xe840d002 | stub+12: bve,n (rp)
4713 */
4714
4715 HOST_WIDE_INT insns[4];
4716 int i, len;
4717
4718 if (count != 0)
4719 return NULL_RTX;
4720
4721 rp = get_hard_reg_initial_val (Pmode, 2);
4722
4723 if (TARGET_64BIT || TARGET_NO_SPACE_REGS)
4724 return rp;
4725
4726 /* If there is no export stub then just use the value saved from
4727 the return pointer register. */
4728
4729 saved_rp = gen_reg_rtx (Pmode);
4730 emit_move_insn (saved_rp, rp);
4731
4732 /* Get pointer to the instruction stream. We have to mask out the
4733 privilege level from the two low order bits of the return address
4734 pointer here so that ins will point to the start of the first
4735 instruction that would have been executed if we returned. */
4736 ins = copy_to_reg (gen_rtx_AND (Pmode, rp, MASK_RETURN_ADDR));
4737 label = gen_label_rtx ();
4738
4739 if (TARGET_PA_20)
4740 {
4741 insns[0] = 0x4bc23fd1;
4742 insns[1] = -398405630;
4743 len = 2;
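/* As with -532676606 above, -398405630 is simply 0xe840d002 (the
   PA2.0 "bve,n (rp)" word) written as a signed 32-bit value.  */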
4744 }
4745 else
4746 {
4747 insns[0] = 0x4bc23fd1;
4748 insns[1] = 0x004010a1;
4749 insns[2] = 0x00011820;
4750 insns[3] = -532676606;
4751 len = 4;
4752 }
4753
4754 /* Check the instruction stream at the normal return address for the
4755 export stub. If it is an export stub, then our return address is
4756 really in -24[frameaddr]. */
4757
4758 for (i = 0; i < len; i++)
4759 {
4760 rtx op0 = gen_rtx_MEM (SImode, plus_constant (Pmode, ins, i * 4));
4761 rtx op1 = GEN_INT (insns[i]);
4762 emit_cmp_and_jump_insns (op0, op1, NE, NULL, SImode, 0, label);
4763 }
4764
4765 /* Here we know that our return address points to an export
4766 stub. We don't want to return the address of the export stub,
4767 but rather the return address of the export stub. That return
4768 address is stored at -24[frameaddr]. */
4769
4770 emit_move_insn (saved_rp,
4771 gen_rtx_MEM (Pmode,
4772 memory_address (Pmode,
4773 plus_constant (Pmode, frameaddr,
4774 -24))));
4775
4776 emit_label (label);
4777
4778 return saved_rp;
4779 }
4780
4781 void
4782 pa_emit_bcond_fp (rtx operands[])
4783 {
4784 enum rtx_code code = GET_CODE (operands[0]);
4785 rtx operand0 = operands[1];
4786 rtx operand1 = operands[2];
4787 rtx label = operands[3];
4788
4789 emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, 0),
4790 gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)));
4791
4792 emit_jump_insn (gen_rtx_SET (pc_rtx,
4793 gen_rtx_IF_THEN_ELSE (VOIDmode,
4794 gen_rtx_fmt_ee (NE,
4795 VOIDmode,
4796 gen_rtx_REG (CCFPmode, 0),
4797 const0_rtx),
4798 gen_rtx_LABEL_REF (VOIDmode, label),
4799 pc_rtx)));
4800
4801 }
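/* An observation (not from the original comments): the two insns
   emitted above are exactly the fcmp/fbranch pair that
   remove_useless_addtr_insns scans for -- a SET of CCFP (register 0)
   followed by a jump on (ne (reg 0) (const_int 0)) -- so branches
   created here are candidates for that condition-reversal cleanup.  */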
4802
4803 /* Adjust the cost of a scheduling dependency. Return the new cost of
4804 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4805
4806 static int
4807 pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
4808 unsigned int)
4809 {
4810 enum attr_type attr_type;
4811
4812 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4813 true dependencies as they are described with bypasses now. */
4814 if (pa_cpu >= PROCESSOR_8000 || dep_type == 0)
4815 return cost;
4816
4817 if (! recog_memoized (insn))
4818 return 0;
4819
4820 attr_type = get_attr_type (insn);
4821
4822 switch (dep_type)
4823 {
4824 case REG_DEP_ANTI:
4825 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4826 cycles later. */
4827
4828 if (attr_type == TYPE_FPLOAD)
4829 {
4830 rtx pat = PATTERN (insn);
4831 rtx dep_pat = PATTERN (dep_insn);
4832 if (GET_CODE (pat) == PARALLEL)
4833 {
4834 /* This happens for the fldXs,mb patterns. */
4835 pat = XVECEXP (pat, 0, 0);
4836 }
4837 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4838 /* If this happens, we have to extend this to schedule
4839 optimally. Return 0 for now. */
4840 return 0;
4841
4842 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4843 {
4844 if (! recog_memoized (dep_insn))
4845 return 0;
4846 switch (get_attr_type (dep_insn))
4847 {
4848 case TYPE_FPALU:
4849 case TYPE_FPMULSGL:
4850 case TYPE_FPMULDBL:
4851 case TYPE_FPDIVSGL:
4852 case TYPE_FPDIVDBL:
4853 case TYPE_FPSQRTSGL:
4854 case TYPE_FPSQRTDBL:
4855 /* An fpload can't be issued until one cycle before a
4856 preceding arithmetic operation has finished if
4857 the target of the fpload is any of the sources
4858 (or destination) of the arithmetic operation. */
4859 return insn_default_latency (dep_insn) - 1;
4860
4861 default:
4862 return 0;
4863 }
4864 }
4865 }
4866 else if (attr_type == TYPE_FPALU)
4867 {
4868 rtx pat = PATTERN (insn);
4869 rtx dep_pat = PATTERN (dep_insn);
4870 if (GET_CODE (pat) == PARALLEL)
4871 {
4872 /* This happens for the fldXs,mb patterns. */
4873 pat = XVECEXP (pat, 0, 0);
4874 }
4875 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4876 /* If this happens, we have to extend this to schedule
4877 optimally. Return 0 for now. */
4878 return 0;
4879
4880 if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat)))
4881 {
4882 if (! recog_memoized (dep_insn))
4883 return 0;
4884 switch (get_attr_type (dep_insn))
4885 {
4886 case TYPE_FPDIVSGL:
4887 case TYPE_FPDIVDBL:
4888 case TYPE_FPSQRTSGL:
4889 case TYPE_FPSQRTDBL:
4890 /* An ALU flop can't be issued until two cycles before a
4891 preceding divide or sqrt operation has finished if
4892 the target of the ALU flop is any of the sources
4893 (or destination) of the divide or sqrt operation. */
4894 return insn_default_latency (dep_insn) - 2;
4895
4896 default:
4897 return 0;
4898 }
4899 }
4900 }
4901
4902 /* For other anti dependencies, the cost is 0. */
4903 return 0;
4904
4905 case REG_DEP_OUTPUT:
4906 /* Output dependency; DEP_INSN writes a register that INSN writes some
4907 cycles later. */
4908 if (attr_type == TYPE_FPLOAD)
4909 {
4910 rtx pat = PATTERN (insn);
4911 rtx dep_pat = PATTERN (dep_insn);
4912 if (GET_CODE (pat) == PARALLEL)
4913 {
4914 /* This happens for the fldXs,mb patterns. */
4915 pat = XVECEXP (pat, 0, 0);
4916 }
4917 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4918 /* If this happens, we have to extend this to schedule
4919 optimally. Return 0 for now. */
4920 return 0;
4921
4922 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4923 {
4924 if (! recog_memoized (dep_insn))
4925 return 0;
4926 switch (get_attr_type (dep_insn))
4927 {
4928 case TYPE_FPALU:
4929 case TYPE_FPMULSGL:
4930 case TYPE_FPMULDBL:
4931 case TYPE_FPDIVSGL:
4932 case TYPE_FPDIVDBL:
4933 case TYPE_FPSQRTSGL:
4934 case TYPE_FPSQRTDBL:
4935 /* An fpload can't be issued until one cycle before a
4936 preceding arithmetic operation has finished if
4937 the target of the fpload is the destination of the
4938 arithmetic operation.
4939
4940 Exception: For PA7100LC, PA7200 and PA7300, the cost
4941 is 3 cycles, unless they bundle together. We also
4942 pay the penalty if the second insn is a fpload. */
4943 return insn_default_latency (dep_insn) - 1;
4944
4945 default:
4946 return 0;
4947 }
4948 }
4949 }
4950 else if (attr_type == TYPE_FPALU)
4951 {
4952 rtx pat = PATTERN (insn);
4953 rtx dep_pat = PATTERN (dep_insn);
4954 if (GET_CODE (pat) == PARALLEL)
4955 {
4956 /* This happens for the fldXs,mb patterns. */
4957 pat = XVECEXP (pat, 0, 0);
4958 }
4959 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
4960 /* If this happens, we have to extend this to schedule
4961 optimally. Return 0 for now. */
4962 return 0;
4963
4964 if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat)))
4965 {
4966 if (! recog_memoized (dep_insn))
4967 return 0;
4968 switch (get_attr_type (dep_insn))
4969 {
4970 case TYPE_FPDIVSGL:
4971 case TYPE_FPDIVDBL:
4972 case TYPE_FPSQRTSGL:
4973 case TYPE_FPSQRTDBL:
4974 /* An ALU flop can't be issued until two cycles before a
4975 preceding divide or sqrt operation has finished if
4976 the target of the ALU flop is also the target of
4977 the divide or sqrt operation. */
4978 return insn_default_latency (dep_insn) - 2;
4979
4980 default:
4981 return 0;
4982 }
4983 }
4984 }
4985
4986 /* For other output dependencies, the cost is 0. */
4987 return 0;
4988
4989 default:
4990 gcc_unreachable ();
4991 }
4992 }
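/* Worked example of the anti-dependency case above (the latency value
   is hypothetical; real values come from the scheduling descriptions
   in pa.md): if an earlier fpalu insn with default latency 3 reads
   %fr4 and a later fpload writes %fr4, the adjusted cost is
   3 - 1 = 2 cycles.  */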
4993
4994 /* The 700 can only issue a single insn at a time.
4995 The 7XXX processors can issue two insns at a time.
4996 The 8000 can issue 4 insns at a time. */
4997 static int
4998 pa_issue_rate (void)
4999 {
5000 switch (pa_cpu)
5001 {
5002 case PROCESSOR_700: return 1;
5003 case PROCESSOR_7100: return 2;
5004 case PROCESSOR_7100LC: return 2;
5005 case PROCESSOR_7200: return 2;
5006 case PROCESSOR_7300: return 2;
5007 case PROCESSOR_8000: return 4;
5008
5009 default:
5010 gcc_unreachable ();
5011 }
5012 }
5013
5014
5015
5016 /* Return any length plus adjustment needed by INSN which already has
5017 its length computed as LENGTH. Return LENGTH if no adjustment is
5018 necessary.
5019
5020 Also compute the length of an inline block move here as it is too
5021 complicated to express as a length attribute in pa.md. */
5022 int
5023 pa_adjust_insn_length (rtx_insn *insn, int length)
5024 {
5025 rtx pat = PATTERN (insn);
5026
5027 /* If length is negative or undefined, provide initial length. */
5028 if ((unsigned int) length >= INT_MAX)
5029 {
5030 if (GET_CODE (pat) == SEQUENCE)
5031 insn = as_a <rtx_insn *> (XVECEXP (pat, 0, 0));
5032
5033 switch (get_attr_type (insn))
5034 {
5035 case TYPE_MILLI:
5036 length = pa_attr_length_millicode_call (insn);
5037 break;
5038 case TYPE_CALL:
5039 length = pa_attr_length_call (insn, 0);
5040 break;
5041 case TYPE_SIBCALL:
5042 length = pa_attr_length_call (insn, 1);
5043 break;
5044 case TYPE_DYNCALL:
5045 length = pa_attr_length_indirect_call (insn);
5046 break;
5047 case TYPE_SH_FUNC_ADRS:
5048 length = pa_attr_length_millicode_call (insn) + 20;
5049 break;
5050 default:
5051 gcc_unreachable ();
5052 }
5053 }
5054
5055 /* Block move pattern. */
5056 if (NONJUMP_INSN_P (insn)
5057 && GET_CODE (pat) == PARALLEL
5058 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5059 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5060 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM
5061 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode
5062 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode)
5063 length += compute_cpymem_length (insn) - 4;
5064 /* Block clear pattern. */
5065 else if (NONJUMP_INSN_P (insn)
5066 && GET_CODE (pat) == PARALLEL
5067 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
5068 && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM
5069 && XEXP (XVECEXP (pat, 0, 0), 1) == const0_rtx
5070 && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode)
5071 length += compute_clrmem_length (insn) - 4;
5072 /* Conditional branch with an unfilled delay slot. */
5073 else if (JUMP_P (insn) && ! simplejump_p (insn))
5074 {
5075 /* Adjust a short backwards conditional with an unfilled delay slot. */
5076 if (GET_CODE (pat) == SET
5077 && length == 4
5078 && JUMP_LABEL (insn) != NULL_RTX
5079 && ! forward_branch_p (insn))
5080 length += 4;
5081 else if (GET_CODE (pat) == PARALLEL
5082 && get_attr_type (insn) == TYPE_PARALLEL_BRANCH
5083 && length == 4)
5084 length += 4;
5085 /* Adjust dbra insn with short backwards conditional branch with
5086 unfilled delay slot -- only for case where counter is in a
5087 general register. */
5088 else if (GET_CODE (pat) == PARALLEL
5089 && GET_CODE (XVECEXP (pat, 0, 1)) == SET
5090 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG
5091 && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0))
5092 && length == 4
5093 && ! forward_branch_p (insn))
5094 length += 4;
5095 }
5096 return length;
5097 }
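/* Example of the branch adjustment above (illustrative): a short
   backwards conditional branch with an unfilled delay slot reports
   length 4 and is grown to 8, presumably to cover the nop needed in
   the unfilled slot.  */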
5098
5099 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
5100
5101 static bool
5102 pa_print_operand_punct_valid_p (unsigned char code)
5103 {
5104 if (code == '@'
5105 || code == '#'
5106 || code == '*'
5107 || code == '^')
5108 return true;
5109
5110 return false;
5111 }
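/* These punctuation codes appear in output templates (presumably in
   pa.md): "%#" emits a nop when the delay slot is empty, and "%*"
   appends the ",n" nullification completer; both are handled by
   pa_print_operand below.  */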
5112
5113 /* Print operand X (an rtx) in assembler syntax to file FILE.
5114 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
5115 For `%' followed by punctuation, CODE is the punctuation and X is null. */
5116
5117 void
5118 pa_print_operand (FILE *file, rtx x, int code)
5119 {
5120 switch (code)
5121 {
5122 case '#':
5123 /* Output a 'nop' if there's nothing for the delay slot. */
5124 if (dbr_sequence_length () == 0)
5125 fputs ("\n\tnop", file);
5126 return;
5127 case '*':
5128 /* Output a nullification completer if there's nothing for the
5129 delay slot or nullification is requested. */
5130 if (dbr_sequence_length () == 0 ||
5131 (final_sequence &&
5132 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))))
5133 fputs (",n", file);
5134 return;
5135 case 'R':
5136 /* Print out the second register name of a register pair.
5137 I.e., R (6) => 7. */
5138 fputs (reg_names[REGNO (x) + 1], file);
5139 return;
5140 case 'r':
5141 /* A register or zero. */
5142 if (x == const0_rtx
5143 || (x == CONST0_RTX (DFmode))
5144 || (x == CONST0_RTX (SFmode)))
5145 {
5146 fputs ("%r0", file);
5147 return;
5148 }
5149 else
5150 break;
5151 case 'f':
5152 /* A register or zero (floating point). */
5153 if (x == const0_rtx
5154 || (x == CONST0_RTX (DFmode))
5155 || (x == CONST0_RTX (SFmode)))
5156 {
5157 fputs ("%fr0", file);
5158 return;
5159 }
5160 else
5161 break;
5162 case 'A':
5163 {
5164 rtx xoperands[2];
5165
5166 xoperands[0] = XEXP (XEXP (x, 0), 0);
5167 xoperands[1] = XVECEXP (XEXP (XEXP (x, 0), 1), 0, 0);
5168 pa_output_global_address (file, xoperands[1], 0);
5169 fprintf (file, "(%s)", reg_names [REGNO (xoperands[0])]);
5170 return;
5171 }
5172
5173 case 'C': /* Plain (C)ondition */
5174 case 'X':
5175 switch (GET_CODE (x))
5176 {
5177 case EQ:
5178 fputs ("=", file); break;
5179 case NE:
5180 fputs ("<>", file); break;
5181 case GT:
5182 fputs (">", file); break;
5183 case GE:
5184 fputs (">=", file); break;
5185 case GEU:
5186 fputs (">>=", file); break;
5187 case GTU:
5188 fputs (">>", file); break;
5189 case LT:
5190 fputs ("<", file); break;
5191 case LE:
5192 fputs ("<=", file); break;
5193 case LEU:
5194 fputs ("<<=", file); break;
5195 case LTU:
5196 fputs ("<<", file); break;
5197 default:
5198 gcc_unreachable ();
5199 }
5200 return;
5201 case 'N': /* Condition, (N)egated */
5202 switch (GET_CODE (x))
5203 {
5204 case EQ:
5205 fputs ("<>", file); break;
5206 case NE:
5207 fputs ("=", file); break;
5208 case GT:
5209 fputs ("<=", file); break;
5210 case GE:
5211 fputs ("<", file); break;
5212 case GEU:
5213 fputs ("<<", file); break;
5214 case GTU:
5215 fputs ("<<=", file); break;
5216 case LT:
5217 fputs (">=", file); break;
5218 case LE:
5219 fputs (">", file); break;
5220 case LEU:
5221 fputs (">>", file); break;
5222 case LTU:
5223 fputs (">>=", file); break;
5224 default:
5225 gcc_unreachable ();
5226 }
5227 return;
5228 /* For floating point comparisons. Note that the output
5229 predicates are the complement of the desired mode. The
5230 conditions for GT, GE, LT, LE and LTGT cause an invalid
5231 operation exception if the result is unordered and this
5232 exception is enabled in the floating-point status register. */
5233 case 'Y':
5234 switch (GET_CODE (x))
5235 {
5236 case EQ:
5237 fputs ("!=", file); break;
5238 case NE:
5239 fputs ("=", file); break;
5240 case GT:
5241 fputs ("!>", file); break;
5242 case GE:
5243 fputs ("!>=", file); break;
5244 case LT:
5245 fputs ("!<", file); break;
5246 case LE:
5247 fputs ("!<=", file); break;
5248 case LTGT:
5249 fputs ("!<>", file); break;
5250 case UNLE:
5251 fputs ("!?<=", file); break;
5252 case UNLT:
5253 fputs ("!?<", file); break;
5254 case UNGE:
5255 fputs ("!?>=", file); break;
5256 case UNGT:
5257 fputs ("!?>", file); break;
5258 case UNEQ:
5259 fputs ("!?=", file); break;
5260 case UNORDERED:
5261 fputs ("!?", file); break;
5262 case ORDERED:
5263 fputs ("?", file); break;
5264 default:
5265 gcc_unreachable ();
5266 }
5267 return;
5268 case 'S': /* Condition, operands are (S)wapped. */
5269 switch (GET_CODE (x))
5270 {
5271 case EQ:
5272 fputs ("=", file); break;
5273 case NE:
5274 fputs ("<>", file); break;
5275 case GT:
5276 fputs ("<", file); break;
5277 case GE:
5278 fputs ("<=", file); break;
5279 case GEU:
5280 fputs ("<<=", file); break;
5281 case GTU:
5282 fputs ("<<", file); break;
5283 case LT:
5284 fputs (">", file); break;
5285 case LE:
5286 fputs (">=", file); break;
5287 case LEU:
5288 fputs (">>=", file); break;
5289 case LTU:
5290 fputs (">>", file); break;
5291 default:
5292 gcc_unreachable ();
5293 }
5294 return;
5295 case 'B': /* Condition, (B)oth swapped and negate. */
5296 switch (GET_CODE (x))
5297 {
5298 case EQ:
5299 fputs ("<>", file); break;
5300 case NE:
5301 fputs ("=", file); break;
5302 case GT:
5303 fputs (">=", file); break;
5304 case GE:
5305 fputs (">", file); break;
5306 case GEU:
5307 fputs (">>", file); break;
5308 case GTU:
5309 fputs (">>=", file); break;
5310 case LT:
5311 fputs ("<=", file); break;
5312 case LE:
5313 fputs ("<", file); break;
5314 case LEU:
5315 fputs ("<<", file); break;
5316 case LTU:
5317 fputs ("<<=", file); break;
5318 default:
5319 gcc_unreachable ();
5320 }
5321 return;
5322 case 'k':
5323 gcc_assert (GET_CODE (x) == CONST_INT);
5324 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x));
5325 return;
5326 case 'Q':
5327 gcc_assert (GET_CODE (x) == CONST_INT);
5328 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - (INTVAL (x) & 63));
5329 return;
5330 case 'L':
5331 gcc_assert (GET_CODE (x) == CONST_INT);
5332 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - (INTVAL (x) & 31));
5333 return;
5334 case 'o':
5335 gcc_assert (GET_CODE (x) == CONST_INT
5336 && (INTVAL (x) == 1 || INTVAL (x) == 2 || INTVAL (x) == 3));
5337 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5338 return;
5339 case 'O':
5340 gcc_assert (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0);
5341 fprintf (file, "%d", exact_log2 (INTVAL (x)));
5342 return;
5343 case 'p':
5344 gcc_assert (GET_CODE (x) == CONST_INT);
5345 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 63 - (INTVAL (x) & 63));
5346 return;
5347 case 'P':
5348 gcc_assert (GET_CODE (x) == CONST_INT);
5349 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 31 - (INTVAL (x) & 31));
5350 return;
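/* Illustration of the complement codes above: for INTVAL == 5, %Q
   prints 59 (64 - 5), %L prints 27 (32 - 5), %p prints 58 (63 - 5)
   and %P prints 26 (31 - 5); presumably these feed shift and deposit
   templates that want the complemented bit position or count.  */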
5351 case 'I':
5352 if (GET_CODE (x) == CONST_INT)
5353 fputs ("i", file);
5354 return;
5355 case 'M':
5356 case 'F':
5357 switch (GET_CODE (XEXP (x, 0)))
5358 {
5359 case PRE_DEC:
5360 case PRE_INC:
5361 if (ASSEMBLER_DIALECT == 0)
5362 fputs ("s,mb", file);
5363 else
5364 fputs (",mb", file);
5365 break;
5366 case POST_DEC:
5367 case POST_INC:
5368 if (ASSEMBLER_DIALECT == 0)
5369 fputs ("s,ma", file);
5370 else
5371 fputs (",ma", file);
5372 break;
5373 case PLUS:
5374 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5375 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5376 {
5377 if (ASSEMBLER_DIALECT == 0)
5378 fputs ("x", file);
5379 }
5380 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5381 || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5382 {
5383 if (ASSEMBLER_DIALECT == 0)
5384 fputs ("x,s", file);
5385 else
5386 fputs (",s", file);
5387 }
5388 else if (code == 'F' && ASSEMBLER_DIALECT == 0)
5389 fputs ("s", file);
5390 break;
5391 default:
5392 if (code == 'F' && ASSEMBLER_DIALECT == 0)
5393 fputs ("s", file);
5394 break;
5395 }
5396 return;
5397 case 'G':
5398 pa_output_global_address (file, x, 0);
5399 return;
5400 case 'H':
5401 pa_output_global_address (file, x, 1);
5402 return;
5403 case 0: /* Don't do anything special */
5404 break;
5405 case 'Z':
5406 {
5407 unsigned op[3];
5408 compute_zdepwi_operands (INTVAL (x), op);
5409 fprintf (file, "%u,%u,%u", op[0], op[1], op[2]);
5410 return;
5411 }
5412 case 'z':
5413 {
5414 unsigned op[3];
5415 compute_zdepdi_operands (INTVAL (x), op);
5416 fprintf (file, "%u,%u,%u", op[0], op[1], op[2]);
5417 return;
5418 }
5419 case 'c':
5420 /* We can get here from a .vtable_inherit due to our
5421 CONSTANT_ADDRESS_P rejecting perfectly good constant
5422 addresses. */
5423 break;
5424 default:
5425 gcc_unreachable ();
5426 }
5427 if (GET_CODE (x) == REG)
5428 {
5429 fputs (reg_names [REGNO (x)], file);
5430 if (TARGET_64BIT && FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4)
5431 {
5432 fputs ("R", file);
5433 return;
5434 }
5435 if (FP_REG_P (x)
5436 && GET_MODE_SIZE (GET_MODE (x)) <= 4
5437 && (REGNO (x) & 1) == 0)
5438 fputs ("L", file);
5439 }
5440 else if (GET_CODE (x) == MEM)
5441 {
5442 int size = GET_MODE_SIZE (GET_MODE (x));
5443 rtx base = NULL_RTX;
5444 switch (GET_CODE (XEXP (x, 0)))
5445 {
5446 case PRE_DEC:
5447 case POST_DEC:
5448 base = XEXP (XEXP (x, 0), 0);
5449 fprintf (file, "-%d(%s)", size, reg_names [REGNO (base)]);
5450 break;
5451 case PRE_INC:
5452 case POST_INC:
5453 base = XEXP (XEXP (x, 0), 0);
5454 fprintf (file, "%d(%s)", size, reg_names [REGNO (base)]);
5455 break;
5456 case PLUS:
5457 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT)
5458 fprintf (file, "%s(%s)",
5459 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))],
5460 reg_names [REGNO (XEXP (XEXP (x, 0), 1))]);
5461 else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT)
5462 fprintf (file, "%s(%s)",
5463 reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))],
5464 reg_names [REGNO (XEXP (XEXP (x, 0), 0))]);
5465 else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
5466 && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG)
5467 {
5468 /* Because the REG_POINTER flag can get lost during reload,
5469 pa_legitimate_address_p canonicalizes the order of the
5470 index and base registers in the combined move patterns. */
5471 rtx base = XEXP (XEXP (x, 0), 1);
5472 rtx index = XEXP (XEXP (x, 0), 0);
5473
5474 fprintf (file, "%s(%s)",
5475 reg_names [REGNO (index)], reg_names [REGNO (base)]);
5476 }
5477 else
5478 output_address (GET_MODE (x), XEXP (x, 0));
5479 break;
5480 default:
5481 output_address (GET_MODE (x), XEXP (x, 0));
5482 break;
5483 }
5484 }
5485 else
5486 output_addr_const (file, x);
5487 }
5488
5489 /* Output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5490
5491 void
5492 pa_output_global_address (FILE *file, rtx x, int round_constant)
5493 {
5494
5495 /* Imagine (high (const (plus ...))). */
5496 if (GET_CODE (x) == HIGH)
5497 x = XEXP (x, 0);
5498
5499 if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x, VOIDmode))
5500 output_addr_const (file, x);
5501 else if (GET_CODE (x) == SYMBOL_REF && !flag_pic)
5502 {
5503 output_addr_const (file, x);
5504 fputs ("-$global$", file);
5505 }
5506 else if (GET_CODE (x) == CONST)
5507 {
5508 const char *sep = "";
5509 int offset = 0; /* assembler wants -$global$ at end */
5510 rtx base = NULL_RTX;
5511
5512 switch (GET_CODE (XEXP (XEXP (x, 0), 0)))
5513 {
5514 case LABEL_REF:
5515 case SYMBOL_REF:
5516 base = XEXP (XEXP (x, 0), 0);
5517 output_addr_const (file, base);
5518 break;
5519 case CONST_INT:
5520 offset = INTVAL (XEXP (XEXP (x, 0), 0));
5521 break;
5522 default:
5523 gcc_unreachable ();
5524 }
5525
5526 switch (GET_CODE (XEXP (XEXP (x, 0), 1)))
5527 {
5528 case LABEL_REF:
5529 case SYMBOL_REF:
5530 base = XEXP (XEXP (x, 0), 1);
5531 output_addr_const (file, base);
5532 break;
5533 case CONST_INT:
5534 offset = INTVAL (XEXP (XEXP (x, 0), 1));
5535 break;
5536 default:
5537 gcc_unreachable ();
5538 }
5539
5540 /* How bogus. The compiler is apparently responsible for
5541 rounding the constant if it uses an LR field selector.
5542
5543 The linker and/or assembler seem a better place since
5544 they have to do this kind of thing already.
5545
5546 If we fail to do this, HP's optimizing linker may eliminate
5547 an addil, but not update the ldw/stw/ldo instruction that
5548 uses the result of the addil. */
5549 if (round_constant)
5550 offset = ((offset + 0x1000) & ~0x1fff);
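/* Editor's illustration (values assumed, not from the original source):
   with offset 0x4321, (0x4321 + 0x1000) & ~0x1fff = 0x5321 & ~0x1fff
   = 0x4000, i.e. the offset is rounded to the nearest multiple of
   0x2000 (8 KiB), which is the rounding the LR field selector uses. */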
5551
5552 switch (GET_CODE (XEXP (x, 0)))
5553 {
5554 case PLUS:
5555 if (offset < 0)
5556 {
5557 offset = -offset;
5558 sep = "-";
5559 }
5560 else
5561 sep = "+";
5562 break;
5563
5564 case MINUS:
5565 gcc_assert (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF);
5566 sep = "-";
5567 break;
5568
5569 default:
5570 gcc_unreachable ();
5571 }
5572
5573 if (!read_only_operand (base, VOIDmode) && !flag_pic)
5574 fputs ("-$global$", file);
5575 if (offset)
5576 fprintf (file, "%s%d", sep, offset);
5577 }
5578 else
5579 output_addr_const (file, x);
5580 }
5581
5582 /* Output boilerplate text to appear at the beginning of the file.
5583 There are several possible versions. */
5584 #define aputs(x) fputs(x, asm_out_file)
5585 static inline void
5586 pa_file_start_level (void)
5587 {
5588 if (TARGET_64BIT)
5589 aputs ("\t.LEVEL 2.0w\n");
5590 else if (TARGET_PA_20)
5591 aputs ("\t.LEVEL 2.0\n");
5592 else if (TARGET_PA_11)
5593 aputs ("\t.LEVEL 1.1\n");
5594 else
5595 aputs ("\t.LEVEL 1.0\n");
5596 }
5597
5598 static inline void
5599 pa_file_start_space (int sortspace)
5600 {
5601 aputs ("\t.SPACE $PRIVATE$");
5602 if (sortspace)
5603 aputs (",SORT=16");
5604 aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31");
5605 if (flag_tm)
5606 aputs ("\n\t.SUBSPA $TM_CLONE_TABLE$,QUAD=1,ALIGN=8,ACCESS=31");
5607 aputs ("\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
5608 "\n\t.SPACE $TEXT$");
5609 if (sortspace)
5610 aputs (",SORT=8");
5611 aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
5612 "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
5613 }
5614
5615 static inline void
5616 pa_file_start_file (int want_version)
5617 {
5618 if (write_symbols != NO_DEBUG)
5619 {
5620 output_file_directive (asm_out_file, main_input_filename);
5621 if (want_version)
5622 aputs ("\t.version\t\"01.01\"\n");
5623 }
5624 }
5625
5626 static inline void
5627 pa_file_start_mcount (const char *aswhat)
5628 {
5629 if (profile_flag)
5630 fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
5631 }
5632
5633 static void
5634 pa_elf_file_start (void)
5635 {
5636 pa_file_start_level ();
5637 pa_file_start_mcount ("ENTRY");
5638 pa_file_start_file (0);
5639 }
5640
5641 static void
5642 pa_som_file_start (void)
5643 {
5644 pa_file_start_level ();
5645 pa_file_start_space (0);
5646 aputs ("\t.IMPORT $global$,DATA\n"
5647 "\t.IMPORT $$dyncall,MILLICODE\n");
5648 pa_file_start_mcount ("CODE");
5649 pa_file_start_file (0);
5650 }
5651
5652 static void
5653 pa_linux_file_start (void)
5654 {
5655 pa_file_start_file (1);
5656 pa_file_start_level ();
5657 pa_file_start_mcount ("CODE");
5658 }
5659
5660 static void
5661 pa_hpux64_gas_file_start (void)
5662 {
5663 pa_file_start_level ();
5664 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
5665 if (profile_flag)
5666 ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
5667 #endif
5668 pa_file_start_file (1);
5669 }
5670
5671 static void
5672 pa_hpux64_hpas_file_start (void)
5673 {
5674 pa_file_start_level ();
5675 pa_file_start_space (1);
5676 pa_file_start_mcount ("CODE");
5677 pa_file_start_file (0);
5678 }
5679 #undef aputs
5680
5681 /* Search the deferred plabel list for SYMBOL and return its internal
5682 label. If an entry for SYMBOL is not found, a new entry is created. */
5683
5684 rtx
5685 pa_get_deferred_plabel (rtx symbol)
5686 {
5687 const char *fname = XSTR (symbol, 0);
5688 size_t i;
5689
5690 /* See if we have already put this function on the list of deferred
5691 plabels. This list is generally small, so a linear search is not
5692 too ugly. If it proves too slow, replace it with something faster. */
5693 for (i = 0; i < n_deferred_plabels; i++)
5694 if (strcmp (fname, XSTR (deferred_plabels[i].symbol, 0)) == 0)
5695 break;
5696
5697 /* If the deferred plabel list is empty, or this entry was not found
5698 on the list, create a new entry on the list. */
5699 if (deferred_plabels == NULL || i == n_deferred_plabels)
5700 {
5701 tree id;
5702
5703 if (deferred_plabels == 0)
5704 deferred_plabels = ggc_alloc<deferred_plabel> ();
5705 else
5706 deferred_plabels = GGC_RESIZEVEC (struct deferred_plabel,
5707 deferred_plabels,
5708 n_deferred_plabels + 1);
5709
5710 i = n_deferred_plabels++;
5711 deferred_plabels[i].internal_label = gen_label_rtx ();
5712 deferred_plabels[i].symbol = symbol;
5713
5714 /* Gross. We have just implicitly taken the address of this
5715 function. Mark it in the same manner as assemble_name. */
5716 id = maybe_get_identifier (targetm.strip_name_encoding (fname));
5717 if (id)
5718 mark_referenced (id);
5719 }
5720
5721 return deferred_plabels[i].internal_label;
5722 }
5723
5724 static void
5725 output_deferred_plabels (void)
5726 {
5727 size_t i;
5728
5729 /* If we have some deferred plabels, then we need to switch into the
5730 data or readonly data section, and align it to a 4 byte boundary
5731 before outputting the deferred plabels. */
5732 if (n_deferred_plabels)
5733 {
5734 switch_to_section (flag_pic ? data_section : readonly_data_section);
5735 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
5736 }
5737
5738 /* Now output the deferred plabels. */
5739 for (i = 0; i < n_deferred_plabels; i++)
5740 {
5741 targetm.asm_out.internal_label (asm_out_file, "L",
5742 CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
5743 assemble_integer (deferred_plabels[i].symbol,
5744 TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
5745 }
5746 }
5747
5748 /* Initialize optabs to point to emulation routines. */
5749
5750 static void
5751 pa_init_libfuncs (void)
5752 {
5753 if (HPUX_LONG_DOUBLE_LIBRARY)
5754 {
5755 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
5756 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
5757 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
5758 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
5759 set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
5760 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
5761 set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
5762 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
5763 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
5764
5765 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
5766 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
5767 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
5768 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
5769 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
5770 set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
5771 set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");
5772
5773 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
5774 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
5775 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
5776 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
5777
5778 set_conv_libfunc (sfix_optab, SImode, TFmode,
5779 TARGET_64BIT ? "__U_Qfcnvfxt_quad_to_sgl"
5780 : "_U_Qfcnvfxt_quad_to_sgl");
5781 set_conv_libfunc (sfix_optab, DImode, TFmode,
5782 "_U_Qfcnvfxt_quad_to_dbl");
5783 set_conv_libfunc (ufix_optab, SImode, TFmode,
5784 "_U_Qfcnvfxt_quad_to_usgl");
5785 set_conv_libfunc (ufix_optab, DImode, TFmode,
5786 "_U_Qfcnvfxt_quad_to_udbl");
5787
5788 set_conv_libfunc (sfloat_optab, TFmode, SImode,
5789 "_U_Qfcnvxf_sgl_to_quad");
5790 set_conv_libfunc (sfloat_optab, TFmode, DImode,
5791 "_U_Qfcnvxf_dbl_to_quad");
5792 set_conv_libfunc (ufloat_optab, TFmode, SImode,
5793 "_U_Qfcnvxf_usgl_to_quad");
5794 set_conv_libfunc (ufloat_optab, TFmode, DImode,
5795 "_U_Qfcnvxf_udbl_to_quad");
5796 }
5797
5798 if (TARGET_SYNC_LIBCALL)
5799 init_sync_libfuncs (8);
5800 }
5801
5802 /* HP's millicode routines mean something special to the assembler.
5803 Keep track of which ones we have used. */
5804
5805 enum millicodes { remI, remU, divI, divU, mulI, end1000 };
5806 static void import_milli (enum millicodes);
5807 static char imported[(int) end1000];
5808 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
5809 static const char import_string[] = ".IMPORT $$....,MILLICODE";
5810 #define MILLI_START 10
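/* Editor's note (not in the original source): MILLI_START is the offset
   of the "...." placeholder in import_string above -- ".IMPORT $$" is
   exactly ten characters. As an illustrative trace, for code == mulI,
   import_milli below overwrites those four characters, producing

       .IMPORT $$mulI,MILLICODE  */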
5811
5812 static void
5813 import_milli (enum millicodes code)
5814 {
5815 char str[sizeof (import_string)];
5816
5817 if (!imported[(int) code])
5818 {
5819 imported[(int) code] = 1;
5820 strcpy (str, import_string);
5821 strncpy (str + MILLI_START, milli_names[(int) code], 4);
5822 output_asm_insn (str, 0);
5823 }
5824 }
5825
5826 /* The register constraints have put the operands and return value in
5827 the proper registers. */
5828
5829 const char *
5830 pa_output_mul_insn (int unsignedp ATTRIBUTE_UNUSED, rtx_insn *insn)
5831 {
5832 import_milli (mulI);
5833 return pa_output_millicode_call (insn, gen_rtx_SYMBOL_REF (Pmode, "$$mulI"));
5834 }
5835
5836 /* Emit the rtl for doing a division by a constant. */
5837
5838 /* Do magic division millicodes exist for this value? */
5839 const int pa_magic_milli[] = {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
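/* Editor's note (not in the original source): read pa_magic_milli[n] != 0
   as "a $$divI_n / $$divU_n millicode exists", i.e. for n in
   {3, 5, 6, 7, 9, 10, 12, 14, 15}. The powers of two are zero here,
   presumably because those divisions reduce to shifts elsewhere, and
   n in {11, 13} simply has no special millicode. */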
5840
5841 /* We'll use an array to keep track of the magic millicodes and
5842 whether or not we've used them already. [n][0] is signed, [n][1] is
5843 unsigned. */
5844
5845 static int div_milli[16][2];
5846
5847 int
5848 pa_emit_hpdiv_const (rtx *operands, int unsignedp)
5849 {
5850 if (GET_CODE (operands[2]) == CONST_INT
5851 && INTVAL (operands[2]) > 0
5852 && INTVAL (operands[2]) < 16
5853 && pa_magic_milli[INTVAL (operands[2])])
5854 {
5855 rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
5856
5857 emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
5858 emit
5859 (gen_rtx_PARALLEL
5860 (VOIDmode,
5861 gen_rtvec (6, gen_rtx_SET (gen_rtx_REG (SImode, 29),
5862 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
5863 SImode,
5864 gen_rtx_REG (SImode, 26),
5865 operands[2])),
5866 gen_rtx_CLOBBER (VOIDmode, operands[4]),
5867 gen_rtx_CLOBBER (VOIDmode, operands[3]),
5868 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
5869 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
5870 gen_rtx_CLOBBER (VOIDmode, ret))));
5871 emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
5872 return 1;
5873 }
5874 return 0;
5875 }
5876
5877 const char *
5878 pa_output_div_insn (rtx *operands, int unsignedp, rtx_insn *insn)
5879 {
5880 int divisor;
5881
5882 /* If the divisor is a constant, try to use one of the special
5883 opcodes. */
5884 if (GET_CODE (operands[0]) == CONST_INT)
5885 {
5886 static char buf[100];
5887 divisor = INTVAL (operands[0]);
5888 if (!div_milli[divisor][unsignedp])
5889 {
5890 div_milli[divisor][unsignedp] = 1;
5891 if (unsignedp)
5892 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands);
5893 else
5894 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands);
5895 }
5896 if (unsignedp)
5897 {
5898 sprintf (buf, "$$divU_" HOST_WIDE_INT_PRINT_DEC,
5899 INTVAL (operands[0]));
5900 return pa_output_millicode_call (insn,
5901 gen_rtx_SYMBOL_REF (SImode, buf));
5902 }
5903 else
5904 {
5905 sprintf (buf, "$$divI_" HOST_WIDE_INT_PRINT_DEC,
5906 INTVAL (operands[0]));
5907 return pa_output_millicode_call (insn,
5908 gen_rtx_SYMBOL_REF (SImode, buf));
5909 }
5910 }
5911 /* Divisor isn't a special constant. */
5912 else
5913 {
5914 if (unsignedp)
5915 {
5916 import_milli (divU);
5917 return pa_output_millicode_call (insn,
5918 gen_rtx_SYMBOL_REF (SImode, "$$divU"));
5919 }
5920 else
5921 {
5922 import_milli (divI);
5923 return pa_output_millicode_call (insn,
5924 gen_rtx_SYMBOL_REF (SImode, "$$divI"));
5925 }
5926 }
5927 }
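/* Editor's illustration (assumed insn stream, not from the original
   source): the first unsigned division by 7 in a translation unit emits

       .IMPORT $$divU_7,MILLICODE

   followed by a millicode call to $$divU_7; later divisions by 7 find
   div_milli[7][1] already set and skip the .IMPORT. */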
5928
5929 /* Output a $$rem millicode to do mod. */
5930
5931 const char *
5932 pa_output_mod_insn (int unsignedp, rtx_insn *insn)
5933 {
5934 if (unsignedp)
5935 {
5936 import_milli (remU);
5937 return pa_output_millicode_call (insn,
5938 gen_rtx_SYMBOL_REF (SImode, "$$remU"));
5939 }
5940 else
5941 {
5942 import_milli (remI);
5943 return pa_output_millicode_call (insn,
5944 gen_rtx_SYMBOL_REF (SImode, "$$remI"));
5945 }
5946 }
5947
5948 void
5949 pa_output_arg_descriptor (rtx_insn *call_insn)
5950 {
5951 const char *arg_regs[4];
5952 machine_mode arg_mode;
5953 rtx link;
5954 int i, output_flag = 0;
5955 int regno;
5956
5957 /* We neither need nor want argument location descriptors for the
5958 64-bit runtime environment or the ELF32 environment. */
5959 if (TARGET_64BIT || TARGET_ELF32)
5960 return;
5961
5962 for (i = 0; i < 4; i++)
5963 arg_regs[i] = 0;
5964
5965 /* Specify explicitly that no argument relocations should take place
5966 if using the portable runtime calling conventions. */
5967 if (TARGET_PORTABLE_RUNTIME)
5968 {
5969 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5970 asm_out_file);
5971 return;
5972 }
5973
5974 gcc_assert (CALL_P (call_insn));
5975 for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
5976 link; link = XEXP (link, 1))
5977 {
5978 rtx use = XEXP (link, 0);
5979
5980 if (! (GET_CODE (use) == USE
5981 && GET_CODE (XEXP (use, 0)) == REG
5982 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
5983 continue;
5984
5985 arg_mode = GET_MODE (XEXP (use, 0));
5986 regno = REGNO (XEXP (use, 0));
5987 if (regno >= 23 && regno <= 26)
5988 {
5989 arg_regs[26 - regno] = "GR";
5990 if (arg_mode == DImode)
5991 arg_regs[25 - regno] = "GR";
5992 }
5993 else if (regno >= 32 && regno <= 39)
5994 {
5995 if (arg_mode == SFmode)
5996 arg_regs[(regno - 32) / 2] = "FR";
5997 else
5998 {
5999 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
6000 arg_regs[(regno - 34) / 2] = "FR";
6001 arg_regs[(regno - 34) / 2 + 1] = "FU";
6002 #else
6003 arg_regs[(regno - 34) / 2] = "FU";
6004 arg_regs[(regno - 34) / 2 + 1] = "FR";
6005 #endif
6006 }
6007 }
6008 }
6009 fputs ("\t.CALL ", asm_out_file);
6010 for (i = 0; i < 4; i++)
6011 {
6012 if (arg_regs[i])
6013 {
6014 if (output_flag++)
6015 fputc (',', asm_out_file);
6016 fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]);
6017 }
6018 }
6019 fputc ('\n', asm_out_file);
6020 }
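/* Editor's illustration (assumed call, not from the original source):
   for a 32-bit SOM call passing two ints, in %r26 and %r25, the loop
   above fills arg_regs[0] and arg_regs[1] with "GR" and prints

       .CALL ARGW0=GR,ARGW1=GR  */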
6021 \f
6022 /* Inform reload about cases where moving X with a mode MODE to or from
6023 a register in RCLASS requires an extra scratch or immediate register.
6024 Return the class needed for the immediate register. */
6025
6026 static reg_class_t
6027 pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
6028 machine_mode mode, secondary_reload_info *sri)
6029 {
6030 int regno;
6031 enum reg_class rclass = (enum reg_class) rclass_i;
6032
6033 /* Handle the easy stuff first. */
6034 if (rclass == R1_REGS)
6035 return NO_REGS;
6036
6037 if (REG_P (x))
6038 {
6039 regno = REGNO (x);
6040 if (rclass == BASE_REG_CLASS && regno < FIRST_PSEUDO_REGISTER)
6041 return NO_REGS;
6042 }
6043 else
6044 regno = -1;
6045
6046 /* If we have something like (mem (mem (...))), we can safely assume the
6047 inner MEM will end up in a general register after reloading, so there's
6048 no need for a secondary reload. */
6049 if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == MEM)
6050 return NO_REGS;
6051
6052 /* Trying to load a constant into a FP register during PIC code
6053 generation requires %r1 as a scratch register. For float modes,
6054 the only legitimate constant is CONST0_RTX. However, there are
6055 a few patterns that accept constant double operands. */
6056 if (flag_pic
6057 && FP_REG_CLASS_P (rclass)
6058 && (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE))
6059 {
6060 switch (mode)
6061 {
6062 case E_SImode:
6063 sri->icode = CODE_FOR_reload_insi_r1;
6064 break;
6065
6066 case E_DImode:
6067 sri->icode = CODE_FOR_reload_indi_r1;
6068 break;
6069
6070 case E_SFmode:
6071 sri->icode = CODE_FOR_reload_insf_r1;
6072 break;
6073
6074 case E_DFmode:
6075 sri->icode = CODE_FOR_reload_indf_r1;
6076 break;
6077
6078 default:
6079 gcc_unreachable ();
6080 }
6081 return NO_REGS;
6082 }
6083
6084 /* Secondary reloads of symbolic expressions require %r1 as a scratch
6085 register when we're generating PIC code or when the operand isn't
6086 readonly. */
6087 if (pa_symbolic_expression_p (x))
6088 {
6089 if (GET_CODE (x) == HIGH)
6090 x = XEXP (x, 0);
6091
6092 if (flag_pic || !read_only_operand (x, VOIDmode))
6093 {
6094 switch (mode)
6095 {
6096 case E_SImode:
6097 sri->icode = CODE_FOR_reload_insi_r1;
6098 break;
6099
6100 case E_DImode:
6101 sri->icode = CODE_FOR_reload_indi_r1;
6102 break;
6103
6104 default:
6105 gcc_unreachable ();
6106 }
6107 return NO_REGS;
6108 }
6109 }
6110
6111 /* Profiling showed the PA port spends about 1.3% of its compilation
6112 time in true_regnum from calls inside pa_secondary_reload_class. */
6113 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
6114 regno = true_regnum (x);
6115
6116 /* Handle reloads for floating point loads and stores. */
6117 if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
6118 && FP_REG_CLASS_P (rclass))
6119 {
6120 if (MEM_P (x))
6121 {
6122 x = XEXP (x, 0);
6123
6124 /* We don't need a secondary reload for indexed memory addresses.
6125
6126 When INT14_OK_STRICT is true, it might appear that we could
6127 directly allow register indirect memory addresses. However,
6128 this doesn't work because we don't support SUBREGs in
6129 floating-point register copies and reload doesn't tell us
6130 when it's going to use a SUBREG. */
6131 if (IS_INDEX_ADDR_P (x))
6132 return NO_REGS;
6133 }
6134
6135 /* Request a secondary reload with a general scratch register
6136 for everything else. ??? Could symbolic operands be handled
6137 directly when generating non-pic PA 2.0 code? */
6138 sri->icode = (in_p
6139 ? direct_optab_handler (reload_in_optab, mode)
6140 : direct_optab_handler (reload_out_optab, mode));
6141 return NO_REGS;
6142 }
6143
6144 /* A SAR<->FP register copy requires an intermediate general register
6145 and secondary memory. We need a secondary reload with a general
6146 scratch register for spills. */
6147 if (rclass == SHIFT_REGS)
6148 {
6149 /* Handle spill. */
6150 if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
6151 {
6152 sri->icode = (in_p
6153 ? direct_optab_handler (reload_in_optab, mode)
6154 : direct_optab_handler (reload_out_optab, mode));
6155 return NO_REGS;
6156 }
6157
6158 /* Handle FP copy. */
6159 if (FP_REG_CLASS_P (REGNO_REG_CLASS (regno)))
6160 return GENERAL_REGS;
6161 }
6162
6163 if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER
6164 && REGNO_REG_CLASS (regno) == SHIFT_REGS
6165 && FP_REG_CLASS_P (rclass))
6166 return GENERAL_REGS;
6167
6168 return NO_REGS;
6169 }
6170
6171 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
6172
6173 static bool
6174 pa_secondary_memory_needed (machine_mode mode ATTRIBUTE_UNUSED,
6175 reg_class_t class1 ATTRIBUTE_UNUSED,
6176 reg_class_t class2 ATTRIBUTE_UNUSED)
6177 {
6178 #ifdef PA_SECONDARY_MEMORY_NEEDED
6179 return PA_SECONDARY_MEMORY_NEEDED (mode, class1, class2);
6180 #else
6181 return false;
6182 #endif
6183 }
6184
6185 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
6186 is only marked as live on entry by df-scan when it is a fixed
6187 register. It isn't a fixed register in the 64-bit runtime,
6188 so we need to mark it here. */
6189
6190 static void
6191 pa_extra_live_on_entry (bitmap regs)
6192 {
6193 if (TARGET_64BIT)
6194 bitmap_set_bit (regs, ARG_POINTER_REGNUM);
6195 }
6196
6197 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
6198 to prevent it from being deleted. */
6199
6200 rtx
6201 pa_eh_return_handler_rtx (void)
6202 {
6203 rtx tmp;
6204
6205 tmp = gen_rtx_PLUS (word_mode, hard_frame_pointer_rtx,
6206 TARGET_64BIT ? GEN_INT (-16) : GEN_INT (-20));
6207 tmp = gen_rtx_MEM (word_mode, tmp);
6208 tmp->volatil = 1;
6209 return tmp;
6210 }
6211
6212 /* In the 32-bit runtime, arguments larger than eight bytes are passed
6213 by invisible reference. As a GCC extension, we also pass anything
6214 with a zero or variable size by reference.
6215
6216 The 64-bit runtime does not describe passing any types by invisible
6217 reference. The internals of GCC can't currently handle passing
6218 empty structures, and zero or variable length arrays when they are
6219 not passed entirely on the stack or by reference. Thus, as a GCC
6220 extension, we pass these types by reference. The HP compiler doesn't
6221 support these types, so hopefully there shouldn't be any compatibility
6222 issues. This may have to be revisited when HP releases a C99 compiler
6223 or updates the ABI. */
6224
6225 static bool
6226 pa_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
6227 machine_mode mode, const_tree type,
6228 bool named ATTRIBUTE_UNUSED)
6229 {
6230 HOST_WIDE_INT size;
6231
6232 if (type)
6233 size = int_size_in_bytes (type);
6234 else
6235 size = GET_MODE_SIZE (mode);
6236
6237 if (TARGET_64BIT)
6238 return size <= 0;
6239 else
6240 return size <= 0 || size > 8;
6241 }
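/* Editor's illustration of the rule above (assumed sizes): in the
   32-bit runtime a 12-byte struct (size > 8) and a zero-sized struct
   both go by invisible reference, while an 8-byte double goes by
   value; in the 64-bit runtime only the zero and variable-sized cases
   go by reference. */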
6242
6243 /* Implement TARGET_FUNCTION_ARG_PADDING. */
6244
6245 static pad_direction
6246 pa_function_arg_padding (machine_mode mode, const_tree type)
6247 {
6248 if (mode == BLKmode
6249 || (TARGET_64BIT
6250 && type
6251 && (AGGREGATE_TYPE_P (type)
6252 || TREE_CODE (type) == COMPLEX_TYPE
6253 || TREE_CODE (type) == VECTOR_TYPE)))
6254 {
6255 /* Return PAD_NONE if justification is not required. */
6256 if (type
6257 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
6258 && (int_size_in_bytes (type) * BITS_PER_UNIT) % PARM_BOUNDARY == 0)
6259 return PAD_NONE;
6260
6261 /* The directions set here are ignored when a BLKmode argument larger
6262 than a word is placed in a register. Different code is used for
6263 the stack and registers. This makes it difficult to have a
6264 consistent data representation for both the stack and registers.
6265 For both runtimes, the justification and padding for arguments on
6266 the stack and in registers should be identical. */
6267 if (TARGET_64BIT)
6268 /* The 64-bit runtime specifies left justification for aggregates. */
6269 return PAD_UPWARD;
6270 else
6271 /* The 32-bit runtime architecture specifies right justification.
6272 When the argument is passed on the stack, the argument is padded
6273 with garbage on the left. The HP compiler pads with zeros. */
6274 return PAD_DOWNWARD;
6275 }
6276
6277 if (GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
6278 return PAD_DOWNWARD;
6279 else
6280 return PAD_NONE;
6281 }
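/* Editor's illustration (assumed sizes, not from the original source):
   a 6-byte BLKmode argument in the 32-bit runtime has 48 bits, not a
   multiple of the 32-bit PARM_BOUNDARY, so it gets PAD_DOWNWARD and is
   right-justified in its two words; the 64-bit runtime left-justifies
   the same aggregate (PAD_UPWARD). An aggregate whose size is a
   multiple of PARM_BOUNDARY needs no justification and gets PAD_NONE. */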
6282
6283 \f
6284 /* Do what is necessary for `va_start'. We look at the current function
6285 to determine if stdargs or varargs is used and fill in an initial
6286 va_list. A pointer to this constructor is returned. */
6287
6288 static rtx
6289 hppa_builtin_saveregs (void)
6290 {
6291 rtx offset, dest;
6292 tree fntype = TREE_TYPE (current_function_decl);
6293 int argadj = ((!stdarg_p (fntype))
6294 ? UNITS_PER_WORD : 0);
6295
6296 if (argadj)
6297 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, argadj);
6298 else
6299 offset = crtl->args.arg_offset_rtx;
6300
6301 if (TARGET_64BIT)
6302 {
6303 int i, off;
6304
6305 /* Adjust for varargs/stdarg differences. */
6306 if (argadj)
6307 offset = plus_constant (Pmode, crtl->args.arg_offset_rtx, -argadj);
6308 else
6309 offset = crtl->args.arg_offset_rtx;
6310
6311 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6312 from the incoming arg pointer and growing to larger addresses. */
6313 for (i = 26, off = -64; i >= 19; i--, off += 8)
6314 emit_move_insn (gen_rtx_MEM (word_mode,
6315 plus_constant (Pmode,
6316 arg_pointer_rtx, off)),
6317 gen_rtx_REG (word_mode, i));
6318
6319 /* The incoming args pointer points just beyond the flushback area;
6320 normally this is not a serious concern. However, when we are doing
6321 varargs/stdargs we want to make the arg pointer point to the start
6322 of the incoming argument area. */
6323 emit_move_insn (virtual_incoming_args_rtx,
6324 plus_constant (Pmode, arg_pointer_rtx, -64));
6325
6326 /* Now return a pointer to the first anonymous argument. */
6327 return copy_to_reg (expand_binop (Pmode, add_optab,
6328 virtual_incoming_args_rtx,
6329 offset, 0, 0, OPTAB_LIB_WIDEN));
6330 }
6331
6332 /* Store general registers on the stack. */
6333 dest = gen_rtx_MEM (BLKmode,
6334 plus_constant (Pmode, crtl->args.internal_arg_pointer,
6335 -16));
6336 set_mem_alias_set (dest, get_varargs_alias_set ());
6337 set_mem_align (dest, BITS_PER_WORD);
6338 move_block_from_reg (23, dest, 4);
6339
6340 /* move_block_from_reg will emit code to store the argument registers
6341 individually as scalar stores.
6342
6343 However, other insns may later load from the same addresses for
6344 a structure load (passing a struct to a varargs routine).
6345
6346 The alias code assumes that such aliasing can never happen, so we
6347 have to keep memory referencing insns from moving up beyond the
6348 last argument register store. So we emit a blockage insn here. */
6349 emit_insn (gen_blockage ());
6350
6351 return copy_to_reg (expand_binop (Pmode, add_optab,
6352 crtl->args.internal_arg_pointer,
6353 offset, 0, 0, OPTAB_LIB_WIDEN));
6354 }
6355
6356 static void
6357 hppa_va_start (tree valist, rtx nextarg)
6358 {
6359 nextarg = expand_builtin_saveregs ();
6360 std_expand_builtin_va_start (valist, nextarg);
6361 }
6362
6363 static tree
6364 hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6365 gimple_seq *post_p)
6366 {
6367 if (TARGET_64BIT)
6368 {
6369 /* Args grow upward. We can use the generic routines. */
6370 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6371 }
6372 else /* !TARGET_64BIT */
6373 {
6374 tree ptr = build_pointer_type (type);
6375 tree valist_type;
6376 tree t, u;
6377 unsigned int size, ofs;
6378 bool indirect;
6379
6380 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0);
6381 if (indirect)
6382 {
6383 type = ptr;
6384 ptr = build_pointer_type (type);
6385 }
6386 size = int_size_in_bytes (type);
6387 valist_type = TREE_TYPE (valist);
6388
6389 /* Args grow down. Not handled by generic routines. */
6390
6391 u = fold_convert (sizetype, size_in_bytes (type));
6392 u = fold_build1 (NEGATE_EXPR, sizetype, u);
6393 t = fold_build_pointer_plus (valist, u);
6394
6395 /* Align to 4 or 8 byte boundary depending on argument size. */
6396
6397 u = build_int_cst (TREE_TYPE (t), (HOST_WIDE_INT)(size > 4 ? -8 : -4));
6398 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
6399 t = fold_convert (valist_type, t);
6400
6401 t = build2 (MODIFY_EXPR, valist_type, valist, t);
6402
6403 ofs = (8 - size) % 4;
6404 if (ofs != 0)
6405 t = fold_build_pointer_plus_hwi (t, ofs);
6406
6407 t = fold_convert (ptr, t);
6408 t = build_va_arg_indirect_ref (t);
6409
6410 if (indirect)
6411 t = build_va_arg_indirect_ref (t);
6412
6413 return t;
6414 }
6415 }
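/* Editor's trace of the !TARGET_64BIT path above, with assumed values
   (not from the original source): for a 2-byte argument and
   valist == 0x100, t = 0x100 - 2 = 0xfe, aligned down with -4 to 0xfc,
   then ofs = (8 - 2) % 4 = 2 moves the pointer back up to 0xfe. The
   object thus occupies the two high-address bytes of the word at 0xfc,
   matching the 32-bit runtime's right justification. */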
6416
6417 /* True if MODE is valid for the target. By "valid", we mean able to
6418 be manipulated in non-trivial ways. In particular, this means all
6419 the arithmetic is supported.
6420
6421 Currently, TImode is not valid as the HP 64-bit runtime documentation
6422 doesn't document the alignment and calling conventions for this type.
6423 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6424 2 * BITS_PER_WORD isn't equal to LONG_LONG_TYPE_SIZE. */
6425
6426 static bool
6427 pa_scalar_mode_supported_p (scalar_mode mode)
6428 {
6429 int precision = GET_MODE_PRECISION (mode);
6430
6431 switch (GET_MODE_CLASS (mode))
6432 {
6433 case MODE_PARTIAL_INT:
6434 case MODE_INT:
6435 if (precision == CHAR_TYPE_SIZE)
6436 return true;
6437 if (precision == SHORT_TYPE_SIZE)
6438 return true;
6439 if (precision == INT_TYPE_SIZE)
6440 return true;
6441 if (precision == LONG_TYPE_SIZE)
6442 return true;
6443 if (precision == LONG_LONG_TYPE_SIZE)
6444 return true;
6445 return false;
6446
6447 case MODE_FLOAT:
6448 if (precision == FLOAT_TYPE_SIZE)
6449 return true;
6450 if (precision == DOUBLE_TYPE_SIZE)
6451 return true;
6452 if (precision == LONG_DOUBLE_TYPE_SIZE)
6453 return true;
6454 return false;
6455
6456 case MODE_DECIMAL_FLOAT:
6457 return false;
6458
6459 default:
6460 gcc_unreachable ();
6461 }
6462 }
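/* Editor's illustration (not in the original source): on the 64-bit
   target, TImode has a precision of 2 * BITS_PER_WORD = 128 bits,
   which matches none of the C type sizes checked above (here
   LONG_LONG_TYPE_SIZE is 64), so the function returns false, as the
   header comment promises. */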
6463
6464 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6465 it branches into the delay slot. Otherwise, return FALSE. */
6466
6467 static bool
6468 branch_to_delay_slot_p (rtx_insn *insn)
6469 {
6470 rtx_insn *jump_insn;
6471
6472 if (dbr_sequence_length ())
6473 return FALSE;
6474
6475 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6476 while (insn)
6477 {
6478 insn = next_active_insn (insn);
6479 if (jump_insn == insn)
6480 return TRUE;
6481
6482 /* We can't rely on the length of asms. So, we return FALSE when
6483 the branch is followed by an asm. */
6484 if (!insn
6485 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6486 || asm_noperands (PATTERN (insn)) >= 0
6487 || get_attr_length (insn) > 0)
6488 break;
6489 }
6490
6491 return FALSE;
6492 }
6493
6494 /* Return TRUE if INSN, a forward jump insn, needs a nop in its delay slot.
6495
6496 This occurs when INSN has an unfilled delay slot and is followed
6497 by an asm. Disaster can occur if the asm is empty and the jump
6498 branches into the delay slot. So, we add a nop in the delay slot
6499 when this occurs. */
6500
6501 static bool
6502 branch_needs_nop_p (rtx_insn *insn)
6503 {
6504 rtx_insn *jump_insn;
6505
6506 if (dbr_sequence_length ())
6507 return FALSE;
6508
6509 jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6510 while (insn)
6511 {
6512 insn = next_active_insn (insn);
6513 if (!insn || jump_insn == insn)
6514 return TRUE;
6515
6516 if (!(GET_CODE (PATTERN (insn)) == ASM_INPUT
6517 || asm_noperands (PATTERN (insn)) >= 0)
6518 && get_attr_length (insn) > 0)
6519 break;
6520 }
6521
6522 return FALSE;
6523 }
6524
6525 /* Return TRUE if INSN, a forward jump insn, can use nullification
6526 to skip the following instruction. This avoids an extra cycle due
6527 to a mis-predicted branch when we fall through. */
6528
6529 static bool
6530 use_skip_p (rtx_insn *insn)
6531 {
6532 rtx_insn *jump_insn = next_active_insn (JUMP_LABEL_AS_INSN (insn));
6533
6534 while (insn)
6535 {
6536 insn = next_active_insn (insn);
6537
6538 /* We can't rely on the length of asms, so we can't skip asms. */
6539 if (!insn
6540 || GET_CODE (PATTERN (insn)) == ASM_INPUT
6541 || asm_noperands (PATTERN (insn)) >= 0)
6542 break;
6543 if (get_attr_length (insn) == 4
6544 && jump_insn == next_active_insn (insn))
6545 return TRUE;
6546 if (get_attr_length (insn) > 0)
6547 break;
6548 }
6549
6550 return FALSE;
6551 }
6552
6553 /* This routine handles all the normal conditional branch sequences we
6554 might need to generate. It handles compare immediate vs compare
6555 register, nullification of delay slots, varying length branches,
6556 negated branches, and all combinations of the above. It returns the
6557 output appropriate to emit the branch corresponding to all given
6558 parameters. */
6559
6560 const char *
6561 pa_output_cbranch (rtx *operands, int negated, rtx_insn *insn)
6562 {
6563 static char buf[100];
6564 bool useskip;
6565 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6566 int length = get_attr_length (insn);
6567 int xdelay;
6568
6569 /* A conditional branch to the following instruction (e.g. the delay slot)
6570 is asking for a disaster. This can happen when not optimizing and
6571 when jump optimization fails.
6572
6573 While it is usually safe to emit nothing, this can fail if the
6574 preceding instruction is a nullified branch with an empty delay
6575 slot and the same branch target as this branch. We could check
6576 for this but jump optimization should eliminate nop jumps. It
6577 is always safe to emit a nop. */
6578 if (branch_to_delay_slot_p (insn))
6579 return "nop";
6580
6581 /* The doubleword form of the cmpib instruction doesn't have the LEU
6582 and GTU conditions while the cmpb instruction does. Since we accept
6583 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6584 if (GET_MODE (operands[1]) == DImode && operands[2] == const0_rtx)
6585 operands[2] = gen_rtx_REG (DImode, 0);
6586 if (GET_MODE (operands[2]) == DImode && operands[1] == const0_rtx)
6587 operands[1] = gen_rtx_REG (DImode, 0);
6588
6589 /* If this is a long branch with its delay slot unfilled, set `nullify'
6590 as it can nullify the delay slot and save a nop. */
6591 if (length == 8 && dbr_sequence_length () == 0)
6592 nullify = 1;
6593
6594 /* If this is a short forward conditional branch which did not get
6595 its delay slot filled, the delay slot can still be nullified. */
6596 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6597 nullify = forward_branch_p (insn);
6598
6599 /* A forward branch over a single nullified insn can be done with a
6600 comclr instruction. This avoids a single cycle penalty due to
6601 mis-predicted branch if we fall through (branch not taken). */
6602 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6603
6604 switch (length)
6605 {
6606 /* All short conditional branches except backwards with an unfilled
6607 delay slot. */
6608 case 4:
6609 if (useskip)
6610 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6611 else
6612 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6613 if (GET_MODE (operands[1]) == DImode)
6614 strcat (buf, "*");
6615 if (negated)
6616 strcat (buf, "%B3");
6617 else
6618 strcat (buf, "%S3");
6619 if (useskip)
6620 strcat (buf, " %2,%r1,%%r0");
6621 else if (nullify)
6622 {
6623 if (branch_needs_nop_p (insn))
6624 strcat (buf, ",n %2,%r1,%0%#");
6625 else
6626 strcat (buf, ",n %2,%r1,%0");
6627 }
6628 else
6629 strcat (buf, " %2,%r1,%0");
6630 break;
6631
6632 /* All long conditionals. Note a short backward branch with an
6633 unfilled delay slot is treated just like a long backward branch
6634 with an unfilled delay slot. */
6635 case 8:
6636 /* Handle weird backwards branch with a filled delay slot
6637 which is nullified. */
6638 if (dbr_sequence_length () != 0
6639 && ! forward_branch_p (insn)
6640 && nullify)
6641 {
6642 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6643 if (GET_MODE (operands[1]) == DImode)
6644 strcat (buf, "*");
6645 if (negated)
6646 strcat (buf, "%S3");
6647 else
6648 strcat (buf, "%B3");
6649 strcat (buf, ",n %2,%r1,.+12\n\tb %0");
6650 }
6651 /* Handle short backwards branch with an unfilled delay slot.
6652 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6653 taken and untaken branches. */
6654 else if (dbr_sequence_length () == 0
6655 && ! forward_branch_p (insn)
6656 && INSN_ADDRESSES_SET_P ()
6657 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
6658 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
6659 {
6660 strcpy (buf, "{com%I2b,|cmp%I2b,}");
6661 if (GET_MODE (operands[1]) == DImode)
6662 strcat (buf, "*");
6663 if (negated)
6664 strcat (buf, "%B3 %2,%r1,%0%#");
6665 else
6666 strcat (buf, "%S3 %2,%r1,%0%#");
6667 }
6668 else
6669 {
6670 strcpy (buf, "{com%I2clr,|cmp%I2clr,}");
6671 if (GET_MODE (operands[1]) == DImode)
6672 strcat (buf, "*");
6673 if (negated)
6674 strcat (buf, "%S3");
6675 else
6676 strcat (buf, "%B3");
6677 if (nullify)
6678 strcat (buf, " %2,%r1,%%r0\n\tb,n %0");
6679 else
6680 strcat (buf, " %2,%r1,%%r0\n\tb %0");
6681 }
6682 break;
6683
6684 default:
6685 /* The reversed conditional branch must branch over one additional
6686 instruction if the delay slot is filled and needs to be extracted
6687 by pa_output_lbranch. If the delay slot is empty or this is a
6688 nullified forward branch, the instruction after the reversed
6689 condition branch must be nullified. */
6690 if (dbr_sequence_length () == 0
6691 || (nullify && forward_branch_p (insn)))
6692 {
6693 nullify = 1;
6694 xdelay = 0;
6695 operands[4] = GEN_INT (length);
6696 }
6697 else
6698 {
6699 xdelay = 1;
6700 operands[4] = GEN_INT (length + 4);
6701 }
6702
6703 /* Create a reversed conditional branch which branches around
6704 the following insns. */
6705 if (GET_MODE (operands[1]) != DImode)
6706 {
6707 if (nullify)
6708 {
6709 if (negated)
6710 strcpy (buf,
6711 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6712 else
6713 strcpy (buf,
6714 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6715 }
6716 else
6717 {
6718 if (negated)
6719 strcpy (buf,
6720 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6721 else
6722 strcpy (buf,
6723 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6724 }
6725 }
6726 else
6727 {
6728 if (nullify)
6729 {
6730 if (negated)
6731 strcpy (buf,
6732 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6733 else
6734 strcpy (buf,
6735 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6736 }
6737 else
6738 {
6739 if (negated)
6740 strcpy (buf,
6741 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6742 else
6743 strcpy (buf,
6744 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6745 }
6746 }
6747
6748 output_asm_insn (buf, operands);
6749 return pa_output_lbranch (operands[0], insn, xdelay);
6750 }
6751 return buf;
6752 }
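/* Editor's illustration (operands assumed, not from the original
   source): a short PA 2.0 forward "branch if %r4 equals %r5" with an
   unfilled, nullified delay slot comes out of the length-4 case above
   roughly as

       cmpb,=,n %r5,%r4,L$0012

   assuming operands[1] is %r4, operands[2] is %r5 and %0 prints as the
   label L$0012. */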
6753
6754 /* Output a PIC pc-relative instruction sequence to load the address of
6755 OPERANDS[0] to register OPERANDS[2]. OPERANDS[0] is a symbol ref
6756 or a code label. OPERANDS[1] specifies the register to use to load
6757 the program counter. OPERANDS[3] may be used for label generation
6758 The sequence is always three instructions in length. The program
6759 counter recorded for PA 1.X is eight bytes more than that for PA 2.0.
6760 Register %r1 is clobbered. */
6761
6762 static void
6763 pa_output_pic_pcrel_sequence (rtx *operands)
6764 {
6765 gcc_assert (SYMBOL_REF_P (operands[0]) || LABEL_P (operands[0]));
6766 if (TARGET_PA_20)
6767 {
6768 /* We can use mfia to determine the current program counter. */
6769 if (TARGET_SOM || !TARGET_GAS)
6770 {
6771 operands[3] = gen_label_rtx ();
6772 targetm.asm_out.internal_label (asm_out_file, "L",
6773 CODE_LABEL_NUMBER (operands[3]));
6774 output_asm_insn ("mfia %1", operands);
6775 output_asm_insn ("addil L'%0-%l3,%1", operands);
6776 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6777 }
6778 else
6779 {
6780 output_asm_insn ("mfia %1", operands);
6781 output_asm_insn ("addil L'%0-$PIC_pcrel$0+12,%1", operands);
6782 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+16(%%r1),%2", operands);
6783 }
6784 }
6785 else
6786 {
6787 /* We need to use a branch to determine the current program counter. */
6788 output_asm_insn ("{bl|b,l} .+8,%1", operands);
6789 if (TARGET_SOM || !TARGET_GAS)
6790 {
6791 operands[3] = gen_label_rtx ();
6792 output_asm_insn ("addil L'%0-%l3,%1", operands);
6793 targetm.asm_out.internal_label (asm_out_file, "L",
6794 CODE_LABEL_NUMBER (operands[3]));
6795 output_asm_insn ("ldo R'%0-%l3(%%r1),%2", operands);
6796 }
6797 else
6798 {
6799 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%1", operands);
6800 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%2", operands);
6801 }
6802 }
6803 }
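/* Editor's illustration (assumed symbol "foo", PA 2.0 with GAS, not
   from the original source): the second TARGET_PA_20 branch above
   expands to

       mfia %r1
       addil L'foo-$PIC_pcrel$0+12,%r1
       ldo R'foo-$PIC_pcrel$0+16(%r1),%r19

   assuming OPERANDS[1] is %r1 and OPERANDS[2] is %r19. */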
6804
6805 /* This routine handles output of long unconditional branches that
6806 exceed the maximum range of a simple branch instruction. Since
6807 we don't have a register available for the branch, we save register
6808 %r1 in the frame marker, load the branch destination DEST into %r1,
6809 execute the branch, and restore %r1 in the delay slot of the branch.
6810
6811 Since long branches may have an insn in the delay slot and the
6812 delay slot is used to restore %r1, we in general need to extract
6813 this insn and execute it before the branch. However, to facilitate
6814 use of this function by conditional branches, we also provide an
6815 option to not extract the delay insn so that it will be emitted
6816 after the long branch. So, if there is an insn in the delay slot,
6817 it is extracted if XDELAY is nonzero.
6818
6819 The lengths of the various long-branch sequences are 20, 16 and 24
6820 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6821
6822 const char *
6823 pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
6824 {
6825 rtx xoperands[4];
6826
6827 xoperands[0] = dest;
6828
6829 /* First, free up the delay slot. */
6830 if (xdelay && dbr_sequence_length () != 0)
6831 {
6832 /* We can't handle a jump in the delay slot. */
6833 gcc_assert (! JUMP_P (NEXT_INSN (insn)));
6834
6835 final_scan_insn (NEXT_INSN (insn), asm_out_file,
6836 optimize, 0, NULL);
6837
6838 /* Now delete the delay insn. */
6839 SET_INSN_DELETED (NEXT_INSN (insn));
6840 }
6841
6842 /* Output an insn to save %r1. The runtime documentation doesn't
6843 specify whether the "Clean Up" slot in the caller's frame can
6844 be clobbered by the callee. It isn't copied by HP's builtin
6845 alloca, so this suggests that it can be clobbered if necessary.
6846 The "Static Link" location is copied by HP builtin alloca, so
6847 we avoid using it. Using the cleanup slot might be a problem
6848 if we have to interoperate with languages that pass cleanup
6849 information. However, it should be possible to handle these
6850 situations with GCC's asm feature.
6851
6852 The "Current RP" slot is reserved for the called procedure, so
6853 we try to use it when we don't have a frame of our own. It's
6854 rather unlikely that we won't have a frame when we need to emit
6855 a very long branch.
6856
6857 Really the way to go long term is a register scavenger; go to
6858 the target of the jump and find a register which we can use
6859 as a scratch to hold the value in %r1. Then, we wouldn't have
6860 to free up the delay slot or clobber a slot that may be needed
6861 for other purposes. */
6862 if (TARGET_64BIT)
6863 {
6864 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6865 /* Use the return pointer slot in the frame marker. */
6866 output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
6867 else
6868 /* Use the slot at -40 in the frame marker since HP builtin
6869 alloca doesn't copy it. */
6870 output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
6871 }
6872 else
6873 {
6874 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6875 /* Use the return pointer slot in the frame marker. */
6876 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
6877 else
6878 /* Use the "Clean Up" slot in the frame marker. In GCC,
6879 the only other use of this location is for copying a
6880 floating point double argument from a floating-point
6881 register to two general registers. The copy is done
6882 as an "atomic" operation when outputting a call, so it
6883 won't interfere with our using the location here. */
6884 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
6885 }
6886
6887 if (TARGET_PORTABLE_RUNTIME)
6888 {
6889 output_asm_insn ("ldil L'%0,%%r1", xoperands);
6890 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
6891 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6892 }
6893 else if (flag_pic)
6894 {
6895 xoperands[1] = gen_rtx_REG (Pmode, 1);
6896 xoperands[2] = xoperands[1];
6897 pa_output_pic_pcrel_sequence (xoperands);
6898 output_asm_insn ("bv %%r0(%%r1)", xoperands);
6899 }
6900 else
6901 /* Now output a very long branch to the original target. */
6902 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
6903
6904 /* Now restore the value of %r1 in the delay slot. */
6905 if (TARGET_64BIT)
6906 {
6907 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6908 return "ldd -16(%%r30),%%r1";
6909 else
6910 return "ldd -40(%%r30),%%r1";
6911 }
6912 else
6913 {
6914 if (actual_fsize == 0 && !df_regs_ever_live_p (2))
6915 return "ldw -20(%%r30),%%r1";
6916 else
6917 return "ldw -12(%%r30),%%r1";
6918 }
6919 }
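/* Editor's illustration (non-PIC 32-bit code with a frame, target name
   assumed): a long branch to "target" comes out of the code above as
   the 16-byte sequence the header comment promises for non-PIC:

       stw %r1,-12(%r30)        ; spill %r1 to the Clean Up slot
       ldil L'target,%r1
       be R'target(%sr4,%r1)    ; long inter-space branch
       ldw -12(%r30),%r1        ; delay slot: restore %r1  */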
6920
6921 /* This routine handles all the branch-on-bit conditional branch sequences we
6922 might need to generate. It handles nullification of delay slots,
6923 varying length branches, negated branches and all combinations of the
6924 above. It returns the appropriate output template to emit the branch. */
6925
6926 const char *
6927 pa_output_bb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn, int which)
6928 {
6929 static char buf[100];
6930 bool useskip;
6931 int nullify = INSN_ANNULLED_BRANCH_P (insn);
6932 int length = get_attr_length (insn);
6933 int xdelay;
6934
6935 /* A conditional branch to the following instruction (e.g. the delay slot) is
6936 asking for a disaster. I do not think this can happen as this pattern
6937 is only used when optimizing; jump optimization should eliminate the
6938 jump. But be prepared just in case. */
6939
6940 if (branch_to_delay_slot_p (insn))
6941 return "nop";
6942
6943 /* If this is a long branch with its delay slot unfilled, set `nullify'
6944 as it can nullify the delay slot and save a nop. */
6945 if (length == 8 && dbr_sequence_length () == 0)
6946 nullify = 1;
6947
6948 /* If this is a short forward conditional branch which did not get
6949 its delay slot filled, the delay slot can still be nullified. */
6950 if (! nullify && length == 4 && dbr_sequence_length () == 0)
6951 nullify = forward_branch_p (insn);
6952
6953 /* A forward branch over a single nullified insn can be done with an
6954 extrs instruction. This avoids a single cycle penalty due to
6955 mis-predicted branch if we fall through (branch not taken). */
6956 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
6957
6958 switch (length)
6959 {
6960
6961 /* All short conditional branches except backwards with an unfilled
6962 delay slot. */
6963 case 4:
6964 if (useskip)
6965 strcpy (buf, "{extrs,|extrw,s,}");
6966 else
6967 strcpy (buf, "bb,");
6968 if (useskip && GET_MODE (operands[0]) == DImode)
6969 strcpy (buf, "extrd,s,*");
6970 else if (GET_MODE (operands[0]) == DImode)
6971 strcpy (buf, "bb,*");
6972 if ((which == 0 && negated)
6973 || (which == 1 && ! negated))
6974 strcat (buf, ">=");
6975 else
6976 strcat (buf, "<");
6977 if (useskip)
6978 strcat (buf, " %0,%1,1,%%r0");
6979 else if (nullify && negated)
6980 {
6981 if (branch_needs_nop_p (insn))
6982 strcat (buf, ",n %0,%1,%3%#");
6983 else
6984 strcat (buf, ",n %0,%1,%3");
6985 }
6986 else if (nullify && ! negated)
6987 {
6988 if (branch_needs_nop_p (insn))
6989 strcat (buf, ",n %0,%1,%2%#");
6990 else
6991 strcat (buf, ",n %0,%1,%2");
6992 }
6993 else if (! nullify && negated)
6994 strcat (buf, " %0,%1,%3");
6995 else if (! nullify && ! negated)
6996 strcat (buf, " %0,%1,%2");
6997 break;
6998
6999 /* All long conditionals. Note a short backward branch with an
7000 unfilled delay slot is treated just like a long backward branch
7001 with an unfilled delay slot. */
7002 case 8:
7003 /* Handle weird backwards branch with a filled delay slot
7004 which is nullified. */
7005 if (dbr_sequence_length () != 0
7006 && ! forward_branch_p (insn)
7007 && nullify)
7008 {
7009 strcpy (buf, "bb,");
7010 if (GET_MODE (operands[0]) == DImode)
7011 strcat (buf, "*");
7012 if ((which == 0 && negated)
7013 || (which == 1 && ! negated))
7014 strcat (buf, "<");
7015 else
7016 strcat (buf, ">=");
7017 if (negated)
7018 strcat (buf, ",n %0,%1,.+12\n\tb %3");
7019 else
7020 strcat (buf, ",n %0,%1,.+12\n\tb %2");
7021 }
7022 /* Handle short backwards branch with an unfilled delay slot.
7023 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7024 taken and untaken branches. */
7025 else if (dbr_sequence_length () == 0
7026 && ! forward_branch_p (insn)
7027 && INSN_ADDRESSES_SET_P ()
7028 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7029 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7030 {
7031 strcpy (buf, "bb,");
7032 if (GET_MODE (operands[0]) == DImode)
7033 strcat (buf, "*");
7034 if ((which == 0 && negated)
7035 || (which == 1 && ! negated))
7036 strcat (buf, ">=");
7037 else
7038 strcat (buf, "<");
7039 if (negated)
7040 strcat (buf, " %0,%1,%3%#");
7041 else
7042 strcat (buf, " %0,%1,%2%#");
7043 }
7044 else
7045 {
7046 if (GET_MODE (operands[0]) == DImode)
7047 strcpy (buf, "extrd,s,*");
7048 else
7049 strcpy (buf, "{extrs,|extrw,s,}");
7050 if ((which == 0 && negated)
7051 || (which == 1 && ! negated))
7052 strcat (buf, "<");
7053 else
7054 strcat (buf, ">=");
7055 if (nullify && negated)
7056 strcat (buf, " %0,%1,1,%%r0\n\tb,n %3");
7057 else if (nullify && ! negated)
7058 strcat (buf, " %0,%1,1,%%r0\n\tb,n %2");
7059 else if (negated)
7060 strcat (buf, " %0,%1,1,%%r0\n\tb %3");
7061 else
7062 strcat (buf, " %0,%1,1,%%r0\n\tb %2");
7063 }
7064 break;
7065
7066 default:
7067 /* The reversed conditional branch must branch over one additional
7068 instruction if the delay slot is filled and needs to be extracted
7069 by pa_output_lbranch. If the delay slot is empty or this is a
7070 nullified forward branch, the instruction after the reversed
7071 condition branch must be nullified. */
7072 if (dbr_sequence_length () == 0
7073 || (nullify && forward_branch_p (insn)))
7074 {
7075 nullify = 1;
7076 xdelay = 0;
7077 operands[4] = GEN_INT (length);
7078 }
7079 else
7080 {
7081 xdelay = 1;
7082 operands[4] = GEN_INT (length + 4);
7083 }
7084
7085 if (GET_MODE (operands[0]) == DImode)
7086 strcpy (buf, "bb,*");
7087 else
7088 strcpy (buf, "bb,");
7089 if ((which == 0 && negated)
7090 || (which == 1 && !negated))
7091 strcat (buf, "<");
7092 else
7093 strcat (buf, ">=");
7094 if (nullify)
7095 strcat (buf, ",n %0,%1,.+%4");
7096 else
7097 strcat (buf, " %0,%1,.+%4");
7098 output_asm_insn (buf, operands);
7099 return pa_output_lbranch (negated ? operands[3] : operands[2],
7100 insn, xdelay);
7101 }
7102 return buf;
7103 }
7104
7105 /* This routine handles all the branch-on-variable-bit conditional branch
7106 sequences we might need to generate. It handles nullification of delay
7107 slots, varying length branches, negated branches and all combinations
7108 of the above. It returns the appropriate output template to emit the
7109 branch. */
7110
7111 const char *
7112 pa_output_bvb (rtx *operands ATTRIBUTE_UNUSED, int negated, rtx_insn *insn,
7113 int which)
7114 {
7115 static char buf[100];
7116 bool useskip;
7117 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7118 int length = get_attr_length (insn);
7119 int xdelay;
7120
7121 /* A conditional branch to the following instruction (e.g. the delay slot) is
7122 asking for a disaster. I do not think this can happen as this pattern
7123 is only used when optimizing; jump optimization should eliminate the
7124 jump. But be prepared just in case. */
7125
7126 if (branch_to_delay_slot_p (insn))
7127 return "nop";
7128
7129 /* If this is a long branch with its delay slot unfilled, set `nullify'
7130 as it can nullify the delay slot and save a nop. */
7131 if (length == 8 && dbr_sequence_length () == 0)
7132 nullify = 1;
7133
7134 /* If this is a short forward conditional branch which did not get
7135 its delay slot filled, the delay slot can still be nullified. */
7136 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7137 nullify = forward_branch_p (insn);
7138
7139 /* A forward branch over a single nullified insn can be done with an
7140 extrs instruction. This avoids a single cycle penalty due to
7141 mis-predicted branch if we fall through (branch not taken). */
7142 useskip = (length == 4 && nullify) ? use_skip_p (insn) : FALSE;
7143
7144 switch (length)
7145 {
7146
7147 /* All short conditional branches except backwards with an unfilled
7148 delay slot. */
7149 case 4:
7150 if (useskip)
7151 strcpy (buf, "{vextrs,|extrw,s,}");
7152 else
7153 strcpy (buf, "{bvb,|bb,}");
7154 if (useskip && GET_MODE (operands[0]) == DImode)
7155 strcpy (buf, "extrd,s,*");
7156 else if (GET_MODE (operands[0]) == DImode)
7157 strcpy (buf, "bb,*");
7158 if ((which == 0 && negated)
7159 || (which == 1 && ! negated))
7160 strcat (buf, ">=");
7161 else
7162 strcat (buf, "<");
7163 if (useskip)
7164 strcat (buf, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
7165 else if (nullify && negated)
7166 {
7167 if (branch_needs_nop_p (insn))
7168 strcat (buf, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
7169 else
7170 strcat (buf, "{,n %0,%3|,n %0,%%sar,%3}");
7171 }
7172 else if (nullify && ! negated)
7173 {
7174 if (branch_needs_nop_p (insn))
7175 strcat (buf, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
7176 else
7177 strcat (buf, "{,n %0,%2|,n %0,%%sar,%2}");
7178 }
7179 else if (! nullify && negated)
7180 strcat (buf, "{ %0,%3| %0,%%sar,%3}");
7181 else if (! nullify && ! negated)
7182 strcat (buf, "{ %0,%2| %0,%%sar,%2}");
7183 break;
7184
7185 /* All long conditionals. Note a short backward branch with an
7186 unfilled delay slot is treated just like a long backward branch
7187 with an unfilled delay slot. */
7188 case 8:
7189 /* Handle weird backwards branch with a filled delay slot
7190 which is nullified. */
7191 if (dbr_sequence_length () != 0
7192 && ! forward_branch_p (insn)
7193 && nullify)
7194 {
7195 strcpy (buf, "{bvb,|bb,}");
7196 if (GET_MODE (operands[0]) == DImode)
7197 strcat (buf, "*");
7198 if ((which == 0 && negated)
7199 || (which == 1 && ! negated))
7200 strcat (buf, "<");
7201 else
7202 strcat (buf, ">=");
7203 if (negated)
7204 strcat (buf, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
7205 else
7206 strcat (buf, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
7207 }
7208 /* Handle short backwards branch with an unfilled delay slot.
7209 Using a bb;nop rather than extrs;bl saves 1 cycle for both
7210 taken and untaken branches. */
7211 else if (dbr_sequence_length () == 0
7212 && ! forward_branch_p (insn)
7213 && INSN_ADDRESSES_SET_P ()
7214 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7215 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7216 {
7217 strcpy (buf, "{bvb,|bb,}");
7218 if (GET_MODE (operands[0]) == DImode)
7219 strcat (buf, "*");
7220 if ((which == 0 && negated)
7221 || (which == 1 && ! negated))
7222 strcat (buf, ">=");
7223 else
7224 strcat (buf, "<");
7225 if (negated)
7226 strcat (buf, "{ %0,%3%#| %0,%%sar,%3%#}");
7227 else
7228 strcat (buf, "{ %0,%2%#| %0,%%sar,%2%#}");
7229 }
7230 else
7231 {
7232 strcpy (buf, "{vextrs,|extrw,s,}");
7233 if (GET_MODE (operands[0]) == DImode)
7234 strcpy (buf, "extrd,s,*");
7235 if ((which == 0 && negated)
7236 || (which == 1 && ! negated))
7237 strcat (buf, "<");
7238 else
7239 strcat (buf, ">=");
7240 if (nullify && negated)
7241 strcat (buf, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
7242 else if (nullify && ! negated)
7243 strcat (buf, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
7244 else if (negated)
7245 strcat (buf, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
7246 else
7247 strcat (buf, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
7248 }
7249 break;
7250
7251 default:
7252 /* The reversed conditional branch must branch over one additional
7253 instruction if the delay slot is filled and needs to be extracted
7254 by pa_output_lbranch. If the delay slot is empty or this is a
7255 nullified forward branch, the instruction after the reversed
7256 condition branch must be nullified. */
7257 if (dbr_sequence_length () == 0
7258 || (nullify && forward_branch_p (insn)))
7259 {
7260 nullify = 1;
7261 xdelay = 0;
7262 operands[4] = GEN_INT (length);
7263 }
7264 else
7265 {
7266 xdelay = 1;
7267 operands[4] = GEN_INT (length + 4);
7268 }
7269
7270 if (GET_MODE (operands[0]) == DImode)
7271 strcpy (buf, "bb,*");
7272 else
7273 strcpy (buf, "{bvb,|bb,}");
7274 if ((which == 0 && negated)
7275 || (which == 1 && !negated))
7276 strcat (buf, "<");
7277 else
7278 strcat (buf, ">=");
7279 if (nullify)
7280 strcat (buf, ",n {%0,.+%4|%0,%%sar,.+%4}");
7281 else
7282 strcat (buf, " {%0,.+%4|%0,%%sar,.+%4}");
7283 output_asm_insn (buf, operands);
7284 return pa_output_lbranch (negated ? operands[3] : operands[2],
7285 insn, xdelay);
7286 }
7287 return buf;
7288 }
7289
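/* Added commentary (not in the original source): a worked example of
   the template construction above.  With length == 4, no
   nullification, WHICH == 1 and NEGATED == 0, the strcpy/strcat calls
   build "{bvb,|bb,}" + ">=" + "{ %0,%2| %0,%%sar,%2}"; the
   assembler-dialect braces then expand at output time to either
   "bvb,>= %0,%2" (old mnemonics) or "bb,>= %0,%sar,%2" (PA 2.0
   mnemonics).  */
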
7290 /* Return the output template for emitting a dbra type insn.
7291
7292 Note it may perform some output operations on its own before
7293 returning the final output string. */
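/* Added commentary: "dbra" is the classic decrement-and-branch idiom.
   In the common register case this becomes a single ADDIB, e.g. the
   illustrative "addib,> -1,%r26,L$loop", which adds -1 to %r26 and
   branches to L$loop while the result satisfies the condition.  */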
7294 const char *
7295 pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
7296 {
7297 int length = get_attr_length (insn);
7298
7299 /* A conditional branch to the following instruction (i.e. the delay slot) is
7300 asking for a disaster. Be prepared! */
7301
7302 if (branch_to_delay_slot_p (insn))
7303 {
7304 if (which_alternative == 0)
7305 return "ldo %1(%0),%0";
7306 else if (which_alternative == 1)
7307 {
7308 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
7309 output_asm_insn ("ldw -16(%%r30),%4", operands);
7310 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7311 return "{fldws|fldw} -16(%%r30),%0";
7312 }
7313 else
7314 {
7315 output_asm_insn ("ldw %0,%4", operands);
7316 return "ldo %1(%4),%4\n\tstw %4,%0";
7317 }
7318 }
7319
7320 if (which_alternative == 0)
7321 {
7322 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7323 int xdelay;
7324
7325 /* If this is a long branch with its delay slot unfilled, set `nullify'
7326 as it can nullify the delay slot and save a nop. */
7327 if (length == 8 && dbr_sequence_length () == 0)
7328 nullify = 1;
7329
7330 /* If this is a short forward conditional branch which did not get
7331 its delay slot filled, the delay slot can still be nullified. */
7332 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7333 nullify = forward_branch_p (insn);
7334
7335 switch (length)
7336 {
7337 case 4:
7338 if (nullify)
7339 {
7340 if (branch_needs_nop_p (insn))
7341 return "addib,%C2,n %1,%0,%3%#";
7342 else
7343 return "addib,%C2,n %1,%0,%3";
7344 }
7345 else
7346 return "addib,%C2 %1,%0,%3";
7347
7348 case 8:
7349 /* Handle weird backwards branch with a filled delay slot
7350 which is nullified. */
7351 if (dbr_sequence_length () != 0
7352 && ! forward_branch_p (insn)
7353 && nullify)
7354 return "addib,%N2,n %1,%0,.+12\n\tb %3";
7355 /* Handle short backwards branch with an unfilled delay slot.
7356 Using an addb;nop rather than addi;bl saves 1 cycle for both
7357 taken and untaken branches. */
7358 else if (dbr_sequence_length () == 0
7359 && ! forward_branch_p (insn)
7360 && INSN_ADDRESSES_SET_P ()
7361 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7362 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7363 return "addib,%C2 %1,%0,%3%#";
7364
7365 /* Handle normal cases. */
7366 if (nullify)
7367 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7368 else
7369 return "addi,%N2 %1,%0,%0\n\tb %3";
7370
7371 default:
7372 /* The reversed conditional branch must branch over one additional
7373 instruction if the delay slot is filled and needs to be extracted
7374 by pa_output_lbranch. If the delay slot is empty or this is a
7375 nullified forward branch, the instruction after the reversed
7376 condition branch must be nullified. */
7377 if (dbr_sequence_length () == 0
7378 || (nullify && forward_branch_p (insn)))
7379 {
7380 nullify = 1;
7381 xdelay = 0;
7382 operands[4] = GEN_INT (length);
7383 }
7384 else
7385 {
7386 xdelay = 1;
7387 operands[4] = GEN_INT (length + 4);
7388 }
7389
7390 if (nullify)
7391 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands);
7392 else
7393 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands);
7394
7395 return pa_output_lbranch (operands[3], insn, xdelay);
7396 }
7397
7398 }
7399 /* Deal with gross reload from FP register case. */
7400 else if (which_alternative == 1)
7401 {
7402 /* Move loop counter from FP register to MEM then into a GR,
7403 increment the GR, store the GR into MEM, and finally reload
7404 the FP register from MEM from within the branch's delay slot. */
7405 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7406 operands);
7407 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
7408 if (length == 24)
7409 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7410 else if (length == 28)
7411 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7412 else
7413 {
7414 operands[5] = GEN_INT (length - 16);
7415 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands);
7416 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7417 return pa_output_lbranch (operands[3], insn, 0);
7418 }
7419 }
7420 /* Deal with gross reload from memory case. */
7421 else
7422 {
7423 /* Reload loop counter from memory, the store back to memory
7424 happens in the branch's delay slot. */
7425 output_asm_insn ("ldw %0,%4", operands);
7426 if (length == 12)
7427 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7428 else if (length == 16)
7429 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7430 else
7431 {
7432 operands[5] = GEN_INT (length - 4);
7433 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands);
7434 return pa_output_lbranch (operands[3], insn, 0);
7435 }
7436 }
7437 }
7438
7439 /* Return the output template for emitting a movb type insn.
7440
7441 Note it may perform some output operations on its own before
7442 returning the final output string. */
7443 const char *
7444 pa_output_movb (rtx *operands, rtx_insn *insn, int which_alternative,
7445 int reverse_comparison)
7446 {
7447 int length = get_attr_length (insn);
7448
7449 /* A conditional branch to the following instruction (i.e. the delay slot) is
7450 asking for a disaster. Be prepared! */
7451
7452 if (branch_to_delay_slot_p (insn))
7453 {
7454 if (which_alternative == 0)
7455 return "copy %1,%0";
7456 else if (which_alternative == 1)
7457 {
7458 output_asm_insn ("stw %1,-16(%%r30)", operands);
7459 return "{fldws|fldw} -16(%%r30),%0";
7460 }
7461 else if (which_alternative == 2)
7462 return "stw %1,%0";
7463 else
7464 return "mtsar %r1";
7465 }
7466
7467 /* Support the second variant. */
7468 if (reverse_comparison)
7469 PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2])));
7470
7471 if (which_alternative == 0)
7472 {
7473 int nullify = INSN_ANNULLED_BRANCH_P (insn);
7474 int xdelay;
7475
7476 /* If this is a long branch with its delay slot unfilled, set `nullify'
7477 as it can nullify the delay slot and save a nop. */
7478 if (length == 8 && dbr_sequence_length () == 0)
7479 nullify = 1;
7480
7481 /* If this is a short forward conditional branch which did not get
7482 its delay slot filled, the delay slot can still be nullified. */
7483 if (! nullify && length == 4 && dbr_sequence_length () == 0)
7484 nullify = forward_branch_p (insn);
7485
7486 switch (length)
7487 {
7488 case 4:
7489 if (nullify)
7490 {
7491 if (branch_needs_nop_p (insn))
7492 return "movb,%C2,n %1,%0,%3%#";
7493 else
7494 return "movb,%C2,n %1,%0,%3";
7495 }
7496 else
7497 return "movb,%C2 %1,%0,%3";
7498
7499 case 8:
7500 /* Handle weird backwards branch with a filled delay slot
7501 which is nullified. */
7502 if (dbr_sequence_length () != 0
7503 && ! forward_branch_p (insn)
7504 && nullify)
7505 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7506
7507 /* Handle short backwards branch with an unfilled delay slot.
7508 Using a movb;nop rather than or;bl saves 1 cycle for both
7509 taken and untaken branches. */
7510 else if (dbr_sequence_length () == 0
7511 && ! forward_branch_p (insn)
7512 && INSN_ADDRESSES_SET_P ()
7513 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn)))
7514 - INSN_ADDRESSES (INSN_UID (insn)) - 8))
7515 return "movb,%C2 %1,%0,%3%#";
7516 /* Handle normal cases. */
7517 if (nullify)
7518 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7519 else
7520 return "or,%N2 %1,%%r0,%0\n\tb %3";
7521
7522 default:
7523 /* The reversed conditional branch must branch over one additional
7524 instruction if the delay slot is filled and needs to be extracted
7525 by pa_output_lbranch. If the delay slot is empty or this is a
7526 nullified forward branch, the instruction after the reversed
7527 condition branch must be nullified. */
7528 if (dbr_sequence_length () == 0
7529 || (nullify && forward_branch_p (insn)))
7530 {
7531 nullify = 1;
7532 xdelay = 0;
7533 operands[4] = GEN_INT (length);
7534 }
7535 else
7536 {
7537 xdelay = 1;
7538 operands[4] = GEN_INT (length + 4);
7539 }
7540
7541 if (nullify)
7542 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands);
7543 else
7544 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands);
7545
7546 return pa_output_lbranch (operands[3], insn, xdelay);
7547 }
7548 }
7549 /* Deal with gross reload for FP destination register case. */
7550 else if (which_alternative == 1)
7551 {
7552 /* Move source register to MEM, perform the branch test, then
7553 finally load the FP register from MEM from within the branch's
7554 delay slot. */
7555 output_asm_insn ("stw %1,-16(%%r30)", operands);
7556 if (length == 12)
7557 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7558 else if (length == 16)
7559 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7560 else
7561 {
7562 operands[4] = GEN_INT (length - 4);
7563 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands);
7564 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands);
7565 return pa_output_lbranch (operands[3], insn, 0);
7566 }
7567 }
7568 /* Deal with gross reload from memory case. */
7569 else if (which_alternative == 2)
7570 {
7571 /* Reload loop counter from memory, the store back to memory
7572 happens in the branch's delay slot. */
7573 if (length == 8)
7574 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7575 else if (length == 12)
7576 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7577 else
7578 {
7579 operands[4] = GEN_INT (length);
7580 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7581 operands);
7582 return pa_output_lbranch (operands[3], insn, 0);
7583 }
7584 }
7585 /* Handle SAR as a destination. */
7586 else
7587 {
7588 if (length == 8)
7589 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7590 else if (length == 12)
7591 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7592 else
7593 {
7594 operands[4] = GEN_INT (length);
7595 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7596 operands);
7597 return pa_output_lbranch (operands[3], insn, 0);
7598 }
7599 }
7600 }
7601
7602 /* Copy any FP arguments in INSN into integer registers. */
7603 static void
7604 copy_fp_args (rtx_insn *insn)
7605 {
7606 rtx link;
7607 rtx xoperands[2];
7608
7609 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7610 {
7611 int arg_mode, regno;
7612 rtx use = XEXP (link, 0);
7613
7614 if (! (GET_CODE (use) == USE
7615 && GET_CODE (XEXP (use, 0)) == REG
7616 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7617 continue;
7618
7619 arg_mode = GET_MODE (XEXP (use, 0));
7620 regno = REGNO (XEXP (use, 0));
7621
7622 /* Is it a floating point register? */
7623 if (regno >= 32 && regno <= 39)
7624 {
7625 /* Copy the FP register into an integer register via memory. */
7626 if (arg_mode == SFmode)
7627 {
7628 xoperands[0] = XEXP (use, 0);
7629 xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2);
7630 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands);
7631 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7632 }
7633 else
7634 {
7635 xoperands[0] = XEXP (use, 0);
7636 xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2);
7637 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands);
7638 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
7639 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
7640 }
7641 }
7642 }
7643 }
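
/* Added commentary: for an SFmode argument in the first FP argument
   register (GCC register number 32), the loop above emits in effect
        fstw  <fpreg>,-16(%sr0,%r30)    ; spill the FP register
        ldw   -16(%sr0,%r30),%r26       ; reload it into the GR
   while a DFmode argument takes one store plus two loads.  These two-
   and three-insn sequences are the 8- and 12-byte counts used by
   length_fp_args below.  */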
7644
7645 /* Compute length of the FP argument copy sequence for INSN. */
7646 static int
7647 length_fp_args (rtx_insn *insn)
7648 {
7649 int length = 0;
7650 rtx link;
7651
7652 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
7653 {
7654 int arg_mode, regno;
7655 rtx use = XEXP (link, 0);
7656
7657 if (! (GET_CODE (use) == USE
7658 && GET_CODE (XEXP (use, 0)) == REG
7659 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
7660 continue;
7661
7662 arg_mode = GET_MODE (XEXP (use, 0));
7663 regno = REGNO (XEXP (use, 0));
7664
7665 /* Is it a floating point register? */
7666 if (regno >= 32 && regno <= 39)
7667 {
7668 if (arg_mode == SFmode)
7669 length += 8;
7670 else
7671 length += 12;
7672 }
7673 }
7674
7675 return length;
7676 }
7677
7678 /* Return the attribute length for the millicode call instruction INSN.
7679 The length must match the code generated by pa_output_millicode_call.
7680 We include the delay slot in the returned length as it is better to
7681 overestimate the length than to underestimate it. */
7682
7683 int
7684 pa_attr_length_millicode_call (rtx_insn *insn)
7685 {
7686 unsigned long distance = -1;
7687 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7688
7689 if (INSN_ADDRESSES_SET_P ())
7690 {
7691 distance = (total + insn_current_reference_address (insn));
7692 if (distance < total)
7693 distance = -1;
7694 }
7695
7696 if (TARGET_64BIT)
7697 {
7698 if (!TARGET_LONG_CALLS && distance < 7600000)
7699 return 8;
7700
7701 return 20;
7702 }
7703 else if (TARGET_PORTABLE_RUNTIME)
7704 return 24;
7705 else
7706 {
7707 if (!TARGET_LONG_CALLS && distance < MAX_PCREL17F_OFFSET)
7708 return 8;
7709
7710 if (!flag_pic)
7711 return 12;
7712
7713 return 24;
7714 }
7715 }
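
/* Added commentary: each return value above is 4 bytes per insn,
   including the delay slot.  The 8-byte case is a single branch plus
   its delay slot; the 24-byte portable-runtime case matches the
   six-insn ldil/ldo/bl/addi/bv/nop sequence emitted by
   pa_output_millicode_call below.  */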
7716
7717 /* INSN is a function call.
7718
7719 CALL_DEST is the routine we are calling. */
7720
7721 const char *
7722 pa_output_millicode_call (rtx_insn *insn, rtx call_dest)
7723 {
7724 int attr_length = get_attr_length (insn);
7725 int seq_length = dbr_sequence_length ();
7726 rtx xoperands[4];
7727
7728 xoperands[0] = call_dest;
7729
7730 /* Handle the common case where we are sure that the branch will
7731 reach the beginning of the $CODE$ subspace. The within reach
7732 form of the $$sh_func_adrs call has a length of 28. Because it
7733 has an attribute type of sh_func_adrs, it never has a nonzero
7734 sequence length (i.e., the delay slot is never filled). */
7735 if (!TARGET_LONG_CALLS
7736 && (attr_length == 8
7737 || (attr_length == 28
7738 && get_attr_type (insn) == TYPE_SH_FUNC_ADRS)))
7739 {
7740 xoperands[1] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);
7741 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7742 }
7743 else
7744 {
7745 if (TARGET_64BIT)
7746 {
7747 /* It might seem that one insn could be saved by accessing
7748 the millicode function using the linkage table. However,
7749 this doesn't work in shared libraries and other dynamically
7750 loaded objects. Using a pc-relative sequence also avoids
7751 problems related to the implicit use of the gp register. */
7752 xoperands[1] = gen_rtx_REG (Pmode, 1);
7753 xoperands[2] = xoperands[1];
7754 pa_output_pic_pcrel_sequence (xoperands);
7755 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
7756 }
7757 else if (TARGET_PORTABLE_RUNTIME)
7758 {
7759 /* Pure portable runtime doesn't allow be/ble; we also don't
7760 have PIC support in the assembler/linker, so this sequence
7761 is needed. */
7762
7763 /* Get the address of our target into %r1. */
7764 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7765 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands);
7766
7767 /* Get our return address into %r31. */
7768 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands);
7769 output_asm_insn ("addi 8,%%r31,%%r31", xoperands);
7770
7771 /* Jump to our target address in %r1. */
7772 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7773 }
7774 else if (!flag_pic)
7775 {
7776 output_asm_insn ("ldil L'%0,%%r1", xoperands);
7777 if (TARGET_PA_20)
7778 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands);
7779 else
7780 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
7781 }
7782 else
7783 {
7784 xoperands[1] = gen_rtx_REG (Pmode, 31);
7785 xoperands[2] = gen_rtx_REG (Pmode, 1);
7786 pa_output_pic_pcrel_sequence (xoperands);
7787
7788 /* Adjust return address. */
7789 output_asm_insn ("ldo {16|24}(%%r31),%%r31", xoperands);
7790
7791 /* Jump to our target address in %r1. */
7792 output_asm_insn ("bv %%r0(%%r1)", xoperands);
7793 }
7794 }
7795
7796 if (seq_length == 0)
7797 output_asm_insn ("nop", xoperands);
7798
7799 return "";
7800 }
7801
7802 /* Return the attribute length of the call instruction INSN. The SIBCALL
7803 flag indicates whether INSN is a regular call or a sibling call. The
7804 length returned must be longer than the code actually generated by
7805 pa_output_call. Since branch shortening is done before delay branch
7806 sequencing, there is no way to determine whether or not the delay
7807 slot will be filled during branch shortening. Even when the delay
7808 slot is filled, we may have to add a nop if the delay slot contains
7809 a branch that can't reach its target. Thus, we always have to include
7810 the delay slot in the length estimate. This used to be done in
7811 pa_adjust_insn_length but we do it here now as some sequences always
7812 fill the delay slot and we can save four bytes in the estimate for
7813 these sequences. */
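/* Added commentary: as a worked example of the accumulation below, a
   non-local, non-sibling call on a 32-bit PA 1.x SOM target with PIC
   and space registers enabled totals 32 + length_fp_args (insn) + 4
   + 8 + 8 bytes -- the worst case for the 32-bit plabel sequence.  */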
7814
7815 int
7816 pa_attr_length_call (rtx_insn *insn, int sibcall)
7817 {
7818 int local_call;
7819 rtx call, call_dest;
7820 tree call_decl;
7821 int length = 0;
7822 rtx pat = PATTERN (insn);
7823 unsigned long distance = -1;
7824
7825 gcc_assert (CALL_P (insn));
7826
7827 if (INSN_ADDRESSES_SET_P ())
7828 {
7829 unsigned long total;
7830
7831 total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
7832 distance = (total + insn_current_reference_address (insn));
7833 if (distance < total)
7834 distance = -1;
7835 }
7836
7837 gcc_assert (GET_CODE (pat) == PARALLEL);
7838
7839 /* Get the call rtx. */
7840 call = XVECEXP (pat, 0, 0);
7841 if (GET_CODE (call) == SET)
7842 call = SET_SRC (call);
7843
7844 gcc_assert (GET_CODE (call) == CALL);
7845
7846 /* Determine if this is a local call. */
7847 call_dest = XEXP (XEXP (call, 0), 0);
7848 call_decl = SYMBOL_REF_DECL (call_dest);
7849 local_call = call_decl && targetm.binds_local_p (call_decl);
7850
7851 /* pc-relative branch. */
7852 if (!TARGET_LONG_CALLS
7853 && ((TARGET_PA_20 && !sibcall && distance < 7600000)
7854 || distance < MAX_PCREL17F_OFFSET))
7855 length += 8;
7856
7857 /* 64-bit plabel sequence. */
7858 else if (TARGET_64BIT && !local_call)
7859 length += sibcall ? 28 : 24;
7860
7861 /* non-pic long absolute branch sequence. */
7862 else if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7863 length += 12;
7864
7865 /* long pc-relative branch sequence. */
7866 else if (TARGET_LONG_PIC_SDIFF_CALL
7867 || (TARGET_GAS && !TARGET_SOM && local_call))
7868 {
7869 length += 20;
7870
7871 if (!TARGET_PA_20 && !TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7872 length += 8;
7873 }
7874
7875 /* 32-bit plabel sequence. */
7876 else
7877 {
7878 length += 32;
7879
7880 if (TARGET_SOM)
7881 length += length_fp_args (insn);
7882
7883 if (flag_pic)
7884 length += 4;
7885
7886 if (!TARGET_PA_20)
7887 {
7888 if (!sibcall)
7889 length += 8;
7890
7891 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
7892 length += 8;
7893 }
7894 }
7895
7896 return length;
7897 }
7898
7899 /* INSN is a function call.
7900
7901 CALL_DEST is the routine we are calling. */
7902
7903 const char *
7904 pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall)
7905 {
7906 int seq_length = dbr_sequence_length ();
7907 tree call_decl = SYMBOL_REF_DECL (call_dest);
7908 int local_call = call_decl && targetm.binds_local_p (call_decl);
7909 rtx xoperands[4];
7910
7911 xoperands[0] = call_dest;
7912
7913 /* Handle the common case where we're sure that the branch will reach
7914 the beginning of the "$CODE$" subspace. This is the beginning of
7915 the current function if we are in a named section. */
7916 if (!TARGET_LONG_CALLS && pa_attr_length_call (insn, sibcall) == 8)
7917 {
7918 xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
7919 output_asm_insn ("{bl|b,l} %0,%1", xoperands);
7920 }
7921 else
7922 {
7923 if (TARGET_64BIT && !local_call)
7924 {
7925 /* ??? As far as I can tell, the HP linker doesn't support the
7926 long pc-relative sequence described in the 64-bit runtime
7927 architecture. So, we use a slightly longer indirect call. */
7928 xoperands[0] = pa_get_deferred_plabel (call_dest);
7929 xoperands[1] = gen_label_rtx ();
7930
7931 /* If this isn't a sibcall, we put the load of %r27 into the
7932 delay slot. We can't do this in a sibcall as we don't
7933 have a second call-clobbered scratch register available.
7934 We don't need to do anything when generating fast indirect
7935 calls. */
7936 if (seq_length != 0 && !sibcall)
7937 {
7938 final_scan_insn (NEXT_INSN (insn), asm_out_file,
7939 optimize, 0, NULL);
7940
7941 /* Now delete the delay insn. */
7942 SET_INSN_DELETED (NEXT_INSN (insn));
7943 seq_length = 0;
7944 }
7945
7946 output_asm_insn ("addil LT'%0,%%r27", xoperands);
7947 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands);
7948 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands);
7949
7950 if (sibcall)
7951 {
7952 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7953 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands);
7954 output_asm_insn ("bve (%%r1)", xoperands);
7955 }
7956 else
7957 {
7958 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands);
7959 output_asm_insn ("bve,l (%%r2),%%r2", xoperands);
7960 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands);
7961 seq_length = 1;
7962 }
7963 }
7964 else
7965 {
7966 int indirect_call = 0;
7967
7968 /* Emit a long call. There are several different sequences
7969 of increasing length and complexity. In most cases,
7970 they don't allow an instruction in the delay slot. */
7971 if (!((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7972 && !TARGET_LONG_PIC_SDIFF_CALL
7973 && !(TARGET_GAS && !TARGET_SOM && local_call)
7974 && !TARGET_64BIT)
7975 indirect_call = 1;
7976
7977 if (seq_length != 0
7978 && !sibcall
7979 && (!TARGET_PA_20
7980 || indirect_call
7981 || ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)))
7982 {
7983 /* A non-jump insn in the delay slot. By definition we can
7984 emit this insn before the call (and in fact before argument
7985 relocation). */
7986 final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0,
7987 NULL);
7988
7989 /* Now delete the delay insn. */
7990 SET_INSN_DELETED (NEXT_INSN (insn));
7991 seq_length = 0;
7992 }
7993
7994 if ((TARGET_LONG_ABS_CALL || local_call) && !flag_pic)
7995 {
7996 /* This is the best sequence for making long calls in
7997 non-pic code. Unfortunately, GNU ld doesn't provide
7998 the stub needed for external calls, and GAS's support
7999 for this with the SOM linker is buggy. It is safe
8000 to use this for local calls. */
8001 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8002 if (sibcall)
8003 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands);
8004 else
8005 {
8006 if (TARGET_PA_20)
8007 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
8008 xoperands);
8009 else
8010 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands);
8011
8012 output_asm_insn ("copy %%r31,%%r2", xoperands);
8013 seq_length = 1;
8014 }
8015 }
8016 else
8017 {
8018 /* The HP assembler and linker can handle relocations for
8019 the difference of two symbols. The HP assembler
8020 recognizes the sequence as a pc-relative call and
8021 the linker provides stubs when needed. */
8022
8023 /* GAS currently can't generate the relocations that
8024 are needed for the SOM linker under HP-UX using this
8025 sequence. The GNU linker doesn't generate the stubs
8026 that are needed for external calls on TARGET_ELF32
8027 with this sequence. For now, we have to use a longer
8028 plabel sequence when using GAS for non-local calls. */
8029 if (TARGET_LONG_PIC_SDIFF_CALL
8030 || (TARGET_GAS && !TARGET_SOM && local_call))
8031 {
8032 xoperands[1] = gen_rtx_REG (Pmode, 1);
8033 xoperands[2] = xoperands[1];
8034 pa_output_pic_pcrel_sequence (xoperands);
8035 }
8036 else
8037 {
8038 /* Emit a long plabel-based call sequence. This is
8039 essentially an inline implementation of $$dyncall.
8040 We don't actually try to call $$dyncall as this is
8041 as difficult as calling the function itself. */
8042 xoperands[0] = pa_get_deferred_plabel (call_dest);
8043 xoperands[1] = gen_label_rtx ();
8044
8045 /* Since the call is indirect, FP arguments in registers
8046 need to be copied to the general registers. Then, the
8047 argument relocation stub will copy them back. */
8048 if (TARGET_SOM)
8049 copy_fp_args (insn);
8050
8051 if (flag_pic)
8052 {
8053 output_asm_insn ("addil LT'%0,%%r19", xoperands);
8054 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands);
8055 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands);
8056 }
8057 else
8058 {
8059 output_asm_insn ("addil LR'%0-$global$,%%r27",
8060 xoperands);
8061 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
8062 xoperands);
8063 }
8064
8065 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands);
8066 output_asm_insn ("depi 0,31,2,%%r1", xoperands);
8067 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands);
8068 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands);
8069
8070 if (!sibcall && !TARGET_PA_20)
8071 {
8072 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
8073 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8074 output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
8075 else
8076 output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
8077 }
8078 }
8079
8080 if (TARGET_PA_20)
8081 {
8082 if (sibcall)
8083 output_asm_insn ("bve (%%r1)", xoperands);
8084 else
8085 {
8086 if (indirect_call)
8087 {
8088 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8089 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands);
8090 seq_length = 1;
8091 }
8092 else
8093 output_asm_insn ("bve,l (%%r1),%%r2", xoperands);
8094 }
8095 }
8096 else
8097 {
8098 if (!TARGET_NO_SPACE_REGS && (!local_call || flag_pic))
8099 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
8100 xoperands);
8101
8102 if (sibcall)
8103 {
8104 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8105 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
8106 else
8107 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
8108 }
8109 else
8110 {
8111 if (TARGET_NO_SPACE_REGS || (local_call && !flag_pic))
8112 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
8113 else
8114 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
8115
8116 if (indirect_call)
8117 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
8118 else
8119 output_asm_insn ("copy %%r31,%%r2", xoperands);
8120 seq_length = 1;
8121 }
8122 }
8123 }
8124 }
8125 }
8126
8127 if (seq_length == 0)
8128 output_asm_insn ("nop", xoperands);
8129
8130 return "";
8131 }
8132
8133 /* Return the attribute length of the indirect call instruction INSN.
8134 The length must match the code generated by pa_output_indirect_call.
8135 The returned length includes the delay slot. Currently, the delay
8136 slot of an indirect call sequence is not exposed and it is used by
8137 the sequence itself. */
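/* Added commentary: as elsewhere, each length is 4 bytes per insn
   including the delay slot; e.g. the 20-byte returns below correspond
   to five-instruction sequences such as the inline version of
   $$dyncall.  */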
8138
8139 int
8140 pa_attr_length_indirect_call (rtx_insn *insn)
8141 {
8142 unsigned long distance = -1;
8143 unsigned long total = IN_NAMED_SECTION_P (cfun->decl) ? 0 : total_code_bytes;
8144
8145 if (INSN_ADDRESSES_SET_P ())
8146 {
8147 distance = (total + insn_current_reference_address (insn));
8148 if (distance < total)
8149 distance = -1;
8150 }
8151
8152 if (TARGET_64BIT)
8153 return 12;
8154
8155 if (TARGET_FAST_INDIRECT_CALLS)
8156 return 8;
8157
8158 if (TARGET_PORTABLE_RUNTIME)
8159 return 16;
8160
8161 /* Inline version of $$dyncall. */
8162 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8163 return 20;
8164
8165 if (!TARGET_LONG_CALLS
8166 && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000)
8167 || distance < MAX_PCREL17F_OFFSET))
8168 return 8;
8169
8170 /* Out of reach, can use ble. */
8171 if (!flag_pic)
8172 return 12;
8173
8174 /* Inline version of $$dyncall. */
8175 if (TARGET_NO_SPACE_REGS || TARGET_PA_20)
8176 return 20;
8177
8178 if (!optimize_size)
8179 return 36;
8180
8181 /* Long PIC pc-relative call. */
8182 return 20;
8183 }
8184
8185 const char *
8186 pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
8187 {
8188 rtx xoperands[4];
8189 int length;
8190
8191 if (TARGET_64BIT)
8192 {
8193 xoperands[0] = call_dest;
8194 output_asm_insn ("ldd 16(%0),%%r2\n\t"
8195 "bve,l (%%r2),%%r2\n\t"
8196 "ldd 24(%0),%%r27", xoperands);
8197 return "";
8198 }
8199
8200 /* First the special case for kernels, level 0 systems, etc. */
8201 if (TARGET_FAST_INDIRECT_CALLS)
8202 {
8203 pa_output_arg_descriptor (insn);
8204 if (TARGET_PA_20)
8205 return "bve,l,n (%%r22),%%r2\n\tnop";
8206 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8207 }
8208
8209 if (TARGET_PORTABLE_RUNTIME)
8210 {
8211 output_asm_insn ("ldil L'$$dyncall,%%r31\n\t"
8212 "ldo R'$$dyncall(%%r31),%%r31", xoperands);
8213 pa_output_arg_descriptor (insn);
8214 return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)";
8215 }
8216
8217 /* Maybe emit a fast inline version of $$dyncall. */
8218 if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size)
8219 {
8220 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8221 "ldw 2(%%r22),%%r19\n\t"
8222 "ldw -2(%%r22),%%r22", xoperands);
8223 pa_output_arg_descriptor (insn);
8224 if (TARGET_NO_SPACE_REGS)
8225 {
8226 if (TARGET_PA_20)
8227 return "bve,l,n (%%r22),%%r2\n\tnop";
8228 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8229 }
8230 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8231 }
8232
8233 /* Now the normal case -- we can reach $$dyncall directly or
8234 we're sure that we can get there via a long-branch stub.
8235
8236 No need to check target flags as the length uniquely identifies
8237 the remaining cases. */
8238 length = pa_attr_length_indirect_call (insn);
8239 if (length == 8)
8240 {
8241 pa_output_arg_descriptor (insn);
8242
8243 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
8244 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
8245 variant of the B,L instruction can't be used on the SOM target. */
8246 if (TARGET_PA_20 && !TARGET_SOM)
8247 return "b,l,n $$dyncall,%%r2\n\tnop";
8248 else
8249 return "bl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
8250 }
8251
8252 /* Long millicode call, but we are not generating PIC or portable runtime
8253 code. */
8254 if (length == 12)
8255 {
8256 output_asm_insn ("ldil L'$$dyncall,%%r2", xoperands);
8257 pa_output_arg_descriptor (insn);
8258 return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
8259 }
8260
8261 /* Maybe emit a fast inline version of $$dyncall. The long PIC
8262 pc-relative call sequence is five instructions. The inline PA 2.0
8263 version of $$dyncall is also five instructions. The PA 1.X versions
8264 are longer but still an overall win. */
8265 if (TARGET_NO_SPACE_REGS || TARGET_PA_20 || !optimize_size)
8266 {
8267 output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t"
8268 "ldw 2(%%r22),%%r19\n\t"
8269 "ldw -2(%%r22),%%r22", xoperands);
8270 if (TARGET_NO_SPACE_REGS)
8271 {
8272 pa_output_arg_descriptor (insn);
8273 if (TARGET_PA_20)
8274 return "bve,l,n (%%r22),%%r2\n\tnop";
8275 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
8276 }
8277 if (TARGET_PA_20)
8278 {
8279 pa_output_arg_descriptor (insn);
8280 return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)";
8281 }
8282 output_asm_insn ("bl .+8,%%r2\n\t"
8283 "ldo 16(%%r2),%%r2\n\t"
8284 "ldsid (%%r22),%%r1\n\t"
8285 "mtsp %%r1,%%sr0", xoperands);
8286 pa_output_arg_descriptor (insn);
8287 return "be 0(%%sr0,%%r22)\n\tstw %%r2,-24(%%sp)";
8288 }
8289
8290 /* We need a long PIC call to $$dyncall. */
8291 xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall");
8292 xoperands[1] = gen_rtx_REG (Pmode, 2);
8293 xoperands[2] = gen_rtx_REG (Pmode, 1);
8294 pa_output_pic_pcrel_sequence (xoperands);
8295 pa_output_arg_descriptor (insn);
8296 return "bv %%r0(%%r1)\n\tldo {12|20}(%%r2),%%r2";
8297 }
8298
8299 /* In HPUX 8.0's shared library scheme, special relocations are needed
8300 for function labels if they might be passed to a function
8301 in a shared library (because shared libraries don't live in code
8302 space), and special magic is needed to construct their address. */
8303
8304 void
8305 pa_encode_label (rtx sym)
8306 {
8307 const char *str = XSTR (sym, 0);
8308 int len = strlen (str) + 1;
8309 char *newstr, *p;
8310
8311 p = newstr = XALLOCAVEC (char, len + 1);
8312 *p++ = '@';
8313 strcpy (p, str);
8314
8315 XSTR (sym, 0) = ggc_alloc_string (newstr, len);
8316 }
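
/* Added commentary: pa_encode_label rewrites the symbol name in
   place, so a function label "foo" becomes "@foo".  The '@' prefix is
   what pa_strip_name_encoding below removes again when the plain name
   is needed.  */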
8317
8318 static void
8319 pa_encode_section_info (tree decl, rtx rtl, int first)
8320 {
8321 int old_referenced = 0;
8322
8323 if (!first && MEM_P (rtl) && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF)
8324 old_referenced
8325 = SYMBOL_REF_FLAGS (XEXP (rtl, 0)) & SYMBOL_FLAG_REFERENCED;
8326
8327 default_encode_section_info (decl, rtl, first);
8328
8329 if (first && TEXT_SPACE_P (decl))
8330 {
8331 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
8332 if (TREE_CODE (decl) == FUNCTION_DECL)
8333 pa_encode_label (XEXP (rtl, 0));
8334 }
8335 else if (old_referenced)
8336 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= old_referenced;
8337 }
8338
8339 /* This is sort of inverse to pa_encode_section_info. */
8340
8341 static const char *
8342 pa_strip_name_encoding (const char *str)
8343 {
8344 str += (*str == '@');
8345 str += (*str == '*');
8346 return str;
8347 }
8348
8349 /* Returns 1 if OP is a function label involved in a simple addition
8350 with a constant. Used to keep certain patterns from matching
8351 during instruction combination. */
8352 int
8353 pa_is_function_label_plus_const (rtx op)
8354 {
8355 /* Strip off any CONST. */
8356 if (GET_CODE (op) == CONST)
8357 op = XEXP (op, 0);
8358
8359 return (GET_CODE (op) == PLUS
8360 && function_label_operand (XEXP (op, 0), VOIDmode)
8361 && GET_CODE (XEXP (op, 1)) == CONST_INT);
8362 }
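
/* Added commentary: the above matches RTL of the shape
   (const (plus (symbol_ref "@foo") (const_int 4))), i.e. an encoded
   function label plus a constant offset (the symbol name and offset
   here are illustrative).  */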
8363
8364 /* Output assembly code for a thunk to FUNCTION. */
8365
8366 static void
8367 pa_asm_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
8368 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
8369 tree function)
8370 {
8371 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
8372 static unsigned int current_thunk_number;
8373 int val_14 = VAL_14_BITS_P (delta);
8374 unsigned int old_last_address = last_address, nbytes = 0;
8375 char label[17];
8376 rtx xoperands[4];
8377
8378 xoperands[0] = XEXP (DECL_RTL (function), 0);
8379 xoperands[1] = XEXP (DECL_RTL (thunk_fndecl), 0);
8380 xoperands[2] = GEN_INT (delta);
8381
8382 assemble_start_function (thunk_fndecl, fnname);
8383 final_start_function (emit_barrier (), file, 1);
8384
8385 /* Output the thunk. We know that the function is in the same
8386 translation unit (i.e., the same space) as the thunk, and that
8387 thunks are output after their method. Thus, we don't need an
8388 external branch to reach the function. With SOM and GAS,
8389 functions and thunks are effectively in different sections.
8390 Thus, we can always use an IA-relative branch and the linker
8391 will add a long branch stub if necessary.
8392
8393 However, we have to be careful when generating PIC code on the
8394 SOM port to ensure that the sequence does not transfer to an
8395 import stub for the target function as this could clobber the
8396 return value saved at SP-24. This would also apply to the
8397 32-bit linux port if the multi-space model is implemented. */
8398 if ((!TARGET_LONG_CALLS && TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8399 && !(flag_pic && TREE_PUBLIC (function))
8400 && (TARGET_GAS || last_address < 262132))
8401 || (!TARGET_LONG_CALLS && !TARGET_SOM && !TARGET_PORTABLE_RUNTIME
8402 && ((targetm_common.have_named_sections
8403 && DECL_SECTION_NAME (thunk_fndecl) != NULL
8404 /* The GNU 64-bit linker has rather poor stub management.
8405 So, we use a long branch from thunks that aren't in
8406 the same section as the target function. */
8407 && ((!TARGET_64BIT
8408 && (DECL_SECTION_NAME (thunk_fndecl)
8409 != DECL_SECTION_NAME (function)))
8410 || ((DECL_SECTION_NAME (thunk_fndecl)
8411 == DECL_SECTION_NAME (function))
8412 && last_address < 262132)))
8413 /* In this case, we need to be able to reach the start of
8414 the stub table even though the function is likely closer
8415 and can be jumped to directly. */
8416 || (targetm_common.have_named_sections
8417 && DECL_SECTION_NAME (thunk_fndecl) == NULL
8418 && DECL_SECTION_NAME (function) == NULL
8419 && total_code_bytes < MAX_PCREL17F_OFFSET)
8420 /* Likewise. */
8421 || (!targetm_common.have_named_sections
8422 && total_code_bytes < MAX_PCREL17F_OFFSET))))
8423 {
8424 if (!val_14)
8425 output_asm_insn ("addil L'%2,%%r26", xoperands);
8426
8427 output_asm_insn ("b %0", xoperands);
8428
8429 if (val_14)
8430 {
8431 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8432 nbytes += 8;
8433 }
8434 else
8435 {
8436 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8437 nbytes += 12;
8438 }
8439 }
8440 else if (TARGET_64BIT)
8441 {
8442 rtx xop[4];
8443
8444 /* We only have one call-clobbered scratch register, so we can't
8445 make use of the delay slot if delta doesn't fit in 14 bits. */
8446 if (!val_14)
8447 {
8448 output_asm_insn ("addil L'%2,%%r26", xoperands);
8449 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8450 }
8451
8452 /* Load function address into %r1. */
8453 xop[0] = xoperands[0];
8454 xop[1] = gen_rtx_REG (Pmode, 1);
8455 xop[2] = xop[1];
8456 pa_output_pic_pcrel_sequence (xop);
8457
8458 if (val_14)
8459 {
8460 output_asm_insn ("bv %%r0(%%r1)", xoperands);
8461 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8462 nbytes += 20;
8463 }
8464 else
8465 {
8466 output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
8467 nbytes += 24;
8468 }
8469 }
8470 else if (TARGET_PORTABLE_RUNTIME)
8471 {
8472 output_asm_insn ("ldil L'%0,%%r1", xoperands);
8473 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands);
8474
8475 if (!val_14)
8476 output_asm_insn ("ldil L'%2,%%r26", xoperands);
8477
8478 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8479
8480 if (val_14)
8481 {
8482 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8483 nbytes += 16;
8484 }
8485 else
8486 {
8487 output_asm_insn ("ldo R'%2(%%r26),%%r26", xoperands);
8488 nbytes += 20;
8489 }
8490 }
8491 else if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8492 {
8493 /* The function is accessible from outside this module. The only
8494 way to avoid an import stub between the thunk and function is to
8495 call the function directly with an indirect sequence similar to
8496 that used by $$dyncall. This is possible because $$dyncall acts
8497 as the import stub in an indirect call. */
8498 ASM_GENERATE_INTERNAL_LABEL (label, "LTHN", current_thunk_number);
8499 xoperands[3] = gen_rtx_SYMBOL_REF (Pmode, label);
8500 output_asm_insn ("addil LT'%3,%%r19", xoperands);
8501 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands);
8502 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8503 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands);
8504 output_asm_insn ("depi 0,31,2,%%r22", xoperands);
8505 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands);
8506 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands);
8507
8508 if (!val_14)
8509 {
8510 output_asm_insn ("addil L'%2,%%r26", xoperands);
8511 nbytes += 4;
8512 }
8513
8514 if (TARGET_PA_20)
8515 {
8516 output_asm_insn ("bve (%%r22)", xoperands);
8517 nbytes += 36;
8518 }
8519 else if (TARGET_NO_SPACE_REGS)
8520 {
8521 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands);
8522 nbytes += 36;
8523 }
8524 else
8525 {
8526 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands);
8527 output_asm_insn ("mtsp %%r21,%%sr0", xoperands);
8528 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands);
8529 nbytes += 44;
8530 }
8531
8532 if (val_14)
8533 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8534 else
8535 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8536 }
8537 else if (flag_pic)
8538 {
8539 rtx xop[4];
8540
8541 /* Load function address into %r22. */
8542 xop[0] = xoperands[0];
8543 xop[1] = gen_rtx_REG (Pmode, 1);
8544 xop[2] = gen_rtx_REG (Pmode, 22);
8545 pa_output_pic_pcrel_sequence (xop);
8546
8547 if (!val_14)
8548 output_asm_insn ("addil L'%2,%%r26", xoperands);
8549
8550 output_asm_insn ("bv %%r0(%%r22)", xoperands);
8551
8552 if (val_14)
8553 {
8554 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8555 nbytes += 20;
8556 }
8557 else
8558 {
8559 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8560 nbytes += 24;
8561 }
8562 }
8563 else
8564 {
8565 if (!val_14)
8566 output_asm_insn ("addil L'%2,%%r26", xoperands);
8567
8568 output_asm_insn ("ldil L'%0,%%r22", xoperands);
8569 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands);
8570
8571 if (val_14)
8572 {
8573 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands);
8574 nbytes += 12;
8575 }
8576 else
8577 {
8578 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands);
8579 nbytes += 16;
8580 }
8581 }
8582
8583 final_end_function ();
8584
8585 if (TARGET_SOM && flag_pic && TREE_PUBLIC (function))
8586 {
8587 switch_to_section (data_section);
8588 output_asm_insn (".align 4", xoperands);
8589 ASM_OUTPUT_LABEL (file, label);
8590 output_asm_insn (".word P'%0", xoperands);
8591 }
8592
8593 current_thunk_number++;
8594 nbytes = ((nbytes + FUNCTION_BOUNDARY / BITS_PER_UNIT - 1)
8595 & ~(FUNCTION_BOUNDARY / BITS_PER_UNIT - 1));
8596 last_address += nbytes;
8597 if (old_last_address > last_address)
8598 last_address = UINT_MAX;
8599 update_total_code_bytes (nbytes);
8600 assemble_end_function (thunk_fndecl, fnname);
8601 }
8602
8603 /* Only direct calls to static functions are allowed to be sibling (tail)
8604 call optimized.
8605
8606 This restriction is necessary because some linker-generated stubs will
8607 store return pointers into rp' in some cases which might clobber a
8608 live value already in rp'.
8609
8610 In a sibcall the current function and the target function share stack
8611 space. Thus if the path to the current function and the path to the
8612 target function save a value in rp', they save the value into the
8613 same stack slot, which has undesirable consequences.
8614
8615 Because of the deferred binding nature of shared libraries, any function
8616 with external scope could be in a different load module and thus require
8617 rp' to be saved when calling that function. So sibcall optimizations
8618 can only be safe for static functions.
8619
8620 Note that GCC never needs return value relocations, so we don't have to
8621 worry about static calls with return value relocations (which require
8622 saving rp').
8623
8624 It is safe to perform a sibcall optimization when the target function
8625 will never return. */
8626 static bool
8627 pa_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8628 {
8629 /* Sibcalls are not ok because the arg pointer register is not a fixed
8630 register. This prevents the sibcall optimization from occurring. In
8631 addition, there are problems with stub placement using GNU ld. This
8632 is because a normal sibcall branch uses a 17-bit relocation while
8633 a regular call branch uses a 22-bit relocation. As a result, more
8634 care needs to be taken in the placement of long-branch stubs. */
8635 if (TARGET_64BIT)
8636 return false;
8637
8638 if (TARGET_PORTABLE_RUNTIME)
8639 return false;
8640
8641 /* Sibcalls are only ok within a translation unit. */
8642 return decl && targetm.binds_local_p (decl);
8643 }
8644
8645 /* ??? Addition is not commutative on the PA due to the weird implicit
8646 space register selection rules for memory addresses. Therefore, we
8647 don't consider a + b == b + a, as this might be inside a MEM. */
8648 static bool
8649 pa_commutative_p (const_rtx x, int outer_code)
8650 {
8651 return (COMMUTATIVE_P (x)
8652 && (TARGET_NO_SPACE_REGS
8653 || (outer_code != UNKNOWN && outer_code != MEM)
8654 || GET_CODE (x) != PLUS));
8655 }
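
/* Added commentary: inside a MEM, (plus (reg A) (reg B)) cannot be
   rewritten as (plus (reg B) (reg A)) because the implicit space
   register is selected from the base operand; swapping base and index
   could change which space the access uses.  */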
8656
8657 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8658 use in fmpyadd instructions. */
8659 int
8660 pa_fmpyaddoperands (rtx *operands)
8661 {
8662 machine_mode mode = GET_MODE (operands[0]);
8663
8664 /* Must be a floating point mode. */
8665 if (mode != SFmode && mode != DFmode)
8666 return 0;
8667
8668 /* All modes must be the same. */
8669 if (! (mode == GET_MODE (operands[1])
8670 && mode == GET_MODE (operands[2])
8671 && mode == GET_MODE (operands[3])
8672 && mode == GET_MODE (operands[4])
8673 && mode == GET_MODE (operands[5])))
8674 return 0;
8675
8676 /* All operands must be registers. */
8677 if (! (GET_CODE (operands[1]) == REG
8678 && GET_CODE (operands[2]) == REG
8679 && GET_CODE (operands[3]) == REG
8680 && GET_CODE (operands[4]) == REG
8681 && GET_CODE (operands[5]) == REG))
8682 return 0;
8683
8684 /* Only 2 real operands to the addition. One of the input operands must
8685 be the same as the output operand. */
8686 if (! rtx_equal_p (operands[3], operands[4])
8687 && ! rtx_equal_p (operands[3], operands[5]))
8688 return 0;
8689
8690 /* Inout operand of add cannot conflict with any operands from multiply. */
8691 if (rtx_equal_p (operands[3], operands[0])
8692 || rtx_equal_p (operands[3], operands[1])
8693 || rtx_equal_p (operands[3], operands[2]))
8694 return 0;
8695
8696 /* Multiply cannot feed into addition operands. */
8697 if (rtx_equal_p (operands[4], operands[0])
8698 || rtx_equal_p (operands[5], operands[0]))
8699 return 0;
8700
8701 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8702 if (mode == SFmode
8703 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8704 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8705 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8706 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8707 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8708 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8709 return 0;
8710
8711 /* Passed. Operands are suitable for fmpyadd. */
8712 return 1;
8713 }
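
/* Added commentary: in the operand layout checked above, operands[0]
   through operands[2] are the multiply's destination and two sources,
   and operands[3] through operands[5] are the add's; the conflict
   tests ensure the two halves are independent so they can issue as a
   single two-output fmpyadd.  */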
8714
8715 #if !defined(USE_COLLECT2)
8716 static void
8717 pa_asm_out_constructor (rtx symbol, int priority)
8718 {
8719 if (!function_label_operand (symbol, VOIDmode))
8720 pa_encode_label (symbol);
8721
8722 #ifdef CTORS_SECTION_ASM_OP
8723 default_ctor_section_asm_out_constructor (symbol, priority);
8724 #else
8725 # ifdef TARGET_ASM_NAMED_SECTION
8726 default_named_section_asm_out_constructor (symbol, priority);
8727 # else
8728 default_stabs_asm_out_constructor (symbol, priority);
8729 # endif
8730 #endif
8731 }
8732
8733 static void
8734 pa_asm_out_destructor (rtx symbol, int priority)
8735 {
8736 if (!function_label_operand (symbol, VOIDmode))
8737 pa_encode_label (symbol);
8738
8739 #ifdef DTORS_SECTION_ASM_OP
8740 default_dtor_section_asm_out_destructor (symbol, priority);
8741 #else
8742 # ifdef TARGET_ASM_NAMED_SECTION
8743 default_named_section_asm_out_destructor (symbol, priority);
8744 # else
8745 default_stabs_asm_out_destructor (symbol, priority);
8746 # endif
8747 #endif
8748 }
8749 #endif
8750
8751 /* This function places uninitialized global data in the bss section.
8752 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8753 function on the SOM port to prevent uninitialized global data from
8754 being placed in the data section. */
8755
8756 void
8757 pa_asm_output_aligned_bss (FILE *stream,
8758 const char *name,
8759 unsigned HOST_WIDE_INT size,
8760 unsigned int align)
8761 {
8762 switch_to_section (bss_section);
8763 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8764
8765 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8766 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
8767 #endif
8768
8769 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8770 ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
8771 #endif
8772
8773 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8774 ASM_OUTPUT_LABEL (stream, name);
8775 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8776 }
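
/* Added commentary: for example, with NAME "buf", SIZE 64 and ALIGN
   64 bits, the function above emits two ".align 8" directives
   (bracketing the optional type and size directives), the label for
   "buf", and ".block 64" to reserve the storage.  */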
8777
8778 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8779 that doesn't allow the alignment of global common storage to be directly
8780 specified. The SOM linker aligns common storage based on the rounded
8781 value of the NUM_BYTES parameter in the .comm directive. It's not
8782 possible to use the .align directive as it doesn't affect the alignment
8783 of the label associated with a .comm directive. */
8784
8785 void
8786 pa_asm_output_aligned_common (FILE *stream,
8787 const char *name,
8788 unsigned HOST_WIDE_INT size,
8789 unsigned int align)
8790 {
8791 unsigned int max_common_align;
8792
8793 max_common_align = TARGET_64BIT ? 128 : (size >= 4096 ? 256 : 64);
8794 if (align > max_common_align)
8795 {
8796 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8797 "for global common data. Using %u",
8798 align / BITS_PER_UNIT, name, max_common_align / BITS_PER_UNIT);
8799 align = max_common_align;
8800 }
8801
8802 switch_to_section (bss_section);
8803
8804 assemble_name (stream, name);
8805 fprintf (stream, "\t.comm " HOST_WIDE_INT_PRINT_UNSIGNED"\n",
8806 MAX (size, align / BITS_PER_UNIT));
8807 }
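
/* Added commentary: the alignment request is encoded entirely in the
   .comm size.  For example, a 2-byte object asking for 64-bit
   (8-byte) alignment is emitted as "name\t.comm 8", i.e.
   MAX (SIZE, ALIGN / BITS_PER_UNIT).  */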
8808
8809 /* We can't use .comm for local common storage as the SOM linker effectively
8810 treats the symbol as universal and uses the same storage for local symbols
8811 with the same name in different object files. The .block directive
8812 reserves an uninitialized block of storage. However, it's not common
8813 storage. Fortunately, GCC never requests common storage with the same
8814 name in any given translation unit. */
8815
8816 void
8817 pa_asm_output_aligned_local (FILE *stream,
8818 const char *name,
8819 unsigned HOST_WIDE_INT size,
8820 unsigned int align)
8821 {
8822 switch_to_section (bss_section);
8823 fprintf (stream, "\t.align %u\n", align / BITS_PER_UNIT);
8824
8825 #ifdef LOCAL_ASM_OP
8826 fprintf (stream, "%s", LOCAL_ASM_OP);
8827 assemble_name (stream, name);
8828 fprintf (stream, "\n");
8829 #endif
8830
8831 ASM_OUTPUT_LABEL (stream, name);
8832 fprintf (stream, "\t.block " HOST_WIDE_INT_PRINT_UNSIGNED"\n", size);
8833 }
8834
8835 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8836 use in fmpysub instructions. */
8837 int
8838 pa_fmpysuboperands (rtx *operands)
8839 {
8840 machine_mode mode = GET_MODE (operands[0]);
8841
8842 /* Must be a floating point mode. */
8843 if (mode != SFmode && mode != DFmode)
8844 return 0;
8845
8846 /* All modes must be the same. */
8847 if (! (mode == GET_MODE (operands[1])
8848 && mode == GET_MODE (operands[2])
8849 && mode == GET_MODE (operands[3])
8850 && mode == GET_MODE (operands[4])
8851 && mode == GET_MODE (operands[5])))
8852 return 0;
8853
8854 /* All operands must be registers. */
8855 if (! (GET_CODE (operands[1]) == REG
8856 && GET_CODE (operands[2]) == REG
8857 && GET_CODE (operands[3]) == REG
8858 && GET_CODE (operands[4]) == REG
8859 && GET_CODE (operands[5]) == REG))
8860 return 0;
8861
8862 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8863 operation, so operands[4] must be the same as operands[3]. */
8864 if (! rtx_equal_p (operands[3], operands[4]))
8865 return 0;
8866
8867 /* Multiply cannot feed into subtraction. */
8868 if (rtx_equal_p (operands[5], operands[0]))
8869 return 0;
8870
8871 /* Inout operand of sub cannot conflict with any operands from multiply. */
8872 if (rtx_equal_p (operands[3], operands[0])
8873 || rtx_equal_p (operands[3], operands[1])
8874 || rtx_equal_p (operands[3], operands[2]))
8875 return 0;
8876
8877 /* SFmode limits the registers to the upper 32 of the 32-bit FP regs. */
8878 if (mode == SFmode
8879 && (REGNO_REG_CLASS (REGNO (operands[0])) != FPUPPER_REGS
8880 || REGNO_REG_CLASS (REGNO (operands[1])) != FPUPPER_REGS
8881 || REGNO_REG_CLASS (REGNO (operands[2])) != FPUPPER_REGS
8882 || REGNO_REG_CLASS (REGNO (operands[3])) != FPUPPER_REGS
8883 || REGNO_REG_CLASS (REGNO (operands[4])) != FPUPPER_REGS
8884 || REGNO_REG_CLASS (REGNO (operands[5])) != FPUPPER_REGS))
8885 return 0;
8886
8887 /* Passed. Operands are suitable for fmpysub. */
8888 return 1;
8889 }
8890
8891 /* Return 1 if the given constant is 2, 4, or 8. These are the valid
8892 constants for a MULT embedded inside a memory address. */
8893 int
8894 pa_mem_shadd_constant_p (int val)
8895 {
8896 if (val == 2 || val == 4 || val == 8)
8897 return 1;
8898 else
8899 return 0;
8900 }
8901
8902 /* Return 1 if the given constant is 1, 2, or 3. These are the valid
8903 constants for shadd instructions. */
8904 int
8905 pa_shadd_constant_p (int val)
8906 {
8907 if (val == 1 || val == 2 || val == 3)
8908 return 1;
8909 else
8910 return 0;
8911 }
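
/* Added commentary: the two predicates above are related by
   pa_mem_shadd_constant_p (1 << v) == pa_shadd_constant_p (v); a MULT
   by 2, 4 or 8 inside an address corresponds to a shift-and-add
   (sh1add, sh2add, sh3add) by 1, 2 or 3.  */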
8912
8913 /* Return TRUE if INSN branches forward. */
8914
8915 static bool
8916 forward_branch_p (rtx_insn *insn)
8917 {
8918 rtx lab = JUMP_LABEL (insn);
8919
8920 /* The INSN must have a jump label. */
8921 gcc_assert (lab != NULL_RTX);
8922
8923 if (INSN_ADDRESSES_SET_P ())
8924 return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
8925
8926 while (insn)
8927 {
8928 if (insn == lab)
8929 return true;
8930 else
8931 insn = NEXT_INSN (insn);
8932 }
8933
8934 return false;
8935 }
8936
8937 /* Output an unconditional move and branch insn. */
8938
8939 const char *
8940 pa_output_parallel_movb (rtx *operands, rtx_insn *insn)
8941 {
8942 int length = get_attr_length (insn);
8943
8944 /* These are the cases in which we win. */
8945 if (length == 4)
8946 return "mov%I1b,tr %1,%0,%2";
8947
8948 /* None of the following cases win, but they don't lose either. */
8949 if (length == 8)
8950 {
8951 if (dbr_sequence_length () == 0)
8952 {
8953 /* Nothing in the delay slot, fake it by putting the combined
8954 insn (the copy or add) in the delay slot of a bl. */
8955 if (GET_CODE (operands[1]) == CONST_INT)
8956 return "b %2\n\tldi %1,%0";
8957 else
8958 return "b %2\n\tcopy %1,%0";
8959 }
8960 else
8961 {
8962 /* Something in the delay slot, but we've got a long branch. */
8963 if (GET_CODE (operands[1]) == CONST_INT)
8964 return "ldi %1,%0\n\tb %2";
8965 else
8966 return "copy %1,%0\n\tb %2";
8967 }
8968 }
8969
8970 if (GET_CODE (operands[1]) == CONST_INT)
8971 output_asm_insn ("ldi %1,%0", operands);
8972 else
8973 output_asm_insn ("copy %1,%0", operands);
8974 return pa_output_lbranch (operands[2], insn, 1);
8975 }
8976
8977 /* Output an unconditional add and branch insn. */
8978
8979 const char *
8980 pa_output_parallel_addb (rtx *operands, rtx_insn *insn)
8981 {
8982 int length = get_attr_length (insn);
8983
8984 /* To make life easy we want operands[0] to be the shared input/output
8985 operand and operands[1] to be the read-only operand. */
8986 if (operands[0] == operands[1])
8987 operands[1] = operands[2];
8988
8989 /* These are the cases in which we win. */
8990 if (length == 4)
8991 return "add%I1b,tr %1,%0,%3";
8992
8993 /* None of the following cases win, but they don't lose either. */
8994 if (length == 8)
8995 {
8996 if (dbr_sequence_length () == 0)
8997 /* Nothing in the delay slot, fake it by putting the combined
8998 insn (the copy or add) in the delay slot of a bl. */
8999 return "b %3\n\tadd%I1 %1,%0,%0";
9000 else
9001 /* Something in the delay slot, but we've got a long branch. */
9002 return "add%I1 %1,%0,%0\n\tb %3";
9003 }
9004
9005 output_asm_insn ("add%I1 %1,%0,%0", operands);
9006 return pa_output_lbranch (operands[3], insn, 1);
9007 }
9008
9009 /* We use this hook to perform a PA specific optimization which is difficult
9010 to do in earlier passes. */
9011
9012 static void
9013 pa_reorg (void)
9014 {
9015 remove_useless_addtr_insns (1);
9016
9017 if (pa_cpu < PROCESSOR_8000)
9018 pa_combine_instructions ();
9019 }
9020
9021 /* The PA has a number of odd instructions which can perform multiple
9022 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
9023 it may be profitable to combine two instructions into one instruction
9024 with two outputs. It's not profitable on PA2.0 machines because the
9025 two outputs would take two slots in the reorder buffers.
9026
9027 This routine finds instructions which can be combined and combines
9028 them. We only support some of the potential combinations, and we
9029 only try common ways to find suitable instructions.
9030
9031 * addb can add two registers or a register and a small integer
9032 and jump to a nearby (+-8k) location. Normally the jump to the
9033 nearby location is conditional on the result of the add, but by
9034 using the "true" condition we can make the jump unconditional.
9035 Thus addb can perform two independent operations in one insn.
9036
9037 * movb is similar to addb in that it can perform a reg->reg
9038 or small immediate->reg copy and jump to a nearby (+-8k) location.
9039
9040 * fmpyadd and fmpysub can perform an FP multiply and either an
9041 FP add or FP sub if the operands of the multiply and add/sub are
9042 independent (there are other minor restrictions). Note both
9043 the fmpy and fadd/fsub can in theory move to better spots according
9044 to data dependencies, but for now we require the fmpy stay at a
9045 fixed location. (See the illustrative sketch after this comment.)
9046
9047 * Many of the memory operations can perform pre & post updates
9048 of index registers. GCC's pre/post increment/decrement addressing
9049 is far too simple to take advantage of all the possibilities. This
9050 pass may not be suitable since those insns may not be independent.
9051
9052 * comclr can compare two registers or a register and an integer, nullify
9053 the following instruction and zero some other register. This
9054 is more difficult to use as it's harder to find an insn which
9055 will generate a comclr than finding something like an unconditional
9056 branch. (conditional moves & long branches create comclr insns).
9057
9058 * Most arithmetic operations can conditionally skip the next
9059 instruction. They can be viewed as "perform this operation
9060 and conditionally jump to this nearby location" (where nearby
9061 is an insn away). These are difficult to use due to the
9062 branch length restrictions. */
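
/* For example (illustrative only), the fmpy/fadd case below simply
   wraps the two original patterns in a two-element PARALLEL, roughly

     (parallel [(set (reg:SF A) (mult:SF (reg:SF B) (reg:SF C)))
                (set (reg:SF D) (plus:SF (reg:SF E) (reg:SF F)))])

   which must then match one of the fmpyadd/fmpysub patterns in pa.md
   for the combination to be kept.  */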
9063
9064 static void
9065 pa_combine_instructions (void)
9066 {
9067 rtx_insn *anchor;
9068
9069 /* This can get expensive since the basic algorithm is
9070 O(n^2) (or worse). Only do it for -O2 or higher
9071 levels of optimization. */
9072 if (optimize < 2)
9073 return;
9074
9075 /* Walk down the list of insns looking for "anchor" insns which
9076 may be combined with "floating" insns. As the name implies,
9077 "anchor" instructions don't move, while "floating" insns may
9078 move around. */
9079 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
9080 rtx_insn *new_rtx = make_insn_raw (par);
9081
9082 for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
9083 {
9084 enum attr_pa_combine_type anchor_attr;
9085 enum attr_pa_combine_type floater_attr;
9086
9087 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9088 Also ignore any special USE insns. */
9089 if ((! NONJUMP_INSN_P (anchor) && ! JUMP_P (anchor) && ! CALL_P (anchor))
9090 || GET_CODE (PATTERN (anchor)) == USE
9091 || GET_CODE (PATTERN (anchor)) == CLOBBER)
9092 continue;
9093
9094 anchor_attr = get_attr_pa_combine_type (anchor);
9095 /* See if anchor is an insn suitable for combination. */
9096 if (anchor_attr == PA_COMBINE_TYPE_FMPY
9097 || anchor_attr == PA_COMBINE_TYPE_FADDSUB
9098 || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9099 && ! forward_branch_p (anchor)))
9100 {
9101 rtx_insn *floater;
9102
9103 for (floater = PREV_INSN (anchor);
9104 floater;
9105 floater = PREV_INSN (floater))
9106 {
9107 if (NOTE_P (floater)
9108 || (NONJUMP_INSN_P (floater)
9109 && (GET_CODE (PATTERN (floater)) == USE
9110 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9111 continue;
9112
9113 /* Anything except a regular INSN will stop our search. */
9114 if (! NONJUMP_INSN_P (floater))
9115 {
9116 floater = NULL;
9117 break;
9118 }
9119
9120 /* See if FLOATER is suitable for combination with the
9121 anchor. */
9122 floater_attr = get_attr_pa_combine_type (floater);
9123 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9124 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9125 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9126 && floater_attr == PA_COMBINE_TYPE_FMPY))
9127 {
9128 /* If ANCHOR and FLOATER can be combined, then we're
9129 done with this pass. */
9130 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9131 SET_DEST (PATTERN (floater)),
9132 XEXP (SET_SRC (PATTERN (floater)), 0),
9133 XEXP (SET_SRC (PATTERN (floater)), 1)))
9134 break;
9135 }
9136
9137 else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
9138 && floater_attr == PA_COMBINE_TYPE_ADDMOVE)
9139 {
9140 if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
9141 {
9142 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9143 SET_DEST (PATTERN (floater)),
9144 XEXP (SET_SRC (PATTERN (floater)), 0),
9145 XEXP (SET_SRC (PATTERN (floater)), 1)))
9146 break;
9147 }
9148 else
9149 {
9150 if (pa_can_combine_p (new_rtx, anchor, floater, 0,
9151 SET_DEST (PATTERN (floater)),
9152 SET_SRC (PATTERN (floater)),
9153 SET_SRC (PATTERN (floater))))
9154 break;
9155 }
9156 }
9157 }
9158
9159 /* If we didn't find anything on the backwards scan, try forwards. */
9160 if (!floater
9161 && (anchor_attr == PA_COMBINE_TYPE_FMPY
9162 || anchor_attr == PA_COMBINE_TYPE_FADDSUB))
9163 {
9164 for (floater = anchor; floater; floater = NEXT_INSN (floater))
9165 {
9166 if (NOTE_P (floater)
9167 || (NONJUMP_INSN_P (floater)
9168 && (GET_CODE (PATTERN (floater)) == USE
9169 || GET_CODE (PATTERN (floater)) == CLOBBER)))
9171 continue;
9172
9173 /* Anything except a regular INSN will stop our search. */
9174 if (! NONJUMP_INSN_P (floater))
9175 {
9176 floater = NULL;
9177 break;
9178 }
9179
9180 /* See if FLOATER is suitable for combination with the
9181 anchor. */
9182 floater_attr = get_attr_pa_combine_type (floater);
9183 if ((anchor_attr == PA_COMBINE_TYPE_FMPY
9184 && floater_attr == PA_COMBINE_TYPE_FADDSUB)
9185 || (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9186 && floater_attr == PA_COMBINE_TYPE_FMPY))
9187 {
9188 /* If ANCHOR and FLOATER can be combined, then we're
9189 done with this pass. */
9190 if (pa_can_combine_p (new_rtx, anchor, floater, 1,
9191 SET_DEST (PATTERN (floater)),
9192 XEXP (SET_SRC (PATTERN (floater)),
9193 0),
9194 XEXP (SET_SRC (PATTERN (floater)),
9195 1)))
9196 break;
9197 }
9198 }
9199 }
9200
9201 /* FLOATER will be nonzero if we found a suitable floating
9202 insn for combination with ANCHOR. */
9203 if (floater
9204 && (anchor_attr == PA_COMBINE_TYPE_FADDSUB
9205 || anchor_attr == PA_COMBINE_TYPE_FMPY))
9206 {
9207 /* Emit the new instruction and delete the old anchor. */
9208 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9209 copy_rtx (PATTERN (floater)));
9210 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9211 emit_insn_before (temp, anchor);
9212
9213 SET_INSN_DELETED (anchor);
9214
9215 /* Emit a special USE insn for FLOATER, then delete
9216 the floating insn. */
9217 temp = copy_rtx (PATTERN (floater));
9218 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9219 delete_insn (floater);
9220
9221 continue;
9222 }
9223 else if (floater
9224 && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
9225 {
9226 /* Emit the new jump instruction and delete the old anchor. */
9227 rtvec vtemp = gen_rtvec (2, copy_rtx (PATTERN (anchor)),
9228 copy_rtx (PATTERN (floater)));
9229 rtx temp = gen_rtx_PARALLEL (VOIDmode, vtemp);
9230 temp = emit_jump_insn_before (temp, anchor);
9231
9232 JUMP_LABEL (temp) = JUMP_LABEL (anchor);
9233 SET_INSN_DELETED (anchor);
9234
9235 /* Emit a special USE insn for FLOATER, then delete
9236 the floating insn. */
9237 temp = copy_rtx (PATTERN (floater));
9238 emit_insn_before (gen_rtx_USE (VOIDmode, temp), floater);
9239 delete_insn (floater);
9240 continue;
9241 }
9242 }
9243 }
9244 }
9245
9246 static int
9247 pa_can_combine_p (rtx_insn *new_rtx, rtx_insn *anchor, rtx_insn *floater,
9248 int reversed, rtx dest,
9249 rtx src1, rtx src2)
9250 {
9251 int insn_code_number;
9252 rtx_insn *start, *end;
9253
9254 /* Create a PARALLEL with the patterns of ANCHOR and
9255 FLOATER, try to recognize it, then test constraints
9256 for the resulting pattern.
9257
9258 If the pattern doesn't match or the constraints
9259 aren't met keep searching for a suitable floater
9260 insn. */
9261 XVECEXP (PATTERN (new_rtx), 0, 0) = PATTERN (anchor);
9262 XVECEXP (PATTERN (new_rtx), 0, 1) = PATTERN (floater);
9263 INSN_CODE (new_rtx) = -1;
9264 insn_code_number = recog_memoized (new_rtx);
9265 basic_block bb = BLOCK_FOR_INSN (anchor);
9266 if (insn_code_number < 0
9267 || (extract_insn (new_rtx),
9268 !constrain_operands (1, get_preferred_alternatives (new_rtx, bb))))
9269 return 0;
9270
9271 if (reversed)
9272 {
9273 start = anchor;
9274 end = floater;
9275 }
9276 else
9277 {
9278 start = floater;
9279 end = anchor;
9280 }
9281
9282 /* There are up to three operands to consider: one
9283 output and two inputs.
9284
9285 The output must not be used between FLOATER & ANCHOR
9286 exclusive. The inputs must not be set between
9287 FLOATER and ANCHOR exclusive. */
9288
9289 if (reg_used_between_p (dest, start, end))
9290 return 0;
9291
9292 if (reg_set_between_p (src1, start, end))
9293 return 0;
9294
9295 if (reg_set_between_p (src2, start, end))
9296 return 0;
9297
9298 /* If we get here, then everything is good. */
9299 return 1;
9300 }
9301
9302 /* Return nonzero if references for INSN are delayed.
9303
9304 Millicode insns are actually function calls with some special
9305 constraints on arguments and register usage.
9306
9307 Millicode calls always expect their arguments in the integer argument
9308 registers, and always return their result in %r29 (ret1). They
9309 are expected to clobber their arguments, %r1, %r29, and the return
9310 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9311
9312 This function tells reorg that the references to arguments of
9313 millicode calls do not appear to happen until after the millicode call.
9314 This allows reorg to put insns which set the argument registers into the
9315 delay slot of the millicode call -- thus they act more like traditional
9316 CALL_INSNs.
9317
9318 Note we cannot consider side effects of the insn to be delayed because
9319 the branch and link insn will clobber the return pointer. If we happened
9320 to use the return pointer in the delay slot of the call, then we lose.
9321
9322 get_attr_type will try to recognize the given insn, so make sure to
9323 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9324 in particular. */
9325 int
9326 pa_insn_refs_are_delayed (rtx_insn *insn)
9327 {
9328 return ((NONJUMP_INSN_P (insn)
9329 && GET_CODE (PATTERN (insn)) != SEQUENCE
9330 && GET_CODE (PATTERN (insn)) != USE
9331 && GET_CODE (PATTERN (insn)) != CLOBBER
9332 && get_attr_type (insn) == TYPE_MILLI));
9333 }
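
/* For example (illustrative only; the register and millicode names are
   merely plausible spellings), reorg may now produce something like

       bl $$mulI,%r31    ; millicode call
       ldi 42,%r26       ; argument set in the delay slot

   which is safe only because the argument reference is delayed.  */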
9334
9335 /* Promote the return value, but not the arguments. */
9336
9337 static machine_mode
9338 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9339 machine_mode mode,
9340 int *punsignedp ATTRIBUTE_UNUSED,
9341 const_tree fntype ATTRIBUTE_UNUSED,
9342 int for_return)
9343 {
9344 if (for_return == 0)
9345 return mode;
9346 return promote_mode (type, mode, punsignedp);
9347 }
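
/* For example, with this hook a function returning "short" hands its
   result back widened to a full word in %r28, while a "short" argument
   keeps HImode here because only the for_return != 0 case promotes.  */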
9348
9349 /* On the HP-PA the value is found in register(s) 28(-29), unless
9350 the mode is SF or DF. Then the value is returned in fr4 (32).
9351
9352 This must perform the same promotions as PROMOTE_MODE, else promoting
9353 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9354
9355 Small structures must be returned in a PARALLEL on PA64 in order
9356 to match the HP Compiler ABI. */
9357
9358 static rtx
9359 pa_function_value (const_tree valtype,
9360 const_tree func ATTRIBUTE_UNUSED,
9361 bool outgoing ATTRIBUTE_UNUSED)
9362 {
9363 machine_mode valmode;
9364
9365 if (AGGREGATE_TYPE_P (valtype)
9366 || TREE_CODE (valtype) == COMPLEX_TYPE
9367 || TREE_CODE (valtype) == VECTOR_TYPE)
9368 {
9369 HOST_WIDE_INT valsize = int_size_in_bytes (valtype);
9370
9371 /* Handle aggregates that fit exactly in a word or double word. */
9372 if ((valsize & (UNITS_PER_WORD - 1)) == 0)
9373 return gen_rtx_REG (TYPE_MODE (valtype), 28);
9374
9375 if (TARGET_64BIT)
9376 {
9377 /* Aggregates with a size less than or equal to 128 bits are
9378 returned in GR 28(-29). They are left justified. The pad
9379 bits are undefined. Larger aggregates are returned in
9380 memory. */
9381 rtx loc[2];
9382 int i, offset = 0;
9383 int ub = valsize <= UNITS_PER_WORD ? 1 : 2;
9384
9385 for (i = 0; i < ub; i++)
9386 {
9387 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9388 gen_rtx_REG (DImode, 28 + i),
9389 GEN_INT (offset));
9390 offset += 8;
9391 }
9392
9393 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (ub, loc));
9394 }
9395 else if (valsize > UNITS_PER_WORD)
9396 {
9397 /* Aggregates 5 to 8 bytes in size are returned in general
9398 registers r28-r29 in the same manner as other non
9399 floating-point objects. The data is right-justified and
9400 zero-extended to 64 bits. This is the opposite of the normal
9401 justification used on big-endian targets and requires
9402 special treatment. */
9403 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9404 gen_rtx_REG (DImode, 28), const0_rtx);
9405 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9406 }
9407 }
9408
9409 if ((INTEGRAL_TYPE_P (valtype)
9410 && GET_MODE_BITSIZE (TYPE_MODE (valtype)) < BITS_PER_WORD)
9411 || POINTER_TYPE_P (valtype))
9412 valmode = word_mode;
9413 else
9414 valmode = TYPE_MODE (valtype);
9415
9416 if (TREE_CODE (valtype) == REAL_TYPE
9417 && !AGGREGATE_TYPE_P (valtype)
9418 && TYPE_MODE (valtype) != TFmode
9419 && !TARGET_SOFT_FLOAT)
9420 return gen_rtx_REG (valmode, 32);
9421
9422 return gen_rtx_REG (valmode, 28);
9423 }
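
/* Some illustrative cases, assuming the usual configuration: an "int"
   comes back in %r28, a "double" in fr4 (register 32 here), and on
   TARGET_64BIT a 16-byte aggregate comes back left justified in the
   two-register PARALLEL built above over %r28-%r29.  */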
9424
9425 /* Implement the TARGET_LIBCALL_VALUE hook. */
9426
9427 static rtx
9428 pa_libcall_value (machine_mode mode,
9429 const_rtx fun ATTRIBUTE_UNUSED)
9430 {
9431 if (! TARGET_SOFT_FLOAT
9432 && (mode == SFmode || mode == DFmode))
9433 return gen_rtx_REG (mode, 32);
9434 else
9435 return gen_rtx_REG (mode, 28);
9436 }
9437
9438 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9439
9440 static bool
9441 pa_function_value_regno_p (const unsigned int regno)
9442 {
9443 if (regno == 28
9444 || (! TARGET_SOFT_FLOAT && regno == 32))
9445 return true;
9446
9447 return false;
9448 }
9449
9450 /* Update the data in CUM to advance over an argument
9451 of mode MODE and data type TYPE.
9452 (TYPE is null for libcalls where that information may not be available.) */
9453
9454 static void
9455 pa_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
9456 const_tree type, bool named ATTRIBUTE_UNUSED)
9457 {
9458 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9459 int arg_size = pa_function_arg_size (mode, type);
9460
9461 cum->nargs_prototype--;
9462 cum->words += (arg_size
9463 + ((cum->words & 01)
9464 && type != NULL_TREE
9465 && arg_size > 1));
9466 }
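
/* Worked example: if cum->words is 1 and a "double" (arg_size 2)
   arrives, the (cum->words & 01) test contributes one word of padding,
   so cum->words advances from 1 to 4 and the argument starts on an
   even word slot.  */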
9467
9468 /* Return the location of a parameter that is passed in a register or NULL
9469 if the parameter has any component that is passed in memory.
9470
9471 This is new code and will be pushed into the net sources after
9472 further testing.
9473
9474 ??? We might want to restructure this so that it looks more like other
9475 ports. */
9476 static rtx
9477 pa_function_arg (cumulative_args_t cum_v, machine_mode mode,
9478 const_tree type, bool named ATTRIBUTE_UNUSED)
9479 {
9480 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9481 int max_arg_words = (TARGET_64BIT ? 8 : 4);
9482 int alignment = 0;
9483 int arg_size;
9484 int fpr_reg_base;
9485 int gpr_reg_base;
9486 rtx retval;
9487
9488 if (mode == VOIDmode)
9489 return NULL_RTX;
9490
9491 arg_size = pa_function_arg_size (mode, type);
9492
9493 /* If this arg would be passed partially or totally on the stack, then
9494 this routine should return zero. pa_arg_partial_bytes will
9495 handle arguments which are split between regs and stack slots if
9496 the ABI mandates split arguments. */
9497 if (!TARGET_64BIT)
9498 {
9499 /* The 32-bit ABI does not split arguments. */
9500 if (cum->words + arg_size > max_arg_words)
9501 return NULL_RTX;
9502 }
9503 else
9504 {
9505 if (arg_size > 1)
9506 alignment = cum->words & 1;
9507 if (cum->words + alignment >= max_arg_words)
9508 return NULL_RTX;
9509 }
9510
9511 /* The 32-bit and 64-bit ABIs are rather different,
9512 particularly in their handling of FP registers. We might
9513 be able to cleverly share code between them, but I'm not
9514 going to bother in the hope that splitting them up results
9515 in code that is more easily understood. */
9516
9517 if (TARGET_64BIT)
9518 {
9519 /* Advance the base registers to their current locations.
9520
9521 Remember, gprs grow towards smaller register numbers while
9522 fprs grow towards higher register numbers. Also remember that
9523 although FP regs are 32-bit addressable, we pretend that
9524 the registers are 64 bits wide.
9525 gpr_reg_base = 26 - cum->words;
9526 fpr_reg_base = 32 + cum->words;
9527
9528 /* Arguments wider than one word and small aggregates need special
9529 treatment. */
9530 if (arg_size > 1
9531 || mode == BLKmode
9532 || (type && (AGGREGATE_TYPE_P (type)
9533 || TREE_CODE (type) == COMPLEX_TYPE
9534 || TREE_CODE (type) == VECTOR_TYPE)))
9535 {
9536 /* Double-extended precision (80-bit), quad-precision (128-bit)
9537 and aggregates including complex numbers are aligned on
9538 128-bit boundaries. The first eight 64-bit argument slots
9539 are associated one-to-one, with general registers r26
9540 through r19, and also with floating-point registers fr4
9541 through fr11. Arguments larger than one word are always
9542 passed in general registers.
9543
9544 Using a PARALLEL with a word mode register results in left
9545 justified data on a big-endian target. */
9546
9547 rtx loc[8];
9548 int i, offset = 0, ub = arg_size;
9549
9550 /* Align the base register. */
9551 gpr_reg_base -= alignment;
9552
9553 ub = MIN (ub, max_arg_words - cum->words - alignment);
9554 for (i = 0; i < ub; i++)
9555 {
9556 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
9557 gen_rtx_REG (DImode, gpr_reg_base),
9558 GEN_INT (offset));
9559 gpr_reg_base -= 1;
9560 offset += 8;
9561 }
9562
9563 return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
9564 }
9565 }
9566 else
9567 {
9568 /* If the argument is larger than a word, then we know precisely
9569 which registers we must use. */
9570 if (arg_size > 1)
9571 {
9572 if (cum->words)
9573 {
9574 gpr_reg_base = 23;
9575 fpr_reg_base = 38;
9576 }
9577 else
9578 {
9579 gpr_reg_base = 25;
9580 fpr_reg_base = 34;
9581 }
9582
9583 /* Structures 5 to 8 bytes in size are passed in the general
9584 registers in the same manner as other non floating-point
9585 objects. The data is right-justified and zero-extended
9586 to 64 bits. This is the opposite of the normal justification
9587 used on big-endian targets and requires special treatment.
9588 We now define BLOCK_REG_PADDING to pad these objects.
9589 Aggregates, complex and vector types are passed in the same
9590 manner as structures. */
9591 if (mode == BLKmode
9592 || (type && (AGGREGATE_TYPE_P (type)
9593 || TREE_CODE (type) == COMPLEX_TYPE
9594 || TREE_CODE (type) == VECTOR_TYPE)))
9595 {
9596 rtx loc = gen_rtx_EXPR_LIST (VOIDmode,
9597 gen_rtx_REG (DImode, gpr_reg_base),
9598 const0_rtx);
9599 return gen_rtx_PARALLEL (BLKmode, gen_rtvec (1, loc));
9600 }
9601 }
9602 else
9603 {
9604 /* We have a single word (32 bits). A simple computation
9605 will get us the register #s we need. */
9606 gpr_reg_base = 26 - cum->words;
9607 fpr_reg_base = 32 + 2 * cum->words;
9608 }
9609 }
9610
9611 /* Determine if the argument needs to be passed in both general and
9612 floating point registers. */
9613 if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
9614 /* If we are doing soft-float with portable runtime, then there
9615 is no need to worry about FP regs. */
9616 && !TARGET_SOFT_FLOAT
9617 /* The parameter must be some kind of scalar float, else we just
9618 pass it in integer registers. */
9619 && GET_MODE_CLASS (mode) == MODE_FLOAT
9620 /* The target function must not have a prototype. */
9621 && cum->nargs_prototype <= 0
9622 /* libcalls do not need to pass items in both FP and general
9623 registers. */
9624 && type != NULL_TREE
9625 /* All this hair applies to "outgoing" args only. This includes
9626 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9627 && !cum->incoming)
9628 /* Also pass outgoing floating arguments in both registers in indirect
9629 calls with the 32-bit ABI and the HP assembler since there is no
9630 way to specify argument locations in static functions.
9631 || (!TARGET_64BIT
9632 && !TARGET_GAS
9633 && !cum->incoming
9634 && cum->indirect
9635 && GET_MODE_CLASS (mode) == MODE_FLOAT))
9636 {
9637 retval
9638 = gen_rtx_PARALLEL
9639 (mode,
9640 gen_rtvec (2,
9641 gen_rtx_EXPR_LIST (VOIDmode,
9642 gen_rtx_REG (mode, fpr_reg_base),
9643 const0_rtx),
9644 gen_rtx_EXPR_LIST (VOIDmode,
9645 gen_rtx_REG (mode, gpr_reg_base),
9646 const0_rtx)));
9647 }
9648 else
9649 {
9650 /* See if we should pass this parameter in a general register. */
9651 if (TARGET_SOFT_FLOAT
9652 /* Indirect calls in the normal 32-bit ABI require all arguments
9653 to be passed in general registers. */
9654 || (!TARGET_PORTABLE_RUNTIME
9655 && !TARGET_64BIT
9656 && !TARGET_ELF32
9657 && cum->indirect)
9658 /* If the parameter is not a scalar floating-point parameter,
9659 then it belongs in GPRs. */
9660 || GET_MODE_CLASS (mode) != MODE_FLOAT
9661 /* A structure with a single SFmode field belongs in a GPR. */
9662 || (type && AGGREGATE_TYPE_P (type)))
9663 retval = gen_rtx_REG (mode, gpr_reg_base);
9664 else
9665 retval = gen_rtx_REG (mode, fpr_reg_base);
9666 }
9667 return retval;
9668 }
9669
9670 /* Arguments larger than one word are double word aligned. */
9671
9672 static unsigned int
9673 pa_function_arg_boundary (machine_mode mode, const_tree type)
9674 {
9675 bool singleword = (type
9676 ? (integer_zerop (TYPE_SIZE (type))
9677 || !TREE_CONSTANT (TYPE_SIZE (type))
9678 || int_size_in_bytes (type) <= UNITS_PER_WORD)
9679 : GET_MODE_SIZE (mode) <= UNITS_PER_WORD);
9680
9681 return singleword ? PARM_BOUNDARY : MAX_PARM_BOUNDARY;
9682 }
9683
9684 /* If this arg would be passed totally in registers or totally on the stack,
9685 then this routine should return zero. */
9686
9687 static int
9688 pa_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
9689 tree type, bool named ATTRIBUTE_UNUSED)
9690 {
9691 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9692 unsigned int max_arg_words = 8;
9693 unsigned int offset = 0;
9694
9695 if (!TARGET_64BIT)
9696 return 0;
9697
9698 if (pa_function_arg_size (mode, type) > 1 && (cum->words & 1))
9699 offset = 1;
9700
9701 if (cum->words + offset + pa_function_arg_size (mode, type) <= max_arg_words)
9702 /* Arg fits fully into registers. */
9703 return 0;
9704 else if (cum->words + offset >= max_arg_words)
9705 /* Arg fully on the stack. */
9706 return 0;
9707 else
9708 /* Arg is split. */
9709 return (max_arg_words - cum->words - offset) * UNITS_PER_WORD;
9710 }
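
/* Worked example (TARGET_64BIT only, as above): with cum->words == 6
   and a 32-byte aggregate (arg size 4 words), 6 + 0 + 4 exceeds 8 while
   6 < 8, so the argument is split and (8 - 6 - 0) * UNITS_PER_WORD
   = 16 bytes are passed in registers.  */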
9711
9712
9713 /* A get_unnamed_section callback for switching to the text section.
9714
9715 This function is only used with SOM. Because we don't support
9716 named subspaces, we can only create a new subspace or switch back
9717 to the default text subspace. */
9718
9719 static void
9720 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED)
9721 {
9722 gcc_assert (TARGET_SOM);
9723 if (TARGET_GAS)
9724 {
9725 if (cfun && cfun->machine && !cfun->machine->in_nsubspa)
9726 {
9727 /* We only want to emit a .nsubspa directive once at the
9728 start of the function. */
9729 cfun->machine->in_nsubspa = 1;
9730
9731 /* Create a new subspace for the text. This provides
9732 better stub placement and one-only functions. */
9733 if (cfun->decl
9734 && DECL_ONE_ONLY (cfun->decl)
9735 && !DECL_WEAK (cfun->decl))
9736 {
9737 output_section_asm_op ("\t.SPACE $TEXT$\n"
9738 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9739 "ACCESS=44,SORT=24,COMDAT");
9740 return;
9741 }
9742 }
9743 else
9744 {
9745 /* There isn't a current function or the body of the current
9746 function has been completed. So, we are changing to the
9747 text section to output debugging information. Thus, we
9748 need to forget that we are in the text section so that
9749 varasm.c will call us when text_section is selected again. */
9750 gcc_assert (!cfun || !cfun->machine
9751 || cfun->machine->in_nsubspa == 2);
9752 in_section = NULL;
9753 }
9754 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
9755 return;
9756 }
9757 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
9758 }
9759
9760 /* A get_unnamed_section callback for switching to comdat data
9761 sections. This function is only used with SOM. */
9762
9763 static void
9764 som_output_comdat_data_section_asm_op (const void *data)
9765 {
9766 in_section = NULL;
9767 output_section_asm_op (data);
9768 }
9769
9770 /* Implement TARGET_ASM_INIT_SECTIONS. */
9771
9772 static void
9773 pa_som_asm_init_sections (void)
9774 {
9775 text_section
9776 = get_unnamed_section (0, som_output_text_section_asm_op, NULL);
9777
9778 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9779 is not being generated. */
9780 som_readonly_data_section
9781 = get_unnamed_section (0, output_section_asm_op,
9782 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9783
9784 /* When secondary definitions are not supported, SOM makes readonly
9785 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9786 the comdat flag. */
9787 som_one_only_readonly_data_section
9788 = get_unnamed_section (0, som_output_comdat_data_section_asm_op,
9789 "\t.SPACE $TEXT$\n"
9790 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9791 "ACCESS=0x2c,SORT=16,COMDAT");
9792
9794 /* When secondary definitions are not supported, SOM makes data one-only
9795 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9796 som_one_only_data_section
9797 = get_unnamed_section (SECTION_WRITE,
9798 som_output_comdat_data_section_asm_op,
9799 "\t.SPACE $PRIVATE$\n"
9800 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9801 "ACCESS=31,SORT=24,COMDAT");
9802
9803 if (flag_tm)
9804 som_tm_clone_table_section
9805 = get_unnamed_section (0, output_section_asm_op,
9806 "\t.SPACE $PRIVATE$\n\t.SUBSPA $TM_CLONE_TABLE$");
9807
9808 /* HPUX ld generates incorrect GOT entries for "T" fixups which
9809 reference data within the $TEXT$ space (for example constant
9810 strings in the $LIT$ subspace).
9811
9812 The assemblers (GAS and HP as) both have problems with handling
9813 the difference of two symbols. This is the other correct way to
9814 reference constant data during PIC code generation.
9815
9816 Thus, we can't put constant data needing relocation in the $TEXT$
9817 space during PIC generation.
9818
9819 Previously, we placed all constant data into the $DATA$ subspace
9820 when generating PIC code. This reduces sharing, but it works
9821 correctly. Now we rely on pa_reloc_rw_mask() for section selection.
9822 This puts constant data not needing relocation into the $TEXT$ space. */
9823 readonly_data_section = som_readonly_data_section;
9824
9825 /* We must not have a reference to an external symbol defined in a
9826 shared library in a readonly section, else the SOM linker will
9827 complain.
9828
9829 So, we force exception information into the data section. */
9830 exception_section = data_section;
9831 }
9832
9833 /* Implement TARGET_ASM_TM_CLONE_TABLE_SECTION. */
9834
9835 static section *
9836 pa_som_tm_clone_table_section (void)
9837 {
9838 return som_tm_clone_table_section;
9839 }
9840
9841 /* On hpux10, the linker will give an error if we have a reference
9842 in the read-only data section to a symbol defined in a shared
9843 library. Therefore, expressions that might require a reloc
9844 cannot be placed in the read-only data section. */
9845
9846 static section *
9847 pa_select_section (tree exp, int reloc,
9848 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
9849 {
9850 if (TREE_CODE (exp) == VAR_DECL
9851 && TREE_READONLY (exp)
9852 && !TREE_THIS_VOLATILE (exp)
9853 && DECL_INITIAL (exp)
9854 && (DECL_INITIAL (exp) == error_mark_node
9855 || TREE_CONSTANT (DECL_INITIAL (exp)))
9856 && !(reloc & pa_reloc_rw_mask ()))
9857 {
9858 if (TARGET_SOM
9859 && DECL_ONE_ONLY (exp)
9860 && !DECL_WEAK (exp))
9861 return som_one_only_readonly_data_section;
9862 else
9863 return readonly_data_section;
9864 }
9865 else if (CONSTANT_CLASS_P (exp)
9866 && !(reloc & pa_reloc_rw_mask ()))
9867 return readonly_data_section;
9868 else if (TARGET_SOM
9869 && TREE_CODE (exp) == VAR_DECL
9870 && DECL_ONE_ONLY (exp)
9871 && !DECL_WEAK (exp))
9872 return som_one_only_data_section;
9873 else
9874 return data_section;
9875 }
9876
9877 /* Implement pa_reloc_rw_mask. */
9878
9879 static int
9880 pa_reloc_rw_mask (void)
9881 {
9882 if (flag_pic || (TARGET_SOM && !TARGET_HPUX_11))
9883 return 3;
9884
9885 /* HP linker does not support global relocs in readonly memory. */
9886 return TARGET_SOM ? 2 : 0;
9887 }
9888
9889 static void
9890 pa_globalize_label (FILE *stream, const char *name)
9891 {
9892 /* We only handle DATA objects here, functions are globalized in
9893 ASM_DECLARE_FUNCTION_NAME. */
9894 if (! FUNCTION_NAME_P (name))
9895 {
9896 fputs ("\t.EXPORT ", stream);
9897 assemble_name (stream, name);
9898 fputs (",DATA\n", stream);
9899 }
9900 }
9901
9902 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9903
9904 static rtx
9905 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
9906 int incoming ATTRIBUTE_UNUSED)
9907 {
9908 return gen_rtx_REG (Pmode, PA_STRUCT_VALUE_REGNUM);
9909 }
9910
9911 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9912
9913 bool
9914 pa_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9915 {
9916 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9917 PA64 ABI says that objects larger than 128 bits are returned in memory.
9918 Note, int_size_in_bytes can return -1 if the size of the object is
9919 variable or larger than the maximum value that can be expressed as
9920 a HOST_WIDE_INT. It can also return zero for an empty type. The
9921 simplest way to handle variable and empty types is to pass them in
9922 memory. This avoids problems in defining the boundaries of argument
9923 slots, allocating registers, etc. */
9924 return (int_size_in_bytes (type) > (TARGET_64BIT ? 16 : 8)
9925 || int_size_in_bytes (type) <= 0);
9926 }
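
/* For example, a 12-byte structure is returned in memory on the 32-bit
   runtime (12 > 8) but in registers on PA64 (12 <= 16), and any
   variable-sized object (int_size_in_bytes returns -1) always goes to
   memory.  */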
9927
9928 /* Structure to hold declaration and name of external symbols that are
9929 emitted by GCC. We generate a vector of these symbols and output them
9930 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9931 This avoids putting out names that are never really used. */
9932
9933 typedef struct GTY(()) extern_symbol
9934 {
9935 tree decl;
9936 const char *name;
9937 } extern_symbol;
9938
9939 /* Garbage-collected vector of extern_symbol entries. */
9942 static GTY(()) vec<extern_symbol, va_gc> *extern_symbols;
9943
9944 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9945 /* Mark DECL (name NAME) as an external reference (assembler output
9946 file FILE). This saves the names to output at the end of the file
9947 if actually referenced. */
9948
9949 void
9950 pa_hpux_asm_output_external (FILE *file, tree decl, const char *name)
9951 {
9952 gcc_assert (file == asm_out_file);
9953 extern_symbol p = {decl, name};
9954 vec_safe_push (extern_symbols, p);
9955 }
9956 #endif
9957
9958 /* Output text required at the end of an assembler file.
9959 This includes deferred plabels and .import directives for
9960 all external symbols that were actually referenced. */
9961
9962 static void
9963 pa_file_end (void)
9964 {
9965 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9966 unsigned int i;
9967 extern_symbol *p;
9968
9969 if (!NO_DEFERRED_PROFILE_COUNTERS)
9970 output_deferred_profile_counters ();
9971 #endif
9972
9973 output_deferred_plabels ();
9974
9975 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9976 for (i = 0; vec_safe_iterate (extern_symbols, i, &p); i++)
9977 {
9978 tree decl = p->decl;
9979
9980 if (!TREE_ASM_WRITTEN (decl)
9981 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl), 0)))
9982 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file, decl, p->name);
9983 }
9984
9985 vec_free (extern_symbols);
9986 #endif
9987
9988 if (NEED_INDICATE_EXEC_STACK)
9989 file_end_indicate_exec_stack ();
9990 }
9991
9992 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
9993
9994 static bool
9995 pa_can_change_mode_class (machine_mode from, machine_mode to,
9996 reg_class_t rclass)
9997 {
9998 if (from == to)
9999 return true;
10000
10001 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to))
10002 return true;
10003
10004 /* Reject changes to/from modes with zero size. */
10005 if (!GET_MODE_SIZE (from) || !GET_MODE_SIZE (to))
10006 return false;
10007
10008 /* Reject changes to/from complex and vector modes. */
10009 if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
10010 || COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
10011 return false;
10012
10013 /* There is no way to load QImode or HImode values directly from memory
10014 to a FP register. SImode loads to the FP registers are not zero
10015 extended. On the 64-bit target, this conflicts with the definition
10016 of LOAD_EXTEND_OP. Thus, we reject all mode changes in the FP registers
10017 except for DImode to SImode on the 64-bit target. It is handled by
10018 register renaming in pa_print_operand. */
10019 if (MAYBE_FP_REG_CLASS_P (rclass))
10020 return TARGET_64BIT && from == DImode && to == SImode;
10021
10022 /* TARGET_HARD_REGNO_MODE_OK places modes with sizes larger than a word
10023 in specific sets of registers. Thus, we cannot allow changing
10024 to a larger mode when it's larger than a word. */
10025 if (GET_MODE_SIZE (to) > UNITS_PER_WORD
10026 && GET_MODE_SIZE (to) > GET_MODE_SIZE (from))
10027 return false;
10028
10029 return true;
10030 }
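
/* For example, SFmode <-> SImode (equal 4-byte sizes) is allowed for
   any class by the size test above; DImode -> SImode is allowed in the
   FP registers only on the 64-bit target; and SImode -> DImode is
   rejected in the FP registers everywhere.  */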
10031
10032 /* Implement TARGET_MODES_TIEABLE_P.
10033
10034 We should return FALSE for QImode and HImode because these modes
10035 are not OK in the floating-point registers. However, this prevents
10036 tying these modes to SImode and DImode in the general registers.
10037 So, this isn't a good idea. We rely on TARGET_HARD_REGNO_MODE_OK and
10038 TARGET_CAN_CHANGE_MODE_CLASS to prevent these modes from being used
10039 in the floating-point registers. */
10040
10041 static bool
10042 pa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10043 {
10044 /* Don't tie modes in different classes. */
10045 if (GET_MODE_CLASS (mode1) != GET_MODE_CLASS (mode2))
10046 return false;
10047
10048 return true;
10049 }
10050
10051 \f
10052 /* Length in units of the trampoline instruction code. */
10053
10054 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
10055
10056
10057 /* Output assembler code for a block containing the constant parts
10058 of a trampoline, leaving space for the variable parts.
10059
10060 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
10061 and then branches to the specified routine.
10062
10063 This code template is copied from text segment to stack location
10064 and then patched with pa_trampoline_init to contain valid values,
10065 and then entered as a subroutine.
10066
10067 It is best to keep this as small as possible to avoid having to
10068 flush multiple lines in the cache. */
10069
10070 static void
10071 pa_asm_trampoline_template (FILE *f)
10072 {
10073 if (!TARGET_64BIT)
10074 {
10075 fputs ("\tldw 36(%r22),%r21\n", f);
10076 fputs ("\tbb,>=,n %r21,30,.+16\n", f);
10077 if (ASSEMBLER_DIALECT == 0)
10078 fputs ("\tdepi 0,31,2,%r21\n", f);
10079 else
10080 fputs ("\tdepwi 0,31,2,%r21\n", f);
10081 fputs ("\tldw 4(%r21),%r19\n", f);
10082 fputs ("\tldw 0(%r21),%r21\n", f);
10083 if (TARGET_PA_20)
10084 {
10085 fputs ("\tbve (%r21)\n", f);
10086 fputs ("\tldw 40(%r22),%r29\n", f);
10087 fputs ("\t.word 0\n", f);
10088 fputs ("\t.word 0\n", f);
10089 }
10090 else
10091 {
10092 fputs ("\tldsid (%r21),%r1\n", f);
10093 fputs ("\tmtsp %r1,%sr0\n", f);
10094 fputs ("\tbe 0(%sr0,%r21)\n", f);
10095 fputs ("\tldw 40(%r22),%r29\n", f);
10096 }
10097 fputs ("\t.word 0\n", f);
10098 fputs ("\t.word 0\n", f);
10099 fputs ("\t.word 0\n", f);
10100 fputs ("\t.word 0\n", f);
10101 }
10102 else
10103 {
10104 fputs ("\t.dword 0\n", f);
10105 fputs ("\t.dword 0\n", f);
10106 fputs ("\t.dword 0\n", f);
10107 fputs ("\t.dword 0\n", f);
10108 fputs ("\tmfia %r31\n", f);
10109 fputs ("\tldd 24(%r31),%r1\n", f);
10110 fputs ("\tldd 24(%r1),%r27\n", f);
10111 fputs ("\tldd 16(%r1),%r1\n", f);
10112 fputs ("\tbve (%r1)\n", f);
10113 fputs ("\tldd 32(%r31),%r31\n", f);
10114 fputs ("\t.dword 0 ; fptr\n", f);
10115 fputs ("\t.dword 0 ; static link\n", f);
10116 }
10117 }
10118
10119 /* Emit RTL insns to initialize the variable parts of a trampoline.
10120 FNADDR is an RTX for the address of the function's pure code.
10121 CXT is an RTX for the static chain value for the function.
10122
10123 Move the function address to the trampoline template at offset 36.
10124 Move the static chain value to trampoline template at offset 40.
10125 Move the trampoline address to trampoline template at offset 44.
10126 Move r19 to trampoline template at offset 48. The latter two
10127 words create a plabel for the indirect call to the trampoline.
10128
10129 A similar sequence is used for the 64-bit port but the plabel is
10130 at the beginning of the trampoline.
10131
10132 Finally, the cache entries for the trampoline code are flushed.
10133 This is necessary to ensure that the trampoline instruction sequence
10134 is written to memory prior to any attempts at prefetching the code
10135 sequence. */
10136
10137 static void
10138 pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10139 {
10140 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10141 rtx start_addr = gen_reg_rtx (Pmode);
10142 rtx end_addr = gen_reg_rtx (Pmode);
10143 rtx line_length = gen_reg_rtx (Pmode);
10144 rtx r_tramp, tmp;
10145
10146 emit_block_move (m_tramp, assemble_trampoline_template (),
10147 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
10148 r_tramp = force_reg (Pmode, XEXP (m_tramp, 0));
10149
10150 if (!TARGET_64BIT)
10151 {
10152 tmp = adjust_address (m_tramp, Pmode, 36);
10153 emit_move_insn (tmp, fnaddr);
10154 tmp = adjust_address (m_tramp, Pmode, 40);
10155 emit_move_insn (tmp, chain_value);
10156
10157 /* Create a fat pointer for the trampoline. */
10158 tmp = adjust_address (m_tramp, Pmode, 44);
10159 emit_move_insn (tmp, r_tramp);
10160 tmp = adjust_address (m_tramp, Pmode, 48);
10161 emit_move_insn (tmp, gen_rtx_REG (Pmode, 19));
10162
10163 /* fdc and fic only use registers for the address to flush;
10164 they do not accept integer displacements. We align the
10165 start and end addresses to the beginning of their respective
10166 cache lines to minimize the number of lines flushed. */
10167 emit_insn (gen_andsi3 (start_addr, r_tramp,
10168 GEN_INT (-MIN_CACHELINE_SIZE)));
10169 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp,
10170 TRAMPOLINE_CODE_SIZE-1));
10171 emit_insn (gen_andsi3 (end_addr, tmp,
10172 GEN_INT (-MIN_CACHELINE_SIZE)));
10173 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10174 emit_insn (gen_dcacheflushsi (start_addr, end_addr, line_length));
10175 emit_insn (gen_icacheflushsi (start_addr, end_addr, line_length,
10176 gen_reg_rtx (Pmode),
10177 gen_reg_rtx (Pmode)));
10178 }
10179 else
10180 {
10181 tmp = adjust_address (m_tramp, Pmode, 56);
10182 emit_move_insn (tmp, fnaddr);
10183 tmp = adjust_address (m_tramp, Pmode, 64);
10184 emit_move_insn (tmp, chain_value);
10185
10186 /* Create a fat pointer for the trampoline. */
10187 tmp = adjust_address (m_tramp, Pmode, 16);
10188 emit_move_insn (tmp, force_reg (Pmode, plus_constant (Pmode,
10189 r_tramp, 32)));
10190 tmp = adjust_address (m_tramp, Pmode, 24);
10191 emit_move_insn (tmp, gen_rtx_REG (Pmode, 27));
10192
10193 /* fdc and fic only use registers for the address to flush;
10194 they do not accept integer displacements. We align the
10195 start and end addresses to the beginning of their respective
10196 cache lines to minimize the number of lines flushed. */
10197 tmp = force_reg (Pmode, plus_constant (Pmode, r_tramp, 32));
10198 emit_insn (gen_anddi3 (start_addr, tmp,
10199 GEN_INT (-MIN_CACHELINE_SIZE)));
10200 tmp = force_reg (Pmode, plus_constant (Pmode, tmp,
10201 TRAMPOLINE_CODE_SIZE - 1));
10202 emit_insn (gen_anddi3 (end_addr, tmp,
10203 GEN_INT (-MIN_CACHELINE_SIZE)));
10204 emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE));
10205 emit_insn (gen_dcacheflushdi (start_addr, end_addr, line_length));
10206 emit_insn (gen_icacheflushdi (start_addr, end_addr, line_length,
10207 gen_reg_rtx (Pmode),
10208 gen_reg_rtx (Pmode)));
10209 }
10210
10211 #ifdef HAVE_ENABLE_EXECUTE_STACK
10212 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10213 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10214 #endif
10215 }
10216
10217 /* Perform any machine-specific adjustment in the address of the trampoline.
10218 ADDR contains the address that was passed to pa_trampoline_init.
10219 Adjust the trampoline address to point to the plabel at offset 44
10219 (the extra 2 added below marks the address as a plabel). */
10220
10221 static rtx
10222 pa_trampoline_adjust_address (rtx addr)
10223 {
10224 if (!TARGET_64BIT)
10225 addr = memory_address (Pmode, plus_constant (Pmode, addr, 46));
10226 return addr;
10227 }
10228
10229 static rtx
10230 pa_delegitimize_address (rtx orig_x)
10231 {
10232 rtx x = delegitimize_mem_from_attrs (orig_x);
10233
10234 if (GET_CODE (x) == LO_SUM
10235 && GET_CODE (XEXP (x, 1)) == UNSPEC
10236 && XINT (XEXP (x, 1), 1) == UNSPEC_DLTIND14R)
10237 return gen_const_mem (Pmode, XVECEXP (XEXP (x, 1), 0, 0));
10238 return x;
10239 }
10240 \f
10241 static rtx
10242 pa_internal_arg_pointer (void)
10243 {
10244 /* The argument pointer and the hard frame pointer are the same in
10245 the 32-bit runtime, so we don't need a copy. */
10246 if (TARGET_64BIT)
10247 return copy_to_reg (virtual_incoming_args_rtx);
10248 else
10249 return virtual_incoming_args_rtx;
10250 }
10251
10252 /* Given FROM and TO register numbers, say whether this elimination is allowed.
10253 Frame pointer elimination is automatically handled. */
10254
10255 static bool
10256 pa_can_eliminate (const int from, const int to)
10257 {
10258 /* The argument pointer cannot be eliminated in the 64-bit runtime. */
10259 if (TARGET_64BIT && from == ARG_POINTER_REGNUM)
10260 return false;
10261
10262 return (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
10263 ? ! frame_pointer_needed
10264 : true);
10265 }
10266
10267 /* Define the offset between two registers, FROM to be eliminated and its
10268 replacement TO, at the start of a routine. */
10269 HOST_WIDE_INT
10270 pa_initial_elimination_offset (int from, int to)
10271 {
10272 HOST_WIDE_INT offset;
10273
10274 if ((from == HARD_FRAME_POINTER_REGNUM || from == FRAME_POINTER_REGNUM)
10275 && to == STACK_POINTER_REGNUM)
10276 offset = -pa_compute_frame_size (get_frame_size (), 0);
10277 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10278 offset = 0;
10279 else
10280 gcc_unreachable ();
10281
10282 return offset;
10283 }
10284
10285 static void
10286 pa_conditional_register_usage (void)
10287 {
10288 int i;
10289
10290 if (!TARGET_64BIT && !TARGET_PA_11)
10291 {
10292 for (i = 56; i <= FP_REG_LAST; i++)
10293 fixed_regs[i] = call_used_regs[i] = 1;
10294 for (i = 33; i < 56; i += 2)
10295 fixed_regs[i] = call_used_regs[i] = 1;
10296 }
10297 if (TARGET_DISABLE_FPREGS || TARGET_SOFT_FLOAT)
10298 {
10299 for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
10300 fixed_regs[i] = call_used_regs[i] = 1;
10301 }
10302 if (flag_pic)
10303 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10304 }
10305
10306 /* Target hook for c_mode_for_suffix. */
10307
10308 static machine_mode
10309 pa_c_mode_for_suffix (char suffix)
10310 {
10311 if (HPUX_LONG_DOUBLE_LIBRARY)
10312 {
10313 if (suffix == 'q')
10314 return TFmode;
10315 }
10316
10317 return VOIDmode;
10318 }
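
/* For example, with an HP-UX long double library the C constant 1.0q
   is given TFmode (128-bit); otherwise VOIDmode is returned and the
   front end rejects the 'q' suffix.  */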
10319
10320 /* Target hook for function_section. */
10321
10322 static section *
10323 pa_function_section (tree decl, enum node_frequency freq,
10324 bool startup, bool exit)
10325 {
10326 /* Put functions in text section if target doesn't have named sections. */
10327 if (!targetm_common.have_named_sections)
10328 return text_section;
10329
10330 /* Force nested functions into the same section as the containing
10331 function. */
10332 if (decl
10333 && DECL_SECTION_NAME (decl) == NULL
10334 && DECL_CONTEXT (decl) != NULL_TREE
10335 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
10336 && DECL_SECTION_NAME (DECL_CONTEXT (decl)) == NULL)
10337 return function_section (DECL_CONTEXT (decl));
10338
10339 /* Otherwise, use the default function section. */
10340 return default_function_section (decl, freq, startup, exit);
10341 }
10342
10343 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
10344
10345 In 64-bit mode, we reject CONST_DOUBLES. We also reject CONST_INTS
10346 that need more than three instructions to load prior to reload. This
10347 limit is somewhat arbitrary. It takes three instructions to load a
10348 CONST_INT from memory but two are memory accesses. It may be better
10349 to increase the allowed range for CONST_INTS. We may also be able
10350 to handle CONST_DOUBLES. */
10351
10352 static bool
10353 pa_legitimate_constant_p (machine_mode mode, rtx x)
10354 {
10355 if (GET_MODE_CLASS (mode) == MODE_FLOAT && x != CONST0_RTX (mode))
10356 return false;
10357
10358 if (!NEW_HP_ASSEMBLER && !TARGET_GAS && GET_CODE (x) == LABEL_REF)
10359 return false;
10360
10361 /* TLS_MODEL_GLOBAL_DYNAMIC and TLS_MODEL_LOCAL_DYNAMIC are not
10362 legitimate constants. The other variants can't be handled by
10363 the move patterns after reload starts. */
10364 if (tls_referenced_p (x))
10365 return false;
10366
10367 if (TARGET_64BIT && GET_CODE (x) == CONST_DOUBLE)
10368 return false;
10369
10370 if (TARGET_64BIT
10371 && HOST_BITS_PER_WIDE_INT > 32
10372 && GET_CODE (x) == CONST_INT
10373 && !reload_in_progress
10374 && !reload_completed
10375 && !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
10376 && !pa_cint_ok_for_move (UINTVAL (x)))
10377 return false;
10378
10379 if (function_label_operand (x, mode))
10380 return false;
10381
10382 return true;
10383 }
10384
10385 /* Implement TARGET_SECTION_TYPE_FLAGS. */
10386
10387 static unsigned int
10388 pa_section_type_flags (tree decl, const char *name, int reloc)
10389 {
10390 unsigned int flags;
10391
10392 flags = default_section_type_flags (decl, name, reloc);
10393
10394 /* Function labels are placed in the constant pool. This can
10395 cause a section conflict if decls are put in ".data.rel.ro"
10396 or ".data.rel.ro.local" using the __attribute__ construct. */
10397 if (strcmp (name, ".data.rel.ro") == 0
10398 || strcmp (name, ".data.rel.ro.local") == 0)
10399 flags |= SECTION_WRITE | SECTION_RELRO;
10400
10401 return flags;
10402 }
10403
10404 /* pa_legitimate_address_p recognizes an RTL expression that is a
10405 valid memory address for an instruction. The MODE argument is the
10406 machine mode for the MEM expression that wants to use this address.
10407
10408 On HP PA-RISC, the legitimate address forms are REG+SMALLINT,
10409 REG+REG, and REG+(REG*SCALE). The indexed address forms are only
10410 available with floating point loads and stores, and integer loads.
10411 We get better code by allowing indexed addresses in the initial
10412 RTL generation.
10413
10414 The acceptance of indexed addresses as legitimate implies that we
10415 must provide patterns for doing indexed integer stores, or the move
10416 expanders must force the address of an indexed store to a register.
10417 We have adopted the latter approach.
10418
10419 Another function of pa_legitimate_address_p is to ensure that
10420 the base register is a valid pointer for indexed instructions.
10421 On targets that have non-equivalent space registers, we have to
10422 know at the time of assembler output which register in a REG+REG
10423 pair is the base register. The REG_POINTER flag is sometimes lost
10424 in reload and the following passes, so it can't be relied on during
10425 code generation. Thus, we either have to canonicalize the order
10426 of the registers in REG+REG indexed addresses, or treat REG+REG
10427 addresses separately and provide patterns for both permutations.
10428
10429 The latter approach requires several hundred additional lines of
10430 code in pa.md. The downside to canonicalizing is that a PLUS
10431 in the wrong order can't combine to make a scaled indexed
10432 memory operand. As we won't need to canonicalize the operands if
10433 the REG_POINTER lossage can be fixed, it seems better to canonicalize.
10434
10435 We initially break out scaled indexed addresses in canonical order
10436 in pa_emit_move_sequence. LEGITIMIZE_ADDRESS also canonicalizes
10437 scaled indexed addresses during RTL generation. However, fold_rtx
10438 has its own opinion on how the operands of a PLUS should be ordered.
10439 If one of the operands is equivalent to a constant, it will make
10440 that operand the second operand. As the base register is likely to
10441 be equivalent to a SYMBOL_REF, we have made it the second operand.
10442
10443 pa_legitimate_address_p accepts REG+REG as legitimate when the
10444 operands are in the order INDEX+BASE on targets with non-equivalent
10445 space registers, and in any order on targets with equivalent space
10446 registers. It accepts both MULT+BASE and BASE+MULT for scaled indexing.
10447
10448 We treat a SYMBOL_REF as legitimate if it is part of the current
10449 function's constant-pool, because such addresses can actually be
10450 output as REG+SMALLINT. */
10451
10452 static bool
10453 pa_legitimate_address_p (machine_mode mode, rtx x, bool strict)
10454 {
10455 if ((REG_P (x)
10456 && (strict ? STRICT_REG_OK_FOR_BASE_P (x)
10457 : REG_OK_FOR_BASE_P (x)))
10458 || ((GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC
10459 || GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC)
10460 && REG_P (XEXP (x, 0))
10461 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10462 : REG_OK_FOR_BASE_P (XEXP (x, 0)))))
10463 return true;
10464
10465 if (GET_CODE (x) == PLUS)
10466 {
10467 rtx base, index;
10468
10469 /* For REG+REG, the base register should be in XEXP (x, 1),
10470 so check it first. */
10471 if (REG_P (XEXP (x, 1))
10472 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 1))
10473 : REG_OK_FOR_BASE_P (XEXP (x, 1))))
10474 base = XEXP (x, 1), index = XEXP (x, 0);
10475 else if (REG_P (XEXP (x, 0))
10476 && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))
10477 : REG_OK_FOR_BASE_P (XEXP (x, 0))))
10478 base = XEXP (x, 0), index = XEXP (x, 1);
10479 else
10480 return false;
10481
10482 if (GET_CODE (index) == CONST_INT)
10483 {
10484 if (INT_5_BITS (index))
10485 return true;
10486
10487 /* When INT14_OK_STRICT is false, a secondary reload is needed
10488 to adjust the displacement of SImode and DImode floating point
10489 instructions but this may fail when the register also needs
10490 reloading. So, we return false when STRICT is true. We
10491 also reject long displacements for float mode addresses since
10492 the majority of accesses will use floating point instructions
10493 that don't support 14-bit offsets. */
10494 if (!INT14_OK_STRICT
10495 && (strict || !(reload_in_progress || reload_completed))
10496 && mode != QImode
10497 && mode != HImode)
10498 return false;
10499
10500 return base14_operand (index, mode);
10501 }
10502
10503 if (!TARGET_DISABLE_INDEXING
10504 /* Only accept the "canonical" INDEX+BASE operand order
10505 on targets with non-equivalent space registers. */
10506 && (TARGET_NO_SPACE_REGS
10507 ? REG_P (index)
10508 : (base == XEXP (x, 1) && REG_P (index)
10509 && (reload_completed
10510 || (reload_in_progress && HARD_REGISTER_P (base))
10511 || REG_POINTER (base))
10512 && (reload_completed
10513 || (reload_in_progress && HARD_REGISTER_P (index))
10514 || !REG_POINTER (index))))
10515 && MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
10516 && (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
10517 : REG_OK_FOR_INDEX_P (index))
10518 && borx_reg_operand (base, Pmode)
10519 && borx_reg_operand (index, Pmode))
10520 return true;
10521
10522 if (!TARGET_DISABLE_INDEXING
10523 && GET_CODE (index) == MULT
10524 /* Only accept base operands with the REG_POINTER flag prior to
10525 reload on targets with non-equivalent space registers. */
10526 && (TARGET_NO_SPACE_REGS
10527 || (base == XEXP (x, 1)
10528 && (reload_completed
10529 || (reload_in_progress && HARD_REGISTER_P (base))
10530 || REG_POINTER (base))))
10531 && REG_P (XEXP (index, 0))
10532 && GET_MODE (XEXP (index, 0)) == Pmode
10533 && MODE_OK_FOR_SCALED_INDEXING_P (mode)
10534 && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0))
10535 : REG_OK_FOR_INDEX_P (XEXP (index, 0)))
10536 && GET_CODE (XEXP (index, 1)) == CONST_INT
10537 && INTVAL (XEXP (index, 1))
10538 == (HOST_WIDE_INT) GET_MODE_SIZE (mode)
10539 && borx_reg_operand (base, Pmode))
10540 return true;
10541
10542 return false;
10543 }
10544
10545 if (GET_CODE (x) == LO_SUM)
10546 {
10547 rtx y = XEXP (x, 0);
10548
10549 if (GET_CODE (y) == SUBREG)
10550 y = SUBREG_REG (y);
10551
10552 if (REG_P (y)
10553 && (strict ? STRICT_REG_OK_FOR_BASE_P (y)
10554 : REG_OK_FOR_BASE_P (y)))
10555 {
10556 /* Needed for -fPIC */
10557 if (mode == Pmode
10558 && GET_CODE (XEXP (x, 1)) == UNSPEC)
10559 return true;
10560
10561 if (!INT14_OK_STRICT
10562 && (strict || !(reload_in_progress || reload_completed))
10563 && mode != QImode
10564 && mode != HImode)
10565 return false;
10566
10567 if (CONSTANT_P (XEXP (x, 1)))
10568 return true;
10569 }
10570 return false;
10571 }
10572
10573 if (GET_CODE (x) == CONST_INT && INT_5_BITS (x))
10574 return true;
10575
10576 return false;
10577 }
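
/* Some RTL forms accepted above (illustrative, with pseudo register
   numbers): (plus (reg 100) (const_int 12)) as REG+SMALLINT,
   (plus (reg 101) (reg 102)) as REG+REG with the base in operand 1,
   and, for an SImode access, (plus (mult (reg 103) (const_int 4))
   (reg 104)) as a scaled index.  */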
10578
10579 /* Look for machine dependent ways to make the invalid address AD a
10580 valid address.
10581
10582 For the PA, transform:
10583
10584 memory(X + <large int>)
10585
10586 into:
10587
10588 if (<large int> & mask) >= 16
10589 Y = (<large int> & ~mask) + mask + 1 Round up.
10590 else
10591 Y = (<large int> & ~mask) Round down.
10592 Z = X + Y
10593 memory (Z + (<large int> - Y));
10594
10595 This makes reload inheritance and reload_cse work better since Z
10596 can be reused.
10597
10598 There may be more opportunities to improve code with this hook. */
10599
10600 rtx
10601 pa_legitimize_reload_address (rtx ad, machine_mode mode,
10602 int opnum, int type,
10603 int ind_levels ATTRIBUTE_UNUSED)
10604 {
10605 long offset, newoffset, mask;
10606 rtx new_rtx, temp = NULL_RTX;
10607
10608 mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
10609 && !INT14_OK_STRICT ? 0x1f : 0x3fff);
10610
10611 if (optimize && GET_CODE (ad) == PLUS)
10612 temp = simplify_binary_operation (PLUS, Pmode,
10613 XEXP (ad, 0), XEXP (ad, 1));
10614
10615 new_rtx = temp ? temp : ad;
10616
10617 if (optimize
10618 && GET_CODE (new_rtx) == PLUS
10619 && GET_CODE (XEXP (new_rtx, 0)) == REG
10620 && GET_CODE (XEXP (new_rtx, 1)) == CONST_INT)
10621 {
10622 offset = INTVAL (XEXP (new_rtx, 1));
10623
10624 /* Choose rounding direction. Round up if we are >= halfway. */
10625 if ((offset & mask) >= ((mask + 1) / 2))
10626 newoffset = (offset & ~mask) + mask + 1;
10627 else
10628 newoffset = offset & ~mask;
10629
10630 /* Ensure that long displacements are aligned. */
10631 if (mask == 0x3fff
10632 && (GET_MODE_CLASS (mode) == MODE_FLOAT
10633 || (TARGET_64BIT && (mode) == DImode)))
10634 newoffset &= ~(GET_MODE_SIZE (mode) - 1);
10635
10636 if (newoffset != 0 && VAL_14_BITS_P (newoffset))
10637 {
10638 temp = gen_rtx_PLUS (Pmode, XEXP (new_rtx, 0),
10639 GEN_INT (newoffset));
10640 ad = gen_rtx_PLUS (Pmode, temp, GEN_INT (offset - newoffset));
10641 push_reload (XEXP (ad, 0), 0, &XEXP (ad, 0), 0,
10642 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
10643 opnum, (enum reload_type) type);
10644 return ad;
10645 }
10646 }
10647
10648 return NULL_RTX;
10649 }
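
/* Worked example: for an SFmode access with !INT14_OK_STRICT, the mask
   is 0x1f.  Given X + 1001, the low bits 1001 & 0x1f = 9 fall below 16,
   so we round down to Y = 992; reload computes Z = X + 992 and the
   access becomes memory (Z + 9), whose 5-bit displacement suits the
   floating-point loads and stores.  */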
10650
10651 /* Output address vector. */
10652
10653 void
10654 pa_output_addr_vec (rtx lab, rtx body)
10655 {
10656 int idx, vlen = XVECLEN (body, 0);
10657
10658 if (!TARGET_SOM)
10659 fputs ("\t.align 4\n", asm_out_file);
10660 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10661 if (TARGET_GAS)
10662 fputs ("\t.begin_brtab\n", asm_out_file);
10663 for (idx = 0; idx < vlen; idx++)
10664 {
10665 ASM_OUTPUT_ADDR_VEC_ELT
10666 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10667 }
10668 if (TARGET_GAS)
10669 fputs ("\t.end_brtab\n", asm_out_file);
10670 }
10671
10672 /* Output address difference vector. */
10673
10674 void
10675 pa_output_addr_diff_vec (rtx lab, rtx body)
10676 {
10677 rtx base = XEXP (XEXP (body, 0), 0);
10678 int idx, vlen = XVECLEN (body, 1);
10679
10680 targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10681 if (TARGET_GAS)
10682 fputs ("\t.begin_brtab\n", asm_out_file);
10683 for (idx = 0; idx < vlen; idx++)
10684 {
10685 ASM_OUTPUT_ADDR_DIFF_ELT
10686 (asm_out_file,
10687 body,
10688 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10689 CODE_LABEL_NUMBER (base));
10690 }
10691 if (TARGET_GAS)
10692 fputs ("\t.end_brtab\n", asm_out_file);
10693 }
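
/* Editor's note: each entry above is emitted by ASM_OUTPUT_ADDR_DIFF_ELT
   as the difference between a case label and the table's base label, so
   the table contains only relative offsets and stays position
   independent; GCC typically selects such tables when generating PIC
   code. */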
10694
10695 /* This is a helper function for the other atomic operations. This function
10696    emits a loop containing SEQ that iterates until a compare-and-swap
10697 operation at the end succeeds. MEM is the memory to be modified. SEQ is
10698 a set of instructions that takes a value from OLD_REG as an input and
10699 produces a value in NEW_REG as an output. Before SEQ, OLD_REG will be
10700 set to the current contents of MEM. After SEQ, a compare-and-swap will
10701 attempt to update MEM with NEW_REG. The function returns true when the
10702 loop was generated successfully. */
10703
10704 static bool
10705 pa_expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
10706 {
10707 machine_mode mode = GET_MODE (mem);
10708 rtx_code_label *label;
10709 rtx cmp_reg, success, oldval;
10710
10711 /* The loop we want to generate looks like
10712
10713 cmp_reg = mem;
10714 label:
10715 old_reg = cmp_reg;
10716 seq;
10717 (success, cmp_reg) = compare-and-swap(mem, old_reg, new_reg)
10718 	if (!success)
10719 goto label;
10720
10721 Note that we only do the plain load from memory once. Subsequent
10722 iterations use the value loaded by the compare-and-swap pattern. */
10723
10724 label = gen_label_rtx ();
10725 cmp_reg = gen_reg_rtx (mode);
10726
10727 emit_move_insn (cmp_reg, mem);
10728 emit_label (label);
10729 emit_move_insn (old_reg, cmp_reg);
10730 if (seq)
10731 emit_insn (seq);
10732
10733 success = NULL_RTX;
10734 oldval = cmp_reg;
10735 if (!expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg,
10736 new_reg, false, MEMMODEL_SYNC_SEQ_CST,
10737 MEMMODEL_RELAXED))
10738 return false;
10739
10740 if (oldval != cmp_reg)
10741 emit_move_insn (cmp_reg, oldval);
10742
10743 /* Mark this jump predicted not taken. */
10744 emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx,
10745 GET_MODE (success), 1, label,
10746 profile_probability::guessed_never ());
10747 return true;
10748 }
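
/* Editor's sketch (not part of the original port): one way the helper
   above could be used to expand an atomic fetch-and-add. The function
   below is a hypothetical illustration; only
   pa_expand_compare_and_swap_loop and the generic emit/optabs routines
   it calls are real. */

static bool ATTRIBUTE_UNUSED
pa_expand_atomic_add_example (rtx mem, rtx val)
{
  machine_mode mode = GET_MODE (mem);
  rtx old_reg = gen_reg_rtx (mode);	/* Value observed in MEM. */
  rtx new_reg = gen_reg_rtx (mode);	/* Value to store back. */
  rtx_insn *seq;

  /* Record the instructions that compute NEW_REG from OLD_REG; the
     loop re-executes them on every retry with a fresh OLD_REG. */
  start_sequence ();
  emit_insn (gen_add3_insn (new_reg, old_reg, val));
  seq = get_insns ();
  end_sequence ();

  return pa_expand_compare_and_swap_loop (mem, old_reg, new_reg, seq);
}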
10749
10750 /* This function tries to implement an atomic exchange operation using a
10751 compare_and_swap loop. VAL is written to *MEM. The previous contents of
10752 *MEM are returned, using TARGET if possible. No memory model is required
10753 since a compare_and_swap loop is seq-cst. */
10754
10755 rtx
10756 pa_maybe_emit_compare_and_swap_exchange_loop (rtx target, rtx mem, rtx val)
10757 {
10758 machine_mode mode = GET_MODE (mem);
10759
10760 if (can_compare_and_swap_p (mode, true))
10761 {
10762 if (!target || !register_operand (target, mode))
10763 target = gen_reg_rtx (mode);
10764 if (pa_expand_compare_and_swap_loop (mem, target, val, NULL_RTX))
10765 return target;
10766 }
10767
10768 return NULL_RTX;
10769 }
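
/* Editor's note: a NULL_RTX return above means no compare-and-swap
   sequence could be emitted for this mode, so the caller must arrange
   another fallback (typically the __atomic/__sync libcalls). */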
10770
10771 /* Implement TARGET_CALLEE_COPIES. The callee is responsible for copying
10772 arguments passed by hidden reference in the 32-bit HP runtime. Users
10773    can override this behavior for better compatibility with OpenMP at the
10774 risk of library incompatibilities. Arguments are always passed by value
10775 in the 64-bit HP runtime. */
10776
10777 static bool
10778 pa_callee_copies (cumulative_args_t cum ATTRIBUTE_UNUSED,
10779 machine_mode mode ATTRIBUTE_UNUSED,
10780 const_tree type ATTRIBUTE_UNUSED,
10781 bool named ATTRIBUTE_UNUSED)
10782 {
10783 return !TARGET_CALLER_COPIES;
10784 }
10785
10786 /* Implement TARGET_HARD_REGNO_NREGS. */
10787
10788 static unsigned int
10789 pa_hard_regno_nregs (unsigned int regno ATTRIBUTE_UNUSED, machine_mode mode)
10790 {
10791 return PA_HARD_REGNO_NREGS (regno, mode);
10792 }
10793
10794 /* Implement TARGET_HARD_REGNO_MODE_OK. */
10795
10796 static bool
10797 pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10798 {
10799 return PA_HARD_REGNO_MODE_OK (regno, mode);
10800 }
10801
10802 /* Implement TARGET_STARTING_FRAME_OFFSET.
10803
10804    On the 32-bit ports, we reserve one slot for the previous frame
10805    pointer and one fill slot; the fill slot is for compatibility with
10806    HP compiled programs. On the 64-bit ports, we reserve one slot for
10807    the previous frame pointer. Either way, 8 bytes are reserved. */
10808
10809 static HOST_WIDE_INT
10810 pa_starting_frame_offset (void)
10811 {
10812 return 8;
10813 }
10814
10815 /* Figure out the size in words of the function argument. The size
10816 returned by this function should always be greater than zero because
10817    we pass variable-sized and zero-sized objects by reference. */
10818
10819 HOST_WIDE_INT
10820 pa_function_arg_size (machine_mode mode, const_tree type)
10821 {
10822 HOST_WIDE_INT size;
10823
10824 size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
10825 return CEIL (size, UNITS_PER_WORD);
10826 }
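
/* Worked example (editor's illustration): on a 32-bit port where
   UNITS_PER_WORD is 4, a 5-byte BLKmode structure gives
   CEIL (5, 4) = 2 words, and an SImode argument gives
   CEIL (4, 4) = 1 word. */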
10827
10828 #include "gt-pa.h"