1 // SPDX-License-Identifier: GPL-3.0-or-later
2 /* Subroutines used for code generation on IBM RS/6000.
3 Copyright (C) 1991-2023 Free Software Foundation, Inc.
4 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #define IN_TARGET_CODE 1
26 #include "coretypes.h"
36 #include "stringpool.h"
43 #include "diagnostic-core.h"
44 #include "insn-attr.h"
47 #include "fold-const.h"
49 #include "stor-layout.h"
51 #include "print-tree.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
59 #include "sched-int.h"
61 #include "gimple-iterator.h"
62 #include "gimple-fold.h"
63 #include "gimple-walk.h"
65 #include "tree-vectorizer.h"
66 #include "tree-ssa-propagate.h"
68 #include "tm-constrs.h"
69 #include "target-globals.h"
71 #include "tree-vector-builder.h"
73 #include "tree-pass.h"
74 #include "symbol-summary.h"
76 #include "ipa-fnsummary.h"
78 #include "case-cfn-macros.h"
80 #include "rs6000-internal.h"
83 /* This file should be included last. */
84 #include "target-def.h"
86 extern tree
rs6000_builtin_mask_for_load (void);
87 extern tree
rs6000_builtin_md_vectorized_function (tree
, tree
, tree
);
88 extern tree
rs6000_builtin_reciprocal (tree
);
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS	0
#endif
110 /* Counter for labels which are to be placed in .fixup. */
111 int fixuplabelno
= 0;
114 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
117 /* Specify the machine mode that pointers have. After generation of rtl, the
118 compiler makes no further distinction between pointers and any other objects
119 of this machine mode. */
120 scalar_int_mode rs6000_pmode
;
122 /* Track use of r13 in 64bit AIX TLS. */
123 static bool xcoff_tls_exec_model_detected
= false;
125 /* Width in bits of a pointer. */
126 unsigned rs6000_pointer_size
;
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif
145 /* Value is TRUE if register/mode pair is acceptable. */
146 static bool rs6000_hard_regno_mode_ok_p
147 [NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
149 /* Maximum number of registers needed for a given register class and mode. */
150 unsigned char rs6000_class_max_nregs
[NUM_MACHINE_MODES
][LIM_REG_CLASSES
];
152 /* How many registers are needed for a given register and mode. */
153 unsigned char rs6000_hard_regno_nregs
[NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
155 /* Map register number to register class. */
156 enum reg_class rs6000_regno_regclass
[FIRST_PSEUDO_REGISTER
];
158 static int dbg_cost_ctrl
;
160 /* Flag to say the TOC is initialized */
161 int toc_initialized
, need_toc_init
;
162 char toc_label_name
[10];
164 /* Cached value of rs6000_variable_issue. This is cached in
165 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
166 static short cached_can_issue_more
;
168 static GTY(()) section
*read_only_data_section
;
169 static GTY(()) section
*private_data_section
;
170 static GTY(()) section
*tls_data_section
;
171 static GTY(()) section
*tls_private_data_section
;
172 static GTY(()) section
*read_only_private_data_section
;
173 static GTY(()) section
*sdata2_section
;
175 section
*toc_section
= 0;
177 /* Describe the vector unit used for modes. */
178 enum rs6000_vector rs6000_vector_unit
[NUM_MACHINE_MODES
];
179 enum rs6000_vector rs6000_vector_mem
[NUM_MACHINE_MODES
];
181 /* Register classes for various constraints that are based on the target
183 enum reg_class rs6000_constraints
[RS6000_CONSTRAINT_MAX
];
185 /* Describe the alignment of a vector. */
186 int rs6000_vector_align
[NUM_MACHINE_MODES
];
188 /* What modes to automatically generate reciprocal divide estimate (fre) and
189 reciprocal sqrt (frsqrte) for. */
190 unsigned char rs6000_recip_bits
[MAX_MACHINE_MODE
];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask
{
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
218 /* -mrecip options. */
221 const char *string
; /* option name */
222 unsigned int mask
; /* mask bits to set */
223 } recip_options
[] = {
224 { "all", RECIP_ALL
},
225 { "none", RECIP_NONE
},
226 { "div", (RECIP_SF_DIV
| RECIP_DF_DIV
| RECIP_V4SF_DIV
228 { "divf", (RECIP_SF_DIV
| RECIP_V4SF_DIV
) },
229 { "divd", (RECIP_DF_DIV
| RECIP_V2DF_DIV
) },
230 { "rsqrt", (RECIP_SF_RSQRT
| RECIP_DF_RSQRT
| RECIP_V4SF_RSQRT
231 | RECIP_V2DF_RSQRT
) },
232 { "rsqrtf", (RECIP_SF_RSQRT
| RECIP_V4SF_RSQRT
) },
233 { "rsqrtd", (RECIP_DF_RSQRT
| RECIP_V2DF_RSQRT
) },
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,			/* ISA 3.1 (power10).  */
  CLONE_MAX
};
250 /* Map compiler ISA bits into HWCAP names. */
252 HOST_WIDE_INT isa_mask
; /* rs6000_isa mask */
253 const char *name
; /* name to use in __builtin_cpu_supports. */
256 static const struct clone_map rs6000_clone_map
[CLONE_MAX
] = {
257 { 0, "" }, /* Default options. */
258 { OPTION_MASK_CMPB
, "arch_2_05" }, /* ISA 2.05 (power6). */
259 { OPTION_MASK_POPCNTD
, "arch_2_06" }, /* ISA 2.06 (power7). */
260 { OPTION_MASK_P8_VECTOR
, "arch_2_07" }, /* ISA 2.07 (power8). */
261 { OPTION_MASK_P9_VECTOR
, "arch_3_00" }, /* ISA 3.0 (power9). */
262 { OPTION_MASK_POWER10
, "arch_3_1" }, /* ISA 3.1 (power10). */
266 /* Newer LIBCs explicitly export this symbol to declare that they provide
267 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
268 reference to this symbol whenever we expand a CPU builtin, so that
269 we never link against an old LIBC. */
270 const char *tcb_verification_symbol
= "__parse_hwcap_and_convert_at_platform";
272 /* True if we have expanded a CPU builtin. */
273 bool cpu_builtin_p
= false;
275 /* Pointer to function (in rs6000-c.cc) that can define or undefine target
276 macros that have changed. Languages that don't support the preprocessor
277 don't link in rs6000-c.cc, so we can't call it directly. */
278 void (*rs6000_target_modify_macros_ptr
) (bool, HOST_WIDE_INT
);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};
296 /* Map register class to register type. */
297 static enum rs6000_reg_type reg_class_to_reg_type
[N_REG_CLASSES
];
299 /* First/last register type for the 'normal' register types (i.e. general
300 purpose, floating point, altivec, and VSX registers). */
301 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
303 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or go if legitimate
   address.  We only need to worry about GPR, FPR, and Altivec registers here,
   along an ANY field that is the OR of the 3 register classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,			/* General purpose registers.  */
  RELOAD_REG_FPR,			/* Traditional floating point regs.  */
  RELOAD_REG_VMX,			/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,			/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   3 classes.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
324 /* Map reload register type to a register in the register class. */
325 struct reload_reg_map_type
{
326 const char *name
; /* Register class name. */
327 int reg
; /* Register in the register class. */
330 static const struct reload_reg_map_type reload_reg_map
[N_RELOAD_REG
] = {
331 { "Gpr", FIRST_GPR_REGNO
}, /* RELOAD_REG_GPR. */
332 { "Fpr", FIRST_FPR_REGNO
}, /* RELOAD_REG_FPR. */
333 { "VMX", FIRST_ALTIVEC_REGNO
}, /* RELOAD_REG_VMX. */
334 { "Any", -1 }, /* RELOAD_REG_ANY. */
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive which types can do PRE_MODIFY instead of
   PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
351 /* Register type masks based on the type, of valid addressing modes. */
352 struct rs6000_reg_addr
{
353 enum insn_code reload_load
; /* INSN to reload for loading. */
354 enum insn_code reload_store
; /* INSN to reload for storing. */
355 enum insn_code reload_fpr_gpr
; /* INSN to move from FPR to GPR. */
356 enum insn_code reload_gpr_vsx
; /* INSN to move from GPR to VSX. */
357 enum insn_code reload_vsx_gpr
; /* INSN to move from VSX to GPR. */
358 addr_mask_type addr_mask
[(int)N_RELOAD_REG
]; /* Valid address masks. */
359 bool scalar_in_vmx_p
; /* Scalar value can go in VMX. */
362 static struct rs6000_reg_addr reg_addr
[NUM_MACHINE_MODES
];
364 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
366 mode_supports_pre_incdec_p (machine_mode mode
)
368 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_INCDEC
)
372 /* Helper function to say whether a mode supports PRE_MODIFY. */
374 mode_supports_pre_modify_p (machine_mode mode
)
376 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_MODIFY
)
380 /* Return true if we have D-form addressing in altivec registers. */
382 mode_supports_vmx_dform (machine_mode mode
)
384 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_OFFSET
) != 0);
387 /* Return true if we have D-form addressing in VSX registers. This addressing
388 is more limited than normal d-form addressing in that the offset must be
389 aligned on a 16-byte boundary. */
391 mode_supports_dq_form (machine_mode mode
)
393 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_QUAD_OFFSET
)
397 /* Given that there exists at least one variable that is set (produced)
398 by OUT_INSN and read (consumed) by IN_INSN, return true iff
399 IN_INSN represents one or more memory store operations and none of
400 the variables set by OUT_INSN is used by IN_INSN as the address of a
401 store operation. If either IN_INSN or OUT_INSN does not represent
402 a "single" RTL SET expression (as loosely defined by the
403 implementation of the single_set function) or a PARALLEL with only
404 SETs, CLOBBERs, and USEs inside, this function returns false.
406 This rs6000-specific version of store_data_bypass_p checks for
407 certain conditions that result in assertion failures (and internal
408 compiler errors) in the generic store_data_bypass_p function and
409 returns false rather than calling store_data_bypass_p if one of the
410 problematic conditions is detected. */
413 rs6000_store_data_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
420 in_set
= single_set (in_insn
);
423 if (MEM_P (SET_DEST (in_set
)))
425 out_set
= single_set (out_insn
);
428 out_pat
= PATTERN (out_insn
);
429 if (GET_CODE (out_pat
) == PARALLEL
)
431 for (i
= 0; i
< XVECLEN (out_pat
, 0); i
++)
433 out_exp
= XVECEXP (out_pat
, 0, i
);
434 if ((GET_CODE (out_exp
) == CLOBBER
)
435 || (GET_CODE (out_exp
) == USE
))
437 else if (GET_CODE (out_exp
) != SET
)
446 in_pat
= PATTERN (in_insn
);
447 if (GET_CODE (in_pat
) != PARALLEL
)
450 for (i
= 0; i
< XVECLEN (in_pat
, 0); i
++)
452 in_exp
= XVECEXP (in_pat
, 0, i
);
453 if ((GET_CODE (in_exp
) == CLOBBER
) || (GET_CODE (in_exp
) == USE
))
455 else if (GET_CODE (in_exp
) != SET
)
458 if (MEM_P (SET_DEST (in_exp
)))
460 out_set
= single_set (out_insn
);
463 out_pat
= PATTERN (out_insn
);
464 if (GET_CODE (out_pat
) != PARALLEL
)
466 for (j
= 0; j
< XVECLEN (out_pat
, 0); j
++)
468 out_exp
= XVECEXP (out_pat
, 0, j
);
469 if ((GET_CODE (out_exp
) == CLOBBER
)
470 || (GET_CODE (out_exp
) == USE
))
472 else if (GET_CODE (out_exp
) != SET
)
479 return store_data_bypass_p (out_insn
, in_insn
);
/* Processor costs (relative to an add) for the processor being compiled
   for; selected from the per-CPU tables below at option-handling time.  */
const struct processor_costs *rs6000_cost;
487 /* Instruction size costs on 32bit processors. */
489 struct processor_costs size32_cost
= {
490 COSTS_N_INSNS (1), /* mulsi */
491 COSTS_N_INSNS (1), /* mulsi_const */
492 COSTS_N_INSNS (1), /* mulsi_const9 */
493 COSTS_N_INSNS (1), /* muldi */
494 COSTS_N_INSNS (1), /* divsi */
495 COSTS_N_INSNS (1), /* divdi */
496 COSTS_N_INSNS (1), /* fp */
497 COSTS_N_INSNS (1), /* dmul */
498 COSTS_N_INSNS (1), /* sdiv */
499 COSTS_N_INSNS (1), /* ddiv */
500 32, /* cache line size */
504 0, /* SF->DF convert */
507 /* Instruction size costs on 64bit processors. */
509 struct processor_costs size64_cost
= {
510 COSTS_N_INSNS (1), /* mulsi */
511 COSTS_N_INSNS (1), /* mulsi_const */
512 COSTS_N_INSNS (1), /* mulsi_const9 */
513 COSTS_N_INSNS (1), /* muldi */
514 COSTS_N_INSNS (1), /* divsi */
515 COSTS_N_INSNS (1), /* divdi */
516 COSTS_N_INSNS (1), /* fp */
517 COSTS_N_INSNS (1), /* dmul */
518 COSTS_N_INSNS (1), /* sdiv */
519 COSTS_N_INSNS (1), /* ddiv */
520 128, /* cache line size */
524 0, /* SF->DF convert */
527 /* Instruction costs on RS64A processors. */
529 struct processor_costs rs64a_cost
= {
530 COSTS_N_INSNS (20), /* mulsi */
531 COSTS_N_INSNS (12), /* mulsi_const */
532 COSTS_N_INSNS (8), /* mulsi_const9 */
533 COSTS_N_INSNS (34), /* muldi */
534 COSTS_N_INSNS (65), /* divsi */
535 COSTS_N_INSNS (67), /* divdi */
536 COSTS_N_INSNS (4), /* fp */
537 COSTS_N_INSNS (4), /* dmul */
538 COSTS_N_INSNS (31), /* sdiv */
539 COSTS_N_INSNS (31), /* ddiv */
540 128, /* cache line size */
544 0, /* SF->DF convert */
547 /* Instruction costs on MPCCORE processors. */
549 struct processor_costs mpccore_cost
= {
550 COSTS_N_INSNS (2), /* mulsi */
551 COSTS_N_INSNS (2), /* mulsi_const */
552 COSTS_N_INSNS (2), /* mulsi_const9 */
553 COSTS_N_INSNS (2), /* muldi */
554 COSTS_N_INSNS (6), /* divsi */
555 COSTS_N_INSNS (6), /* divdi */
556 COSTS_N_INSNS (4), /* fp */
557 COSTS_N_INSNS (5), /* dmul */
558 COSTS_N_INSNS (10), /* sdiv */
559 COSTS_N_INSNS (17), /* ddiv */
560 32, /* cache line size */
564 0, /* SF->DF convert */
567 /* Instruction costs on PPC403 processors. */
569 struct processor_costs ppc403_cost
= {
570 COSTS_N_INSNS (4), /* mulsi */
571 COSTS_N_INSNS (4), /* mulsi_const */
572 COSTS_N_INSNS (4), /* mulsi_const9 */
573 COSTS_N_INSNS (4), /* muldi */
574 COSTS_N_INSNS (33), /* divsi */
575 COSTS_N_INSNS (33), /* divdi */
576 COSTS_N_INSNS (11), /* fp */
577 COSTS_N_INSNS (11), /* dmul */
578 COSTS_N_INSNS (11), /* sdiv */
579 COSTS_N_INSNS (11), /* ddiv */
580 32, /* cache line size */
584 0, /* SF->DF convert */
587 /* Instruction costs on PPC405 processors. */
589 struct processor_costs ppc405_cost
= {
590 COSTS_N_INSNS (5), /* mulsi */
591 COSTS_N_INSNS (4), /* mulsi_const */
592 COSTS_N_INSNS (3), /* mulsi_const9 */
593 COSTS_N_INSNS (5), /* muldi */
594 COSTS_N_INSNS (35), /* divsi */
595 COSTS_N_INSNS (35), /* divdi */
596 COSTS_N_INSNS (11), /* fp */
597 COSTS_N_INSNS (11), /* dmul */
598 COSTS_N_INSNS (11), /* sdiv */
599 COSTS_N_INSNS (11), /* ddiv */
600 32, /* cache line size */
604 0, /* SF->DF convert */
607 /* Instruction costs on PPC440 processors. */
609 struct processor_costs ppc440_cost
= {
610 COSTS_N_INSNS (3), /* mulsi */
611 COSTS_N_INSNS (2), /* mulsi_const */
612 COSTS_N_INSNS (2), /* mulsi_const9 */
613 COSTS_N_INSNS (3), /* muldi */
614 COSTS_N_INSNS (34), /* divsi */
615 COSTS_N_INSNS (34), /* divdi */
616 COSTS_N_INSNS (5), /* fp */
617 COSTS_N_INSNS (5), /* dmul */
618 COSTS_N_INSNS (19), /* sdiv */
619 COSTS_N_INSNS (33), /* ddiv */
620 32, /* cache line size */
624 0, /* SF->DF convert */
627 /* Instruction costs on PPC476 processors. */
629 struct processor_costs ppc476_cost
= {
630 COSTS_N_INSNS (4), /* mulsi */
631 COSTS_N_INSNS (4), /* mulsi_const */
632 COSTS_N_INSNS (4), /* mulsi_const9 */
633 COSTS_N_INSNS (4), /* muldi */
634 COSTS_N_INSNS (11), /* divsi */
635 COSTS_N_INSNS (11), /* divdi */
636 COSTS_N_INSNS (6), /* fp */
637 COSTS_N_INSNS (6), /* dmul */
638 COSTS_N_INSNS (19), /* sdiv */
639 COSTS_N_INSNS (33), /* ddiv */
640 32, /* l1 cache line size */
644 0, /* SF->DF convert */
647 /* Instruction costs on PPC601 processors. */
649 struct processor_costs ppc601_cost
= {
650 COSTS_N_INSNS (5), /* mulsi */
651 COSTS_N_INSNS (5), /* mulsi_const */
652 COSTS_N_INSNS (5), /* mulsi_const9 */
653 COSTS_N_INSNS (5), /* muldi */
654 COSTS_N_INSNS (36), /* divsi */
655 COSTS_N_INSNS (36), /* divdi */
656 COSTS_N_INSNS (4), /* fp */
657 COSTS_N_INSNS (5), /* dmul */
658 COSTS_N_INSNS (17), /* sdiv */
659 COSTS_N_INSNS (31), /* ddiv */
660 32, /* cache line size */
664 0, /* SF->DF convert */
667 /* Instruction costs on PPC603 processors. */
669 struct processor_costs ppc603_cost
= {
670 COSTS_N_INSNS (5), /* mulsi */
671 COSTS_N_INSNS (3), /* mulsi_const */
672 COSTS_N_INSNS (2), /* mulsi_const9 */
673 COSTS_N_INSNS (5), /* muldi */
674 COSTS_N_INSNS (37), /* divsi */
675 COSTS_N_INSNS (37), /* divdi */
676 COSTS_N_INSNS (3), /* fp */
677 COSTS_N_INSNS (4), /* dmul */
678 COSTS_N_INSNS (18), /* sdiv */
679 COSTS_N_INSNS (33), /* ddiv */
680 32, /* cache line size */
684 0, /* SF->DF convert */
687 /* Instruction costs on PPC604 processors. */
689 struct processor_costs ppc604_cost
= {
690 COSTS_N_INSNS (4), /* mulsi */
691 COSTS_N_INSNS (4), /* mulsi_const */
692 COSTS_N_INSNS (4), /* mulsi_const9 */
693 COSTS_N_INSNS (4), /* muldi */
694 COSTS_N_INSNS (20), /* divsi */
695 COSTS_N_INSNS (20), /* divdi */
696 COSTS_N_INSNS (3), /* fp */
697 COSTS_N_INSNS (3), /* dmul */
698 COSTS_N_INSNS (18), /* sdiv */
699 COSTS_N_INSNS (32), /* ddiv */
700 32, /* cache line size */
704 0, /* SF->DF convert */
707 /* Instruction costs on PPC604e processors. */
709 struct processor_costs ppc604e_cost
= {
710 COSTS_N_INSNS (2), /* mulsi */
711 COSTS_N_INSNS (2), /* mulsi_const */
712 COSTS_N_INSNS (2), /* mulsi_const9 */
713 COSTS_N_INSNS (2), /* muldi */
714 COSTS_N_INSNS (20), /* divsi */
715 COSTS_N_INSNS (20), /* divdi */
716 COSTS_N_INSNS (3), /* fp */
717 COSTS_N_INSNS (3), /* dmul */
718 COSTS_N_INSNS (18), /* sdiv */
719 COSTS_N_INSNS (32), /* ddiv */
720 32, /* cache line size */
724 0, /* SF->DF convert */
727 /* Instruction costs on PPC620 processors. */
729 struct processor_costs ppc620_cost
= {
730 COSTS_N_INSNS (5), /* mulsi */
731 COSTS_N_INSNS (4), /* mulsi_const */
732 COSTS_N_INSNS (3), /* mulsi_const9 */
733 COSTS_N_INSNS (7), /* muldi */
734 COSTS_N_INSNS (21), /* divsi */
735 COSTS_N_INSNS (37), /* divdi */
736 COSTS_N_INSNS (3), /* fp */
737 COSTS_N_INSNS (3), /* dmul */
738 COSTS_N_INSNS (18), /* sdiv */
739 COSTS_N_INSNS (32), /* ddiv */
740 128, /* cache line size */
744 0, /* SF->DF convert */
747 /* Instruction costs on PPC630 processors. */
749 struct processor_costs ppc630_cost
= {
750 COSTS_N_INSNS (5), /* mulsi */
751 COSTS_N_INSNS (4), /* mulsi_const */
752 COSTS_N_INSNS (3), /* mulsi_const9 */
753 COSTS_N_INSNS (7), /* muldi */
754 COSTS_N_INSNS (21), /* divsi */
755 COSTS_N_INSNS (37), /* divdi */
756 COSTS_N_INSNS (3), /* fp */
757 COSTS_N_INSNS (3), /* dmul */
758 COSTS_N_INSNS (17), /* sdiv */
759 COSTS_N_INSNS (21), /* ddiv */
760 128, /* cache line size */
764 0, /* SF->DF convert */
767 /* Instruction costs on Cell processor. */
768 /* COSTS_N_INSNS (1) ~ one add. */
770 struct processor_costs ppccell_cost
= {
771 COSTS_N_INSNS (9/2)+2, /* mulsi */
772 COSTS_N_INSNS (6/2), /* mulsi_const */
773 COSTS_N_INSNS (6/2), /* mulsi_const9 */
774 COSTS_N_INSNS (15/2)+2, /* muldi */
775 COSTS_N_INSNS (38/2), /* divsi */
776 COSTS_N_INSNS (70/2), /* divdi */
777 COSTS_N_INSNS (10/2), /* fp */
778 COSTS_N_INSNS (10/2), /* dmul */
779 COSTS_N_INSNS (74/2), /* sdiv */
780 COSTS_N_INSNS (74/2), /* ddiv */
781 128, /* cache line size */
785 0, /* SF->DF convert */
788 /* Instruction costs on PPC750 and PPC7400 processors. */
790 struct processor_costs ppc750_cost
= {
791 COSTS_N_INSNS (5), /* mulsi */
792 COSTS_N_INSNS (3), /* mulsi_const */
793 COSTS_N_INSNS (2), /* mulsi_const9 */
794 COSTS_N_INSNS (5), /* muldi */
795 COSTS_N_INSNS (17), /* divsi */
796 COSTS_N_INSNS (17), /* divdi */
797 COSTS_N_INSNS (3), /* fp */
798 COSTS_N_INSNS (3), /* dmul */
799 COSTS_N_INSNS (17), /* sdiv */
800 COSTS_N_INSNS (31), /* ddiv */
801 32, /* cache line size */
805 0, /* SF->DF convert */
808 /* Instruction costs on PPC7450 processors. */
810 struct processor_costs ppc7450_cost
= {
811 COSTS_N_INSNS (4), /* mulsi */
812 COSTS_N_INSNS (3), /* mulsi_const */
813 COSTS_N_INSNS (3), /* mulsi_const9 */
814 COSTS_N_INSNS (4), /* muldi */
815 COSTS_N_INSNS (23), /* divsi */
816 COSTS_N_INSNS (23), /* divdi */
817 COSTS_N_INSNS (5), /* fp */
818 COSTS_N_INSNS (5), /* dmul */
819 COSTS_N_INSNS (21), /* sdiv */
820 COSTS_N_INSNS (35), /* ddiv */
821 32, /* cache line size */
825 0, /* SF->DF convert */
828 /* Instruction costs on PPC8540 processors. */
830 struct processor_costs ppc8540_cost
= {
831 COSTS_N_INSNS (4), /* mulsi */
832 COSTS_N_INSNS (4), /* mulsi_const */
833 COSTS_N_INSNS (4), /* mulsi_const9 */
834 COSTS_N_INSNS (4), /* muldi */
835 COSTS_N_INSNS (19), /* divsi */
836 COSTS_N_INSNS (19), /* divdi */
837 COSTS_N_INSNS (4), /* fp */
838 COSTS_N_INSNS (4), /* dmul */
839 COSTS_N_INSNS (29), /* sdiv */
840 COSTS_N_INSNS (29), /* ddiv */
841 32, /* cache line size */
844 1, /* prefetch streams /*/
845 0, /* SF->DF convert */
848 /* Instruction costs on E300C2 and E300C3 cores. */
850 struct processor_costs ppce300c2c3_cost
= {
851 COSTS_N_INSNS (4), /* mulsi */
852 COSTS_N_INSNS (4), /* mulsi_const */
853 COSTS_N_INSNS (4), /* mulsi_const9 */
854 COSTS_N_INSNS (4), /* muldi */
855 COSTS_N_INSNS (19), /* divsi */
856 COSTS_N_INSNS (19), /* divdi */
857 COSTS_N_INSNS (3), /* fp */
858 COSTS_N_INSNS (4), /* dmul */
859 COSTS_N_INSNS (18), /* sdiv */
860 COSTS_N_INSNS (33), /* ddiv */
864 1, /* prefetch streams /*/
865 0, /* SF->DF convert */
868 /* Instruction costs on PPCE500MC processors. */
870 struct processor_costs ppce500mc_cost
= {
871 COSTS_N_INSNS (4), /* mulsi */
872 COSTS_N_INSNS (4), /* mulsi_const */
873 COSTS_N_INSNS (4), /* mulsi_const9 */
874 COSTS_N_INSNS (4), /* muldi */
875 COSTS_N_INSNS (14), /* divsi */
876 COSTS_N_INSNS (14), /* divdi */
877 COSTS_N_INSNS (8), /* fp */
878 COSTS_N_INSNS (10), /* dmul */
879 COSTS_N_INSNS (36), /* sdiv */
880 COSTS_N_INSNS (66), /* ddiv */
881 64, /* cache line size */
884 1, /* prefetch streams /*/
885 0, /* SF->DF convert */
888 /* Instruction costs on PPCE500MC64 processors. */
890 struct processor_costs ppce500mc64_cost
= {
891 COSTS_N_INSNS (4), /* mulsi */
892 COSTS_N_INSNS (4), /* mulsi_const */
893 COSTS_N_INSNS (4), /* mulsi_const9 */
894 COSTS_N_INSNS (4), /* muldi */
895 COSTS_N_INSNS (14), /* divsi */
896 COSTS_N_INSNS (14), /* divdi */
897 COSTS_N_INSNS (4), /* fp */
898 COSTS_N_INSNS (10), /* dmul */
899 COSTS_N_INSNS (36), /* sdiv */
900 COSTS_N_INSNS (66), /* ddiv */
901 64, /* cache line size */
904 1, /* prefetch streams /*/
905 0, /* SF->DF convert */
908 /* Instruction costs on PPCE5500 processors. */
910 struct processor_costs ppce5500_cost
= {
911 COSTS_N_INSNS (5), /* mulsi */
912 COSTS_N_INSNS (5), /* mulsi_const */
913 COSTS_N_INSNS (4), /* mulsi_const9 */
914 COSTS_N_INSNS (5), /* muldi */
915 COSTS_N_INSNS (14), /* divsi */
916 COSTS_N_INSNS (14), /* divdi */
917 COSTS_N_INSNS (7), /* fp */
918 COSTS_N_INSNS (10), /* dmul */
919 COSTS_N_INSNS (36), /* sdiv */
920 COSTS_N_INSNS (66), /* ddiv */
921 64, /* cache line size */
924 1, /* prefetch streams /*/
925 0, /* SF->DF convert */
928 /* Instruction costs on PPCE6500 processors. */
930 struct processor_costs ppce6500_cost
= {
931 COSTS_N_INSNS (5), /* mulsi */
932 COSTS_N_INSNS (5), /* mulsi_const */
933 COSTS_N_INSNS (4), /* mulsi_const9 */
934 COSTS_N_INSNS (5), /* muldi */
935 COSTS_N_INSNS (14), /* divsi */
936 COSTS_N_INSNS (14), /* divdi */
937 COSTS_N_INSNS (7), /* fp */
938 COSTS_N_INSNS (10), /* dmul */
939 COSTS_N_INSNS (36), /* sdiv */
940 COSTS_N_INSNS (66), /* ddiv */
941 64, /* cache line size */
944 1, /* prefetch streams /*/
945 0, /* SF->DF convert */
948 /* Instruction costs on AppliedMicro Titan processors. */
950 struct processor_costs titan_cost
= {
951 COSTS_N_INSNS (5), /* mulsi */
952 COSTS_N_INSNS (5), /* mulsi_const */
953 COSTS_N_INSNS (5), /* mulsi_const9 */
954 COSTS_N_INSNS (5), /* muldi */
955 COSTS_N_INSNS (18), /* divsi */
956 COSTS_N_INSNS (18), /* divdi */
957 COSTS_N_INSNS (10), /* fp */
958 COSTS_N_INSNS (10), /* dmul */
959 COSTS_N_INSNS (46), /* sdiv */
960 COSTS_N_INSNS (72), /* ddiv */
961 32, /* cache line size */
964 1, /* prefetch streams /*/
965 0, /* SF->DF convert */
968 /* Instruction costs on POWER4 and POWER5 processors. */
970 struct processor_costs power4_cost
= {
971 COSTS_N_INSNS (3), /* mulsi */
972 COSTS_N_INSNS (2), /* mulsi_const */
973 COSTS_N_INSNS (2), /* mulsi_const9 */
974 COSTS_N_INSNS (4), /* muldi */
975 COSTS_N_INSNS (18), /* divsi */
976 COSTS_N_INSNS (34), /* divdi */
977 COSTS_N_INSNS (3), /* fp */
978 COSTS_N_INSNS (3), /* dmul */
979 COSTS_N_INSNS (17), /* sdiv */
980 COSTS_N_INSNS (17), /* ddiv */
981 128, /* cache line size */
984 8, /* prefetch streams /*/
985 0, /* SF->DF convert */
988 /* Instruction costs on POWER6 processors. */
990 struct processor_costs power6_cost
= {
991 COSTS_N_INSNS (8), /* mulsi */
992 COSTS_N_INSNS (8), /* mulsi_const */
993 COSTS_N_INSNS (8), /* mulsi_const9 */
994 COSTS_N_INSNS (8), /* muldi */
995 COSTS_N_INSNS (22), /* divsi */
996 COSTS_N_INSNS (28), /* divdi */
997 COSTS_N_INSNS (3), /* fp */
998 COSTS_N_INSNS (3), /* dmul */
999 COSTS_N_INSNS (13), /* sdiv */
1000 COSTS_N_INSNS (16), /* ddiv */
1001 128, /* cache line size */
1003 2048, /* l2 cache */
1004 16, /* prefetch streams */
1005 0, /* SF->DF convert */
1008 /* Instruction costs on POWER7 processors. */
1010 struct processor_costs power7_cost
= {
1011 COSTS_N_INSNS (2), /* mulsi */
1012 COSTS_N_INSNS (2), /* mulsi_const */
1013 COSTS_N_INSNS (2), /* mulsi_const9 */
1014 COSTS_N_INSNS (2), /* muldi */
1015 COSTS_N_INSNS (18), /* divsi */
1016 COSTS_N_INSNS (34), /* divdi */
1017 COSTS_N_INSNS (3), /* fp */
1018 COSTS_N_INSNS (3), /* dmul */
1019 COSTS_N_INSNS (13), /* sdiv */
1020 COSTS_N_INSNS (16), /* ddiv */
1021 128, /* cache line size */
1024 12, /* prefetch streams */
1025 COSTS_N_INSNS (3), /* SF->DF convert */
1028 /* Instruction costs on POWER8 processors. */
1030 struct processor_costs power8_cost
= {
1031 COSTS_N_INSNS (3), /* mulsi */
1032 COSTS_N_INSNS (3), /* mulsi_const */
1033 COSTS_N_INSNS (3), /* mulsi_const9 */
1034 COSTS_N_INSNS (3), /* muldi */
1035 COSTS_N_INSNS (19), /* divsi */
1036 COSTS_N_INSNS (35), /* divdi */
1037 COSTS_N_INSNS (3), /* fp */
1038 COSTS_N_INSNS (3), /* dmul */
1039 COSTS_N_INSNS (14), /* sdiv */
1040 COSTS_N_INSNS (17), /* ddiv */
1041 128, /* cache line size */
1044 12, /* prefetch streams */
1045 COSTS_N_INSNS (3), /* SF->DF convert */
1048 /* Instruction costs on POWER9 processors. */
1050 struct processor_costs power9_cost
= {
1051 COSTS_N_INSNS (3), /* mulsi */
1052 COSTS_N_INSNS (3), /* mulsi_const */
1053 COSTS_N_INSNS (3), /* mulsi_const9 */
1054 COSTS_N_INSNS (3), /* muldi */
1055 COSTS_N_INSNS (8), /* divsi */
1056 COSTS_N_INSNS (12), /* divdi */
1057 COSTS_N_INSNS (3), /* fp */
1058 COSTS_N_INSNS (3), /* dmul */
1059 COSTS_N_INSNS (13), /* sdiv */
1060 COSTS_N_INSNS (18), /* ddiv */
1061 128, /* cache line size */
1064 8, /* prefetch streams */
1065 COSTS_N_INSNS (3), /* SF->DF convert */
1068 /* Instruction costs on POWER10 processors. */
1070 struct processor_costs power10_cost
= {
1071 COSTS_N_INSNS (2), /* mulsi */
1072 COSTS_N_INSNS (2), /* mulsi_const */
1073 COSTS_N_INSNS (2), /* mulsi_const9 */
1074 COSTS_N_INSNS (2), /* muldi */
1075 COSTS_N_INSNS (6), /* divsi */
1076 COSTS_N_INSNS (6), /* divdi */
1077 COSTS_N_INSNS (2), /* fp */
1078 COSTS_N_INSNS (2), /* dmul */
1079 COSTS_N_INSNS (11), /* sdiv */
1080 COSTS_N_INSNS (13), /* ddiv */
1081 128, /* cache line size */
1084 16, /* prefetch streams */
1085 COSTS_N_INSNS (2), /* SF->DF convert */
1088 /* Instruction costs on POWER A2 processors. */
1090 struct processor_costs ppca2_cost
= {
1091 COSTS_N_INSNS (16), /* mulsi */
1092 COSTS_N_INSNS (16), /* mulsi_const */
1093 COSTS_N_INSNS (16), /* mulsi_const9 */
1094 COSTS_N_INSNS (16), /* muldi */
1095 COSTS_N_INSNS (22), /* divsi */
1096 COSTS_N_INSNS (28), /* divdi */
1097 COSTS_N_INSNS (3), /* fp */
1098 COSTS_N_INSNS (3), /* dmul */
1099 COSTS_N_INSNS (59), /* sdiv */
1100 COSTS_N_INSNS (72), /* ddiv */
1103 2048, /* l2 cache */
1104 16, /* prefetch streams */
1105 0, /* SF->DF convert */
1108 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1109 static tree (*rs6000_veclib_handler
) (combined_fn
, tree
, tree
);
1112 static bool rs6000_debug_legitimate_address_p (machine_mode
, rtx
, bool,
1113 code_helper
= ERROR_MARK
);
1114 static tree
rs6000_handle_longcall_attribute (tree
*, tree
, tree
, int, bool *);
1115 static tree
rs6000_handle_altivec_attribute (tree
*, tree
, tree
, int, bool *);
1116 static tree
rs6000_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1117 static tree
rs6000_builtin_vectorized_libmass (combined_fn
, tree
, tree
);
1118 static void rs6000_emit_set_long_const (rtx
, HOST_WIDE_INT
);
1119 static int rs6000_memory_move_cost (machine_mode
, reg_class_t
, bool);
1120 static bool rs6000_debug_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
1121 static int rs6000_debug_address_cost (rtx
, machine_mode
, addr_space_t
,
1123 static int rs6000_debug_adjust_cost (rtx_insn
*, int, rtx_insn
*, int,
1125 static bool is_microcoded_insn (rtx_insn
*);
1126 static bool is_nonpipeline_insn (rtx_insn
*);
1127 static bool is_cracked_insn (rtx_insn
*);
1128 static bool is_load_insn (rtx
, rtx
*);
1129 static bool is_store_insn (rtx
, rtx
*);
1130 static bool set_to_load_agen (rtx_insn
*,rtx_insn
*);
1131 static bool insn_terminates_group_p (rtx_insn
*, enum group_termination
);
1132 static bool insn_must_be_first_in_group (rtx_insn
*);
1133 static bool insn_must_be_last_in_group (rtx_insn
*);
1134 bool easy_vector_constant (rtx
, machine_mode
);
1135 static rtx
rs6000_debug_legitimize_address (rtx
, rtx
, machine_mode
);
1136 static rtx
rs6000_legitimize_tls_address (rtx
, enum tls_model
);
1138 static tree
get_prev_label (tree
);
1140 static bool rs6000_mode_dependent_address (const_rtx
);
1141 static bool rs6000_debug_mode_dependent_address (const_rtx
);
1142 static bool rs6000_offsettable_memref_p (rtx
, machine_mode
, bool);
1143 static enum reg_class
rs6000_secondary_reload_class (enum reg_class
,
1145 static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class
,
1148 static enum reg_class
rs6000_preferred_reload_class (rtx
, enum reg_class
);
1149 static enum reg_class
rs6000_debug_preferred_reload_class (rtx
,
1151 static bool rs6000_debug_secondary_memory_needed (machine_mode
,
1154 static bool rs6000_debug_can_change_mode_class (machine_mode
,
1158 static bool (*rs6000_mode_dependent_address_ptr
) (const_rtx
)
1159 = rs6000_mode_dependent_address
;
1161 enum reg_class (*rs6000_secondary_reload_class_ptr
) (enum reg_class
,
1163 = rs6000_secondary_reload_class
;
1165 enum reg_class (*rs6000_preferred_reload_class_ptr
) (rtx
, enum reg_class
)
1166 = rs6000_preferred_reload_class
;
1168 const int INSN_NOT_AVAILABLE
= -1;
1170 static void rs6000_print_isa_options (FILE *, int, const char *,
1172 static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void);
1174 static enum rs6000_reg_type
register_to_reg_type (rtx
, bool *);
1175 static bool rs6000_secondary_reload_move (enum rs6000_reg_type
,
1176 enum rs6000_reg_type
,
1178 secondary_reload_info
*,
1180 rtl_opt_pass
*make_pass_analyze_swaps (gcc::context
*);
1182 /* Hash table stuff for keeping track of TOC entries. */
1184 struct GTY((for_user
)) toc_hash_struct
1186 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1187 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1189 machine_mode key_mode
;
1193 struct toc_hasher
: ggc_ptr_hash
<toc_hash_struct
>
1195 static hashval_t
hash (toc_hash_struct
*);
1196 static bool equal (toc_hash_struct
*, toc_hash_struct
*);
1199 static GTY (()) hash_table
<toc_hasher
> *toc_hash_table
;
1203 /* Default register names. */
1204 char rs6000_reg_names
[][8] =
1207 "0", "1", "2", "3", "4", "5", "6", "7",
1208 "8", "9", "10", "11", "12", "13", "14", "15",
1209 "16", "17", "18", "19", "20", "21", "22", "23",
1210 "24", "25", "26", "27", "28", "29", "30", "31",
1212 "0", "1", "2", "3", "4", "5", "6", "7",
1213 "8", "9", "10", "11", "12", "13", "14", "15",
1214 "16", "17", "18", "19", "20", "21", "22", "23",
1215 "24", "25", "26", "27", "28", "29", "30", "31",
1217 "0", "1", "2", "3", "4", "5", "6", "7",
1218 "8", "9", "10", "11", "12", "13", "14", "15",
1219 "16", "17", "18", "19", "20", "21", "22", "23",
1220 "24", "25", "26", "27", "28", "29", "30", "31",
1222 "lr", "ctr", "ca", "ap",
1224 "0", "1", "2", "3", "4", "5", "6", "7",
1225 /* vrsave vscr sfp */
1226 "vrsave", "vscr", "sfp",
1229 #ifdef TARGET_REGNAMES
1230 static const char alt_reg_names
[][8] =
1233 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1234 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1235 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1236 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1238 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1239 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1240 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1241 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1243 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1244 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1245 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1246 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1248 "lr", "ctr", "ca", "ap",
1250 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1251 /* vrsave vscr sfp */
1252 "vrsave", "vscr", "sfp",
1256 /* Table of valid machine attributes. */
1258 static const struct attribute_spec rs6000_attribute_table
[] =
1260 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1261 affects_type_identity, handler, exclude } */
1262 { "altivec", 1, 1, false, true, false, false,
1263 rs6000_handle_altivec_attribute
, NULL
},
1264 { "longcall", 0, 0, false, true, true, false,
1265 rs6000_handle_longcall_attribute
, NULL
},
1266 { "shortcall", 0, 0, false, true, true, false,
1267 rs6000_handle_longcall_attribute
, NULL
},
1268 { "ms_struct", 0, 0, false, false, false, false,
1269 rs6000_handle_struct_attribute
, NULL
},
1270 { "gcc_struct", 0, 0, false, false, false, false,
1271 rs6000_handle_struct_attribute
, NULL
},
1272 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1273 SUBTARGET_ATTRIBUTE_TABLE
,
1275 { NULL
, 0, 0, false, false, false, false, NULL
, NULL
}
1278 #ifndef TARGET_PROFILE_KERNEL
1279 #define TARGET_PROFILE_KERNEL 0
1282 /* Initialize the GCC target structure. */
1283 #undef TARGET_ATTRIBUTE_TABLE
1284 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1285 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1286 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1287 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1288 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1290 #undef TARGET_ASM_ALIGNED_DI_OP
1291 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1293 /* Default unaligned ops are only provided for ELF. Find the ops needed
1294 for non-ELF systems. */
1295 #ifndef OBJECT_FORMAT_ELF
1297 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1299 #undef TARGET_ASM_UNALIGNED_HI_OP
1300 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1301 #undef TARGET_ASM_UNALIGNED_SI_OP
1302 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1303 #undef TARGET_ASM_UNALIGNED_DI_OP
1304 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1307 #undef TARGET_ASM_UNALIGNED_HI_OP
1308 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1309 #undef TARGET_ASM_UNALIGNED_SI_OP
1310 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1311 #undef TARGET_ASM_UNALIGNED_DI_OP
1312 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1313 #undef TARGET_ASM_ALIGNED_DI_OP
1314 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1318 /* This hook deals with fixups for relocatable code and DI-mode objects
1320 #undef TARGET_ASM_INTEGER
1321 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1323 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1324 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1325 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1328 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
1329 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
1330 rs6000_print_patchable_function_entry
1332 #undef TARGET_SET_UP_BY_PROLOGUE
1333 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1335 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1336 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1337 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1338 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1339 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1340 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1341 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1342 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1343 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1344 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1345 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1346 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1348 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1349 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1351 #undef TARGET_INTERNAL_ARG_POINTER
1352 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1354 #undef TARGET_HAVE_TLS
1355 #define TARGET_HAVE_TLS HAVE_AS_TLS
1357 #undef TARGET_CANNOT_FORCE_CONST_MEM
1358 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1360 #undef TARGET_DELEGITIMIZE_ADDRESS
1361 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1363 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1364 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1366 #undef TARGET_LEGITIMATE_COMBINED_INSN
1367 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1369 #undef TARGET_ASM_FUNCTION_PROLOGUE
1370 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1371 #undef TARGET_ASM_FUNCTION_EPILOGUE
1372 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1374 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1375 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1377 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1378 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1380 #undef TARGET_LEGITIMIZE_ADDRESS
1381 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1383 #undef TARGET_SCHED_VARIABLE_ISSUE
1384 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1386 #undef TARGET_SCHED_ISSUE_RATE
1387 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1388 #undef TARGET_SCHED_ADJUST_COST
1389 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1390 #undef TARGET_SCHED_ADJUST_PRIORITY
1391 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1392 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1393 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1394 #undef TARGET_SCHED_INIT
1395 #define TARGET_SCHED_INIT rs6000_sched_init
1396 #undef TARGET_SCHED_FINISH
1397 #define TARGET_SCHED_FINISH rs6000_sched_finish
1398 #undef TARGET_SCHED_REORDER
1399 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1400 #undef TARGET_SCHED_REORDER2
1401 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1403 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1404 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1406 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1407 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1409 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1410 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1411 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1412 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1413 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1414 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1415 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1416 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1418 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1419 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1421 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1422 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1423 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1424 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1425 rs6000_builtin_support_vector_misalignment
1426 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1427 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1428 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1429 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1430 rs6000_builtin_vectorization_cost
1431 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1432 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1433 rs6000_preferred_simd_mode
1434 #undef TARGET_VECTORIZE_CREATE_COSTS
1435 #define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs
1437 #undef TARGET_LOOP_UNROLL_ADJUST
1438 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1440 #undef TARGET_INIT_BUILTINS
1441 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1442 #undef TARGET_BUILTIN_DECL
1443 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1445 #undef TARGET_FOLD_BUILTIN
1446 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1447 #undef TARGET_GIMPLE_FOLD_BUILTIN
1448 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1450 #undef TARGET_EXPAND_BUILTIN
1451 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1453 #undef TARGET_MANGLE_TYPE
1454 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1456 #undef TARGET_INIT_LIBFUNCS
1457 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1460 #undef TARGET_BINDS_LOCAL_P
1461 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1464 #undef TARGET_MS_BITFIELD_LAYOUT_P
1465 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1467 #undef TARGET_ASM_OUTPUT_MI_THUNK
1468 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1470 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1471 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1473 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1474 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1476 #undef TARGET_REGISTER_MOVE_COST
1477 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1478 #undef TARGET_MEMORY_MOVE_COST
1479 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1480 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1481 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1482 rs6000_ira_change_pseudo_allocno_class
1483 #undef TARGET_CANNOT_COPY_INSN_P
1484 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1485 #undef TARGET_RTX_COSTS
1486 #define TARGET_RTX_COSTS rs6000_rtx_costs
1487 #undef TARGET_ADDRESS_COST
1488 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1489 #undef TARGET_INSN_COST
1490 #define TARGET_INSN_COST rs6000_insn_cost
1492 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1493 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1495 #undef TARGET_PROMOTE_FUNCTION_MODE
1496 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1498 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1499 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1501 #undef TARGET_RETURN_IN_MEMORY
1502 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1504 #undef TARGET_RETURN_IN_MSB
1505 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1507 #undef TARGET_SETUP_INCOMING_VARARGS
1508 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1510 /* Always strict argument naming on rs6000. */
1511 #undef TARGET_STRICT_ARGUMENT_NAMING
1512 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1513 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1514 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1515 #undef TARGET_SPLIT_COMPLEX_ARG
1516 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1517 #undef TARGET_MUST_PASS_IN_STACK
1518 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1519 #undef TARGET_PASS_BY_REFERENCE
1520 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1521 #undef TARGET_ARG_PARTIAL_BYTES
1522 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1523 #undef TARGET_FUNCTION_ARG_ADVANCE
1524 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1525 #undef TARGET_FUNCTION_ARG
1526 #define TARGET_FUNCTION_ARG rs6000_function_arg
1527 #undef TARGET_FUNCTION_ARG_PADDING
1528 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1529 #undef TARGET_FUNCTION_ARG_BOUNDARY
1530 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1532 #undef TARGET_BUILD_BUILTIN_VA_LIST
1533 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1535 #undef TARGET_EXPAND_BUILTIN_VA_START
1536 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1538 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1539 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1541 #undef TARGET_EH_RETURN_FILTER_MODE
1542 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1544 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1545 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1547 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1548 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1550 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
1551 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
1552 rs6000_libgcc_floating_mode_supported_p
1554 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1555 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1557 #undef TARGET_FLOATN_MODE
1558 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1560 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1561 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1563 #undef TARGET_MD_ASM_ADJUST
1564 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1566 #undef TARGET_OPTION_OVERRIDE
1567 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1569 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1570 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1571 rs6000_builtin_vectorized_function
1573 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1574 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1575 rs6000_builtin_md_vectorized_function
1577 #undef TARGET_STACK_PROTECT_GUARD
1578 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1581 #undef TARGET_STACK_PROTECT_FAIL
1582 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1586 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1587 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1590 /* Use a 32-bit anchor range. This leads to sequences like:
1592 addis tmp,anchor,high
1595 where tmp itself acts as an anchor, and can be shared between
1596 accesses to the same 64k page. */
1597 #undef TARGET_MIN_ANCHOR_OFFSET
1598 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1599 #undef TARGET_MAX_ANCHOR_OFFSET
1600 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1601 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1602 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1603 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1604 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1606 #undef TARGET_BUILTIN_RECIPROCAL
1607 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1609 #undef TARGET_SECONDARY_RELOAD
1610 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1611 #undef TARGET_SECONDARY_MEMORY_NEEDED
1612 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1613 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1614 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1616 #undef TARGET_LEGITIMATE_ADDRESS_P
1617 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1619 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1620 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1622 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1623 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1625 #undef TARGET_CAN_ELIMINATE
1626 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1628 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1629 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1631 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1632 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1634 #undef TARGET_TRAMPOLINE_INIT
1635 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1637 #undef TARGET_FUNCTION_VALUE
1638 #define TARGET_FUNCTION_VALUE rs6000_function_value
1640 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1641 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1643 #undef TARGET_OPTION_SAVE
1644 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1646 #undef TARGET_OPTION_RESTORE
1647 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1649 #undef TARGET_OPTION_PRINT
1650 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1652 #undef TARGET_CAN_INLINE_P
1653 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1655 #undef TARGET_SET_CURRENT_FUNCTION
1656 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1658 #undef TARGET_LEGITIMATE_CONSTANT_P
1659 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1661 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1662 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1664 #undef TARGET_CAN_USE_DOLOOP_P
1665 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1667 #undef TARGET_PREDICT_DOLOOP_P
1668 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1670 #undef TARGET_HAVE_COUNT_REG_DECR_P
1671 #define TARGET_HAVE_COUNT_REG_DECR_P true
1673 /* 1000000000 is infinite cost in IVOPTs. */
1674 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1675 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1677 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1678 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1680 #undef TARGET_PREFERRED_DOLOOP_MODE
1681 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1683 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1684 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1686 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1687 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1688 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1689 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1690 #undef TARGET_UNWIND_WORD_MODE
1691 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1693 #undef TARGET_OFFLOAD_OPTIONS
1694 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1696 #undef TARGET_C_MODE_FOR_SUFFIX
1697 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1699 #undef TARGET_INVALID_BINARY_OP
1700 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1702 #undef TARGET_OPTAB_SUPPORTED_P
1703 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1705 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1706 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1708 #undef TARGET_COMPARE_VERSION_PRIORITY
1709 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1711 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1712 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1713 rs6000_generate_version_dispatcher_body
1715 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1716 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1717 rs6000_get_function_versions_dispatcher
1719 #undef TARGET_OPTION_FUNCTION_VERSIONS
1720 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1722 #undef TARGET_HARD_REGNO_NREGS
1723 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1724 #undef TARGET_HARD_REGNO_MODE_OK
1725 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1727 #undef TARGET_MODES_TIEABLE_P
1728 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1730 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1731 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1732 rs6000_hard_regno_call_part_clobbered
1734 #undef TARGET_SLOW_UNALIGNED_ACCESS
1735 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1737 #undef TARGET_CAN_CHANGE_MODE_CLASS
1738 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1740 #undef TARGET_CONSTANT_ALIGNMENT
1741 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1743 #undef TARGET_STARTING_FRAME_OFFSET
1744 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1746 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1747 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1749 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1750 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1752 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1753 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1754 rs6000_cannot_substitute_mem_equiv_p
1756 #undef TARGET_INVALID_CONVERSION
1757 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1759 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1760 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1762 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1763 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1765 #undef TARGET_CONST_ANCHOR
1766 #define TARGET_CONST_ANCHOR 0x8000
1770 /* Processor table. */
1773 const char *const name
; /* Canonical processor name. */
1774 const enum processor_type processor
; /* Processor type enum value. */
1775 const HOST_WIDE_INT target_enable
; /* Target flags to enable. */
1778 static struct rs6000_ptt
const processor_target_table
[] =
1780 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1781 #include "rs6000-cpus.def"
1785 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1789 rs6000_cpu_name_lookup (const char *name
)
1795 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
1796 if (! strcmp (name
, processor_target_table
[i
].name
))
1804 /* Return number of consecutive hard regs needed starting at reg REGNO
1805 to hold something of mode MODE.
1806 This is ordinarily the length in words of a value of mode MODE
1807 but can be less for certain modes in special long registers.
1809 POWER and PowerPC GPRs hold 32 bits worth;
1810 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
1813 rs6000_hard_regno_nregs_internal (int regno
, machine_mode mode
)
1815 unsigned HOST_WIDE_INT reg_size
;
1817 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1818 128-bit floating point that can go in vector registers, which has VSX
1819 memory addressing. */
1820 if (FP_REGNO_P (regno
))
1821 reg_size
= (VECTOR_MEM_VSX_P (mode
) || VECTOR_ALIGNMENT_P (mode
)
1822 ? UNITS_PER_VSX_WORD
1823 : UNITS_PER_FP_WORD
);
1825 else if (ALTIVEC_REGNO_P (regno
))
1826 reg_size
= UNITS_PER_ALTIVEC_WORD
;
1829 reg_size
= UNITS_PER_WORD
;
1831 return (GET_MODE_SIZE (mode
) + reg_size
- 1) / reg_size
;
1834 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1837 rs6000_hard_regno_mode_ok_uncached (int regno
, machine_mode mode
)
1839 int last_regno
= regno
+ rs6000_hard_regno_nregs
[mode
][regno
] - 1;
1841 if (COMPLEX_MODE_P (mode
))
1842 mode
= GET_MODE_INNER (mode
);
1844 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1847 return (TARGET_MMA
&& VSX_REGNO_P (regno
) && (regno
& 1) == 0);
1849 /* MMA accumulator modes need FPR registers divisible by 4. */
1851 return (TARGET_MMA
&& FP_REGNO_P (regno
) && (regno
& 3) == 0);
1853 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1854 register combinations, and use PTImode where we need to deal with quad
1855 word memory operations. Don't allow quad words in the argument or frame
1856 pointer registers, just registers 0..31. */
1857 if (mode
== PTImode
)
1858 return (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
1859 && IN_RANGE (last_regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
1860 && ((regno
& 1) == 0));
1862 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1863 implementations. Don't allow an item to be split between a FP register
1864 and an Altivec register. Allow TImode in all VSX registers if the user
1866 if (TARGET_VSX
&& VSX_REGNO_P (regno
)
1867 && (VECTOR_MEM_VSX_P (mode
)
1868 || VECTOR_ALIGNMENT_P (mode
)
1869 || reg_addr
[mode
].scalar_in_vmx_p
1871 || (TARGET_VADDUQM
&& mode
== V1TImode
)))
1873 if (FP_REGNO_P (regno
))
1874 return FP_REGNO_P (last_regno
);
1876 if (ALTIVEC_REGNO_P (regno
))
1878 if (GET_MODE_SIZE (mode
) < 16 && !reg_addr
[mode
].scalar_in_vmx_p
)
1881 return ALTIVEC_REGNO_P (last_regno
);
1885 /* The GPRs can hold any mode, but values bigger than one register
1886 cannot go past R31. */
1887 if (INT_REGNO_P (regno
))
1888 return INT_REGNO_P (last_regno
);
1890 /* The float registers (except for VSX vector modes) can only hold floating
1891 modes and DImode. */
1892 if (FP_REGNO_P (regno
))
1894 if (VECTOR_ALIGNMENT_P (mode
))
1897 if (SCALAR_FLOAT_MODE_P (mode
)
1898 && (mode
!= TDmode
|| (regno
% 2) == 0)
1899 && FP_REGNO_P (last_regno
))
1902 if (GET_MODE_CLASS (mode
) == MODE_INT
)
1904 if(GET_MODE_SIZE (mode
) == UNITS_PER_FP_WORD
)
1907 if (TARGET_P8_VECTOR
&& (mode
== SImode
))
1910 if (TARGET_P9_VECTOR
&& (mode
== QImode
|| mode
== HImode
))
1917 /* The CR register can only hold CC modes. */
1918 if (CR_REGNO_P (regno
))
1919 return GET_MODE_CLASS (mode
) == MODE_CC
;
1921 if (CA_REGNO_P (regno
))
1922 return mode
== Pmode
|| mode
== SImode
;
1924 /* AltiVec only in AldyVec registers. */
1925 if (ALTIVEC_REGNO_P (regno
))
1926 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
1927 || mode
== V1TImode
);
1929 /* We cannot put non-VSX TImode or PTImode anywhere except general register
1930 and it must be able to fit within the register set. */
1932 return GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
;
1935 /* Implement TARGET_HARD_REGNO_NREGS. */
1938 rs6000_hard_regno_nregs_hook (unsigned int regno
, machine_mode mode
)
1940 return rs6000_hard_regno_nregs
[mode
][regno
];
1943 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1946 rs6000_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
1948 return rs6000_hard_regno_mode_ok_p
[mode
][regno
];
1951 /* Implement TARGET_MODES_TIEABLE_P.
1953 PTImode cannot tie with other modes because PTImode is restricted to even
1954 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1957 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1958 registers) or XOmode (vector quad, restricted to FPR registers divisible
1959 by 4) to tie with other modes.
1961 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1962 128-bit floating point on VSX systems ties with other vectors. */
1965 rs6000_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
1967 if (mode1
== PTImode
|| mode1
== OOmode
|| mode1
== XOmode
1968 || mode2
== PTImode
|| mode2
== OOmode
|| mode2
== XOmode
)
1969 return mode1
== mode2
;
1971 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1
))
1972 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2
);
1973 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2
))
1976 if (SCALAR_FLOAT_MODE_P (mode1
))
1977 return SCALAR_FLOAT_MODE_P (mode2
);
1978 if (SCALAR_FLOAT_MODE_P (mode2
))
1981 if (GET_MODE_CLASS (mode1
) == MODE_CC
)
1982 return GET_MODE_CLASS (mode2
) == MODE_CC
;
1983 if (GET_MODE_CLASS (mode2
) == MODE_CC
)
1989 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1992 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno
,
1997 && GET_MODE_SIZE (mode
) > 4
1998 && INT_REGNO_P (regno
))
2002 && FP_REGNO_P (regno
)
2003 && GET_MODE_SIZE (mode
) > 8
2004 && !FLOAT128_2REG_P (mode
))
2010 /* Print interesting facts about registers. */
2012 rs6000_debug_reg_print (int first_regno
, int last_regno
, const char *reg_name
)
2016 for (r
= first_regno
; r
<= last_regno
; ++r
)
2018 const char *comma
= "";
2021 if (first_regno
== last_regno
)
2022 fprintf (stderr
, "%s:\t", reg_name
);
2024 fprintf (stderr
, "%s%d:\t", reg_name
, r
- first_regno
);
2027 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2028 if (rs6000_hard_regno_mode_ok_p
[m
][r
] && rs6000_hard_regno_nregs
[m
][r
])
2032 fprintf (stderr
, ",\n\t");
2037 if (rs6000_hard_regno_nregs
[m
][r
] > 1)
2038 len
+= fprintf (stderr
, "%s%s/%d", comma
, GET_MODE_NAME (m
),
2039 rs6000_hard_regno_nregs
[m
][r
]);
2041 len
+= fprintf (stderr
, "%s%s", comma
, GET_MODE_NAME (m
));
2046 if (call_used_or_fixed_reg_p (r
))
2050 fprintf (stderr
, ",\n\t");
2055 len
+= fprintf (stderr
, "%s%s", comma
, "call-used");
2063 fprintf (stderr
, ",\n\t");
2068 len
+= fprintf (stderr
, "%s%s", comma
, "fixed");
2074 fprintf (stderr
, ",\n\t");
2078 len
+= fprintf (stderr
, "%sreg-class = %s", comma
,
2079 reg_class_names
[(int)rs6000_regno_regclass
[r
]]);
2084 fprintf (stderr
, ",\n\t");
2088 fprintf (stderr
, "%sregno = %d\n", comma
, r
);
2093 rs6000_debug_vector_unit (enum rs6000_vector v
)
2099 case VECTOR_NONE
: ret
= "none"; break;
2100 case VECTOR_ALTIVEC
: ret
= "altivec"; break;
2101 case VECTOR_VSX
: ret
= "vsx"; break;
2102 case VECTOR_P8_VECTOR
: ret
= "p8_vector"; break;
2103 default: ret
= "unknown"; break;
2109 /* Inner function printing just the address mask for a particular reload
2111 DEBUG_FUNCTION
char *
2112 rs6000_debug_addr_mask (addr_mask_type mask
, bool keep_spaces
)
2117 if ((mask
& RELOAD_REG_VALID
) != 0)
2119 else if (keep_spaces
)
2122 if ((mask
& RELOAD_REG_MULTIPLE
) != 0)
2124 else if (keep_spaces
)
2127 if ((mask
& RELOAD_REG_INDEXED
) != 0)
2129 else if (keep_spaces
)
2132 if ((mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
2134 else if ((mask
& RELOAD_REG_OFFSET
) != 0)
2136 else if (keep_spaces
)
2139 if ((mask
& RELOAD_REG_PRE_INCDEC
) != 0)
2141 else if (keep_spaces
)
2144 if ((mask
& RELOAD_REG_PRE_MODIFY
) != 0)
2146 else if (keep_spaces
)
2149 if ((mask
& RELOAD_REG_AND_M16
) != 0)
2151 else if (keep_spaces
)
2159 /* Print the address masks in a human readble fashion. */
2161 rs6000_debug_print_mode (ssize_t m
)
2166 fprintf (stderr
, "Mode: %-5s", GET_MODE_NAME (m
));
2167 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2168 fprintf (stderr
, " %s: %s", reload_reg_map
[rc
].name
,
2169 rs6000_debug_addr_mask (reg_addr
[m
].addr_mask
[rc
], true));
2171 if ((reg_addr
[m
].reload_store
!= CODE_FOR_nothing
)
2172 || (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
))
2174 fprintf (stderr
, "%*s Reload=%c%c", spaces
, "",
2175 (reg_addr
[m
].reload_store
!= CODE_FOR_nothing
) ? 's' : '*',
2176 (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
) ? 'l' : '*');
2180 spaces
+= strlen (" Reload=sl");
2182 if (reg_addr
[m
].scalar_in_vmx_p
)
2184 fprintf (stderr
, "%*s Upper=y", spaces
, "");
2188 spaces
+= strlen (" Upper=y");
2190 if (rs6000_vector_unit
[m
] != VECTOR_NONE
2191 || rs6000_vector_mem
[m
] != VECTOR_NONE
)
2193 fprintf (stderr
, "%*s vector: arith=%-10s mem=%s",
2195 rs6000_debug_vector_unit (rs6000_vector_unit
[m
]),
2196 rs6000_debug_vector_unit (rs6000_vector_mem
[m
]));
2199 fputs ("\n", stderr
);
2202 #define DEBUG_FMT_ID "%-32s= "
2203 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2204 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2205 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2207 /* Print various interesting information with -mdebug=reg. */
2209 rs6000_debug_reg_global (void)
2211 static const char *const tf
[2] = { "false", "true" };
2212 const char *nl
= (const char *)0;
2215 char costly_num
[20];
2217 char flags_buffer
[40];
2218 const char *costly_str
;
2219 const char *nop_str
;
2220 const char *trace_str
;
2221 const char *abi_str
;
2222 const char *cmodel_str
;
2223 struct cl_target_option cl_opts
;
2225 /* Modes we want tieable information on. */
2226 static const machine_mode print_tieable_modes
[] = {
2265 /* Virtual regs we are interested in. */
2266 const static struct {
2267 int regno
; /* register number. */
2268 const char *name
; /* register name. */
2269 } virtual_regs
[] = {
2270 { STACK_POINTER_REGNUM
, "stack pointer:" },
2271 { TOC_REGNUM
, "toc: " },
2272 { STATIC_CHAIN_REGNUM
, "static chain: " },
2273 { RS6000_PIC_OFFSET_TABLE_REGNUM
, "pic offset: " },
2274 { HARD_FRAME_POINTER_REGNUM
, "hard frame: " },
2275 { ARG_POINTER_REGNUM
, "arg pointer: " },
2276 { FRAME_POINTER_REGNUM
, "frame pointer:" },
2277 { FIRST_PSEUDO_REGISTER
, "first pseudo: " },
2278 { FIRST_VIRTUAL_REGISTER
, "first virtual:" },
2279 { VIRTUAL_INCOMING_ARGS_REGNUM
, "incoming_args:" },
2280 { VIRTUAL_STACK_VARS_REGNUM
, "stack_vars: " },
2281 { VIRTUAL_STACK_DYNAMIC_REGNUM
, "stack_dynamic:" },
2282 { VIRTUAL_OUTGOING_ARGS_REGNUM
, "outgoing_args:" },
2283 { VIRTUAL_CFA_REGNUM
, "cfa (frame): " },
2284 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM
, "stack boundry:" },
2285 { LAST_VIRTUAL_REGISTER
, "last virtual: " },
2288 fputs ("\nHard register information:\n", stderr
);
2289 rs6000_debug_reg_print (FIRST_GPR_REGNO
, LAST_GPR_REGNO
, "gr");
2290 rs6000_debug_reg_print (FIRST_FPR_REGNO
, LAST_FPR_REGNO
, "fp");
2291 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO
,
2294 rs6000_debug_reg_print (LR_REGNO
, LR_REGNO
, "lr");
2295 rs6000_debug_reg_print (CTR_REGNO
, CTR_REGNO
, "ctr");
2296 rs6000_debug_reg_print (CR0_REGNO
, CR7_REGNO
, "cr");
2297 rs6000_debug_reg_print (CA_REGNO
, CA_REGNO
, "ca");
2298 rs6000_debug_reg_print (VRSAVE_REGNO
, VRSAVE_REGNO
, "vrsave");
2299 rs6000_debug_reg_print (VSCR_REGNO
, VSCR_REGNO
, "vscr");
2301 fputs ("\nVirtual/stack/frame registers:\n", stderr
);
2302 for (v
= 0; v
< ARRAY_SIZE (virtual_regs
); v
++)
2303 fprintf (stderr
, "%s regno = %3d\n", virtual_regs
[v
].name
, virtual_regs
[v
].regno
);
2307 "d reg_class = %s\n"
2308 "v reg_class = %s\n"
2309 "wa reg_class = %s\n"
2310 "we reg_class = %s\n"
2311 "wr reg_class = %s\n"
2312 "wx reg_class = %s\n"
2313 "wA reg_class = %s\n"
2315 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_d
]],
2316 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_v
]],
2317 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wa
]],
2318 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_we
]],
2319 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wr
]],
2320 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wx
]],
2321 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wA
]]);
2324 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2325 rs6000_debug_print_mode (m
);
2327 fputs ("\n", stderr
);
2329 for (m1
= 0; m1
< ARRAY_SIZE (print_tieable_modes
); m1
++)
2331 machine_mode mode1
= print_tieable_modes
[m1
];
2332 bool first_time
= true;
2334 nl
= (const char *)0;
2335 for (m2
= 0; m2
< ARRAY_SIZE (print_tieable_modes
); m2
++)
2337 machine_mode mode2
= print_tieable_modes
[m2
];
2338 if (mode1
!= mode2
&& rs6000_modes_tieable_p (mode1
, mode2
))
2342 fprintf (stderr
, "Tieable modes %s:", GET_MODE_NAME (mode1
));
2347 fprintf (stderr
, " %s", GET_MODE_NAME (mode2
));
2352 fputs ("\n", stderr
);
2358 if (rs6000_recip_control
)
2360 fprintf (stderr
, "\nReciprocal mask = 0x%x\n", rs6000_recip_control
);
2362 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2363 if (rs6000_recip_bits
[m
])
2366 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2368 (RS6000_RECIP_AUTO_RE_P (m
)
2370 : (RS6000_RECIP_HAVE_RE_P (m
) ? "have" : "none")),
2371 (RS6000_RECIP_AUTO_RSQRTE_P (m
)
2373 : (RS6000_RECIP_HAVE_RSQRTE_P (m
) ? "have" : "none")));
2376 fputs ("\n", stderr
);
2379 if (rs6000_cpu_index
>= 0)
2381 const char *name
= processor_target_table
[rs6000_cpu_index
].name
;
2383 = processor_target_table
[rs6000_cpu_index
].target_enable
;
2385 sprintf (flags_buffer
, "-mcpu=%s flags", name
);
2386 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2389 fprintf (stderr
, DEBUG_FMT_S
, "cpu", "<none>");
2391 if (rs6000_tune_index
>= 0)
2393 const char *name
= processor_target_table
[rs6000_tune_index
].name
;
2395 = processor_target_table
[rs6000_tune_index
].target_enable
;
2397 sprintf (flags_buffer
, "-mtune=%s flags", name
);
2398 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2401 fprintf (stderr
, DEBUG_FMT_S
, "tune", "<none>");
2403 cl_target_option_save (&cl_opts
, &global_options
, &global_options_set
);
2404 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags",
2407 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags_explicit",
2408 rs6000_isa_flags_explicit
);
2410 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
2412 fprintf (stderr
, DEBUG_FMT_S
, "--with-cpu default",
2413 OPTION_TARGET_CPU_DEFAULT
? OPTION_TARGET_CPU_DEFAULT
: "<none>");
2415 switch (rs6000_sched_costly_dep
)
2417 case max_dep_latency
:
2418 costly_str
= "max_dep_latency";
2422 costly_str
= "no_dep_costly";
2425 case all_deps_costly
:
2426 costly_str
= "all_deps_costly";
2429 case true_store_to_load_dep_costly
:
2430 costly_str
= "true_store_to_load_dep_costly";
2433 case store_to_load_dep_costly
:
2434 costly_str
= "store_to_load_dep_costly";
2438 costly_str
= costly_num
;
2439 sprintf (costly_num
, "%d", (int)rs6000_sched_costly_dep
);
2443 fprintf (stderr
, DEBUG_FMT_S
, "sched_costly_dep", costly_str
);
2445 switch (rs6000_sched_insert_nops
)
2447 case sched_finish_regroup_exact
:
2448 nop_str
= "sched_finish_regroup_exact";
2451 case sched_finish_pad_groups
:
2452 nop_str
= "sched_finish_pad_groups";
2455 case sched_finish_none
:
2456 nop_str
= "sched_finish_none";
2461 sprintf (nop_num
, "%d", (int)rs6000_sched_insert_nops
);
2465 fprintf (stderr
, DEBUG_FMT_S
, "sched_insert_nops", nop_str
);
2467 switch (rs6000_sdata
)
2474 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "data");
2478 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "sysv");
2482 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "eabi");
2487 switch (rs6000_traceback
)
2489 case traceback_default
: trace_str
= "default"; break;
2490 case traceback_none
: trace_str
= "none"; break;
2491 case traceback_part
: trace_str
= "part"; break;
2492 case traceback_full
: trace_str
= "full"; break;
2493 default: trace_str
= "unknown"; break;
2496 fprintf (stderr
, DEBUG_FMT_S
, "traceback", trace_str
);
2498 switch (rs6000_current_cmodel
)
2500 case CMODEL_SMALL
: cmodel_str
= "small"; break;
2501 case CMODEL_MEDIUM
: cmodel_str
= "medium"; break;
2502 case CMODEL_LARGE
: cmodel_str
= "large"; break;
2503 default: cmodel_str
= "unknown"; break;
2506 fprintf (stderr
, DEBUG_FMT_S
, "cmodel", cmodel_str
);
2508 switch (rs6000_current_abi
)
2510 case ABI_NONE
: abi_str
= "none"; break;
2511 case ABI_AIX
: abi_str
= "aix"; break;
2512 case ABI_ELFv2
: abi_str
= "ELFv2"; break;
2513 case ABI_V4
: abi_str
= "V4"; break;
2514 case ABI_DARWIN
: abi_str
= "darwin"; break;
2515 default: abi_str
= "unknown"; break;
2518 fprintf (stderr
, DEBUG_FMT_S
, "abi", abi_str
);
2520 if (rs6000_altivec_abi
)
2521 fprintf (stderr
, DEBUG_FMT_S
, "altivec_abi", "true");
2523 if (rs6000_aix_extabi
)
2524 fprintf (stderr
, DEBUG_FMT_S
, "AIX vec-extabi", "true");
2526 if (rs6000_darwin64_abi
)
2527 fprintf (stderr
, DEBUG_FMT_S
, "darwin64_abi", "true");
2529 fprintf (stderr
, DEBUG_FMT_S
, "soft_float",
2530 (TARGET_SOFT_FLOAT
? "true" : "false"));
2532 if (TARGET_LINK_STACK
)
2533 fprintf (stderr
, DEBUG_FMT_S
, "link_stack", "true");
2535 if (TARGET_P8_FUSION
)
2539 strcpy (options
, "power8");
2540 if (TARGET_P8_FUSION_SIGN
)
2541 strcat (options
, ", sign");
2543 fprintf (stderr
, DEBUG_FMT_S
, "fusion", options
);
2546 fprintf (stderr
, DEBUG_FMT_S
, "plt-format",
2547 TARGET_SECURE_PLT
? "secure" : "bss");
2548 fprintf (stderr
, DEBUG_FMT_S
, "struct-return",
2549 aix_struct_return
? "aix" : "sysv");
2550 fprintf (stderr
, DEBUG_FMT_S
, "always_hint", tf
[!!rs6000_always_hint
]);
2551 fprintf (stderr
, DEBUG_FMT_S
, "sched_groups", tf
[!!rs6000_sched_groups
]);
2552 fprintf (stderr
, DEBUG_FMT_S
, "align_branch",
2553 tf
[!!rs6000_align_branch_targets
]);
2554 fprintf (stderr
, DEBUG_FMT_D
, "tls_size", rs6000_tls_size
);
2555 fprintf (stderr
, DEBUG_FMT_D
, "long_double_size",
2556 rs6000_long_double_type_size
);
2557 if (rs6000_long_double_type_size
> 64)
2559 fprintf (stderr
, DEBUG_FMT_S
, "long double type",
2560 TARGET_IEEEQUAD
? "IEEE" : "IBM");
2561 fprintf (stderr
, DEBUG_FMT_S
, "default long double type",
2562 TARGET_IEEEQUAD_DEFAULT
? "IEEE" : "IBM");
2564 fprintf (stderr
, DEBUG_FMT_D
, "sched_restricted_insns_priority",
2565 (int)rs6000_sched_restricted_insns_priority
);
2566 fprintf (stderr
, DEBUG_FMT_D
, "Number of standard builtins",
2569 fprintf (stderr
, DEBUG_FMT_D
, "Enable float128 on VSX",
2570 (int)TARGET_FLOAT128_ENABLE_TYPE
);
2573 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit scalar element",
2574 (int)VECTOR_ELEMENT_SCALAR_64BIT
);
2576 if (TARGET_DIRECT_MOVE_128
)
2577 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit mfvsrld element",
2578 (int)VECTOR_ELEMENT_MFVSRLD_64BIT
);
2582 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2583 legitimate address support to figure out the appropriate addressing to
2587 rs6000_setup_reg_addr_masks (void)
2589 ssize_t rc
, reg
, m
, nregs
;
2590 addr_mask_type any_addr_mask
, addr_mask
;
2592 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2594 machine_mode m2
= (machine_mode
) m
;
2595 bool complex_p
= false;
2596 bool small_int_p
= (m2
== QImode
|| m2
== HImode
|| m2
== SImode
);
2599 if (COMPLEX_MODE_P (m2
))
2602 m2
= GET_MODE_INNER (m2
);
2605 msize
= GET_MODE_SIZE (m2
);
2607 /* SDmode is special in that we want to access it only via REG+REG
2608 addressing on power7 and above, since we want to use the LFIWZX and
2609 STFIWZX instructions to load it. */
2610 bool indexed_only_p
= (m
== SDmode
&& TARGET_NO_SDMODE_STACK
);
2613 for (rc
= FIRST_RELOAD_REG_CLASS
; rc
<= LAST_RELOAD_REG_CLASS
; rc
++)
2616 reg
= reload_reg_map
[rc
].reg
;
2618 /* Can mode values go in the GPR/FPR/Altivec registers? */
2619 if (reg
>= 0 && rs6000_hard_regno_mode_ok_p
[m
][reg
])
2621 bool small_int_vsx_p
= (small_int_p
2622 && (rc
== RELOAD_REG_FPR
2623 || rc
== RELOAD_REG_VMX
));
2625 nregs
= rs6000_hard_regno_nregs
[m
][reg
];
2626 addr_mask
|= RELOAD_REG_VALID
;
2628 /* Indicate if the mode takes more than 1 physical register. If
2629 it takes a single register, indicate it can do REG+REG
2630 addressing. Small integers in VSX registers can only do
2631 REG+REG addressing. */
2632 if (small_int_vsx_p
)
2633 addr_mask
|= RELOAD_REG_INDEXED
;
2634 else if (nregs
> 1 || m
== BLKmode
|| complex_p
)
2635 addr_mask
|= RELOAD_REG_MULTIPLE
;
2637 addr_mask
|= RELOAD_REG_INDEXED
;
2639 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2640 addressing. If we allow scalars into Altivec registers,
2641 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2643 For VSX systems, we don't allow update addressing for
2644 DFmode/SFmode if those registers can go in both the
2645 traditional floating point registers and Altivec registers.
2646 The load/store instructions for the Altivec registers do not
2647 have update forms. If we allowed update addressing, it seems
2648 to break IV-OPT code using floating point if the index type is
2649 int instead of long (PR target/81550 and target/84042). */
2652 && (rc
== RELOAD_REG_GPR
|| rc
== RELOAD_REG_FPR
)
2654 && !VECTOR_MODE_P (m2
)
2655 && !VECTOR_ALIGNMENT_P (m2
)
2657 && (m
!= E_DFmode
|| !TARGET_VSX
)
2658 && (m
!= E_SFmode
|| !TARGET_P8_VECTOR
)
2659 && !small_int_vsx_p
)
2661 addr_mask
|= RELOAD_REG_PRE_INCDEC
;
2663 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2664 we don't allow PRE_MODIFY for some multi-register
2669 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2673 if (TARGET_POWERPC64
)
2674 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2679 if (TARGET_HARD_FLOAT
)
2680 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2686 /* GPR and FPR registers can do REG+OFFSET addressing, except
2687 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2688 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2689 if ((addr_mask
!= 0) && !indexed_only_p
2691 && (rc
== RELOAD_REG_GPR
2692 || ((msize
== 8 || m2
== SFmode
)
2693 && (rc
== RELOAD_REG_FPR
2694 || (rc
== RELOAD_REG_VMX
&& TARGET_P9_VECTOR
)))))
2695 addr_mask
|= RELOAD_REG_OFFSET
;
2697 /* VSX registers can do REG+OFFSET addresssing if ISA 3.0
2698 instructions are enabled. The offset for 128-bit VSX registers is
2699 only 12-bits. While GPRs can handle the full offset range, VSX
2700 registers can only handle the restricted range. */
2701 else if ((addr_mask
!= 0) && !indexed_only_p
2702 && msize
== 16 && TARGET_P9_VECTOR
2703 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2
)
2704 || (m2
== TImode
&& TARGET_VSX
)))
2706 addr_mask
|= RELOAD_REG_OFFSET
;
2707 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2708 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2711 /* Vector pairs can do both indexed and offset loads if the
2712 instructions are enabled, otherwise they can only do offset loads
2713 since it will be broken into two vector moves. Vector quads can
2714 only do offset loads. */
2715 else if ((addr_mask
!= 0) && TARGET_MMA
2716 && (m2
== OOmode
|| m2
== XOmode
))
2718 addr_mask
|= RELOAD_REG_OFFSET
;
2719 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2721 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2723 addr_mask
|= RELOAD_REG_INDEXED
;
2727 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2728 addressing on 128-bit types. */
2729 if (rc
== RELOAD_REG_VMX
&& msize
== 16
2730 && (addr_mask
& RELOAD_REG_VALID
) != 0)
2731 addr_mask
|= RELOAD_REG_AND_M16
;
2733 reg_addr
[m
].addr_mask
[rc
] = addr_mask
;
2734 any_addr_mask
|= addr_mask
;
2737 reg_addr
[m
].addr_mask
[RELOAD_REG_ANY
] = any_addr_mask
;
2742 /* Initialize the various global tables that are based on register size. */
2744 rs6000_init_hard_regno_mode_ok (bool global_init_p
)
2750 /* Precalculate REGNO_REG_CLASS. */
2751 rs6000_regno_regclass
[0] = GENERAL_REGS
;
2752 for (r
= 1; r
< 32; ++r
)
2753 rs6000_regno_regclass
[r
] = BASE_REGS
;
2755 for (r
= 32; r
< 64; ++r
)
2756 rs6000_regno_regclass
[r
] = FLOAT_REGS
;
2758 for (r
= 64; HARD_REGISTER_NUM_P (r
); ++r
)
2759 rs6000_regno_regclass
[r
] = NO_REGS
;
2761 for (r
= FIRST_ALTIVEC_REGNO
; r
<= LAST_ALTIVEC_REGNO
; ++r
)
2762 rs6000_regno_regclass
[r
] = ALTIVEC_REGS
;
2764 rs6000_regno_regclass
[CR0_REGNO
] = CR0_REGS
;
2765 for (r
= CR1_REGNO
; r
<= CR7_REGNO
; ++r
)
2766 rs6000_regno_regclass
[r
] = CR_REGS
;
2768 rs6000_regno_regclass
[LR_REGNO
] = LINK_REGS
;
2769 rs6000_regno_regclass
[CTR_REGNO
] = CTR_REGS
;
2770 rs6000_regno_regclass
[CA_REGNO
] = NO_REGS
;
2771 rs6000_regno_regclass
[VRSAVE_REGNO
] = VRSAVE_REGS
;
2772 rs6000_regno_regclass
[VSCR_REGNO
] = VRSAVE_REGS
;
2773 rs6000_regno_regclass
[ARG_POINTER_REGNUM
] = BASE_REGS
;
2774 rs6000_regno_regclass
[FRAME_POINTER_REGNUM
] = BASE_REGS
;
2776 /* Precalculate register class to simpler reload register class. We don't
2777 need all of the register classes that are combinations of different
2778 classes, just the simple ones that have constraint letters. */
2779 for (c
= 0; c
< N_REG_CLASSES
; c
++)
2780 reg_class_to_reg_type
[c
] = NO_REG_TYPE
;
2782 reg_class_to_reg_type
[(int)GENERAL_REGS
] = GPR_REG_TYPE
;
2783 reg_class_to_reg_type
[(int)BASE_REGS
] = GPR_REG_TYPE
;
2784 reg_class_to_reg_type
[(int)VSX_REGS
] = VSX_REG_TYPE
;
2785 reg_class_to_reg_type
[(int)VRSAVE_REGS
] = SPR_REG_TYPE
;
2786 reg_class_to_reg_type
[(int)VSCR_REGS
] = SPR_REG_TYPE
;
2787 reg_class_to_reg_type
[(int)LINK_REGS
] = SPR_REG_TYPE
;
2788 reg_class_to_reg_type
[(int)CTR_REGS
] = SPR_REG_TYPE
;
2789 reg_class_to_reg_type
[(int)LINK_OR_CTR_REGS
] = SPR_REG_TYPE
;
2790 reg_class_to_reg_type
[(int)CR_REGS
] = CR_REG_TYPE
;
2791 reg_class_to_reg_type
[(int)CR0_REGS
] = CR_REG_TYPE
;
2795 reg_class_to_reg_type
[(int)FLOAT_REGS
] = VSX_REG_TYPE
;
2796 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = VSX_REG_TYPE
;
2800 reg_class_to_reg_type
[(int)FLOAT_REGS
] = FPR_REG_TYPE
;
2801 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = ALTIVEC_REG_TYPE
;
2804 /* Precalculate the valid memory formats as well as the vector information,
2805 this must be set up before the rs6000_hard_regno_nregs_internal calls
2807 gcc_assert ((int)VECTOR_NONE
== 0);
2808 memset ((void *) &rs6000_vector_unit
[0], '\0', sizeof (rs6000_vector_unit
));
2809 memset ((void *) &rs6000_vector_mem
[0], '\0', sizeof (rs6000_vector_mem
));
2811 gcc_assert ((int)CODE_FOR_nothing
== 0);
2812 memset ((void *) ®_addr
[0], '\0', sizeof (reg_addr
));
2814 gcc_assert ((int)NO_REGS
== 0);
2815 memset ((void *) &rs6000_constraints
[0], '\0', sizeof (rs6000_constraints
));
2817 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
2818 believes it can use native alignment or still uses 128-bit alignment. */
2819 if (TARGET_VSX
&& !TARGET_VSX_ALIGN_128
)
2830 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2831 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2832 if (TARGET_FLOAT128_TYPE
)
2834 rs6000_vector_mem
[KFmode
] = VECTOR_VSX
;
2835 rs6000_vector_align
[KFmode
] = 128;
2837 if (FLOAT128_IEEE_P (TFmode
))
2839 rs6000_vector_mem
[TFmode
] = VECTOR_VSX
;
2840 rs6000_vector_align
[TFmode
] = 128;
2844 /* V2DF mode, VSX only. */
2847 rs6000_vector_unit
[V2DFmode
] = VECTOR_VSX
;
2848 rs6000_vector_mem
[V2DFmode
] = VECTOR_VSX
;
2849 rs6000_vector_align
[V2DFmode
] = align64
;
2852 /* V4SF mode, either VSX or Altivec. */
2855 rs6000_vector_unit
[V4SFmode
] = VECTOR_VSX
;
2856 rs6000_vector_mem
[V4SFmode
] = VECTOR_VSX
;
2857 rs6000_vector_align
[V4SFmode
] = align32
;
2859 else if (TARGET_ALTIVEC
)
2861 rs6000_vector_unit
[V4SFmode
] = VECTOR_ALTIVEC
;
2862 rs6000_vector_mem
[V4SFmode
] = VECTOR_ALTIVEC
;
2863 rs6000_vector_align
[V4SFmode
] = align32
;
2866 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2870 rs6000_vector_unit
[V4SImode
] = VECTOR_ALTIVEC
;
2871 rs6000_vector_unit
[V8HImode
] = VECTOR_ALTIVEC
;
2872 rs6000_vector_unit
[V16QImode
] = VECTOR_ALTIVEC
;
2873 rs6000_vector_align
[V4SImode
] = align32
;
2874 rs6000_vector_align
[V8HImode
] = align32
;
2875 rs6000_vector_align
[V16QImode
] = align32
;
2879 rs6000_vector_mem
[V4SImode
] = VECTOR_VSX
;
2880 rs6000_vector_mem
[V8HImode
] = VECTOR_VSX
;
2881 rs6000_vector_mem
[V16QImode
] = VECTOR_VSX
;
2885 rs6000_vector_mem
[V4SImode
] = VECTOR_ALTIVEC
;
2886 rs6000_vector_mem
[V8HImode
] = VECTOR_ALTIVEC
;
2887 rs6000_vector_mem
[V16QImode
] = VECTOR_ALTIVEC
;
2891 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2892 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2895 rs6000_vector_mem
[V2DImode
] = VECTOR_VSX
;
2896 rs6000_vector_unit
[V2DImode
]
2897 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2898 rs6000_vector_align
[V2DImode
] = align64
;
2900 rs6000_vector_mem
[V1TImode
] = VECTOR_VSX
;
2901 rs6000_vector_unit
[V1TImode
]
2902 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2903 rs6000_vector_align
[V1TImode
] = 128;
2906 /* DFmode, see if we want to use the VSX unit. Memory is handled
2907 differently, so don't set rs6000_vector_mem. */
2910 rs6000_vector_unit
[DFmode
] = VECTOR_VSX
;
2911 rs6000_vector_align
[DFmode
] = 64;
2914 /* SFmode, see if we want to use the VSX unit. */
2915 if (TARGET_P8_VECTOR
)
2917 rs6000_vector_unit
[SFmode
] = VECTOR_VSX
;
2918 rs6000_vector_align
[SFmode
] = 32;
2921 /* Allow TImode in VSX register and set the VSX memory macros. */
2924 rs6000_vector_mem
[TImode
] = VECTOR_VSX
;
2925 rs6000_vector_align
[TImode
] = align64
;
2928 /* Add support for vector pairs and vector quad registers. */
2931 rs6000_vector_unit
[OOmode
] = VECTOR_NONE
;
2932 rs6000_vector_mem
[OOmode
] = VECTOR_VSX
;
2933 rs6000_vector_align
[OOmode
] = 256;
2935 rs6000_vector_unit
[XOmode
] = VECTOR_NONE
;
2936 rs6000_vector_mem
[XOmode
] = VECTOR_VSX
;
2937 rs6000_vector_align
[XOmode
] = 512;
2940 /* Register class constraints for the constraints that depend on compile
2941 switches. When the VSX code was added, different constraints were added
2942 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2943 of the VSX registers are used. The register classes for scalar floating
2944 point types is set, based on whether we allow that type into the upper
2945 (Altivec) registers. GCC has register classes to target the Altivec
2946 registers for load/store operations, to select using a VSX memory
2947 operation instead of the traditional floating point operation. The
2950 d - Register class to use with traditional DFmode instructions.
2951 v - Altivec register.
2952 wa - Any VSX register.
2953 wc - Reserved to represent individual CR bits (used in LLVM).
2954 wn - always NO_REGS.
2955 wr - GPR if 64-bit mode is permitted.
2956 wx - Float register if we can do 32-bit int stores. */
2958 if (TARGET_HARD_FLOAT
)
2959 rs6000_constraints
[RS6000_CONSTRAINT_d
] = FLOAT_REGS
;
2961 rs6000_constraints
[RS6000_CONSTRAINT_v
] = ALTIVEC_REGS
;
2963 rs6000_constraints
[RS6000_CONSTRAINT_wa
] = VSX_REGS
;
2965 if (TARGET_POWERPC64
)
2967 rs6000_constraints
[RS6000_CONSTRAINT_wr
] = GENERAL_REGS
;
2968 rs6000_constraints
[RS6000_CONSTRAINT_wA
] = BASE_REGS
;
2972 rs6000_constraints
[RS6000_CONSTRAINT_wx
] = FLOAT_REGS
; /* DImode */
2974 /* Support for new direct moves (ISA 3.0 + 64bit). */
2975 if (TARGET_DIRECT_MOVE_128
)
2976 rs6000_constraints
[RS6000_CONSTRAINT_we
] = VSX_REGS
;
2978 /* Set up the reload helper and direct move functions. */
2979 if (TARGET_VSX
|| TARGET_ALTIVEC
)
2983 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_di_store
;
2984 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_di_load
;
2985 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_di_store
;
2986 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_di_load
;
2987 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_di_store
;
2988 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_di_load
;
2989 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_di_store
;
2990 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_di_load
;
2991 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_di_store
;
2992 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_di_load
;
2993 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_di_store
;
2994 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_di_load
;
2995 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_di_store
;
2996 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_di_load
;
2997 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_di_store
;
2998 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_di_load
;
2999 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_di_store
;
3000 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_di_load
;
3001 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_di_store
;
3002 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_di_load
;
3004 if (FLOAT128_VECTOR_P (KFmode
))
3006 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_di_store
;
3007 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_di_load
;
3010 if (FLOAT128_VECTOR_P (TFmode
))
3012 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_di_store
;
3013 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_di_load
;
3016 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3018 if (TARGET_NO_SDMODE_STACK
)
3020 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_di_store
;
3021 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_di_load
;
3026 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_di_store
;
3027 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_di_load
;
3030 if (TARGET_DIRECT_MOVE
&& !TARGET_DIRECT_MOVE_128
)
3032 reg_addr
[TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxti
;
3033 reg_addr
[V1TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv1ti
;
3034 reg_addr
[V2DFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2df
;
3035 reg_addr
[V2DImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2di
;
3036 reg_addr
[V4SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4sf
;
3037 reg_addr
[V4SImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4si
;
3038 reg_addr
[V8HImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv8hi
;
3039 reg_addr
[V16QImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv16qi
;
3040 reg_addr
[SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxsf
;
3042 reg_addr
[TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprti
;
3043 reg_addr
[V1TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv1ti
;
3044 reg_addr
[V2DFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2df
;
3045 reg_addr
[V2DImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2di
;
3046 reg_addr
[V4SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4sf
;
3047 reg_addr
[V4SImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4si
;
3048 reg_addr
[V8HImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv8hi
;
3049 reg_addr
[V16QImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv16qi
;
3050 reg_addr
[SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprsf
;
3052 if (FLOAT128_VECTOR_P (KFmode
))
3054 reg_addr
[KFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxkf
;
3055 reg_addr
[KFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprkf
;
3058 if (FLOAT128_VECTOR_P (TFmode
))
3060 reg_addr
[TFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxtf
;
3061 reg_addr
[TFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprtf
;
3066 reg_addr
[OOmode
].reload_store
= CODE_FOR_reload_oo_di_store
;
3067 reg_addr
[OOmode
].reload_load
= CODE_FOR_reload_oo_di_load
;
3068 reg_addr
[XOmode
].reload_store
= CODE_FOR_reload_xo_di_store
;
3069 reg_addr
[XOmode
].reload_load
= CODE_FOR_reload_xo_di_load
;
3075 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_si_store
;
3076 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_si_load
;
3077 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_si_store
;
3078 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_si_load
;
3079 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_si_store
;
3080 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_si_load
;
3081 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_si_store
;
3082 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_si_load
;
3083 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_si_store
;
3084 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_si_load
;
3085 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_si_store
;
3086 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_si_load
;
3087 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_si_store
;
3088 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_si_load
;
3089 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_si_store
;
3090 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_si_load
;
3091 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_si_store
;
3092 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_si_load
;
3093 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_si_store
;
3094 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_si_load
;
3096 if (FLOAT128_VECTOR_P (KFmode
))
3098 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_si_store
;
3099 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_si_load
;
3102 if (FLOAT128_IEEE_P (TFmode
))
3104 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_si_store
;
3105 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_si_load
;
3108 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3110 if (TARGET_NO_SDMODE_STACK
)
3112 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_si_store
;
3113 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_si_load
;
3118 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_si_store
;
3119 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_si_load
;
3122 if (TARGET_DIRECT_MOVE
)
3124 reg_addr
[DImode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdi
;
3125 reg_addr
[DDmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdd
;
3126 reg_addr
[DFmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdf
;
3130 reg_addr
[DFmode
].scalar_in_vmx_p
= true;
3131 reg_addr
[DImode
].scalar_in_vmx_p
= true;
3133 if (TARGET_P8_VECTOR
)
3135 reg_addr
[SFmode
].scalar_in_vmx_p
= true;
3136 reg_addr
[SImode
].scalar_in_vmx_p
= true;
3138 if (TARGET_P9_VECTOR
)
3140 reg_addr
[HImode
].scalar_in_vmx_p
= true;
3141 reg_addr
[QImode
].scalar_in_vmx_p
= true;
3146 /* Precalculate HARD_REGNO_NREGS. */
3147 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3148 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3149 rs6000_hard_regno_nregs
[m
][r
]
3150 = rs6000_hard_regno_nregs_internal (r
, (machine_mode
) m
);
3152 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3153 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3154 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3155 rs6000_hard_regno_mode_ok_p
[m
][r
]
3156 = rs6000_hard_regno_mode_ok_uncached (r
, (machine_mode
) m
);
3158 /* Precalculate CLASS_MAX_NREGS sizes. */
3159 for (c
= 0; c
< LIM_REG_CLASSES
; ++c
)
3163 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
))
3164 reg_size
= UNITS_PER_VSX_WORD
;
3166 else if (c
== ALTIVEC_REGS
)
3167 reg_size
= UNITS_PER_ALTIVEC_WORD
;
3169 else if (c
== FLOAT_REGS
)
3170 reg_size
= UNITS_PER_FP_WORD
;
3173 reg_size
= UNITS_PER_WORD
;
3175 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3177 machine_mode m2
= (machine_mode
)m
;
3178 int reg_size2
= reg_size
;
3180 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3182 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
) && FLOAT128_2REG_P (m
))
3183 reg_size2
= UNITS_PER_FP_WORD
;
3185 rs6000_class_max_nregs
[m
][c
]
3186 = (GET_MODE_SIZE (m2
) + reg_size2
- 1) / reg_size2
;
3190 /* Calculate which modes to automatically generate code to use a the
3191 reciprocal divide and square root instructions. In the future, possibly
3192 automatically generate the instructions even if the user did not specify
3193 -mrecip. The older machines double precision reciprocal sqrt estimate is
3194 not accurate enough. */
3195 memset (rs6000_recip_bits
, 0, sizeof (rs6000_recip_bits
));
3197 rs6000_recip_bits
[SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3199 rs6000_recip_bits
[DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3200 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3201 rs6000_recip_bits
[V4SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3202 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3203 rs6000_recip_bits
[V2DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3205 if (TARGET_FRSQRTES
)
3206 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3208 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3209 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3210 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3211 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3212 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3214 if (rs6000_recip_control
)
3216 if (!flag_finite_math_only
)
3217 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3219 if (flag_trapping_math
)
3220 warning (0, "%qs requires %qs or %qs", "-mrecip",
3221 "-fno-trapping-math", "-ffast-math");
3222 if (!flag_reciprocal_math
)
3223 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3225 if (flag_finite_math_only
&& !flag_trapping_math
&& flag_reciprocal_math
)
3227 if (RS6000_RECIP_HAVE_RE_P (SFmode
)
3228 && (rs6000_recip_control
& RECIP_SF_DIV
) != 0)
3229 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3231 if (RS6000_RECIP_HAVE_RE_P (DFmode
)
3232 && (rs6000_recip_control
& RECIP_DF_DIV
) != 0)
3233 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3235 if (RS6000_RECIP_HAVE_RE_P (V4SFmode
)
3236 && (rs6000_recip_control
& RECIP_V4SF_DIV
) != 0)
3237 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3239 if (RS6000_RECIP_HAVE_RE_P (V2DFmode
)
3240 && (rs6000_recip_control
& RECIP_V2DF_DIV
) != 0)
3241 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3243 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode
)
3244 && (rs6000_recip_control
& RECIP_SF_RSQRT
) != 0)
3245 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3247 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode
)
3248 && (rs6000_recip_control
& RECIP_DF_RSQRT
) != 0)
3249 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3251 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode
)
3252 && (rs6000_recip_control
& RECIP_V4SF_RSQRT
) != 0)
3253 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3255 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode
)
3256 && (rs6000_recip_control
& RECIP_V2DF_RSQRT
) != 0)
3257 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3261 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3262 legitimate address support to figure out the appropriate addressing to
3264 rs6000_setup_reg_addr_masks ();
3266 if (global_init_p
|| TARGET_DEBUG_TARGET
)
3268 if (TARGET_DEBUG_REG
)
3269 rs6000_debug_reg_global ();
3271 if (TARGET_DEBUG_COST
|| TARGET_DEBUG_REG
)
3273 "SImode variable mult cost = %d\n"
3274 "SImode constant mult cost = %d\n"
3275 "SImode short constant mult cost = %d\n"
3276 "DImode multipliciation cost = %d\n"
3277 "SImode division cost = %d\n"
3278 "DImode division cost = %d\n"
3279 "Simple fp operation cost = %d\n"
3280 "DFmode multiplication cost = %d\n"
3281 "SFmode division cost = %d\n"
3282 "DFmode division cost = %d\n"
3283 "cache line size = %d\n"
3284 "l1 cache size = %d\n"
3285 "l2 cache size = %d\n"
3286 "simultaneous prefetches = %d\n"
3289 rs6000_cost
->mulsi_const
,
3290 rs6000_cost
->mulsi_const9
,
3298 rs6000_cost
->cache_line_size
,
3299 rs6000_cost
->l1_cache_size
,
3300 rs6000_cost
->l2_cache_size
,
3301 rs6000_cost
->simultaneous_prefetches
);
3306 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
/* Darwin-specific option overriding: force the AltiVec ABI and VRSAVE use,
   select the Darwin calling convention, and adjust long-call, soft-float and
   AltiVec defaults for -m64 / -mkernel / kext builds.  NOTE(review): this
   extraction is lossy — several guard lines and braces are missing; consult
   the full source before relying on the exact conditions.  */
3309 darwin_rs6000_override_options (void)
3311 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3313 rs6000_altivec_abi
= 1;
3314 TARGET_ALTIVEC_VRSAVE
= 1;
3315 rs6000_current_abi
= ABI_DARWIN
;
/* Darwin uses a one-byte 'bool'.  NOTE(review): the condition appears
   truncated here; confirm the full guard against the complete source.  */
3317 if (DEFAULT_ABI
== ABI_DARWIN
3319 darwin_one_byte_bool
= 1;
/* -m64 needs a 64-bit PowerPC core: turn OPTION_MASK_POWERPC64 on and warn
   rather than erroring out.  */
3321 if (TARGET_64BIT
&& ! TARGET_POWERPC64
)
3323 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3324 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3327 /* The linkers [ld64] that support 64Bit do not need the JBSR longcall
3328 optimisation, and will not work with the most generic case (where the
3329 symbol is undefined external, but there is no symbol stub). */
3331 rs6000_default_long_calls
= 0;
3333 /* ld_classic is (so far) still used for kernel (static) code, and supports
3334 the JBSR longcall / branch islands. */
3337 rs6000_default_long_calls
= 1;
3339 /* Allow a kext author to do -mkernel -mhard-float. */
/* Only default to soft-float when the user did not choose a float mode
   explicitly (rs6000_isa_flags_explicit records explicit options).  */
3340 if (! (rs6000_isa_flags_explicit
& OPTION_MASK_SOFT_FLOAT
))
3341 rs6000_isa_flags
|= OPTION_MASK_SOFT_FLOAT
;
3344 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3346 if (!flag_mkernel
&& !flag_apple_kext
3348 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
))
3349 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3351 /* Unless the user (not the configurer) has explicitly overridden
3352 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3353 G4 unless targeting the kernel. */
/* strverscmp orders dotted version strings, so this matches macOS deployment
   targets of 10.5 and later.  NOTE(review): the leading conjuncts of this
   condition are missing from this extraction.  */
3356 && strverscmp (darwin_macosx_version_min
, "10.5") >= 0
3357 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
)
3358 && ! OPTION_SET_P (rs6000_cpu_index
))
3360 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3365 /* If not otherwise specified by a target, make 'long double' equivalent to
3368 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3369 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3372 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3373 to clobber the XER[CA] bit because clobbering that bit without telling
3374 the compiler worked just fine with versions of GCC before GCC 5, and
3375 breaking a lot of older code in ways that are hard to track down is
3376 not such a great idea. */
/* Hook body: unconditionally add the carry bit (CA_REGNO, modelled as an
   SImode hard register) to every inline asm's clobber list and to the
   clobbered hard-register set.  The outputs/inputs/constraints vectors are
   deliberately unused.  NOTE(review): the return-type line and the trailing
   return statement are missing from this extraction.  */
3379 rs6000_md_asm_adjust (vec
<rtx
> & /*outputs*/, vec
<rtx
> & /*inputs*/,
3380 vec
<machine_mode
> & /*input_modes*/,
3381 vec
<const char *> & /*constraints*/, vec
<rtx
> &clobbers
,
3382 HARD_REG_SET
&clobbered_regs
, location_t
/*loc*/)
3384 clobbers
.safe_push (gen_rtx_REG (SImode
, CA_REGNO
));
3385 SET_HARD_REG_BIT (clobbered_regs
, CA_REGNO
);
3389 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3390 but is called when the optimize level is changed via an attribute or
3391 pragma or when it is reset at the end of the code affected by the
3392 attribute or pragma. It is not called at the beginning of compilation
3393 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3394 actions then, you should have TARGET_OPTION_OVERRIDE call
3395 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3398 rs6000_override_options_after_change (void)
3400 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3401 turns -frename-registers on. */
3402 if ((OPTION_SET_P (flag_unroll_loops
) && flag_unroll_loops
)
3403 || (OPTION_SET_P (flag_unroll_all_loops
)
3404 && flag_unroll_all_loops
))
/* Only override the sub-options the user has not set explicitly;
   OPTION_SET_P distinguishes explicit command-line settings.  */
3406 if (!OPTION_SET_P (unroll_only_small_loops
))
3407 unroll_only_small_loops
= 0;
3408 if (!OPTION_SET_P (flag_rename_registers
))
3409 flag_rename_registers
= 1;
3410 if (!OPTION_SET_P (flag_cunroll_grow_size
))
3411 flag_cunroll_grow_size
= 1;
/* With no explicit unrolling request, let complete unrolling grow code
   size only when peeling loops or optimizing at -O3 and above.  */
3413 else if (!OPTION_SET_P (flag_cunroll_grow_size
))
3414 flag_cunroll_grow_size
= flag_peel_loops
|| optimize
>= 3;
3416 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3417 if (rs6000_rop_protect
)
3418 flag_shrink_wrap
= 0;
3421 #ifdef TARGET_USES_LINUX64_OPT
/* Linux 64-bit specific option overriding: validates and reconciles the
   -m64/-m32 option set for powerpc64-linux.  Under -m64 it forces the AIX
   (or ELFv2) ABI, rejects options that only make sense for 32-bit
   (relocatable, eabi, prototype), and selects the code model; under -m32 it
   rejects 64-bit-only options.  Errors are issued via the INVALID_64BIT /
   INVALID_32BIT message templates, and the offending flags are reset so
   compilation can continue.  NOTE(review): braces and a few guard lines are
   missing from this extraction; confirm nesting against the full source.  */
3423 rs6000_linux64_override_options ()
/* Default to natural alignment unless the user chose an alignment mode.  */
3425 if (!OPTION_SET_P (rs6000_alignment_flags
))
3426 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
3427 if (rs6000_isa_flags
& OPTION_MASK_64BIT
)
/* 64-bit Linux requires the AIX-style (descriptor or ELFv2) ABI.  */
3429 if (DEFAULT_ABI
!= ABI_AIX
)
3431 rs6000_current_abi
= ABI_AIX
;
3432 error (INVALID_64BIT
, "call");
3434 dot_symbols
= !strcmp (rs6000_abi_name
, "aixdesc");
3435 if (ELFv2_ABI_CHECK
)
3437 rs6000_current_abi
= ABI_ELFv2
;
/* -mcall-aixdesc (dot_symbols) and the ELFv2 ABI are mutually exclusive.
   NOTE(review): the guarding condition line is absent from this view.  */
3439 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
/* The following options are 32-bit only: clear each and report.  */
3441 if (rs6000_isa_flags
& OPTION_MASK_RELOCATABLE
)
3443 rs6000_isa_flags
&= ~OPTION_MASK_RELOCATABLE
;
3444 error (INVALID_64BIT
, "relocatable");
3446 if (rs6000_isa_flags
& OPTION_MASK_EABI
)
3448 rs6000_isa_flags
&= ~OPTION_MASK_EABI
;
3449 error (INVALID_64BIT
, "eabi");
3451 if (TARGET_PROTOTYPE
)
3453 target_prototype
= 0;
3454 error (INVALID_64BIT
, "prototype");
/* -m64 implies a 64-bit capable CPU; set the flag to limit cascading
   errors, but still diagnose the mismatch.  */
3456 if ((rs6000_isa_flags
& OPTION_MASK_POWERPC64
) == 0)
3458 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3459 error ("%<-m64%> requires a PowerPC64 cpu");
/* Code model selection: default to medium, but -mminimal-toc (and PC-rel
   addressing) force the small model.  */
3461 if (!OPTION_SET_P (rs6000_current_cmodel
))
3462 SET_CMODEL (CMODEL_MEDIUM
);
3463 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MINIMAL_TOC
) != 0)
3465 if (OPTION_SET_P (rs6000_current_cmodel
)
3466 && rs6000_current_cmodel
!= CMODEL_SMALL
)
3467 error ("%<-mcmodel%> incompatible with other toc options");
3468 if (TARGET_MINIMAL_TOC
)
3469 SET_CMODEL (CMODEL_SMALL
);
3470 else if (TARGET_PCREL
3471 || (PCREL_SUPPORTED_BY_OS
3472 && (rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) == 0))
3473 /* Ignore -mno-minimal-toc. */
3476 SET_CMODEL (CMODEL_SMALL
);
/* For medium/large models, adjust what is placed in the TOC unless the
   user decided explicitly.  */
3478 if (rs6000_current_cmodel
!= CMODEL_SMALL
)
3480 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC
))
3481 TARGET_NO_FP_IN_TOC
= rs6000_current_cmodel
== CMODEL_MEDIUM
;
3482 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC
))
3483 TARGET_NO_SUM_IN_TOC
= 0;
/* PLT sequences are only supported under the ELFv2 ABI; warn only when
   the user asked for them explicitly, otherwise drop them silently.  */
3485 if (TARGET_PLTSEQ
&& DEFAULT_ABI
!= ABI_ELFv2
)
3487 if (OPTION_SET_P (rs6000_pltseq
))
3488 warning (0, "%qs unsupported for this ABI",
3490 rs6000_pltseq
= false;
/* -m32 path: reject options that require 64-bit mode.  */
3493 else if (TARGET_64BIT
)
3494 error (INVALID_32BIT
, "32");
3497 if (TARGET_PROFILE_KERNEL
)
3500 error (INVALID_32BIT
, "profile-kernel");
3502 if (OPTION_SET_P (rs6000_current_cmodel
))
3504 SET_CMODEL (CMODEL_SMALL
);
3505 error (INVALID_32BIT
, "cmodel");
3511 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3512 This support is only in little endian GLIBC 2.32 or newer. */
3514 glibc_supports_ieee_128bit (void)
/* Encode the glibc version as major*1000 + minor, so 2.32 becomes 2032;
   require little-endian since IEEE-128 long double support in glibc is
   LE-only.  NOTE(review): the return statements and the opening #if for the
   #endif below are missing from this extraction.  */
3517 if (OPTION_GLIBC
&& !BYTES_BIG_ENDIAN
3518 && ((TARGET_GLIBC_MAJOR
* 1000) + TARGET_GLIBC_MINOR
) >= 2032)
3520 #endif /* OPTION_GLIBC. */
3525 /* Override command line options.
3527 Combine build-specific configuration information with options
3528 specified on the command line to set various state variables which
3529 influence code generation, optimization, and expansion of built-in
3530 functions. Assure that command-line configuration preferences are
3531 compatible with each other and with the build configuration; issue
3532 warnings while adjusting configuration or error messages while
3533 rejecting configuration.
3535 Upon entry to this function:
3537 This function is called once at the beginning of
3538 compilation, and then again at the start and end of compiling
3539 each section of code that has a different configuration, as
3540 indicated, for example, by adding the
3542 __attribute__((__target__("cpu=power9")))
3544 qualifier to a function definition or, for example, by bracketing
3547 #pragma GCC target("altivec")
3551 #pragma GCC reset_options
3553 directives. Parameter global_init_p is true for the initial
3554 invocation, which initializes global variables, and false for all
3555 subsequent invocations.
3558 Various global state information is assumed to be valid. This
3559 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3560 default CPU specified at build configure time, TARGET_DEFAULT,
3561 representing the default set of option flags for the default
3562 target, and OPTION_SET_P (rs6000_isa_flags), representing
3563 which options were requested on the command line.
3565 Upon return from this function:
3567 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3568 was set by name on the command line. Additionally, if certain
3569 attributes are automatically enabled or disabled by this function
3570 in order to assure compatibility between options and
3571 configuration, the flags associated with those attributes are
3572 also set. By setting these "explicit bits", we avoid the risk
3573 that other code might accidentally overwrite these particular
3574 attributes with "default values".
3576 The various bits of rs6000_isa_flags are set to indicate the
3577 target options that have been selected for the most current
3578 compilation efforts. This has the effect of also turning on the
3579 associated TARGET_XXX values since these are macros which are
3580 generally defined to test the corresponding bit of the
3581 rs6000_isa_flags variable.
3583 Various other global variables and fields of global structures
3584 (over 50 in all) are initialized to reflect the desired options
3585 for the most current compilation efforts. */
3588 rs6000_option_override_internal (bool global_init_p
)
3592 HOST_WIDE_INT set_masks
;
3593 HOST_WIDE_INT ignore_masks
;
3596 struct cl_target_option
*main_target_opt
3597 = ((global_init_p
|| target_option_default_node
== NULL
)
3598 ? NULL
: TREE_TARGET_OPTION (target_option_default_node
));
3600 /* Print defaults. */
3601 if ((TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
) && global_init_p
)
3602 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
3604 /* Remember the explicit arguments. */
3606 rs6000_isa_flags_explicit
= OPTION_SET_P (rs6000_isa_flags
);
3608 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3609 library functions, so warn about it. The flag may be useful for
3610 performance studies from time to time though, so don't disable it
3612 if (OPTION_SET_P (rs6000_alignment_flags
)
3613 && rs6000_alignment_flags
== MASK_ALIGN_POWER
3614 && DEFAULT_ABI
== ABI_DARWIN
3616 warning (0, "%qs is not supported for 64-bit Darwin;"
3617 " it is incompatible with the installed C and C++ libraries",
3620 /* Numerous experiments show that IRA-based loop pressure
3621 calculation works better for RTL loop invariant motion on targets
3622 with enough (>= 32) registers. It is an expensive optimization.
3623 So it is on only for peak performance. */
3624 if (optimize
>= 3 && global_init_p
3625 && !OPTION_SET_P (flag_ira_loop_pressure
))
3626 flag_ira_loop_pressure
= 1;
3628 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3629 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3630 options were already specified. */
3631 if (flag_sanitize
& SANITIZE_USER_ADDRESS
3632 && !OPTION_SET_P (flag_asynchronous_unwind_tables
))
3633 flag_asynchronous_unwind_tables
= 1;
3635 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3636 loop unroller is active. It is only checked during unrolling, so
3637 we can just set it on by default. */
3638 if (!OPTION_SET_P (flag_variable_expansion_in_unroller
))
3639 flag_variable_expansion_in_unroller
= 1;
3641 /* Set the pointer size. */
3644 rs6000_pmode
= DImode
;
3645 rs6000_pointer_size
= 64;
3649 rs6000_pmode
= SImode
;
3650 rs6000_pointer_size
= 32;
3653 /* Some OSs don't support saving Altivec registers. On those OSs, we don't
3654 touch the OPTION_MASK_ALTIVEC settings; if the user wants it, the user
3655 must explicitly specify it and we won't interfere with the user's
3658 set_masks
= POWERPC_MASKS
;
3659 #ifdef OS_MISSING_ALTIVEC
3660 if (OS_MISSING_ALTIVEC
)
3661 set_masks
&= ~(OPTION_MASK_ALTIVEC
| OPTION_MASK_VSX
3662 | OTHER_VSX_VECTOR_MASKS
);
3665 /* Don't override by the processor default if given explicitly. */
3666 set_masks
&= ~rs6000_isa_flags_explicit
;
3668 /* Without option powerpc64 specified explicitly, we need to ensure
3669 powerpc64 always enabled for 64 bit here, otherwise some following
3670 checks can use unexpected TARGET_POWERPC64 value. */
3671 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_POWERPC64
)
3674 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3675 /* Need to stop powerpc64 from being unset in later processing,
3676 so clear it in set_masks. But as PR108240 shows, to keep it
3677 consistent with before, we want to make this only if 64 bit
3678 is enabled explicitly. This is a hack, revisit this later. */
3679 if (rs6000_isa_flags_explicit
& OPTION_MASK_64BIT
)
3680 set_masks
&= ~OPTION_MASK_POWERPC64
;
3683 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3684 the cpu in a target attribute or pragma, but did not specify a tuning
3685 option, use the cpu for the tuning option rather than the option specified
3686 with -mtune on the command line. Process a '--with-cpu' configuration
3687 request as an implicit --cpu. */
3688 if (rs6000_cpu_index
>= 0)
3689 cpu_index
= rs6000_cpu_index
;
3690 else if (main_target_opt
!= NULL
&& main_target_opt
->x_rs6000_cpu_index
>= 0)
3691 cpu_index
= main_target_opt
->x_rs6000_cpu_index
;
3692 else if (OPTION_TARGET_CPU_DEFAULT
)
3693 cpu_index
= rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT
);
3695 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3696 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3697 with those from the cpu, except for options that were explicitly set. If
3698 we don't have a cpu, do not override the target bits set in
3702 rs6000_cpu_index
= cpu_index
;
3703 rs6000_isa_flags
&= ~set_masks
;
3704 rs6000_isa_flags
|= (processor_target_table
[cpu_index
].target_enable
3709 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3710 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3711 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3712 to using rs6000_isa_flags, we need to do the initialization here.
3714 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3715 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3716 HOST_WIDE_INT flags
;
3718 flags
= TARGET_DEFAULT
;
3721 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3722 const char *default_cpu
= (!TARGET_POWERPC64
3727 int default_cpu_index
= rs6000_cpu_name_lookup (default_cpu
);
3728 flags
= processor_target_table
[default_cpu_index
].target_enable
;
3730 rs6000_isa_flags
|= (flags
& ~rs6000_isa_flags_explicit
);
3733 /* Don't expect powerpc64 enabled on those OSes with OS_MISSING_POWERPC64,
3734 since they do not save and restore the high half of the GPRs correctly
3735 in all cases. If the user explicitly specifies it, we won't interfere
3736 with the user's specification. */
3737 #ifdef OS_MISSING_POWERPC64
3738 if (OS_MISSING_POWERPC64
3741 && !(rs6000_isa_flags_explicit
& OPTION_MASK_POWERPC64
))
3742 rs6000_isa_flags
&= ~OPTION_MASK_POWERPC64
;
3745 if (rs6000_tune_index
>= 0)
3746 tune_index
= rs6000_tune_index
;
3747 else if (cpu_index
>= 0)
3748 rs6000_tune_index
= tune_index
= cpu_index
;
3752 enum processor_type tune_proc
3753 = (TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
);
3756 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
3757 if (processor_target_table
[i
].processor
== tune_proc
)
3765 rs6000_cpu
= processor_target_table
[cpu_index
].processor
;
3767 rs6000_cpu
= TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
;
3769 gcc_assert (tune_index
>= 0);
3770 rs6000_tune
= processor_target_table
[tune_index
].processor
;
3772 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
3773 || rs6000_cpu
== PROCESSOR_PPCE500MC
|| rs6000_cpu
== PROCESSOR_PPCE500MC64
3774 || rs6000_cpu
== PROCESSOR_PPCE5500
)
3777 error ("AltiVec not supported in this target");
3780 /* If we are optimizing big endian systems for space, use the load/store
3781 multiple instructions. */
3782 if (BYTES_BIG_ENDIAN
&& optimize_size
)
3783 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
;
3785 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3786 because the hardware doesn't support the instructions used in little
3787 endian mode, and causes an alignment trap. The 750 does not cause an
3788 alignment trap (except when the target is unaligned). */
3790 if (!BYTES_BIG_ENDIAN
&& rs6000_cpu
!= PROCESSOR_PPC750
&& TARGET_MULTIPLE
)
3792 rs6000_isa_flags
&= ~OPTION_MASK_MULTIPLE
;
3793 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
) != 0)
3794 warning (0, "%qs is not supported on little endian systems",
3798 /* If little-endian, default to -mstrict-align on older processors.
3799 Testing for direct_move matches power8 and later. */
3800 if (!BYTES_BIG_ENDIAN
3801 && !(processor_target_table
[tune_index
].target_enable
3802 & OPTION_MASK_DIRECT_MOVE
))
3803 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_STRICT_ALIGN
;
3805 /* Add some warnings for VSX. */
3808 const char *msg
= NULL
;
3809 if (!TARGET_HARD_FLOAT
)
3811 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3812 msg
= N_("%<-mvsx%> requires hardware floating point");
3815 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
3816 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3819 else if (TARGET_AVOID_XFORM
> 0)
3820 msg
= N_("%<-mvsx%> needs indexed addressing");
3821 else if (!TARGET_ALTIVEC
&& (rs6000_isa_flags_explicit
3822 & OPTION_MASK_ALTIVEC
))
3824 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3825 msg
= N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3827 msg
= N_("%<-mno-altivec%> disables vsx");
3833 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
3834 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3838 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3839 the -mcpu setting to enable options that conflict. */
3840 if ((!TARGET_HARD_FLOAT
|| !TARGET_ALTIVEC
|| !TARGET_VSX
)
3841 && (rs6000_isa_flags_explicit
& (OPTION_MASK_SOFT_FLOAT
3842 | OPTION_MASK_ALTIVEC
3843 | OPTION_MASK_VSX
)) != 0)
3844 rs6000_isa_flags
&= ~((OPTION_MASK_P8_VECTOR
| OPTION_MASK_CRYPTO
3845 | OPTION_MASK_DIRECT_MOVE
)
3846 & ~rs6000_isa_flags_explicit
);
3848 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
3849 rs6000_print_isa_options (stderr
, 0, "before defaults", rs6000_isa_flags
);
3851 #ifdef XCOFF_DEBUGGING_INFO
3852 /* For AIX default to 64-bit DWARF. */
3853 if (!OPTION_SET_P (dwarf_offset_size
))
3854 dwarf_offset_size
= POINTER_SIZE_UNITS
;
3857 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3858 off all of the options that depend on those flags. */
3859 ignore_masks
= rs6000_disable_incompatible_switches ();
3861 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3862 unless the user explicitly used the -mno-<option> to disable the code. */
3863 if (TARGET_P9_VECTOR
|| TARGET_MODULO
|| TARGET_P9_MISC
)
3864 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3865 else if (TARGET_P9_MINMAX
)
3869 if (cpu_index
== PROCESSOR_POWER9
)
3871 /* legacy behavior: allow -mcpu=power9 with certain
3872 capabilities explicitly disabled. */
3873 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3876 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3877 "for <xxx> less than power9", "-mcpu");
3879 else if ((ISA_3_0_MASKS_SERVER
& rs6000_isa_flags_explicit
)
3880 != (ISA_3_0_MASKS_SERVER
& rs6000_isa_flags
3881 & rs6000_isa_flags_explicit
))
3882 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3883 were explicitly cleared. */
3884 error ("%qs incompatible with explicitly disabled options",
3887 rs6000_isa_flags
|= ISA_3_0_MASKS_SERVER
;
3889 else if (TARGET_P8_VECTOR
|| TARGET_DIRECT_MOVE
|| TARGET_CRYPTO
)
3890 rs6000_isa_flags
|= (ISA_2_7_MASKS_SERVER
& ~ignore_masks
);
3891 else if (TARGET_VSX
)
3892 rs6000_isa_flags
|= (ISA_2_6_MASKS_SERVER
& ~ignore_masks
);
3893 else if (TARGET_POPCNTD
)
3894 rs6000_isa_flags
|= (ISA_2_6_MASKS_EMBEDDED
& ~ignore_masks
);
3895 else if (TARGET_DFP
)
3896 rs6000_isa_flags
|= (ISA_2_5_MASKS_SERVER
& ~ignore_masks
);
3897 else if (TARGET_CMPB
)
3898 rs6000_isa_flags
|= (ISA_2_5_MASKS_EMBEDDED
& ~ignore_masks
);
3899 else if (TARGET_FPRND
)
3900 rs6000_isa_flags
|= (ISA_2_4_MASKS
& ~ignore_masks
);
3901 else if (TARGET_POPCNTB
)
3902 rs6000_isa_flags
|= (ISA_2_2_MASKS
& ~ignore_masks
);
3903 else if (TARGET_ALTIVEC
)
3904 rs6000_isa_flags
|= (OPTION_MASK_PPC_GFXOPT
& ~ignore_masks
);
3906 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3907 target attribute or pragma which automatically enables both options,
3908 unless the altivec ABI was set. This is set by default for 64-bit, but
3909 not for 32-bit. Don't move this before the above code using ignore_masks,
3910 since it can reset the cleared VSX/ALTIVEC flag again. */
3911 if (main_target_opt
&& !main_target_opt
->x_rs6000_altivec_abi
)
3912 rs6000_isa_flags
&= ~((OPTION_MASK_VSX
| OPTION_MASK_ALTIVEC
)
3913 & ~rs6000_isa_flags_explicit
);
3915 if (TARGET_CRYPTO
&& !TARGET_ALTIVEC
)
3917 if (rs6000_isa_flags_explicit
& OPTION_MASK_CRYPTO
)
3918 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3919 rs6000_isa_flags
&= ~OPTION_MASK_CRYPTO
;
3922 if (!TARGET_FPRND
&& TARGET_VSX
)
3924 if (rs6000_isa_flags_explicit
& OPTION_MASK_FPRND
)
3925 /* TARGET_VSX = 1 implies Power 7 and newer */
3926 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3927 rs6000_isa_flags
&= ~OPTION_MASK_FPRND
;
3930 if (TARGET_DIRECT_MOVE
&& !TARGET_VSX
)
3932 if (rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
)
3933 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3934 rs6000_isa_flags
&= ~OPTION_MASK_DIRECT_MOVE
;
3937 if (TARGET_P8_VECTOR
&& !TARGET_ALTIVEC
)
3939 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
3940 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3941 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
3944 if (TARGET_P8_VECTOR
&& !TARGET_VSX
)
3946 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
3947 && (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
))
3948 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3949 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
) == 0)
3951 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
3952 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3953 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
3957 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3959 rs6000_isa_flags
|= OPTION_MASK_VSX
;
3960 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3964 if (TARGET_DFP
&& !TARGET_HARD_FLOAT
)
3966 if (rs6000_isa_flags_explicit
& OPTION_MASK_DFP
)
3967 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3968 rs6000_isa_flags
&= ~OPTION_MASK_DFP
;
3971 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
3972 silently turn off quad memory mode. */
3973 if ((TARGET_QUAD_MEMORY
|| TARGET_QUAD_MEMORY_ATOMIC
) && !TARGET_POWERPC64
)
3975 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
3976 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3978 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) != 0)
3979 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3981 rs6000_isa_flags
&= ~(OPTION_MASK_QUAD_MEMORY
3982 | OPTION_MASK_QUAD_MEMORY_ATOMIC
);
3985 /* Non-atomic quad memory load/store are disabled for little endian, since
3986 the words are reversed, but atomic operations can still be done by
3987 swapping the words. */
3988 if (TARGET_QUAD_MEMORY
&& !WORDS_BIG_ENDIAN
)
3990 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
3991 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3994 rs6000_isa_flags
&= ~OPTION_MASK_QUAD_MEMORY
;
3997 /* Assume if the user asked for normal quad memory instructions, they want
3998 the atomic versions as well, unless they explicitly told us not to use quad
3999 word atomic instructions. */
4000 if (TARGET_QUAD_MEMORY
4001 && !TARGET_QUAD_MEMORY_ATOMIC
4002 && ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) == 0))
4003 rs6000_isa_flags
|= OPTION_MASK_QUAD_MEMORY_ATOMIC
;
4005 /* If we can shrink-wrap the TOC register save separately, then use
4006 -msave-toc-indirect unless explicitly disabled. */
4007 if ((rs6000_isa_flags_explicit
& OPTION_MASK_SAVE_TOC_INDIRECT
) == 0
4008 && flag_shrink_wrap_separate
4009 && optimize_function_for_speed_p (cfun
))
4010 rs6000_isa_flags
|= OPTION_MASK_SAVE_TOC_INDIRECT
;
4012 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4013 generating power8 instructions. Power9 does not optimize power8 fusion
4015 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
))
4017 if (processor_target_table
[tune_index
].processor
== PROCESSOR_POWER8
)
4018 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4020 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4023 /* Setting additional fusion flags turns on base fusion. */
4024 if (!TARGET_P8_FUSION
&& TARGET_P8_FUSION_SIGN
)
4026 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
)
4028 if (TARGET_P8_FUSION_SIGN
)
4029 error ("%qs requires %qs", "-mpower8-fusion-sign",
4032 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4035 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4038 /* Power8 does not fuse sign extended loads with the addis. If we are
4039 optimizing at high levels for speed, convert a sign extended load into a
4040 zero extending load, and an explicit sign extension. */
4041 if (TARGET_P8_FUSION
4042 && !(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION_SIGN
)
4043 && optimize_function_for_speed_p (cfun
)
4045 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION_SIGN
;
4047 /* ISA 3.0 vector instructions include ISA 2.07. */
4048 if (TARGET_P9_VECTOR
&& !TARGET_P8_VECTOR
)
4050 /* We prefer to not mention undocumented options in
4051 error messages. However, if users have managed to select
4052 power9-vector without selecting power8-vector, they
4053 already know about undocumented flags. */
4054 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) &&
4055 (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
))
4056 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4057 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) == 0)
4059 rs6000_isa_flags
&= ~OPTION_MASK_P9_VECTOR
;
4060 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4061 rs6000_isa_flags_explicit
|= OPTION_MASK_P9_VECTOR
;
4065 /* OPTION_MASK_P9_VECTOR is explicit and
4066 OPTION_MASK_P8_VECTOR is not explicit. */
4067 rs6000_isa_flags
|= OPTION_MASK_P8_VECTOR
;
4068 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
4072 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
4073 support. If we only have ISA 2.06 support, and the user did not specify
4074 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4075 but we don't enable the full vectorization support */
4076 if (TARGET_ALLOW_MOVMISALIGN
== -1 && TARGET_P8_VECTOR
&& TARGET_DIRECT_MOVE
)
4077 TARGET_ALLOW_MOVMISALIGN
= 1;
4079 else if (TARGET_ALLOW_MOVMISALIGN
&& !TARGET_VSX
)
4081 if (TARGET_ALLOW_MOVMISALIGN
> 0
4082 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN
))
4083 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4085 TARGET_ALLOW_MOVMISALIGN
= 0;
4088 /* Determine when unaligned vector accesses are permitted, and when
4089 they are preferred over masked Altivec loads. Note that if
4090 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4091 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4093 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4097 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4098 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4100 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4103 else if (!TARGET_ALLOW_MOVMISALIGN
)
4105 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4106 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4107 "-mallow-movmisalign");
4109 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4113 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
))
4115 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4116 rs6000_isa_flags
|= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
;
4118 rs6000_isa_flags
&= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
;
4121 /* Use long double size to select the appropriate long double. We use
4122 TYPE_PRECISION to differentiate the 3 different long double types. We map
4123 128 into the precision used for TFmode. */
4124 int default_long_double_size
= (RS6000_DEFAULT_LONG_DOUBLE_SIZE
== 64
4126 : FLOAT_PRECISION_TFmode
);
4128 /* Set long double size before the IEEE 128-bit tests. */
4129 if (!OPTION_SET_P (rs6000_long_double_type_size
))
4131 if (main_target_opt
!= NULL
4132 && (main_target_opt
->x_rs6000_long_double_type_size
4133 != default_long_double_size
))
4134 error ("target attribute or pragma changes %<long double%> size");
4136 rs6000_long_double_type_size
= default_long_double_size
;
4138 else if (rs6000_long_double_type_size
== FLOAT_PRECISION_TFmode
)
4139 ; /* The option value can be seen when cl_target_option_restore is called. */
4140 else if (rs6000_long_double_type_size
== 128)
4141 rs6000_long_double_type_size
= FLOAT_PRECISION_TFmode
;
4143 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4144 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4145 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4146 those systems will not pick up this default. Warn if the user changes the
4147 default unless -Wno-psabi. */
4148 if (!OPTION_SET_P (rs6000_ieeequad
))
4149 rs6000_ieeequad
= TARGET_IEEEQUAD_DEFAULT
;
4151 else if (TARGET_LONG_DOUBLE_128
)
4153 if (global_options
.x_rs6000_ieeequad
4154 && (!TARGET_POPCNTD
|| !TARGET_VSX
))
4155 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4157 if (rs6000_ieeequad
!= TARGET_IEEEQUAD_DEFAULT
)
4159 /* Determine if the user can change the default long double type at
4160 compilation time. You need GLIBC 2.32 or newer to be able to
4161 change the long double type. Only issue one warning. */
4162 static bool warned_change_long_double
;
4164 if (!warned_change_long_double
&& !glibc_supports_ieee_128bit ())
4166 warned_change_long_double
= true;
4167 if (TARGET_IEEEQUAD
)
4168 warning (OPT_Wpsabi
, "Using IEEE extended precision "
4171 warning (OPT_Wpsabi
, "Using IBM extended precision "
4177 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4178 sytems. In GCC 7, we would enable the IEEE 128-bit floating point
4179 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4180 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4181 the keyword as well as the type. */
4182 TARGET_FLOAT128_TYPE
= TARGET_FLOAT128_ENABLE_TYPE
&& TARGET_VSX
;
4184 /* IEEE 128-bit floating point requires VSX support. */
4185 if (TARGET_FLOAT128_KEYWORD
)
4189 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) != 0)
4190 error ("%qs requires VSX support", "-mfloat128");
4192 TARGET_FLOAT128_TYPE
= 0;
4193 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_KEYWORD
4194 | OPTION_MASK_FLOAT128_HW
);
4196 else if (!TARGET_FLOAT128_TYPE
)
4198 TARGET_FLOAT128_TYPE
= 1;
4199 warning (0, "The %<-mfloat128%> option may not be fully supported");
4203 /* Enable the __float128 keyword under Linux by default. */
4204 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_KEYWORD
4205 && (rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) == 0)
4206 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_KEYWORD
;
4208 /* If we have are supporting the float128 type and full ISA 3.0 support,
4209 enable -mfloat128-hardware by default. However, don't enable the
4210 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4211 because sometimes the compiler wants to put things in an integer
4212 container, and if we don't have __int128 support, it is impossible. */
4213 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_HW
&& TARGET_64BIT
4214 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) == ISA_3_0_MASKS_IEEE
4215 && !(rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
))
4216 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_HW
;
4218 if (TARGET_FLOAT128_HW
4219 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) != ISA_3_0_MASKS_IEEE
)
4221 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4222 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4224 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4227 if (TARGET_FLOAT128_HW
&& !TARGET_64BIT
)
4229 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4230 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4232 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4235 /* Enable -mprefixed by default on power10 systems. */
4236 if (TARGET_POWER10
&& (rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED
) == 0)
4237 rs6000_isa_flags
|= OPTION_MASK_PREFIXED
;
4239 /* -mprefixed requires -mcpu=power10 (or later). */
4240 else if (TARGET_PREFIXED
&& !TARGET_POWER10
)
4242 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED
) != 0)
4243 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4245 rs6000_isa_flags
&= ~OPTION_MASK_PREFIXED
;
4248 /* -mpcrel requires prefixed load/store addressing. */
4249 if (TARGET_PCREL
&& !TARGET_PREFIXED
)
4251 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4252 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4254 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4257 /* Print the options after updating the defaults. */
4258 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4259 rs6000_print_isa_options (stderr
, 0, "after defaults", rs6000_isa_flags
);
4261 /* E500mc does "better" if we inline more aggressively. Respect the
4262 user's opinion, though. */
4263 if (rs6000_block_move_inline_limit
== 0
4264 && (rs6000_tune
== PROCESSOR_PPCE500MC
4265 || rs6000_tune
== PROCESSOR_PPCE500MC64
4266 || rs6000_tune
== PROCESSOR_PPCE5500
4267 || rs6000_tune
== PROCESSOR_PPCE6500
))
4268 rs6000_block_move_inline_limit
= 128;
4270 /* store_one_arg depends on expand_block_move to handle at least the
4271 size of reg_parm_stack_space. */
4272 if (rs6000_block_move_inline_limit
< (TARGET_POWERPC64
? 64 : 32))
4273 rs6000_block_move_inline_limit
= (TARGET_POWERPC64
? 64 : 32);
4277 /* If the appropriate debug option is enabled, replace the target hooks
4278 with debug versions that call the real version and then prints
4279 debugging information. */
4280 if (TARGET_DEBUG_COST
)
4282 targetm
.rtx_costs
= rs6000_debug_rtx_costs
;
4283 targetm
.address_cost
= rs6000_debug_address_cost
;
4284 targetm
.sched
.adjust_cost
= rs6000_debug_adjust_cost
;
4287 if (TARGET_DEBUG_ADDR
)
4289 targetm
.legitimate_address_p
= rs6000_debug_legitimate_address_p
;
4290 targetm
.legitimize_address
= rs6000_debug_legitimize_address
;
4291 rs6000_secondary_reload_class_ptr
4292 = rs6000_debug_secondary_reload_class
;
4293 targetm
.secondary_memory_needed
4294 = rs6000_debug_secondary_memory_needed
;
4295 targetm
.can_change_mode_class
4296 = rs6000_debug_can_change_mode_class
;
4297 rs6000_preferred_reload_class_ptr
4298 = rs6000_debug_preferred_reload_class
;
4299 rs6000_mode_dependent_address_ptr
4300 = rs6000_debug_mode_dependent_address
;
4303 if (rs6000_veclibabi_name
)
4305 if (strcmp (rs6000_veclibabi_name
, "mass") == 0)
4306 rs6000_veclib_handler
= rs6000_builtin_vectorized_libmass
;
4309 error ("unknown vectorization library ABI type in "
4310 "%<-mveclibabi=%s%>", rs6000_veclibabi_name
);
4316 /* Enable Altivec ABI for AIX -maltivec. */
4318 && (TARGET_ALTIVEC
|| TARGET_VSX
)
4319 && !OPTION_SET_P (rs6000_altivec_abi
))
4321 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
4322 error ("target attribute or pragma changes AltiVec ABI");
4324 rs6000_altivec_abi
= 1;
4327 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4328 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4329 be explicitly overridden in either case. */
4332 if (!OPTION_SET_P (rs6000_altivec_abi
)
4333 && (TARGET_64BIT
|| TARGET_ALTIVEC
|| TARGET_VSX
))
4335 if (main_target_opt
!= NULL
&&
4336 !main_target_opt
->x_rs6000_altivec_abi
)
4337 error ("target attribute or pragma changes AltiVec ABI");
4339 rs6000_altivec_abi
= 1;
4343 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4344 So far, the only darwin64 targets are also MACH-O. */
4346 && DEFAULT_ABI
== ABI_DARWIN
4349 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_darwin64_abi
)
4350 error ("target attribute or pragma changes darwin64 ABI");
4353 rs6000_darwin64_abi
= 1;
4354 /* Default to natural alignment, for better performance. */
4355 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
4359 /* Place FP constants in the constant pool instead of TOC
4360 if section anchors enabled. */
4361 if (flag_section_anchors
4362 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC
))
4363 TARGET_NO_FP_IN_TOC
= 1;
4365 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4366 rs6000_print_isa_options (stderr
, 0, "before subtarget", rs6000_isa_flags
);
4368 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4369 SUBTARGET_OVERRIDE_OPTIONS
;
4371 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4372 SUBSUBTARGET_OVERRIDE_OPTIONS
;
4374 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4375 SUB3TARGET_OVERRIDE_OPTIONS
;
4378 /* If the ABI has support for PC-relative relocations, enable it by default.
4379 This test depends on the sub-target tests above setting the code model to
4380 medium for ELF v2 systems. */
4381 if (PCREL_SUPPORTED_BY_OS
4382 && (rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) == 0)
4383 rs6000_isa_flags
|= OPTION_MASK_PCREL
;
4385 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4386 after the subtarget override options are done. */
4387 else if (TARGET_PCREL
&& TARGET_CMODEL
!= CMODEL_MEDIUM
)
4389 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4390 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4392 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4395 /* Enable -mmma by default on power10 systems. */
4396 if (TARGET_POWER10
&& (rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) == 0)
4397 rs6000_isa_flags
|= OPTION_MASK_MMA
;
4399 /* Turn off vector pair/mma options on non-power10 systems. */
4400 else if (!TARGET_POWER10
&& TARGET_MMA
)
4402 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) != 0)
4403 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4405 rs6000_isa_flags
&= ~OPTION_MASK_MMA
;
4408 /* Enable power10 fusion if we are tuning for power10, even if we aren't
4409 generating power10 instructions. */
4410 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P10_FUSION
))
4412 if (rs6000_tune
== PROCESSOR_POWER10
)
4413 rs6000_isa_flags
|= OPTION_MASK_P10_FUSION
;
4415 rs6000_isa_flags
&= ~OPTION_MASK_P10_FUSION
;
4418 /* MMA requires SIMD support as ISA 3.1 claims and our implementation
4419 such as "*movoo" uses vector pair access which use VSX registers.
4420 So make MMA require VSX support here. */
4421 if (TARGET_MMA
&& !TARGET_VSX
)
4423 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) != 0)
4424 error ("%qs requires %qs", "-mmma", "-mvsx");
4425 rs6000_isa_flags
&= ~OPTION_MASK_MMA
;
4428 if (!TARGET_PCREL
&& TARGET_PCREL_OPT
)
4429 rs6000_isa_flags
&= ~OPTION_MASK_PCREL_OPT
;
4431 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4432 rs6000_print_isa_options (stderr
, 0, "after subtarget", rs6000_isa_flags
);
4434 rs6000_always_hint
= (rs6000_tune
!= PROCESSOR_POWER4
4435 && rs6000_tune
!= PROCESSOR_POWER5
4436 && rs6000_tune
!= PROCESSOR_POWER6
4437 && rs6000_tune
!= PROCESSOR_POWER7
4438 && rs6000_tune
!= PROCESSOR_POWER8
4439 && rs6000_tune
!= PROCESSOR_POWER9
4440 && rs6000_tune
!= PROCESSOR_POWER10
4441 && rs6000_tune
!= PROCESSOR_PPCA2
4442 && rs6000_tune
!= PROCESSOR_CELL
4443 && rs6000_tune
!= PROCESSOR_PPC476
);
4444 rs6000_sched_groups
= (rs6000_tune
== PROCESSOR_POWER4
4445 || rs6000_tune
== PROCESSOR_POWER5
4446 || rs6000_tune
== PROCESSOR_POWER7
4447 || rs6000_tune
== PROCESSOR_POWER8
);
4448 rs6000_align_branch_targets
= (rs6000_tune
== PROCESSOR_POWER4
4449 || rs6000_tune
== PROCESSOR_POWER5
4450 || rs6000_tune
== PROCESSOR_POWER6
4451 || rs6000_tune
== PROCESSOR_POWER7
4452 || rs6000_tune
== PROCESSOR_POWER8
4453 || rs6000_tune
== PROCESSOR_POWER9
4454 || rs6000_tune
== PROCESSOR_POWER10
4455 || rs6000_tune
== PROCESSOR_PPCE500MC
4456 || rs6000_tune
== PROCESSOR_PPCE500MC64
4457 || rs6000_tune
== PROCESSOR_PPCE5500
4458 || rs6000_tune
== PROCESSOR_PPCE6500
);
4460 /* Allow debug switches to override the above settings. These are set to -1
4461 in rs6000.opt to indicate the user hasn't directly set the switch. */
4462 if (TARGET_ALWAYS_HINT
>= 0)
4463 rs6000_always_hint
= TARGET_ALWAYS_HINT
;
4465 if (TARGET_SCHED_GROUPS
>= 0)
4466 rs6000_sched_groups
= TARGET_SCHED_GROUPS
;
4468 if (TARGET_ALIGN_BRANCH_TARGETS
>= 0)
4469 rs6000_align_branch_targets
= TARGET_ALIGN_BRANCH_TARGETS
;
4471 rs6000_sched_restricted_insns_priority
4472 = (rs6000_sched_groups
? 1 : 0);
4474 /* Handle -msched-costly-dep option. */
4475 rs6000_sched_costly_dep
4476 = (rs6000_sched_groups
? true_store_to_load_dep_costly
: no_dep_costly
);
4478 if (rs6000_sched_costly_dep_str
)
4480 if (! strcmp (rs6000_sched_costly_dep_str
, "no"))
4481 rs6000_sched_costly_dep
= no_dep_costly
;
4482 else if (! strcmp (rs6000_sched_costly_dep_str
, "all"))
4483 rs6000_sched_costly_dep
= all_deps_costly
;
4484 else if (! strcmp (rs6000_sched_costly_dep_str
, "true_store_to_load"))
4485 rs6000_sched_costly_dep
= true_store_to_load_dep_costly
;
4486 else if (! strcmp (rs6000_sched_costly_dep_str
, "store_to_load"))
4487 rs6000_sched_costly_dep
= store_to_load_dep_costly
;
4489 rs6000_sched_costly_dep
= ((enum rs6000_dependence_cost
)
4490 atoi (rs6000_sched_costly_dep_str
));
4493 /* Handle -minsert-sched-nops option. */
4494 rs6000_sched_insert_nops
4495 = (rs6000_sched_groups
? sched_finish_regroup_exact
: sched_finish_none
);
4497 if (rs6000_sched_insert_nops_str
)
4499 if (! strcmp (rs6000_sched_insert_nops_str
, "no"))
4500 rs6000_sched_insert_nops
= sched_finish_none
;
4501 else if (! strcmp (rs6000_sched_insert_nops_str
, "pad"))
4502 rs6000_sched_insert_nops
= sched_finish_pad_groups
;
4503 else if (! strcmp (rs6000_sched_insert_nops_str
, "regroup_exact"))
4504 rs6000_sched_insert_nops
= sched_finish_regroup_exact
;
4506 rs6000_sched_insert_nops
= ((enum rs6000_nop_insertion
)
4507 atoi (rs6000_sched_insert_nops_str
));
4510 /* Handle stack protector */
4511 if (!OPTION_SET_P (rs6000_stack_protector_guard
))
4512 #ifdef TARGET_THREAD_SSP_OFFSET
4513 rs6000_stack_protector_guard
= SSP_TLS
;
4515 rs6000_stack_protector_guard
= SSP_GLOBAL
;
4518 #ifdef TARGET_THREAD_SSP_OFFSET
4519 rs6000_stack_protector_guard_offset
= TARGET_THREAD_SSP_OFFSET
;
4520 rs6000_stack_protector_guard_reg
= TARGET_64BIT
? 13 : 2;
4523 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str
))
4526 const char *str
= rs6000_stack_protector_guard_offset_str
;
4529 long offset
= strtol (str
, &endp
, 0);
4530 if (!*str
|| *endp
|| errno
)
4531 error ("%qs is not a valid number in %qs", str
,
4532 "-mstack-protector-guard-offset=");
4534 if (!IN_RANGE (offset
, -0x8000, 0x7fff)
4535 || (TARGET_64BIT
&& (offset
& 3)))
4536 error ("%qs is not a valid offset in %qs", str
,
4537 "-mstack-protector-guard-offset=");
4539 rs6000_stack_protector_guard_offset
= offset
;
4542 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str
))
4544 const char *str
= rs6000_stack_protector_guard_reg_str
;
4545 int reg
= decode_reg_name (str
);
4547 if (!IN_RANGE (reg
, 1, 31))
4548 error ("%qs is not a valid base register in %qs", str
,
4549 "-mstack-protector-guard-reg=");
4551 rs6000_stack_protector_guard_reg
= reg
;
4554 if (rs6000_stack_protector_guard
== SSP_TLS
4555 && !IN_RANGE (rs6000_stack_protector_guard_reg
, 1, 31))
4556 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4560 #ifdef TARGET_REGNAMES
4561 /* If the user desires alternate register names, copy in the
4562 alternate names now. */
4563 if (TARGET_REGNAMES
)
4564 memcpy (rs6000_reg_names
, alt_reg_names
, sizeof (rs6000_reg_names
));
4567 /* Set aix_struct_return last, after the ABI is determined.
4568 If -maix-struct-return or -msvr4-struct-return was explicitly
4569 used, don't override with the ABI default. */
4570 if (!OPTION_SET_P (aix_struct_return
))
4571 aix_struct_return
= (DEFAULT_ABI
!= ABI_V4
|| DRAFT_V4_STRUCT_RET
);
4574 /* IBM XL compiler defaults to unsigned bitfields. */
4575 if (TARGET_XL_COMPAT
)
4576 flag_signed_bitfields
= 0;
4579 if (TARGET_LONG_DOUBLE_128
&& !TARGET_IEEEQUAD
)
4580 REAL_MODE_FORMAT (TFmode
) = &ibm_extended_format
;
4582 ASM_GENERATE_INTERNAL_LABEL (toc_label_name
, "LCTOC", 1);
4584 /* We can only guarantee the availability of DI pseudo-ops when
4585 assembling for 64-bit targets. */
4588 targetm
.asm_out
.aligned_op
.di
= NULL
;
4589 targetm
.asm_out
.unaligned_op
.di
= NULL
;
4593 /* Set branch target alignment, if not optimizing for size. */
4596 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4597 aligned 8byte to avoid misprediction by the branch predictor. */
4598 if (rs6000_tune
== PROCESSOR_TITAN
4599 || rs6000_tune
== PROCESSOR_CELL
)
4601 if (flag_align_functions
&& !str_align_functions
)
4602 str_align_functions
= "8";
4603 if (flag_align_jumps
&& !str_align_jumps
)
4604 str_align_jumps
= "8";
4605 if (flag_align_loops
&& !str_align_loops
)
4606 str_align_loops
= "8";
4608 if (rs6000_align_branch_targets
)
4610 if (flag_align_functions
&& !str_align_functions
)
4611 str_align_functions
= "16";
4612 if (flag_align_jumps
&& !str_align_jumps
)
4613 str_align_jumps
= "16";
4614 if (flag_align_loops
&& !str_align_loops
)
4616 can_override_loop_align
= 1;
4617 str_align_loops
= "16";
4622 /* Arrange to save and restore machine status around nested functions. */
4623 init_machine_status
= rs6000_init_machine_status
;
4625 /* We should always be splitting complex arguments, but we can't break
4626 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4627 if (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
)
4628 targetm
.calls
.split_complex_arg
= NULL
;
4630 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4631 if (DEFAULT_ABI
== ABI_AIX
)
4632 targetm
.calls
.custom_function_descriptors
= 0;
4635 /* Initialize rs6000_cost with the appropriate target costs. */
4637 rs6000_cost
= TARGET_POWERPC64
? &size64_cost
: &size32_cost
;
4639 switch (rs6000_tune
)
4641 case PROCESSOR_RS64A
:
4642 rs6000_cost
= &rs64a_cost
;
4645 case PROCESSOR_MPCCORE
:
4646 rs6000_cost
= &mpccore_cost
;
4649 case PROCESSOR_PPC403
:
4650 rs6000_cost
= &ppc403_cost
;
4653 case PROCESSOR_PPC405
:
4654 rs6000_cost
= &ppc405_cost
;
4657 case PROCESSOR_PPC440
:
4658 rs6000_cost
= &ppc440_cost
;
4661 case PROCESSOR_PPC476
:
4662 rs6000_cost
= &ppc476_cost
;
4665 case PROCESSOR_PPC601
:
4666 rs6000_cost
= &ppc601_cost
;
4669 case PROCESSOR_PPC603
:
4670 rs6000_cost
= &ppc603_cost
;
4673 case PROCESSOR_PPC604
:
4674 rs6000_cost
= &ppc604_cost
;
4677 case PROCESSOR_PPC604e
:
4678 rs6000_cost
= &ppc604e_cost
;
4681 case PROCESSOR_PPC620
:
4682 rs6000_cost
= &ppc620_cost
;
4685 case PROCESSOR_PPC630
:
4686 rs6000_cost
= &ppc630_cost
;
4689 case PROCESSOR_CELL
:
4690 rs6000_cost
= &ppccell_cost
;
4693 case PROCESSOR_PPC750
:
4694 case PROCESSOR_PPC7400
:
4695 rs6000_cost
= &ppc750_cost
;
4698 case PROCESSOR_PPC7450
:
4699 rs6000_cost
= &ppc7450_cost
;
4702 case PROCESSOR_PPC8540
:
4703 case PROCESSOR_PPC8548
:
4704 rs6000_cost
= &ppc8540_cost
;
4707 case PROCESSOR_PPCE300C2
:
4708 case PROCESSOR_PPCE300C3
:
4709 rs6000_cost
= &ppce300c2c3_cost
;
4712 case PROCESSOR_PPCE500MC
:
4713 rs6000_cost
= &ppce500mc_cost
;
4716 case PROCESSOR_PPCE500MC64
:
4717 rs6000_cost
= &ppce500mc64_cost
;
4720 case PROCESSOR_PPCE5500
:
4721 rs6000_cost
= &ppce5500_cost
;
4724 case PROCESSOR_PPCE6500
:
4725 rs6000_cost
= &ppce6500_cost
;
4728 case PROCESSOR_TITAN
:
4729 rs6000_cost
= &titan_cost
;
4732 case PROCESSOR_POWER4
:
4733 case PROCESSOR_POWER5
:
4734 rs6000_cost
= &power4_cost
;
4737 case PROCESSOR_POWER6
:
4738 rs6000_cost
= &power6_cost
;
4741 case PROCESSOR_POWER7
:
4742 rs6000_cost
= &power7_cost
;
4745 case PROCESSOR_POWER8
:
4746 rs6000_cost
= &power8_cost
;
4749 case PROCESSOR_POWER9
:
4750 rs6000_cost
= &power9_cost
;
4753 case PROCESSOR_POWER10
:
4754 rs6000_cost
= &power10_cost
;
4757 case PROCESSOR_PPCA2
:
4758 rs6000_cost
= &ppca2_cost
;
4767 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4768 param_simultaneous_prefetches
,
4769 rs6000_cost
->simultaneous_prefetches
);
4770 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4771 param_l1_cache_size
,
4772 rs6000_cost
->l1_cache_size
);
4773 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4774 param_l1_cache_line_size
,
4775 rs6000_cost
->cache_line_size
);
4776 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4777 param_l2_cache_size
,
4778 rs6000_cost
->l2_cache_size
);
4780 /* Increase loop peeling limits based on performance analysis. */
4781 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4782 param_max_peeled_insns
, 400);
4783 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4784 param_max_completely_peeled_insns
, 400);
4786 /* The lxvl/stxvl instructions don't perform well before Power10. */
4788 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4789 param_vect_partial_vector_usage
, 1);
4791 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4792 param_vect_partial_vector_usage
, 0);
4794 /* Use the 'model' -fsched-pressure algorithm by default. */
4795 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4796 param_sched_pressure_algorithm
,
4797 SCHED_PRESSURE_MODEL
);
4799 /* If using typedef char *va_list, signal that
4800 __builtin_va_start (&ap, 0) can be optimized to
4801 ap = __builtin_next_arg (0). */
4802 if (DEFAULT_ABI
!= ABI_V4
)
4803 targetm
.expand_builtin_va_start
= NULL
;
4806 rs6000_override_options_after_change ();
4808 /* If not explicitly specified via option, decide whether to generate indexed
4809 load/store instructions. A value of -1 indicates that the
4810 initial value of this variable has not been overwritten. During
4811 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4812 if (TARGET_AVOID_XFORM
== -1)
4813 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4814 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4815 need indexed accesses and the type used is the scalar type of the element
4816 being loaded or stored. */
4817 TARGET_AVOID_XFORM
= (rs6000_tune
== PROCESSOR_POWER6
&& TARGET_CMPB
4818 && !TARGET_ALTIVEC
);
4820 /* Set the -mrecip options. */
4821 if (rs6000_recip_name
)
4823 char *p
= ASTRDUP (rs6000_recip_name
);
4825 unsigned int mask
, i
;
4828 while ((q
= strtok (p
, ",")) != NULL
)
4839 if (!strcmp (q
, "default"))
4840 mask
= ((TARGET_RECIP_PRECISION
)
4841 ? RECIP_HIGH_PRECISION
: RECIP_LOW_PRECISION
);
4844 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4845 if (!strcmp (q
, recip_options
[i
].string
))
4847 mask
= recip_options
[i
].mask
;
4851 if (i
== ARRAY_SIZE (recip_options
))
4853 error ("unknown option for %<%s=%s%>", "-mrecip", q
);
4861 rs6000_recip_control
&= ~mask
;
4863 rs6000_recip_control
|= mask
;
4867 /* Initialize all of the registers. */
4868 rs6000_init_hard_regno_mode_ok (global_init_p
);
4870 /* Save the initial options in case the user does function specific options */
4872 target_option_default_node
= target_option_current_node
4873 = build_target_option_node (&global_options
, &global_options_set
);
4875 /* If not explicitly specified via option, decide whether to generate the
4876 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4877 if (TARGET_LINK_STACK
== -1)
4878 SET_TARGET_LINK_STACK (rs6000_tune
== PROCESSOR_PPC476
&& flag_pic
);
4880 /* Deprecate use of -mno-speculate-indirect-jumps. */
4881 if (!rs6000_speculate_indirect_jumps
)
4882 warning (0, "%qs is deprecated and not recommended in any circumstances",
4883 "-mno-speculate-indirect-jumps");
4888 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4889 define the target cpu type. */
4892 rs6000_option_override (void)
4894 (void) rs6000_option_override_internal (true);
4898 /* Implement LOOP_ALIGN. */
4900 rs6000_loop_align (rtx label
)
4905 /* Don't override loop alignment if -falign-loops was specified. */
4906 if (!can_override_loop_align
)
4909 bb
= BLOCK_FOR_INSN (label
);
4910 ninsns
= num_loop_insns(bb
->loop_father
);
4912 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4913 if (ninsns
> 4 && ninsns
<= 8
4914 && (rs6000_tune
== PROCESSOR_POWER4
4915 || rs6000_tune
== PROCESSOR_POWER5
4916 || rs6000_tune
== PROCESSOR_POWER6
4917 || rs6000_tune
== PROCESSOR_POWER7
4918 || rs6000_tune
== PROCESSOR_POWER8
))
4919 return align_flags (5);
4924 /* Return true iff, data reference of TYPE can reach vector alignment (16)
4925 after applying N number of iterations. This routine does not determine
4926 how may iterations are required to reach desired alignment. */
4929 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
4936 if (rs6000_alignment_flags
== MASK_ALIGN_NATURAL
)
4939 if (rs6000_alignment_flags
== MASK_ALIGN_POWER
)
4949 /* Assuming that all other types are naturally aligned. CHECKME! */
4954 /* Return true if the vector misalignment factor is supported by the
4957 rs6000_builtin_support_vector_misalignment (machine_mode mode
,
4964 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4967 /* Return if movmisalign pattern is not supported for this mode. */
4968 if (optab_handler (movmisalign_optab
, mode
) == CODE_FOR_nothing
)
4971 if (misalignment
== -1)
4973 /* Misalignment factor is unknown at compile time but we know
4974 it's word aligned. */
4975 if (rs6000_vector_alignment_reachable (type
, is_packed
))
4977 int element_size
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
4979 if (element_size
== 64 || element_size
== 32)
4986 /* VSX supports word-aligned vector. */
4987 if (misalignment
% 4 == 0)
4993 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4995 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
4996 tree vectype
, int misalign
)
5001 switch (type_of_cost
)
5009 case cond_branch_not_taken
:
5013 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5017 /* Power7 has only one permute unit, make it a bit expensive. */
5018 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5023 case vec_promote_demote
:
5024 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5025 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5030 case cond_branch_taken
:
5033 case unaligned_load
:
5034 case vector_gather_load
:
5035 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5036 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5039 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5041 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5042 /* See PR102767, consider V1TI to keep consistency. */
5043 if (elements
== 2 || elements
== 1)
5044 /* Double word aligned. */
5052 /* Double word aligned. */
5056 /* Unknown misalignment. */
5069 /* Misaligned loads are not supported. */
5072 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5075 case unaligned_store
:
5076 case vector_scatter_store
:
5077 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5080 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5082 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5083 /* See PR102767, consider V1TI to keep consistency. */
5084 if (elements
== 2 || elements
== 1)
5085 /* Double word aligned. */
5093 /* Double word aligned. */
5097 /* Unknown misalignment. */
5110 /* Misaligned stores are not supported. */
5116 /* This is a rough approximation assuming non-constant elements
5117 constructed into a vector via element insertion. FIXME:
5118 vec_construct is not granular enough for uniformly good
5119 decisions. If the initialization is a splat, this is
5120 cheaper than we estimate. Improve this someday. */
5121 elem_type
= TREE_TYPE (vectype
);
5122 /* 32-bit vectors loaded into registers are stored as double
5123 precision, so we need 2 permutes, 2 converts, and 1 merge
5124 to construct a vector of short floats from them. */
5125 if (SCALAR_FLOAT_TYPE_P (elem_type
)
5126 && TYPE_PRECISION (elem_type
) == 32)
5128 /* On POWER9, integer vector types are built up in GPRs and then
5129 use a direct move (2 cycles). For POWER8 this is even worse,
5130 as we need two direct moves and a merge, and the direct moves
5132 else if (INTEGRAL_TYPE_P (elem_type
))
5134 if (TARGET_P9_VECTOR
)
5135 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 2;
5137 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 5;
5140 /* V2DFmode doesn't need a direct move. */
5148 /* Implement targetm.vectorize.preferred_simd_mode. */
5151 rs6000_preferred_simd_mode (scalar_mode mode
)
5153 opt_machine_mode vmode
= mode_for_vector (mode
, 16 / GET_MODE_SIZE (mode
));
5155 if (vmode
.exists () && !VECTOR_MEM_NONE_P (vmode
.require ()))
5156 return vmode
.require ();
5161 class rs6000_cost_data
: public vector_costs
5164 using vector_costs::vector_costs
;
5166 unsigned int add_stmt_cost (int count
, vect_cost_for_stmt kind
,
5167 stmt_vec_info stmt_info
, slp_tree
, tree vectype
,
5169 vect_cost_model_location where
) override
;
5170 void finish_cost (const vector_costs
*) override
;
5173 void update_target_cost_per_stmt (vect_cost_for_stmt
, stmt_vec_info
,
5174 vect_cost_model_location
, unsigned int);
5175 void density_test (loop_vec_info
);
5176 void adjust_vect_cost_per_loop (loop_vec_info
);
5177 unsigned int determine_suggested_unroll_factor (loop_vec_info
);
5179 /* Total number of vectorized stmts (loop only). */
5180 unsigned m_nstmts
= 0;
5181 /* Total number of loads (loop only). */
5182 unsigned m_nloads
= 0;
5183 /* Total number of stores (loop only). */
5184 unsigned m_nstores
= 0;
5185 /* Reduction factor for suggesting unroll factor (loop only). */
5186 unsigned m_reduc_factor
= 0;
5187 /* Possible extra penalized cost on vector construction (loop only). */
5188 unsigned m_extra_ctor_cost
= 0;
5189 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5190 instruction is needed by the vectorization. */
5191 bool m_vect_nonmem
= false;
5192 /* If this loop gets vectorized with emulated gather load. */
5193 bool m_gather_load
= false;
5196 /* Test for likely overcommitment of vector hardware resources. If a
5197 loop iteration is relatively large, and too large a percentage of
5198 instructions in the loop are vectorized, the cost model may not
5199 adequately reflect delays from unavailable vector resources.
5200 Penalize the loop body cost for this case. */
5203 rs6000_cost_data::density_test (loop_vec_info loop_vinfo
)
5205 /* This density test only cares about the cost of vector version of the
5206 loop, so immediately return if we are passed costing for the scalar
5207 version (namely computing single scalar iteration cost). */
5208 if (m_costing_for_scalar
)
5211 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5212 basic_block
*bbs
= get_loop_body (loop
);
5213 int nbbs
= loop
->num_nodes
;
5214 int vec_cost
= m_costs
[vect_body
], not_vec_cost
= 0;
5216 for (int i
= 0; i
< nbbs
; i
++)
5218 basic_block bb
= bbs
[i
];
5219 gimple_stmt_iterator gsi
;
5221 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5223 gimple
*stmt
= gsi_stmt (gsi
);
5224 if (is_gimple_debug (stmt
))
5227 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
5229 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5230 && !STMT_VINFO_IN_PATTERN_P (stmt_info
))
5236 int density_pct
= (vec_cost
* 100) / (vec_cost
+ not_vec_cost
);
5238 if (density_pct
> rs6000_density_pct_threshold
5239 && vec_cost
+ not_vec_cost
> rs6000_density_size_threshold
)
5241 m_costs
[vect_body
] = vec_cost
* (100 + rs6000_density_penalty
) / 100;
5242 if (dump_enabled_p ())
5243 dump_printf_loc (MSG_NOTE
, vect_location
,
5244 "density %d%%, cost %d exceeds threshold, penalizing "
5245 "loop body cost by %u%%\n", density_pct
,
5246 vec_cost
+ not_vec_cost
, rs6000_density_penalty
);
5249 /* Check whether we need to penalize the body cost to account
5250 for excess strided or elementwise loads. */
5251 if (m_extra_ctor_cost
> 0)
5253 gcc_assert (m_nloads
<= m_nstmts
);
5254 unsigned int load_pct
= (m_nloads
* 100) / m_nstmts
;
5256 /* It's likely to be bounded by latency and execution resources
5257 from many scalar loads which are strided or elementwise loads
5258 into a vector if both conditions below are found:
5259 1. there are many loads, it's easy to result in a long wait
5261 2. load has a big proportion of all vectorized statements,
5262 it's not easy to schedule other statements to spread among
5264 One typical case is the innermost loop of the hotspot of SPEC2017
5265 503.bwaves_r without loop interchange. */
5266 if (m_nloads
> (unsigned int) rs6000_density_load_num_threshold
5267 && load_pct
> (unsigned int) rs6000_density_load_pct_threshold
)
5269 m_costs
[vect_body
] += m_extra_ctor_cost
;
5270 if (dump_enabled_p ())
5271 dump_printf_loc (MSG_NOTE
, vect_location
,
5272 "Found %u loads and "
5273 "load pct. %u%% exceed "
5275 "penalizing loop body "
5276 "cost by extra cost %u "
5284 /* Implement targetm.vectorize.create_costs. */
5286 static vector_costs
*
5287 rs6000_vectorize_create_costs (vec_info
*vinfo
, bool costing_for_scalar
)
5289 return new rs6000_cost_data (vinfo
, costing_for_scalar
);
5292 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5293 For some statement, we would like to further fine-grain tweak the cost on
5294 top of rs6000_builtin_vectorization_cost handling which doesn't have any
5295 information on statement operation codes etc. One typical case here is
5296 COND_EXPR, it takes the same cost to simple FXU instruction when evaluating
5297 for scalar cost, but it should be priced more whatever transformed to either
5298 compare + branch or compare + isel instructions. */
5301 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind
,
5302 struct _stmt_vec_info
*stmt_info
)
5304 if (kind
== scalar_stmt
&& stmt_info
&& stmt_info
->stmt
5305 && gimple_code (stmt_info
->stmt
) == GIMPLE_ASSIGN
)
5307 tree_code subcode
= gimple_assign_rhs_code (stmt_info
->stmt
);
5308 if (subcode
== COND_EXPR
)
5315 /* Helper function for add_stmt_cost. Check each statement cost
5316 entry, gather information and update the target_cost fields
5319 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind
,
5320 stmt_vec_info stmt_info
,
5321 vect_cost_model_location where
,
5322 unsigned int orig_count
)
5325 /* Check whether we're doing something other than just a copy loop.
5326 Not all such loops may be profitably vectorized; see
5327 rs6000_finish_cost. */
5328 if (kind
== vec_to_scalar
5330 || kind
== vec_promote_demote
5331 || kind
== vec_construct
5332 || kind
== scalar_to_vec
5333 || (where
== vect_body
&& kind
== vector_stmt
))
5334 m_vect_nonmem
= true;
5336 /* Gather some information when we are costing the vectorized instruction
5337 for the statements located in a loop body. */
5338 if (!m_costing_for_scalar
5339 && is_a
<loop_vec_info
> (m_vinfo
)
5340 && where
== vect_body
)
5342 m_nstmts
+= orig_count
;
5344 if (kind
== scalar_load
5345 || kind
== vector_load
5346 || kind
== unaligned_load
5347 || kind
== vector_gather_load
)
5349 m_nloads
+= orig_count
;
5350 if (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5351 m_gather_load
= true;
5353 else if (kind
== scalar_store
5354 || kind
== vector_store
5355 || kind
== unaligned_store
5356 || kind
== vector_scatter_store
)
5357 m_nstores
+= orig_count
;
5358 else if ((kind
== scalar_stmt
5359 || kind
== vector_stmt
5360 || kind
== vec_to_scalar
)
5362 && vect_is_reduction (stmt_info
))
5364 /* Loop body contains normal int or fp operations and epilogue
5365 contains vector reduction. For simplicity, we assume int
5366 operation takes one cycle and fp operation takes one more. */
5367 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
5368 bool is_float
= FLOAT_TYPE_P (TREE_TYPE (lhs
));
5369 unsigned int basic_cost
= is_float
? 2 : 1;
5370 m_reduc_factor
= MAX (basic_cost
* orig_count
, m_reduc_factor
);
5373 /* Power processors do not currently have instructions for strided
5374 and elementwise loads, and instead we must generate multiple
5375 scalar loads. This leads to undercounting of the cost. We
5376 account for this by scaling the construction cost by the number
5377 of elements involved, and saving this as extra cost that we may
5378 or may not need to apply. When finalizing the cost of the loop,
5379 the extra penalty is applied when the load density heuristics
5381 if (kind
== vec_construct
&& stmt_info
5382 && STMT_VINFO_TYPE (stmt_info
) == load_vec_info_type
5383 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_ELEMENTWISE
5384 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_STRIDED_SLP
))
5386 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5387 unsigned int nunits
= vect_nunits_for_cost (vectype
);
5388 /* As PR103702 shows, it's possible that vectorizer wants to do
5389 costings for only one unit here, it's no need to do any
5390 penalization for it, so simply early return here. */
5393 /* i386 port adopts nunits * stmt_cost as the penalized cost
5394 for this kind of penalization, we used to follow it but
5395 found it could result in an unreliable body cost especially
5396 for V16QI/V8HI modes. To make it better, we choose this
5397 new heuristic: for each scalar load, we use 2 as penalized
5398 cost for the case with 2 nunits and use 1 for the other
5399 cases. It's without much supporting theory, mainly
5400 concluded from the broad performance evaluations on Power8,
5401 Power9 and Power10. One possibly related point is that:
5402 vector construction for more units would use more insns,
5403 it has more chances to schedule them better (even run in
5404 parallelly when enough available units at that time), so
5405 it seems reasonable not to penalize that much for them. */
5406 unsigned int adjusted_cost
= (nunits
== 2) ? 2 : 1;
5407 unsigned int extra_cost
= nunits
* adjusted_cost
;
5408 m_extra_ctor_cost
+= extra_cost
;
5414 rs6000_cost_data::add_stmt_cost (int count
, vect_cost_for_stmt kind
,
5415 stmt_vec_info stmt_info
, slp_tree
,
5416 tree vectype
, int misalign
,
5417 vect_cost_model_location where
)
5419 unsigned retval
= 0;
5421 if (flag_vect_cost_model
)
5423 int stmt_cost
= rs6000_builtin_vectorization_cost (kind
, vectype
,
5425 stmt_cost
+= rs6000_adjust_vect_cost_per_stmt (kind
, stmt_info
);
5426 /* Statements in an inner loop relative to the loop being
5427 vectorized are weighted more heavily. The value here is
5428 arbitrary and could potentially be improved with analysis. */
5429 unsigned int orig_count
= count
;
5430 retval
= adjust_cost_for_freq (stmt_info
, where
, count
* stmt_cost
);
5431 m_costs
[where
] += retval
;
5433 update_target_cost_per_stmt (kind
, stmt_info
, where
, orig_count
);
5439 /* For some target specific vectorization cost which can't be handled per stmt,
5440 we check the requisite conditions and adjust the vectorization cost
5441 accordingly if satisfied. One typical example is to model shift cost for
5442 vector with length by counting number of required lengths under condition
5443 LOOP_VINFO_FULLY_WITH_LENGTH_P. */
5446 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo
)
5448 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
))
5450 rgroup_controls
*rgc
;
5451 unsigned int num_vectors_m1
;
5452 unsigned int shift_cnt
= 0;
5453 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo
), num_vectors_m1
, rgc
)
5455 /* Each length needs one shift to fill into bits 0-7. */
5456 shift_cnt
+= num_vectors_m1
+ 1;
5458 add_stmt_cost (shift_cnt
, scalar_stmt
, NULL
, NULL
,
5459 NULL_TREE
, 0, vect_body
);
5463 /* Determine suggested unroll factor by considering some below factors:
5465 - unroll option/pragma which can disable unrolling for this loop;
5466 - simple hardware resource model for non memory vector insns;
5467 - aggressive heuristics when iteration count is unknown:
5468 - reduction case to break cross iteration dependency;
5469 - emulated gather load;
5470 - estimated iteration count when iteration count is unknown;
5475 rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo
)
5477 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5479 /* Don't unroll if it's specified explicitly not to be unrolled. */
5480 if (loop
->unroll
== 1
5481 || (OPTION_SET_P (flag_unroll_loops
) && !flag_unroll_loops
)
5482 || (OPTION_SET_P (flag_unroll_all_loops
) && !flag_unroll_all_loops
))
5485 unsigned int nstmts_nonldst
= m_nstmts
- m_nloads
- m_nstores
;
5486 /* Don't unroll if no vector instructions excepting for memory access. */
5487 if (nstmts_nonldst
== 0)
5490 /* Consider breaking cross iteration dependency for reduction. */
5491 unsigned int reduc_factor
= m_reduc_factor
> 1 ? m_reduc_factor
: 1;
5493 /* Use this simple hardware resource model that how many non ld/st
5494 vector instructions can be issued per cycle. */
5495 unsigned int issue_width
= rs6000_vect_unroll_issue
;
5496 unsigned int uf
= CEIL (reduc_factor
* issue_width
, nstmts_nonldst
);
5497 uf
= MIN ((unsigned int) rs6000_vect_unroll_limit
, uf
);
5498 /* Make sure it is power of 2. */
5499 uf
= 1 << ceil_log2 (uf
);
5501 /* If the iteration count is known, the costing would be exact enough,
5502 don't worry it could be worse. */
5503 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
5506 /* Inspired by SPEC2017 parest_r, we want to aggressively unroll the
5507 loop if either condition is satisfied:
5508 - reduction factor exceeds the threshold;
5509 - emulated gather load adopted. */
5510 if (reduc_factor
> (unsigned int) rs6000_vect_unroll_reduc_threshold
5514 /* Check if we can conclude it's good to unroll from the estimated
5516 HOST_WIDE_INT est_niter
= get_estimated_loop_iterations_int (loop
);
5517 unsigned int vf
= vect_vf_for_cost (loop_vinfo
);
5518 unsigned int unrolled_vf
= vf
* uf
;
5519 if (est_niter
== -1 || est_niter
< unrolled_vf
)
5520 /* When the estimated iteration of this loop is unknown, it's possible
5521 that we are able to vectorize this loop with the original VF but fail
5522 to vectorize it with the unrolled VF any more if the actual iteration
5523 count is in between. */
5527 unsigned int epil_niter_unr
= est_niter
% unrolled_vf
;
5528 unsigned int epil_niter
= est_niter
% vf
;
5529 /* Even if we have partial vector support, it can be still inefficent
5530 to calculate the length when the iteration count is unknown, so
5531 only expect it's good to unroll when the epilogue iteration count
5532 is not bigger than VF (only one time length calculation). */
5533 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
5534 && epil_niter_unr
<= vf
)
5536 /* Without partial vector support, conservatively unroll this when
5537 the epilogue iteration count is less than the original one
5538 (epilogue execution time wouldn't be longer than before). */
5539 else if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
5540 && epil_niter_unr
<= epil_niter
)
5548 rs6000_cost_data::finish_cost (const vector_costs
*scalar_costs
)
5550 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (m_vinfo
))
5552 adjust_vect_cost_per_loop (loop_vinfo
);
5553 density_test (loop_vinfo
);
5555 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5556 that require versioning for any reason. The vectorization is at
5557 best a wash inside the loop, and the versioning checks make
5558 profitability highly unlikely and potentially quite harmful. */
5560 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
) == 2
5561 && LOOP_REQUIRES_VERSIONING (loop_vinfo
))
5562 m_costs
[vect_body
] += 10000;
5564 m_suggested_unroll_factor
5565 = determine_suggested_unroll_factor (loop_vinfo
);
5568 vector_costs::finish_cost (scalar_costs
);
5571 /* Implement targetm.loop_unroll_adjust. */
5574 rs6000_loop_unroll_adjust (unsigned nunroll
, struct loop
*loop
)
5576 if (unroll_only_small_loops
)
5578 /* TODO: These are hardcoded values right now. We probably should use
5580 if (loop
->ninsns
<= 6)
5581 return MIN (4, nunroll
);
5582 if (loop
->ninsns
<= 10)
5583 return MIN (2, nunroll
);
5591 /* Returns a function decl for a vectorized version of the builtin function
5592 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5593 if it is not available.
5595 Implement targetm.vectorize.builtin_vectorized_function. */
5598 rs6000_builtin_vectorized_function (unsigned int fn
, tree type_out
,
5601 machine_mode in_mode
, out_mode
;
5604 if (TARGET_DEBUG_BUILTIN
)
5605 fprintf (stderr
, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5606 combined_fn_name (combined_fn (fn
)),
5607 GET_MODE_NAME (TYPE_MODE (type_out
)),
5608 GET_MODE_NAME (TYPE_MODE (type_in
)));
5610 /* TODO: Should this be gcc_assert? */
5611 if (TREE_CODE (type_out
) != VECTOR_TYPE
5612 || TREE_CODE (type_in
) != VECTOR_TYPE
)
5615 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
5616 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
5617 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
5618 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
5623 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5624 && out_mode
== DFmode
&& out_n
== 2
5625 && in_mode
== DFmode
&& in_n
== 2)
5626 return rs6000_builtin_decls
[RS6000_BIF_CPSGNDP
];
5627 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5628 && out_mode
== SFmode
&& out_n
== 4
5629 && in_mode
== SFmode
&& in_n
== 4)
5630 return rs6000_builtin_decls
[RS6000_BIF_CPSGNSP
];
5631 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5632 && out_mode
== SFmode
&& out_n
== 4
5633 && in_mode
== SFmode
&& in_n
== 4)
5634 return rs6000_builtin_decls
[RS6000_BIF_COPYSIGN_V4SF
];
5637 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5638 && out_mode
== DFmode
&& out_n
== 2
5639 && in_mode
== DFmode
&& in_n
== 2)
5640 return rs6000_builtin_decls
[RS6000_BIF_XVRDPIP
];
5641 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5642 && out_mode
== SFmode
&& out_n
== 4
5643 && in_mode
== SFmode
&& in_n
== 4)
5644 return rs6000_builtin_decls
[RS6000_BIF_XVRSPIP
];
5645 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5646 && out_mode
== SFmode
&& out_n
== 4
5647 && in_mode
== SFmode
&& in_n
== 4)
5648 return rs6000_builtin_decls
[RS6000_BIF_VRFIP
];
5651 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5652 && out_mode
== DFmode
&& out_n
== 2
5653 && in_mode
== DFmode
&& in_n
== 2)
5654 return rs6000_builtin_decls
[RS6000_BIF_XVRDPIM
];
5655 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5656 && out_mode
== SFmode
&& out_n
== 4
5657 && in_mode
== SFmode
&& in_n
== 4)
5658 return rs6000_builtin_decls
[RS6000_BIF_XVRSPIM
];
5659 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5660 && out_mode
== SFmode
&& out_n
== 4
5661 && in_mode
== SFmode
&& in_n
== 4)
5662 return rs6000_builtin_decls
[RS6000_BIF_VRFIM
];
5665 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5666 && out_mode
== DFmode
&& out_n
== 2
5667 && in_mode
== DFmode
&& in_n
== 2)
5668 return rs6000_builtin_decls
[RS6000_BIF_XVMADDDP
];
5669 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5670 && out_mode
== SFmode
&& out_n
== 4
5671 && in_mode
== SFmode
&& in_n
== 4)
5672 return rs6000_builtin_decls
[RS6000_BIF_XVMADDSP
];
5673 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5674 && out_mode
== SFmode
&& out_n
== 4
5675 && in_mode
== SFmode
&& in_n
== 4)
5676 return rs6000_builtin_decls
[RS6000_BIF_VMADDFP
];
5679 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5680 && out_mode
== DFmode
&& out_n
== 2
5681 && in_mode
== DFmode
&& in_n
== 2)
5682 return rs6000_builtin_decls
[RS6000_BIF_XVRDPIZ
];
5683 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5684 && out_mode
== SFmode
&& out_n
== 4
5685 && in_mode
== SFmode
&& in_n
== 4)
5686 return rs6000_builtin_decls
[RS6000_BIF_XVRSPIZ
];
5687 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5688 && out_mode
== SFmode
&& out_n
== 4
5689 && in_mode
== SFmode
&& in_n
== 4)
5690 return rs6000_builtin_decls
[RS6000_BIF_VRFIZ
];
5693 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5694 && flag_unsafe_math_optimizations
5695 && out_mode
== DFmode
&& out_n
== 2
5696 && in_mode
== DFmode
&& in_n
== 2)
5697 return rs6000_builtin_decls
[RS6000_BIF_XVRDPI
];
5698 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5699 && flag_unsafe_math_optimizations
5700 && out_mode
== SFmode
&& out_n
== 4
5701 && in_mode
== SFmode
&& in_n
== 4)
5702 return rs6000_builtin_decls
[RS6000_BIF_XVRSPI
];
5705 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5706 && !flag_trapping_math
5707 && out_mode
== DFmode
&& out_n
== 2
5708 && in_mode
== DFmode
&& in_n
== 2)
5709 return rs6000_builtin_decls
[RS6000_BIF_XVRDPIC
];
5710 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5711 && !flag_trapping_math
5712 && out_mode
== SFmode
&& out_n
== 4
5713 && in_mode
== SFmode
&& in_n
== 4)
5714 return rs6000_builtin_decls
[RS6000_BIF_XVRSPIC
];
5720 /* Generate calls to libmass if appropriate. */
5721 if (rs6000_veclib_handler
)
5722 return rs6000_veclib_handler (combined_fn (fn
), type_out
, type_in
);
5727 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5728 library with vectorized intrinsics. */
5731 rs6000_builtin_vectorized_libmass (combined_fn fn
, tree type_out
,
5735 const char *suffix
= NULL
;
5736 tree fntype
, new_fndecl
, bdecl
= NULL_TREE
;
5739 machine_mode el_mode
, in_mode
;
5742 /* Libmass is suitable for unsafe math only as it does not correctly support
5743 parts of IEEE with the required precision such as denormals. Only support
5744 it if we have VSX to use the simd d2 or f4 functions.
5745 XXX: Add variable length support. */
5746 if (!flag_unsafe_math_optimizations
|| !TARGET_VSX
)
5749 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
5750 n
= TYPE_VECTOR_SUBPARTS (type_out
);
5751 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
5752 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
5753 if (el_mode
!= in_mode
5789 if (el_mode
== DFmode
&& n
== 2)
5791 bdecl
= mathfn_built_in (double_type_node
, fn
);
5792 suffix
= "d2"; /* pow -> powd2 */
5794 else if (el_mode
== SFmode
&& n
== 4)
5796 bdecl
= mathfn_built_in (float_type_node
, fn
);
5797 suffix
= "4"; /* powf -> powf4 */
5809 gcc_assert (suffix
!= NULL
);
5810 bname
= IDENTIFIER_POINTER (DECL_NAME (bdecl
));
5814 strcpy (name
, bname
+ strlen ("__builtin_"));
5815 strcat (name
, suffix
);
5818 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
5819 else if (n_args
== 2)
5820 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
5824 /* Build a function declaration for the vectorized function. */
5825 new_fndecl
= build_decl (BUILTINS_LOCATION
,
5826 FUNCTION_DECL
, get_identifier (name
), fntype
);
5827 TREE_PUBLIC (new_fndecl
) = 1;
5828 DECL_EXTERNAL (new_fndecl
) = 1;
5829 DECL_IS_NOVOPS (new_fndecl
) = 1;
5830 TREE_READONLY (new_fndecl
) = 1;
5836 /* Default CPU string for rs6000*_file_start functions. */
5837 static const char *rs6000_default_cpu
;
5839 #ifdef USING_ELFOS_H
5840 const char *rs6000_machine
;
5843 rs6000_machine_from_flags (void)
5846 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
)
5848 if (rs6000_cpu
== PROCESSOR_PPC8540
|| rs6000_cpu
== PROCESSOR_PPC8548
)
5850 if (rs6000_cpu
== PROCESSOR_PPCE500MC
)
5852 if (rs6000_cpu
== PROCESSOR_PPCE500MC64
)
5854 if (rs6000_cpu
== PROCESSOR_PPCE5500
)
5856 if (rs6000_cpu
== PROCESSOR_PPCE6500
)
5860 if (rs6000_cpu
== PROCESSOR_PPC403
)
5862 if (rs6000_cpu
== PROCESSOR_PPC405
)
5864 if (rs6000_cpu
== PROCESSOR_PPC440
)
5866 if (rs6000_cpu
== PROCESSOR_PPC476
)
5870 if (rs6000_cpu
== PROCESSOR_PPCA2
)
5874 if (rs6000_cpu
== PROCESSOR_CELL
)
5878 if (rs6000_cpu
== PROCESSOR_TITAN
)
5881 /* 500 series and 800 series */
5882 if (rs6000_cpu
== PROCESSOR_MPCCORE
)
5886 /* This (and ppc64 below) are disabled here (for now at least) because
5887 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
5888 are #define'd as some of these. Untangling that is a job for later. */
5890 /* 600 series and 700 series, "classic" */
5891 if (rs6000_cpu
== PROCESSOR_PPC601
|| rs6000_cpu
== PROCESSOR_PPC603
5892 || rs6000_cpu
== PROCESSOR_PPC604
|| rs6000_cpu
== PROCESSOR_PPC604e
5893 || rs6000_cpu
== PROCESSOR_PPC750
)
5897 /* Classic with AltiVec, "G4" */
5898 if (rs6000_cpu
== PROCESSOR_PPC7400
|| rs6000_cpu
== PROCESSOR_PPC7450
)
5902 /* The older 64-bit CPUs */
5903 if (rs6000_cpu
== PROCESSOR_PPC620
|| rs6000_cpu
== PROCESSOR_PPC630
5904 || rs6000_cpu
== PROCESSOR_RS64A
)
5908 HOST_WIDE_INT flags
= rs6000_isa_flags
;
5910 /* Disable the flags that should never influence the .machine selection. */
5911 flags
&= ~(OPTION_MASK_PPC_GFXOPT
| OPTION_MASK_PPC_GPOPT
| OPTION_MASK_ISEL
);
5913 if ((flags
& (ISA_3_1_MASKS_SERVER
& ~ISA_3_0_MASKS_SERVER
)) != 0)
5915 if ((flags
& (ISA_3_0_MASKS_SERVER
& ~ISA_2_7_MASKS_SERVER
)) != 0)
5917 if ((flags
& (ISA_2_7_MASKS_SERVER
& ~ISA_2_6_MASKS_SERVER
)) != 0)
5919 if ((flags
& (ISA_2_6_MASKS_SERVER
& ~ISA_2_5_MASKS_SERVER
)) != 0)
5921 if ((flags
& (ISA_2_5_MASKS_SERVER
& ~ISA_2_4_MASKS
)) != 0)
5923 if ((flags
& (ISA_2_4_MASKS
& ~ISA_2_1_MASKS
)) != 0)
5925 if ((flags
& ISA_2_1_MASKS
) != 0)
5927 if ((flags
& OPTION_MASK_POWERPC64
) != 0)
5933 emit_asm_machine (void)
5935 fprintf (asm_out_file
, "\t.machine %s\n", rs6000_machine
);
5939 /* Do anything needed at the start of the asm file. */
5942 rs6000_file_start (void)
5945 const char *start
= buffer
;
5946 FILE *file
= asm_out_file
;
5948 rs6000_default_cpu
= TARGET_CPU_DEFAULT
;
5950 default_file_start ();
5952 if (flag_verbose_asm
)
5954 sprintf (buffer
, "\n%s rs6000/powerpc options:", ASM_COMMENT_START
);
5956 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
5958 fprintf (file
, "%s --with-cpu=%s", start
, rs6000_default_cpu
);
5962 if (OPTION_SET_P (rs6000_cpu_index
))
5964 fprintf (file
, "%s -mcpu=%s", start
,
5965 processor_target_table
[rs6000_cpu_index
].name
);
5969 if (OPTION_SET_P (rs6000_tune_index
))
5971 fprintf (file
, "%s -mtune=%s", start
,
5972 processor_target_table
[rs6000_tune_index
].name
);
5976 if (PPC405_ERRATUM77
)
5978 fprintf (file
, "%s PPC405CR_ERRATUM77", start
);
5982 #ifdef USING_ELFOS_H
5983 switch (rs6000_sdata
)
5985 case SDATA_NONE
: fprintf (file
, "%s -msdata=none", start
); start
= ""; break;
5986 case SDATA_DATA
: fprintf (file
, "%s -msdata=data", start
); start
= ""; break;
5987 case SDATA_SYSV
: fprintf (file
, "%s -msdata=sysv", start
); start
= ""; break;
5988 case SDATA_EABI
: fprintf (file
, "%s -msdata=eabi", start
); start
= ""; break;
5991 if (rs6000_sdata
&& g_switch_value
)
5993 fprintf (file
, "%s -G %d", start
,
6003 #ifdef USING_ELFOS_H
6004 rs6000_machine
= rs6000_machine_from_flags ();
6005 emit_asm_machine ();
6008 if (DEFAULT_ABI
== ABI_ELFv2
)
6009 fprintf (file
, "\t.abiversion 2\n");
6013 /* Return nonzero if this function is known to have a null epilogue. */
6016 direct_return (void)
6018 if (reload_completed
)
6020 rs6000_stack_t
*info
= rs6000_stack_info ();
6022 if (info
->first_gp_reg_save
== 32
6023 && info
->first_fp_reg_save
== 64
6024 && info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
+ 1
6025 && ! info
->lr_save_p
6026 && ! info
->cr_save_p
6027 && info
->vrsave_size
== 0
6035 /* Helper for num_insns_constant. Calculate number of instructions to
6036 load VALUE to a single gpr using combinations of addi, addis, ori,
6037 oris, sldi and rldimi instructions. */
6040 num_insns_constant_gpr (HOST_WIDE_INT value
)
6042 /* signed constant loadable with addi */
6043 if (SIGNED_INTEGER_16BIT_P (value
))
6046 /* constant loadable with addis */
6047 else if ((value
& 0xffff) == 0
6048 && (value
>> 31 == -1 || value
>> 31 == 0))
6051 /* PADDI can support up to 34 bit signed integers. */
6052 else if (TARGET_PREFIXED
&& SIGNED_INTEGER_34BIT_P (value
))
6055 else if (TARGET_POWERPC64
)
6057 HOST_WIDE_INT low
= sext_hwi (value
, 32);
6058 HOST_WIDE_INT high
= value
>> 31;
6060 if (high
== 0 || high
== -1)
6065 if (low
== 0 || low
== high
)
6066 return num_insns_constant_gpr (high
) + 1;
6068 return num_insns_constant_gpr (low
) + 1;
6070 return (num_insns_constant_gpr (high
)
6071 + num_insns_constant_gpr (low
) + 1);
6078 /* Helper for num_insns_constant. Allow constants formed by the
6079 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
6080 and handle modes that require multiple gprs. */
6083 num_insns_constant_multi (HOST_WIDE_INT value
, machine_mode mode
)
6085 int nregs
= (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6089 HOST_WIDE_INT low
= sext_hwi (value
, BITS_PER_WORD
);
6090 int insns
= num_insns_constant_gpr (low
);
6092 /* We won't get more than 2 from num_insns_constant_gpr
6093 except when TARGET_POWERPC64 and mode is DImode or
6094 wider, so the register mode must be DImode. */
6095 && rs6000_is_valid_and_mask (GEN_INT (low
), DImode
))
6098 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6099 it all at once would be UB. */
6100 value
>>= (BITS_PER_WORD
- 1);
6106 /* Return the number of instructions it takes to form a constant in as
6107 many gprs are needed for MODE. */
6110 num_insns_constant (rtx op
, machine_mode mode
)
6114 switch (GET_CODE (op
))
6120 case CONST_WIDE_INT
:
6123 for (int i
= 0; i
< CONST_WIDE_INT_NUNITS (op
); i
++)
6124 insns
+= num_insns_constant_multi (CONST_WIDE_INT_ELT (op
, i
),
6131 const struct real_value
*rv
= CONST_DOUBLE_REAL_VALUE (op
);
6133 if (mode
== SFmode
|| mode
== SDmode
)
6138 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv
, l
);
6140 REAL_VALUE_TO_TARGET_SINGLE (*rv
, l
);
6141 /* See the first define_split in rs6000.md handling a
6142 const_double_operand. */
6146 else if (mode
== DFmode
|| mode
== DDmode
)
6151 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv
, l
);
6153 REAL_VALUE_TO_TARGET_DOUBLE (*rv
, l
);
6155 /* See the second (32-bit) and third (64-bit) define_split
6156 in rs6000.md handling a const_double_operand. */
6157 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 1] << 32;
6158 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffffUL
;
6161 else if (mode
== TFmode
|| mode
== TDmode
6162 || mode
== KFmode
|| mode
== IFmode
)
6168 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv
, l
);
6170 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv
, l
);
6172 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 3] << 32;
6173 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 2] & 0xffffffffUL
;
6174 insns
= num_insns_constant_multi (val
, DImode
);
6175 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 2 : 1] << 32;
6176 val
|= l
[WORDS_BIG_ENDIAN
? 3 : 0] & 0xffffffffUL
;
6177 insns
+= num_insns_constant_multi (val
, DImode
);
6189 return num_insns_constant_multi (val
, mode
);
6192 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6193 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6194 corresponding element of the vector, but for V4SFmode, the
6195 corresponding "float" is interpreted as an SImode integer. */
6198 const_vector_elt_as_int (rtx op
, unsigned int elt
)
6202 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6203 gcc_assert (GET_MODE (op
) != V2DImode
6204 && GET_MODE (op
) != V2DFmode
);
6206 tmp
= CONST_VECTOR_ELT (op
, elt
);
6207 if (GET_MODE (op
) == V4SFmode
)
6208 tmp
= gen_lowpart (SImode
, tmp
);
6209 return INTVAL (tmp
);
6212 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6213 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6214 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6215 all items are set to the same value and contain COPIES replicas of the
6216 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6217 operand and the others are set to the value of the operand's msb. */
6220 vspltis_constant (rtx op
, unsigned step
, unsigned copies
)
6222 machine_mode mode
= GET_MODE (op
);
6223 machine_mode inner
= GET_MODE_INNER (mode
);
6231 HOST_WIDE_INT splat_val
;
6232 HOST_WIDE_INT msb_val
;
6234 if (mode
== V2DImode
|| mode
== V2DFmode
|| mode
== V1TImode
)
6237 nunits
= GET_MODE_NUNITS (mode
);
6238 bitsize
= GET_MODE_BITSIZE (inner
);
6239 mask
= GET_MODE_MASK (inner
);
6241 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6243 msb_val
= val
>= 0 ? 0 : -1;
6245 if (val
== 0 && step
> 1)
6247 /* Special case for loading most significant bit with step > 1.
6248 In that case, match 0s in all but step-1s elements, where match
6250 for (i
= 1; i
< nunits
; ++i
)
6252 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6253 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6254 if ((i
& (step
- 1)) == step
- 1)
6256 if (!EASY_VECTOR_MSB (elt_val
, inner
))
6266 /* Construct the value to be splatted, if possible. If not, return 0. */
6267 for (i
= 2; i
<= copies
; i
*= 2)
6269 HOST_WIDE_INT small_val
;
6271 small_val
= splat_val
>> bitsize
;
6273 if (splat_val
!= ((HOST_WIDE_INT
)
6274 ((unsigned HOST_WIDE_INT
) small_val
<< bitsize
)
6275 | (small_val
& mask
)))
6277 splat_val
= small_val
;
6278 inner
= smallest_int_mode_for_size (bitsize
);
6281 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6282 if (EASY_VECTOR_15 (splat_val
))
6285 /* Also check if we can splat, and then add the result to itself. Do so if
6286 the value is positive, of if the splat instruction is using OP's mode;
6287 for splat_val < 0, the splat and the add should use the same mode. */
6288 else if (EASY_VECTOR_15_ADD_SELF (splat_val
)
6289 && (splat_val
>= 0 || (step
== 1 && copies
== 1)))
6292 /* Also check if are loading up the most significant bit which can be done by
6293 loading up -1 and shifting the value left by -1. Only do this for
6294 step 1 here, for larger steps it is done earlier. */
6295 else if (EASY_VECTOR_MSB (splat_val
, inner
) && step
== 1)
6301 /* Check if VAL is present in every STEP-th element, and the
6302 other elements are filled with its most significant bit. */
6303 for (i
= 1; i
< nunits
; ++i
)
6305 HOST_WIDE_INT desired_val
;
6306 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6307 if ((i
& (step
- 1)) == 0)
6310 desired_val
= msb_val
;
6312 if (desired_val
!= const_vector_elt_as_int (op
, elt
))
6319 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
6320 instruction, filling in the bottom elements with 0 or -1.
6322 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6323 for the number of zeroes to shift in, or negative for the number of 0xff
6326 OP is a CONST_VECTOR. */
6329 vspltis_shifted (rtx op
)
6331 machine_mode mode
= GET_MODE (op
);
6332 machine_mode inner
= GET_MODE_INNER (mode
);
6340 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
)
6343 /* We need to create pseudo registers to do the shift, so don't recognize
6344 shift vector constants after reload. Don't match it even before RA
6345 after split1 is done, because there won't be further splitting pass
6346 before RA to do the splitting. */
6347 if (!can_create_pseudo_p ()
6348 || (cfun
->curr_properties
& PROP_rtl_split_insns
))
6351 nunits
= GET_MODE_NUNITS (mode
);
6352 mask
= GET_MODE_MASK (inner
);
6354 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? 0 : nunits
- 1);
6356 /* Check if the value can really be the operand of a vspltis[bhw]. */
6357 if (EASY_VECTOR_15 (val
))
6360 /* Also check if we are loading up the most significant bit which can be done
6361 by loading up -1 and shifting the value left by -1. */
6362 else if (EASY_VECTOR_MSB (val
, inner
))
6368 /* Check if VAL is present in every STEP-th element until we find elements
6369 that are 0 or all 1 bits. */
6370 for (i
= 1; i
< nunits
; ++i
)
6372 unsigned elt
= BYTES_BIG_ENDIAN
? i
: nunits
- 1 - i
;
6373 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6375 /* If the value isn't the splat value, check for the remaining elements
6381 for (j
= i
+1; j
< nunits
; ++j
)
6383 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6384 if (const_vector_elt_as_int (op
, elt2
) != 0)
6388 return (nunits
- i
) * GET_MODE_SIZE (inner
);
6391 else if ((elt_val
& mask
) == mask
)
6393 for (j
= i
+1; j
< nunits
; ++j
)
6395 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6396 if ((const_vector_elt_as_int (op
, elt2
) & mask
) != mask
)
6400 return -((nunits
- i
) * GET_MODE_SIZE (inner
));
6408 /* If all elements are equal, we don't need to do VSLDOI. */
6413 /* Return non-zero (element mode byte size) if OP is of the given MODE
6414 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6417 easy_altivec_constant (rtx op
, machine_mode mode
)
6419 unsigned step
, copies
;
6421 if (mode
== VOIDmode
)
6422 mode
= GET_MODE (op
);
6423 else if (mode
!= GET_MODE (op
))
6426 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6428 if (mode
== V2DFmode
)
6429 return zero_constant (op
, mode
) ? 8 : 0;
6431 else if (mode
== V2DImode
)
6433 if (!CONST_INT_P (CONST_VECTOR_ELT (op
, 0))
6434 || !CONST_INT_P (CONST_VECTOR_ELT (op
, 1)))
6437 if (zero_constant (op
, mode
))
6440 if (INTVAL (CONST_VECTOR_ELT (op
, 0)) == -1
6441 && INTVAL (CONST_VECTOR_ELT (op
, 1)) == -1)
6447 /* V1TImode is a special container for TImode. Ignore for now. */
6448 else if (mode
== V1TImode
)
6451 /* Start with a vspltisw. */
6452 step
= GET_MODE_NUNITS (mode
) / 4;
6455 if (vspltis_constant (op
, step
, copies
))
6458 /* Then try with a vspltish. */
6464 if (vspltis_constant (op
, step
, copies
))
6467 /* And finally a vspltisb. */
6473 if (vspltis_constant (op
, step
, copies
))
6476 if (vspltis_shifted (op
) != 0)
6477 return GET_MODE_SIZE (GET_MODE_INNER (mode
));
6482 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6483 result is OP. Abort if it is not possible. */
6486 gen_easy_altivec_constant (rtx op
)
6488 machine_mode mode
= GET_MODE (op
);
6489 int nunits
= GET_MODE_NUNITS (mode
);
6490 rtx val
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6491 unsigned step
= nunits
/ 4;
6492 unsigned copies
= 1;
6494 /* Start with a vspltisw. */
6495 if (vspltis_constant (op
, step
, copies
))
6496 return gen_rtx_VEC_DUPLICATE (V4SImode
, gen_lowpart (SImode
, val
));
6498 /* Then try with a vspltish. */
6504 if (vspltis_constant (op
, step
, copies
))
6505 return gen_rtx_VEC_DUPLICATE (V8HImode
, gen_lowpart (HImode
, val
));
6507 /* And finally a vspltisb. */
6513 if (vspltis_constant (op
, step
, copies
))
6514 return gen_rtx_VEC_DUPLICATE (V16QImode
, gen_lowpart (QImode
, val
));
6519 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6520 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6522 Return the number of instructions needed (1 or 2) into the address pointed
6525 Return the constant that is being split via CONSTANT_PTR. */
6528 xxspltib_constant_p (rtx op
,
6533 size_t nunits
= GET_MODE_NUNITS (mode
);
6535 HOST_WIDE_INT value
;
6538 /* Set the returned values to out of bound values. */
6539 *num_insns_ptr
= -1;
6540 *constant_ptr
= 256;
6542 if (!TARGET_P9_VECTOR
)
6545 if (mode
== VOIDmode
)
6546 mode
= GET_MODE (op
);
6548 else if (mode
!= GET_MODE (op
) && GET_MODE (op
) != VOIDmode
)
6551 /* Handle (vec_duplicate <constant>). */
6552 if (GET_CODE (op
) == VEC_DUPLICATE
)
6554 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6555 && mode
!= V2DImode
)
6558 element
= XEXP (op
, 0);
6559 if (!CONST_INT_P (element
))
6562 value
= INTVAL (element
);
6563 if (!IN_RANGE (value
, -128, 127))
6567 /* Handle (const_vector [...]). */
6568 else if (GET_CODE (op
) == CONST_VECTOR
)
6570 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6571 && mode
!= V2DImode
)
6574 element
= CONST_VECTOR_ELT (op
, 0);
6575 if (!CONST_INT_P (element
))
6578 value
= INTVAL (element
);
6579 if (!IN_RANGE (value
, -128, 127))
6582 for (i
= 1; i
< nunits
; i
++)
6584 element
= CONST_VECTOR_ELT (op
, i
);
6585 if (!CONST_INT_P (element
))
6588 if (value
!= INTVAL (element
))
6593 /* Handle integer constants being loaded into the upper part of the VSX
6594 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6595 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6596 else if (CONST_INT_P (op
))
6598 if (!SCALAR_INT_MODE_P (mode
))
6601 value
= INTVAL (op
);
6602 if (!IN_RANGE (value
, -128, 127))
6605 if (!IN_RANGE (value
, -1, 0))
6607 if (!(reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
))
6610 if (EASY_VECTOR_15 (value
))
6618 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6619 sign extend. Special case 0/-1 to allow getting any VSX register instead
6620 of an Altivec register. */
6621 if ((mode
== V4SImode
|| mode
== V8HImode
) && !IN_RANGE (value
, -1, 0)
6622 && EASY_VECTOR_15 (value
))
6625 /* Return # of instructions and the constant byte for XXSPLTIB. */
6626 if (mode
== V16QImode
)
6629 else if (IN_RANGE (value
, -1, 0))
6632 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6633 single XXSPLTIW or XXSPLTIDP instruction. */
6634 else if (vsx_prefixed_constant (op
, mode
))
6637 /* Return XXSPLITB followed by a sign extend operation to convert the
6638 constant to V8HImode or V4SImode. */
6642 *constant_ptr
= (int) value
;
6646 /* Return true if OP mode is V2DI and can be synthesized with ISA 2.07
6647 instructions vupkhsw and vspltisw.
6649 Return the constant that is being split via CONSTANT_PTR. */
6652 vspltisw_vupkhsw_constant_p (rtx op
, machine_mode mode
, int *constant_ptr
)
6654 HOST_WIDE_INT value
;
6657 if (!TARGET_P8_VECTOR
)
6660 if (mode
!= V2DImode
)
6663 if (!const_vec_duplicate_p (op
, &elt
))
6666 value
= INTVAL (elt
);
6667 if (value
== 0 || value
== 1
6668 || !EASY_VECTOR_15 (value
))
6672 *constant_ptr
= (int) value
;
6677 output_vec_const_move (rtx
*operands
)
6685 mode
= GET_MODE (dest
);
6689 bool dest_vmx_p
= ALTIVEC_REGNO_P (REGNO (dest
));
6690 int xxspltib_value
= 256;
6693 if (zero_constant (vec
, mode
))
6695 if (TARGET_P9_VECTOR
)
6696 return "xxspltib %x0,0";
6698 else if (dest_vmx_p
)
6699 return "vspltisw %0,0";
6702 return "xxlxor %x0,%x0,%x0";
6705 if (all_ones_constant (vec
, mode
))
6707 if (TARGET_P9_VECTOR
)
6708 return "xxspltib %x0,255";
6710 else if (dest_vmx_p
)
6711 return "vspltisw %0,-1";
6713 else if (TARGET_P8_VECTOR
)
6714 return "xxlorc %x0,%x0,%x0";
6720 vec_const_128bit_type vsx_const
;
6721 if (TARGET_POWER10
&& vec_const_128bit_to_bytes (vec
, mode
, &vsx_const
))
6723 unsigned imm
= constant_generates_lxvkq (&vsx_const
);
6726 operands
[2] = GEN_INT (imm
);
6727 return "lxvkq %x0,%2";
6730 imm
= constant_generates_xxspltiw (&vsx_const
);
6733 operands
[2] = GEN_INT (imm
);
6734 return "xxspltiw %x0,%2";
6737 imm
= constant_generates_xxspltidp (&vsx_const
);
6740 operands
[2] = GEN_INT (imm
);
6741 return "xxspltidp %x0,%2";
6745 if (TARGET_P9_VECTOR
6746 && xxspltib_constant_p (vec
, mode
, &num_insns
, &xxspltib_value
))
6750 operands
[2] = GEN_INT (xxspltib_value
& 0xff);
6751 return "xxspltib %x0,%2";
6762 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest
)));
6763 if (zero_constant (vec
, mode
))
6764 return "vspltisw %0,0";
6766 if (all_ones_constant (vec
, mode
))
6767 return "vspltisw %0,-1";
6769 /* Do we need to construct a value using VSLDOI? */
6770 shift
= vspltis_shifted (vec
);
6774 splat_vec
= gen_easy_altivec_constant (vec
);
6775 gcc_assert (GET_CODE (splat_vec
) == VEC_DUPLICATE
);
6776 operands
[1] = XEXP (splat_vec
, 0);
6777 if (!EASY_VECTOR_15 (INTVAL (operands
[1])))
6780 switch (GET_MODE (splat_vec
))
6783 return "vspltisw %0,%1";
6786 return "vspltish %0,%1";
6789 return "vspltisb %0,%1";
6799 /* Initialize vector TARGET to VALS. */
6802 rs6000_expand_vector_init (rtx target
, rtx vals
)
6804 machine_mode mode
= GET_MODE (target
);
6805 machine_mode inner_mode
= GET_MODE_INNER (mode
);
6806 unsigned int n_elts
= GET_MODE_NUNITS (mode
);
6807 int n_var
= 0, one_var
= -1;
6808 bool all_same
= true, all_const_zero
= true;
6812 for (i
= 0; i
< n_elts
; ++i
)
6814 x
= XVECEXP (vals
, 0, i
);
6815 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
6816 ++n_var
, one_var
= i
;
6817 else if (x
!= CONST0_RTX (inner_mode
))
6818 all_const_zero
= false;
6820 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
6826 rtx const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
6827 bool int_vector_p
= (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
);
6828 if ((int_vector_p
|| TARGET_VSX
) && all_const_zero
)
6830 /* Zero register. */
6831 emit_move_insn (target
, CONST0_RTX (mode
));
6834 else if (int_vector_p
&& easy_vector_constant (const_vec
, mode
))
6836 /* Splat immediate. */
6837 emit_insn (gen_rtx_SET (target
, const_vec
));
6842 /* Load from constant pool. */
6843 emit_move_insn (target
, const_vec
);
6848 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6849 if (VECTOR_MEM_VSX_P (mode
) && (mode
== V2DFmode
|| mode
== V2DImode
))
6853 size_t num_elements
= all_same
? 1 : 2;
6854 for (i
= 0; i
< num_elements
; i
++)
6856 op
[i
] = XVECEXP (vals
, 0, i
);
6857 /* Just in case there is a SUBREG with a smaller mode, do a
6859 if (GET_MODE (op
[i
]) != inner_mode
)
6861 rtx tmp
= gen_reg_rtx (inner_mode
);
6862 convert_move (tmp
, op
[i
], 0);
6865 /* Allow load with splat double word. */
6866 else if (MEM_P (op
[i
]))
6869 op
[i
] = force_reg (inner_mode
, op
[i
]);
6871 else if (!REG_P (op
[i
]))
6872 op
[i
] = force_reg (inner_mode
, op
[i
]);
6877 if (mode
== V2DFmode
)
6878 emit_insn (gen_vsx_splat_v2df (target
, op
[0]));
6880 emit_insn (gen_vsx_splat_v2di (target
, op
[0]));
6884 if (mode
== V2DFmode
)
6885 emit_insn (gen_vsx_concat_v2df (target
, op
[0], op
[1]));
6887 emit_insn (gen_vsx_concat_v2di (target
, op
[0], op
[1]));
6892 /* Special case initializing vector int if we are on 64-bit systems with
6893 direct move or we have the ISA 3.0 instructions. */
6894 if (mode
== V4SImode
&& VECTOR_MEM_VSX_P (V4SImode
)
6895 && TARGET_DIRECT_MOVE_64BIT
)
6899 rtx element0
= XVECEXP (vals
, 0, 0);
6900 if (MEM_P (element0
))
6901 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6903 element0
= force_reg (SImode
, element0
);
6905 if (TARGET_P9_VECTOR
)
6906 emit_insn (gen_vsx_splat_v4si (target
, element0
));
6909 rtx tmp
= gen_reg_rtx (DImode
);
6910 emit_insn (gen_zero_extendsidi2 (tmp
, element0
));
6911 emit_insn (gen_vsx_splat_v4si_di (target
, tmp
));
6920 for (i
= 0; i
< 4; i
++)
6921 elements
[i
] = force_reg (SImode
, XVECEXP (vals
, 0, i
));
6923 emit_insn (gen_vsx_init_v4si (target
, elements
[0], elements
[1],
6924 elements
[2], elements
[3]));
6929 /* With single precision floating point on VSX, know that internally single
6930 precision is actually represented as a double, and either make 2 V2DF
6931 vectors, and convert these vectors to single precision, or do one
6932 conversion, and splat the result to the other elements. */
6933 if (mode
== V4SFmode
&& VECTOR_MEM_VSX_P (V4SFmode
))
6937 rtx element0
= XVECEXP (vals
, 0, 0);
6939 if (TARGET_P9_VECTOR
)
6941 if (MEM_P (element0
))
6942 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6944 emit_insn (gen_vsx_splat_v4sf (target
, element0
));
6949 rtx freg
= gen_reg_rtx (V4SFmode
);
6950 rtx sreg
= force_reg (SFmode
, element0
);
6951 rtx cvt
= (TARGET_XSCVDPSPN
6952 ? gen_vsx_xscvdpspn_scalar (freg
, sreg
)
6953 : gen_vsx_xscvdpsp_scalar (freg
, sreg
));
6956 emit_insn (gen_vsx_xxspltw_v4sf_direct (target
, freg
,
6962 if (TARGET_P8_VECTOR
&& TARGET_POWERPC64
)
6968 for (i
= 0; i
< 4; i
++)
6970 tmp_si
[i
] = gen_reg_rtx (SImode
);
6971 tmp_di
[i
] = gen_reg_rtx (DImode
);
6972 mrg_di
[i
] = gen_reg_rtx (DImode
);
6973 tmp_sf
[i
] = force_reg (SFmode
, XVECEXP (vals
, 0, i
));
6974 emit_insn (gen_movsi_from_sf (tmp_si
[i
], tmp_sf
[i
]));
6975 emit_insn (gen_zero_extendsidi2 (tmp_di
[i
], tmp_si
[i
]));
6978 if (!BYTES_BIG_ENDIAN
)
6980 std::swap (tmp_di
[0], tmp_di
[1]);
6981 std::swap (tmp_di
[2], tmp_di
[3]);
6984 emit_insn (gen_ashldi3 (mrg_di
[0], tmp_di
[0], GEN_INT (32)));
6985 emit_insn (gen_iordi3 (mrg_di
[1], mrg_di
[0], tmp_di
[1]));
6986 emit_insn (gen_ashldi3 (mrg_di
[2], tmp_di
[2], GEN_INT (32)));
6987 emit_insn (gen_iordi3 (mrg_di
[3], mrg_di
[2], tmp_di
[3]));
6989 rtx tmp_v2di
= gen_reg_rtx (V2DImode
);
6990 emit_insn (gen_vsx_concat_v2di (tmp_v2di
, mrg_di
[1], mrg_di
[3]));
6991 emit_move_insn (target
, gen_lowpart (V4SFmode
, tmp_v2di
));
6995 rtx dbl_even
= gen_reg_rtx (V2DFmode
);
6996 rtx dbl_odd
= gen_reg_rtx (V2DFmode
);
6997 rtx flt_even
= gen_reg_rtx (V4SFmode
);
6998 rtx flt_odd
= gen_reg_rtx (V4SFmode
);
6999 rtx op0
= force_reg (SFmode
, XVECEXP (vals
, 0, 0));
7000 rtx op1
= force_reg (SFmode
, XVECEXP (vals
, 0, 1));
7001 rtx op2
= force_reg (SFmode
, XVECEXP (vals
, 0, 2));
7002 rtx op3
= force_reg (SFmode
, XVECEXP (vals
, 0, 3));
7004 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op1
));
7005 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op2
, op3
));
7006 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
7007 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
7008 rs6000_expand_extract_even (target
, flt_even
, flt_odd
);
7014 /* Special case initializing vector short/char that are splats if we are on
7015 64-bit systems with direct move. */
7016 if (all_same
&& TARGET_DIRECT_MOVE_64BIT
7017 && (mode
== V16QImode
|| mode
== V8HImode
))
7019 rtx op0
= XVECEXP (vals
, 0, 0);
7020 rtx di_tmp
= gen_reg_rtx (DImode
);
7023 op0
= force_reg (GET_MODE_INNER (mode
), op0
);
7025 if (mode
== V16QImode
)
7027 emit_insn (gen_zero_extendqidi2 (di_tmp
, op0
));
7028 emit_insn (gen_vsx_vspltb_di (target
, di_tmp
));
7032 if (mode
== V8HImode
)
7034 emit_insn (gen_zero_extendhidi2 (di_tmp
, op0
));
7035 emit_insn (gen_vsx_vsplth_di (target
, di_tmp
));
7040 /* Store value to stack temp. Load vector element. Splat. However, splat
7041 of 64-bit items is not supported on Altivec. */
7042 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
7044 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7045 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0),
7046 XVECEXP (vals
, 0, 0));
7047 x
= gen_rtx_UNSPEC (VOIDmode
,
7048 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7049 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7051 gen_rtx_SET (target
, mem
),
7053 x
= gen_rtx_VEC_SELECT (inner_mode
, target
,
7054 gen_rtx_PARALLEL (VOIDmode
,
7055 gen_rtvec (1, const0_rtx
)));
7056 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
7060 /* One field is non-constant. Load constant then overwrite
7064 rtx copy
= copy_rtx (vals
);
7066 /* Load constant part of vector, substitute neighboring value for
7068 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
7069 rs6000_expand_vector_init (target
, copy
);
7071 /* Insert variable. */
7072 rs6000_expand_vector_set (target
, XVECEXP (vals
, 0, one_var
),
7077 if (TARGET_DIRECT_MOVE
&& (mode
== V16QImode
|| mode
== V8HImode
))
7080 /* Force the values into word_mode registers. */
7081 for (i
= 0; i
< n_elts
; i
++)
7083 rtx tmp
= force_reg (inner_mode
, XVECEXP (vals
, 0, i
));
7084 machine_mode tmode
= TARGET_POWERPC64
? DImode
: SImode
;
7085 op
[i
] = simplify_gen_subreg (tmode
, tmp
, inner_mode
, 0);
7088 /* Take unsigned char big endianness on 64bit as example for below
7089 construction, the input values are: A, B, C, D, ..., O, P. */
7091 if (TARGET_DIRECT_MOVE_128
)
7093 /* Move to VSX register with vec_concat, each has 2 values.
7094 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
7095 vr1[1] = { xxxxxxxC, xxxxxxxD };
7097 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
7099 for (i
= 0; i
< n_elts
/ 2; i
++)
7101 vr1
[i
] = gen_reg_rtx (V2DImode
);
7102 emit_insn (gen_vsx_concat_v2di (vr1
[i
], op
[i
* 2],
7106 /* Pack vectors with 2 values into vectors with 4 values.
7107 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
7108 vr2[1] = { xxxExxxF, xxxGxxxH };
7109 vr2[1] = { xxxIxxxJ, xxxKxxxL };
7110 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
7112 for (i
= 0; i
< n_elts
/ 4; i
++)
7114 vr2
[i
] = gen_reg_rtx (V4SImode
);
7115 emit_insn (gen_altivec_vpkudum (vr2
[i
], vr1
[i
* 2],
7119 /* Pack vectors with 4 values into vectors with 8 values.
7120 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7121 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7123 for (i
= 0; i
< n_elts
/ 8; i
++)
7125 vr3
[i
] = gen_reg_rtx (V8HImode
);
7126 emit_insn (gen_altivec_vpkuwum (vr3
[i
], vr2
[i
* 2],
7130 /* If it's V8HImode, it's done and return it. */
7131 if (mode
== V8HImode
)
7133 emit_insn (gen_rtx_SET (target
, vr3
[0]));
7137 /* Pack vectors with 8 values into 16 values. */
7138 rtx res
= gen_reg_rtx (V16QImode
);
7139 emit_insn (gen_altivec_vpkuhum (res
, vr3
[0], vr3
[1]));
7140 emit_insn (gen_rtx_SET (target
, res
));
7144 rtx (*merge_v16qi
) (rtx
, rtx
, rtx
) = NULL
;
7145 rtx (*merge_v8hi
) (rtx
, rtx
, rtx
) = NULL
;
7146 rtx (*merge_v4si
) (rtx
, rtx
, rtx
) = NULL
;
7149 /* Set up some common gen routines and values. */
7150 if (BYTES_BIG_ENDIAN
)
7152 if (mode
== V16QImode
)
7154 merge_v16qi
= gen_altivec_vmrghb
;
7155 merge_v8hi
= gen_altivec_vmrglh
;
7158 merge_v8hi
= gen_altivec_vmrghh
;
7160 merge_v4si
= gen_altivec_vmrglw
;
7161 perm_idx
= GEN_INT (3);
7165 if (mode
== V16QImode
)
7167 merge_v16qi
= gen_altivec_vmrglb
;
7168 merge_v8hi
= gen_altivec_vmrghh
;
7171 merge_v8hi
= gen_altivec_vmrglh
;
7173 merge_v4si
= gen_altivec_vmrghw
;
7174 perm_idx
= GEN_INT (0);
7177 /* Move to VSX register with direct move.
7178 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7179 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7181 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7183 for (i
= 0; i
< n_elts
; i
++)
7185 vr_qi
[i
] = gen_reg_rtx (V16QImode
);
7186 if (TARGET_POWERPC64
)
7187 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi
[i
], op
[i
]));
7189 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi
[i
], op
[i
]));
7192 /* Merge/move to vector short.
7193 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7194 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7196 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7198 for (i
= 0; i
< 8; i
++)
7201 if (mode
== V16QImode
)
7203 tmp
= gen_reg_rtx (V16QImode
);
7204 emit_insn (merge_v16qi (tmp
, vr_qi
[2 * i
], vr_qi
[2 * i
+ 1]));
7206 vr_hi
[i
] = gen_reg_rtx (V8HImode
);
7207 emit_move_insn (vr_hi
[i
], gen_lowpart (V8HImode
, tmp
));
7210 /* Merge vector short to vector int.
7211 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7212 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7214 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7216 for (i
= 0; i
< 4; i
++)
7218 rtx tmp
= gen_reg_rtx (V8HImode
);
7219 emit_insn (merge_v8hi (tmp
, vr_hi
[2 * i
], vr_hi
[2 * i
+ 1]));
7220 vr_si
[i
] = gen_reg_rtx (V4SImode
);
7221 emit_move_insn (vr_si
[i
], gen_lowpart (V4SImode
, tmp
));
7224 /* Merge vector int to vector long.
7225 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7226 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7228 for (i
= 0; i
< 2; i
++)
7230 rtx tmp
= gen_reg_rtx (V4SImode
);
7231 emit_insn (merge_v4si (tmp
, vr_si
[2 * i
], vr_si
[2 * i
+ 1]));
7232 vr_di
[i
] = gen_reg_rtx (V2DImode
);
7233 emit_move_insn (vr_di
[i
], gen_lowpart (V2DImode
, tmp
));
7236 rtx res
= gen_reg_rtx (V2DImode
);
7237 emit_insn (gen_vsx_xxpermdi_v2di (res
, vr_di
[0], vr_di
[1], perm_idx
));
7238 emit_insn (gen_rtx_SET (target
, gen_lowpart (mode
, res
)));
7244 /* Construct the vector in memory one field at a time
7245 and load the whole vector. */
7246 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7247 for (i
= 0; i
< n_elts
; i
++)
7248 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
7249 i
* GET_MODE_SIZE (inner_mode
)),
7250 XVECEXP (vals
, 0, i
));
7251 emit_move_insn (target
, mem
);
7254 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7255 is variable and also counts by vector element size for p9 and above. */
7258 rs6000_expand_vector_set_var_p9 (rtx target
, rtx val
, rtx idx
)
7260 machine_mode mode
= GET_MODE (target
);
7262 gcc_assert (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (idx
));
7264 machine_mode inner_mode
= GET_MODE (val
);
7266 int width
= GET_MODE_SIZE (inner_mode
);
7268 gcc_assert (width
>= 1 && width
<= 8);
7270 int shift
= exact_log2 (width
);
7272 machine_mode idx_mode
= GET_MODE (idx
);
7274 machine_mode shift_mode
;
7275 /* Gen function pointers for shifting left and generation of permutation
7277 rtx (*gen_ashl
) (rtx
, rtx
, rtx
);
7278 rtx (*gen_pcvr1
) (rtx
, rtx
);
7279 rtx (*gen_pcvr2
) (rtx
, rtx
);
7281 if (TARGET_POWERPC64
)
7283 shift_mode
= DImode
;
7284 gen_ashl
= gen_ashldi3
;
7285 gen_pcvr1
= BYTES_BIG_ENDIAN
? gen_altivec_lvsl_reg_di
7286 : gen_altivec_lvsr_reg_di
;
7287 gen_pcvr2
= BYTES_BIG_ENDIAN
? gen_altivec_lvsr_reg_di
7288 : gen_altivec_lvsl_reg_di
;
7292 shift_mode
= SImode
;
7293 gen_ashl
= gen_ashlsi3
;
7294 gen_pcvr1
= BYTES_BIG_ENDIAN
? gen_altivec_lvsl_reg_si
7295 : gen_altivec_lvsr_reg_si
;
7296 gen_pcvr2
= BYTES_BIG_ENDIAN
? gen_altivec_lvsr_reg_si
7297 : gen_altivec_lvsl_reg_si
;
7299 /* Generate the IDX for permute shift, width is the vector element size.
7300 idx = idx * width. */
7301 rtx tmp
= gen_reg_rtx (shift_mode
);
7302 idx
= convert_modes (shift_mode
, idx_mode
, idx
, 1);
7304 emit_insn (gen_ashl (tmp
, idx
, GEN_INT (shift
)));
7306 /* Generate one permutation control vector used for rotating the element
7307 at to-insert position to element zero in target vector. lvsl is
7308 used for big endianness while lvsr is used for little endianness:
7309 lvs[lr] v1,0,idx. */
7310 rtx pcvr1
= gen_reg_rtx (V16QImode
);
7311 emit_insn (gen_pcvr1 (pcvr1
, tmp
));
7313 rtx sub_target
= simplify_gen_subreg (V16QImode
, target
, mode
, 0);
7314 rtx perm1
= gen_altivec_vperm_v8hiv16qi (sub_target
, sub_target
, sub_target
,
7318 /* Insert val into element 0 of target vector. */
7319 rs6000_expand_vector_set (target
, val
, const0_rtx
);
7321 /* Rotate back with a reversed permutation control vector generated from:
7322 lvs[rl] v2,0,idx. */
7323 rtx pcvr2
= gen_reg_rtx (V16QImode
);
7324 emit_insn (gen_pcvr2 (pcvr2
, tmp
));
7326 rtx perm2
= gen_altivec_vperm_v8hiv16qi (sub_target
, sub_target
, sub_target
,
7331 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7332 is variable and also counts by vector element size for p7 & p8. */
7335 rs6000_expand_vector_set_var_p7 (rtx target
, rtx val
, rtx idx
)
7337 machine_mode mode
= GET_MODE (target
);
7339 gcc_assert (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (idx
));
7341 machine_mode inner_mode
= GET_MODE (val
);
7342 HOST_WIDE_INT mode_mask
= GET_MODE_MASK (inner_mode
);
7344 int width
= GET_MODE_SIZE (inner_mode
);
7345 gcc_assert (width
>= 1 && width
<= 4);
7347 int shift
= exact_log2 (width
);
7349 machine_mode idx_mode
= GET_MODE (idx
);
7351 machine_mode shift_mode
;
7352 rtx (*gen_ashl
)(rtx
, rtx
, rtx
);
7353 rtx (*gen_add
)(rtx
, rtx
, rtx
);
7354 rtx (*gen_sub
)(rtx
, rtx
, rtx
);
7355 rtx (*gen_lvsl
)(rtx
, rtx
);
7357 if (TARGET_POWERPC64
)
7359 shift_mode
= DImode
;
7360 gen_ashl
= gen_ashldi3
;
7361 gen_add
= gen_adddi3
;
7362 gen_sub
= gen_subdi3
;
7363 gen_lvsl
= gen_altivec_lvsl_reg_di
;
7367 shift_mode
= SImode
;
7368 gen_ashl
= gen_ashlsi3
;
7369 gen_add
= gen_addsi3
;
7370 gen_sub
= gen_subsi3
;
7371 gen_lvsl
= gen_altivec_lvsl_reg_si
;
7374 /* idx = idx * width. */
7375 rtx tmp
= gen_reg_rtx (shift_mode
);
7376 idx
= convert_modes (shift_mode
, idx_mode
, idx
, 1);
7378 emit_insn (gen_ashl (tmp
, idx
, GEN_INT (shift
)));
7380 /* For LE: idx = idx + 8. */
7381 if (!BYTES_BIG_ENDIAN
)
7382 emit_insn (gen_add (tmp
, tmp
, GEN_INT (8)));
7384 emit_insn (gen_sub (tmp
, GEN_INT (24 - width
), tmp
));
7387 DImode: 0xffffffffffffffff0000000000000000
7388 SImode: 0x00000000ffffffff0000000000000000
7389 HImode: 0x000000000000ffff0000000000000000.
7390 QImode: 0x00000000000000ff0000000000000000. */
7391 rtx mask
= gen_reg_rtx (V16QImode
);
7392 rtx mask_v2di
= gen_reg_rtx (V2DImode
);
7393 rtvec v
= rtvec_alloc (2);
7394 if (!BYTES_BIG_ENDIAN
)
7396 RTVEC_ELT (v
, 0) = gen_rtx_CONST_INT (DImode
, 0);
7397 RTVEC_ELT (v
, 1) = gen_rtx_CONST_INT (DImode
, mode_mask
);
7401 RTVEC_ELT (v
, 0) = gen_rtx_CONST_INT (DImode
, mode_mask
);
7402 RTVEC_ELT (v
, 1) = gen_rtx_CONST_INT (DImode
, 0);
7404 emit_insn (gen_vec_initv2didi (mask_v2di
, gen_rtx_PARALLEL (V2DImode
, v
)));
7405 rtx sub_mask
= simplify_gen_subreg (V16QImode
, mask_v2di
, V2DImode
, 0);
7406 emit_insn (gen_rtx_SET (mask
, sub_mask
));
7408 /* mtvsrd[wz] f0,tmp_val. */
7409 rtx tmp_val
= gen_reg_rtx (SImode
);
7410 if (inner_mode
== E_SFmode
)
7411 if (TARGET_DIRECT_MOVE_64BIT
)
7412 emit_insn (gen_movsi_from_sf (tmp_val
, val
));
7415 rtx stack
= rs6000_allocate_stack_temp (SFmode
, false, true);
7416 emit_insn (gen_movsf_hardfloat (stack
, val
));
7417 rtx stack2
= copy_rtx (stack
);
7418 PUT_MODE (stack2
, SImode
);
7419 emit_move_insn (tmp_val
, stack2
);
7422 tmp_val
= force_reg (SImode
, val
);
7424 rtx val_v16qi
= gen_reg_rtx (V16QImode
);
7425 rtx val_v2di
= gen_reg_rtx (V2DImode
);
7426 rtvec vec_val
= rtvec_alloc (2);
7427 if (!BYTES_BIG_ENDIAN
)
7429 RTVEC_ELT (vec_val
, 0) = gen_rtx_CONST_INT (DImode
, 0);
7430 RTVEC_ELT (vec_val
, 1) = tmp_val
;
7434 RTVEC_ELT (vec_val
, 0) = tmp_val
;
7435 RTVEC_ELT (vec_val
, 1) = gen_rtx_CONST_INT (DImode
, 0);
7438 gen_vec_initv2didi (val_v2di
, gen_rtx_PARALLEL (V2DImode
, vec_val
)));
7439 rtx sub_val
= simplify_gen_subreg (V16QImode
, val_v2di
, V2DImode
, 0);
7440 emit_insn (gen_rtx_SET (val_v16qi
, sub_val
));
7442 /* lvsl 13,0,idx. */
7443 rtx pcv
= gen_reg_rtx (V16QImode
);
7444 emit_insn (gen_lvsl (pcv
, tmp
));
7446 /* vperm 1,1,1,13. */
7447 /* vperm 0,0,0,13. */
7448 rtx val_perm
= gen_reg_rtx (V16QImode
);
7449 rtx mask_perm
= gen_reg_rtx (V16QImode
);
7450 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm
, val_v16qi
, val_v16qi
, pcv
));
7451 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm
, mask
, mask
, pcv
));
7453 rtx target_v16qi
= simplify_gen_subreg (V16QImode
, target
, mode
, 0);
7455 /* xxsel 34,34,32,33. */
7457 gen_vector_select_v16qi (target_v16qi
, target_v16qi
, val_perm
, mask_perm
));
7460 /* Set field ELT_RTX of TARGET to VAL. */
7463 rs6000_expand_vector_set (rtx target
, rtx val
, rtx elt_rtx
)
7465 machine_mode mode
= GET_MODE (target
);
7466 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7467 rtx reg
= gen_reg_rtx (mode
);
7469 int width
= GET_MODE_SIZE (inner_mode
);
7472 val
= force_reg (GET_MODE (val
), val
);
7474 if (VECTOR_MEM_VSX_P (mode
))
7476 if (!CONST_INT_P (elt_rtx
))
7478 /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
7479 when elt_rtx is variable. */
7480 if ((TARGET_P9_VECTOR
&& TARGET_POWERPC64
) || width
== 8)
7482 rs6000_expand_vector_set_var_p9 (target
, val
, elt_rtx
);
7485 else if (TARGET_VSX
)
7487 rs6000_expand_vector_set_var_p7 (target
, val
, elt_rtx
);
7491 gcc_assert (CONST_INT_P (elt_rtx
));
7494 rtx insn
= NULL_RTX
;
7496 if (mode
== V2DFmode
)
7497 insn
= gen_vsx_set_v2df (target
, target
, val
, elt_rtx
);
7499 else if (mode
== V2DImode
)
7500 insn
= gen_vsx_set_v2di (target
, target
, val
, elt_rtx
);
7502 else if (TARGET_P9_VECTOR
&& TARGET_POWERPC64
)
7504 if (mode
== V4SImode
)
7505 insn
= gen_vsx_set_v4si_p9 (target
, target
, val
, elt_rtx
);
7506 else if (mode
== V8HImode
)
7507 insn
= gen_vsx_set_v8hi_p9 (target
, target
, val
, elt_rtx
);
7508 else if (mode
== V16QImode
)
7509 insn
= gen_vsx_set_v16qi_p9 (target
, target
, val
, elt_rtx
);
7510 else if (mode
== V4SFmode
)
7511 insn
= gen_vsx_set_v4sf_p9 (target
, target
, val
, elt_rtx
);
7521 /* Simplify setting single element vectors like V1TImode. */
7522 if (GET_MODE_SIZE (mode
) == GET_MODE_SIZE (inner_mode
)
7523 && INTVAL (elt_rtx
) == 0)
7525 emit_move_insn (target
, gen_lowpart (mode
, val
));
7529 /* Load single variable value. */
7530 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7531 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0), val
);
7532 x
= gen_rtx_UNSPEC (VOIDmode
,
7533 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7534 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7536 gen_rtx_SET (reg
, mem
),
7539 /* Linear sequence. */
7540 mask
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
7541 for (i
= 0; i
< 16; ++i
)
7542 XVECEXP (mask
, 0, i
) = GEN_INT (i
);
7544 /* Set permute mask to insert element into target. */
7545 for (i
= 0; i
< width
; ++i
)
7546 XVECEXP (mask
, 0, INTVAL (elt_rtx
) * width
+ i
) = GEN_INT (i
+ 0x10);
7547 x
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (mask
, 0));
7549 if (BYTES_BIG_ENDIAN
)
7550 x
= gen_rtx_UNSPEC (mode
,
7551 gen_rtvec (3, target
, reg
,
7552 force_reg (V16QImode
, x
)),
7556 if (TARGET_P9_VECTOR
)
7557 x
= gen_rtx_UNSPEC (mode
,
7558 gen_rtvec (3, reg
, target
,
7559 force_reg (V16QImode
, x
)),
7563 /* Invert selector. We prefer to generate VNAND on P8 so
7564 that future fusion opportunities can kick in, but must
7565 generate VNOR elsewhere. */
7566 rtx notx
= gen_rtx_NOT (V16QImode
, force_reg (V16QImode
, x
));
7567 rtx iorx
= (TARGET_P8_VECTOR
7568 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
7569 : gen_rtx_AND (V16QImode
, notx
, notx
));
7570 rtx tmp
= gen_reg_rtx (V16QImode
);
7571 emit_insn (gen_rtx_SET (tmp
, iorx
));
7573 /* Permute with operands reversed and adjusted selector. */
7574 x
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, reg
, target
, tmp
),
7579 emit_insn (gen_rtx_SET (target
, x
));
7582 /* Extract field ELT from VEC into TARGET. */
7585 rs6000_expand_vector_extract (rtx target
, rtx vec
, rtx elt
)
7587 machine_mode mode
= GET_MODE (vec
);
7588 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7591 if (VECTOR_MEM_VSX_P (mode
) && CONST_INT_P (elt
))
7598 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7601 emit_insn (gen_vsx_extract_v2df (target
, vec
, elt
));
7604 emit_insn (gen_vsx_extract_v2di (target
, vec
, elt
));
7607 emit_insn (gen_vsx_extract_v4sf (target
, vec
, elt
));
7610 if (TARGET_DIRECT_MOVE_64BIT
)
7612 emit_insn (gen_vsx_extract_v16qi (target
, vec
, elt
));
7618 if (TARGET_DIRECT_MOVE_64BIT
)
7620 emit_insn (gen_vsx_extract_v8hi (target
, vec
, elt
));
7626 if (TARGET_DIRECT_MOVE_64BIT
)
7628 emit_insn (gen_vsx_extract_v4si (target
, vec
, elt
));
7634 else if (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (elt
)
7635 && TARGET_DIRECT_MOVE_64BIT
)
7637 if (GET_MODE (elt
) != DImode
)
7639 rtx tmp
= gen_reg_rtx (DImode
);
7640 convert_move (tmp
, elt
, 0);
7643 else if (!REG_P (elt
))
7644 elt
= force_reg (DImode
, elt
);
7649 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7653 emit_insn (gen_vsx_extract_v2df_var (target
, vec
, elt
));
7657 emit_insn (gen_vsx_extract_v2di_var (target
, vec
, elt
));
7661 emit_insn (gen_vsx_extract_v4sf_var (target
, vec
, elt
));
7665 emit_insn (gen_vsx_extract_v4si_var (target
, vec
, elt
));
7669 emit_insn (gen_vsx_extract_v8hi_var (target
, vec
, elt
));
7673 emit_insn (gen_vsx_extract_v16qi_var (target
, vec
, elt
));
7681 /* Allocate mode-sized buffer. */
7682 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7684 emit_move_insn (mem
, vec
);
7685 if (CONST_INT_P (elt
))
7687 int modulo_elt
= INTVAL (elt
) % GET_MODE_NUNITS (mode
);
7689 /* Add offset to field within buffer matching vector element. */
7690 mem
= adjust_address_nv (mem
, inner_mode
,
7691 modulo_elt
* GET_MODE_SIZE (inner_mode
));
7692 emit_move_insn (target
, adjust_address_nv (mem
, inner_mode
, 0));
7696 unsigned int ele_size
= GET_MODE_SIZE (inner_mode
);
7697 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (mode
) - 1);
7699 elt
= gen_rtx_AND (Pmode
, elt
, num_ele_m1
);
7701 elt
= gen_rtx_MULT (Pmode
, elt
, GEN_INT (ele_size
));
7702 rtx new_addr
= gen_rtx_PLUS (Pmode
, XEXP (mem
, 0), elt
);
7703 new_addr
= change_address (mem
, inner_mode
, new_addr
);
7704 emit_move_insn (target
, new_addr
);
7708 /* Return the offset within a memory object (MEM) of a vector type to a given
7709 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7710 the element is constant, we return a constant integer.
7712 Otherwise, we use a base register temporary to calculate the offset after
7713 masking it to fit within the bounds of the vector and scaling it. The
7714 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7715 built-in function. */
7718 get_vector_offset (rtx mem
, rtx element
, rtx base_tmp
, unsigned scalar_size
)
7720 if (CONST_INT_P (element
))
7721 return GEN_INT (INTVAL (element
) * scalar_size
);
7723 /* All insns should use the 'Q' constraint (address is a single register) if
7724 the element number is not a constant. */
7725 gcc_assert (satisfies_constraint_Q (mem
));
7727 /* Mask the element to make sure the element number is between 0 and the
7728 maximum number of elements - 1 so that we don't generate an address
7729 outside the vector. */
7730 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (GET_MODE (mem
)) - 1);
7731 rtx and_op
= gen_rtx_AND (Pmode
, element
, num_ele_m1
);
7732 emit_insn (gen_rtx_SET (base_tmp
, and_op
));
7734 /* Shift the element to get the byte offset from the element number. */
7735 int shift
= exact_log2 (scalar_size
);
7736 gcc_assert (shift
>= 0);
7740 rtx shift_op
= gen_rtx_ASHIFT (Pmode
, base_tmp
, GEN_INT (shift
));
7741 emit_insn (gen_rtx_SET (base_tmp
, shift_op
));
7747 /* Helper function update PC-relative addresses when we are adjusting a memory
7748 address (ADDR) to a vector to point to a scalar field within the vector with
7749 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
7750 use the base register temporary (BASE_TMP) to form the address. */
7753 adjust_vec_address_pcrel (rtx addr
, rtx element_offset
, rtx base_tmp
)
7755 rtx new_addr
= NULL
;
7757 gcc_assert (CONST_INT_P (element_offset
));
7759 if (GET_CODE (addr
) == CONST
)
7760 addr
= XEXP (addr
, 0);
7762 if (GET_CODE (addr
) == PLUS
)
7764 rtx op0
= XEXP (addr
, 0);
7765 rtx op1
= XEXP (addr
, 1);
7767 if (CONST_INT_P (op1
))
7769 HOST_WIDE_INT offset
7770 = INTVAL (XEXP (addr
, 1)) + INTVAL (element_offset
);
7777 rtx plus
= gen_rtx_PLUS (Pmode
, op0
, GEN_INT (offset
));
7778 new_addr
= gen_rtx_CONST (Pmode
, plus
);
7784 emit_move_insn (base_tmp
, addr
);
7785 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7789 else if (SYMBOL_REF_P (addr
) || LABEL_REF_P (addr
))
7791 rtx plus
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7792 new_addr
= gen_rtx_CONST (Pmode
, plus
);
7801 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7802 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7803 temporary (BASE_TMP) to fixup the address. Return the new memory address
7804 that is valid for reads or writes to a given register (SCALAR_REG).
7806 This function is expected to be called after reload is completed when we are
7807 splitting insns. The temporary BASE_TMP might be set multiple times with
7811 rs6000_adjust_vec_address (rtx scalar_reg
,
7815 machine_mode scalar_mode
)
7817 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7818 rtx addr
= XEXP (mem
, 0);
7821 gcc_assert (!reg_mentioned_p (base_tmp
, addr
));
7822 gcc_assert (!reg_mentioned_p (base_tmp
, element
));
7824 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7825 gcc_assert (GET_RTX_CLASS (GET_CODE (addr
)) != RTX_AUTOINC
);
7827 /* Calculate what we need to add to the address to get the element
7829 rtx element_offset
= get_vector_offset (mem
, element
, base_tmp
, scalar_size
);
7831 /* Create the new address pointing to the element within the vector. If we
7832 are adding 0, we don't have to change the address. */
7833 if (element_offset
== const0_rtx
)
7836 /* A simple indirect address can be converted into a reg + offset
7838 else if (REG_P (addr
) || SUBREG_P (addr
))
7839 new_addr
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7841 /* For references to local static variables, fold a constant offset into the
7843 else if (pcrel_local_address (addr
, Pmode
) && CONST_INT_P (element_offset
))
7844 new_addr
= adjust_vec_address_pcrel (addr
, element_offset
, base_tmp
);
7846 /* Optimize D-FORM addresses with constant offset with a constant element, to
7847 include the element offset in the address directly. */
7848 else if (GET_CODE (addr
) == PLUS
)
7850 rtx op0
= XEXP (addr
, 0);
7851 rtx op1
= XEXP (addr
, 1);
7853 gcc_assert (REG_P (op0
) || SUBREG_P (op0
));
7854 if (CONST_INT_P (op1
) && CONST_INT_P (element_offset
))
7856 /* op0 should never be r0, because r0+offset is not valid. But it
7857 doesn't hurt to make sure it is not r0. */
7858 gcc_assert (reg_or_subregno (op0
) != 0);
7860 /* D-FORM address with constant element number. */
7861 HOST_WIDE_INT offset
= INTVAL (op1
) + INTVAL (element_offset
);
7862 rtx offset_rtx
= GEN_INT (offset
);
7863 new_addr
= gen_rtx_PLUS (Pmode
, op0
, offset_rtx
);
7867 /* If we don't have a D-FORM address with a constant element number,
7868 add the two elements in the current address. Then add the offset.
7870 Previously, we tried to add the offset to OP1 and change the
7871 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7872 complicated because we had to verify that op1 was not GPR0 and we
7873 had a constant element offset (due to the way ADDI is defined).
7874 By doing the add of OP0 and OP1 first, and then adding in the
7875 offset, it has the benefit that if D-FORM instructions are
7876 allowed, the offset is part of the memory access to the vector
7878 emit_insn (gen_rtx_SET (base_tmp
, gen_rtx_PLUS (Pmode
, op0
, op1
)));
7879 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7885 emit_move_insn (base_tmp
, addr
);
7886 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7889 /* If the address isn't valid, move the address into the temporary base
7890 register. Some reasons it could not be valid include:
7892 The address offset overflowed the 16 or 34 bit offset size;
7893 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7894 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7895 Only X_FORM loads can be done, and the address is D_FORM. */
7897 enum insn_form iform
7898 = address_to_insn_form (new_addr
, scalar_mode
,
7899 reg_to_non_prefixed (scalar_reg
, scalar_mode
));
7901 if (iform
== INSN_FORM_BAD
)
7903 emit_move_insn (base_tmp
, new_addr
);
7904 new_addr
= base_tmp
;
7907 return change_address (mem
, scalar_mode
, new_addr
);
7910 /* Split a variable vec_extract operation into the component instructions. */
7913 rs6000_split_vec_extract_var (rtx dest
, rtx src
, rtx element
, rtx tmp_gpr
,
7916 machine_mode mode
= GET_MODE (src
);
7917 machine_mode scalar_mode
= GET_MODE_INNER (GET_MODE (src
));
7918 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7919 int byte_shift
= exact_log2 (scalar_size
);
7921 gcc_assert (byte_shift
>= 0);
7923 /* If we are given a memory address, optimize to load just the element. We
7924 don't have to adjust the vector element number on little endian
7928 emit_move_insn (dest
,
7929 rs6000_adjust_vec_address (dest
, src
, element
, tmp_gpr
,
7934 else if (REG_P (src
) || SUBREG_P (src
))
7936 int num_elements
= GET_MODE_NUNITS (mode
);
7937 int bits_in_element
= mode_to_bits (GET_MODE_INNER (mode
));
7938 int bit_shift
= 7 - exact_log2 (num_elements
);
7940 unsigned int dest_regno
= reg_or_subregno (dest
);
7941 unsigned int src_regno
= reg_or_subregno (src
);
7942 unsigned int element_regno
= reg_or_subregno (element
);
7944 gcc_assert (REG_P (tmp_gpr
));
7946 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7947 a general purpose register. */
7948 if (TARGET_P9_VECTOR
7949 && (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
7950 && INT_REGNO_P (dest_regno
)
7951 && ALTIVEC_REGNO_P (src_regno
)
7952 && INT_REGNO_P (element_regno
))
7954 rtx dest_si
= gen_rtx_REG (SImode
, dest_regno
);
7955 rtx element_si
= gen_rtx_REG (SImode
, element_regno
);
7957 if (mode
== V16QImode
)
7958 emit_insn (BYTES_BIG_ENDIAN
7959 ? gen_vextublx (dest_si
, element_si
, src
)
7960 : gen_vextubrx (dest_si
, element_si
, src
));
7962 else if (mode
== V8HImode
)
7964 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
7965 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const1_rtx
));
7966 emit_insn (BYTES_BIG_ENDIAN
7967 ? gen_vextuhlx (dest_si
, tmp_gpr_si
, src
)
7968 : gen_vextuhrx (dest_si
, tmp_gpr_si
, src
));
7974 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
7975 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const2_rtx
));
7976 emit_insn (BYTES_BIG_ENDIAN
7977 ? gen_vextuwlx (dest_si
, tmp_gpr_si
, src
)
7978 : gen_vextuwrx (dest_si
, tmp_gpr_si
, src
));
7985 gcc_assert (REG_P (tmp_altivec
));
7987 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7988 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7989 will shift the element into the upper position (adding 3 to convert a
7990 byte shift into a bit shift). */
7991 if (scalar_size
== 8)
7993 if (!BYTES_BIG_ENDIAN
)
7995 emit_insn (gen_xordi3 (tmp_gpr
, element
, const1_rtx
));
8001 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
8003 emit_insn (gen_rtx_SET (tmp_gpr
,
8004 gen_rtx_AND (DImode
,
8005 gen_rtx_ASHIFT (DImode
,
8012 if (!BYTES_BIG_ENDIAN
)
8014 rtx num_ele_m1
= GEN_INT (num_elements
- 1);
8016 emit_insn (gen_anddi3 (tmp_gpr
, element
, num_ele_m1
));
8017 emit_insn (gen_subdi3 (tmp_gpr
, num_ele_m1
, tmp_gpr
));
8023 emit_insn (gen_ashldi3 (tmp_gpr
, element2
, GEN_INT (bit_shift
)));
8026 /* Get the value into the lower byte of the Altivec register where VSLO
8028 if (TARGET_P9_VECTOR
)
8029 emit_insn (gen_vsx_splat_v2di (tmp_altivec
, tmp_gpr
));
8030 else if (can_create_pseudo_p ())
8031 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_gpr
, tmp_gpr
));
8034 rtx tmp_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8035 emit_move_insn (tmp_di
, tmp_gpr
);
8036 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_di
, tmp_di
));
8039 /* Do the VSLO to get the value into the final location. */
8043 emit_insn (gen_vsx_vslo_v2df (dest
, src
, tmp_altivec
));
8047 emit_insn (gen_vsx_vslo_v2di (dest
, src
, tmp_altivec
));
8052 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8053 rtx tmp_altivec_v4sf
= gen_rtx_REG (V4SFmode
, REGNO (tmp_altivec
));
8054 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
8055 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
8058 emit_insn (gen_vsx_xscvspdp_scalar2 (dest
, tmp_altivec_v4sf
));
8066 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8067 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
8068 rtx tmp_gpr_di
= gen_rtx_REG (DImode
, REGNO (dest
));
8069 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
8071 emit_move_insn (tmp_gpr_di
, tmp_altivec_di
);
8072 emit_insn (gen_lshrdi3 (tmp_gpr_di
, tmp_gpr_di
,
8073 GEN_INT (64 - bits_in_element
)));
8087 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
8088 selects whether the alignment is abi mandated, optional, or
8089 both abi and optional alignment. */
8092 rs6000_data_alignment (tree type
, unsigned int align
, enum data_align how
)
8094 if (how
!= align_opt
)
8096 if (VECTOR_TYPE_P (type
) && align
< 128)
8100 if (how
!= align_abi
)
8102 if (TREE_CODE (type
) == ARRAY_TYPE
8103 && TYPE_MODE (TREE_TYPE (type
)) == QImode
)
8105 if (align
< BITS_PER_WORD
)
8106 align
= BITS_PER_WORD
;
8113 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8114 instructions simply ignore the low bits; VSX memory instructions
8115 are aligned to 4 or 8 bytes. */
8118 rs6000_slow_unaligned_access (machine_mode mode
, unsigned int align
)
8120 return (STRICT_ALIGNMENT
8121 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8122 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
) && align
< 32)
8123 || ((VECTOR_MODE_P (mode
) || VECTOR_ALIGNMENT_P (mode
))
8124 && (int) align
< VECTOR_ALIGN (mode
)))));
8127 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
8130 rs6000_special_adjust_field_align (tree type
, unsigned int computed
)
8132 if (computed
<= 32 || TYPE_PACKED (type
))
8135 /* Strip initial arrays. */
8136 while (TREE_CODE (type
) == ARRAY_TYPE
)
8137 type
= TREE_TYPE (type
);
8139 /* If RECORD or UNION, recursively find the first field. */
8140 while (AGGREGATE_TYPE_P (type
))
8142 tree field
= TYPE_FIELDS (type
);
8144 /* Skip all non field decls */
8145 while (field
!= NULL
8146 && (TREE_CODE (field
) != FIELD_DECL
8147 || DECL_FIELD_ABI_IGNORED (field
)))
8148 field
= DECL_CHAIN (field
);
8153 /* A packed field does not contribute any extra alignment. */
8154 if (DECL_PACKED (field
))
8157 type
= TREE_TYPE (field
);
8160 while (TREE_CODE (type
) == ARRAY_TYPE
)
8161 type
= TREE_TYPE (type
);
8164 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
8165 && (TYPE_MODE (type
) == DFmode
|| TYPE_MODE (type
) == DCmode
))
8166 computed
= MIN (computed
, 32);
8171 /* AIX increases natural record alignment to doubleword if the innermost first
8172 field is an FP double while the FP fields remain word aligned.
8173 Only called if TYPE initially is a RECORD or UNION. */
8176 rs6000_special_round_type_align (tree type
, unsigned int computed
,
8177 unsigned int specified
)
8179 unsigned int align
= MAX (computed
, specified
);
8181 if (TYPE_PACKED (type
) || align
>= 64)
8184 /* If RECORD or UNION, recursively find the first field. */
8187 tree field
= TYPE_FIELDS (type
);
8189 /* Skip all non field decls */
8190 while (field
!= NULL
8191 && (TREE_CODE (field
) != FIELD_DECL
8192 || DECL_FIELD_ABI_IGNORED (field
)))
8193 field
= DECL_CHAIN (field
);
8198 /* A packed field does not contribute any extra alignment. */
8199 if (DECL_PACKED (field
))
8202 type
= TREE_TYPE (field
);
8205 while (TREE_CODE (type
) == ARRAY_TYPE
)
8206 type
= TREE_TYPE (type
);
8207 } while (AGGREGATE_TYPE_P (type
));
8209 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
8210 && (TYPE_MODE (type
) == DFmode
|| TYPE_MODE (type
) == DCmode
))
8211 align
= MAX (align
, 64);
8216 /* Darwin increases record alignment to the natural alignment of
8220 darwin_rs6000_special_round_type_align (tree type
, unsigned int computed
,
8221 unsigned int specified
)
8223 unsigned int align
= MAX (computed
, specified
);
8225 if (TYPE_PACKED (type
))
8228 /* Find the first field, looking down into aggregates. */
8230 tree field
= TYPE_FIELDS (type
);
8231 /* Skip all non field decls */
8232 while (field
!= NULL
8233 && (TREE_CODE (field
) != FIELD_DECL
8234 || DECL_FIELD_ABI_IGNORED (field
)))
8235 field
= DECL_CHAIN (field
);
8238 /* A packed field does not contribute any extra alignment. */
8239 if (DECL_PACKED (field
))
8241 type
= TREE_TYPE (field
);
8242 while (TREE_CODE (type
) == ARRAY_TYPE
)
8243 type
= TREE_TYPE (type
);
8244 } while (AGGREGATE_TYPE_P (type
));
8246 if (type
!= error_mark_node
&& ! AGGREGATE_TYPE_P (type
)
8247 && ! TYPE_PACKED (type
) && maximum_field_alignment
== 0)
8248 align
= MAX (align
, TYPE_ALIGN (type
));
8253 /* Return 1 for an operand in small memory on V.4/eabi. */
8256 small_data_operand (rtx op ATTRIBUTE_UNUSED
,
8257 machine_mode mode ATTRIBUTE_UNUSED
)
8262 if (rs6000_sdata
== SDATA_NONE
|| rs6000_sdata
== SDATA_DATA
)
8265 if (DEFAULT_ABI
!= ABI_V4
)
8268 if (SYMBOL_REF_P (op
))
8271 else if (GET_CODE (op
) != CONST
8272 || GET_CODE (XEXP (op
, 0)) != PLUS
8273 || !SYMBOL_REF_P (XEXP (XEXP (op
, 0), 0))
8274 || !CONST_INT_P (XEXP (XEXP (op
, 0), 1)))
8279 rtx sum
= XEXP (op
, 0);
8280 HOST_WIDE_INT summand
;
8282 /* We have to be careful here, because it is the referenced address
8283 that must be 32k from _SDA_BASE_, not just the symbol. */
8284 summand
= INTVAL (XEXP (sum
, 1));
8285 if (summand
< 0 || summand
> g_switch_value
)
8288 sym_ref
= XEXP (sum
, 0);
8291 return SYMBOL_REF_SMALL_P (sym_ref
);
8297 /* Return true if either operand is a general purpose register. */
8300 gpr_or_gpr_p (rtx op0
, rtx op1
)
8302 return ((REG_P (op0
) && INT_REGNO_P (REGNO (op0
)))
8303 || (REG_P (op1
) && INT_REGNO_P (REGNO (op1
))));
8306 /* Return true if this is a move direct operation between GPR registers and
8307 floating point/VSX registers. */
8310 direct_move_p (rtx op0
, rtx op1
)
8312 if (!REG_P (op0
) || !REG_P (op1
))
8315 if (!TARGET_DIRECT_MOVE
)
8318 int regno0
= REGNO (op0
);
8319 int regno1
= REGNO (op1
);
8320 if (!HARD_REGISTER_NUM_P (regno0
) || !HARD_REGISTER_NUM_P (regno1
))
8323 if (INT_REGNO_P (regno0
) && VSX_REGNO_P (regno1
))
8326 if (VSX_REGNO_P (regno0
) && INT_REGNO_P (regno1
))
8332 /* Return true if the ADDR is an acceptable address for a quad memory
8333 operation of mode MODE (either LQ/STQ for general purpose registers, or
8334 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
8335 is intended for LQ/STQ. If it is false, the address is intended for the ISA
8336 3.0 LXV/STXV instruction. */
8339 quad_address_p (rtx addr
, machine_mode mode
, bool strict
)
8343 if (GET_MODE_SIZE (mode
) < 16)
8346 if (legitimate_indirect_address_p (addr
, strict
))
8349 if (VECTOR_MODE_P (mode
) && !mode_supports_dq_form (mode
))
8352 /* Is this a valid prefixed address? If the bottom four bits of the offset
8353 are non-zero, we could use a prefixed instruction (which does not have the
8354 DQ-form constraint that the traditional instruction had) instead of
8355 forcing the unaligned offset to a GPR. */
8356 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DQ
))
8359 if (GET_CODE (addr
) != PLUS
)
8362 op0
= XEXP (addr
, 0);
8363 if (!REG_P (op0
) || !INT_REG_OK_FOR_BASE_P (op0
, strict
))
8366 op1
= XEXP (addr
, 1);
8367 if (!CONST_INT_P (op1
))
8370 return quad_address_offset_p (INTVAL (op1
));
8373 /* Return true if this is a load or store quad operation. This function does
8374 not handle the atomic quad memory instructions. */
8377 quad_load_store_p (rtx op0
, rtx op1
)
8381 if (!TARGET_QUAD_MEMORY
)
8384 else if (REG_P (op0
) && MEM_P (op1
))
8385 ret
= (quad_int_reg_operand (op0
, GET_MODE (op0
))
8386 && quad_memory_operand (op1
, GET_MODE (op1
))
8387 && !reg_overlap_mentioned_p (op0
, op1
));
8389 else if (MEM_P (op0
) && REG_P (op1
))
8390 ret
= (quad_memory_operand (op0
, GET_MODE (op0
))
8391 && quad_int_reg_operand (op1
, GET_MODE (op1
)));
8396 if (TARGET_DEBUG_ADDR
)
8398 fprintf (stderr
, "\n========== quad_load_store, return %s\n",
8399 ret
? "true" : "false");
8400 debug_rtx (gen_rtx_SET (op0
, op1
));
8406 /* Given an address, return a constant offset term if one exists. */
8409 address_offset (rtx op
)
8411 if (GET_CODE (op
) == PRE_INC
8412 || GET_CODE (op
) == PRE_DEC
)
8414 else if (GET_CODE (op
) == PRE_MODIFY
8415 || GET_CODE (op
) == LO_SUM
)
8418 if (GET_CODE (op
) == CONST
)
8421 if (GET_CODE (op
) == PLUS
)
8424 if (CONST_INT_P (op
))
8430 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8431 the mode. If we can't find (or don't know) the alignment of the symbol
8432 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8433 should be pessimistic]. Offsets are validated in the same way as for
8436 darwin_rs6000_legitimate_lo_sum_const_p (rtx x
, machine_mode mode
)
8438 /* We should not get here with this. */
8439 gcc_checking_assert (! mode_supports_dq_form (mode
));
8441 if (GET_CODE (x
) == CONST
)
8444 /* If we are building PIC code, then any symbol must be wrapped in an
8445 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */
8446 bool machopic_offs_p
= false;
8447 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_MACHOPIC_OFFSET
)
8449 x
= XVECEXP (x
, 0, 0);
8450 machopic_offs_p
= true;
8454 unsigned HOST_WIDE_INT offset
= 0;
8456 if (GET_CODE (x
) == PLUS
)
8459 if (! SYMBOL_REF_P (sym
))
8461 if (!CONST_INT_P (XEXP (x
, 1)))
8463 offset
= INTVAL (XEXP (x
, 1));
8465 else if (SYMBOL_REF_P (x
))
8467 else if (CONST_INT_P (x
))
8468 offset
= INTVAL (x
);
8469 else if (GET_CODE (x
) == LABEL_REF
)
8470 offset
= 0; // We assume code labels are Pmode aligned
8472 return false; // not sure what we have here.
8474 /* If we don't know the alignment of the thing to which the symbol refers,
8475 we assume optimistically it is "enough".
8476 ??? maybe we should be pessimistic instead. */
8481 tree decl
= SYMBOL_REF_DECL (sym
);
8482 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */
8483 if (TARGET_MACHO
&& flag_pic
&& !machopic_offs_p
)
8486 if (MACHO_SYMBOL_INDIRECTION_P (sym
))
8487 /* The decl in an indirection symbol is the original one, which might
8488 be less aligned than the indirection. Our indirections are always
8493 if (decl
&& DECL_ALIGN (decl
))
8494 align
= DECL_ALIGN_UNIT (decl
);
8497 unsigned int extra
= 0;
8503 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8505 if (VECTOR_MEM_VSX_P (mode
))
8508 if (!TARGET_POWERPC64
)
8510 else if ((offset
& 3) || (align
& 3))
8521 if (!TARGET_POWERPC64
)
8523 else if ((offset
& 3) || (align
& 3))
8531 /* We only care if the access(es) would cause a change to the high part. */
8532 offset
= sext_hwi (offset
, 16);
8533 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8536 /* Return true if the MEM operand is a memory operand suitable for use
8537 with a (full width, possibly multiple) gpr load/store. On
8538 powerpc64 this means the offset must be divisible by 4.
8539 Implements 'Y' constraint.
8541 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8542 a constraint function we know the operand has satisfied a suitable
8545 Offsetting a lo_sum should not be allowed, except where we know by
8546 alignment that a 32k boundary is not crossed. Note that by
8547 "offsetting" here we mean a further offset to access parts of the
8548 MEM. It's fine to have a lo_sum where the inner address is offset
8549 from a sym, since the same sym+offset will appear in the high part
8550 of the address calculation. */
8553 mem_operand_gpr (rtx op
, machine_mode mode
)
8555 unsigned HOST_WIDE_INT offset
;
8557 rtx addr
= XEXP (op
, 0);
8559 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8561 && (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
8562 && mode_supports_pre_incdec_p (mode
)
8563 && legitimate_indirect_address_p (XEXP (addr
, 0), false))
8566 /* Allow prefixed instructions if supported. If the bottom two bits of the
8567 offset are non-zero, we could use a prefixed instruction (which does not
8568 have the DS-form constraint that the traditional instruction had) instead
8569 of forcing the unaligned offset to a GPR. */
8570 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DS
))
8573 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8574 really OK. Doing this early avoids teaching all the other machinery
8576 if (TARGET_MACHO
&& GET_CODE (addr
) == LO_SUM
)
8577 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr
, 1), mode
);
8579 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8580 if (!rs6000_offsettable_memref_p (op
, mode
, false))
8583 op
= address_offset (addr
);
8587 offset
= INTVAL (op
);
8588 if (TARGET_POWERPC64
&& (offset
& 3) != 0)
8591 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8595 if (GET_CODE (addr
) == LO_SUM
)
8596 /* For lo_sum addresses, we must allow any offset except one that
8597 causes a wrap, so test only the low 16 bits. */
8598 offset
= sext_hwi (offset
, 16);
8600 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8603 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8604 enforce an offset divisible by 4 even for 32-bit. */
8607 mem_operand_ds_form (rtx op
, machine_mode mode
)
8609 unsigned HOST_WIDE_INT offset
;
8611 rtx addr
= XEXP (op
, 0);
8613 /* Allow prefixed instructions if supported. If the bottom two bits of the
8614 offset are non-zero, we could use a prefixed instruction (which does not
8615 have the DS-form constraint that the traditional instruction had) instead
8616 of forcing the unaligned offset to a GPR. */
8617 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DS
))
8620 if (!offsettable_address_p (false, mode
, addr
))
8623 op
= address_offset (addr
);
8627 offset
= INTVAL (op
);
8628 if ((offset
& 3) != 0)
8631 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8635 if (GET_CODE (addr
) == LO_SUM
)
8636 /* For lo_sum addresses, we must allow any offset except one that
8637 causes a wrap, so test only the low 16 bits. */
8638 offset
= sext_hwi (offset
, 16);
8640 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8643 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8646 reg_offset_addressing_ok_p (machine_mode mode
)
8660 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8661 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8662 a vector mode, if we want to use the VSX registers to move it around,
8663 we need to restrict ourselves to reg+reg addressing. Similarly for
8664 IEEE 128-bit floating point that is passed in a single vector
8666 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
))
8667 return mode_supports_dq_form (mode
);
8670 /* The vector pair/quad types support offset addressing if the
8671 underlying vectors support offset addressing. */
8677 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8678 addressing for the LFIWZX and STFIWX instructions. */
8679 if (TARGET_NO_SDMODE_STACK
)
8691 virtual_stack_registers_memory_p (rtx op
)
8696 regnum
= REGNO (op
);
8698 else if (GET_CODE (op
) == PLUS
8699 && REG_P (XEXP (op
, 0))
8700 && CONST_INT_P (XEXP (op
, 1)))
8701 regnum
= REGNO (XEXP (op
, 0));
8706 return (regnum
>= FIRST_VIRTUAL_REGISTER
8707 && regnum
<= LAST_VIRTUAL_POINTER_REGISTER
);
8710 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8711 is known to not straddle a 32k boundary. This function is used
8712 to determine whether -mcmodel=medium code can use TOC pointer
8713 relative addressing for OP. This means the alignment of the TOC
8714 pointer must also be taken into account, and unfortunately that is
8717 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8718 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8722 offsettable_ok_by_alignment (rtx op
, HOST_WIDE_INT offset
,
8726 unsigned HOST_WIDE_INT dsize
, dalign
, lsb
, mask
;
8728 if (!SYMBOL_REF_P (op
))
8731 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8733 if (mode_supports_dq_form (mode
))
8736 dsize
= GET_MODE_SIZE (mode
);
8737 decl
= SYMBOL_REF_DECL (op
);
8743 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8744 replacing memory addresses with an anchor plus offset. We
8745 could find the decl by rummaging around in the block->objects
8746 VEC for the given offset but that seems like too much work. */
8747 dalign
= BITS_PER_UNIT
;
8748 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op
)
8749 && SYMBOL_REF_ANCHOR_P (op
)
8750 && SYMBOL_REF_BLOCK (op
) != NULL
)
8752 struct object_block
*block
= SYMBOL_REF_BLOCK (op
);
8754 dalign
= block
->alignment
;
8755 offset
+= SYMBOL_REF_BLOCK_OFFSET (op
);
8757 else if (CONSTANT_POOL_ADDRESS_P (op
))
8759 /* It would be nice to have get_pool_align().. */
8760 machine_mode cmode
= get_pool_mode (op
);
8762 dalign
= GET_MODE_ALIGNMENT (cmode
);
8765 else if (DECL_P (decl
))
8767 dalign
= DECL_ALIGN (decl
);
8771 /* Allow BLKmode when the entire object is known to not
8772 cross a 32k boundary. */
8773 if (!DECL_SIZE_UNIT (decl
))
8776 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl
)))
8779 dsize
= tree_to_uhwi (DECL_SIZE_UNIT (decl
));
8783 dalign
/= BITS_PER_UNIT
;
8784 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8785 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8786 return dalign
>= dsize
;
8792 /* Find how many bits of the alignment we know for this access. */
8793 dalign
/= BITS_PER_UNIT
;
8794 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8795 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8797 lsb
= offset
& -offset
;
8801 return dalign
>= dsize
;
8805 constant_pool_expr_p (rtx op
)
8809 split_const (op
, &base
, &offset
);
8810 return (SYMBOL_REF_P (base
)
8811 && CONSTANT_POOL_ADDRESS_P (base
)
8812 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base
), Pmode
));
8815 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8816 use that as the register to put the HIGH value into if register allocation
8820 create_TOC_reference (rtx symbol
, rtx largetoc_reg
)
8822 rtx tocrel
, tocreg
, hi
;
8824 gcc_assert (TARGET_TOC
);
8826 if (TARGET_DEBUG_ADDR
)
8828 if (SYMBOL_REF_P (symbol
))
8829 fprintf (stderr
, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8833 fprintf (stderr
, "\ncreate_TOC_reference, code %s:\n",
8834 GET_RTX_NAME (GET_CODE (symbol
)));
8839 if (!can_create_pseudo_p ())
8840 df_set_regs_ever_live (TOC_REGISTER
, true);
8842 tocreg
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
8843 tocrel
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, symbol
, tocreg
), UNSPEC_TOCREL
);
8844 if (TARGET_CMODEL
== CMODEL_SMALL
|| can_create_pseudo_p ())
8847 hi
= gen_rtx_HIGH (Pmode
, copy_rtx (tocrel
));
8848 if (largetoc_reg
!= NULL
)
8850 emit_move_insn (largetoc_reg
, hi
);
8853 return gen_rtx_LO_SUM (Pmode
, hi
, tocrel
);
8856 /* These are only used to pass through from print_operand/print_operand_address
8857 to rs6000_output_addr_const_extra over the intervening function
8858 output_addr_const which is not target code. */
8859 static const_rtx tocrel_base_oac
, tocrel_offset_oac
;
8861 /* Return true if OP is a toc pointer relative address (the output
8862 of create_TOC_reference). If STRICT, do not match non-split
8863 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8864 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8865 TOCREL_OFFSET_RET respectively. */
8868 toc_relative_expr_p (const_rtx op
, bool strict
, const_rtx
*tocrel_base_ret
,
8869 const_rtx
*tocrel_offset_ret
)
8874 if (TARGET_CMODEL
!= CMODEL_SMALL
)
8876 /* When strict ensure we have everything tidy. */
8878 && !(GET_CODE (op
) == LO_SUM
8879 && REG_P (XEXP (op
, 0))
8880 && INT_REG_OK_FOR_BASE_P (XEXP (op
, 0), strict
)))
8883 /* When not strict, allow non-split TOC addresses and also allow
8884 (lo_sum (high ..)) TOC addresses created during reload. */
8885 if (GET_CODE (op
) == LO_SUM
)
8889 const_rtx tocrel_base
= op
;
8890 const_rtx tocrel_offset
= const0_rtx
;
8892 if (GET_CODE (op
) == PLUS
&& add_cint_operand (XEXP (op
, 1), GET_MODE (op
)))
8894 tocrel_base
= XEXP (op
, 0);
8895 tocrel_offset
= XEXP (op
, 1);
8898 if (tocrel_base_ret
)
8899 *tocrel_base_ret
= tocrel_base
;
8900 if (tocrel_offset_ret
)
8901 *tocrel_offset_ret
= tocrel_offset
;
8903 return (GET_CODE (tocrel_base
) == UNSPEC
8904 && XINT (tocrel_base
, 1) == UNSPEC_TOCREL
8905 && REG_P (XVECEXP (tocrel_base
, 0, 1))
8906 && REGNO (XVECEXP (tocrel_base
, 0, 1)) == TOC_REGISTER
);
8909 /* Return true if X is a constant pool address, and also for cmodel=medium
8910 if X is a toc-relative address known to be offsettable within MODE. */
8913 legitimate_constant_pool_address_p (const_rtx x
, machine_mode mode
,
8916 const_rtx tocrel_base
, tocrel_offset
;
8917 return (toc_relative_expr_p (x
, strict
, &tocrel_base
, &tocrel_offset
)
8918 && (TARGET_CMODEL
!= CMODEL_MEDIUM
8919 || constant_pool_expr_p (XVECEXP (tocrel_base
, 0, 0))
8921 || offsettable_ok_by_alignment (XVECEXP (tocrel_base
, 0, 0),
8922 INTVAL (tocrel_offset
), mode
)));
8926 legitimate_small_data_p (machine_mode mode
, rtx x
)
8928 return (DEFAULT_ABI
== ABI_V4
8929 && !flag_pic
&& !TARGET_TOC
8930 && (SYMBOL_REF_P (x
) || GET_CODE (x
) == CONST
)
8931 && small_data_operand (x
, mode
));
8935 rs6000_legitimate_offset_address_p (machine_mode mode
, rtx x
,
8936 bool strict
, bool worst_case
)
8938 unsigned HOST_WIDE_INT offset
;
8941 if (GET_CODE (x
) != PLUS
)
8943 if (!REG_P (XEXP (x
, 0)))
8945 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
8947 if (mode_supports_dq_form (mode
))
8948 return quad_address_p (x
, mode
, strict
);
8949 if (!reg_offset_addressing_ok_p (mode
))
8950 return virtual_stack_registers_memory_p (x
);
8951 if (legitimate_constant_pool_address_p (x
, mode
, strict
|| lra_in_progress
))
8953 if (!CONST_INT_P (XEXP (x
, 1)))
8956 offset
= INTVAL (XEXP (x
, 1));
8963 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8965 if (VECTOR_MEM_VSX_P (mode
))
8970 if (!TARGET_POWERPC64
)
8972 else if (offset
& 3)
8985 if (!TARGET_POWERPC64
)
8987 else if (offset
& 3)
8995 if (TARGET_PREFIXED
)
8996 return SIGNED_34BIT_OFFSET_EXTRA_P (offset
, extra
);
8998 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
9002 legitimate_indexed_address_p (rtx x
, int strict
)
9006 if (GET_CODE (x
) != PLUS
)
9012 return (REG_P (op0
) && REG_P (op1
)
9013 && ((INT_REG_OK_FOR_BASE_P (op0
, strict
)
9014 && INT_REG_OK_FOR_INDEX_P (op1
, strict
))
9015 || (INT_REG_OK_FOR_BASE_P (op1
, strict
)
9016 && INT_REG_OK_FOR_INDEX_P (op0
, strict
))));
9020 avoiding_indexed_address_p (machine_mode mode
)
9022 unsigned int msize
= GET_MODE_SIZE (mode
);
9024 /* Avoid indexed addressing for modes that have non-indexed load/store
9025 instruction forms. On power10, vector pairs have an indexed
9026 form, but vector quads don't. */
9030 return (TARGET_AVOID_XFORM
&& VECTOR_MEM_NONE_P (mode
));
9034 legitimate_indirect_address_p (rtx x
, int strict
)
9036 return REG_P (x
) && INT_REG_OK_FOR_BASE_P (x
, strict
);
9040 macho_lo_sum_memory_operand (rtx x
, machine_mode mode
)
9042 if (!TARGET_MACHO
|| !flag_pic
9043 || mode
!= SImode
|| !MEM_P (x
))
9047 if (GET_CODE (x
) != LO_SUM
)
9049 if (!REG_P (XEXP (x
, 0)))
9051 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), 0))
9055 return CONSTANT_P (x
);
9059 legitimate_lo_sum_address_p (machine_mode mode
, rtx x
, int strict
)
9061 if (GET_CODE (x
) != LO_SUM
)
9063 if (!REG_P (XEXP (x
, 0)))
9065 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
9067 /* quad word addresses are restricted, and we can't use LO_SUM. */
9068 if (mode_supports_dq_form (mode
))
9076 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
)
9078 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
9079 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9080 recognizes some LO_SUM addresses as valid although this
9081 function says opposite. In most cases, LRA through different
9082 transformations can generate correct code for address reloads.
9083 It cannot manage only some LO_SUM cases. So we need to add
9084 code here saying that some addresses are still valid. */
9085 large_toc_ok
= (lra_in_progress
&& TARGET_CMODEL
!= CMODEL_SMALL
9086 && small_toc_ref (x
, VOIDmode
));
9087 if (TARGET_TOC
&& ! large_toc_ok
)
9089 if (GET_MODE_NUNITS (mode
) != 1)
9091 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
9092 && !(/* ??? Assume floating point reg based on mode? */
9093 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
9096 return CONSTANT_P (x
) || large_toc_ok
;
9098 else if (TARGET_MACHO
)
9100 if (GET_MODE_NUNITS (mode
) != 1)
9102 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
9103 && !(/* see above */
9104 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
9107 if (MACHO_DYNAMIC_NO_PIC_P
|| !flag_pic
)
9108 return CONSTANT_P (x
);
9110 /* Macho-O PIC code from here. */
9111 if (GET_CODE (x
) == CONST
)
9114 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */
9115 if (SYMBOL_REF_P (x
))
9118 /* So this is OK if the wrapped object is const. */
9119 if (GET_CODE (x
) == UNSPEC
9120 && XINT (x
, 1) == UNSPEC_MACHOPIC_OFFSET
)
9121 return CONSTANT_P (XVECEXP (x
, 0, 0));
9122 return CONSTANT_P (x
);
9128 /* Try machine-dependent ways of modifying an illegitimate address
9129 to be legitimate. If we find one, return the new, valid address.
9130 This is used from only one place: `memory_address' in explow.cc.
9132 OLDX is the address as it was before break_out_memory_refs was
9133 called. In some cases it is useful to look at this to decide what
9136 It is always safe for this function to do nothing. It exists to
9137 recognize opportunities to optimize the output.
9139 On RS/6000, first check for the sum of a register with a constant
9140 integer that is out of range. If so, generate code to add the
9141 constant with the low-order 16 bits masked to the register and force
9142 this result into another register (this can be done with `cau').
9143 Then generate an address of REG+(CONST&0xffff), allowing for the
9144 possibility of bit 16 being a one.
9146 Then check for the sum of a register and something not constant, try to
9147 load the other things into a register and return the sum. */
9150 rs6000_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
9155 if (!reg_offset_addressing_ok_p (mode
)
9156 || mode_supports_dq_form (mode
))
9158 if (virtual_stack_registers_memory_p (x
))
9161 /* In theory we should not be seeing addresses of the form reg+0,
9162 but just in case it is generated, optimize it away. */
9163 if (GET_CODE (x
) == PLUS
&& XEXP (x
, 1) == const0_rtx
)
9164 return force_reg (Pmode
, XEXP (x
, 0));
9166 /* For TImode with load/store quad, restrict addresses to just a single
9167 pointer, so it works with both GPRs and VSX registers. */
9168 /* Make sure both operands are registers. */
9169 else if (GET_CODE (x
) == PLUS
9170 && (mode
!= TImode
|| !TARGET_VSX
))
9171 return gen_rtx_PLUS (Pmode
,
9172 force_reg (Pmode
, XEXP (x
, 0)),
9173 force_reg (Pmode
, XEXP (x
, 1)));
9175 return force_reg (Pmode
, x
);
9177 if (SYMBOL_REF_P (x
) && !TARGET_MACHO
)
9179 enum tls_model model
= SYMBOL_REF_TLS_MODEL (x
);
9181 return rs6000_legitimize_tls_address (x
, model
);
9193 /* As in legitimate_offset_address_p we do not assume
9194 worst-case. The mode here is just a hint as to the registers
9195 used. A TImode is usually in gprs, but may actually be in
9196 fprs. Leave worst-case scenario for reload to handle via
9197 insn constraints. PTImode is only GPRs. */
9204 if (GET_CODE (x
) == PLUS
9205 && REG_P (XEXP (x
, 0))
9206 && CONST_INT_P (XEXP (x
, 1))
9207 && ((unsigned HOST_WIDE_INT
) (INTVAL (XEXP (x
, 1)) + 0x8000)
9208 >= 0x10000 - extra
))
9210 HOST_WIDE_INT high_int
, low_int
;
9212 low_int
= sext_hwi (INTVAL (XEXP (x
, 1)), 16);
9213 if (low_int
>= 0x8000 - extra
)
9215 high_int
= INTVAL (XEXP (x
, 1)) - low_int
;
9216 sum
= force_operand (gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9217 gen_int_mode (high_int
, Pmode
)), 0);
9218 return plus_constant (Pmode
, sum
, low_int
);
9220 else if (GET_CODE (x
) == PLUS
9221 && REG_P (XEXP (x
, 0))
9222 && !CONST_INT_P (XEXP (x
, 1))
9223 && GET_MODE_NUNITS (mode
) == 1
9224 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9225 || (/* ??? Assume floating point reg based on mode? */
9226 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
9227 && !avoiding_indexed_address_p (mode
))
9229 return gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9230 force_reg (Pmode
, force_operand (XEXP (x
, 1), 0)));
9232 else if ((TARGET_ELF
9234 || !MACHO_DYNAMIC_NO_PIC_P
9238 && TARGET_NO_TOC_OR_PCREL
9241 && !CONST_WIDE_INT_P (x
)
9242 && !CONST_DOUBLE_P (x
)
9244 && GET_MODE_NUNITS (mode
) == 1
9245 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9246 || (/* ??? Assume floating point reg based on mode? */
9247 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
))))
9249 rtx reg
= gen_reg_rtx (Pmode
);
9251 emit_insn (gen_elf_high (reg
, x
));
9253 emit_insn (gen_macho_high (Pmode
, reg
, x
));
9254 return gen_rtx_LO_SUM (Pmode
, reg
, x
);
9258 && constant_pool_expr_p (x
)
9259 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x
), Pmode
))
9260 return create_TOC_reference (x
, NULL_RTX
);
9265 /* Debug version of rs6000_legitimize_address. */
9267 rs6000_debug_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
)
9273 ret
= rs6000_legitimize_address (x
, oldx
, mode
);
9274 insns
= get_insns ();
9280 "\nrs6000_legitimize_address: mode %s, old code %s, "
9281 "new code %s, modified\n",
9282 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)),
9283 GET_RTX_NAME (GET_CODE (ret
)));
9285 fprintf (stderr
, "Original address:\n");
9288 fprintf (stderr
, "oldx:\n");
9291 fprintf (stderr
, "New address:\n");
9296 fprintf (stderr
, "Insns added:\n");
9297 debug_rtx_list (insns
, 20);
9303 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9304 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)));
9315 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9316 We need to emit DTP-relative relocations. */
9318 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
9320 rs6000_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
9325 fputs ("\t.long\t", file
);
9328 fputs (DOUBLE_INT_ASM_OP
, file
);
9333 output_addr_const (file
, x
);
9335 fputs ("@dtprel+0x8000", file
);
9338 /* Return true if X is a symbol that refers to real (rather than emulated)
9342 rs6000_real_tls_symbol_ref_p (rtx x
)
9344 return (SYMBOL_REF_P (x
)
9345 && SYMBOL_REF_TLS_MODEL (x
) >= TLS_MODEL_REAL
);
9348 /* In the name of slightly smaller debug output, and to cater to
9349 general assembler lossage, recognize various UNSPEC sequences
9350 and turn them back into a direct symbol reference. */
9353 rs6000_delegitimize_address (rtx orig_x
)
9357 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9358 encodes loading up the high part of the address of a TOC reference along
9359 with a load of a GPR using the same base register used for the load. We
9360 return the original SYMBOL_REF.
9362 (set (reg:INT1 <reg>
9363 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))
9365 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9366 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9367 We return the original SYMBOL_REF.
9369 (parallel [(set (reg:DI <base-reg>)
9370 (unspec:DI [(symbol_ref <symbol>)
9371 (const_int <marker>)]
9372 UNSPEC_PCREL_OPT_LD_ADDR))
9373 (set (reg:DI <load-reg>)
9374 (unspec:DI [(const_int 0)]
9375 UNSPEC_PCREL_OPT_LD_DATA))])
9377 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9378 GPR being loaded is the same as the GPR used to hold the external address.
9380 (set (reg:DI <base-reg>)
9381 (unspec:DI [(symbol_ref <symbol>)
9382 (const_int <marker>)]
9383 UNSPEC_PCREL_OPT_LD_SAME_REG))
9385 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9386 UNSPEC include the external SYMBOL_REF along with the value being loaded.
9387 We return the original SYMBOL_REF.
9389 (parallel [(set (reg:DI <base-reg>)
9390 (unspec:DI [(symbol_ref <symbol>)
9391 (const_int <marker>)]
9392 UNSPEC_PCREL_OPT_ST_ADDR))
9393 (use (reg <store-reg>))]) */
9395 if (GET_CODE (orig_x
) == UNSPEC
)
9396 switch (XINT (orig_x
, 1))
9398 case UNSPEC_FUSION_GPR
:
9399 case UNSPEC_PCREL_OPT_LD_ADDR
:
9400 case UNSPEC_PCREL_OPT_LD_SAME_REG
:
9401 case UNSPEC_PCREL_OPT_ST_ADDR
:
9402 orig_x
= XVECEXP (orig_x
, 0, 0);
9409 orig_x
= delegitimize_mem_from_attrs (orig_x
);
9416 if (TARGET_CMODEL
!= CMODEL_SMALL
&& GET_CODE (y
) == LO_SUM
)
9420 if (GET_CODE (y
) == PLUS
9421 && GET_MODE (y
) == Pmode
9422 && CONST_INT_P (XEXP (y
, 1)))
9424 offset
= XEXP (y
, 1);
9428 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_TOCREL
)
9430 y
= XVECEXP (y
, 0, 0);
9433 /* Do not associate thread-local symbols with the original
9434 constant pool symbol. */
9437 && CONSTANT_POOL_ADDRESS_P (y
)
9438 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y
)))
9442 if (offset
!= NULL_RTX
)
9443 y
= gen_rtx_PLUS (Pmode
, y
, offset
);
9444 if (!MEM_P (orig_x
))
9447 return replace_equiv_address_nv (orig_x
, y
);
9451 && GET_CODE (orig_x
) == LO_SUM
9452 && GET_CODE (XEXP (orig_x
, 1)) == CONST
)
9454 y
= XEXP (XEXP (orig_x
, 1), 0);
9455 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_MACHOPIC_OFFSET
)
9456 return XVECEXP (y
, 0, 0);
9462 /* Return true if X shouldn't be emitted into the debug info.
9463 The linker doesn't like .toc section references from
9464 .debug_* sections, so reject .toc section symbols. */
9467 rs6000_const_not_ok_for_debug_p (rtx x
)
9469 if (GET_CODE (x
) == UNSPEC
)
9471 if (SYMBOL_REF_P (x
)
9472 && CONSTANT_POOL_ADDRESS_P (x
))
9474 rtx c
= get_pool_constant (x
);
9475 machine_mode cmode
= get_pool_mode (x
);
9476 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c
, cmode
))
9483 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9486 rs6000_legitimate_combined_insn (rtx_insn
*insn
)
9488 int icode
= INSN_CODE (insn
);
9490 /* Reject creating doloop insns. Combine should not be allowed
9491 to create these for a number of reasons:
9492 1) In a nested loop, if combine creates one of these in an
9493 outer loop and the register allocator happens to allocate ctr
9494 to the outer loop insn, then the inner loop can't use ctr.
9495 Inner loops ought to be more highly optimized.
9496 2) Combine often wants to create one of these from what was
9497 originally a three insn sequence, first combining the three
9498 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9499 allocated ctr, the splitter takes use back to the three insn
9500 sequence. It's better to stop combine at the two insn
9502 3) Faced with not being able to allocate ctr for ctrsi/crtdi
9503 insns, the register allocator sometimes uses floating point
9504 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9505 jump insn and output reloads are not implemented for jumps,
9506 the ctrsi/ctrdi splitters need to handle all possible cases.
9507 That's a pain, and it gets to be seriously difficult when a
9508 splitter that runs after reload needs memory to transfer from
9509 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9510 for the difficult case. It's better to not create problems
9511 in the first place. */
9512 if (icode
!= CODE_FOR_nothing
9513 && (icode
== CODE_FOR_bdz_si
9514 || icode
== CODE_FOR_bdz_di
9515 || icode
== CODE_FOR_bdnz_si
9516 || icode
== CODE_FOR_bdnz_di
9517 || icode
== CODE_FOR_bdztf_si
9518 || icode
== CODE_FOR_bdztf_di
9519 || icode
== CODE_FOR_bdnztf_si
9520 || icode
== CODE_FOR_bdnztf_di
))
9526 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9528 static GTY(()) rtx rs6000_tls_symbol
;
9530 rs6000_tls_get_addr (void)
9532 if (!rs6000_tls_symbol
)
9533 rs6000_tls_symbol
= init_one_libfunc ("__tls_get_addr");
9535 return rs6000_tls_symbol
;
9538 /* Construct the SYMBOL_REF for TLS GOT references. */
9540 static GTY(()) rtx rs6000_got_symbol
;
9542 rs6000_got_sym (void)
9544 if (!rs6000_got_symbol
)
9546 rs6000_got_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
9547 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_LOCAL
;
9548 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_EXTERNAL
;
9551 return rs6000_got_symbol
;
9554 /* AIX Thread-Local Address support. */
9557 rs6000_legitimize_tls_address_aix (rtx addr
, enum tls_model model
)
9559 rtx sym
, mem
, tocref
, tlsreg
, tmpreg
, dest
;
9563 /* Place addr into TOC constant pool. */
9564 sym
= force_const_mem (GET_MODE (addr
), addr
);
9566 /* Output the TOC entry and create the MEM referencing the value. */
9567 if (constant_pool_expr_p (XEXP (sym
, 0))
9568 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym
, 0)), Pmode
))
9570 tocref
= create_TOC_reference (XEXP (sym
, 0), NULL_RTX
);
9571 mem
= gen_const_mem (Pmode
, tocref
);
9572 set_mem_alias_set (mem
, get_TOC_alias_set ());
9577 /* Use global-dynamic for local-dynamic. */
9578 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
9579 || model
== TLS_MODEL_LOCAL_DYNAMIC
)
9581 /* Create new TOC reference for @m symbol. */
9582 name
= XSTR (XVECEXP (XEXP (mem
, 0), 0, 0), 0);
9583 tlsname
= XALLOCAVEC (char, strlen (name
) + 1);
9584 strcpy (tlsname
, "*LCM");
9585 strcat (tlsname
, name
+ 3);
9586 rtx modaddr
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (tlsname
));
9587 SYMBOL_REF_FLAGS (modaddr
) |= SYMBOL_FLAG_LOCAL
;
9588 tocref
= create_TOC_reference (modaddr
, NULL_RTX
);
9589 rtx modmem
= gen_const_mem (Pmode
, tocref
);
9590 set_mem_alias_set (modmem
, get_TOC_alias_set ());
9592 rtx modreg
= gen_reg_rtx (Pmode
);
9593 emit_insn (gen_rtx_SET (modreg
, modmem
));
9595 tmpreg
= gen_reg_rtx (Pmode
);
9596 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9598 dest
= gen_reg_rtx (Pmode
);
9600 emit_insn (gen_tls_get_addrsi (dest
, modreg
, tmpreg
));
9602 emit_insn (gen_tls_get_addrdi (dest
, modreg
, tmpreg
));
9605 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9606 else if (TARGET_32BIT
)
9608 tlsreg
= gen_reg_rtx (SImode
);
9609 emit_insn (gen_tls_get_tpointer (tlsreg
));
9613 tlsreg
= gen_rtx_REG (DImode
, 13);
9614 xcoff_tls_exec_model_detected
= true;
9617 /* Load the TOC value into temporary register. */
9618 tmpreg
= gen_reg_rtx (Pmode
);
9619 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9620 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
9621 gen_rtx_MINUS (Pmode
, addr
, tlsreg
));
9623 /* Add TOC symbol value to TLS pointer. */
9624 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tmpreg
, tlsreg
));
9629 /* Passes the tls arg value for global dynamic and local dynamic
9630 emit_library_call_value in rs6000_legitimize_tls_address to
9631 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9632 marker relocs put on __tls_get_addr calls. */
9633 static rtx global_tlsarg
;
9635 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9636 this (thread-local) address. */
9639 rs6000_legitimize_tls_address (rtx addr
, enum tls_model model
)
9644 return rs6000_legitimize_tls_address_aix (addr
, model
);
9646 dest
= gen_reg_rtx (Pmode
);
9647 if (model
== TLS_MODEL_LOCAL_EXEC
9648 && (rs6000_tls_size
== 16 || rs6000_pcrel_p ()))
9654 tlsreg
= gen_rtx_REG (Pmode
, 13);
9655 insn
= gen_tls_tprel_64 (dest
, tlsreg
, addr
);
9659 tlsreg
= gen_rtx_REG (Pmode
, 2);
9660 insn
= gen_tls_tprel_32 (dest
, tlsreg
, addr
);
9664 else if (model
== TLS_MODEL_LOCAL_EXEC
&& rs6000_tls_size
== 32)
9668 tmp
= gen_reg_rtx (Pmode
);
9671 tlsreg
= gen_rtx_REG (Pmode
, 13);
9672 insn
= gen_tls_tprel_ha_64 (tmp
, tlsreg
, addr
);
9676 tlsreg
= gen_rtx_REG (Pmode
, 2);
9677 insn
= gen_tls_tprel_ha_32 (tmp
, tlsreg
, addr
);
9681 insn
= gen_tls_tprel_lo_64 (dest
, tmp
, addr
);
9683 insn
= gen_tls_tprel_lo_32 (dest
, tmp
, addr
);
9688 rtx got
, tga
, tmp1
, tmp2
;
9690 /* We currently use relocations like @got@tlsgd for tls, which
9691 means the linker will handle allocation of tls entries, placing
9692 them in the .got section. So use a pointer to the .got section,
9693 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9694 or to secondary GOT sections used by 32-bit -fPIC. */
9695 if (rs6000_pcrel_p ())
9697 else if (TARGET_64BIT
)
9698 got
= gen_rtx_REG (Pmode
, 2);
9702 got
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
9705 rtx gsym
= rs6000_got_sym ();
9706 got
= gen_reg_rtx (Pmode
);
9708 rs6000_emit_move (got
, gsym
, Pmode
);
9713 tmp1
= gen_reg_rtx (Pmode
);
9714 tmp2
= gen_reg_rtx (Pmode
);
9715 mem
= gen_const_mem (Pmode
, tmp1
);
9716 lab
= gen_label_rtx ();
9717 emit_insn (gen_load_toc_v4_PIC_1b (gsym
, lab
));
9718 emit_move_insn (tmp1
, gen_rtx_REG (Pmode
, LR_REGNO
));
9719 if (TARGET_LINK_STACK
)
9720 emit_insn (gen_addsi3 (tmp1
, tmp1
, GEN_INT (4)));
9721 emit_move_insn (tmp2
, mem
);
9722 rtx_insn
*last
= emit_insn (gen_addsi3 (got
, tmp1
, tmp2
));
9723 set_unique_reg_note (last
, REG_EQUAL
, gsym
);
9728 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
)
9730 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, addr
, got
),
9732 tga
= rs6000_tls_get_addr ();
9733 rtx argreg
= gen_rtx_REG (Pmode
, 3);
9734 emit_insn (gen_rtx_SET (argreg
, arg
));
9735 global_tlsarg
= arg
;
9736 emit_library_call_value (tga
, dest
, LCT_CONST
, Pmode
, argreg
, Pmode
);
9737 global_tlsarg
= NULL_RTX
;
9739 /* Make a note so that the result of this call can be CSEd. */
9740 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
9741 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
9742 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
9744 else if (model
== TLS_MODEL_LOCAL_DYNAMIC
)
9746 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, got
), UNSPEC_TLSLD
);
9747 tga
= rs6000_tls_get_addr ();
9748 tmp1
= gen_reg_rtx (Pmode
);
9749 rtx argreg
= gen_rtx_REG (Pmode
, 3);
9750 emit_insn (gen_rtx_SET (argreg
, arg
));
9751 global_tlsarg
= arg
;
9752 emit_library_call_value (tga
, tmp1
, LCT_CONST
, Pmode
, argreg
, Pmode
);
9753 global_tlsarg
= NULL_RTX
;
9755 /* Make a note so that the result of this call can be CSEd. */
9756 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
9757 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
9758 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
9760 if (rs6000_tls_size
== 16 || rs6000_pcrel_p ())
9763 insn
= gen_tls_dtprel_64 (dest
, tmp1
, addr
);
9765 insn
= gen_tls_dtprel_32 (dest
, tmp1
, addr
);
9767 else if (rs6000_tls_size
== 32)
9769 tmp2
= gen_reg_rtx (Pmode
);
9771 insn
= gen_tls_dtprel_ha_64 (tmp2
, tmp1
, addr
);
9773 insn
= gen_tls_dtprel_ha_32 (tmp2
, tmp1
, addr
);
9776 insn
= gen_tls_dtprel_lo_64 (dest
, tmp2
, addr
);
9778 insn
= gen_tls_dtprel_lo_32 (dest
, tmp2
, addr
);
9782 tmp2
= gen_reg_rtx (Pmode
);
9784 insn
= gen_tls_got_dtprel_64 (tmp2
, got
, addr
);
9786 insn
= gen_tls_got_dtprel_32 (tmp2
, got
, addr
);
9788 insn
= gen_rtx_SET (dest
, gen_rtx_PLUS (Pmode
, tmp2
, tmp1
));
9794 /* IE, or 64-bit offset LE. */
9795 tmp2
= gen_reg_rtx (Pmode
);
9797 insn
= gen_tls_got_tprel_64 (tmp2
, got
, addr
);
9799 insn
= gen_tls_got_tprel_32 (tmp2
, got
, addr
);
9801 if (rs6000_pcrel_p ())
9804 insn
= gen_tls_tls_pcrel_64 (dest
, tmp2
, addr
);
9806 insn
= gen_tls_tls_pcrel_32 (dest
, tmp2
, addr
);
9808 else if (TARGET_64BIT
)
9809 insn
= gen_tls_tls_64 (dest
, tmp2
, addr
);
9811 insn
= gen_tls_tls_32 (dest
, tmp2
, addr
);
9819 /* Only create the global variable for the stack protect guard if we are using
9820 the global flavor of that guard. */
9822 rs6000_init_stack_protect_guard (void)
9824 if (rs6000_stack_protector_guard
== SSP_GLOBAL
)
9825 return default_stack_protect_guard ();
9830 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9833 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9835 /* If GET_CODE (x) is HIGH, the 'X' represets the high part of a symbol_ref.
9836 It can not be put into a constant pool. e.g.
9837 (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..)
9838 (high:DI (symbol_ref:DI ("var")..)). */
9839 if (GET_CODE (x
) == HIGH
)
9842 /* A TLS symbol in the TOC cannot contain a sum. */
9843 if (GET_CODE (x
) == CONST
9844 && GET_CODE (XEXP (x
, 0)) == PLUS
9845 && SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
9846 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0)) != 0)
9849 /* Allow AIX TOC TLS symbols in the constant pool,
9850 but not ELF TLS symbols. */
9851 return TARGET_ELF
&& tls_referenced_p (x
);
9854 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9855 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9856 can be addressed relative to the toc pointer. */
9859 use_toc_relative_ref (rtx sym
, machine_mode mode
)
9861 return ((constant_pool_expr_p (sym
)
9862 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym
),
9863 get_pool_mode (sym
)))
9864 || (TARGET_CMODEL
== CMODEL_MEDIUM
9865 && SYMBOL_REF_LOCAL_P (sym
)
9866 && GET_MODE_SIZE (mode
) <= POWERPC64_TOC_POINTER_ALIGNMENT
));
9869 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9870 that is a valid memory address for an instruction.
9871 The MODE argument is the machine mode for the MEM expression
9872 that wants to use this address.
9874 On the RS/6000, there are four valid address: a SYMBOL_REF that
9875 refers to a constant pool entry of an address (or the sum of it
9876 plus a constant), a short (16-bit signed) constant plus a register,
9877 the sum of two registers, or a register indirect, possibly with an
9878 auto-increment. For DFmode, DDmode and DImode with a constant plus
9879 register, we must ensure that both words are addressable or PowerPC64
9880 with offset word aligned.
9882 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9883 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9884 because adjacent memory cells are accessed by adding word-sized offsets
9885 during assembly output. */
9887 rs6000_legitimate_address_p (machine_mode mode
, rtx x
, bool reg_ok_strict
,
9888 code_helper ch
= ERROR_MARK
)
9890 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
9891 bool quad_offset_p
= mode_supports_dq_form (mode
);
9893 if (TARGET_ELF
&& RS6000_SYMBOL_REF_TLS_P (x
))
9896 /* lxvl and stxvl doesn't support any addressing modes with PLUS. */
9897 if (ch
.is_internal_fn ()
9898 && (ch
== IFN_LEN_LOAD
|| ch
== IFN_LEN_STORE
)
9899 && GET_CODE (x
) == PLUS
)
9902 /* Handle unaligned altivec lvx/stvx type addresses. */
9903 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
9904 && GET_CODE (x
) == AND
9905 && CONST_INT_P (XEXP (x
, 1))
9906 && INTVAL (XEXP (x
, 1)) == -16)
9909 return (legitimate_indirect_address_p (x
, reg_ok_strict
)
9910 || legitimate_indexed_address_p (x
, reg_ok_strict
)
9911 || virtual_stack_registers_memory_p (x
));
9914 if (legitimate_indirect_address_p (x
, reg_ok_strict
))
9917 && (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == PRE_DEC
)
9918 && mode_supports_pre_incdec_p (mode
)
9919 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
))
9922 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9923 if (address_is_prefixed (x
, mode
, NON_PREFIXED_DEFAULT
))
9926 /* Handle restricted vector d-form offsets in ISA 3.0. */
9929 if (quad_address_p (x
, mode
, reg_ok_strict
))
9932 else if (virtual_stack_registers_memory_p (x
))
9935 else if (reg_offset_p
)
9937 if (legitimate_small_data_p (mode
, x
))
9939 if (legitimate_constant_pool_address_p (x
, mode
,
9940 reg_ok_strict
|| lra_in_progress
))
9944 /* For TImode, if we have TImode in VSX registers, only allow register
9945 indirect addresses. This will allow the values to go in either GPRs
9946 or VSX registers without reloading. The vector types would tend to
9947 go into VSX registers, so we allow REG+REG, while TImode seems
9948 somewhat split, in that some uses are GPR based, and some VSX based. */
9949 /* FIXME: We could loosen this by changing the following to
9950 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9951 but currently we cannot allow REG+REG addressing for TImode. See
9952 PR72827 for complete details on how this ends up hoodwinking DSE. */
9953 if (mode
== TImode
&& TARGET_VSX
)
9955 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9958 && GET_CODE (x
) == PLUS
9959 && REG_P (XEXP (x
, 0))
9960 && (XEXP (x
, 0) == virtual_stack_vars_rtx
9961 || XEXP (x
, 0) == arg_pointer_rtx
)
9962 && CONST_INT_P (XEXP (x
, 1)))
9964 if (rs6000_legitimate_offset_address_p (mode
, x
, reg_ok_strict
, false))
9966 if (!FLOAT128_2REG_P (mode
)
9967 && (TARGET_HARD_FLOAT
9969 || (mode
!= DFmode
&& mode
!= DDmode
))
9970 && (TARGET_POWERPC64
|| mode
!= DImode
)
9971 && (mode
!= TImode
|| VECTOR_MEM_VSX_P (TImode
))
9973 && !avoiding_indexed_address_p (mode
)
9974 && legitimate_indexed_address_p (x
, reg_ok_strict
))
9976 if (TARGET_UPDATE
&& GET_CODE (x
) == PRE_MODIFY
9977 && mode_supports_pre_modify_p (mode
)
9978 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
)
9979 && (rs6000_legitimate_offset_address_p (mode
, XEXP (x
, 1),
9980 reg_ok_strict
, false)
9981 || (!avoiding_indexed_address_p (mode
)
9982 && legitimate_indexed_address_p (XEXP (x
, 1), reg_ok_strict
)))
9983 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
9985 /* There is no prefixed version of the load/store with update. */
9986 rtx addr
= XEXP (x
, 1);
9987 return !address_is_prefixed (addr
, mode
, NON_PREFIXED_DEFAULT
);
9989 if (reg_offset_p
&& !quad_offset_p
9990 && legitimate_lo_sum_address_p (mode
, x
, reg_ok_strict
))
9995 /* Debug version of rs6000_legitimate_address_p. */
9997 rs6000_debug_legitimate_address_p (machine_mode mode
, rtx x
, bool reg_ok_strict
,
10000 bool ret
= rs6000_legitimate_address_p (mode
, x
, reg_ok_strict
, ch
);
10002 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10003 "strict = %d, reload = %s, code = %s\n",
10004 ret
? "true" : "false",
10005 GET_MODE_NAME (mode
),
10007 (reload_completed
? "after" : "before"),
10008 GET_RTX_NAME (GET_CODE (x
)));
10014 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10017 rs6000_mode_dependent_address_p (const_rtx addr
,
10018 addr_space_t as ATTRIBUTE_UNUSED
)
10020 return rs6000_mode_dependent_address_ptr (addr
);
10023 /* Go to LABEL if ADDR (a legitimate address expression)
10024 has an effect that depends on the machine mode it is used for.
10026 On the RS/6000 this is true of all integral offsets (since AltiVec
10027 and VSX modes don't allow them) or is a pre-increment or decrement.
10029 ??? Except that due to conceptual problems in offsettable_address_p
10030 we can't really report the problems of integral offsets. So leave
10031 this assuming that the adjustable offset must be valid for the
10032 sub-words of a TFmode operand, which is what we had before. */
10035 rs6000_mode_dependent_address (const_rtx addr
)
10037 switch (GET_CODE (addr
))
10040 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10041 is considered a legitimate address before reload, so there
10042 are no offset restrictions in that case. Note that this
10043 condition is safe in strict mode because any address involving
10044 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10045 been rejected as illegitimate. */
10046 if (XEXP (addr
, 0) != virtual_stack_vars_rtx
10047 && XEXP (addr
, 0) != arg_pointer_rtx
10048 && CONST_INT_P (XEXP (addr
, 1)))
10050 HOST_WIDE_INT val
= INTVAL (XEXP (addr
, 1));
10051 HOST_WIDE_INT extra
= TARGET_POWERPC64
? 8 : 12;
10052 if (TARGET_PREFIXED
)
10053 return !SIGNED_34BIT_OFFSET_EXTRA_P (val
, extra
);
10055 return !SIGNED_16BIT_OFFSET_EXTRA_P (val
, extra
);
10060 /* Anything in the constant pool is sufficiently aligned that
10061 all bytes have the same high part address. */
10062 return !legitimate_constant_pool_address_p (addr
, QImode
, false);
10064 /* Auto-increment cases are now treated generically in recog.cc. */
10066 return TARGET_UPDATE
;
10068 /* AND is only allowed in Altivec loads. */
10079 /* Debug version of rs6000_mode_dependent_address. */
10081 rs6000_debug_mode_dependent_address (const_rtx addr
)
10083 bool ret
= rs6000_mode_dependent_address (addr
);
10085 fprintf (stderr
, "\nrs6000_mode_dependent_address: ret = %s\n",
10086 ret
? "true" : "false");
10092 /* Implement FIND_BASE_TERM. */
10095 rs6000_find_base_term (rtx op
)
10100 if (GET_CODE (base
) == CONST
)
10101 base
= XEXP (base
, 0);
10102 if (GET_CODE (base
) == PLUS
)
10103 base
= XEXP (base
, 0);
10104 if (GET_CODE (base
) == UNSPEC
)
10105 switch (XINT (base
, 1))
10107 case UNSPEC_TOCREL
:
10108 case UNSPEC_MACHOPIC_OFFSET
:
10109 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10110 for aliasing purposes. */
10111 return XVECEXP (base
, 0, 0);
10117 /* More elaborate version of recog's offsettable_memref_p predicate
10118 that works around the ??? note of rs6000_mode_dependent_address.
10119 In particular it accepts
10121 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10123 in 32-bit mode, that the recog predicate rejects. */
10126 rs6000_offsettable_memref_p (rtx op
, machine_mode reg_mode
, bool strict
)
10133 /* First mimic offsettable_memref_p. */
10134 if (offsettable_address_p (strict
, GET_MODE (op
), XEXP (op
, 0)))
10137 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10138 the latter predicate knows nothing about the mode of the memory
10139 reference and, therefore, assumes that it is the largest supported
10140 mode (TFmode). As a consequence, legitimate offsettable memory
10141 references are rejected. rs6000_legitimate_offset_address_p contains
10142 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10143 at least with a little bit of help here given that we know the
10144 actual registers used. */
10145 worst_case
= ((TARGET_POWERPC64
&& GET_MODE_CLASS (reg_mode
) == MODE_INT
)
10146 || GET_MODE_SIZE (reg_mode
) == 4);
10147 return rs6000_legitimate_offset_address_p (GET_MODE (op
), XEXP (op
, 0),
10148 strict
, worst_case
);
10151 /* Determine the reassociation width to be used in reassociate_bb.
10152 This takes into account how many parallel operations we
10153 can actually do of a given type, and also the latency.
10155 int add/sub 6/cycle
10157 vect add/sub/mul 2/cycle
10158 fp add/sub/mul 2/cycle
10163 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
10166 switch (rs6000_tune
)
10168 case PROCESSOR_POWER8
:
10169 case PROCESSOR_POWER9
:
10170 case PROCESSOR_POWER10
:
10171 if (DECIMAL_FLOAT_MODE_P (mode
))
10173 if (VECTOR_MODE_P (mode
))
10175 if (INTEGRAL_MODE_P (mode
))
10177 if (FLOAT_MODE_P (mode
))
10186 /* Change register usage conditional on target flags. */
10188 rs6000_conditional_register_usage (void)
10192 if (TARGET_DEBUG_TARGET
)
10193 fprintf (stderr
, "rs6000_conditional_register_usage called\n");
10195 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10197 fixed_regs
[13] = call_used_regs
[13] = 1;
10199 /* Conditionally disable FPRs. */
10200 if (TARGET_SOFT_FLOAT
)
10201 for (i
= 32; i
< 64; i
++)
10202 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10204 /* The TOC register is not killed across calls in a way that is
10205 visible to the compiler. */
10206 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
10207 call_used_regs
[2] = 0;
10209 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 2)
10210 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10212 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 1)
10213 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10214 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10216 if (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
10217 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10218 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10220 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
)
10221 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10223 if (!TARGET_ALTIVEC
&& !TARGET_VSX
)
10225 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
10226 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10227 call_used_regs
[VRSAVE_REGNO
] = 1;
10230 if (TARGET_ALTIVEC
|| TARGET_VSX
)
10231 global_regs
[VSCR_REGNO
] = 1;
10233 if (TARGET_ALTIVEC_ABI
)
10235 for (i
= FIRST_ALTIVEC_REGNO
; i
< FIRST_ALTIVEC_REGNO
+ 20; ++i
)
10236 call_used_regs
[i
] = 1;
10238 /* AIX reserves VR20:31 in non-extended ABI mode. */
10239 if (TARGET_XCOFF
&& !rs6000_aix_extabi
)
10240 for (i
= FIRST_ALTIVEC_REGNO
+ 20; i
< FIRST_ALTIVEC_REGNO
+ 32; ++i
)
10241 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10246 /* Output insns to set DEST equal to the constant SOURCE as a series of
10247 lis, ori and shl instructions and return TRUE. */
10250 rs6000_emit_set_const (rtx dest
, rtx source
)
10252 machine_mode mode
= GET_MODE (dest
);
10257 gcc_checking_assert (CONST_INT_P (source
));
10258 c
= INTVAL (source
);
10263 emit_insn (gen_rtx_SET (dest
, source
));
10267 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (SImode
);
10269 emit_insn (gen_rtx_SET (temp
, GEN_INT (c
& ~(HOST_WIDE_INT
) 0xffff)));
10270 emit_insn (gen_rtx_SET (dest
,
10271 gen_rtx_IOR (SImode
, temp
,
10272 GEN_INT (c
& 0xffff))));
10276 if (!TARGET_POWERPC64
)
10280 hi
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
== 0, DImode
);
10281 lo
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
!= 0, DImode
);
10282 emit_move_insn (hi
, GEN_INT (c
>> 32));
10283 c
= sext_hwi (c
, 32);
10284 emit_move_insn (lo
, GEN_INT (c
));
10287 rs6000_emit_set_long_const (dest
, c
);
10291 gcc_unreachable ();
10294 insn
= get_last_insn ();
10295 set
= single_set (insn
);
10296 if (! CONSTANT_P (SET_SRC (set
)))
10297 set_unique_reg_note (insn
, REG_EQUAL
, GEN_INT (c
));
10302 /* Check if value C can be built by 2 instructions: one is 'li', another is
10305 If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
10306 is set to the mask operand of rotldi(rldicl), and return true.
10307 Return false otherwise. */
10310 can_be_built_by_li_and_rotldi (HOST_WIDE_INT c
, int *shift
,
10311 HOST_WIDE_INT
*mask
)
10313 /* If C or ~C contains at least 49 successive zeros, then C can be rotated
10314 to/from a positive or negative value that 'li' is able to load. */
10316 if (can_be_rotated_to_lowbits (c
, 15, &n
)
10317 || can_be_rotated_to_lowbits (~c
, 15, &n
))
10319 *mask
= HOST_WIDE_INT_M1
;
10320 *shift
= HOST_BITS_PER_WIDE_INT
- n
;
10327 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10328 Output insns to set DEST equal to the constant C as a series of
10329 lis, ori and shl instructions. */
10332 rs6000_emit_set_long_const (rtx dest
, HOST_WIDE_INT c
)
10336 HOST_WIDE_INT mask
;
10337 HOST_WIDE_INT ud1
, ud2
, ud3
, ud4
;
10340 ud2
= (c
>> 16) & 0xffff;
10341 ud3
= (c
>> 32) & 0xffff;
10342 ud4
= (c
>> 48) & 0xffff;
10344 if ((ud4
== 0xffff && ud3
== 0xffff && ud2
== 0xffff && (ud1
& 0x8000))
10345 || (ud4
== 0 && ud3
== 0 && ud2
== 0 && ! (ud1
& 0x8000)))
10346 emit_move_insn (dest
, GEN_INT (sext_hwi (ud1
, 16)));
10348 else if ((ud4
== 0xffff && ud3
== 0xffff && (ud2
& 0x8000))
10349 || (ud4
== 0 && ud3
== 0 && ! (ud2
& 0x8000)))
10351 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10353 emit_move_insn (ud1
!= 0 ? temp
: dest
,
10354 GEN_INT (sext_hwi (ud2
<< 16, 32)));
10356 emit_move_insn (dest
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
10358 else if (ud4
== 0xffff && ud3
== 0xffff && !(ud2
& 0x8000) && ud1
== 0)
10361 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10362 emit_move_insn (temp
, GEN_INT (sext_hwi ((ud2
| 0x8000) << 16, 32)));
10363 emit_move_insn (dest
, gen_rtx_XOR (DImode
, temp
, GEN_INT (0x80000000)));
10365 else if (ud4
== 0xffff && ud3
== 0xffff && (ud1
& 0x8000))
10368 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10369 emit_move_insn (temp
, GEN_INT (sext_hwi (ud1
, 16)));
10370 emit_move_insn (dest
, gen_rtx_XOR (DImode
, temp
,
10371 GEN_INT ((ud2
^ 0xffff) << 16)));
10373 else if (can_be_built_by_li_and_rotldi (c
, &shift
, &mask
))
10375 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10376 unsigned HOST_WIDE_INT imm
= (c
| ~mask
);
10377 imm
= (imm
>> shift
) | (imm
<< (HOST_BITS_PER_WIDE_INT
- shift
));
10379 emit_move_insn (temp
, GEN_INT (imm
));
10381 temp
= gen_rtx_ROTATE (DImode
, temp
, GEN_INT (shift
));
10382 emit_move_insn (dest
, temp
);
10384 else if (ud3
== 0 && ud4
== 0)
10386 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10388 gcc_assert (ud2
& 0x8000);
10393 emit_move_insn (temp
, GEN_INT (sext_hwi (ud2
<< 16, 32)));
10394 emit_move_insn (dest
,
10395 gen_rtx_AND (DImode
, temp
, GEN_INT (0xffffffff)));
10397 else if (!(ud1
& 0x8000))
10400 emit_move_insn (temp
, GEN_INT (ud1
));
10401 emit_move_insn (dest
,
10402 gen_rtx_IOR (DImode
, temp
, GEN_INT (ud2
<< 16)));
10406 /* lis; ori; rldicl */
10407 emit_move_insn (temp
, GEN_INT (sext_hwi (ud2
<< 16, 32)));
10408 emit_move_insn (temp
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
10409 emit_move_insn (dest
,
10410 gen_rtx_AND (DImode
, temp
, GEN_INT (0xffffffff)));
10413 else if (ud1
== ud3
&& ud2
== ud4
)
10415 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10416 HOST_WIDE_INT num
= (ud2
<< 16) | ud1
;
10417 rs6000_emit_set_long_const (temp
, sext_hwi (num
, 32));
10418 rtx one
= gen_rtx_AND (DImode
, temp
, GEN_INT (0xffffffff));
10419 rtx two
= gen_rtx_ASHIFT (DImode
, temp
, GEN_INT (32));
10420 emit_move_insn (dest
, gen_rtx_IOR (DImode
, one
, two
));
10422 else if ((ud4
== 0xffff && (ud3
& 0x8000))
10423 || (ud4
== 0 && ! (ud3
& 0x8000)))
10425 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10427 emit_move_insn (temp
, GEN_INT (sext_hwi (ud3
<< 16, 32)));
10429 emit_move_insn (temp
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud2
)));
10430 emit_move_insn (ud1
!= 0 ? temp
: dest
,
10431 gen_rtx_ASHIFT (DImode
, temp
, GEN_INT (16)));
10433 emit_move_insn (dest
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
10435 else if (TARGET_PREFIXED
)
10437 if (can_create_pseudo_p ())
10439 /* pli A,L + pli B,H + rldimi A,B,32,0. */
10440 temp
= gen_reg_rtx (DImode
);
10441 rtx temp1
= gen_reg_rtx (DImode
);
10442 emit_move_insn (temp
, GEN_INT ((ud4
<< 16) | ud3
));
10443 emit_move_insn (temp1
, GEN_INT ((ud2
<< 16) | ud1
));
10445 emit_insn (gen_rotldi3_insert_3 (dest
, temp
, GEN_INT (32), temp1
,
10446 GEN_INT (0xffffffff)));
10450 /* pli A,H + sldi A,32 + paddi A,A,L. */
10451 emit_move_insn (dest
, GEN_INT ((ud4
<< 16) | ud3
));
10453 emit_move_insn (dest
, gen_rtx_ASHIFT (DImode
, dest
, GEN_INT (32)));
10455 bool can_use_paddi
= REGNO (dest
) != FIRST_GPR_REGNO
;
10457 /* Use paddi for the low 32 bits. */
10458 if (ud2
!= 0 && ud1
!= 0 && can_use_paddi
)
10459 emit_move_insn (dest
, gen_rtx_PLUS (DImode
, dest
,
10460 GEN_INT ((ud2
<< 16) | ud1
)));
10462 /* Use oris, ori for low 32 bits. */
10463 if (ud2
!= 0 && (ud1
== 0 || !can_use_paddi
))
10464 emit_move_insn (dest
,
10465 gen_rtx_IOR (DImode
, dest
, GEN_INT (ud2
<< 16)));
10466 if (ud1
!= 0 && (ud2
== 0 || !can_use_paddi
))
10467 emit_move_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud1
)));
10472 if (can_create_pseudo_p ())
10474 /* lis HIGH,UD4 ; ori HIGH,UD3 ;
10475 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */
10476 rtx high
= gen_reg_rtx (DImode
);
10477 rtx low
= gen_reg_rtx (DImode
);
10478 HOST_WIDE_INT num
= (ud2
<< 16) | ud1
;
10479 rs6000_emit_set_long_const (low
, sext_hwi (num
, 32));
10480 num
= (ud4
<< 16) | ud3
;
10481 rs6000_emit_set_long_const (high
, sext_hwi (num
, 32));
10482 emit_insn (gen_rotldi3_insert_3 (dest
, high
, GEN_INT (32), low
,
10483 GEN_INT (0xffffffff)));
10487 /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
10488 oris DEST,UD2 ; ori DEST,UD1. */
10489 emit_move_insn (dest
, GEN_INT (sext_hwi (ud4
<< 16, 32)));
10491 emit_move_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud3
)));
10493 emit_move_insn (dest
, gen_rtx_ASHIFT (DImode
, dest
, GEN_INT (32)));
10495 emit_move_insn (dest
,
10496 gen_rtx_IOR (DImode
, dest
, GEN_INT (ud2
<< 16)));
10498 emit_move_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud1
)));
10503 /* Helper for the following. Get rid of [r+r] memory refs
10504 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10507 rs6000_eliminate_indexed_memrefs (rtx operands
[2])
10509 if (MEM_P (operands
[0])
10510 && !REG_P (XEXP (operands
[0], 0))
10511 && ! legitimate_constant_pool_address_p (XEXP (operands
[0], 0),
10512 GET_MODE (operands
[0]), false))
10514 = replace_equiv_address (operands
[0],
10515 copy_addr_to_reg (XEXP (operands
[0], 0)));
10517 if (MEM_P (operands
[1])
10518 && !REG_P (XEXP (operands
[1], 0))
10519 && ! legitimate_constant_pool_address_p (XEXP (operands
[1], 0),
10520 GET_MODE (operands
[1]), false))
10522 = replace_equiv_address (operands
[1],
10523 copy_addr_to_reg (XEXP (operands
[1], 0)));
10526 /* Generate a vector of constants to permute MODE for a little-endian
10527 storage operation by swapping the two halves of a vector. */
10529 rs6000_const_vec (machine_mode mode
)
10557 v
= rtvec_alloc (subparts
);
10559 for (i
= 0; i
< subparts
/ 2; ++i
)
10560 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
+ subparts
/ 2);
10561 for (i
= subparts
/ 2; i
< subparts
; ++i
)
10562 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
- subparts
/ 2);
10567 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10568 store operation. */
10570 rs6000_emit_le_vsx_permute (rtx dest
, rtx source
, machine_mode mode
)
10572 gcc_assert (!altivec_indexed_or_indirect_operand (dest
, mode
));
10573 gcc_assert (!altivec_indexed_or_indirect_operand (source
, mode
));
10575 /* Scalar permutations are easier to express in integer modes rather than
10576 floating-point modes, so cast them here. We use V1TImode instead
10577 of TImode to ensure that the values don't go through GPRs. */
10578 if (FLOAT128_VECTOR_P (mode
))
10580 dest
= gen_lowpart (V1TImode
, dest
);
10581 source
= gen_lowpart (V1TImode
, source
);
10585 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10587 if (mode
== TImode
|| mode
== V1TImode
)
10588 emit_insn (gen_rtx_SET (dest
, gen_rtx_ROTATE (mode
, source
,
10592 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rs6000_const_vec (mode
));
10593 emit_insn (gen_rtx_SET (dest
, gen_rtx_VEC_SELECT (mode
, source
, par
)));
10597 /* Emit a little-endian load from vector memory location SOURCE to VSX
10598 register DEST in mode MODE. The load is done with two permuting
10599 insn's that represent an lxvd2x and xxpermdi. */
10601 rs6000_emit_le_vsx_load (rtx dest
, rtx source
, machine_mode mode
)
10603 /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
10605 if (mode
== TImode
|| mode
== V1TImode
)
10608 dest
= gen_lowpart (V2DImode
, dest
);
10609 source
= adjust_address (source
, V2DImode
, 0);
10612 rtx tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest
) : dest
;
10613 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
10614 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
10617 /* Emit a little-endian store to vector memory location DEST from VSX
10618 register SOURCE in mode MODE. The store is done with two permuting
10619 insn's that represent an xxpermdi and an stxvd2x. */
10621 rs6000_emit_le_vsx_store (rtx dest
, rtx source
, machine_mode mode
)
10623 /* This should never be called after LRA. */
10624 gcc_assert (can_create_pseudo_p ());
10626 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10628 if (mode
== TImode
|| mode
== V1TImode
)
10631 dest
= adjust_address (dest
, V2DImode
, 0);
10632 source
= gen_lowpart (V2DImode
, source
);
10635 rtx tmp
= gen_reg_rtx_and_attrs (source
);
10636 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
10637 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
10640 /* Emit a sequence representing a little-endian VSX load or store,
10641 moving data from SOURCE to DEST in mode MODE. This is done
10642 separately from rs6000_emit_move to ensure it is called only
10643 during expand. LE VSX loads and stores introduced later are
10644 handled with a split. The expand-time RTL generation allows
10645 us to optimize away redundant pairs of register-permutes. */
10647 rs6000_emit_le_vsx_move (rtx dest
, rtx source
, machine_mode mode
)
10649 gcc_assert (!BYTES_BIG_ENDIAN
10650 && VECTOR_MEM_VSX_P (mode
)
10651 && !TARGET_P9_VECTOR
10652 && !gpr_or_gpr_p (dest
, source
)
10653 && (MEM_P (source
) ^ MEM_P (dest
)));
10655 if (MEM_P (source
))
10657 gcc_assert (REG_P (dest
) || SUBREG_P (dest
));
10658 rs6000_emit_le_vsx_load (dest
, source
, mode
);
10662 if (!REG_P (source
))
10663 source
= force_reg (mode
, source
);
10664 rs6000_emit_le_vsx_store (dest
, source
, mode
);
10668 /* Return whether a SFmode or SImode move can be done without converting one
10669 mode to another. This arrises when we have:
10671 (SUBREG:SF (REG:SI ...))
10672 (SUBREG:SI (REG:SF ...))
10674 and one of the values is in a floating point/vector register, where SFmode
10675 scalars are stored in DFmode format. */
10678 valid_sf_si_move (rtx dest
, rtx src
, machine_mode mode
)
10680 if (TARGET_ALLOW_SF_SUBREG
)
10683 if (mode
!= SFmode
&& GET_MODE_CLASS (mode
) != MODE_INT
)
10686 if (!SUBREG_P (src
) || !sf_subreg_operand (src
, mode
))
10689 /*. Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10690 if (SUBREG_P (dest
))
10692 rtx dest_subreg
= SUBREG_REG (dest
);
10693 rtx src_subreg
= SUBREG_REG (src
);
10694 return GET_MODE (dest_subreg
) == GET_MODE (src_subreg
);
10701 /* Helper function to change moves with:
10703 (SUBREG:SF (REG:SI)) and
10704 (SUBREG:SI (REG:SF))
10706 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10707 values are stored as DFmode values in the VSX registers. We need to convert
10708 the bits before we can use a direct move or operate on the bits in the
10709 vector register as an integer type.
10711 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
10714 rs6000_emit_move_si_sf_subreg (rtx dest
, rtx source
, machine_mode mode
)
10716 if (TARGET_DIRECT_MOVE_64BIT
&& !reload_completed
10717 && (!SUBREG_P (dest
) || !sf_subreg_operand (dest
, mode
))
10718 && SUBREG_P (source
) && sf_subreg_operand (source
, mode
))
10720 rtx inner_source
= SUBREG_REG (source
);
10721 machine_mode inner_mode
= GET_MODE (inner_source
);
10723 if (mode
== SImode
&& inner_mode
== SFmode
)
10725 emit_insn (gen_movsi_from_sf (dest
, inner_source
));
10729 if (mode
== SFmode
&& inner_mode
== SImode
)
10731 emit_insn (gen_movsf_from_si (dest
, inner_source
));
10739 /* Emit a move from SOURCE to DEST in mode MODE. */
10741 rs6000_emit_move (rtx dest
, rtx source
, machine_mode mode
)
10744 operands
[0] = dest
;
10745 operands
[1] = source
;
10747 if (TARGET_DEBUG_ADDR
)
10750 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10751 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10752 GET_MODE_NAME (mode
),
10755 can_create_pseudo_p ());
10757 fprintf (stderr
, "source:\n");
10758 debug_rtx (source
);
10761 /* Check that we get CONST_WIDE_INT only when we should. */
10762 if (CONST_WIDE_INT_P (operands
[1])
10763 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
10764 gcc_unreachable ();
10766 #ifdef HAVE_AS_GNU_ATTRIBUTE
10767 /* If we use a long double type, set the flags in .gnu_attribute that say
10768 what the long double type is. This is to allow the linker's warning
10769 message for the wrong long double to be useful, even if the function does
10770 not do a call (for example, doing a 128-bit add on power9 if the long
10771 double type is IEEE 128-bit. Do not set this if __ibm128 or __floa128 are
10772 used if they aren't the default long dobule type. */
10773 if (rs6000_gnu_attr
&& (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
))
10775 if (TARGET_LONG_DOUBLE_128
&& (mode
== TFmode
|| mode
== TCmode
))
10776 rs6000_passes_float
= rs6000_passes_long_double
= true;
10778 else if (!TARGET_LONG_DOUBLE_128
&& (mode
== DFmode
|| mode
== DCmode
))
10779 rs6000_passes_float
= rs6000_passes_long_double
= true;
10783 /* See if we need to special case SImode/SFmode SUBREG moves. */
10784 if ((mode
== SImode
|| mode
== SFmode
) && SUBREG_P (source
)
10785 && rs6000_emit_move_si_sf_subreg (dest
, source
, mode
))
10788 /* Check if GCC is setting up a block move that will end up using FP
10789 registers as temporaries. We must make sure this is acceptable. */
10790 if (MEM_P (operands
[0])
10791 && MEM_P (operands
[1])
10793 && (rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[0]))
10794 || rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[1])))
10795 && ! (rs6000_slow_unaligned_access (SImode
,
10796 (MEM_ALIGN (operands
[0]) > 32
10797 ? 32 : MEM_ALIGN (operands
[0])))
10798 || rs6000_slow_unaligned_access (SImode
,
10799 (MEM_ALIGN (operands
[1]) > 32
10800 ? 32 : MEM_ALIGN (operands
[1]))))
10801 && ! MEM_VOLATILE_P (operands
[0])
10802 && ! MEM_VOLATILE_P (operands
[1]))
10804 emit_move_insn (adjust_address (operands
[0], SImode
, 0),
10805 adjust_address (operands
[1], SImode
, 0));
10806 emit_move_insn (adjust_address (copy_rtx (operands
[0]), SImode
, 4),
10807 adjust_address (copy_rtx (operands
[1]), SImode
, 4));
10811 if (can_create_pseudo_p () && MEM_P (operands
[0])
10812 && !gpc_reg_operand (operands
[1], mode
))
10813 operands
[1] = force_reg (mode
, operands
[1]);
10815 /* Recognize the case where operand[1] is a reference to thread-local
10816 data and load its address to a register. */
10817 if (tls_referenced_p (operands
[1]))
10819 enum tls_model model
;
10820 rtx tmp
= operands
[1];
10823 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
10825 addend
= XEXP (XEXP (tmp
, 0), 1);
10826 tmp
= XEXP (XEXP (tmp
, 0), 0);
10829 gcc_assert (SYMBOL_REF_P (tmp
));
10830 model
= SYMBOL_REF_TLS_MODEL (tmp
);
10831 gcc_assert (model
!= 0);
10833 tmp
= rs6000_legitimize_tls_address (tmp
, model
);
10836 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
10837 tmp
= force_operand (tmp
, operands
[0]);
10842 /* 128-bit constant floating-point values on Darwin should really be loaded
10843 as two parts. However, this premature splitting is a problem when DFmode
10844 values can go into Altivec registers. */
10845 if (TARGET_MACHO
&& CONST_DOUBLE_P (operands
[1]) && FLOAT128_IBM_P (mode
)
10846 && !reg_addr
[DFmode
].scalar_in_vmx_p
)
10848 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
, 0),
10849 simplify_gen_subreg (DFmode
, operands
[1], mode
, 0),
10851 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
,
10852 GET_MODE_SIZE (DFmode
)),
10853 simplify_gen_subreg (DFmode
, operands
[1], mode
,
10854 GET_MODE_SIZE (DFmode
)),
10859 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10860 p1:SD) if p1 is not of floating point class and p0 is spilled as
10861 we can have no analogous movsd_store for this. */
10862 if (lra_in_progress
&& mode
== DDmode
10863 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
10864 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10865 && SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1]))
10866 && GET_MODE (SUBREG_REG (operands
[1])) == SDmode
)
10869 int regno
= REGNO (SUBREG_REG (operands
[1]));
10871 if (!HARD_REGISTER_NUM_P (regno
))
10873 cl
= reg_preferred_class (regno
);
10874 regno
= reg_renumber
[regno
];
10876 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][1];
10878 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10881 operands
[0] = gen_lowpart_SUBREG (SDmode
, operands
[0]);
10882 operands
[1] = SUBREG_REG (operands
[1]);
10885 if (lra_in_progress
10887 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
10888 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10889 && (REG_P (operands
[1])
10890 || (SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1])))))
10892 int regno
= reg_or_subregno (operands
[1]);
10895 if (!HARD_REGISTER_NUM_P (regno
))
10897 cl
= reg_preferred_class (regno
);
10898 gcc_assert (cl
!= NO_REGS
);
10899 regno
= reg_renumber
[regno
];
10901 regno
= ira_class_hard_regs
[cl
][0];
10903 if (FP_REGNO_P (regno
))
10905 if (GET_MODE (operands
[0]) != DDmode
)
10906 operands
[0] = gen_rtx_SUBREG (DDmode
, operands
[0], 0);
10907 emit_insn (gen_movsd_store (operands
[0], operands
[1]));
10909 else if (INT_REGNO_P (regno
))
10910 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10915 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10916 p:DD)) if p0 is not of floating point class and p1 is spilled as
10917 we can have no analogous movsd_load for this. */
10918 if (lra_in_progress
&& mode
== DDmode
10919 && SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))
10920 && GET_MODE (SUBREG_REG (operands
[0])) == SDmode
10921 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
10922 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10925 int regno
= REGNO (SUBREG_REG (operands
[0]));
10927 if (!HARD_REGISTER_NUM_P (regno
))
10929 cl
= reg_preferred_class (regno
);
10930 regno
= reg_renumber
[regno
];
10932 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][0];
10934 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10937 operands
[0] = SUBREG_REG (operands
[0]);
10938 operands
[1] = gen_lowpart_SUBREG (SDmode
, operands
[1]);
10941 if (lra_in_progress
10943 && (REG_P (operands
[0])
10944 || (SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))))
10945 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
10946 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10948 int regno
= reg_or_subregno (operands
[0]);
10951 if (!HARD_REGISTER_NUM_P (regno
))
10953 cl
= reg_preferred_class (regno
);
10954 gcc_assert (cl
!= NO_REGS
);
10955 regno
= reg_renumber
[regno
];
10957 regno
= ira_class_hard_regs
[cl
][0];
10959 if (FP_REGNO_P (regno
))
10961 if (GET_MODE (operands
[1]) != DDmode
)
10962 operands
[1] = gen_rtx_SUBREG (DDmode
, operands
[1], 0);
10963 emit_insn (gen_movsd_load (operands
[0], operands
[1]));
10965 else if (INT_REGNO_P (regno
))
10966 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10972 /* FIXME: In the long term, this switch statement should go away
10973 and be replaced by a sequence of tests based on things like
10979 if (CONSTANT_P (operands
[1])
10980 && !CONST_INT_P (operands
[1]))
10981 operands
[1] = force_const_mem (mode
, operands
[1]);
10988 if (FLOAT128_2REG_P (mode
))
10989 rs6000_eliminate_indexed_memrefs (operands
);
10996 if (CONSTANT_P (operands
[1])
10997 && ! easy_fp_constant (operands
[1], mode
))
10998 operands
[1] = force_const_mem (mode
, operands
[1]);
11008 if (CONSTANT_P (operands
[1])
11009 && !easy_vector_constant (operands
[1], mode
))
11010 operands
[1] = force_const_mem (mode
, operands
[1]);
11015 if (CONST_INT_P (operands
[1]) && INTVAL (operands
[1]) != 0)
11016 error ("%qs is an opaque type, and you cannot set it to other values",
11017 (mode
== OOmode
) ? "__vector_pair" : "__vector_quad");
11022 /* Use default pattern for address of ELF small data */
11025 && DEFAULT_ABI
== ABI_V4
11026 && (SYMBOL_REF_P (operands
[1])
11027 || GET_CODE (operands
[1]) == CONST
)
11028 && small_data_operand (operands
[1], mode
))
11030 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11034 /* Use the default pattern for loading up PC-relative addresses. */
11035 if (TARGET_PCREL
&& mode
== Pmode
11036 && pcrel_local_or_external_address (operands
[1], Pmode
))
11038 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11042 if (DEFAULT_ABI
== ABI_V4
11043 && mode
== Pmode
&& mode
== SImode
11044 && flag_pic
== 1 && got_operand (operands
[1], mode
))
11046 emit_insn (gen_movsi_got (operands
[0], operands
[1]));
11050 if ((TARGET_ELF
|| DEFAULT_ABI
== ABI_DARWIN
)
11051 && TARGET_NO_TOC_OR_PCREL
11054 && CONSTANT_P (operands
[1])
11055 && GET_CODE (operands
[1]) != HIGH
11056 && !CONST_INT_P (operands
[1]))
11058 rtx target
= (!can_create_pseudo_p ()
11060 : gen_reg_rtx (mode
));
11062 /* If this is a function address on -mcall-aixdesc,
11063 convert it to the address of the descriptor. */
11064 if (DEFAULT_ABI
== ABI_AIX
11065 && SYMBOL_REF_P (operands
[1])
11066 && XSTR (operands
[1], 0)[0] == '.')
11068 const char *name
= XSTR (operands
[1], 0);
11070 while (*name
== '.')
11072 new_ref
= gen_rtx_SYMBOL_REF (Pmode
, name
);
11073 CONSTANT_POOL_ADDRESS_P (new_ref
)
11074 = CONSTANT_POOL_ADDRESS_P (operands
[1]);
11075 SYMBOL_REF_FLAGS (new_ref
) = SYMBOL_REF_FLAGS (operands
[1]);
11076 SYMBOL_REF_USED (new_ref
) = SYMBOL_REF_USED (operands
[1]);
11077 SYMBOL_REF_DATA (new_ref
) = SYMBOL_REF_DATA (operands
[1]);
11078 operands
[1] = new_ref
;
11081 if (DEFAULT_ABI
== ABI_DARWIN
)
11084 /* This is not PIC code, but could require the subset of
11085 indirections used by mdynamic-no-pic. */
11086 if (MACHO_DYNAMIC_NO_PIC_P
)
11088 /* Take care of any required data indirection. */
11089 operands
[1] = rs6000_machopic_legitimize_pic_address (
11090 operands
[1], mode
, operands
[0]);
11091 if (operands
[0] != operands
[1])
11092 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11096 emit_insn (gen_macho_high (Pmode
, target
, operands
[1]));
11097 emit_insn (gen_macho_low (Pmode
, operands
[0],
11098 target
, operands
[1]));
11102 emit_insn (gen_elf_high (target
, operands
[1]));
11103 emit_insn (gen_elf_low (operands
[0], target
, operands
[1]));
11107 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11108 and we have put it in the TOC, we just need to make a TOC-relative
11109 reference to it. */
11111 && SYMBOL_REF_P (operands
[1])
11112 && use_toc_relative_ref (operands
[1], mode
))
11113 operands
[1] = create_TOC_reference (operands
[1], operands
[0]);
11114 else if (mode
== Pmode
11115 && CONSTANT_P (operands
[1])
11116 && GET_CODE (operands
[1]) != HIGH
11117 && ((REG_P (operands
[0])
11118 && FP_REGNO_P (REGNO (operands
[0])))
11119 || !CONST_INT_P (operands
[1])
11120 || (num_insns_constant (operands
[1], mode
)
11121 > (TARGET_CMODEL
!= CMODEL_SMALL
? 3 : 2)))
11122 && !toc_relative_expr_p (operands
[1], false, NULL
, NULL
)
11123 && (TARGET_CMODEL
== CMODEL_SMALL
11124 || can_create_pseudo_p ()
11125 || (REG_P (operands
[0])
11126 && INT_REG_OK_FOR_BASE_P (operands
[0], true))))
11130 /* Darwin uses a special PIC legitimizer. */
11131 if (DEFAULT_ABI
== ABI_DARWIN
&& MACHOPIC_INDIRECT
)
11134 rs6000_machopic_legitimize_pic_address (operands
[1], mode
,
11136 if (operands
[0] != operands
[1])
11137 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11142 /* If we are to limit the number of things we put in the TOC and
11143 this is a symbol plus a constant we can add in one insn,
11144 just put the symbol in the TOC and add the constant. */
11145 if (GET_CODE (operands
[1]) == CONST
11146 && TARGET_NO_SUM_IN_TOC
11147 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
11148 && add_operand (XEXP (XEXP (operands
[1], 0), 1), mode
)
11149 && (GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == LABEL_REF
11150 || SYMBOL_REF_P (XEXP (XEXP (operands
[1], 0), 0)))
11151 && ! side_effects_p (operands
[0]))
11154 force_const_mem (mode
, XEXP (XEXP (operands
[1], 0), 0));
11155 rtx other
= XEXP (XEXP (operands
[1], 0), 1);
11157 sym
= force_reg (mode
, sym
);
11158 emit_insn (gen_add3_insn (operands
[0], sym
, other
));
11162 operands
[1] = force_const_mem (mode
, operands
[1]);
11165 && SYMBOL_REF_P (XEXP (operands
[1], 0))
11166 && use_toc_relative_ref (XEXP (operands
[1], 0), mode
))
11168 rtx tocref
= create_TOC_reference (XEXP (operands
[1], 0),
11170 operands
[1] = gen_const_mem (mode
, tocref
);
11171 set_mem_alias_set (operands
[1], get_TOC_alias_set ());
11177 if (!VECTOR_MEM_VSX_P (TImode
))
11178 rs6000_eliminate_indexed_memrefs (operands
);
11182 rs6000_eliminate_indexed_memrefs (operands
);
11186 fatal_insn ("bad move", gen_rtx_SET (dest
, source
));
11189 /* Above, we may have called force_const_mem which may have returned
11190 an invalid address. If we can, fix this up; otherwise, reload will
11191 have to deal with it. */
11192 if (MEM_P (operands
[1]))
11193 operands
[1] = validize_mem (operands
[1]);
11195 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11199 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
11201 init_float128_ibm (machine_mode mode
)
11203 if (!TARGET_XL_COMPAT
)
11205 set_optab_libfunc (add_optab
, mode
, "__gcc_qadd");
11206 set_optab_libfunc (sub_optab
, mode
, "__gcc_qsub");
11207 set_optab_libfunc (smul_optab
, mode
, "__gcc_qmul");
11208 set_optab_libfunc (sdiv_optab
, mode
, "__gcc_qdiv");
11210 if (!TARGET_HARD_FLOAT
)
11212 set_optab_libfunc (neg_optab
, mode
, "__gcc_qneg");
11213 set_optab_libfunc (eq_optab
, mode
, "__gcc_qeq");
11214 set_optab_libfunc (ne_optab
, mode
, "__gcc_qne");
11215 set_optab_libfunc (gt_optab
, mode
, "__gcc_qgt");
11216 set_optab_libfunc (ge_optab
, mode
, "__gcc_qge");
11217 set_optab_libfunc (lt_optab
, mode
, "__gcc_qlt");
11218 set_optab_libfunc (le_optab
, mode
, "__gcc_qle");
11219 set_optab_libfunc (unord_optab
, mode
, "__gcc_qunord");
11221 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__gcc_stoq");
11222 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__gcc_dtoq");
11223 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__gcc_qtos");
11224 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__gcc_qtod");
11225 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__gcc_qtoi");
11226 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__gcc_qtou");
11227 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__gcc_itoq");
11228 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__gcc_utoq");
11233 set_optab_libfunc (add_optab
, mode
, "_xlqadd");
11234 set_optab_libfunc (sub_optab
, mode
, "_xlqsub");
11235 set_optab_libfunc (smul_optab
, mode
, "_xlqmul");
11236 set_optab_libfunc (sdiv_optab
, mode
, "_xlqdiv");
11239 /* Add various conversions for IFmode to use the traditional TFmode
11241 if (mode
== IFmode
)
11243 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdtf");
11244 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddtf");
11245 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdtf");
11246 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunctfsd");
11247 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunctfdd");
11248 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendtftd");
11250 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixtfdi");
11251 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunstfdi");
11253 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatditf");
11254 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatunditf");
11256 if (TARGET_POWERPC64
)
11258 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixtfti");
11259 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunstfti");
11260 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattitf");
11261 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntitf");
11266 /* Set up IEEE 128-bit floating point routines. Use different names if the
11267 arguments can be passed in a vector register. The historical PowerPC
11268 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11269 continue to use that if we aren't using vector registers to pass IEEE
11270 128-bit floating point. */
11273 init_float128_ieee (machine_mode mode
)
11275 if (FLOAT128_VECTOR_P (mode
))
11277 set_optab_libfunc (add_optab
, mode
, "__addkf3");
11278 set_optab_libfunc (sub_optab
, mode
, "__subkf3");
11279 set_optab_libfunc (neg_optab
, mode
, "__negkf2");
11280 set_optab_libfunc (smul_optab
, mode
, "__mulkf3");
11281 set_optab_libfunc (sdiv_optab
, mode
, "__divkf3");
11282 set_optab_libfunc (sqrt_optab
, mode
, "__sqrtkf2");
11283 set_optab_libfunc (abs_optab
, mode
, "__abskf2");
11284 set_optab_libfunc (powi_optab
, mode
, "__powikf2");
11286 set_optab_libfunc (eq_optab
, mode
, "__eqkf2");
11287 set_optab_libfunc (ne_optab
, mode
, "__nekf2");
11288 set_optab_libfunc (gt_optab
, mode
, "__gtkf2");
11289 set_optab_libfunc (ge_optab
, mode
, "__gekf2");
11290 set_optab_libfunc (lt_optab
, mode
, "__ltkf2");
11291 set_optab_libfunc (le_optab
, mode
, "__lekf2");
11292 set_optab_libfunc (unord_optab
, mode
, "__unordkf2");
11294 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__extendsfkf2");
11295 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__extenddfkf2");
11296 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__trunckfsf2");
11297 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__trunckfdf2");
11299 set_conv_libfunc (sext_optab
, mode
, IFmode
, "__trunctfkf2");
11300 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
11301 set_conv_libfunc (sext_optab
, mode
, TFmode
, "__trunctfkf2");
11303 set_conv_libfunc (trunc_optab
, IFmode
, mode
, "__extendkftf2");
11304 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
11305 set_conv_libfunc (trunc_optab
, TFmode
, mode
, "__extendkftf2");
11307 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdkf");
11308 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddkf");
11309 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdkf");
11310 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunckfsd");
11311 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunckfdd");
11312 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendkftd");
11314 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__fixkfsi");
11315 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__fixunskfsi");
11316 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixkfdi");
11317 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunskfdi");
11319 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__floatsikf");
11320 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__floatunsikf");
11321 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatdikf");
11322 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatundikf");
11324 if (TARGET_POWERPC64
)
11326 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixkfti_sw");
11327 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunskfti_sw");
11328 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattikf_sw");
11329 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntikf_sw");
11335 set_optab_libfunc (add_optab
, mode
, "_q_add");
11336 set_optab_libfunc (sub_optab
, mode
, "_q_sub");
11337 set_optab_libfunc (neg_optab
, mode
, "_q_neg");
11338 set_optab_libfunc (smul_optab
, mode
, "_q_mul");
11339 set_optab_libfunc (sdiv_optab
, mode
, "_q_div");
11340 if (TARGET_PPC_GPOPT
)
11341 set_optab_libfunc (sqrt_optab
, mode
, "_q_sqrt");
11343 set_optab_libfunc (eq_optab
, mode
, "_q_feq");
11344 set_optab_libfunc (ne_optab
, mode
, "_q_fne");
11345 set_optab_libfunc (gt_optab
, mode
, "_q_fgt");
11346 set_optab_libfunc (ge_optab
, mode
, "_q_fge");
11347 set_optab_libfunc (lt_optab
, mode
, "_q_flt");
11348 set_optab_libfunc (le_optab
, mode
, "_q_fle");
11350 set_conv_libfunc (sext_optab
, mode
, SFmode
, "_q_stoq");
11351 set_conv_libfunc (sext_optab
, mode
, DFmode
, "_q_dtoq");
11352 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "_q_qtos");
11353 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "_q_qtod");
11354 set_conv_libfunc (sfix_optab
, SImode
, mode
, "_q_qtoi");
11355 set_conv_libfunc (ufix_optab
, SImode
, mode
, "_q_qtou");
11356 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "_q_itoq");
11357 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "_q_utoq");
11362 rs6000_init_libfuncs (void)
11364 /* __float128 support. */
11365 if (TARGET_FLOAT128_TYPE
)
11367 init_float128_ibm (IFmode
);
11368 init_float128_ieee (KFmode
);
11371 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11372 if (TARGET_LONG_DOUBLE_128
)
11374 if (!TARGET_IEEEQUAD
)
11375 init_float128_ibm (TFmode
);
11377 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11379 init_float128_ieee (TFmode
);
11383 /* Emit a potentially record-form instruction, setting DST from SRC.
11384 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11385 signed comparison of DST with zero. If DOT is 1, the generated RTL
11386 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11387 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11388 a separate COMPARE. */
11391 rs6000_emit_dot_insn (rtx dst
, rtx src
, int dot
, rtx ccreg
)
11395 emit_move_insn (dst
, src
);
11399 if (cc_reg_not_cr0_operand (ccreg
, CCmode
))
11401 emit_move_insn (dst
, src
);
11402 emit_move_insn (ccreg
, gen_rtx_COMPARE (CCmode
, dst
, const0_rtx
));
11406 rtx ccset
= gen_rtx_SET (ccreg
, gen_rtx_COMPARE (CCmode
, src
, const0_rtx
));
11409 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, dst
);
11410 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, clobber
)));
11414 rtx set
= gen_rtx_SET (dst
, src
);
11415 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, set
)));
11420 /* A validation routine: say whether CODE, a condition code, and MODE
11421 match. The other alternatives either don't make sense or should
11422 never be generated. */
11425 validate_condition_mode (enum rtx_code code
, machine_mode mode
)
11427 gcc_assert ((GET_RTX_CLASS (code
) == RTX_COMPARE
11428 || GET_RTX_CLASS (code
) == RTX_COMM_COMPARE
)
11429 && GET_MODE_CLASS (mode
) == MODE_CC
);
11431 /* These don't make sense. */
11432 gcc_assert ((code
!= GT
&& code
!= LT
&& code
!= GE
&& code
!= LE
)
11433 || mode
!= CCUNSmode
);
11435 gcc_assert ((code
!= GTU
&& code
!= LTU
&& code
!= GEU
&& code
!= LEU
)
11436 || mode
== CCUNSmode
);
11438 gcc_assert (mode
== CCFPmode
11439 || (code
!= ORDERED
&& code
!= UNORDERED
11440 && code
!= UNEQ
&& code
!= LTGT
11441 && code
!= UNGT
&& code
!= UNLT
11442 && code
!= UNGE
&& code
!= UNLE
));
11444 /* These are invalid; the information is not there. */
11445 gcc_assert (mode
!= CCEQmode
|| code
== EQ
|| code
== NE
);
11449 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11450 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11451 not zero, store there the bit offset (counted from the right) where
11452 the single stretch of 1 bits begins; and similarly for B, the bit
11453 offset where it ends. */
11456 rs6000_is_valid_mask (rtx mask
, int *b
, int *e
, machine_mode mode
)
11458 unsigned HOST_WIDE_INT val
= INTVAL (mask
);
11459 unsigned HOST_WIDE_INT bit
;
11461 int n
= GET_MODE_PRECISION (mode
);
11463 if (mode
!= DImode
&& mode
!= SImode
)
11466 if (INTVAL (mask
) >= 0)
11469 ne
= exact_log2 (bit
);
11470 nb
= exact_log2 (val
+ bit
);
11472 else if (val
+ 1 == 0)
11481 nb
= exact_log2 (bit
);
11482 ne
= exact_log2 (val
+ bit
);
11487 ne
= exact_log2 (bit
);
11488 if (val
+ bit
== 0)
11496 if (nb
< 0 || ne
< 0 || nb
>= n
|| ne
>= n
)
11508 rs6000_is_valid_rotate_dot_mask (rtx mask
, machine_mode mode
)
11511 if (rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
) && nb
>= ne
&& ne
> 0)
11515 /* *rotldi3_mask_dot requires for -m32 -mpowerpc64 that the mask is
11517 return (UINTVAL (mask
) << (63 - nb
)) <= 0x7fffffff;
11523 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11524 or rldicr instruction, to implement an AND with it in mode MODE. */
11527 rs6000_is_valid_and_mask (rtx mask
, machine_mode mode
)
11531 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11534 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11536 if (mode
== DImode
)
11537 return (ne
== 0 || nb
== 63 || (nb
< 32 && ne
<= nb
));
11539 /* For SImode, rlwinm can do everything. */
11540 if (mode
== SImode
)
11541 return (nb
< 32 && ne
< 32);
11546 /* Return the instruction template for an AND with mask in mode MODE, with
11547 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11550 rs6000_insn_for_and_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11554 if (!rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
))
11555 gcc_unreachable ();
11557 if (mode
== DImode
&& ne
== 0)
11559 operands
[3] = GEN_INT (63 - nb
);
11561 return "rldicl. %0,%1,0,%3";
11562 return "rldicl %0,%1,0,%3";
11565 if (mode
== DImode
&& nb
== 63)
11567 operands
[3] = GEN_INT (63 - ne
);
11569 return "rldicr. %0,%1,0,%3";
11570 return "rldicr %0,%1,0,%3";
11573 if (nb
< 32 && ne
< 32)
11575 operands
[3] = GEN_INT (31 - nb
);
11576 operands
[4] = GEN_INT (31 - ne
);
11578 return "rlwinm. %0,%1,0,%3,%4";
11579 return "rlwinm %0,%1,0,%3,%4";
11582 gcc_unreachable ();
11585 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11586 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11587 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11590 rs6000_is_valid_shift_mask (rtx mask
, rtx shift
, machine_mode mode
)
11594 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11597 int n
= GET_MODE_PRECISION (mode
);
11600 if (CONST_INT_P (XEXP (shift
, 1)))
11602 sh
= INTVAL (XEXP (shift
, 1));
11603 if (sh
< 0 || sh
>= n
)
11607 rtx_code code
= GET_CODE (shift
);
11609 /* Convert any shift by 0 to a rotate, to simplify below code. */
11613 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11614 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11616 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11622 /* DImode rotates need rld*. */
11623 if (mode
== DImode
&& code
== ROTATE
)
11624 return (nb
== 63 || ne
== 0 || ne
== sh
);
11626 /* SImode rotates need rlw*. */
11627 if (mode
== SImode
&& code
== ROTATE
)
11628 return (nb
< 32 && ne
< 32 && sh
< 32);
11630 /* Wrap-around masks are only okay for rotates. */
11634 /* Variable shifts are only okay for rotates. */
11638 /* Don't allow ASHIFT if the mask is wrong for that. */
11639 if (code
== ASHIFT
&& ne
< sh
)
11642 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11643 if the mask is wrong for that. */
11644 if (nb
< 32 && ne
< 32 && sh
< 32
11645 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11648 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11649 if the mask is wrong for that. */
11650 if (code
== LSHIFTRT
)
11652 if (nb
== 63 || ne
== 0 || ne
== sh
)
11653 return !(code
== LSHIFTRT
&& nb
>= sh
);
11658 /* Return the instruction template for a shift with mask in mode MODE, with
11659 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11662 rs6000_insn_for_shift_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11666 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11667 gcc_unreachable ();
11669 if (mode
== DImode
&& ne
== 0)
11671 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11672 operands
[2] = GEN_INT (64 - INTVAL (operands
[2]));
11673 operands
[3] = GEN_INT (63 - nb
);
11675 return "rld%I2cl. %0,%1,%2,%3";
11676 return "rld%I2cl %0,%1,%2,%3";
11679 if (mode
== DImode
&& nb
== 63)
11681 operands
[3] = GEN_INT (63 - ne
);
11683 return "rld%I2cr. %0,%1,%2,%3";
11684 return "rld%I2cr %0,%1,%2,%3";
11688 && GET_CODE (operands
[4]) != LSHIFTRT
11689 && CONST_INT_P (operands
[2])
11690 && ne
== INTVAL (operands
[2]))
11692 operands
[3] = GEN_INT (63 - nb
);
11694 return "rld%I2c. %0,%1,%2,%3";
11695 return "rld%I2c %0,%1,%2,%3";
11698 if (nb
< 32 && ne
< 32)
11700 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11701 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
11702 operands
[3] = GEN_INT (31 - nb
);
11703 operands
[4] = GEN_INT (31 - ne
);
11704 /* This insn can also be a 64-bit rotate with mask that really makes
11705 it just a shift right (with mask); the %h below are to adjust for
11706 that situation (shift count is >= 32 in that case). */
11708 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11709 return "rlw%I2nm %0,%1,%h2,%3,%4";
11712 gcc_unreachable ();
11715 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11716 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11717 ASHIFT, or LSHIFTRT) in mode MODE. */
11720 rs6000_is_valid_insert_mask (rtx mask
, rtx shift
, machine_mode mode
)
11724 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11727 int n
= GET_MODE_PRECISION (mode
);
11729 int sh
= INTVAL (XEXP (shift
, 1));
11730 if (sh
< 0 || sh
>= n
)
11733 rtx_code code
= GET_CODE (shift
);
11735 /* Convert any shift by 0 to a rotate, to simplify below code. */
11739 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11740 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11742 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11748 /* DImode rotates need rldimi. */
11749 if (mode
== DImode
&& code
== ROTATE
)
11752 /* SImode rotates need rlwimi. */
11753 if (mode
== SImode
&& code
== ROTATE
)
11754 return (nb
< 32 && ne
< 32 && sh
< 32);
11756 /* Wrap-around masks are only okay for rotates. */
11760 /* Don't allow ASHIFT if the mask is wrong for that. */
11761 if (code
== ASHIFT
&& ne
< sh
)
11764 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11765 if the mask is wrong for that. */
11766 if (nb
< 32 && ne
< 32 && sh
< 32
11767 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11770 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11771 if the mask is wrong for that. */
11772 if (code
== LSHIFTRT
)
11775 return !(code
== LSHIFTRT
&& nb
>= sh
);
11780 /* Return the instruction template for an insert with mask in mode MODE, with
11781 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11784 rs6000_insn_for_insert_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11788 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11789 gcc_unreachable ();
11791 /* Prefer rldimi because rlwimi is cracked. */
11792 if (TARGET_POWERPC64
11793 && (!dot
|| mode
== DImode
)
11794 && GET_CODE (operands
[4]) != LSHIFTRT
11795 && ne
== INTVAL (operands
[2]))
11797 operands
[3] = GEN_INT (63 - nb
);
11799 return "rldimi. %0,%1,%2,%3";
11800 return "rldimi %0,%1,%2,%3";
11803 if (nb
< 32 && ne
< 32)
11805 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11806 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
11807 operands
[3] = GEN_INT (31 - nb
);
11808 operands
[4] = GEN_INT (31 - ne
);
11810 return "rlwimi. %0,%1,%2,%3,%4";
11811 return "rlwimi %0,%1,%2,%3,%4";
11814 gcc_unreachable ();
11817 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11818 using two machine instructions. */
11821 rs6000_is_valid_2insn_and (rtx c
, machine_mode mode
)
11823 /* There are two kinds of AND we can handle with two insns:
11824 1) those we can do with two rl* insn;
11827 We do not handle that last case yet. */
11829 /* If there is just one stretch of ones, we can do it. */
11830 if (rs6000_is_valid_mask (c
, NULL
, NULL
, mode
))
11833 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11834 one insn, we can do the whole thing with two. */
11835 unsigned HOST_WIDE_INT val
= INTVAL (c
);
11836 unsigned HOST_WIDE_INT bit1
= val
& -val
;
11837 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
11838 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
11839 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
11840 return rs6000_is_valid_and_mask (GEN_INT (val
+ bit3
- bit2
), mode
);
11843 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11844 If EXPAND is true, split rotate-and-mask instructions we generate to
11845 their constituent parts as well (this is used during expand); if DOT
11846 is 1, make the last insn a record-form instruction clobbering the
11847 destination GPR and setting the CC reg (from operands[3]); if 2, set
11848 that GPR as well as the CC reg. */
11851 rs6000_emit_2insn_and (machine_mode mode
, rtx
*operands
, bool expand
, int dot
)
11853 gcc_assert (!(expand
&& dot
));
11855 unsigned HOST_WIDE_INT val
= INTVAL (operands
[2]);
11857 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11858 shift right. This generates better code than doing the masks without
11859 shifts, or shifting first right and then left. */
11861 if (rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
) && nb
>= ne
)
11863 gcc_assert (mode
== DImode
);
11865 int shift
= 63 - nb
;
11868 rtx tmp1
= gen_reg_rtx (DImode
);
11869 rtx tmp2
= gen_reg_rtx (DImode
);
11870 emit_insn (gen_ashldi3 (tmp1
, operands
[1], GEN_INT (shift
)));
11871 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (val
<< shift
)));
11872 emit_insn (gen_lshrdi3 (operands
[0], tmp2
, GEN_INT (shift
)));
11876 rtx tmp
= gen_rtx_ASHIFT (mode
, operands
[1], GEN_INT (shift
));
11877 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (val
<< shift
));
11878 emit_move_insn (operands
[0], tmp
);
11879 tmp
= gen_rtx_LSHIFTRT (mode
, operands
[0], GEN_INT (shift
));
11880 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11885 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11886 that does the rest. */
11887 unsigned HOST_WIDE_INT bit1
= val
& -val
;
11888 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
11889 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
11890 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
11892 unsigned HOST_WIDE_INT mask1
= -bit3
+ bit2
- 1;
11893 unsigned HOST_WIDE_INT mask2
= val
+ bit3
- bit2
;
11895 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2
), mode
));
11897 /* Two "no-rotate"-and-mask instructions, for SImode. */
11898 if (rs6000_is_valid_and_mask (GEN_INT (mask1
), mode
))
11900 gcc_assert (mode
== SImode
);
11902 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
11903 rtx tmp
= gen_rtx_AND (mode
, operands
[1], GEN_INT (mask1
));
11904 emit_move_insn (reg
, tmp
);
11905 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
11906 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11910 gcc_assert (mode
== DImode
);
11912 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11913 insns; we have to do the first in SImode, because it wraps. */
11914 if (mask2
<= 0xffffffff
11915 && rs6000_is_valid_and_mask (GEN_INT (mask1
), SImode
))
11917 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
11918 rtx tmp
= gen_rtx_AND (SImode
, gen_lowpart (SImode
, operands
[1]),
11920 rtx reg_low
= gen_lowpart (SImode
, reg
);
11921 emit_move_insn (reg_low
, tmp
);
11922 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
11923 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11927 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11928 at the top end), rotate back and clear the other hole. */
11929 int right
= exact_log2 (bit3
);
11930 int left
= 64 - right
;
11932 /* Rotate the mask too. */
11933 mask1
= (mask1
>> right
) | ((bit2
- 1) << left
);
11937 rtx tmp1
= gen_reg_rtx (DImode
);
11938 rtx tmp2
= gen_reg_rtx (DImode
);
11939 rtx tmp3
= gen_reg_rtx (DImode
);
11940 emit_insn (gen_rotldi3 (tmp1
, operands
[1], GEN_INT (left
)));
11941 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (mask1
)));
11942 emit_insn (gen_rotldi3 (tmp3
, tmp2
, GEN_INT (right
)));
11943 emit_insn (gen_anddi3 (operands
[0], tmp3
, GEN_INT (mask2
)));
11947 rtx tmp
= gen_rtx_ROTATE (mode
, operands
[1], GEN_INT (left
));
11948 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask1
));
11949 emit_move_insn (operands
[0], tmp
);
11950 tmp
= gen_rtx_ROTATE (mode
, operands
[0], GEN_INT (right
));
11951 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask2
));
11952 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11956 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
11957 for lfq and stfq insns iff the registers are hard registers. */
11960 registers_ok_for_quad_peep (rtx reg1
, rtx reg2
)
11962 /* We might have been passed a SUBREG. */
11963 if (!REG_P (reg1
) || !REG_P (reg2
))
11966 /* We might have been passed non floating point registers. */
11967 if (!FP_REGNO_P (REGNO (reg1
))
11968 || !FP_REGNO_P (REGNO (reg2
)))
11971 return (REGNO (reg1
) == REGNO (reg2
) - 1);
11974 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
11975 addr1 and addr2 must be in consecutive memory locations
11976 (addr2 == addr1 + 8). */
11979 mems_ok_for_quad_peep (rtx mem1
, rtx mem2
)
11982 unsigned int reg1
, reg2
;
11983 int offset1
, offset2
;
11985 /* The mems cannot be volatile. */
11986 if (MEM_VOLATILE_P (mem1
) || MEM_VOLATILE_P (mem2
))
11989 addr1
= XEXP (mem1
, 0);
11990 addr2
= XEXP (mem2
, 0);
11992 /* Extract an offset (if used) from the first addr. */
11993 if (GET_CODE (addr1
) == PLUS
)
11995 /* If not a REG, return zero. */
11996 if (!REG_P (XEXP (addr1
, 0)))
12000 reg1
= REGNO (XEXP (addr1
, 0));
12001 /* The offset must be constant! */
12002 if (!CONST_INT_P (XEXP (addr1
, 1)))
12004 offset1
= INTVAL (XEXP (addr1
, 1));
12007 else if (!REG_P (addr1
))
12011 reg1
= REGNO (addr1
);
12012 /* This was a simple (mem (reg)) expression. Offset is 0. */
12016 /* And now for the second addr. */
12017 if (GET_CODE (addr2
) == PLUS
)
12019 /* If not a REG, return zero. */
12020 if (!REG_P (XEXP (addr2
, 0)))
12024 reg2
= REGNO (XEXP (addr2
, 0));
12025 /* The offset must be constant. */
12026 if (!CONST_INT_P (XEXP (addr2
, 1)))
12028 offset2
= INTVAL (XEXP (addr2
, 1));
12031 else if (!REG_P (addr2
))
12035 reg2
= REGNO (addr2
);
12036 /* This was a simple (mem (reg)) expression. Offset is 0. */
12040 /* Both of these must have the same base register. */
12044 /* The offset for the second addr must be 8 more than the first addr. */
12045 if (offset2
!= offset1
+ 8)
12048 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
12053 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
12054 need to use DDmode, in all other cases we can use the same mode. */
12055 static machine_mode
12056 rs6000_secondary_memory_needed_mode (machine_mode mode
)
12058 if (lra_in_progress
&& mode
== SDmode
)
12063 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
12064 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
12065 only work on the traditional altivec registers, note if an altivec register
12068 static enum rs6000_reg_type
12069 register_to_reg_type (rtx reg
, bool *is_altivec
)
12071 HOST_WIDE_INT regno
;
12072 enum reg_class rclass
;
12074 if (SUBREG_P (reg
))
12075 reg
= SUBREG_REG (reg
);
12078 return NO_REG_TYPE
;
12080 regno
= REGNO (reg
);
12081 if (!HARD_REGISTER_NUM_P (regno
))
12083 if (!lra_in_progress
&& !reload_completed
)
12084 return PSEUDO_REG_TYPE
;
12086 regno
= true_regnum (reg
);
12087 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
))
12088 return PSEUDO_REG_TYPE
;
12091 gcc_assert (regno
>= 0);
12093 if (is_altivec
&& ALTIVEC_REGNO_P (regno
))
12094 *is_altivec
= true;
12096 rclass
= rs6000_regno_regclass
[regno
];
12097 return reg_class_to_reg_type
[(int)rclass
];
12100 /* Helper function to return the cost of adding a TOC entry address. */
12103 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask
)
12107 if (TARGET_CMODEL
!= CMODEL_SMALL
)
12108 ret
= ((addr_mask
& RELOAD_REG_OFFSET
) == 0) ? 1 : 2;
12111 ret
= (TARGET_MINIMAL_TOC
) ? 6 : 3;
12116 /* Helper function for rs6000_secondary_reload to determine whether the memory
12117 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
12118 needs reloading. Return negative if the memory is not handled by the memory
12119 helper functions and to try a different reload method, 0 if no additional
12120 instructions are need, and positive to give the extra cost for the
12124 rs6000_secondary_reload_memory (rtx addr
,
12125 enum reg_class rclass
,
12128 int extra_cost
= 0;
12129 rtx reg
, and_arg
, plus_arg0
, plus_arg1
;
12130 addr_mask_type addr_mask
;
12131 const char *type
= NULL
;
12132 const char *fail_msg
= NULL
;
12134 if (GPR_REG_CLASS_P (rclass
))
12135 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
12137 else if (rclass
== FLOAT_REGS
)
12138 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
12140 else if (rclass
== ALTIVEC_REGS
)
12141 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
12143 /* For the combined VSX_REGS, turn off Altivec AND -16. */
12144 else if (rclass
== VSX_REGS
)
12145 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
]
12146 & ~RELOAD_REG_AND_M16
);
12148 /* If the register allocator hasn't made up its mind yet on the register
12149 class to use, settle on defaults to use. */
12150 else if (rclass
== NO_REGS
)
12152 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
]
12153 & ~RELOAD_REG_AND_M16
);
12155 if ((addr_mask
& RELOAD_REG_MULTIPLE
) != 0)
12156 addr_mask
&= ~(RELOAD_REG_INDEXED
12157 | RELOAD_REG_PRE_INCDEC
12158 | RELOAD_REG_PRE_MODIFY
);
12164 /* If the register isn't valid in this register class, just return now. */
12165 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
12167 if (TARGET_DEBUG_ADDR
)
12170 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12171 "not valid in class\n",
12172 GET_MODE_NAME (mode
), reg_class_names
[rclass
]);
12179 switch (GET_CODE (addr
))
12181 /* Does the register class supports auto update forms for this mode? We
12182 don't need a scratch register, since the powerpc only supports
12183 PRE_INC, PRE_DEC, and PRE_MODIFY. */
12186 reg
= XEXP (addr
, 0);
12187 if (!base_reg_operand (addr
, GET_MODE (reg
)))
12189 fail_msg
= "no base register #1";
12193 else if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
12201 reg
= XEXP (addr
, 0);
12202 plus_arg1
= XEXP (addr
, 1);
12203 if (!base_reg_operand (reg
, GET_MODE (reg
))
12204 || GET_CODE (plus_arg1
) != PLUS
12205 || !rtx_equal_p (reg
, XEXP (plus_arg1
, 0)))
12207 fail_msg
= "bad PRE_MODIFY";
12211 else if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
12218 /* Do we need to simulate AND -16 to clear the bottom address bits used
12219 in VMX load/stores? Only allow the AND for vector sizes. */
12221 and_arg
= XEXP (addr
, 0);
12222 if (GET_MODE_SIZE (mode
) != 16
12223 || !CONST_INT_P (XEXP (addr
, 1))
12224 || INTVAL (XEXP (addr
, 1)) != -16)
12226 fail_msg
= "bad Altivec AND #1";
12230 if (rclass
!= ALTIVEC_REGS
)
12232 if (legitimate_indirect_address_p (and_arg
, false))
12235 else if (legitimate_indexed_address_p (and_arg
, false))
12240 fail_msg
= "bad Altivec AND #2";
12248 /* If this is an indirect address, make sure it is a base register. */
12251 if (!legitimate_indirect_address_p (addr
, false))
12258 /* If this is an indexed address, make sure the register class can handle
12259 indexed addresses for this mode. */
12261 plus_arg0
= XEXP (addr
, 0);
12262 plus_arg1
= XEXP (addr
, 1);
12264 /* (plus (plus (reg) (constant)) (constant)) is generated during
12265 push_reload processing, so handle it now. */
12266 if (GET_CODE (plus_arg0
) == PLUS
&& CONST_INT_P (plus_arg1
))
12268 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12275 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12276 push_reload processing, so handle it now. */
12277 else if (GET_CODE (plus_arg0
) == PLUS
&& REG_P (plus_arg1
))
12279 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12282 type
= "indexed #2";
12286 else if (!base_reg_operand (plus_arg0
, GET_MODE (plus_arg0
)))
12288 fail_msg
= "no base register #2";
12292 else if (int_reg_operand (plus_arg1
, GET_MODE (plus_arg1
)))
12294 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0
12295 || !legitimate_indexed_address_p (addr
, false))
12302 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0
12303 && CONST_INT_P (plus_arg1
))
12305 if (!quad_address_offset_p (INTVAL (plus_arg1
)))
12308 type
= "vector d-form offset";
12312 /* Make sure the register class can handle offset addresses. */
12313 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
12315 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12318 type
= "offset #2";
12324 fail_msg
= "bad PLUS";
12331 /* Quad offsets are restricted and can't handle normal addresses. */
12332 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12335 type
= "vector d-form lo_sum";
12338 else if (!legitimate_lo_sum_address_p (mode
, addr
, false))
12340 fail_msg
= "bad LO_SUM";
12344 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12351 /* Static addresses need to create a TOC entry. */
12355 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12358 type
= "vector d-form lo_sum #2";
12364 extra_cost
= rs6000_secondary_reload_toc_costs (addr_mask
);
12368 /* TOC references look like offsetable memory. */
12370 if (TARGET_CMODEL
== CMODEL_SMALL
|| XINT (addr
, 1) != UNSPEC_TOCREL
)
12372 fail_msg
= "bad UNSPEC";
12376 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12379 type
= "vector d-form lo_sum #3";
12382 else if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12385 type
= "toc reference";
12391 fail_msg
= "bad address";
12396 if (TARGET_DEBUG_ADDR
/* && extra_cost != 0 */)
12398 if (extra_cost
< 0)
12400 "rs6000_secondary_reload_memory error: mode = %s, "
12401 "class = %s, addr_mask = '%s', %s\n",
12402 GET_MODE_NAME (mode
),
12403 reg_class_names
[rclass
],
12404 rs6000_debug_addr_mask (addr_mask
, false),
12405 (fail_msg
!= NULL
) ? fail_msg
: "<bad address>");
12409 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12410 "addr_mask = '%s', extra cost = %d, %s\n",
12411 GET_MODE_NAME (mode
),
12412 reg_class_names
[rclass
],
12413 rs6000_debug_addr_mask (addr_mask
, false),
12415 (type
) ? type
: "<none>");
12423 /* Helper function for rs6000_secondary_reload to return true if a move to a
12424 different register classe is really a simple move. */
12427 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type
,
12428 enum rs6000_reg_type from_type
,
12431 int size
= GET_MODE_SIZE (mode
);
12433 /* Add support for various direct moves available. In this function, we only
12434 look at cases where we don't need any extra registers, and one or more
12435 simple move insns are issued. Originally small integers are not allowed
12436 in FPR/VSX registers. Single precision binary floating is not a simple
12437 move because we need to convert to the single precision memory layout.
12438 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12439 need special direct move handling, which we do not support yet. */
12440 if (TARGET_DIRECT_MOVE
12441 && ((to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12442 || (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
12444 if (TARGET_POWERPC64
)
12446 /* ISA 2.07: MTVSRD or MVFVSRD. */
12450 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12451 if (size
== 16 && TARGET_P9_VECTOR
&& mode
!= TDmode
)
12455 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12456 if (TARGET_P8_VECTOR
)
12458 if (mode
== SImode
)
12461 if (TARGET_P9_VECTOR
&& (mode
== HImode
|| mode
== QImode
))
12465 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12466 if (mode
== SDmode
)
12470 /* Move to/from SPR. */
12471 else if ((size
== 4 || (TARGET_POWERPC64
&& size
== 8))
12472 && ((to_type
== GPR_REG_TYPE
&& from_type
== SPR_REG_TYPE
)
12473 || (to_type
== SPR_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
12479 /* Direct move helper function for rs6000_secondary_reload, handle all of the
12480 special direct moves that involve allocating an extra register, return the
12481 insn code of the helper function if there is such a function or
12482 CODE_FOR_nothing if not. */
12485 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type
,
12486 enum rs6000_reg_type from_type
,
12488 secondary_reload_info
*sri
,
12492 enum insn_code icode
= CODE_FOR_nothing
;
12494 int size
= GET_MODE_SIZE (mode
);
12496 if (TARGET_POWERPC64
&& size
== 16)
12498 /* Handle moving 128-bit values from GPRs to VSX point registers on
12499 ISA 2.07 (power8, power9) when running in 64-bit mode using
12500 XXPERMDI to glue the two 64-bit values back together. */
12501 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
12503 cost
= 3; /* 2 mtvsrd's, 1 xxpermdi. */
12504 icode
= reg_addr
[mode
].reload_vsx_gpr
;
12507 /* Handle moving 128-bit values from VSX point registers to GPRs on
12508 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12509 bottom 64-bit value. */
12510 else if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12512 cost
= 3; /* 2 mfvsrd's, 1 xxpermdi. */
12513 icode
= reg_addr
[mode
].reload_gpr_vsx
;
12517 else if (TARGET_POWERPC64
&& mode
== SFmode
)
12519 if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12521 cost
= 3; /* xscvdpspn, mfvsrd, and. */
12522 icode
= reg_addr
[mode
].reload_gpr_vsx
;
12525 else if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
12527 cost
= 2; /* mtvsrz, xscvspdpn. */
12528 icode
= reg_addr
[mode
].reload_vsx_gpr
;
12532 else if (!TARGET_POWERPC64
&& size
== 8)
12534 /* Handle moving 64-bit values from GPRs to floating point registers on
12535 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12536 32-bit values back together. Altivec register classes must be handled
12537 specially since a different instruction is used, and the secondary
12538 reload support requires a single instruction class in the scratch
12539 register constraint. However, right now TFmode is not allowed in
12540 Altivec registers, so the pattern will never match. */
12541 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
&& !altivec_p
)
12543 cost
= 3; /* 2 mtvsrwz's, 1 fmrgow. */
12544 icode
= reg_addr
[mode
].reload_fpr_gpr
;
12548 if (icode
!= CODE_FOR_nothing
)
12553 sri
->icode
= icode
;
12554 sri
->extra_cost
= cost
;
12561 /* Return whether a move between two register classes can be done either
12562 directly (simple move) or via a pattern that uses a single extra temporary
12563 (using ISA 2.07's direct move in this case. */
12566 rs6000_secondary_reload_move (enum rs6000_reg_type to_type
,
12567 enum rs6000_reg_type from_type
,
12569 secondary_reload_info
*sri
,
12572 /* Fall back to load/store reloads if either type is not a register. */
12573 if (to_type
== NO_REG_TYPE
|| from_type
== NO_REG_TYPE
)
12576 /* If we haven't allocated registers yet, assume the move can be done for the
12577 standard register types. */
12578 if ((to_type
== PSEUDO_REG_TYPE
&& from_type
== PSEUDO_REG_TYPE
)
12579 || (to_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (from_type
))
12580 || (from_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (to_type
)))
12583 /* Moves to the same set of registers is a simple move for non-specialized
12585 if (to_type
== from_type
&& IS_STD_REG_TYPE (to_type
))
12588 /* Check whether a simple move can be done directly. */
12589 if (rs6000_secondary_reload_simple_move (to_type
, from_type
, mode
))
12593 sri
->icode
= CODE_FOR_nothing
;
12594 sri
->extra_cost
= 0;
12599 /* Now check if we can do it in a few steps. */
12600 return rs6000_secondary_reload_direct_move (to_type
, from_type
, mode
, sri
,
12604 /* Inform reload about cases where moving X with a mode MODE to a register in
12605 RCLASS requires an extra scratch or immediate register. Return the class
12606 needed for the immediate register.
12608 For VSX and Altivec, we may need a register to convert sp+offset into
12611 For misaligned 64-bit gpr loads and stores we need a register to
12612 convert an offset address to indirect. */
12615 rs6000_secondary_reload (bool in_p
,
12617 reg_class_t rclass_i
,
12619 secondary_reload_info
*sri
)
12621 enum reg_class rclass
= (enum reg_class
) rclass_i
;
12622 reg_class_t ret
= ALL_REGS
;
12623 enum insn_code icode
;
12624 bool default_p
= false;
12625 bool done_p
= false;
12627 /* Allow subreg of memory before/during reload. */
12628 bool memory_p
= (MEM_P (x
)
12629 || (!reload_completed
&& SUBREG_P (x
)
12630 && MEM_P (SUBREG_REG (x
))));
12632 sri
->icode
= CODE_FOR_nothing
;
12633 sri
->t_icode
= CODE_FOR_nothing
;
12634 sri
->extra_cost
= 0;
12636 ? reg_addr
[mode
].reload_load
12637 : reg_addr
[mode
].reload_store
);
12639 if (REG_P (x
) || register_operand (x
, mode
))
12641 enum rs6000_reg_type to_type
= reg_class_to_reg_type
[(int)rclass
];
12642 bool altivec_p
= (rclass
== ALTIVEC_REGS
);
12643 enum rs6000_reg_type from_type
= register_to_reg_type (x
, &altivec_p
);
12646 std::swap (to_type
, from_type
);
12648 /* Can we do a direct move of some sort? */
12649 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
, sri
,
12652 icode
= (enum insn_code
)sri
->icode
;
12659 /* Make sure 0.0 is not reloaded or forced into memory. */
12660 if (x
== CONST0_RTX (mode
) && VSX_REG_CLASS_P (rclass
))
12667 /* If this is a scalar floating point value and we want to load it into the
12668 traditional Altivec registers, do it via a move via a traditional floating
12669 point register, unless we have D-form addressing. Also make sure that
12670 non-zero constants use a FPR. */
12671 if (!done_p
&& reg_addr
[mode
].scalar_in_vmx_p
12672 && !mode_supports_vmx_dform (mode
)
12673 && (rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
12674 && (memory_p
|| CONST_DOUBLE_P (x
)))
12681 /* Handle reload of load/stores if we have reload helper functions. */
12682 if (!done_p
&& icode
!= CODE_FOR_nothing
&& memory_p
)
12684 int extra_cost
= rs6000_secondary_reload_memory (XEXP (x
, 0), rclass
,
12687 if (extra_cost
>= 0)
12691 if (extra_cost
> 0)
12693 sri
->extra_cost
= extra_cost
;
12694 sri
->icode
= icode
;
12699 /* Handle unaligned loads and stores of integer registers. */
12700 if (!done_p
&& TARGET_POWERPC64
12701 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12703 && GET_MODE_SIZE (GET_MODE (x
)) >= UNITS_PER_WORD
)
12705 rtx addr
= XEXP (x
, 0);
12706 rtx off
= address_offset (addr
);
12708 if (off
!= NULL_RTX
)
12710 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12711 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12713 /* We need a secondary reload when our legitimate_address_p
12714 says the address is good (as otherwise the entire address
12715 will be reloaded), and the offset is not a multiple of
12716 four or we have an address wrap. Address wrap will only
12717 occur for LO_SUMs since legitimate_offset_address_p
12718 rejects addresses for 16-byte mems that will wrap. */
12719 if (GET_CODE (addr
) == LO_SUM
12720 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12721 && ((offset
& 3) != 0
12722 || ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
))
12723 : (offset
+ 0x8000 < 0x10000 - extra
/* legitimate_address_p */
12724 && (offset
& 3) != 0))
12726 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12728 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_load
12729 : CODE_FOR_reload_di_load
);
12731 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_store
12732 : CODE_FOR_reload_di_store
);
12733 sri
->extra_cost
= 2;
12744 if (!done_p
&& !TARGET_POWERPC64
12745 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12747 && GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
12749 rtx addr
= XEXP (x
, 0);
12750 rtx off
= address_offset (addr
);
12752 if (off
!= NULL_RTX
)
12754 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12755 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12757 /* We need a secondary reload when our legitimate_address_p
12758 says the address is good (as otherwise the entire address
12759 will be reloaded), and we have a wrap.
12761 legitimate_lo_sum_address_p allows LO_SUM addresses to
12762 have any offset so test for wrap in the low 16 bits.
12764 legitimate_offset_address_p checks for the range
12765 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12766 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12767 [0x7ff4,0x7fff] respectively, so test for the
12768 intersection of these ranges, [0x7ffc,0x7fff] and
12769 [0x7ff4,0x7ff7] respectively.
12771 Note that the address we see here may have been
12772 manipulated by legitimize_reload_address. */
12773 if (GET_CODE (addr
) == LO_SUM
12774 ? ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
12775 : offset
- (0x8000 - extra
) < UNITS_PER_WORD
)
12778 sri
->icode
= CODE_FOR_reload_si_load
;
12780 sri
->icode
= CODE_FOR_reload_si_store
;
12781 sri
->extra_cost
= 2;
12796 ret
= default_secondary_reload (in_p
, x
, rclass
, mode
, sri
);
12798 gcc_assert (ret
!= ALL_REGS
);
12800 if (TARGET_DEBUG_ADDR
)
12803 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12805 reg_class_names
[ret
],
12806 in_p
? "true" : "false",
12807 reg_class_names
[rclass
],
12808 GET_MODE_NAME (mode
));
12810 if (reload_completed
)
12811 fputs (", after reload", stderr
);
12814 fputs (", done_p not set", stderr
);
12817 fputs (", default secondary reload", stderr
);
12819 if (sri
->icode
!= CODE_FOR_nothing
)
12820 fprintf (stderr
, ", reload func = %s, extra cost = %d",
12821 insn_data
[sri
->icode
].name
, sri
->extra_cost
);
12823 else if (sri
->extra_cost
> 0)
12824 fprintf (stderr
, ", extra cost = %d", sri
->extra_cost
);
12826 fputs ("\n", stderr
);
12833 /* Better tracing for rs6000_secondary_reload_inner. */
12836 rs6000_secondary_reload_trace (int line
, rtx reg
, rtx mem
, rtx scratch
,
12841 gcc_assert (reg
!= NULL_RTX
&& mem
!= NULL_RTX
&& scratch
!= NULL_RTX
);
12843 fprintf (stderr
, "rs6000_secondary_reload_inner:%d, type = %s\n", line
,
12844 store_p
? "store" : "load");
12847 set
= gen_rtx_SET (mem
, reg
);
12849 set
= gen_rtx_SET (reg
, mem
);
12851 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
12852 debug_rtx (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
12855 static void rs6000_secondary_reload_fail (int, rtx
, rtx
, rtx
, bool)
12856 ATTRIBUTE_NORETURN
;
12859 rs6000_secondary_reload_fail (int line
, rtx reg
, rtx mem
, rtx scratch
,
12862 rs6000_secondary_reload_trace (line
, reg
, mem
, scratch
, store_p
);
12863 gcc_unreachable ();
12866 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12867 reload helper functions. These were identified in
12868 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12869 reload, it calls the insns:
12870 reload_<RELOAD:mode>_<P:mptrsize>_store
12871 reload_<RELOAD:mode>_<P:mptrsize>_load
12873 which in turn calls this function, to do whatever is necessary to create
12874 valid addresses. */
12877 rs6000_secondary_reload_inner (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
12879 int regno
= true_regnum (reg
);
12880 machine_mode mode
= GET_MODE (reg
);
12881 addr_mask_type addr_mask
;
12884 rtx op_reg
, op0
, op1
;
12889 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
) || !MEM_P (mem
)
12890 || !base_reg_operand (scratch
, GET_MODE (scratch
)))
12891 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12893 if (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
))
12894 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
12896 else if (IN_RANGE (regno
, FIRST_FPR_REGNO
, LAST_FPR_REGNO
))
12897 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
12899 else if (IN_RANGE (regno
, FIRST_ALTIVEC_REGNO
, LAST_ALTIVEC_REGNO
))
12900 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
12903 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12905 /* Make sure the mode is valid in this register class. */
12906 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
12907 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12909 if (TARGET_DEBUG_ADDR
)
12910 rs6000_secondary_reload_trace (__LINE__
, reg
, mem
, scratch
, store_p
);
12912 new_addr
= addr
= XEXP (mem
, 0);
12913 switch (GET_CODE (addr
))
12915 /* Does the register class support auto update forms for this mode? If
12916 not, do the update now. We don't need a scratch register, since the
12917 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
12920 op_reg
= XEXP (addr
, 0);
12921 if (!base_reg_operand (op_reg
, Pmode
))
12922 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12924 if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
12926 int delta
= GET_MODE_SIZE (mode
);
12927 if (GET_CODE (addr
) == PRE_DEC
)
12929 emit_insn (gen_add2_insn (op_reg
, GEN_INT (delta
)));
12935 op0
= XEXP (addr
, 0);
12936 op1
= XEXP (addr
, 1);
12937 if (!base_reg_operand (op0
, Pmode
)
12938 || GET_CODE (op1
) != PLUS
12939 || !rtx_equal_p (op0
, XEXP (op1
, 0)))
12940 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12942 if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
12944 emit_insn (gen_rtx_SET (op0
, op1
));
12949 /* Do we need to simulate AND -16 to clear the bottom address bits used
12950 in VMX load/stores? */
12952 op0
= XEXP (addr
, 0);
12953 op1
= XEXP (addr
, 1);
12954 if ((addr_mask
& RELOAD_REG_AND_M16
) == 0)
12956 if (REG_P (op0
) || SUBREG_P (op0
))
12959 else if (GET_CODE (op1
) == PLUS
)
12961 emit_insn (gen_rtx_SET (scratch
, op1
));
12966 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12968 and_op
= gen_rtx_AND (GET_MODE (scratch
), op_reg
, op1
);
12969 cc_clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (CCmode
));
12970 rv
= gen_rtvec (2, gen_rtx_SET (scratch
, and_op
), cc_clobber
);
12971 emit_insn (gen_rtx_PARALLEL (VOIDmode
, rv
));
12972 new_addr
= scratch
;
12976 /* If this is an indirect address, make sure it is a base register. */
12979 if (!base_reg_operand (addr
, GET_MODE (addr
)))
12981 emit_insn (gen_rtx_SET (scratch
, addr
));
12982 new_addr
= scratch
;
12986 /* If this is an indexed address, make sure the register class can handle
12987 indexed addresses for this mode. */
12989 op0
= XEXP (addr
, 0);
12990 op1
= XEXP (addr
, 1);
12991 if (!base_reg_operand (op0
, Pmode
))
12992 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12994 else if (int_reg_operand (op1
, Pmode
))
12996 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12998 emit_insn (gen_rtx_SET (scratch
, addr
));
12999 new_addr
= scratch
;
13003 else if (mode_supports_dq_form (mode
) && CONST_INT_P (op1
))
13005 if (((addr_mask
& RELOAD_REG_QUAD_OFFSET
) == 0)
13006 || !quad_address_p (addr
, mode
, false))
13008 emit_insn (gen_rtx_SET (scratch
, addr
));
13009 new_addr
= scratch
;
13013 /* Make sure the register class can handle offset addresses. */
13014 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
13016 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
13018 emit_insn (gen_rtx_SET (scratch
, addr
));
13019 new_addr
= scratch
;
13024 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13029 op0
= XEXP (addr
, 0);
13030 op1
= XEXP (addr
, 1);
13031 if (!base_reg_operand (op0
, Pmode
))
13032 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13034 else if (int_reg_operand (op1
, Pmode
))
13036 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
13038 emit_insn (gen_rtx_SET (scratch
, addr
));
13039 new_addr
= scratch
;
13043 /* Quad offsets are restricted and can't handle normal addresses. */
13044 else if (mode_supports_dq_form (mode
))
13046 emit_insn (gen_rtx_SET (scratch
, addr
));
13047 new_addr
= scratch
;
13050 /* Make sure the register class can handle offset addresses. */
13051 else if (legitimate_lo_sum_address_p (mode
, addr
, false))
13053 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
13055 emit_insn (gen_rtx_SET (scratch
, addr
));
13056 new_addr
= scratch
;
13061 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13068 rs6000_emit_move (scratch
, addr
, Pmode
);
13069 new_addr
= scratch
;
13073 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13076 /* Adjust the address if it changed. */
13077 if (addr
!= new_addr
)
13079 mem
= replace_equiv_address_nv (mem
, new_addr
);
13080 if (TARGET_DEBUG_ADDR
)
13081 fprintf (stderr
, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
13084 /* Now create the move. */
13086 emit_insn (gen_rtx_SET (mem
, reg
));
13088 emit_insn (gen_rtx_SET (reg
, mem
));
13093 /* Convert reloads involving 64-bit gprs and misaligned offset
13094 addressing, or multiple 32-bit gprs and offsets that are too large,
13095 to use indirect addressing. */
13098 rs6000_secondary_reload_gpr (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
13100 int regno
= true_regnum (reg
);
13101 enum reg_class rclass
;
13103 rtx scratch_or_premodify
= scratch
;
13105 if (TARGET_DEBUG_ADDR
)
13107 fprintf (stderr
, "\nrs6000_secondary_reload_gpr, type = %s\n",
13108 store_p
? "store" : "load");
13109 fprintf (stderr
, "reg:\n");
13111 fprintf (stderr
, "mem:\n");
13113 fprintf (stderr
, "scratch:\n");
13114 debug_rtx (scratch
);
13117 gcc_assert (regno
>= 0 && HARD_REGISTER_NUM_P (regno
));
13118 gcc_assert (MEM_P (mem
));
13119 rclass
= REGNO_REG_CLASS (regno
);
13120 gcc_assert (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
);
13121 addr
= XEXP (mem
, 0);
13123 if (GET_CODE (addr
) == PRE_MODIFY
)
13125 gcc_assert (REG_P (XEXP (addr
, 0))
13126 && GET_CODE (XEXP (addr
, 1)) == PLUS
13127 && XEXP (XEXP (addr
, 1), 0) == XEXP (addr
, 0));
13128 scratch_or_premodify
= XEXP (addr
, 0);
13129 addr
= XEXP (addr
, 1);
13131 gcc_assert (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
);
13133 rs6000_emit_move (scratch_or_premodify
, addr
, Pmode
);
13135 mem
= replace_equiv_address_nv (mem
, scratch_or_premodify
);
13137 /* Now create the move. */
13139 emit_insn (gen_rtx_SET (mem
, reg
));
13141 emit_insn (gen_rtx_SET (reg
, mem
));
13146 /* Given an rtx X being reloaded into a reg required to be
13147 in class CLASS, return the class of reg to actually use.
13148 In general this is just CLASS; but on some machines
13149 in some cases it is preferable to use a more restrictive class.
13151 On the RS/6000, we have to return NO_REGS when we want to reload a
13152 floating-point CONST_DOUBLE to force it to be copied to memory.
13154 We also don't want to reload integer values into floating-point
13155 registers if we can at all help it. In fact, this can
13156 cause reload to die, if it tries to generate a reload of CTR
13157 into a FP register and discovers it doesn't have the memory location
13160 ??? Would it be a good idea to have reload do the converse, that is
13161 try to reload floating modes into FP registers if possible?
13164 static enum reg_class
13165 rs6000_preferred_reload_class (rtx x
, enum reg_class rclass
)
13167 machine_mode mode
= GET_MODE (x
);
13168 bool is_constant
= CONSTANT_P (x
);
13170 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
13171 reload class for it. */
13172 if ((rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
13173 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
) == 0)
13176 if ((rclass
== FLOAT_REGS
|| rclass
== VSX_REGS
)
13177 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
] & RELOAD_REG_VALID
) == 0)
13180 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13181 the reloading of address expressions using PLUS into floating point
13183 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
) && GET_CODE (x
) != PLUS
)
13187 /* Zero is always allowed in all VSX registers. */
13188 if (x
== CONST0_RTX (mode
))
13191 /* If this is a vector constant that can be formed with a few Altivec
13192 instructions, we want altivec registers. */
13193 if (GET_CODE (x
) == CONST_VECTOR
&& easy_vector_constant (x
, mode
))
13194 return ALTIVEC_REGS
;
13196 /* If this is an integer constant that can easily be loaded into
13197 vector registers, allow it. */
13198 if (CONST_INT_P (x
))
13200 HOST_WIDE_INT value
= INTVAL (x
);
13202 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13203 2.06 can generate it in the Altivec registers with
13207 if (TARGET_P8_VECTOR
)
13209 else if (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
13210 return ALTIVEC_REGS
;
13215 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13216 a sign extend in the Altivec registers. */
13217 if (IN_RANGE (value
, -128, 127) && TARGET_P9_VECTOR
13218 && (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
))
13219 return ALTIVEC_REGS
;
13222 /* Force constant to memory. */
13226 /* D-form addressing can easily reload the value. */
13227 if (mode_supports_vmx_dform (mode
)
13228 || mode_supports_dq_form (mode
))
13231 /* If this is a scalar floating point value and we don't have D-form
13232 addressing, prefer the traditional floating point registers so that we
13233 can use D-form (register+offset) addressing. */
13234 if (rclass
== VSX_REGS
13235 && (mode
== SFmode
|| GET_MODE_SIZE (mode
) == 8))
13238 /* Prefer the Altivec registers if Altivec is handling the vector
13239 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13241 if (VECTOR_UNIT_ALTIVEC_P (mode
) || VECTOR_MEM_ALTIVEC_P (mode
)
13242 || mode
== V1TImode
)
13243 return ALTIVEC_REGS
;
13248 if (is_constant
|| GET_CODE (x
) == PLUS
)
13250 if (reg_class_subset_p (GENERAL_REGS
, rclass
))
13251 return GENERAL_REGS
;
13252 if (reg_class_subset_p (BASE_REGS
, rclass
))
13257 /* For the vector pair and vector quad modes, prefer their natural register
13258 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13259 the GPR registers. */
13260 if (rclass
== GEN_OR_FLOAT_REGS
)
13262 if (mode
== OOmode
)
13265 if (mode
== XOmode
)
13268 if (GET_MODE_CLASS (mode
) == MODE_INT
)
13269 return GENERAL_REGS
;
13275 /* Debug version of rs6000_preferred_reload_class. */
13276 static enum reg_class
13277 rs6000_debug_preferred_reload_class (rtx x
, enum reg_class rclass
)
13279 enum reg_class ret
= rs6000_preferred_reload_class (x
, rclass
);
13282 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13284 reg_class_names
[ret
], reg_class_names
[rclass
],
13285 GET_MODE_NAME (GET_MODE (x
)));
13291 /* If we are copying between FP or AltiVec registers and anything else, we need
13292 a memory location. The exception is when we are targeting ppc64 and the
13293 move to/from fpr to gpr instructions are available. Also, under VSX, you
13294 can copy vector registers from the FP register set to the Altivec register
13295 set and vice versa. */
13298 rs6000_secondary_memory_needed (machine_mode mode
,
13299 reg_class_t from_class
,
13300 reg_class_t to_class
)
13302 enum rs6000_reg_type from_type
, to_type
;
13303 bool altivec_p
= ((from_class
== ALTIVEC_REGS
)
13304 || (to_class
== ALTIVEC_REGS
));
13306 /* If a simple/direct move is available, we don't need secondary memory */
13307 from_type
= reg_class_to_reg_type
[(int)from_class
];
13308 to_type
= reg_class_to_reg_type
[(int)to_class
];
13310 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
,
13311 (secondary_reload_info
*)0, altivec_p
))
13314 /* If we have a floating point or vector register class, we need to use
13315 memory to transfer the data. */
13316 if (IS_FP_VECT_REG_TYPE (from_type
) || IS_FP_VECT_REG_TYPE (to_type
))
13322 /* Debug version of rs6000_secondary_memory_needed. */
13324 rs6000_debug_secondary_memory_needed (machine_mode mode
,
13325 reg_class_t from_class
,
13326 reg_class_t to_class
)
13328 bool ret
= rs6000_secondary_memory_needed (mode
, from_class
, to_class
);
13331 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13332 "to_class = %s, mode = %s\n",
13333 ret
? "true" : "false",
13334 reg_class_names
[from_class
],
13335 reg_class_names
[to_class
],
13336 GET_MODE_NAME (mode
));
13341 /* Return the register class of a scratch register needed to copy IN into
13342 or out of a register in RCLASS in MODE. If it can be done directly,
13343 NO_REGS is returned. */
13345 static enum reg_class
13346 rs6000_secondary_reload_class (enum reg_class rclass
, machine_mode mode
,
13351 if (TARGET_ELF
|| (DEFAULT_ABI
== ABI_DARWIN
13353 && MACHOPIC_INDIRECT
13357 /* We cannot copy a symbolic operand directly into anything
13358 other than BASE_REGS for TARGET_ELF. So indicate that a
13359 register from BASE_REGS is needed as an intermediate
13362 On Darwin, pic addresses require a load from memory, which
13363 needs a base register. */
13364 if (rclass
!= BASE_REGS
13365 && (SYMBOL_REF_P (in
)
13366 || GET_CODE (in
) == HIGH
13367 || GET_CODE (in
) == LABEL_REF
13368 || GET_CODE (in
) == CONST
))
13374 regno
= REGNO (in
);
13375 if (!HARD_REGISTER_NUM_P (regno
))
13377 regno
= true_regnum (in
);
13378 if (!HARD_REGISTER_NUM_P (regno
))
13382 else if (SUBREG_P (in
))
13384 regno
= true_regnum (in
);
13385 if (!HARD_REGISTER_NUM_P (regno
))
13391 /* If we have VSX register moves, prefer moving scalar values between
13392 Altivec registers and GPR by going via an FPR (and then via memory)
13393 instead of reloading the secondary memory address for Altivec moves. */
13395 && GET_MODE_SIZE (mode
) < 16
13396 && !mode_supports_vmx_dform (mode
)
13397 && (((rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
)
13398 && (regno
>= 0 && ALTIVEC_REGNO_P (regno
)))
13399 || ((rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
13400 && (regno
>= 0 && INT_REGNO_P (regno
)))))
13403 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13405 if (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
13406 || (regno
>= 0 && INT_REGNO_P (regno
)))
13409 /* Constants, memory, and VSX registers can go into VSX registers (both the
13410 traditional floating point and the altivec registers). */
13411 if (rclass
== VSX_REGS
13412 && (regno
== -1 || VSX_REGNO_P (regno
)))
13415 /* Constants, memory, and FP registers can go into FP registers. */
13416 if ((regno
== -1 || FP_REGNO_P (regno
))
13417 && (rclass
== FLOAT_REGS
|| rclass
== GEN_OR_FLOAT_REGS
))
13418 return (mode
!= SDmode
|| lra_in_progress
) ? NO_REGS
: GENERAL_REGS
;
13420 /* Memory, and AltiVec registers can go into AltiVec registers. */
13421 if ((regno
== -1 || ALTIVEC_REGNO_P (regno
))
13422 && rclass
== ALTIVEC_REGS
)
13425 /* We can copy among the CR registers. */
13426 if ((rclass
== CR_REGS
|| rclass
== CR0_REGS
)
13427 && regno
>= 0 && CR_REGNO_P (regno
))
13430 /* Otherwise, we need GENERAL_REGS. */
13431 return GENERAL_REGS
;
13434 /* Debug version of rs6000_secondary_reload_class. */
13435 static enum reg_class
13436 rs6000_debug_secondary_reload_class (enum reg_class rclass
,
13437 machine_mode mode
, rtx in
)
13439 enum reg_class ret
= rs6000_secondary_reload_class (rclass
, mode
, in
);
13441 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13442 "mode = %s, input rtx:\n",
13443 reg_class_names
[ret
], reg_class_names
[rclass
],
13444 GET_MODE_NAME (mode
));
13450 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13453 rs6000_can_change_mode_class (machine_mode from
,
13455 reg_class_t rclass
)
13457 unsigned from_size
= GET_MODE_SIZE (from
);
13458 unsigned to_size
= GET_MODE_SIZE (to
);
13460 if (from_size
!= to_size
)
13462 enum reg_class xclass
= (TARGET_VSX
) ? VSX_REGS
: FLOAT_REGS
;
13464 if (reg_classes_intersect_p (xclass
, rclass
))
13466 unsigned to_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, to
);
13467 unsigned from_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, from
);
13468 bool to_float128_vector_p
= FLOAT128_VECTOR_P (to
);
13469 bool from_float128_vector_p
= FLOAT128_VECTOR_P (from
);
13471 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13472 single register under VSX because the scalar part of the register
13473 is in the upper 64-bits, and not the lower 64-bits. Types like
13474 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
13475 IEEE floating point can't overlap, and neither can small
13478 if (to_float128_vector_p
&& from_float128_vector_p
)
13481 else if (to_float128_vector_p
|| from_float128_vector_p
)
13484 /* TDmode in floating-mode registers must always go into a register
13485 pair with the most significant word in the even-numbered register
13486 to match ISA requirements. In little-endian mode, this does not
13487 match subreg numbering, so we cannot allow subregs. */
13488 if (!BYTES_BIG_ENDIAN
&& (to
== TDmode
|| from
== TDmode
))
13491 /* Allow SD<->DD changes, since SDmode values are stored in
13492 the low half of the DDmode, just like target-independent
13493 code expects. We need to allow at least SD->DD since
13494 rs6000_secondary_memory_needed_mode asks for that change
13495 to be made for SD reloads. */
13496 if ((to
== DDmode
&& from
== SDmode
)
13497 || (to
== SDmode
&& from
== DDmode
))
13500 if (from_size
< 8 || to_size
< 8)
13503 if (from_size
== 8 && (8 * to_nregs
) != to_size
)
13506 if (to_size
== 8 && (8 * from_nregs
) != from_size
)
13515 /* Since the VSX register set includes traditional floating point registers
13516 and altivec registers, just check for the size being different instead of
13517 trying to check whether the modes are vector modes. Otherwise it won't
13518 allow say DF and DI to change classes. For types like TFmode and TDmode
13519 that take 2 64-bit registers, rather than a single 128-bit register, don't
13520 allow subregs of those types to other 128 bit types. */
13521 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
))
13523 unsigned num_regs
= (from_size
+ 15) / 16;
13524 if (hard_regno_nregs (FIRST_FPR_REGNO
, to
) > num_regs
13525 || hard_regno_nregs (FIRST_FPR_REGNO
, from
) > num_regs
)
13528 return (from_size
== 8 || from_size
== 16);
13531 if (TARGET_ALTIVEC
&& rclass
== ALTIVEC_REGS
13532 && (ALTIVEC_VECTOR_MODE (from
) + ALTIVEC_VECTOR_MODE (to
)) == 1)
13538 /* Debug version of rs6000_can_change_mode_class. */
13540 rs6000_debug_can_change_mode_class (machine_mode from
,
13542 reg_class_t rclass
)
13544 bool ret
= rs6000_can_change_mode_class (from
, to
, rclass
);
13547 "rs6000_can_change_mode_class, return %s, from = %s, "
13548 "to = %s, rclass = %s\n",
13549 ret
? "true" : "false",
13550 GET_MODE_NAME (from
), GET_MODE_NAME (to
),
13551 reg_class_names
[rclass
]);
13556 /* Return a string to do a move operation of 128 bits of data. */
13559 rs6000_output_move_128bit (rtx operands
[])
13561 rtx dest
= operands
[0];
13562 rtx src
= operands
[1];
13563 machine_mode mode
= GET_MODE (dest
);
13566 bool dest_gpr_p
, dest_fp_p
, dest_vmx_p
, dest_vsx_p
;
13567 bool src_gpr_p
, src_fp_p
, src_vmx_p
, src_vsx_p
;
13571 dest_regno
= REGNO (dest
);
13572 dest_gpr_p
= INT_REGNO_P (dest_regno
);
13573 dest_fp_p
= FP_REGNO_P (dest_regno
);
13574 dest_vmx_p
= ALTIVEC_REGNO_P (dest_regno
);
13575 dest_vsx_p
= dest_fp_p
| dest_vmx_p
;
13580 dest_gpr_p
= dest_fp_p
= dest_vmx_p
= dest_vsx_p
= false;
13585 src_regno
= REGNO (src
);
13586 src_gpr_p
= INT_REGNO_P (src_regno
);
13587 src_fp_p
= FP_REGNO_P (src_regno
);
13588 src_vmx_p
= ALTIVEC_REGNO_P (src_regno
);
13589 src_vsx_p
= src_fp_p
| src_vmx_p
;
13594 src_gpr_p
= src_fp_p
= src_vmx_p
= src_vsx_p
= false;
13597 /* Register moves. */
13598 if (dest_regno
>= 0 && src_regno
>= 0)
13605 if (TARGET_DIRECT_MOVE_128
&& src_vsx_p
)
13606 return (WORDS_BIG_ENDIAN
13607 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13608 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13610 else if (TARGET_VSX
&& TARGET_DIRECT_MOVE
&& src_vsx_p
)
13614 else if (TARGET_VSX
&& dest_vsx_p
)
13617 return "xxlor %x0,%x1,%x1";
13619 else if (TARGET_DIRECT_MOVE_128
&& src_gpr_p
)
13620 return (WORDS_BIG_ENDIAN
13621 ? "mtvsrdd %x0,%1,%L1"
13622 : "mtvsrdd %x0,%L1,%1");
13624 else if (TARGET_DIRECT_MOVE
&& src_gpr_p
)
13628 else if (TARGET_ALTIVEC
&& dest_vmx_p
&& src_vmx_p
)
13629 return "vor %0,%1,%1";
13631 else if (dest_fp_p
&& src_fp_p
)
13636 else if (dest_regno
>= 0 && MEM_P (src
))
13640 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13646 else if (TARGET_ALTIVEC
&& dest_vmx_p
13647 && altivec_indexed_or_indirect_operand (src
, mode
))
13648 return "lvx %0,%y1";
13650 else if (TARGET_VSX
&& dest_vsx_p
)
13652 if (mode_supports_dq_form (mode
)
13653 && quad_address_p (XEXP (src
, 0), mode
, true))
13654 return "lxv %x0,%1";
13656 else if (TARGET_P9_VECTOR
)
13657 return "lxvx %x0,%y1";
13659 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13660 return "lxvw4x %x0,%y1";
13663 return "lxvd2x %x0,%y1";
13666 else if (TARGET_ALTIVEC
&& dest_vmx_p
)
13667 return "lvx %0,%y1";
13669 else if (dest_fp_p
)
13674 else if (src_regno
>= 0 && MEM_P (dest
))
13678 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13679 return "stq %1,%0";
13684 else if (TARGET_ALTIVEC
&& src_vmx_p
13685 && altivec_indexed_or_indirect_operand (dest
, mode
))
13686 return "stvx %1,%y0";
13688 else if (TARGET_VSX
&& src_vsx_p
)
13690 if (mode_supports_dq_form (mode
)
13691 && quad_address_p (XEXP (dest
, 0), mode
, true))
13692 return "stxv %x1,%0";
13694 else if (TARGET_P9_VECTOR
)
13695 return "stxvx %x1,%y0";
13697 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13698 return "stxvw4x %x1,%y0";
13701 return "stxvd2x %x1,%y0";
13704 else if (TARGET_ALTIVEC
&& src_vmx_p
)
13705 return "stvx %1,%y0";
13712 else if (dest_regno
>= 0
13713 && (CONST_INT_P (src
)
13714 || CONST_WIDE_INT_P (src
)
13715 || CONST_DOUBLE_P (src
)
13716 || GET_CODE (src
) == CONST_VECTOR
))
13721 else if ((dest_vmx_p
&& TARGET_ALTIVEC
)
13722 || (dest_vsx_p
&& TARGET_VSX
))
13723 return output_vec_const_move (operands
);
13726 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest
, src
));
13729 /* Validate a 128-bit move. */
13731 rs6000_move_128bit_ok_p (rtx operands
[])
13733 machine_mode mode
= GET_MODE (operands
[0]);
13734 return (gpc_reg_operand (operands
[0], mode
)
13735 || gpc_reg_operand (operands
[1], mode
));
13738 /* Return true if a 128-bit move needs to be split. */
13740 rs6000_split_128bit_ok_p (rtx operands
[])
13742 if (!reload_completed
)
13745 if (!gpr_or_gpr_p (operands
[0], operands
[1]))
13748 if (quad_load_store_p (operands
[0], operands
[1]))
13755 /* Given a comparison operation, return the bit number in CCR to test. We
13756 know this is a valid comparison.
13758 SCC_P is 1 if this is for an scc. That means that %D will have been
13759 used instead of %C, so the bits will be in different places.
13761 Return -1 if OP isn't a valid comparison for some reason. */
13764 ccr_bit (rtx op
, int scc_p
)
13766 enum rtx_code code
= GET_CODE (op
);
13767 machine_mode cc_mode
;
13772 if (!COMPARISON_P (op
))
13775 reg
= XEXP (op
, 0);
13777 if (!REG_P (reg
) || !CR_REGNO_P (REGNO (reg
)))
13780 cc_mode
= GET_MODE (reg
);
13781 cc_regnum
= REGNO (reg
);
13782 base_bit
= 4 * (cc_regnum
- CR0_REGNO
);
13784 validate_condition_mode (code
, cc_mode
);
13786 /* When generating a sCOND operation, only positive conditions are
13805 return scc_p
? base_bit
+ 3 : base_bit
+ 2;
13807 return base_bit
+ 2;
13808 case GT
: case GTU
: case UNLE
:
13809 return base_bit
+ 1;
13810 case LT
: case LTU
: case UNGE
:
13812 case ORDERED
: case UNORDERED
:
13813 return base_bit
+ 3;
13816 /* If scc, we will have done a cror to put the bit in the
13817 unordered position. So test that bit. For integer, this is ! LT
13818 unless this is an scc insn. */
13819 return scc_p
? base_bit
+ 3 : base_bit
;
13822 return scc_p
? base_bit
+ 3 : base_bit
+ 1;
13829 /* Return the GOT register. */
13832 rs6000_got_register (rtx value ATTRIBUTE_UNUSED
)
13834 /* The second flow pass currently (June 1999) can't update
13835 regs_ever_live without disturbing other parts of the compiler, so
13836 update it here to make the prolog/epilogue code happy. */
13837 if (!can_create_pseudo_p ()
13838 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))
13839 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM
, true);
13841 crtl
->uses_pic_offset_table
= 1;
13843 return pic_offset_table_rtx
;
13846 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13848 /* Write out a function code label. */
13851 rs6000_output_function_entry (FILE *file
, const char *fname
)
13853 if (fname
[0] != '.')
13855 switch (DEFAULT_ABI
)
13858 gcc_unreachable ();
13864 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "L.");
13874 RS6000_OUTPUT_BASENAME (file
, fname
);
13877 /* Print an operand. Recognize special options, documented below. */
13880 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13881 only introduced by the linker, when applying the sda21
13883 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13884 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13886 #define SMALL_DATA_RELOC "sda21"
13887 #define SMALL_DATA_REG 0
13891 print_operand (FILE *file
, rtx x
, int code
)
13894 unsigned HOST_WIDE_INT uval
;
13898 /* %a is output_address. */
13900 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13904 /* Write the MMA accumulator number associated with VSX register X. */
13905 if (!REG_P (x
) || !FP_REGNO_P (REGNO (x
)) || (REGNO (x
) % 4) != 0)
13906 output_operand_lossage ("invalid %%A value");
13908 fprintf (file
, "%d", (REGNO (x
) - FIRST_FPR_REGNO
) / 4);
13912 /* Like 'J' but get to the GT bit only. */
13913 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13915 output_operand_lossage ("invalid %%D value");
13919 /* Bit 1 is GT bit. */
13920 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 1;
13922 /* Add one for shift count in rlinm for scc. */
13923 fprintf (file
, "%d", i
+ 1);
13927 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13930 output_operand_lossage ("invalid %%e value");
13935 if ((uval
& 0xffff) == 0 && uval
!= 0)
13940 /* X is a CR register. Print the number of the EQ bit of the CR */
13941 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13942 output_operand_lossage ("invalid %%E value");
13944 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
) + 2);
13948 /* X is a CR register. Print the shift count needed to move it
13949 to the high-order four bits. */
13950 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13951 output_operand_lossage ("invalid %%f value");
13953 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
));
13957 /* Similar, but print the count for the rotate in the opposite
13959 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13960 output_operand_lossage ("invalid %%F value");
13962 fprintf (file
, "%d", 32 - 4 * (REGNO (x
) - CR0_REGNO
));
13966 /* X is a constant integer. If it is negative, print "m",
13967 otherwise print "z". This is to make an aze or ame insn. */
13968 if (!CONST_INT_P (x
))
13969 output_operand_lossage ("invalid %%G value");
13970 else if (INTVAL (x
) >= 0)
13977 /* If constant, output low-order five bits. Otherwise, write
13980 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 31);
13982 print_operand (file
, x
, 0);
13986 /* If constant, output low-order six bits. Otherwise, write
13989 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 63);
13991 print_operand (file
, x
, 0);
13995 /* Print `i' if this is a constant, else nothing. */
14001 /* Write the bit number in CCR for jump. */
14002 i
= ccr_bit (x
, 0);
14004 output_operand_lossage ("invalid %%j code");
14006 fprintf (file
, "%d", i
);
14010 /* Similar, but add one for shift count in rlinm for scc and pass
14011 scc flag to `ccr_bit'. */
14012 i
= ccr_bit (x
, 1);
14014 output_operand_lossage ("invalid %%J code");
14016 /* If we want bit 31, write a shift count of zero, not 32. */
14017 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
14021 /* X must be a constant. Write the 1's complement of the
14024 output_operand_lossage ("invalid %%k value");
14026 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~ INTVAL (x
));
14030 /* X must be a symbolic constant on ELF. Write an
14031 expression suitable for an 'addi' that adds in the low 16
14032 bits of the MEM. */
14033 if (GET_CODE (x
) == CONST
)
14035 if (GET_CODE (XEXP (x
, 0)) != PLUS
14036 || (!SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
14037 && GET_CODE (XEXP (XEXP (x
, 0), 0)) != LABEL_REF
)
14038 || !CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
14039 output_operand_lossage ("invalid %%K value");
14041 print_operand_address (file
, x
);
14042 fputs ("@l", file
);
14045 /* %l is output_asm_label. */
14048 /* Write second word of DImode or DFmode reference. Works on register
14049 or non-indexed memory only. */
14051 fputs (reg_names
[REGNO (x
) + 1], file
);
14052 else if (MEM_P (x
))
14054 machine_mode mode
= GET_MODE (x
);
14055 /* Handle possible auto-increment. Since it is pre-increment and
14056 we have already done it, we can just use an offset of word. */
14057 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14058 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14059 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
14061 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14062 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
14065 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
,
14069 if (small_data_operand (x
, GET_MODE (x
)))
14070 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14071 reg_names
[SMALL_DATA_REG
]);
14075 case 'N': /* Unused */
14076 /* Write the number of elements in the vector times 4. */
14077 if (GET_CODE (x
) != PARALLEL
)
14078 output_operand_lossage ("invalid %%N value");
14080 fprintf (file
, "%d", XVECLEN (x
, 0) * 4);
14083 case 'O': /* Unused */
14084 /* Similar, but subtract 1 first. */
14085 if (GET_CODE (x
) != PARALLEL
)
14086 output_operand_lossage ("invalid %%O value");
14088 fprintf (file
, "%d", (XVECLEN (x
, 0) - 1) * 4);
14092 /* X is a CONST_INT that is a power of two. Output the logarithm. */
14095 || (i
= exact_log2 (INTVAL (x
))) < 0)
14096 output_operand_lossage ("invalid %%p value");
14098 fprintf (file
, "%d", i
);
14102 /* The operand must be an indirect memory reference. The result
14103 is the register name. */
14104 if (!MEM_P (x
) || !REG_P (XEXP (x
, 0))
14105 || REGNO (XEXP (x
, 0)) >= 32)
14106 output_operand_lossage ("invalid %%P value");
14108 fputs (reg_names
[REGNO (XEXP (x
, 0))], file
);
14112 /* This outputs the logical code corresponding to a boolean
14113 expression. The expression may have one or both operands
14114 negated (if one, only the first one). For condition register
14115 logical operations, it will also treat the negated
14116 CR codes as NOTs, but not handle NOTs of them. */
14118 const char *const *t
= 0;
14120 enum rtx_code code
= GET_CODE (x
);
14121 static const char * const tbl
[3][3] = {
14122 { "and", "andc", "nor" },
14123 { "or", "orc", "nand" },
14124 { "xor", "eqv", "xor" } };
14128 else if (code
== IOR
)
14130 else if (code
== XOR
)
14133 output_operand_lossage ("invalid %%q value");
14135 if (GET_CODE (XEXP (x
, 0)) != NOT
)
14139 if (GET_CODE (XEXP (x
, 1)) == NOT
)
14150 if (! TARGET_MFCRF
)
14156 /* X is a CR register. Print the mask for `mtcrf'. */
14157 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14158 output_operand_lossage ("invalid %%R value");
14160 fprintf (file
, "%d", 128 >> (REGNO (x
) - CR0_REGNO
));
14164 /* Low 5 bits of 32 - value */
14166 output_operand_lossage ("invalid %%s value");
14168 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (32 - INTVAL (x
)) & 31);
14172 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
14173 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14175 output_operand_lossage ("invalid %%t value");
14179 /* Bit 3 is OV bit. */
14180 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 3;
14182 /* If we want bit 31, write a shift count of zero, not 32. */
14183 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
14187 /* Print the symbolic name of a branch target register. */
14188 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14189 x
= XVECEXP (x
, 0, 0);
14190 if (!REG_P (x
) || (REGNO (x
) != LR_REGNO
14191 && REGNO (x
) != CTR_REGNO
))
14192 output_operand_lossage ("invalid %%T value");
14193 else if (REGNO (x
) == LR_REGNO
)
14194 fputs ("lr", file
);
14196 fputs ("ctr", file
);
14200 /* High-order or low-order 16 bits of constant, whichever is non-zero,
14201 for use in unsigned operand. */
14204 output_operand_lossage ("invalid %%u value");
14209 if ((uval
& 0xffff) == 0)
14212 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
, uval
& 0xffff);
14216 /* High-order 16 bits of constant for use in signed operand. */
14218 output_operand_lossage ("invalid %%v value");
14220 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
,
14221 (INTVAL (x
) >> 16) & 0xffff);
14225 /* Print `u' if this has an auto-increment or auto-decrement. */
14227 && (GET_CODE (XEXP (x
, 0)) == PRE_INC
14228 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
14229 || GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
))
14234 /* Print the trap code for this operand. */
14235 switch (GET_CODE (x
))
14238 fputs ("eq", file
); /* 4 */
14241 fputs ("ne", file
); /* 24 */
14244 fputs ("lt", file
); /* 16 */
14247 fputs ("le", file
); /* 20 */
14250 fputs ("gt", file
); /* 8 */
14253 fputs ("ge", file
); /* 12 */
14256 fputs ("llt", file
); /* 2 */
14259 fputs ("lle", file
); /* 6 */
14262 fputs ("lgt", file
); /* 1 */
14265 fputs ("lge", file
); /* 5 */
14268 output_operand_lossage ("invalid %%V value");
14273 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14276 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, sext_hwi (INTVAL (x
), 16));
14278 print_operand (file
, x
, 0);
14282 /* X is a FPR or Altivec register used in a VSX context. */
14283 if (!REG_P (x
) || !VSX_REGNO_P (REGNO (x
)))
14284 output_operand_lossage ("invalid %%x value");
14287 int reg
= REGNO (x
);
14288 int vsx_reg
= (FP_REGNO_P (reg
)
14290 : reg
- FIRST_ALTIVEC_REGNO
+ 32);
14292 #ifdef TARGET_REGNAMES
14293 if (TARGET_REGNAMES
)
14294 fprintf (file
, "%%vs%d", vsx_reg
);
14297 fprintf (file
, "%d", vsx_reg
);
14303 && (legitimate_indexed_address_p (XEXP (x
, 0), 0)
14304 || (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
14305 && legitimate_indexed_address_p (XEXP (XEXP (x
, 0), 1), 0))))
14310 /* Like 'L', for third word of TImode/PTImode */
14312 fputs (reg_names
[REGNO (x
) + 2], file
);
14313 else if (MEM_P (x
))
14315 machine_mode mode
= GET_MODE (x
);
14316 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14317 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14318 output_address (mode
, plus_constant (Pmode
,
14319 XEXP (XEXP (x
, 0), 0), 8));
14320 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14321 output_address (mode
, plus_constant (Pmode
,
14322 XEXP (XEXP (x
, 0), 0), 8));
14324 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 8), 0));
14325 if (small_data_operand (x
, GET_MODE (x
)))
14326 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14327 reg_names
[SMALL_DATA_REG
]);
14332 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14333 x
= XVECEXP (x
, 0, 1);
14334 /* X is a SYMBOL_REF. Write out the name preceded by a
14335 period and without any trailing data in brackets. Used for function
14336 names. If we are configured for System V (or the embedded ABI) on
14337 the PowerPC, do not emit the period, since those systems do not use
14338 TOCs and the like. */
14339 if (!SYMBOL_REF_P (x
))
14341 output_operand_lossage ("invalid %%z value");
14345 /* For macho, check to see if we need a stub. */
14348 const char *name
= XSTR (x
, 0);
14350 if (darwin_symbol_stubs
14351 && MACHOPIC_INDIRECT
14352 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
14353 name
= machopic_indirection_name (x
, /*stub_p=*/true);
14355 assemble_name (file
, name
);
14357 else if (!DOT_SYMBOLS
)
14358 assemble_name (file
, XSTR (x
, 0));
14360 rs6000_output_function_entry (file
, XSTR (x
, 0));
14364 /* Like 'L', for last word of TImode/PTImode. */
14366 fputs (reg_names
[REGNO (x
) + 3], file
);
14367 else if (MEM_P (x
))
14369 machine_mode mode
= GET_MODE (x
);
14370 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14371 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14372 output_address (mode
, plus_constant (Pmode
,
14373 XEXP (XEXP (x
, 0), 0), 12));
14374 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14375 output_address (mode
, plus_constant (Pmode
,
14376 XEXP (XEXP (x
, 0), 0), 12));
14378 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 12), 0));
14379 if (small_data_operand (x
, GET_MODE (x
)))
14380 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14381 reg_names
[SMALL_DATA_REG
]);
14385 /* Print AltiVec memory operand. */
14390 gcc_assert (MEM_P (x
));
14394 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x
))
14395 && GET_CODE (tmp
) == AND
14396 && CONST_INT_P (XEXP (tmp
, 1))
14397 && INTVAL (XEXP (tmp
, 1)) == -16)
14398 tmp
= XEXP (tmp
, 0);
14399 else if (VECTOR_MEM_VSX_P (GET_MODE (x
))
14400 && GET_CODE (tmp
) == PRE_MODIFY
)
14401 tmp
= XEXP (tmp
, 1);
14403 fprintf (file
, "0,%s", reg_names
[REGNO (tmp
)]);
14406 if (GET_CODE (tmp
) != PLUS
14407 || !REG_P (XEXP (tmp
, 0))
14408 || !REG_P (XEXP (tmp
, 1)))
14410 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14414 if (REGNO (XEXP (tmp
, 0)) == 0)
14415 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 1)) ],
14416 reg_names
[ REGNO (XEXP (tmp
, 0)) ]);
14418 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 0)) ],
14419 reg_names
[ REGNO (XEXP (tmp
, 1)) ]);
14426 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
14427 else if (MEM_P (x
))
14429 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14430 know the width from the mode. */
14431 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
)
14432 fprintf (file
, "%d(%s)", GET_MODE_SIZE (GET_MODE (x
)),
14433 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
14434 else if (GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14435 fprintf (file
, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x
)),
14436 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
14437 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14438 output_address (GET_MODE (x
), XEXP (XEXP (x
, 0), 1));
14440 output_address (GET_MODE (x
), XEXP (x
, 0));
14442 else if (toc_relative_expr_p (x
, false,
14443 &tocrel_base_oac
, &tocrel_offset_oac
))
14444 /* This hack along with a corresponding hack in
14445 rs6000_output_addr_const_extra arranges to output addends
14446 where the assembler expects to find them. eg.
14447 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14448 without this hack would be output as "x@toc+4". We
14450 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
14451 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSGD
)
14452 output_addr_const (file
, XVECEXP (x
, 0, 0));
14453 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14454 output_addr_const (file
, XVECEXP (x
, 0, 1));
14456 output_addr_const (file
, x
);
14460 if (const char *name
= get_some_local_dynamic_name ())
14461 assemble_name (file
, name
);
14463 output_operand_lossage ("'%%&' used without any "
14464 "local dynamic TLS references");
14468 output_operand_lossage ("invalid %%xn code");
14472 /* Print the address of an operand. */
14475 print_operand_address (FILE *file
, rtx x
)
14478 fprintf (file
, "0(%s)", reg_names
[ REGNO (x
) ]);
14480 /* Is it a PC-relative address? */
14481 else if (TARGET_PCREL
&& pcrel_local_or_external_address (x
, VOIDmode
))
14483 HOST_WIDE_INT offset
;
14485 if (GET_CODE (x
) == CONST
)
14488 if (GET_CODE (x
) == PLUS
)
14490 offset
= INTVAL (XEXP (x
, 1));
14496 output_addr_const (file
, x
);
14499 fprintf (file
, "%+" PRId64
, offset
);
14501 if (SYMBOL_REF_P (x
) && !SYMBOL_REF_LOCAL_P (x
))
14502 fprintf (file
, "@got");
14504 fprintf (file
, "@pcrel");
14506 else if (SYMBOL_REF_P (x
) || GET_CODE (x
) == CONST
14507 || GET_CODE (x
) == LABEL_REF
)
14509 output_addr_const (file
, x
);
14510 if (small_data_operand (x
, GET_MODE (x
)))
14511 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14512 reg_names
[SMALL_DATA_REG
]);
14514 gcc_assert (!TARGET_TOC
);
14516 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
14517 && REG_P (XEXP (x
, 1)))
14519 if (REGNO (XEXP (x
, 0)) == 0)
14520 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 1)) ],
14521 reg_names
[ REGNO (XEXP (x
, 0)) ]);
14523 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 0)) ],
14524 reg_names
[ REGNO (XEXP (x
, 1)) ]);
14526 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
14527 && CONST_INT_P (XEXP (x
, 1)))
14528 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
14529 INTVAL (XEXP (x
, 1)), reg_names
[ REGNO (XEXP (x
, 0)) ]);
14531 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
14532 && CONSTANT_P (XEXP (x
, 1)))
14534 fprintf (file
, "lo16(");
14535 output_addr_const (file
, XEXP (x
, 1));
14536 fprintf (file
, ")(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
14540 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
14541 && CONSTANT_P (XEXP (x
, 1)))
14543 output_addr_const (file
, XEXP (x
, 1));
14544 fprintf (file
, "@l(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
14547 else if (toc_relative_expr_p (x
, false, &tocrel_base_oac
, &tocrel_offset_oac
))
14549 /* This hack along with a corresponding hack in
14550 rs6000_output_addr_const_extra arranges to output addends
14551 where the assembler expects to find them. eg.
14553 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14554 without this hack would be output as "x@toc+8@l(9)". We
14555 want "x+8@toc@l(9)". */
14556 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
14557 if (GET_CODE (x
) == LO_SUM
)
14558 fprintf (file
, "@l(%s)", reg_names
[REGNO (XEXP (x
, 0))]);
14560 fprintf (file
, "(%s)", reg_names
[REGNO (XVECEXP (tocrel_base_oac
, 0, 1))]);
14563 output_addr_const (file
, x
);
14566 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14569 rs6000_output_addr_const_extra (FILE *file
, rtx x
)
14571 if (GET_CODE (x
) == UNSPEC
)
14572 switch (XINT (x
, 1))
14574 case UNSPEC_TOCREL
:
14575 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x
, 0, 0))
14576 && REG_P (XVECEXP (x
, 0, 1))
14577 && REGNO (XVECEXP (x
, 0, 1)) == TOC_REGISTER
);
14578 output_addr_const (file
, XVECEXP (x
, 0, 0));
14579 if (x
== tocrel_base_oac
&& tocrel_offset_oac
!= const0_rtx
)
14581 if (INTVAL (tocrel_offset_oac
) >= 0)
14582 fprintf (file
, "+");
14583 output_addr_const (file
, CONST_CAST_RTX (tocrel_offset_oac
));
14585 if (!TARGET_AIX
|| (TARGET_ELF
&& TARGET_MINIMAL_TOC
))
14588 assemble_name (file
, toc_label_name
);
14591 else if (TARGET_ELF
)
14592 fputs ("@toc", file
);
14596 case UNSPEC_MACHOPIC_OFFSET
:
14597 output_addr_const (file
, XVECEXP (x
, 0, 0));
14599 machopic_output_function_base_name (file
);
14606 /* Target hook for assembling integer objects. The PowerPC version has
14607 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14608 is defined. It also needs to handle DI-mode objects on 64-bit
14612 rs6000_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
14614 #ifdef RELOCATABLE_NEEDS_FIXUP
14615 /* Special handling for SI values. */
14616 if (RELOCATABLE_NEEDS_FIXUP
&& size
== 4 && aligned_p
)
14618 static int recurse
= 0;
14620 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14621 the .fixup section. Since the TOC section is already relocated, we
14622 don't need to mark it here. We used to skip the text section, but it
14623 should never be valid for relocated addresses to be placed in the text
14625 if (DEFAULT_ABI
== ABI_V4
14626 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
14627 && in_section
!= toc_section
14629 && !CONST_SCALAR_INT_P (x
)
14635 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCP", fixuplabelno
);
14637 ASM_OUTPUT_LABEL (asm_out_file
, buf
);
14638 fprintf (asm_out_file
, "\t.long\t(");
14639 output_addr_const (asm_out_file
, x
);
14640 fprintf (asm_out_file
, ")@fixup\n");
14641 fprintf (asm_out_file
, "\t.section\t\".fixup\",\"aw\"\n");
14642 ASM_OUTPUT_ALIGN (asm_out_file
, 2);
14643 fprintf (asm_out_file
, "\t.long\t");
14644 assemble_name (asm_out_file
, buf
);
14645 fprintf (asm_out_file
, "\n\t.previous\n");
14649 /* Remove initial .'s to turn a -mcall-aixdesc function
14650 address into the address of the descriptor, not the function
14652 else if (SYMBOL_REF_P (x
)
14653 && XSTR (x
, 0)[0] == '.'
14654 && DEFAULT_ABI
== ABI_AIX
)
14656 const char *name
= XSTR (x
, 0);
14657 while (*name
== '.')
14660 fprintf (asm_out_file
, "\t.long\t%s\n", name
);
14664 #endif /* RELOCATABLE_NEEDS_FIXUP */
14665 return default_assemble_integer (x
, size
, aligned_p
);
14668 /* Return a template string for assembly to emit when making an
14669 external call. FUNOP is the call mem argument operand number. */
14671 static const char *
14672 rs6000_call_template_1 (rtx
*operands
, unsigned int funop
, bool sibcall
)
14674 /* -Wformat-overflow workaround, without which gcc thinks that %u
14675 might produce 10 digits. */
14676 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
14680 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
14682 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
14683 sprintf (arg
, "(%%%u@tlsgd)", funop
+ 1);
14684 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
14685 sprintf (arg
, "(%%&@tlsld)");
14688 /* The magic 32768 offset here corresponds to the offset of
14689 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14691 sprintf (z
, "%%z%u%s", funop
,
14692 (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
== 2
14695 static char str
[32]; /* 1 spare */
14696 if (rs6000_pcrel_p ())
14697 sprintf (str
, "b%s %s@notoc%s", sibcall
? "" : "l", z
, arg
);
14698 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
14699 sprintf (str
, "b%s %s%s%s", sibcall
? "" : "l", z
, arg
,
14700 sibcall
? "" : "\n\tnop");
14701 else if (DEFAULT_ABI
== ABI_V4
)
14702 sprintf (str
, "b%s %s%s%s", sibcall
? "" : "l", z
, arg
,
14703 flag_pic
? "@plt" : "");
14705 /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */
14706 else if (DEFAULT_ABI
== ABI_DARWIN
)
14708 /* The cookie is in operand func+2. */
14709 gcc_checking_assert (GET_CODE (operands
[funop
+ 2]) == CONST_INT
);
14710 int cookie
= INTVAL (operands
[funop
+ 2]);
14711 if (cookie
& CALL_LONG
)
14713 tree funname
= get_identifier (XSTR (operands
[funop
], 0));
14714 tree labelname
= get_prev_label (funname
);
14715 gcc_checking_assert (labelname
&& !sibcall
);
14717 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14718 instruction will reach 'foo', otherwise link as 'bl L42'".
14719 "L42" should be a 'branch island', that will do a far jump to
14720 'foo'. Branch islands are generated in
14721 macho_branch_islands(). */
14722 sprintf (str
, "jbsr %%z%u,%.10s", funop
,
14723 IDENTIFIER_POINTER (labelname
));
14726 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14728 sprintf (str
, "b%s %s%s", sibcall
? "" : "l", z
, arg
);
14732 gcc_unreachable ();
14737 rs6000_call_template (rtx
*operands
, unsigned int funop
)
14739 return rs6000_call_template_1 (operands
, funop
, false);
14743 rs6000_sibcall_template (rtx
*operands
, unsigned int funop
)
14745 return rs6000_call_template_1 (operands
, funop
, true);
14748 /* As above, for indirect calls. */
14750 static const char *
14751 rs6000_indirect_call_template_1 (rtx
*operands
, unsigned int funop
,
14754 /* -Wformat-overflow workaround, without which gcc thinks that %u
14755 might produce 10 digits. Note that -Wformat-overflow will not
14756 currently warn here for str[], so do not rely on a warning to
14757 ensure str[] is correctly sized. */
14758 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
14760 /* Currently, funop is either 0 or 1. The maximum string is always
14761 a !speculate 64-bit __tls_get_addr call.
14764 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14765 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14767 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14768 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14775 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14776 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14778 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14779 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14786 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14787 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14789 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14790 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14797 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14798 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14800 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14801 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14805 static char str
[160]; /* 8 spare */
14807 const char *ptrload
= TARGET_64BIT
? "d" : "wz";
14809 if (DEFAULT_ABI
== ABI_AIX
)
14812 ptrload
, funop
+ 3);
14814 /* We don't need the extra code to stop indirect call speculation if
14816 bool speculate
= (TARGET_MACHO
14817 || rs6000_speculate_indirect_jumps
14818 || (REG_P (operands
[funop
])
14819 && REGNO (operands
[funop
]) == LR_REGNO
));
14821 if (TARGET_PLTSEQ
&& GET_CODE (operands
[funop
]) == UNSPEC
)
14823 const char *rel64
= TARGET_64BIT
? "64" : "";
14826 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
14828 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
14829 sprintf (tls
, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14831 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
14832 sprintf (tls
, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14836 const char *notoc
= rs6000_pcrel_p () ? "_NOTOC" : "";
14837 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
14838 && flag_pic
== 2 ? "+32768" : "");
14842 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14843 tls
, rel64
, notoc
, funop
, addend
);
14844 s
+= sprintf (s
, "crset 2\n\t");
14847 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14848 tls
, rel64
, notoc
, funop
, addend
);
14850 else if (!speculate
)
14851 s
+= sprintf (s
, "crset 2\n\t");
14853 if (rs6000_pcrel_p ())
14856 sprintf (s
, "b%%T%ul", funop
);
14858 sprintf (s
, "beq%%T%ul-", funop
);
14860 else if (DEFAULT_ABI
== ABI_AIX
)
14866 funop
, ptrload
, funop
+ 4);
14871 funop
, ptrload
, funop
+ 4);
14873 else if (DEFAULT_ABI
== ABI_ELFv2
)
14879 funop
, ptrload
, funop
+ 3);
14884 funop
, ptrload
, funop
+ 3);
14891 funop
, sibcall
? "" : "l");
14895 funop
, sibcall
? "" : "l", sibcall
? "\n\tb $" : "");
14901 rs6000_indirect_call_template (rtx
*operands
, unsigned int funop
)
14903 return rs6000_indirect_call_template_1 (operands
, funop
, false);
14907 rs6000_indirect_sibcall_template (rtx
*operands
, unsigned int funop
)
14909 return rs6000_indirect_call_template_1 (operands
, funop
, true);
14913 /* Output indirect call insns. WHICH identifies the type of sequence. */
14915 rs6000_pltseq_template (rtx
*operands
, int which
)
14917 const char *rel64
= TARGET_64BIT
? "64" : "";
14920 if (GET_CODE (operands
[3]) == UNSPEC
)
14922 char off
= which
== RS6000_PLTSEQ_PLT_PCREL34
? '8' : '4';
14923 if (XINT (operands
[3], 1) == UNSPEC_TLSGD
)
14924 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14926 else if (XINT (operands
[3], 1) == UNSPEC_TLSLD
)
14927 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14931 gcc_assert (DEFAULT_ABI
== ABI_ELFv2
|| DEFAULT_ABI
== ABI_V4
);
14932 static char str
[96]; /* 10 spare */
14933 char off
= WORDS_BIG_ENDIAN
? '2' : '4';
14934 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
14935 && flag_pic
== 2 ? "+32768" : "");
14938 case RS6000_PLTSEQ_TOCSAVE
:
14941 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14942 TARGET_64BIT
? "d 2,24(1)" : "w 2,12(1)",
14945 case RS6000_PLTSEQ_PLT16_HA
:
14946 if (DEFAULT_ABI
== ABI_V4
&& !flag_pic
)
14949 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14953 "addis %%0,%%1,0\n\t"
14954 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14955 tls
, off
, rel64
, addend
);
14957 case RS6000_PLTSEQ_PLT16_LO
:
14959 "l%s %%0,0(%%1)\n\t"
14960 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14961 TARGET_64BIT
? "d" : "wz",
14962 tls
, off
, rel64
, TARGET_64BIT
? "_DS" : "", addend
);
14964 case RS6000_PLTSEQ_MTCTR
:
14967 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14968 tls
, rel64
, addend
);
14970 case RS6000_PLTSEQ_PLT_PCREL34
:
14972 "pl%s %%0,0(0),1\n\t"
14973 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14974 TARGET_64BIT
? "d" : "wz",
14978 gcc_unreachable ();
14984 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14985 /* Emit an assembler directive to set symbol visibility for DECL to
14986 VISIBILITY_TYPE. */
14989 rs6000_assemble_visibility (tree decl
, int vis
)
14994 /* Functions need to have their entry point symbol visibility set as
14995 well as their descriptor symbol visibility. */
14996 if (DEFAULT_ABI
== ABI_AIX
14998 && TREE_CODE (decl
) == FUNCTION_DECL
)
15000 static const char * const visibility_types
[] = {
15001 NULL
, "protected", "hidden", "internal"
15004 const char *name
, *type
;
15006 name
= ((* targetm
.strip_name_encoding
)
15007 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
))));
15008 type
= visibility_types
[vis
];
15010 fprintf (asm_out_file
, "\t.%s\t%s\n", type
, name
);
15011 fprintf (asm_out_file
, "\t.%s\t.%s\n", type
, name
);
15014 default_assemble_visibility (decl
, vis
);
15018 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
15019 entry. If RECORD_P is true and the target supports named sections,
15020 the location of the NOPs will be recorded in a special object section
15021 called "__patchable_function_entries". This routine may be called
15022 twice per function to put NOPs before and after the function
15026 rs6000_print_patchable_function_entry (FILE *file
,
15027 unsigned HOST_WIDE_INT patch_area_size
,
15030 bool global_entry_needed_p
= rs6000_global_entry_point_prologue_needed_p ();
15031 /* For a function which needs global entry point, we will emit the
15032 patchable area before and after local entry point under the control of
15033 cfun->machine->global_entry_emitted, see the handling in function
15034 rs6000_output_function_prologue. */
15035 if (!global_entry_needed_p
|| cfun
->machine
->global_entry_emitted
)
15036 default_print_patchable_function_entry (file
, patch_area_size
, record_p
);
15040 rs6000_reverse_condition (machine_mode mode
, enum rtx_code code
)
15042 /* Reversal of FP compares takes care -- an ordered compare
15043 becomes an unordered compare and vice versa. */
15044 if (mode
== CCFPmode
15045 && (!flag_finite_math_only
15046 || code
== UNLT
|| code
== UNLE
|| code
== UNGT
|| code
== UNGE
15047 || code
== UNEQ
|| code
== LTGT
))
15048 return reverse_condition_maybe_unordered (code
);
15050 return reverse_condition (code
);
15053 /* Check if C (as 64bit integer) can be rotated to a constant which contains
15054 nonzero bits at the LOWBITS low bits only.
15056 Return true if C can be rotated to such constant. If so, *ROT is written
15057 to the number by which C is rotated.
15058 Return false otherwise. */
15061 can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c
, int lowbits
, int *rot
)
15063 int clz
= HOST_BITS_PER_WIDE_INT
- lowbits
;
15065 /* case a. 0..0xxx: already at least clz zeros. */
15066 int lz
= clz_hwi (c
);
15073 /* case b. 0..0xxx0..0: at least clz zeros. */
15074 int tz
= ctz_hwi (c
);
15075 if (lz
+ tz
>= clz
)
15077 *rot
= HOST_BITS_PER_WIDE_INT
- tz
;
15081 /* case c. xx10.....0xx: rotate 'clz - 1' bits first, then check case b.
15082 ^bit -> Vbit, , then zeros are at head or tail.
15083 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */
15084 const int rot_bits
= lowbits
+ 1;
15085 unsigned HOST_WIDE_INT rc
= (c
>> rot_bits
) | (c
<< (clz
- 1));
15087 if (clz_hwi (rc
) + tz
>= clz
)
15089 *rot
= HOST_BITS_PER_WIDE_INT
- (tz
+ rot_bits
);
15096 /* Check if C (as 64bit integer) can be rotated to a positive 16bits constant
15097 which contains 48bits leading zeros and 16bits of any value. */
15100 can_be_rotated_to_positive_16bits (HOST_WIDE_INT c
)
15103 bool res
= can_be_rotated_to_lowbits (c
, 16, &rot
);
15104 return res
&& rot
> 0;
15107 /* Check if C (as 64bit integer) can be rotated to a negative 15bits constant
15108 which contains 49bits leading ones and 15bits of any value. */
15111 can_be_rotated_to_negative_15bits (HOST_WIDE_INT c
)
15114 bool res
= can_be_rotated_to_lowbits (~c
, 15, &rot
);
15115 return res
&& rot
> 0;
15118 /* Generate a compare for CODE. Return a brand-new rtx that
15119 represents the result of the compare. */
15122 rs6000_generate_compare (rtx cmp
, machine_mode mode
)
15124 machine_mode comp_mode
;
15125 rtx compare_result
;
15126 enum rtx_code code
= GET_CODE (cmp
);
15127 rtx op0
= XEXP (cmp
, 0);
15128 rtx op1
= XEXP (cmp
, 1);
15130 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
15131 comp_mode
= CCmode
;
15132 else if (FLOAT_MODE_P (mode
))
15133 comp_mode
= CCFPmode
;
15134 else if (code
== GTU
|| code
== LTU
15135 || code
== GEU
|| code
== LEU
)
15136 comp_mode
= CCUNSmode
;
15137 else if ((code
== EQ
|| code
== NE
)
15138 && unsigned_reg_p (op0
)
15139 && (unsigned_reg_p (op1
)
15140 || (CONST_INT_P (op1
) && INTVAL (op1
) != 0)))
15141 /* These are unsigned values, perhaps there will be a later
15142 ordering compare that can be shared with this one. */
15143 comp_mode
= CCUNSmode
;
15145 comp_mode
= CCmode
;
15147 /* If we have an unsigned compare, make sure we don't have a signed value as
15149 if (comp_mode
== CCUNSmode
&& CONST_INT_P (op1
)
15150 && INTVAL (op1
) < 0)
15152 op0
= copy_rtx_if_shared (op0
);
15153 op1
= force_reg (GET_MODE (op0
), op1
);
15154 cmp
= gen_rtx_fmt_ee (code
, GET_MODE (cmp
), op0
, op1
);
15157 /* First, the compare. */
15158 compare_result
= gen_reg_rtx (comp_mode
);
15160 /* IEEE 128-bit support in VSX registers when we do not have hardware
15162 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
15164 rtx libfunc
= NULL_RTX
;
15165 bool check_nan
= false;
15172 libfunc
= optab_libfunc (eq_optab
, mode
);
15177 libfunc
= optab_libfunc (ge_optab
, mode
);
15182 libfunc
= optab_libfunc (le_optab
, mode
);
15187 libfunc
= optab_libfunc (unord_optab
, mode
);
15188 code
= (code
== UNORDERED
) ? NE
: EQ
;
15194 libfunc
= optab_libfunc (ge_optab
, mode
);
15195 code
= (code
== UNGE
) ? GE
: GT
;
15201 libfunc
= optab_libfunc (le_optab
, mode
);
15202 code
= (code
== UNLE
) ? LE
: LT
;
15208 libfunc
= optab_libfunc (eq_optab
, mode
);
15209 code
= (code
= UNEQ
) ? EQ
: NE
;
15213 gcc_unreachable ();
15216 gcc_assert (libfunc
);
15219 dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
15220 SImode
, op0
, mode
, op1
, mode
);
15222 /* The library signals an exception for signalling NaNs, so we need to
15223 handle isgreater, etc. by first checking isordered. */
15226 rtx ne_rtx
, normal_dest
, unord_dest
;
15227 rtx unord_func
= optab_libfunc (unord_optab
, mode
);
15228 rtx join_label
= gen_label_rtx ();
15229 rtx join_ref
= gen_rtx_LABEL_REF (VOIDmode
, join_label
);
15230 rtx unord_cmp
= gen_reg_rtx (comp_mode
);
15233 /* Test for either value being a NaN. */
15234 gcc_assert (unord_func
);
15235 unord_dest
= emit_library_call_value (unord_func
, NULL_RTX
, LCT_CONST
,
15236 SImode
, op0
, mode
, op1
, mode
);
15238 /* Set value (0) if either value is a NaN, and jump to the join
15240 dest
= gen_reg_rtx (SImode
);
15241 emit_move_insn (dest
, const1_rtx
);
15242 emit_insn (gen_rtx_SET (unord_cmp
,
15243 gen_rtx_COMPARE (comp_mode
, unord_dest
,
15246 ne_rtx
= gen_rtx_NE (comp_mode
, unord_cmp
, const0_rtx
);
15247 emit_jump_insn (gen_rtx_SET (pc_rtx
,
15248 gen_rtx_IF_THEN_ELSE (VOIDmode
, ne_rtx
,
15252 /* Do the normal comparison, knowing that the values are not
15254 normal_dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
15255 SImode
, op0
, mode
, op1
, mode
);
15257 emit_insn (gen_cstoresi4 (dest
,
15258 gen_rtx_fmt_ee (code
, SImode
, normal_dest
,
15260 normal_dest
, const0_rtx
));
15262 /* Join NaN and non-Nan paths. Compare dest against 0. */
15263 emit_label (join_label
);
15267 emit_insn (gen_rtx_SET (compare_result
,
15268 gen_rtx_COMPARE (comp_mode
, dest
, const0_rtx
)));
15273 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
15274 CLOBBERs to match cmptf_internal2 pattern. */
15275 if (comp_mode
== CCFPmode
&& TARGET_XL_COMPAT
15276 && FLOAT128_IBM_P (GET_MODE (op0
))
15277 && TARGET_HARD_FLOAT
)
15278 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
15280 gen_rtx_SET (compare_result
,
15281 gen_rtx_COMPARE (comp_mode
, op0
, op1
)),
15282 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15283 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15284 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15285 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15286 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15287 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15288 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15289 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15290 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (Pmode
)))));
15291 else if (GET_CODE (op1
) == UNSPEC
15292 && XINT (op1
, 1) == UNSPEC_SP_TEST
)
15294 rtx op1b
= XVECEXP (op1
, 0, 0);
15295 comp_mode
= CCEQmode
;
15296 compare_result
= gen_reg_rtx (CCEQmode
);
15298 emit_insn (gen_stack_protect_testdi (compare_result
, op0
, op1b
));
15300 emit_insn (gen_stack_protect_testsi (compare_result
, op0
, op1b
));
15303 emit_insn (gen_rtx_SET (compare_result
,
15304 gen_rtx_COMPARE (comp_mode
, op0
, op1
)));
15307 validate_condition_mode (code
, GET_MODE (compare_result
));
15309 return gen_rtx_fmt_ee (code
, VOIDmode
, compare_result
, const0_rtx
);
15313 /* Return the diagnostic message string if the binary operation OP is
15314 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15317 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED
,
15321 machine_mode mode1
= TYPE_MODE (type1
);
15322 machine_mode mode2
= TYPE_MODE (type2
);
15324 /* For complex modes, use the inner type. */
15325 if (COMPLEX_MODE_P (mode1
))
15326 mode1
= GET_MODE_INNER (mode1
);
15328 if (COMPLEX_MODE_P (mode2
))
15329 mode2
= GET_MODE_INNER (mode2
);
15331 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15332 double to intermix unless -mfloat128-convert. */
15333 if (mode1
== mode2
)
15336 if (!TARGET_FLOAT128_CVT
)
15338 if ((FLOAT128_IEEE_P (mode1
) && FLOAT128_IBM_P (mode2
))
15339 || (FLOAT128_IBM_P (mode1
) && FLOAT128_IEEE_P (mode2
)))
15340 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15348 /* Expand floating point conversion to/from __float128 and __ibm128. */
15351 rs6000_expand_float128_convert (rtx dest
, rtx src
, bool unsigned_p
)
15353 machine_mode dest_mode
= GET_MODE (dest
);
15354 machine_mode src_mode
= GET_MODE (src
);
15355 convert_optab cvt
= unknown_optab
;
15356 bool do_move
= false;
15357 rtx libfunc
= NULL_RTX
;
15359 typedef rtx (*rtx_2func_t
) (rtx
, rtx
);
15360 rtx_2func_t hw_convert
= (rtx_2func_t
)0;
15364 rtx_2func_t from_df
;
15365 rtx_2func_t from_sf
;
15366 rtx_2func_t from_si_sign
;
15367 rtx_2func_t from_si_uns
;
15368 rtx_2func_t from_di_sign
;
15369 rtx_2func_t from_di_uns
;
15372 rtx_2func_t to_si_sign
;
15373 rtx_2func_t to_si_uns
;
15374 rtx_2func_t to_di_sign
;
15375 rtx_2func_t to_di_uns
;
15376 } hw_conversions
[2] = {
15377 /* convertions to/from KFmode */
15379 gen_extenddfkf2_hw
, /* KFmode <- DFmode. */
15380 gen_extendsfkf2_hw
, /* KFmode <- SFmode. */
15381 gen_float_kfsi2_hw
, /* KFmode <- SImode (signed). */
15382 gen_floatuns_kfsi2_hw
, /* KFmode <- SImode (unsigned). */
15383 gen_float_kfdi2_hw
, /* KFmode <- DImode (signed). */
15384 gen_floatuns_kfdi2_hw
, /* KFmode <- DImode (unsigned). */
15385 gen_trunckfdf2_hw
, /* DFmode <- KFmode. */
15386 gen_trunckfsf2_hw
, /* SFmode <- KFmode. */
15387 gen_fix_kfsi2_hw
, /* SImode <- KFmode (signed). */
15388 gen_fixuns_kfsi2_hw
, /* SImode <- KFmode (unsigned). */
15389 gen_fix_kfdi2_hw
, /* DImode <- KFmode (signed). */
15390 gen_fixuns_kfdi2_hw
, /* DImode <- KFmode (unsigned). */
15393 /* convertions to/from TFmode */
15395 gen_extenddftf2_hw
, /* TFmode <- DFmode. */
15396 gen_extendsftf2_hw
, /* TFmode <- SFmode. */
15397 gen_float_tfsi2_hw
, /* TFmode <- SImode (signed). */
15398 gen_floatuns_tfsi2_hw
, /* TFmode <- SImode (unsigned). */
15399 gen_float_tfdi2_hw
, /* TFmode <- DImode (signed). */
15400 gen_floatuns_tfdi2_hw
, /* TFmode <- DImode (unsigned). */
15401 gen_trunctfdf2_hw
, /* DFmode <- TFmode. */
15402 gen_trunctfsf2_hw
, /* SFmode <- TFmode. */
15403 gen_fix_tfsi2_hw
, /* SImode <- TFmode (signed). */
15404 gen_fixuns_tfsi2_hw
, /* SImode <- TFmode (unsigned). */
15405 gen_fix_tfdi2_hw
, /* DImode <- TFmode (signed). */
15406 gen_fixuns_tfdi2_hw
, /* DImode <- TFmode (unsigned). */
15410 if (dest_mode
== src_mode
)
15411 gcc_unreachable ();
15413 /* Eliminate memory operations. */
15415 src
= force_reg (src_mode
, src
);
15419 rtx tmp
= gen_reg_rtx (dest_mode
);
15420 rs6000_expand_float128_convert (tmp
, src
, unsigned_p
);
15421 rs6000_emit_move (dest
, tmp
, dest_mode
);
15425 /* Convert to IEEE 128-bit floating point. */
15426 if (FLOAT128_IEEE_P (dest_mode
))
15428 if (dest_mode
== KFmode
)
15430 else if (dest_mode
== TFmode
)
15433 gcc_unreachable ();
15439 hw_convert
= hw_conversions
[kf_or_tf
].from_df
;
15444 hw_convert
= hw_conversions
[kf_or_tf
].from_sf
;
15450 if (FLOAT128_IBM_P (src_mode
))
15459 cvt
= ufloat_optab
;
15460 hw_convert
= hw_conversions
[kf_or_tf
].from_si_uns
;
15464 cvt
= sfloat_optab
;
15465 hw_convert
= hw_conversions
[kf_or_tf
].from_si_sign
;
15472 cvt
= ufloat_optab
;
15473 hw_convert
= hw_conversions
[kf_or_tf
].from_di_uns
;
15477 cvt
= sfloat_optab
;
15478 hw_convert
= hw_conversions
[kf_or_tf
].from_di_sign
;
15483 gcc_unreachable ();
15487 /* Convert from IEEE 128-bit floating point. */
15488 else if (FLOAT128_IEEE_P (src_mode
))
15490 if (src_mode
== KFmode
)
15492 else if (src_mode
== TFmode
)
15495 gcc_unreachable ();
15501 hw_convert
= hw_conversions
[kf_or_tf
].to_df
;
15506 hw_convert
= hw_conversions
[kf_or_tf
].to_sf
;
15512 if (FLOAT128_IBM_P (dest_mode
))
15522 hw_convert
= hw_conversions
[kf_or_tf
].to_si_uns
;
15527 hw_convert
= hw_conversions
[kf_or_tf
].to_si_sign
;
15535 hw_convert
= hw_conversions
[kf_or_tf
].to_di_uns
;
15540 hw_convert
= hw_conversions
[kf_or_tf
].to_di_sign
;
15545 gcc_unreachable ();
15549 /* Both IBM format. */
15550 else if (FLOAT128_IBM_P (dest_mode
) && FLOAT128_IBM_P (src_mode
))
15554 gcc_unreachable ();
15556 /* Handle conversion between TFmode/KFmode/IFmode. */
15558 emit_insn (gen_rtx_SET (dest
, gen_rtx_FLOAT_EXTEND (dest_mode
, src
)));
15560 /* Handle conversion if we have hardware support. */
15561 else if (TARGET_FLOAT128_HW
&& hw_convert
)
15562 emit_insn ((hw_convert
) (dest
, src
));
15564 /* Call an external function to do the conversion. */
15565 else if (cvt
!= unknown_optab
)
15567 libfunc
= convert_optab_libfunc (cvt
, dest_mode
, src_mode
);
15568 gcc_assert (libfunc
!= NULL_RTX
);
15570 dest2
= emit_library_call_value (libfunc
, dest
, LCT_CONST
, dest_mode
,
15573 gcc_assert (dest2
!= NULL_RTX
);
15574 if (!rtx_equal_p (dest
, dest2
))
15575 emit_move_insn (dest
, dest2
);
15579 gcc_unreachable ();
15585 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15586 can be used as that dest register. Return the dest register. */
15589 rs6000_emit_eqne (machine_mode mode
, rtx op1
, rtx op2
, rtx scratch
)
15591 if (op2
== const0_rtx
)
15594 if (GET_CODE (scratch
) == SCRATCH
)
15595 scratch
= gen_reg_rtx (mode
);
15597 if (logical_operand (op2
, mode
))
15598 emit_insn (gen_rtx_SET (scratch
, gen_rtx_XOR (mode
, op1
, op2
)));
15600 emit_insn (gen_rtx_SET (scratch
,
15601 gen_rtx_PLUS (mode
, op1
, negate_rtx (mode
, op2
))));
15606 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15607 requires this. The result is mode MODE. */
15609 rs6000_emit_fp_cror (rtx_code code
, machine_mode mode
, rtx x
)
15613 if (code
== LTGT
|| code
== LE
|| code
== UNLT
)
15614 cond
[n
++] = gen_rtx_fmt_ee (LT
, mode
, x
, const0_rtx
);
15615 if (code
== LTGT
|| code
== GE
|| code
== UNGT
)
15616 cond
[n
++] = gen_rtx_fmt_ee (GT
, mode
, x
, const0_rtx
);
15617 if (code
== LE
|| code
== GE
|| code
== UNEQ
)
15618 cond
[n
++] = gen_rtx_fmt_ee (EQ
, mode
, x
, const0_rtx
);
15619 if (code
== UNLT
|| code
== UNGT
|| code
== UNEQ
)
15620 cond
[n
++] = gen_rtx_fmt_ee (UNORDERED
, mode
, x
, const0_rtx
);
15622 gcc_assert (n
== 2);
15624 rtx cc
= gen_reg_rtx (CCEQmode
);
15625 rtx logical
= gen_rtx_IOR (mode
, cond
[0], cond
[1]);
15626 emit_insn (gen_cceq_ior_compare (mode
, cc
, logical
, cond
[0], x
, cond
[1], x
));
15632 rs6000_emit_sCOND (machine_mode mode
, rtx operands
[])
15634 rtx condition_rtx
= rs6000_generate_compare (operands
[1], mode
);
15635 rtx_code cond_code
= GET_CODE (condition_rtx
);
15637 if (FLOAT_MODE_P (mode
) && HONOR_NANS (mode
)
15638 && !(FLOAT128_VECTOR_P (mode
) && !TARGET_FLOAT128_HW
))
15640 else if (cond_code
== NE
15641 || cond_code
== GE
|| cond_code
== LE
15642 || cond_code
== GEU
|| cond_code
== LEU
15643 || cond_code
== ORDERED
|| cond_code
== UNGE
|| cond_code
== UNLE
)
15645 rtx not_result
= gen_reg_rtx (CCEQmode
);
15646 rtx not_op
, rev_cond_rtx
;
15647 machine_mode cc_mode
;
15649 cc_mode
= GET_MODE (XEXP (condition_rtx
, 0));
15651 rev_cond_rtx
= gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode
, cond_code
),
15652 SImode
, XEXP (condition_rtx
, 0), const0_rtx
);
15653 not_op
= gen_rtx_COMPARE (CCEQmode
, rev_cond_rtx
, const0_rtx
);
15654 emit_insn (gen_rtx_SET (not_result
, not_op
));
15655 condition_rtx
= gen_rtx_EQ (VOIDmode
, not_result
, const0_rtx
);
15658 machine_mode op_mode
= GET_MODE (XEXP (operands
[1], 0));
15659 if (op_mode
== VOIDmode
)
15660 op_mode
= GET_MODE (XEXP (operands
[1], 1));
15662 if (TARGET_POWERPC64
&& (op_mode
== DImode
|| FLOAT_MODE_P (mode
)))
15664 PUT_MODE (condition_rtx
, DImode
);
15665 convert_move (operands
[0], condition_rtx
, 0);
15669 PUT_MODE (condition_rtx
, SImode
);
15670 emit_insn (gen_rtx_SET (operands
[0], condition_rtx
));
15674 /* Emit a branch of kind CODE to location LOC. */
15677 rs6000_emit_cbranch (machine_mode mode
, rtx operands
[])
15679 rtx condition_rtx
= rs6000_generate_compare (operands
[0], mode
);
15680 rtx loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
15681 rtx ite
= gen_rtx_IF_THEN_ELSE (VOIDmode
, condition_rtx
, loc_ref
, pc_rtx
);
15682 emit_jump_insn (gen_rtx_SET (pc_rtx
, ite
));
15685 /* Return the string to output a conditional branch to LABEL, which is
15686 the operand template of the label, or NULL if the branch is really a
15687 conditional return.
15689 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15690 condition code register and its mode specifies what kind of
15691 comparison we made.
15693 REVERSED is nonzero if we should reverse the sense of the comparison.
15695 INSN is the insn. */
15698 output_cbranch (rtx op
, const char *label
, int reversed
, rtx_insn
*insn
)
15700 static char string
[64];
15701 enum rtx_code code
= GET_CODE (op
);
15702 rtx cc_reg
= XEXP (op
, 0);
15703 machine_mode mode
= GET_MODE (cc_reg
);
15704 int cc_regno
= REGNO (cc_reg
) - CR0_REGNO
;
15705 int need_longbranch
= label
!= NULL
&& get_attr_length (insn
) == 8;
15706 int really_reversed
= reversed
^ need_longbranch
;
15712 validate_condition_mode (code
, mode
);
15714 /* Work out which way this really branches. We could use
15715 reverse_condition_maybe_unordered here always but this
15716 makes the resulting assembler clearer. */
15717 if (really_reversed
)
15719 /* Reversal of FP compares takes care -- an ordered compare
15720 becomes an unordered compare and vice versa. */
15721 if (mode
== CCFPmode
)
15722 code
= reverse_condition_maybe_unordered (code
);
15724 code
= reverse_condition (code
);
15729 /* Not all of these are actually distinct opcodes, but
15730 we distinguish them for clarity of the resulting assembler. */
15731 case NE
: case LTGT
:
15732 ccode
= "ne"; break;
15733 case EQ
: case UNEQ
:
15734 ccode
= "eq"; break;
15736 ccode
= "ge"; break;
15737 case GT
: case GTU
: case UNGT
:
15738 ccode
= "gt"; break;
15740 ccode
= "le"; break;
15741 case LT
: case LTU
: case UNLT
:
15742 ccode
= "lt"; break;
15743 case UNORDERED
: ccode
= "un"; break;
15744 case ORDERED
: ccode
= "nu"; break;
15745 case UNGE
: ccode
= "nl"; break;
15746 case UNLE
: ccode
= "ng"; break;
15748 gcc_unreachable ();
15751 /* Maybe we have a guess as to how likely the branch is. */
15753 note
= find_reg_note (insn
, REG_BR_PROB
, NULL_RTX
);
15754 if (note
!= NULL_RTX
)
15756 /* PROB is the difference from 50%. */
15757 int prob
= profile_probability::from_reg_br_prob_note (XINT (note
, 0))
15758 .to_reg_br_prob_base () - REG_BR_PROB_BASE
/ 2;
15760 /* Only hint for highly probable/improbable branches on newer cpus when
15761 we have real profile data, as static prediction overrides processor
15762 dynamic prediction. For older cpus we may as well always hint, but
15763 assume not taken for branches that are very close to 50% as a
15764 mispredicted taken branch is more expensive than a
15765 mispredicted not-taken branch. */
15766 if (rs6000_always_hint
15767 || (abs (prob
) > REG_BR_PROB_BASE
/ 100 * 48
15768 && (profile_status_for_fn (cfun
) != PROFILE_GUESSED
)
15769 && br_prob_note_reliable_p (note
)))
15771 if (abs (prob
) > REG_BR_PROB_BASE
/ 20
15772 && ((prob
> 0) ^ need_longbranch
))
15780 s
+= sprintf (s
, "b%slr%s ", ccode
, pred
);
15782 s
+= sprintf (s
, "b%s%s ", ccode
, pred
);
15784 /* We need to escape any '%' characters in the reg_names string.
15785 Assume they'd only be the first character.... */
15786 if (reg_names
[cc_regno
+ CR0_REGNO
][0] == '%')
15788 s
+= sprintf (s
, "%s", reg_names
[cc_regno
+ CR0_REGNO
]);
15792 /* If the branch distance was too far, we may have to use an
15793 unconditional branch to go the distance. */
15794 if (need_longbranch
)
15795 s
+= sprintf (s
, ",$+8\n\tb %s", label
);
15797 s
+= sprintf (s
, ",%s", label
);
15803 /* Return insn for VSX or Altivec comparisons. */
15806 rs6000_emit_vector_compare_inner (enum rtx_code code
, rtx op0
, rtx op1
)
15809 machine_mode mode
= GET_MODE (op0
);
15817 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
15828 mask
= gen_reg_rtx (mode
);
15829 emit_insn (gen_rtx_SET (mask
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
15836 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15837 DMODE is expected destination mode. This is a recursive function. */
15840 rs6000_emit_vector_compare (enum rtx_code rcode
,
15842 machine_mode dmode
)
15845 bool swap_operands
= false;
15846 bool try_again
= false;
15848 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode
));
15849 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
15851 /* See if the comparison works as is. */
15852 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
15860 swap_operands
= true;
15865 swap_operands
= true;
15873 /* Invert condition and try again.
15874 e.g., A != B becomes ~(A==B). */
15876 enum rtx_code rev_code
;
15877 enum insn_code nor_code
;
15880 rev_code
= reverse_condition_maybe_unordered (rcode
);
15881 if (rev_code
== UNKNOWN
)
15884 nor_code
= optab_handler (one_cmpl_optab
, dmode
);
15885 if (nor_code
== CODE_FOR_nothing
)
15888 mask2
= rs6000_emit_vector_compare (rev_code
, op0
, op1
, dmode
);
15892 mask
= gen_reg_rtx (dmode
);
15893 emit_insn (GEN_FCN (nor_code
) (mask
, mask2
));
15901 /* Try GT/GTU/LT/LTU OR EQ */
15904 enum insn_code ior_code
;
15905 enum rtx_code new_code
;
15926 gcc_unreachable ();
15929 ior_code
= optab_handler (ior_optab
, dmode
);
15930 if (ior_code
== CODE_FOR_nothing
)
15933 c_rtx
= rs6000_emit_vector_compare (new_code
, op0
, op1
, dmode
);
15937 eq_rtx
= rs6000_emit_vector_compare (EQ
, op0
, op1
, dmode
);
15941 mask
= gen_reg_rtx (dmode
);
15942 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
15953 std::swap (op0
, op1
);
15955 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
15960 /* You only get two chances. */
15964 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15965 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15966 operands for the relation operation COND. */
15969 rs6000_emit_vector_cond_expr (rtx dest
, rtx op_true
, rtx op_false
,
15970 rtx cond
, rtx cc_op0
, rtx cc_op1
)
15972 machine_mode dest_mode
= GET_MODE (dest
);
15973 machine_mode mask_mode
= GET_MODE (cc_op0
);
15974 enum rtx_code rcode
= GET_CODE (cond
);
15976 bool invert_move
= false;
15978 if (VECTOR_UNIT_NONE_P (dest_mode
))
15981 gcc_assert (GET_MODE_SIZE (dest_mode
) == GET_MODE_SIZE (mask_mode
)
15982 && GET_MODE_NUNITS (dest_mode
) == GET_MODE_NUNITS (mask_mode
));
15986 /* Swap operands if we can, and fall back to doing the operation as
15987 specified, and doing a NOR to invert the test. */
15993 /* Invert condition and try again.
15994 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15995 invert_move
= true;
15996 rcode
= reverse_condition_maybe_unordered (rcode
);
15997 if (rcode
== UNKNOWN
)
16003 if (GET_MODE_CLASS (mask_mode
) == MODE_VECTOR_INT
)
16005 /* Invert condition to avoid compound test. */
16006 invert_move
= true;
16007 rcode
= reverse_condition (rcode
);
16016 /* Invert condition to avoid compound test if necessary. */
16017 if (rcode
== GEU
|| rcode
== LEU
)
16019 invert_move
= true;
16020 rcode
= reverse_condition (rcode
);
16028 /* Get the vector mask for the given relational operations. */
16029 mask
= rs6000_emit_vector_compare (rcode
, cc_op0
, cc_op1
, mask_mode
);
16034 if (mask_mode
!= dest_mode
)
16035 mask
= simplify_gen_subreg (dest_mode
, mask
, mask_mode
, 0);
16038 std::swap (op_true
, op_false
);
16040 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
16041 if (GET_MODE_CLASS (dest_mode
) == MODE_VECTOR_INT
16042 && (GET_CODE (op_true
) == CONST_VECTOR
16043 || GET_CODE (op_false
) == CONST_VECTOR
))
16045 rtx constant_0
= CONST0_RTX (dest_mode
);
16046 rtx constant_m1
= CONSTM1_RTX (dest_mode
);
16048 if (op_true
== constant_m1
&& op_false
== constant_0
)
16050 emit_move_insn (dest
, mask
);
16054 else if (op_true
== constant_0
&& op_false
== constant_m1
)
16056 emit_insn (gen_rtx_SET (dest
, gen_rtx_NOT (dest_mode
, mask
)));
16060 /* If we can't use the vector comparison directly, perhaps we can use
16061 the mask for the true or false fields, instead of loading up a
16063 if (op_true
== constant_m1
)
16066 if (op_false
== constant_0
)
16070 if (!REG_P (op_true
) && !SUBREG_P (op_true
))
16071 op_true
= force_reg (dest_mode
, op_true
);
16073 if (!REG_P (op_false
) && !SUBREG_P (op_false
))
16074 op_false
= force_reg (dest_mode
, op_false
);
16076 rtx tmp
= gen_rtx_IOR (dest_mode
,
16077 gen_rtx_AND (dest_mode
, gen_rtx_NOT (dest_mode
, mask
),
16079 gen_rtx_AND (dest_mode
, mask
, op_true
));
16080 emit_insn (gen_rtx_SET (dest
, tmp
));
16084 /* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
16085 maximum or minimum with "C" semantics.
16087 Unless you use -ffast-math, you can't use these instructions to replace
16088 conditions that implicitly reverse the condition because the comparison
16089 might generate a NaN or signed zero.
16091 I.e. the following can be replaced all of the time
16092 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
16093 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
16094 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
16095 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
16097 The following can be replaced only if -ffast-math is used:
16098 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
16099 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
16100 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
16101 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
16103 Move TRUE_COND to DEST if OP of the operands of the last comparison is
16104 nonzero/true, FALSE_COND if it is zero/false.
16106 Return false if we can't generate the appropriate minimum or maximum, and
16107 true if we can did the minimum or maximum. */
16110 rs6000_maybe_emit_maxc_minc (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16112 enum rtx_code code
= GET_CODE (op
);
16113 rtx op0
= XEXP (op
, 0);
16114 rtx op1
= XEXP (op
, 1);
16115 machine_mode compare_mode
= GET_MODE (op0
);
16116 machine_mode result_mode
= GET_MODE (dest
);
16118 if (result_mode
!= compare_mode
)
16121 /* See the comments of this function, it simply expects GE/GT/LE/LT in
16122 the checks, but for the reversible equivalent UNLT/UNLE/UNGT/UNGE,
16123 we need to do the reversions first to make the following checks
16124 support fewer cases, like:
16126 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
16127 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1;
16128 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
16129 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1;
16131 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
16132 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
16133 have to check for fast-math or the like. */
16134 if (code
== UNGE
|| code
== UNGT
|| code
== UNLE
|| code
== UNLT
)
16136 code
= reverse_condition_maybe_unordered (code
);
16137 std::swap (true_cond
, false_cond
);
16141 if (code
== GE
|| code
== GT
)
16143 else if (code
== LE
|| code
== LT
)
16148 if (rtx_equal_p (op0
, true_cond
) && rtx_equal_p (op1
, false_cond
))
16151 /* Only when NaNs and signed-zeros are not in effect, smax could be
16152 used for `op0 < op1 ? op1 : op0`, and smin could be used for
16153 `op0 > op1 ? op1 : op0`. */
16154 else if (rtx_equal_p (op1
, true_cond
) && rtx_equal_p (op0
, false_cond
)
16155 && !HONOR_NANS (compare_mode
) && !HONOR_SIGNED_ZEROS (compare_mode
))
16161 rs6000_emit_minmax (dest
, max_p
? SMAX
: SMIN
, op0
, op1
);
16165 /* Possibly emit a floating point conditional move by generating a compare that
16166 sets a mask instruction and a XXSEL select instruction.
16168 Move TRUE_COND to DEST if OP of the operands of the last comparison is
16169 nonzero/true, FALSE_COND if it is zero/false.
16171 Return false if the operation cannot be generated, and true if we could
16172 generate the instruction. */
/* Possibly emit a floating point conditional move using a compare-and-
   set-mask instruction plus XXSEL (see the block comment above).
   NOTE(review): this span is a mangled extraction with several source
   lines missing (the condition-code dispatch between the mode check and
   the swap_condition call, and the IF_THEN_ELSE arms); the fragments
   below are preserved verbatim.  Do not treat the line-leading numbers
   as code -- they are residue of the extraction.  */
16175 rs6000_maybe_emit_fp_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
/* Decompose OP into its code and operands; the compare and result modes
   must be SFmode/DFmode in any combination (checked below).  */
16177 enum rtx_code code
= GET_CODE (op
);
16178 rtx op0
= XEXP (op
, 0);
16179 rtx op1
= XEXP (op
, 1);
16180 machine_mode compare_mode
= GET_MODE (op0
);
16181 machine_mode result_mode
= GET_MODE (dest
);
/* Pseudos are required to build the parallel below.  */
16186 if (!can_create_pseudo_p ())
16189 /* We allow the comparison to be either SFmode/DFmode and the true/false
16190 condition to be either SFmode/DFmode. I.e. we allow:
16195 r = (a == b) ? c : d;
16202 r = (a == b) ? c : d;
16204 but we don't allow intermixing the IEEE 128-bit floating point types with
16205 the 32/64-bit scalar types. */
16207 if (!(compare_mode
== result_mode
16208 || (compare_mode
== SFmode
&& result_mode
== DFmode
)
16209 || (compare_mode
== DFmode
&& result_mode
== SFmode
)))
/* Canonicalize the comparison by swapping its sense and operands;
   the surrounding dispatch (dropped by the extraction) selects when.  */
16222 code
= swap_condition (code
);
16223 std::swap (op0
, op1
);
16230 /* Generate: [(parallel [(set (dest)
16231 (if_then_else (op (cmp1) (cmp2))
16234 (clobber (scratch))])]. */
16236 compare_rtx
= gen_rtx_fmt_ee (code
, CCFPmode
, op0
, op1
);
16237 cmove_rtx
= gen_rtx_SET (dest
,
16238 gen_rtx_IF_THEN_ELSE (result_mode
,
/* The V2DImode scratch is the mask register consumed by XXSEL.  */
16243 clobber_rtx
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (V2DImode
));
16244 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
16245 gen_rtvec (2, cmove_rtx
, clobber_rtx
)));
16250 /* Helper function to return true if the target has instructions to do a
16251 compare and set mask instruction that can be used with XXSEL to implement a
16252 conditional move. It is also assumed that such a target also supports the
16253 "C" minimum and maximum instructions. */
16256 have_compare_and_set_mask (machine_mode mode
)
16262 return TARGET_P9_MINMAX
;
16266 return TARGET_POWER10
&& TARGET_FLOAT128_HW
&& FLOAT128_IEEE_P (mode
);
16275 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
16276 operands of the last comparison is nonzero/true, FALSE_COND if it
16277 is zero/false. Return 0 if the hardware has no such operation. */
16280 rs6000_emit_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16282 enum rtx_code code
= GET_CODE (op
);
16283 rtx op0
= XEXP (op
, 0);
16284 rtx op1
= XEXP (op
, 1);
16285 machine_mode compare_mode
= GET_MODE (op0
);
16286 machine_mode result_mode
= GET_MODE (dest
);
16288 bool is_against_zero
;
16290 /* These modes should always match. */
16291 if (GET_MODE (op1
) != compare_mode
16292 /* In the isel case however, we can use a compare immediate, so
16293 op1 may be a small constant. */
16294 && (!TARGET_ISEL
|| !short_cint_operand (op1
, VOIDmode
)))
16296 if (GET_MODE (true_cond
) != result_mode
)
16298 if (GET_MODE (false_cond
) != result_mode
)
16301 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16303 if (have_compare_and_set_mask (compare_mode
)
16304 && have_compare_and_set_mask (result_mode
))
16306 if (rs6000_maybe_emit_maxc_minc (dest
, op
, true_cond
, false_cond
))
16309 if (rs6000_maybe_emit_fp_cmove (dest
, op
, true_cond
, false_cond
))
16313 /* Don't allow using floating point comparisons for integer results for
16315 if (FLOAT_MODE_P (compare_mode
) && !FLOAT_MODE_P (result_mode
))
16318 /* First, work out if the hardware can do this at all, or
16319 if it's too slow.... */
16320 if (!FLOAT_MODE_P (compare_mode
))
16323 return rs6000_emit_int_cmove (dest
, op
, true_cond
, false_cond
);
16327 is_against_zero
= op1
== CONST0_RTX (compare_mode
);
16329 /* A floating-point subtract might overflow, underflow, or produce
16330 an inexact result, thus changing the floating-point flags, so it
16331 can't be generated if we care about that. It's safe if one side
16332 of the construct is zero, since then no subtract will be
16334 if (SCALAR_FLOAT_MODE_P (compare_mode
)
16335 && flag_trapping_math
&& ! is_against_zero
)
16338 /* Eliminate half of the comparisons by switching operands, this
16339 makes the remaining code simpler. */
16340 if (code
== UNLT
|| code
== UNGT
|| code
== UNORDERED
|| code
== NE
16341 || code
== LTGT
|| code
== LT
|| code
== UNLE
)
16343 code
= reverse_condition_maybe_unordered (code
);
16345 true_cond
= false_cond
;
16349 /* UNEQ and LTGT take four instructions for a comparison with zero,
16350 it'll probably be faster to use a branch here too. */
16351 if (code
== UNEQ
&& HONOR_NANS (compare_mode
))
16354 /* We're going to try to implement comparisons by performing
16355 a subtract, then comparing against zero. Unfortunately,
16356 Inf - Inf is NaN which is not zero, and so if we don't
16357 know that the operand is finite and the comparison
16358 would treat EQ different to UNORDERED, we can't do it. */
16359 if (HONOR_INFINITIES (compare_mode
)
16360 && code
!= GT
&& code
!= UNGE
16361 && (!CONST_DOUBLE_P (op1
)
16362 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1
)))
16363 /* Constructs of the form (a OP b ? a : b) are safe. */
16364 && ((! rtx_equal_p (op0
, false_cond
) && ! rtx_equal_p (op1
, false_cond
))
16365 || (! rtx_equal_p (op0
, true_cond
)
16366 && ! rtx_equal_p (op1
, true_cond
))))
16369 /* At this point we know we can use fsel. */
16371 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16372 is no fsel instruction. */
16373 if (compare_mode
!= SFmode
&& compare_mode
!= DFmode
)
16376 /* Reduce the comparison to a comparison against zero. */
16377 if (! is_against_zero
)
16379 temp
= gen_reg_rtx (compare_mode
);
16380 emit_insn (gen_rtx_SET (temp
, gen_rtx_MINUS (compare_mode
, op0
, op1
)));
16382 op1
= CONST0_RTX (compare_mode
);
16385 /* If we don't care about NaNs we can reduce some of the comparisons
16386 down to faster ones. */
16387 if (! HONOR_NANS (compare_mode
))
16393 true_cond
= false_cond
;
16406 /* Now, reduce everything down to a GE. */
16413 temp
= gen_reg_rtx (compare_mode
);
16414 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16419 temp
= gen_reg_rtx (compare_mode
);
16420 emit_insn (gen_rtx_SET (temp
, gen_rtx_ABS (compare_mode
, op0
)));
16425 temp
= gen_reg_rtx (compare_mode
);
16426 emit_insn (gen_rtx_SET (temp
,
16427 gen_rtx_NEG (compare_mode
,
16428 gen_rtx_ABS (compare_mode
, op0
))));
16433 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16434 temp
= gen_reg_rtx (result_mode
);
16435 emit_insn (gen_rtx_SET (temp
,
16436 gen_rtx_IF_THEN_ELSE (result_mode
,
16437 gen_rtx_GE (VOIDmode
,
16439 true_cond
, false_cond
)));
16440 false_cond
= true_cond
;
16443 temp
= gen_reg_rtx (compare_mode
);
16444 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16449 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16450 temp
= gen_reg_rtx (result_mode
);
16451 emit_insn (gen_rtx_SET (temp
,
16452 gen_rtx_IF_THEN_ELSE (result_mode
,
16453 gen_rtx_GE (VOIDmode
,
16455 true_cond
, false_cond
)));
16456 true_cond
= false_cond
;
16459 temp
= gen_reg_rtx (compare_mode
);
16460 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16465 gcc_unreachable ();
16468 emit_insn (gen_rtx_SET (dest
,
16469 gen_rtx_IF_THEN_ELSE (result_mode
,
16470 gen_rtx_GE (VOIDmode
,
16472 true_cond
, false_cond
)));
16476 /* Same as above, but for ints (isel). */
16479 rs6000_emit_int_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16481 rtx condition_rtx
, cr
;
16482 machine_mode mode
= GET_MODE (dest
);
16483 enum rtx_code cond_code
;
16484 rtx (*isel_func
) (rtx
, rtx
, rtx
, rtx
, rtx
);
16487 if (mode
!= SImode
&& (!TARGET_POWERPC64
|| mode
!= DImode
))
16490 /* PR104335: We now need to expect CC-mode "comparisons"
16491 coming from ifcvt. The following code expects proper
16492 comparisons so better abort here. */
16493 if (GET_MODE_CLASS (GET_MODE (XEXP (op
, 0))) == MODE_CC
)
16496 /* We still have to do the compare, because isel doesn't do a
16497 compare, it just looks at the CRx bits set by a previous compare
16499 condition_rtx
= rs6000_generate_compare (op
, mode
);
16500 cond_code
= GET_CODE (condition_rtx
);
16501 cr
= XEXP (condition_rtx
, 0);
16502 signedp
= GET_MODE (cr
) == CCmode
;
16504 isel_func
= (mode
== SImode
16505 ? (signedp
? gen_isel_cc_si
: gen_isel_ccuns_si
)
16506 : (signedp
? gen_isel_cc_di
: gen_isel_ccuns_di
));
16510 case LT
: case GT
: case LTU
: case GTU
: case EQ
:
16511 /* isel handles these directly. */
16515 /* We need to swap the sense of the comparison. */
16517 std::swap (false_cond
, true_cond
);
16518 PUT_CODE (condition_rtx
, reverse_condition (cond_code
));
16523 false_cond
= force_reg (mode
, false_cond
);
16524 if (true_cond
!= const0_rtx
)
16525 true_cond
= force_reg (mode
, true_cond
);
16527 emit_insn (isel_func (dest
, condition_rtx
, true_cond
, false_cond
, cr
));
16533 rs6000_emit_minmax (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
16535 machine_mode mode
= GET_MODE (op0
);
16539 /* VSX/altivec have direct min/max insns. */
16540 if ((code
== SMAX
|| code
== SMIN
)
16541 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode
)
16542 || (mode
== SFmode
&& VECTOR_UNIT_VSX_P (DFmode
))
16543 || (TARGET_POWER10
&& TARGET_FLOAT128_HW
&& FLOAT128_IEEE_P (mode
))))
16545 emit_insn (gen_rtx_SET (dest
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
16549 if (code
== SMAX
|| code
== SMIN
)
16554 if (code
== SMAX
|| code
== UMAX
)
16555 target
= emit_conditional_move (dest
, { c
, op0
, op1
, mode
},
16556 op0
, op1
, mode
, 0);
16558 target
= emit_conditional_move (dest
, { c
, op0
, op1
, mode
},
16559 op1
, op0
, mode
, 0);
16560 gcc_assert (target
);
16561 if (target
!= dest
)
16562 emit_move_insn (dest
, target
);
16565 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16566 COND is true. Mark the jump as unlikely to be taken. */
16569 emit_unlikely_jump (rtx cond
, rtx label
)
16571 rtx x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cond
, label
, pc_rtx
);
16572 rtx_insn
*insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, x
));
16573 add_reg_br_prob_note (insn
, profile_probability::very_unlikely ());
16576 /* A subroutine of the atomic operation splitters. Emit a load-locked
16577 instruction in MODE. For QI/HImode, possibly use a pattern than includes
16578 the zero_extend operation. */
16581 emit_load_locked (machine_mode mode
, rtx reg
, rtx mem
)
16583 rtx (*fn
) (rtx
, rtx
) = NULL
;
16588 fn
= gen_load_lockedqi
;
16591 fn
= gen_load_lockedhi
;
16594 if (GET_MODE (mem
) == QImode
)
16595 fn
= gen_load_lockedqi_si
;
16596 else if (GET_MODE (mem
) == HImode
)
16597 fn
= gen_load_lockedhi_si
;
16599 fn
= gen_load_lockedsi
;
16602 fn
= gen_load_lockeddi
;
16605 fn
= gen_load_lockedti
;
16608 gcc_unreachable ();
16610 emit_insn (fn (reg
, mem
));
16613 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16614 instruction in MODE. */
16617 emit_store_conditional (machine_mode mode
, rtx res
, rtx mem
, rtx val
)
16619 rtx (*fn
) (rtx
, rtx
, rtx
) = NULL
;
16624 fn
= gen_store_conditionalqi
;
16627 fn
= gen_store_conditionalhi
;
16630 fn
= gen_store_conditionalsi
;
16633 fn
= gen_store_conditionaldi
;
16636 fn
= gen_store_conditionalti
;
16639 gcc_unreachable ();
16642 /* Emit sync before stwcx. to address PPC405 Erratum. */
16643 if (PPC405_ERRATUM77
)
16644 emit_insn (gen_hwsync ());
16646 emit_insn (fn (res
, mem
, val
));
16649 /* Expand barriers before and after a load_locked/store_cond sequence. */
16652 rs6000_pre_atomic_barrier (rtx mem
, enum memmodel model
)
16654 rtx addr
= XEXP (mem
, 0);
16656 if (!legitimate_indirect_address_p (addr
, reload_completed
)
16657 && !legitimate_indexed_address_p (addr
, reload_completed
))
16659 addr
= force_reg (Pmode
, addr
);
16660 mem
= replace_equiv_address_nv (mem
, addr
);
16665 case MEMMODEL_RELAXED
:
16666 case MEMMODEL_CONSUME
:
16667 case MEMMODEL_ACQUIRE
:
16669 case MEMMODEL_RELEASE
:
16670 case MEMMODEL_ACQ_REL
:
16671 emit_insn (gen_lwsync ());
16673 case MEMMODEL_SEQ_CST
:
16674 emit_insn (gen_hwsync ());
16677 gcc_unreachable ();
16683 rs6000_post_atomic_barrier (enum memmodel model
)
16687 case MEMMODEL_RELAXED
:
16688 case MEMMODEL_CONSUME
:
16689 case MEMMODEL_RELEASE
:
16691 case MEMMODEL_ACQUIRE
:
16692 case MEMMODEL_ACQ_REL
:
16693 case MEMMODEL_SEQ_CST
:
16694 emit_insn (gen_isync ());
16697 gcc_unreachable ();
16701 /* A subroutine of the various atomic expanders. For sub-word operations,
16702 we must adjust things to operate on SImode. Given the original MEM,
16703 return a new aligned memory. Also build and return the quantities by
16704 which to shift and mask. */
16707 rs6000_adjust_atomic_subword (rtx orig_mem
, rtx
*pshift
, rtx
*pmask
)
16709 rtx addr
, align
, shift
, mask
, mem
;
16710 HOST_WIDE_INT shift_mask
;
16711 machine_mode mode
= GET_MODE (orig_mem
);
16713 /* For smaller modes, we have to implement this via SImode. */
16714 shift_mask
= (mode
== QImode
? 0x18 : 0x10);
16716 addr
= XEXP (orig_mem
, 0);
16717 addr
= force_reg (GET_MODE (addr
), addr
);
16719 /* Aligned memory containing subword. Generate a new memory. We
16720 do not want any of the existing MEM_ATTR data, as we're now
16721 accessing memory outside the original object. */
16722 align
= expand_simple_binop (Pmode
, AND
, addr
, GEN_INT (-4),
16723 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16724 mem
= gen_rtx_MEM (SImode
, align
);
16725 MEM_VOLATILE_P (mem
) = MEM_VOLATILE_P (orig_mem
);
16726 if (MEM_ALIAS_SET (orig_mem
) == ALIAS_SET_MEMORY_BARRIER
)
16727 set_mem_alias_set (mem
, ALIAS_SET_MEMORY_BARRIER
);
16729 /* Shift amount for subword relative to aligned word. */
16730 shift
= gen_reg_rtx (SImode
);
16731 addr
= gen_lowpart (SImode
, addr
);
16732 rtx tmp
= gen_reg_rtx (SImode
);
16733 emit_insn (gen_ashlsi3 (tmp
, addr
, GEN_INT (3)));
16734 emit_insn (gen_andsi3 (shift
, tmp
, GEN_INT (shift_mask
)));
16735 if (BYTES_BIG_ENDIAN
)
16736 shift
= expand_simple_binop (SImode
, XOR
, shift
, GEN_INT (shift_mask
),
16737 shift
, 1, OPTAB_LIB_WIDEN
);
16740 /* Mask for insertion. */
16741 mask
= expand_simple_binop (SImode
, ASHIFT
, GEN_INT (GET_MODE_MASK (mode
)),
16742 shift
, NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16748 /* A subroutine of the various atomic expanders. For sub-word operands,
16749 combine OLDVAL and NEWVAL via MASK. Returns a new pseduo. */
16752 rs6000_mask_atomic_subword (rtx oldval
, rtx newval
, rtx mask
)
16756 x
= gen_reg_rtx (SImode
);
16757 emit_insn (gen_rtx_SET (x
, gen_rtx_AND (SImode
,
16758 gen_rtx_NOT (SImode
, mask
),
16761 x
= expand_simple_binop (SImode
, IOR
, newval
, x
, x
, 1, OPTAB_LIB_WIDEN
);
16766 /* A subroutine of the various atomic expanders. For sub-word operands,
16767 extract WIDE to NARROW via SHIFT. */
16770 rs6000_finish_atomic_subword (rtx narrow
, rtx wide
, rtx shift
)
16772 wide
= expand_simple_binop (SImode
, LSHIFTRT
, wide
, shift
,
16773 wide
, 1, OPTAB_LIB_WIDEN
);
16774 emit_move_insn (narrow
, gen_lowpart (GET_MODE (narrow
), wide
));
16777 /* Expand an atomic compare and swap operation. */
16780 rs6000_expand_atomic_compare_and_swap (rtx operands
[])
16782 rtx boolval
, retval
, mem
, oldval
, newval
, cond
;
16783 rtx label1
, label2
, x
, mask
, shift
;
16784 machine_mode mode
, orig_mode
;
16785 enum memmodel mod_s
, mod_f
;
16788 boolval
= operands
[0];
16789 retval
= operands
[1];
16791 oldval
= operands
[3];
16792 newval
= operands
[4];
16793 is_weak
= (INTVAL (operands
[5]) != 0);
16794 mod_s
= memmodel_base (INTVAL (operands
[6]));
16795 mod_f
= memmodel_base (INTVAL (operands
[7]));
16796 orig_mode
= mode
= GET_MODE (mem
);
16798 mask
= shift
= NULL_RTX
;
16799 if (mode
== QImode
|| mode
== HImode
)
16801 /* Before power8, we didn't have access to lbarx/lharx, so generate a
16802 lwarx and shift/mask operations. With power8, we need to do the
16803 comparison in SImode, but the store is still done in QI/HImode. */
16804 oldval
= convert_modes (SImode
, mode
, oldval
, 1);
16806 if (!TARGET_SYNC_HI_QI
)
16808 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
16810 /* Shift and mask OLDVAL into position with the word. */
16811 oldval
= expand_simple_binop (SImode
, ASHIFT
, oldval
, shift
,
16812 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16814 /* Shift and mask NEWVAL into position within the word. */
16815 newval
= convert_modes (SImode
, mode
, newval
, 1);
16816 newval
= expand_simple_binop (SImode
, ASHIFT
, newval
, shift
,
16817 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16820 /* Prepare to adjust the return value. */
16821 retval
= gen_reg_rtx (SImode
);
16824 else if (reg_overlap_mentioned_p (retval
, oldval
))
16825 oldval
= copy_to_reg (oldval
);
16827 if (mode
!= TImode
&& !reg_or_short_operand (oldval
, mode
))
16828 oldval
= copy_to_mode_reg (mode
, oldval
);
16830 if (reg_overlap_mentioned_p (retval
, newval
))
16831 newval
= copy_to_reg (newval
);
16833 mem
= rs6000_pre_atomic_barrier (mem
, mod_s
);
16838 label1
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
16839 emit_label (XEXP (label1
, 0));
16841 label2
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
16843 emit_load_locked (mode
, retval
, mem
);
16847 x
= expand_simple_binop (SImode
, AND
, retval
, mask
,
16848 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16850 cond
= gen_reg_rtx (CCmode
);
16851 /* If we have TImode, synthesize a comparison. */
16852 if (mode
!= TImode
)
16853 x
= gen_rtx_COMPARE (CCmode
, x
, oldval
);
16856 rtx xor1_result
= gen_reg_rtx (DImode
);
16857 rtx xor2_result
= gen_reg_rtx (DImode
);
16858 rtx or_result
= gen_reg_rtx (DImode
);
16859 rtx new_word0
= simplify_gen_subreg (DImode
, x
, TImode
, 0);
16860 rtx new_word1
= simplify_gen_subreg (DImode
, x
, TImode
, 8);
16861 rtx old_word0
= simplify_gen_subreg (DImode
, oldval
, TImode
, 0);
16862 rtx old_word1
= simplify_gen_subreg (DImode
, oldval
, TImode
, 8);
16864 emit_insn (gen_xordi3 (xor1_result
, new_word0
, old_word0
));
16865 emit_insn (gen_xordi3 (xor2_result
, new_word1
, old_word1
));
16866 emit_insn (gen_iordi3 (or_result
, xor1_result
, xor2_result
));
16867 x
= gen_rtx_COMPARE (CCmode
, or_result
, const0_rtx
);
16870 emit_insn (gen_rtx_SET (cond
, x
));
16872 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
16873 emit_unlikely_jump (x
, label2
);
16877 x
= rs6000_mask_atomic_subword (retval
, newval
, mask
);
16879 emit_store_conditional (orig_mode
, cond
, mem
, x
);
16883 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
16884 emit_unlikely_jump (x
, label1
);
16887 if (!is_mm_relaxed (mod_f
))
16888 emit_label (XEXP (label2
, 0));
16890 rs6000_post_atomic_barrier (mod_s
);
16892 if (is_mm_relaxed (mod_f
))
16893 emit_label (XEXP (label2
, 0));
16896 rs6000_finish_atomic_subword (operands
[1], retval
, shift
);
16897 else if (mode
!= GET_MODE (operands
[1]))
16898 convert_move (operands
[1], retval
, 1);
16900 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16901 x
= gen_rtx_EQ (SImode
, cond
, const0_rtx
);
16902 emit_insn (gen_rtx_SET (boolval
, x
));
16905 /* Expand an atomic exchange operation. */
16908 rs6000_expand_atomic_exchange (rtx operands
[])
16910 rtx retval
, mem
, val
, cond
;
16912 enum memmodel model
;
16913 rtx label
, x
, mask
, shift
;
16915 retval
= operands
[0];
16918 model
= memmodel_base (INTVAL (operands
[3]));
16919 mode
= GET_MODE (mem
);
16921 mask
= shift
= NULL_RTX
;
16922 if (!TARGET_SYNC_HI_QI
&& (mode
== QImode
|| mode
== HImode
))
16924 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
16926 /* Shift and mask VAL into position with the word. */
16927 val
= convert_modes (SImode
, mode
, val
, 1);
16928 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
16929 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16931 /* Prepare to adjust the return value. */
16932 retval
= gen_reg_rtx (SImode
);
16936 mem
= rs6000_pre_atomic_barrier (mem
, model
);
16938 label
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
16939 emit_label (XEXP (label
, 0));
16941 emit_load_locked (mode
, retval
, mem
);
16945 x
= rs6000_mask_atomic_subword (retval
, val
, mask
);
16947 cond
= gen_reg_rtx (CCmode
);
16948 emit_store_conditional (mode
, cond
, mem
, x
);
16950 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
16951 emit_unlikely_jump (x
, label
);
16953 rs6000_post_atomic_barrier (model
);
16956 rs6000_finish_atomic_subword (operands
[0], retval
, shift
);
16959 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
16960 to perform. MEM is the memory on which to operate. VAL is the second
16961 operand of the binary operator. BEFORE and AFTER are optional locations to
16962 return the value of MEM either before of after the operation. MODEL_RTX
16963 is a CONST_INT containing the memory model to use. */
16966 rs6000_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
16967 rtx orig_before
, rtx orig_after
, rtx model_rtx
)
16969 enum memmodel model
= memmodel_base (INTVAL (model_rtx
));
16970 machine_mode mode
= GET_MODE (mem
);
16971 machine_mode store_mode
= mode
;
16972 rtx label
, x
, cond
, mask
, shift
;
16973 rtx before
= orig_before
, after
= orig_after
;
16975 mask
= shift
= NULL_RTX
;
16976 /* On power8, we want to use SImode for the operation. On previous systems,
16977 use the operation in a subword and shift/mask to get the proper byte or
16979 if (mode
== QImode
|| mode
== HImode
)
16981 if (TARGET_SYNC_HI_QI
)
16983 val
= convert_modes (SImode
, mode
, val
, 1);
16985 /* Prepare to adjust the return value. */
16986 before
= gen_reg_rtx (SImode
);
16988 after
= gen_reg_rtx (SImode
);
16993 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
16995 /* Shift and mask VAL into position with the word. */
16996 val
= convert_modes (SImode
, mode
, val
, 1);
16997 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
16998 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17004 /* We've already zero-extended VAL. That is sufficient to
17005 make certain that it does not affect other bits. */
17010 /* If we make certain that all of the other bits in VAL are
17011 set, that will be sufficient to not affect other bits. */
17012 x
= gen_rtx_NOT (SImode
, mask
);
17013 x
= gen_rtx_IOR (SImode
, x
, val
);
17014 emit_insn (gen_rtx_SET (val
, x
));
17021 /* These will all affect bits outside the field and need
17022 adjustment via MASK within the loop. */
17026 gcc_unreachable ();
17029 /* Prepare to adjust the return value. */
17030 before
= gen_reg_rtx (SImode
);
17032 after
= gen_reg_rtx (SImode
);
17033 store_mode
= mode
= SImode
;
17037 mem
= rs6000_pre_atomic_barrier (mem
, model
);
17039 label
= gen_label_rtx ();
17040 emit_label (label
);
17041 label
= gen_rtx_LABEL_REF (VOIDmode
, label
);
17043 if (before
== NULL_RTX
)
17044 before
= gen_reg_rtx (mode
);
17046 emit_load_locked (mode
, before
, mem
);
17050 x
= expand_simple_binop (mode
, AND
, before
, val
,
17051 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17052 after
= expand_simple_unop (mode
, NOT
, x
, after
, 1);
17056 after
= expand_simple_binop (mode
, code
, before
, val
,
17057 after
, 1, OPTAB_LIB_WIDEN
);
17063 x
= expand_simple_binop (SImode
, AND
, after
, mask
,
17064 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17065 x
= rs6000_mask_atomic_subword (before
, x
, mask
);
17067 else if (store_mode
!= mode
)
17068 x
= convert_modes (store_mode
, mode
, x
, 1);
17070 cond
= gen_reg_rtx (CCmode
);
17071 emit_store_conditional (store_mode
, cond
, mem
, x
);
17073 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
17074 emit_unlikely_jump (x
, label
);
17076 rs6000_post_atomic_barrier (model
);
17080 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
17081 then do the calcuations in a SImode register. */
17083 rs6000_finish_atomic_subword (orig_before
, before
, shift
);
17085 rs6000_finish_atomic_subword (orig_after
, after
, shift
);
17087 else if (store_mode
!= mode
)
17089 /* QImode/HImode on machines with lbarx/lharx where we do the native
17090 operation and then do the calcuations in a SImode register. */
17092 convert_move (orig_before
, before
, 1);
17094 convert_move (orig_after
, after
, 1);
17096 else if (orig_after
&& after
!= orig_after
)
17097 emit_move_insn (orig_after
, after
);
17100 static GTY(()) alias_set_type TOC_alias_set
= -1;
17103 get_TOC_alias_set (void)
17105 if (TOC_alias_set
== -1)
17106 TOC_alias_set
= new_alias_set ();
17107 return TOC_alias_set
;
17110 /* The mode the ABI uses for a word. This is not the same as word_mode
17111 for -m32 -mpowerpc64. This is used to implement various target hooks. */
17113 static scalar_int_mode
17114 rs6000_abi_word_mode (void)
17116 return TARGET_32BIT
? SImode
: DImode
;
17119 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
17121 rs6000_offload_options (void)
17124 return xstrdup ("-foffload-abi=lp64");
17126 return xstrdup ("-foffload-abi=ilp32");
/* A quick summary of the various types of 'constant-pool tables'
   under PowerPC:

   Target	Flags		Name		One table per
   AIX		(none)		AIX TOC		object file
   AIX		-mfull-toc	AIX TOC		object file
   AIX		-mminimal-toc	AIX minimal TOC	translation unit
   SVR4/EABI	(none)		SVR4 SDATA	object file
   SVR4/EABI	-fpic		SVR4 pic	object file
   SVR4/EABI	-fPIC		SVR4 PIC	translation unit
   SVR4/EABI	-mrelocatable	EABI TOC	function
   SVR4/EABI	-maix		AIX TOC		object file
   SVR4/EABI	-maix -mminimal-toc
				AIX minimal TOC	translation unit

   Name			Reg.	Set by	entries	      contains:
					made by	 addrs?	fp?	sum?

   AIX TOC		2	crt0	as	 Y	option	option
   AIX minimal TOC	30	prolog	gcc	 Y	Y	option
   SVR4 SDATA		13	crt0	gcc	 N	Y	N
   SVR4 pic		30	prolog	ld	 Y	not yet	N
   SVR4 PIC		30	prolog	gcc	 Y	option	option
   EABI TOC		30	prolog	gcc	 Y	option	option

*/
17157 /* Hash functions for the hash table. */
17160 rs6000_hash_constant (rtx k
)
17162 enum rtx_code code
= GET_CODE (k
);
17163 machine_mode mode
= GET_MODE (k
);
17164 unsigned result
= (code
<< 3) ^ mode
;
17165 const char *format
;
17168 format
= GET_RTX_FORMAT (code
);
17169 flen
= strlen (format
);
17175 return result
* 1231 + (unsigned) INSN_UID (XEXP (k
, 0));
17177 case CONST_WIDE_INT
:
17180 flen
= CONST_WIDE_INT_NUNITS (k
);
17181 for (i
= 0; i
< flen
; i
++)
17182 result
= result
* 613 + CONST_WIDE_INT_ELT (k
, i
);
17187 return real_hash (CONST_DOUBLE_REAL_VALUE (k
)) * result
;
17197 for (; fidx
< flen
; fidx
++)
17198 switch (format
[fidx
])
17203 const char *str
= XSTR (k
, fidx
);
17204 len
= strlen (str
);
17205 result
= result
* 613 + len
;
17206 for (i
= 0; i
< len
; i
++)
17207 result
= result
* 613 + (unsigned) str
[i
];
17212 result
= result
* 1231 + rs6000_hash_constant (XEXP (k
, fidx
));
17216 result
= result
* 613 + (unsigned) XINT (k
, fidx
);
17219 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT
))
17220 result
= result
* 613 + (unsigned) XWINT (k
, fidx
);
17224 for (i
= 0; i
< sizeof (HOST_WIDE_INT
) / sizeof (unsigned); i
++)
17225 result
= result
* 613 + (unsigned) (XWINT (k
, fidx
)
17232 gcc_unreachable ();
17239 toc_hasher::hash (toc_hash_struct
*thc
)
17241 return rs6000_hash_constant (thc
->key
) ^ thc
->key_mode
;
17244 /* Compare H1 and H2 for equivalence. */
17247 toc_hasher::equal (toc_hash_struct
*h1
, toc_hash_struct
*h2
)
17252 if (h1
->key_mode
!= h2
->key_mode
)
17255 return rtx_equal_p (r1
, r2
);
17258 /* These are the names given by the C++ front-end to vtables, and
17259 vtable-like objects. Ideally, this logic should not be here;
17260 instead, there should be some programmatic way of inquiring as
17261 to whether or not an object is a vtable. */
17263 #define VTABLE_NAME_P(NAME) \
17264 (startswith (name, "_vt.") \
17265 || startswith (name, "_ZTV") \
17266 || startswith (name, "_ZTT") \
17267 || startswith (name, "_ZTI") \
17268 || startswith (name, "_ZTC"))
#ifdef NO_DOLLAR_IN_LABEL
/* Return a GGC-allocated character string translating dollar signs in
   input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF.  */

const char *
rs6000_xcoff_strip_dollar (const char *name)
{
  char *strip, *p;
  const char *q;
  size_t len;

  q = (const char *) strchr (name, '$');

  /* No '$' at all, or only a leading one, means no translation needed.  */
  if (q == 0 || q == name)
    return name;

  len = strlen (name);
  strip = XALLOCAVEC (char, len + 1);
  strcpy (strip, name);
  p = strip + (q - name);
  while (p)
    {
      *p = '_';
      p = strchr (p + 1, '$');
    }

  return ggc_alloc_string (strip, len);
}
#endif
17301 rs6000_output_symbol_ref (FILE *file
, rtx x
)
17303 const char *name
= XSTR (x
, 0);
17305 /* Currently C++ toc references to vtables can be emitted before it
17306 is decided whether the vtable is public or private. If this is
17307 the case, then the linker will eventually complain that there is
17308 a reference to an unknown section. Thus, for vtables only,
17309 we emit the TOC reference to reference the identifier and not the
17311 if (VTABLE_NAME_P (name
))
17313 RS6000_OUTPUT_BASENAME (file
, name
);
17316 assemble_name (file
, name
);
17319 /* Output a TOC entry. We derive the entry name from what is being
17323 output_toc (FILE *file
, rtx x
, int labelno
, machine_mode mode
)
17326 const char *name
= buf
;
17328 HOST_WIDE_INT offset
= 0;
17330 gcc_assert (!TARGET_NO_TOC_OR_PCREL
);
17332 /* When the linker won't eliminate them, don't output duplicate
17333 TOC entries (this happens on AIX if there is any kind of TOC,
17334 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17336 if (TARGET_TOC
&& GET_CODE (x
) != LABEL_REF
)
17338 struct toc_hash_struct
*h
;
17340 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17341 time because GGC is not initialized at that point. */
17342 if (toc_hash_table
== NULL
)
17343 toc_hash_table
= hash_table
<toc_hasher
>::create_ggc (1021);
17345 h
= ggc_alloc
<toc_hash_struct
> ();
17347 h
->key_mode
= mode
;
17348 h
->labelno
= labelno
;
17350 toc_hash_struct
**found
= toc_hash_table
->find_slot (h
, INSERT
);
17351 if (*found
== NULL
)
17353 else /* This is indeed a duplicate.
17354 Set this label equal to that label. */
17356 fputs ("\t.set ", file
);
17357 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
17358 fprintf (file
, "%d,", labelno
);
17359 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
17360 fprintf (file
, "%d\n", ((*found
)->labelno
));
17363 if (TARGET_XCOFF
&& SYMBOL_REF_P (x
)
17364 && (SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_GLOBAL_DYNAMIC
17365 || SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
))
17367 fputs ("\t.set ", file
);
17368 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
17369 fprintf (file
, "%d,", labelno
);
17370 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
17371 fprintf (file
, "%d\n", ((*found
)->labelno
));
17378 /* If we're going to put a double constant in the TOC, make sure it's
17379 aligned properly when strict alignment is on. */
17380 if ((CONST_DOUBLE_P (x
) || CONST_WIDE_INT_P (x
))
17381 && STRICT_ALIGNMENT
17382 && GET_MODE_BITSIZE (mode
) >= 64
17383 && ! (TARGET_NO_FP_IN_TOC
&& ! TARGET_MINIMAL_TOC
)) {
17384 ASM_OUTPUT_ALIGN (file
, 3);
17387 (*targetm
.asm_out
.internal_label
) (file
, "LC", labelno
);
17389 /* Handle FP constants specially. Note that if we have a minimal
17390 TOC, things we put here aren't actually in the TOC, so we can allow
17392 if (CONST_DOUBLE_P (x
)
17393 && (GET_MODE (x
) == TFmode
|| GET_MODE (x
) == TDmode
17394 || GET_MODE (x
) == IFmode
|| GET_MODE (x
) == KFmode
))
17398 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17399 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17401 real_to_target (k
, CONST_DOUBLE_REAL_VALUE (x
), GET_MODE (x
));
17405 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17406 fputs (DOUBLE_INT_ASM_OP
, file
);
17408 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17409 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17410 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17411 fprintf (file
, "0x%lx%08lx,0x%lx%08lx\n",
17412 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17413 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff,
17414 k
[WORDS_BIG_ENDIAN
? 2 : 3] & 0xffffffff,
17415 k
[WORDS_BIG_ENDIAN
? 3 : 2] & 0xffffffff);
17420 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17421 fputs ("\t.long ", file
);
17423 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17424 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17425 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17426 fprintf (file
, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17427 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17428 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17432 else if (CONST_DOUBLE_P (x
)
17433 && (GET_MODE (x
) == DFmode
|| GET_MODE (x
) == DDmode
))
17437 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17438 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17440 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17444 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17445 fputs (DOUBLE_INT_ASM_OP
, file
);
17447 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17448 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17449 fprintf (file
, "0x%lx%08lx\n",
17450 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17451 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff);
17456 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17457 fputs ("\t.long ", file
);
17459 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17460 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17461 fprintf (file
, "0x%lx,0x%lx\n",
17462 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17466 else if (CONST_DOUBLE_P (x
)
17467 && (GET_MODE (x
) == SFmode
|| GET_MODE (x
) == SDmode
))
17471 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17472 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17474 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17478 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17479 fputs (DOUBLE_INT_ASM_OP
, file
);
17481 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17482 if (WORDS_BIG_ENDIAN
)
17483 fprintf (file
, "0x%lx00000000\n", l
& 0xffffffff);
17485 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17490 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17491 fputs ("\t.long ", file
);
17493 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17494 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17498 else if (GET_MODE (x
) == VOIDmode
&& CONST_INT_P (x
))
17500 unsigned HOST_WIDE_INT low
;
17501 HOST_WIDE_INT high
;
17503 low
= INTVAL (x
) & 0xffffffff;
17504 high
= (HOST_WIDE_INT
) INTVAL (x
) >> 32;
17506 /* TOC entries are always Pmode-sized, so when big-endian
17507 smaller integer constants in the TOC need to be padded.
17508 (This is still a win over putting the constants in
17509 a separate constant pool, because then we'd have
17510 to have both a TOC entry _and_ the actual constant.)
17512 For a 32-bit target, CONST_INT values are loaded and shifted
17513 entirely within `low' and can be stored in one TOC entry. */
17515 /* It would be easy to make this work, but it doesn't now. */
17516 gcc_assert (!TARGET_64BIT
|| POINTER_SIZE
>= GET_MODE_BITSIZE (mode
));
17518 if (WORDS_BIG_ENDIAN
&& POINTER_SIZE
> GET_MODE_BITSIZE (mode
))
17521 low
<<= POINTER_SIZE
- GET_MODE_BITSIZE (mode
);
17522 high
= (HOST_WIDE_INT
) low
>> 32;
17528 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17529 fputs (DOUBLE_INT_ASM_OP
, file
);
17531 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17532 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17533 fprintf (file
, "0x%lx%08lx\n",
17534 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17539 if (POINTER_SIZE
< GET_MODE_BITSIZE (mode
))
17541 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17542 fputs ("\t.long ", file
);
17544 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17545 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17546 fprintf (file
, "0x%lx,0x%lx\n",
17547 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17551 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17552 fputs ("\t.long ", file
);
17554 fprintf (file
, "\t.tc IS_%lx[TC],", (long) low
& 0xffffffff);
17555 fprintf (file
, "0x%lx\n", (long) low
& 0xffffffff);
17561 if (GET_CODE (x
) == CONST
)
17563 gcc_assert (GET_CODE (XEXP (x
, 0)) == PLUS
17564 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)));
17566 base
= XEXP (XEXP (x
, 0), 0);
17567 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
17570 switch (GET_CODE (base
))
17573 name
= XSTR (base
, 0);
17577 ASM_GENERATE_INTERNAL_LABEL (buf
, "L",
17578 CODE_LABEL_NUMBER (XEXP (base
, 0)));
17582 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (base
));
17586 gcc_unreachable ();
17589 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17590 fputs (TARGET_32BIT
? "\t.long " : DOUBLE_INT_ASM_OP
, file
);
17593 fputs ("\t.tc ", file
);
17594 RS6000_OUTPUT_BASENAME (file
, name
);
17597 fprintf (file
, ".N" HOST_WIDE_INT_PRINT_UNSIGNED
, - offset
);
17599 fprintf (file
, ".P" HOST_WIDE_INT_PRINT_UNSIGNED
, offset
);
17601 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17602 after other TOC symbols, reducing overflow of small TOC access
17603 to [TC] symbols. */
17604 fputs (TARGET_XCOFF
&& TARGET_CMODEL
!= CMODEL_SMALL
17605 ? "[TE]," : "[TC],", file
);
17608 /* Currently C++ toc references to vtables can be emitted before it
17609 is decided whether the vtable is public or private. If this is
17610 the case, then the linker will eventually complain that there is
17611 a TOC reference to an unknown section. Thus, for vtables only,
17612 we emit the TOC reference to reference the symbol and not the
17614 if (VTABLE_NAME_P (name
))
17616 RS6000_OUTPUT_BASENAME (file
, name
);
17618 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
);
17619 else if (offset
> 0)
17620 fprintf (file
, "+" HOST_WIDE_INT_PRINT_DEC
, offset
);
17623 output_addr_const (file
, x
);
17626 if (TARGET_XCOFF
&& SYMBOL_REF_P (base
))
17628 switch (SYMBOL_REF_TLS_MODEL (base
))
17632 case TLS_MODEL_LOCAL_EXEC
:
17633 fputs ("@le", file
);
17635 case TLS_MODEL_INITIAL_EXEC
:
17636 fputs ("@ie", file
);
17638 /* Use global-dynamic for local-dynamic. */
17639 case TLS_MODEL_GLOBAL_DYNAMIC
:
17640 case TLS_MODEL_LOCAL_DYNAMIC
:
17642 (*targetm
.asm_out
.internal_label
) (file
, "LCM", labelno
);
17643 fputs ("\t.tc .", file
);
17644 RS6000_OUTPUT_BASENAME (file
, name
);
17645 fputs ("[TC],", file
);
17646 output_addr_const (file
, x
);
17647 fputs ("@m", file
);
17650 gcc_unreachable ();
/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.

   NOTE(review): reconstructed from a damaged extraction; verify against
   upstream rs6000.cc.  */

void
output_ascii (FILE *file, const char *p, int n)
{
  char c;
  int i, count_string;
  const char *for_string = "\t.byte \"";
  const char *for_decimal = "\t.byte ";
  const char *to_close = NULL;

  count_string = 0;
  for (i = 0; i < n; i++)
    {
      c = *p++;
      if (c >= ' ' && c < 0177)
	{
	  if (for_string)
	    fputs (for_string, file);
	  putc (c, file);

	  /* Write two quotes to get one.  */
	  if (c == '"')
	    {
	      putc (c, file);
	      ++count_string;
	    }

	  for_string = NULL;
	  for_decimal = "\"\n\t.byte ";
	  to_close = "\"\n";
	  ++count_string;

	  /* Break very long strings to dodge assembler truncation.  */
	  if (count_string >= 512)
	    {
	      fputs (to_close, file);

	      for_string = "\t.byte \"";
	      for_decimal = "\t.byte ";
	      to_close = NULL;
	      count_string = 0;
	    }
	}
      else
	{
	  /* Non-printable byte: emit it numerically outside the quotes.  */
	  if (for_decimal)
	    fputs (for_decimal, file);
	  fprintf (file, "%d", c);

	  for_string = "\n\t.byte \"";
	  for_decimal = ", ";
	  to_close = "\n";
	  count_string = 0;
	}
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (to_close)
    fputs (to_close, file);
}
17725 /* Generate a unique section name for FILENAME for a section type
17726 represented by SECTION_DESC. Output goes into BUF.
17728 SECTION_DESC can be any string, as long as it is different for each
17729 possible section type.
17731 We name the section in the same manner as xlc. The name begins with an
17732 underscore followed by the filename (after stripping any leading directory
17733 names) with the last period replaced by the string SECTION_DESC. If
17734 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17738 rs6000_gen_section_name (char **buf
, const char *filename
,
17739 const char *section_desc
)
17741 const char *q
, *after_last_slash
, *last_period
= 0;
17745 after_last_slash
= filename
;
17746 for (q
= filename
; *q
; q
++)
17749 after_last_slash
= q
+ 1;
17750 else if (*q
== '.')
17754 len
= strlen (after_last_slash
) + strlen (section_desc
) + 2;
17755 *buf
= (char *) xmalloc (len
);
17760 for (q
= after_last_slash
; *q
; q
++)
17762 if (q
== last_period
)
17764 strcpy (p
, section_desc
);
17765 p
+= strlen (section_desc
);
17769 else if (ISALNUM (*q
))
17773 if (last_period
== 0)
17774 strcpy (p
, section_desc
);
17779 /* Emit profile function. */
17782 output_profile_hook (int labelno ATTRIBUTE_UNUSED
)
17784 /* Non-standard profiling for kernels, which just saves LR then calls
17785 _mcount without worrying about arg saves. The idea is to change
17786 the function prologue as little as possible as it isn't easy to
17787 account for arg save/restore code added just for _mcount. */
17788 if (TARGET_PROFILE_KERNEL
)
17791 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
17793 #ifndef NO_PROFILE_COUNTERS
17794 # define NO_PROFILE_COUNTERS 0
17796 if (NO_PROFILE_COUNTERS
)
17797 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
17798 LCT_NORMAL
, VOIDmode
);
17802 const char *label_name
;
17805 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
17806 label_name
= ggc_strdup ((*targetm
.strip_name_encoding
) (buf
));
17807 fun
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
17809 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
17810 LCT_NORMAL
, VOIDmode
, fun
, Pmode
);
17813 else if (DEFAULT_ABI
== ABI_DARWIN
)
17815 const char *mcount_name
= RS6000_MCOUNT
;
17816 int caller_addr_regno
= LR_REGNO
;
17818 /* Be conservative and always set this, at least for now. */
17819 crtl
->uses_pic_offset_table
= 1;
17822 /* For PIC code, set up a stub and collect the caller's address
17823 from r0, which is where the prologue puts it. */
17824 if (MACHOPIC_INDIRECT
17825 && crtl
->uses_pic_offset_table
)
17826 caller_addr_regno
= 0;
17828 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, mcount_name
),
17829 LCT_NORMAL
, VOIDmode
,
17830 gen_rtx_REG (Pmode
, caller_addr_regno
), Pmode
);
17834 /* Write function profiler code. */
17837 output_function_profiler (FILE *file
, int labelno
)
17841 switch (DEFAULT_ABI
)
17844 gcc_unreachable ();
17849 warning (0, "no profiling of 64-bit code for this ABI");
17852 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
17853 fprintf (file
, "\tmflr %s\n", reg_names
[0]);
17854 if (NO_PROFILE_COUNTERS
)
17856 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17857 reg_names
[0], reg_names
[1]);
17859 else if (TARGET_SECURE_PLT
&& flag_pic
)
17861 if (TARGET_LINK_STACK
)
17864 get_ppc476_thunk_name (name
);
17865 asm_fprintf (file
, "\tbl %s\n", name
);
17868 asm_fprintf (file
, "\tbcl 20,31,1f\n1:\n");
17869 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17870 reg_names
[0], reg_names
[1]);
17871 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
17872 asm_fprintf (file
, "\taddis %s,%s,",
17873 reg_names
[12], reg_names
[12]);
17874 assemble_name (file
, buf
);
17875 asm_fprintf (file
, "-1b@ha\n\tla %s,", reg_names
[0]);
17876 assemble_name (file
, buf
);
17877 asm_fprintf (file
, "-1b@l(%s)\n", reg_names
[12]);
17879 else if (flag_pic
== 1)
17881 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file
);
17882 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17883 reg_names
[0], reg_names
[1]);
17884 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
17885 asm_fprintf (file
, "\tlwz %s,", reg_names
[0]);
17886 assemble_name (file
, buf
);
17887 asm_fprintf (file
, "@got(%s)\n", reg_names
[12]);
17889 else if (flag_pic
> 1)
17891 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17892 reg_names
[0], reg_names
[1]);
17893 /* Now, we need to get the address of the label. */
17894 if (TARGET_LINK_STACK
)
17897 get_ppc476_thunk_name (name
);
17898 asm_fprintf (file
, "\tbl %s\n\tb 1f\n\t.long ", name
);
17899 assemble_name (file
, buf
);
17900 fputs ("-.\n1:", file
);
17901 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
17902 asm_fprintf (file
, "\taddi %s,%s,4\n",
17903 reg_names
[11], reg_names
[11]);
17907 fputs ("\tbcl 20,31,1f\n\t.long ", file
);
17908 assemble_name (file
, buf
);
17909 fputs ("-.\n1:", file
);
17910 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
17912 asm_fprintf (file
, "\tlwz %s,0(%s)\n",
17913 reg_names
[0], reg_names
[11]);
17914 asm_fprintf (file
, "\tadd %s,%s,%s\n",
17915 reg_names
[0], reg_names
[0], reg_names
[11]);
17919 asm_fprintf (file
, "\tlis %s,", reg_names
[12]);
17920 assemble_name (file
, buf
);
17921 fputs ("@ha\n", file
);
17922 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17923 reg_names
[0], reg_names
[1]);
17924 asm_fprintf (file
, "\tla %s,", reg_names
[0]);
17925 assemble_name (file
, buf
);
17926 asm_fprintf (file
, "@l(%s)\n", reg_names
[12]);
17929 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17930 fprintf (file
, "\tbl %s%s\n",
17931 RS6000_MCOUNT
, flag_pic
? "@plt" : "");
17937 /* Don't do anything, done in output_profile_hook (). */
17944 /* The following variable value is the last issued insn. */
17946 static rtx_insn
*last_scheduled_insn
;
17948 /* The following variable helps to balance issuing of load and
17949 store instructions */
17951 static int load_store_pendulum
;
17953 /* The following variable helps pair divide insns during scheduling. */
17954 static int divide_cnt
;
17955 /* The following variable helps pair and alternate vector and vector load
17956 insns during scheduling. */
17957 static int vec_pairing
;
17960 /* Power4 load update and store update instructions are cracked into a
17961 load or store and an integer insn which are executed in the same cycle.
17962 Branches have their own dispatch slot which does not count against the
17963 GCC issue rate, but it changes the program flow so there are no other
17964 instructions to issue in this cycle. */
17967 rs6000_variable_issue_1 (rtx_insn
*insn
, int more
)
17969 last_scheduled_insn
= insn
;
17970 if (GET_CODE (PATTERN (insn
)) == USE
17971 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
17973 cached_can_issue_more
= more
;
17974 return cached_can_issue_more
;
17977 if (insn_terminates_group_p (insn
, current_group
))
17979 cached_can_issue_more
= 0;
17980 return cached_can_issue_more
;
17983 /* If no reservation, but reach here */
17984 if (recog_memoized (insn
) < 0)
17987 if (rs6000_sched_groups
)
17989 if (is_microcoded_insn (insn
))
17990 cached_can_issue_more
= 0;
17991 else if (is_cracked_insn (insn
))
17992 cached_can_issue_more
= more
> 2 ? more
- 2 : 0;
17994 cached_can_issue_more
= more
- 1;
17996 return cached_can_issue_more
;
17999 if (rs6000_tune
== PROCESSOR_CELL
&& is_nonpipeline_insn (insn
))
18002 cached_can_issue_more
= more
- 1;
18003 return cached_can_issue_more
;
18007 rs6000_variable_issue (FILE *stream
, int verbose
, rtx_insn
*insn
, int more
)
18009 int r
= rs6000_variable_issue_1 (insn
, more
);
18011 fprintf (stream
, "// rs6000_variable_issue (more = %d) = %d\n", more
, r
);
18015 /* Adjust the cost of a scheduling dependency. Return the new cost of
18016 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
18019 rs6000_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
18022 enum attr_type attr_type
;
18024 if (recog_memoized (insn
) < 0 || recog_memoized (dep_insn
) < 0)
18031 /* Data dependency; DEP_INSN writes a register that INSN reads
18032 some cycles later. */
18034 /* Separate a load from a narrower, dependent store. */
18035 if ((rs6000_sched_groups
|| rs6000_tune
== PROCESSOR_POWER9
18036 || rs6000_tune
== PROCESSOR_POWER10
)
18037 && GET_CODE (PATTERN (insn
)) == SET
18038 && GET_CODE (PATTERN (dep_insn
)) == SET
18039 && MEM_P (XEXP (PATTERN (insn
), 1))
18040 && MEM_P (XEXP (PATTERN (dep_insn
), 0))
18041 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn
), 1)))
18042 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn
), 0)))))
18045 attr_type
= get_attr_type (insn
);
18050 /* Tell the first scheduling pass about the latency between
18051 a mtctr and bctr (and mtlr and br/blr). The first
18052 scheduling pass will not know about this latency since
18053 the mtctr instruction, which has the latency associated
18054 to it, will be generated by reload. */
18057 /* Leave some extra cycles between a compare and its
18058 dependent branch, to inhibit expensive mispredicts. */
18059 if ((rs6000_tune
== PROCESSOR_PPC603
18060 || rs6000_tune
== PROCESSOR_PPC604
18061 || rs6000_tune
== PROCESSOR_PPC604e
18062 || rs6000_tune
== PROCESSOR_PPC620
18063 || rs6000_tune
== PROCESSOR_PPC630
18064 || rs6000_tune
== PROCESSOR_PPC750
18065 || rs6000_tune
== PROCESSOR_PPC7400
18066 || rs6000_tune
== PROCESSOR_PPC7450
18067 || rs6000_tune
== PROCESSOR_PPCE5500
18068 || rs6000_tune
== PROCESSOR_PPCE6500
18069 || rs6000_tune
== PROCESSOR_POWER4
18070 || rs6000_tune
== PROCESSOR_POWER5
18071 || rs6000_tune
== PROCESSOR_POWER7
18072 || rs6000_tune
== PROCESSOR_POWER8
18073 || rs6000_tune
== PROCESSOR_POWER9
18074 || rs6000_tune
== PROCESSOR_POWER10
18075 || rs6000_tune
== PROCESSOR_CELL
)
18076 && recog_memoized (dep_insn
)
18077 && (INSN_CODE (dep_insn
) >= 0))
18079 switch (get_attr_type (dep_insn
))
18082 case TYPE_FPCOMPARE
:
18083 case TYPE_CR_LOGICAL
:
18087 if (get_attr_dot (dep_insn
) == DOT_YES
)
18092 if (get_attr_dot (dep_insn
) == DOT_YES
18093 && get_attr_var_shift (dep_insn
) == VAR_SHIFT_NO
)
18104 if ((rs6000_tune
== PROCESSOR_POWER6
)
18105 && recog_memoized (dep_insn
)
18106 && (INSN_CODE (dep_insn
) >= 0))
18109 if (GET_CODE (PATTERN (insn
)) != SET
)
18110 /* If this happens, we have to extend this to schedule
18111 optimally. Return default for now. */
18114 /* Adjust the cost for the case where the value written
18115 by a fixed point operation is used as the address
18116 gen value on a store. */
18117 switch (get_attr_type (dep_insn
))
18122 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18123 return get_attr_sign_extend (dep_insn
)
18124 == SIGN_EXTEND_YES
? 6 : 4;
18129 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18130 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
18140 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18148 if (get_attr_update (dep_insn
) == UPDATE_YES
18149 && ! rs6000_store_data_bypass_p (dep_insn
, insn
))
18155 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18161 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18162 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
18172 if ((rs6000_tune
== PROCESSOR_POWER6
)
18173 && recog_memoized (dep_insn
)
18174 && (INSN_CODE (dep_insn
) >= 0))
18177 /* Adjust the cost for the case where the value written
18178 by a fixed point instruction is used within the address
18179 gen portion of a subsequent load(u)(x) */
18180 switch (get_attr_type (dep_insn
))
18185 if (set_to_load_agen (dep_insn
, insn
))
18186 return get_attr_sign_extend (dep_insn
)
18187 == SIGN_EXTEND_YES
? 6 : 4;
18192 if (set_to_load_agen (dep_insn
, insn
))
18193 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
18203 if (set_to_load_agen (dep_insn
, insn
))
18211 if (get_attr_update (dep_insn
) == UPDATE_YES
18212 && set_to_load_agen (dep_insn
, insn
))
18218 if (set_to_load_agen (dep_insn
, insn
))
18224 if (set_to_load_agen (dep_insn
, insn
))
18225 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
18238 /* Fall out to return default cost. */
18242 case REG_DEP_OUTPUT
:
18243 /* Output dependency; DEP_INSN writes a register that INSN writes some
18245 if ((rs6000_tune
== PROCESSOR_POWER6
)
18246 && recog_memoized (dep_insn
)
18247 && (INSN_CODE (dep_insn
) >= 0))
18249 attr_type
= get_attr_type (insn
);
18254 case TYPE_FPSIMPLE
:
18255 if (get_attr_type (dep_insn
) == TYPE_FP
18256 || get_attr_type (dep_insn
) == TYPE_FPSIMPLE
)
18263 /* Fall through, no cost for output dependency. */
18267 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18272 gcc_unreachable ();
18278 /* Debug version of rs6000_adjust_cost. */
18281 rs6000_debug_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
,
18282 int cost
, unsigned int dw
)
18284 int ret
= rs6000_adjust_cost (insn
, dep_type
, dep_insn
, cost
, dw
);
18292 default: dep
= "unknown depencency"; break;
18293 case REG_DEP_TRUE
: dep
= "data dependency"; break;
18294 case REG_DEP_OUTPUT
: dep
= "output dependency"; break;
18295 case REG_DEP_ANTI
: dep
= "anti depencency"; break;
18299 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18300 "%s, insn:\n", ret
, cost
, dep
);
18308 /* The function returns a true if INSN is microcoded.
18309 Return false otherwise. */
18312 is_microcoded_insn (rtx_insn
*insn
)
18314 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18315 || GET_CODE (PATTERN (insn
)) == USE
18316 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18319 if (rs6000_tune
== PROCESSOR_CELL
)
18320 return get_attr_cell_micro (insn
) == CELL_MICRO_ALWAYS
;
18322 if (rs6000_sched_groups
18323 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
18325 enum attr_type type
= get_attr_type (insn
);
18326 if ((type
== TYPE_LOAD
18327 && get_attr_update (insn
) == UPDATE_YES
18328 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
18329 || ((type
== TYPE_LOAD
|| type
== TYPE_STORE
)
18330 && get_attr_update (insn
) == UPDATE_YES
18331 && get_attr_indexed (insn
) == INDEXED_YES
)
18332 || type
== TYPE_MFCR
)
18339 /* The function returns true if INSN is cracked into 2 instructions
18340 by the processor (and therefore occupies 2 issue slots). */
18343 is_cracked_insn (rtx_insn
*insn
)
18345 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18346 || GET_CODE (PATTERN (insn
)) == USE
18347 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18350 if (rs6000_sched_groups
18351 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
18353 enum attr_type type
= get_attr_type (insn
);
18354 if ((type
== TYPE_LOAD
18355 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
18356 && get_attr_update (insn
) == UPDATE_NO
)
18357 || (type
== TYPE_LOAD
18358 && get_attr_sign_extend (insn
) == SIGN_EXTEND_NO
18359 && get_attr_update (insn
) == UPDATE_YES
18360 && get_attr_indexed (insn
) == INDEXED_NO
)
18361 || (type
== TYPE_STORE
18362 && get_attr_update (insn
) == UPDATE_YES
18363 && get_attr_indexed (insn
) == INDEXED_NO
)
18364 || ((type
== TYPE_FPLOAD
|| type
== TYPE_FPSTORE
)
18365 && get_attr_update (insn
) == UPDATE_YES
)
18366 || (type
== TYPE_CR_LOGICAL
18367 && get_attr_cr_logical_3op (insn
) == CR_LOGICAL_3OP_YES
)
18368 || (type
== TYPE_EXTS
18369 && get_attr_dot (insn
) == DOT_YES
)
18370 || (type
== TYPE_SHIFT
18371 && get_attr_dot (insn
) == DOT_YES
18372 && get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
18373 || (type
== TYPE_MUL
18374 && get_attr_dot (insn
) == DOT_YES
)
18375 || type
== TYPE_DIV
18376 || (type
== TYPE_INSERT
18377 && get_attr_size (insn
) == SIZE_32
))
18384 /* The function returns true if INSN can be issued only from
18385 the branch slot. */
18388 is_branch_slot_insn (rtx_insn
*insn
)
18390 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18391 || GET_CODE (PATTERN (insn
)) == USE
18392 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18395 if (rs6000_sched_groups
)
18397 enum attr_type type
= get_attr_type (insn
);
18398 if (type
== TYPE_BRANCH
|| type
== TYPE_JMPREG
)
18406 /* The function returns true if out_inst sets a value that is
18407 used in the address generation computation of in_insn */
18409 set_to_load_agen (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
18411 rtx out_set
, in_set
;
18413 /* For performance reasons, only handle the simple case where
18414 both loads are a single_set. */
18415 out_set
= single_set (out_insn
);
18418 in_set
= single_set (in_insn
);
18420 return reg_mentioned_p (SET_DEST (out_set
), SET_SRC (in_set
));
18426 /* Try to determine base/offset/size parts of the given MEM.
18427 Return true if successful, false if all the values couldn't
18430 This function only looks for REG or REG+CONST address forms.
18431 REG+REG address form will return false. */
18434 get_memref_parts (rtx mem
, rtx
*base
, HOST_WIDE_INT
*offset
,
18435 HOST_WIDE_INT
*size
)
18438 if (MEM_SIZE_KNOWN_P (mem
))
18439 *size
= MEM_SIZE (mem
);
18443 addr_rtx
= (XEXP (mem
, 0));
18444 if (GET_CODE (addr_rtx
) == PRE_MODIFY
)
18445 addr_rtx
= XEXP (addr_rtx
, 1);
18448 while (GET_CODE (addr_rtx
) == PLUS
18449 && CONST_INT_P (XEXP (addr_rtx
, 1)))
18451 *offset
+= INTVAL (XEXP (addr_rtx
, 1));
18452 addr_rtx
= XEXP (addr_rtx
, 0);
18454 if (!REG_P (addr_rtx
))
18461 /* If the target storage locations of arguments MEM1 and MEM2 are
18462 adjacent, then return the argument that has the lower address.
18463 Otherwise, return NULL_RTX. */
18466 adjacent_mem_locations (rtx mem1
, rtx mem2
)
18469 HOST_WIDE_INT off1
, size1
, off2
, size2
;
18473 && get_memref_parts (mem1
, ®1
, &off1
, &size1
)
18474 && get_memref_parts (mem2
, ®2
, &off2
, &size2
)
18475 && REGNO (reg1
) == REGNO (reg2
))
18477 if (off1
+ size1
== off2
)
18479 else if (off2
+ size2
== off1
)
18486 /* This function returns true if it can be determined that the two MEM
18487 locations overlap by at least 1 byte based on base reg/offset/size. */
18490 mem_locations_overlap (rtx mem1
, rtx mem2
)
18493 HOST_WIDE_INT off1
, size1
, off2
, size2
;
18495 if (get_memref_parts (mem1
, ®1
, &off1
, &size1
)
18496 && get_memref_parts (mem2
, ®2
, &off2
, &size2
))
18497 return ((REGNO (reg1
) == REGNO (reg2
))
18498 && (((off1
<= off2
) && (off1
+ size1
> off2
))
18499 || ((off2
<= off1
) && (off2
+ size2
> off1
))));
18504 /* A C statement (sans semicolon) to update the integer scheduling
18505 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18506 INSN earlier, reduce the priority to execute INSN later. Do not
18507 define this macro if you do not need to adjust the scheduling
18508 priorities of insns. */
18511 rs6000_adjust_priority (rtx_insn
*insn ATTRIBUTE_UNUSED
, int priority
)
18513 rtx load_mem
, str_mem
;
18514 /* On machines (like the 750) which have asymmetric integer units,
18515 where one integer unit can do multiply and divides and the other
18516 can't, reduce the priority of multiply/divide so it is scheduled
18517 before other integer operations. */
18520 if (! INSN_P (insn
))
18523 if (GET_CODE (PATTERN (insn
)) == USE
)
18526 switch (rs6000_tune
) {
18527 case PROCESSOR_PPC750
:
18528 switch (get_attr_type (insn
))
18535 fprintf (stderr
, "priority was %#x (%d) before adjustment\n",
18536 priority
, priority
);
18537 if (priority
>= 0 && priority
< 0x01000000)
18544 if (insn_must_be_first_in_group (insn
)
18545 && reload_completed
18546 && current_sched_info
->sched_max_insns_priority
18547 && rs6000_sched_restricted_insns_priority
)
18550 /* Prioritize insns that can be dispatched only in the first
18552 if (rs6000_sched_restricted_insns_priority
== 1)
18553 /* Attach highest priority to insn. This means that in
18554 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18555 precede 'priority' (critical path) considerations. */
18556 return current_sched_info
->sched_max_insns_priority
;
18557 else if (rs6000_sched_restricted_insns_priority
== 2)
18558 /* Increase priority of insn by a minimal amount. This means that in
18559 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18560 considerations precede dispatch-slot restriction considerations. */
18561 return (priority
+ 1);
18564 if (rs6000_tune
== PROCESSOR_POWER6
18565 && ((load_store_pendulum
== -2 && is_load_insn (insn
, &load_mem
))
18566 || (load_store_pendulum
== 2 && is_store_insn (insn
, &str_mem
))))
18567 /* Attach highest priority to insn if the scheduler has just issued two
18568 stores and this instruction is a load, or two loads and this instruction
18569 is a store. Power6 wants loads and stores scheduled alternately
18571 return current_sched_info
->sched_max_insns_priority
;
18576 /* Return true if the instruction is nonpipelined on the Cell. */
18578 is_nonpipeline_insn (rtx_insn
*insn
)
18580 enum attr_type type
;
18581 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18582 || GET_CODE (PATTERN (insn
)) == USE
18583 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18586 type
= get_attr_type (insn
);
18587 if (type
== TYPE_MUL
18588 || type
== TYPE_DIV
18589 || type
== TYPE_SDIV
18590 || type
== TYPE_DDIV
18591 || type
== TYPE_SSQRT
18592 || type
== TYPE_DSQRT
18593 || type
== TYPE_MFCR
18594 || type
== TYPE_MFCRF
18595 || type
== TYPE_MFJMPR
)
18603 /* Return how many instructions the machine can issue per cycle. */
18606 rs6000_issue_rate (void)
18608 /* Unless scheduling for register pressure, use issue rate of 1 for
18609 first scheduling pass to decrease degradation. */
18610 if (!reload_completed
&& !flag_sched_pressure
)
18613 switch (rs6000_tune
) {
18614 case PROCESSOR_RS64A
:
18615 case PROCESSOR_PPC601
: /* ? */
18616 case PROCESSOR_PPC7450
:
18618 case PROCESSOR_PPC440
:
18619 case PROCESSOR_PPC603
:
18620 case PROCESSOR_PPC750
:
18621 case PROCESSOR_PPC7400
:
18622 case PROCESSOR_PPC8540
:
18623 case PROCESSOR_PPC8548
:
18624 case PROCESSOR_CELL
:
18625 case PROCESSOR_PPCE300C2
:
18626 case PROCESSOR_PPCE300C3
:
18627 case PROCESSOR_PPCE500MC
:
18628 case PROCESSOR_PPCE500MC64
:
18629 case PROCESSOR_PPCE5500
:
18630 case PROCESSOR_PPCE6500
:
18631 case PROCESSOR_TITAN
:
18633 case PROCESSOR_PPC476
:
18634 case PROCESSOR_PPC604
:
18635 case PROCESSOR_PPC604e
:
18636 case PROCESSOR_PPC620
:
18637 case PROCESSOR_PPC630
:
18639 case PROCESSOR_POWER4
:
18640 case PROCESSOR_POWER5
:
18641 case PROCESSOR_POWER6
:
18642 case PROCESSOR_POWER7
:
18644 case PROCESSOR_POWER8
:
18646 case PROCESSOR_POWER9
:
18648 case PROCESSOR_POWER10
:
18655 /* Return how many instructions to look ahead for better insn
18659 rs6000_use_sched_lookahead (void)
18661 switch (rs6000_tune
)
18663 case PROCESSOR_PPC8540
:
18664 case PROCESSOR_PPC8548
:
18667 case PROCESSOR_CELL
:
18668 return (reload_completed
? 8 : 0);
18675 /* We are choosing insn from the ready queue. Return zero if INSN can be
18678 rs6000_use_sched_lookahead_guard (rtx_insn
*insn
, int ready_index
)
18680 if (ready_index
== 0)
18683 if (rs6000_tune
!= PROCESSOR_CELL
)
18686 gcc_assert (insn
!= NULL_RTX
&& INSN_P (insn
));
18688 if (!reload_completed
18689 || is_nonpipeline_insn (insn
)
18690 || is_microcoded_insn (insn
))
18696 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18697 and return true. */
18700 find_mem_ref (rtx pat
, rtx
*mem_ref
)
18705 /* stack_tie does not produce any real memory traffic. */
18706 if (tie_operand (pat
, VOIDmode
))
18715 /* Recursively process the pattern. */
18716 fmt
= GET_RTX_FORMAT (GET_CODE (pat
));
18718 for (i
= GET_RTX_LENGTH (GET_CODE (pat
)) - 1; i
>= 0; i
--)
18722 if (find_mem_ref (XEXP (pat
, i
), mem_ref
))
18725 else if (fmt
[i
] == 'E')
18726 for (j
= XVECLEN (pat
, i
) - 1; j
>= 0; j
--)
18728 if (find_mem_ref (XVECEXP (pat
, i
, j
), mem_ref
))
18736 /* Determine if PAT is a PATTERN of a load insn. */
18739 is_load_insn1 (rtx pat
, rtx
*load_mem
)
18741 if (!pat
|| pat
== NULL_RTX
)
18744 if (GET_CODE (pat
) == SET
)
18746 if (REG_P (SET_DEST (pat
)))
18747 return find_mem_ref (SET_SRC (pat
), load_mem
);
18752 if (GET_CODE (pat
) == PARALLEL
)
18756 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
18757 if (is_load_insn1 (XVECEXP (pat
, 0, i
), load_mem
))
18764 /* Determine if INSN loads from memory. */
18767 is_load_insn (rtx insn
, rtx
*load_mem
)
18769 if (!insn
|| !INSN_P (insn
))
18775 return is_load_insn1 (PATTERN (insn
), load_mem
);
18778 /* Determine if PAT is a PATTERN of a store insn. */
18781 is_store_insn1 (rtx pat
, rtx
*str_mem
)
18783 if (!pat
|| pat
== NULL_RTX
)
18786 if (GET_CODE (pat
) == SET
)
18788 if (REG_P (SET_SRC (pat
)) || SUBREG_P (SET_SRC (pat
)))
18789 return find_mem_ref (SET_DEST (pat
), str_mem
);
18794 if (GET_CODE (pat
) == PARALLEL
)
18798 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
18799 if (is_store_insn1 (XVECEXP (pat
, 0, i
), str_mem
))
18806 /* Determine if INSN stores to memory. */
18809 is_store_insn (rtx insn
, rtx
*str_mem
)
18811 if (!insn
|| !INSN_P (insn
))
18814 return is_store_insn1 (PATTERN (insn
), str_mem
);
18817 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18820 is_power9_pairable_vec_type (enum attr_type type
)
18824 case TYPE_VECSIMPLE
:
18825 case TYPE_VECCOMPLEX
:
18829 case TYPE_VECFLOAT
:
18831 case TYPE_VECDOUBLE
:
18839 /* Returns whether the dependence between INSN and NEXT is considered
18840 costly by the given target. */
18843 rs6000_is_costly_dependence (dep_t dep
, int cost
, int distance
)
18847 rtx load_mem
, str_mem
;
18849 /* If the flag is not enabled - no dependence is considered costly;
18850 allow all dependent insns in the same group.
18851 This is the most aggressive option. */
18852 if (rs6000_sched_costly_dep
== no_dep_costly
)
18855 /* If the flag is set to 1 - a dependence is always considered costly;
18856 do not allow dependent instructions in the same group.
18857 This is the most conservative option. */
18858 if (rs6000_sched_costly_dep
== all_deps_costly
)
18861 insn
= DEP_PRO (dep
);
18862 next
= DEP_CON (dep
);
18864 if (rs6000_sched_costly_dep
== store_to_load_dep_costly
18865 && is_load_insn (next
, &load_mem
)
18866 && is_store_insn (insn
, &str_mem
))
18867 /* Prevent load after store in the same group. */
18870 if (rs6000_sched_costly_dep
== true_store_to_load_dep_costly
18871 && is_load_insn (next
, &load_mem
)
18872 && is_store_insn (insn
, &str_mem
)
18873 && DEP_TYPE (dep
) == REG_DEP_TRUE
18874 && mem_locations_overlap(str_mem
, load_mem
))
18875 /* Prevent load after store in the same group if it is a true
18879 /* The flag is set to X; dependences with latency >= X are considered costly,
18880 and will not be scheduled in the same group. */
18881 if (rs6000_sched_costly_dep
<= max_dep_latency
18882 && ((cost
- distance
) >= (int)rs6000_sched_costly_dep
))
18888 /* Return the next insn after INSN that is found before TAIL is reached,
18889 skipping any "non-active" insns - insns that will not actually occupy
18890 an issue slot. Return NULL_RTX if such an insn is not found. */
18893 get_next_active_insn (rtx_insn
*insn
, rtx_insn
*tail
)
18895 if (insn
== NULL_RTX
|| insn
== tail
)
18900 insn
= NEXT_INSN (insn
);
18901 if (insn
== NULL_RTX
|| insn
== tail
)
18905 || JUMP_P (insn
) || JUMP_TABLE_DATA_P (insn
)
18906 || (NONJUMP_INSN_P (insn
)
18907 && GET_CODE (PATTERN (insn
)) != USE
18908 && GET_CODE (PATTERN (insn
)) != CLOBBER
18909 && INSN_CODE (insn
) != CODE_FOR_stack_tie
))
18915 /* Move instruction at POS to the end of the READY list. */
18918 move_to_end_of_ready (rtx_insn
**ready
, int pos
, int lastpos
)
18924 for (i
= pos
; i
< lastpos
; i
++)
18925 ready
[i
] = ready
[i
+ 1];
18926 ready
[lastpos
] = tmp
;
18929 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18932 power6_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
18934 /* For Power6, we need to handle some special cases to try and keep the
18935 store queue from overflowing and triggering expensive flushes.
18937 This code monitors how load and store instructions are being issued
18938 and skews the ready list one way or the other to increase the likelihood
18939 that a desired instruction is issued at the proper time.
18941 A couple of things are done. First, we maintain a "load_store_pendulum"
18942 to track the current state of load/store issue.
18944 - If the pendulum is at zero, then no loads or stores have been
18945 issued in the current cycle so we do nothing.
18947 - If the pendulum is 1, then a single load has been issued in this
18948 cycle and we attempt to locate another load in the ready list to
18951 - If the pendulum is -2, then two stores have already been
18952 issued in this cycle, so we increase the priority of the first load
18953 in the ready list to increase it's likelihood of being chosen first
18956 - If the pendulum is -1, then a single store has been issued in this
18957 cycle and we attempt to locate another store in the ready list to
18958 issue with it, preferring a store to an adjacent memory location to
18959 facilitate store pairing in the store queue.
18961 - If the pendulum is 2, then two loads have already been
18962 issued in this cycle, so we increase the priority of the first store
18963 in the ready list to increase it's likelihood of being chosen first
18966 - If the pendulum < -2 or > 2, then do nothing.
18968 Note: This code covers the most common scenarios. There exist non
18969 load/store instructions which make use of the LSU and which
18970 would need to be accounted for to strictly model the behavior
18971 of the machine. Those instructions are currently unaccounted
18972 for to help minimize compile time overhead of this code.
18975 rtx load_mem
, str_mem
;
18977 if (is_store_insn (last_scheduled_insn
, &str_mem
))
18978 /* Issuing a store, swing the load_store_pendulum to the left */
18979 load_store_pendulum
--;
18980 else if (is_load_insn (last_scheduled_insn
, &load_mem
))
18981 /* Issuing a load, swing the load_store_pendulum to the right */
18982 load_store_pendulum
++;
18984 return cached_can_issue_more
;
18986 /* If the pendulum is balanced, or there is only one instruction on
18987 the ready list, then all is well, so return. */
18988 if ((load_store_pendulum
== 0) || (lastpos
<= 0))
18989 return cached_can_issue_more
;
18991 if (load_store_pendulum
== 1)
18993 /* A load has been issued in this cycle. Scan the ready list
18994 for another load to issue with it */
18999 if (is_load_insn (ready
[pos
], &load_mem
))
19001 /* Found a load. Move it to the head of the ready list,
19002 and adjust it's priority so that it is more likely to
19004 move_to_end_of_ready (ready
, pos
, lastpos
);
19006 if (!sel_sched_p ()
19007 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
19008 INSN_PRIORITY (ready
[lastpos
])++;
19014 else if (load_store_pendulum
== -2)
19016 /* Two stores have been issued in this cycle. Increase the
19017 priority of the first load in the ready list to favor it for
19018 issuing in the next cycle. */
19023 if (is_load_insn (ready
[pos
], &load_mem
)
19025 && INSN_PRIORITY_KNOWN (ready
[pos
]))
19027 INSN_PRIORITY (ready
[pos
])++;
19029 /* Adjust the pendulum to account for the fact that a load
19030 was found and increased in priority. This is to prevent
19031 increasing the priority of multiple loads */
19032 load_store_pendulum
--;
19039 else if (load_store_pendulum
== -1)
19041 /* A store has been issued in this cycle. Scan the ready list for
19042 another store to issue with it, preferring a store to an adjacent
19044 int first_store_pos
= -1;
19050 if (is_store_insn (ready
[pos
], &str_mem
))
19053 /* Maintain the index of the first store found on the
19055 if (first_store_pos
== -1)
19056 first_store_pos
= pos
;
19058 if (is_store_insn (last_scheduled_insn
, &str_mem2
)
19059 && adjacent_mem_locations (str_mem
, str_mem2
))
19061 /* Found an adjacent store. Move it to the head of the
19062 ready list, and adjust it's priority so that it is
19063 more likely to stay there */
19064 move_to_end_of_ready (ready
, pos
, lastpos
);
19066 if (!sel_sched_p ()
19067 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
19068 INSN_PRIORITY (ready
[lastpos
])++;
19070 first_store_pos
= -1;
19078 if (first_store_pos
>= 0)
19080 /* An adjacent store wasn't found, but a non-adjacent store was,
19081 so move the non-adjacent store to the front of the ready
19082 list, and adjust its priority so that it is more likely to
19084 move_to_end_of_ready (ready
, first_store_pos
, lastpos
);
19085 if (!sel_sched_p ()
19086 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
19087 INSN_PRIORITY (ready
[lastpos
])++;
19090 else if (load_store_pendulum
== 2)
19092 /* Two loads have been issued in this cycle. Increase the priority
19093 of the first store in the ready list to favor it for issuing in
19099 if (is_store_insn (ready
[pos
], &str_mem
)
19101 && INSN_PRIORITY_KNOWN (ready
[pos
]))
19103 INSN_PRIORITY (ready
[pos
])++;
19105 /* Adjust the pendulum to account for the fact that a store
19106 was found and increased in priority. This is to prevent
19107 increasing the priority of multiple stores */
19108 load_store_pendulum
++;
19116 return cached_can_issue_more
;
19119 /* Do Power9 specific sched_reorder2 reordering of ready list. */
19122 power9_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
19125 enum attr_type type
, type2
;
19127 type
= get_attr_type (last_scheduled_insn
);
19129 /* Try to issue fixed point divides back-to-back in pairs so they will be
19130 routed to separate execution units and execute in parallel. */
19131 if (type
== TYPE_DIV
&& divide_cnt
== 0)
19133 /* First divide has been scheduled. */
19136 /* Scan the ready list looking for another divide, if found move it
19137 to the end of the list so it is chosen next. */
19141 if (recog_memoized (ready
[pos
]) >= 0
19142 && get_attr_type (ready
[pos
]) == TYPE_DIV
)
19144 move_to_end_of_ready (ready
, pos
, lastpos
);
19152 /* Last insn was the 2nd divide or not a divide, reset the counter. */
19155 /* The best dispatch throughput for vector and vector load insns can be
19156 achieved by interleaving a vector and vector load such that they'll
19157 dispatch to the same superslice. If this pairing cannot be achieved
19158 then it is best to pair vector insns together and vector load insns
19161 To aid in this pairing, vec_pairing maintains the current state with
19162 the following values:
19164 0 : Initial state, no vecload/vector pairing has been started.
19166 1 : A vecload or vector insn has been issued and a candidate for
19167 pairing has been found and moved to the end of the ready
19169 if (type
== TYPE_VECLOAD
)
19171 /* Issued a vecload. */
19172 if (vec_pairing
== 0)
19174 int vecload_pos
= -1;
19175 /* We issued a single vecload, look for a vector insn to pair it
19176 with. If one isn't found, try to pair another vecload. */
19180 if (recog_memoized (ready
[pos
]) >= 0)
19182 type2
= get_attr_type (ready
[pos
]);
19183 if (is_power9_pairable_vec_type (type2
))
19185 /* Found a vector insn to pair with, move it to the
19186 end of the ready list so it is scheduled next. */
19187 move_to_end_of_ready (ready
, pos
, lastpos
);
19189 return cached_can_issue_more
;
19191 else if (type2
== TYPE_VECLOAD
&& vecload_pos
== -1)
19192 /* Remember position of first vecload seen. */
19197 if (vecload_pos
>= 0)
19199 /* Didn't find a vector to pair with but did find a vecload,
19200 move it to the end of the ready list. */
19201 move_to_end_of_ready (ready
, vecload_pos
, lastpos
);
19203 return cached_can_issue_more
;
19207 else if (is_power9_pairable_vec_type (type
))
19209 /* Issued a vector operation. */
19210 if (vec_pairing
== 0)
19213 /* We issued a single vector insn, look for a vecload to pair it
19214 with. If one isn't found, try to pair another vector. */
19218 if (recog_memoized (ready
[pos
]) >= 0)
19220 type2
= get_attr_type (ready
[pos
]);
19221 if (type2
== TYPE_VECLOAD
)
19223 /* Found a vecload insn to pair with, move it to the
19224 end of the ready list so it is scheduled next. */
19225 move_to_end_of_ready (ready
, pos
, lastpos
);
19227 return cached_can_issue_more
;
19229 else if (is_power9_pairable_vec_type (type2
)
19231 /* Remember position of first vector insn seen. */
19238 /* Didn't find a vecload to pair with but did find a vector
19239 insn, move it to the end of the ready list. */
19240 move_to_end_of_ready (ready
, vec_pos
, lastpos
);
19242 return cached_can_issue_more
;
19247 /* We've either finished a vec/vecload pair, couldn't find an insn to
19248 continue the current pair, or the last insn had nothing to do with
19249 with pairing. In any case, reset the state. */
19253 return cached_can_issue_more
;
19256 /* Determine if INSN is a store to memory that can be fused with a similar
19260 is_fusable_store (rtx_insn
*insn
, rtx
*str_mem
)
19262 /* Insn must be a non-prefixed base+disp form store. */
19263 if (is_store_insn (insn
, str_mem
)
19264 && get_attr_prefixed (insn
) == PREFIXED_NO
19265 && get_attr_update (insn
) == UPDATE_NO
19266 && get_attr_indexed (insn
) == INDEXED_NO
)
19268 /* Further restrictions by mode and size. */
19269 if (!MEM_SIZE_KNOWN_P (*str_mem
))
19272 machine_mode mode
= GET_MODE (*str_mem
);
19273 HOST_WIDE_INT size
= MEM_SIZE (*str_mem
);
19275 if (INTEGRAL_MODE_P (mode
))
19276 /* Must be word or dword size. */
19277 return (size
== 4 || size
== 8);
19278 else if (FLOAT_MODE_P (mode
))
19279 /* Must be dword size. */
19280 return (size
== 8);
19286 /* Do Power10 specific reordering of the ready list. */
19289 power10_sched_reorder (rtx_insn
**ready
, int lastpos
)
19293 /* Do store fusion during sched2 only. */
19294 if (!reload_completed
)
19295 return cached_can_issue_more
;
19297 /* If the prior insn finished off a store fusion pair then simply
19298 reset the counter and return, nothing more to do. */
19299 if (load_store_pendulum
!= 0)
19301 load_store_pendulum
= 0;
19302 return cached_can_issue_more
;
19305 /* Try to pair certain store insns to adjacent memory locations
19306 so that the hardware will fuse them to a single operation. */
19307 if (TARGET_P10_FUSION
&& is_fusable_store (last_scheduled_insn
, &mem1
))
19310 /* A fusable store was just scheduled. Scan the ready list for another
19311 store that it can fuse with. */
19316 /* GPR stores can be ascending or descending offsets, FPR/VSR stores
19317 must be ascending only. */
19318 if (is_fusable_store (ready
[pos
], &mem2
)
19319 && ((INTEGRAL_MODE_P (GET_MODE (mem1
))
19320 && adjacent_mem_locations (mem1
, mem2
))
19321 || (FLOAT_MODE_P (GET_MODE (mem1
))
19322 && (adjacent_mem_locations (mem1
, mem2
) == mem1
))))
19324 /* Found a fusable store. Move it to the end of the ready list
19325 so it is scheduled next. */
19326 move_to_end_of_ready (ready
, pos
, lastpos
);
19328 load_store_pendulum
= -1;
19335 return cached_can_issue_more
;
19338 /* We are about to begin issuing insns for this clock cycle. */
19341 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED
, int sched_verbose
,
19342 rtx_insn
**ready ATTRIBUTE_UNUSED
,
19343 int *pn_ready ATTRIBUTE_UNUSED
,
19344 int clock_var ATTRIBUTE_UNUSED
)
19346 int n_ready
= *pn_ready
;
19349 fprintf (dump
, "// rs6000_sched_reorder :\n");
19351 /* Reorder the ready list, if the second to last ready insn
19352 is a nonepipeline insn. */
19353 if (rs6000_tune
== PROCESSOR_CELL
&& n_ready
> 1)
19355 if (is_nonpipeline_insn (ready
[n_ready
- 1])
19356 && (recog_memoized (ready
[n_ready
- 2]) > 0))
19357 /* Simply swap first two insns. */
19358 std::swap (ready
[n_ready
- 1], ready
[n_ready
- 2]);
19361 if (rs6000_tune
== PROCESSOR_POWER6
)
19362 load_store_pendulum
= 0;
19364 /* Do Power10 dependent reordering. */
19365 if (rs6000_tune
== PROCESSOR_POWER10
&& last_scheduled_insn
)
19366 power10_sched_reorder (ready
, n_ready
- 1);
19368 return rs6000_issue_rate ();
19371 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19374 rs6000_sched_reorder2 (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
19375 int *pn_ready
, int clock_var ATTRIBUTE_UNUSED
)
19378 fprintf (dump
, "// rs6000_sched_reorder2 :\n");
19380 /* Do Power6 dependent reordering if necessary. */
19381 if (rs6000_tune
== PROCESSOR_POWER6
&& last_scheduled_insn
)
19382 return power6_sched_reorder2 (ready
, *pn_ready
- 1);
19384 /* Do Power9 dependent reordering if necessary. */
19385 if (rs6000_tune
== PROCESSOR_POWER9
&& last_scheduled_insn
19386 && recog_memoized (last_scheduled_insn
) >= 0)
19387 return power9_sched_reorder2 (ready
, *pn_ready
- 1);
19389 /* Do Power10 dependent reordering. */
19390 if (rs6000_tune
== PROCESSOR_POWER10
&& last_scheduled_insn
)
19391 return power10_sched_reorder (ready
, *pn_ready
- 1);
19393 return cached_can_issue_more
;
19396 /* Return whether the presence of INSN causes a dispatch group termination
19397 of group WHICH_GROUP.
19399 If WHICH_GROUP == current_group, this function will return true if INSN
19400 causes the termination of the current group (i.e, the dispatch group to
19401 which INSN belongs). This means that INSN will be the last insn in the
19402 group it belongs to.
19404 If WHICH_GROUP == previous_group, this function will return true if INSN
19405 causes the termination of the previous group (i.e, the dispatch group that
19406 precedes the group to which INSN belongs). This means that INSN will be
19407 the first insn in the group it belongs to). */
19410 insn_terminates_group_p (rtx_insn
*insn
, enum group_termination which_group
)
19417 first
= insn_must_be_first_in_group (insn
);
19418 last
= insn_must_be_last_in_group (insn
);
19423 if (which_group
== current_group
)
19425 else if (which_group
== previous_group
)
19433 insn_must_be_first_in_group (rtx_insn
*insn
)
19435 enum attr_type type
;
19439 || DEBUG_INSN_P (insn
)
19440 || GET_CODE (PATTERN (insn
)) == USE
19441 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
19444 switch (rs6000_tune
)
19446 case PROCESSOR_POWER5
:
19447 if (is_cracked_insn (insn
))
19450 case PROCESSOR_POWER4
:
19451 if (is_microcoded_insn (insn
))
19454 if (!rs6000_sched_groups
)
19457 type
= get_attr_type (insn
);
19464 case TYPE_CR_LOGICAL
:
19477 case PROCESSOR_POWER6
:
19478 type
= get_attr_type (insn
);
19487 case TYPE_FPCOMPARE
:
19498 if (get_attr_dot (insn
) == DOT_NO
19499 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19504 if (get_attr_size (insn
) == SIZE_32
)
19512 if (get_attr_update (insn
) == UPDATE_YES
)
19520 case PROCESSOR_POWER7
:
19521 type
= get_attr_type (insn
);
19525 case TYPE_CR_LOGICAL
:
19539 if (get_attr_dot (insn
) == DOT_YES
)
19544 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19545 || get_attr_update (insn
) == UPDATE_YES
)
19552 if (get_attr_update (insn
) == UPDATE_YES
)
19560 case PROCESSOR_POWER8
:
19561 type
= get_attr_type (insn
);
19565 case TYPE_CR_LOGICAL
:
19573 case TYPE_VECSTORE
:
19580 if (get_attr_dot (insn
) == DOT_YES
)
19585 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19586 || get_attr_update (insn
) == UPDATE_YES
)
19591 if (get_attr_update (insn
) == UPDATE_YES
19592 && get_attr_indexed (insn
) == INDEXED_YES
)
19608 insn_must_be_last_in_group (rtx_insn
*insn
)
19610 enum attr_type type
;
19614 || DEBUG_INSN_P (insn
)
19615 || GET_CODE (PATTERN (insn
)) == USE
19616 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
19619 switch (rs6000_tune
) {
19620 case PROCESSOR_POWER4
:
19621 case PROCESSOR_POWER5
:
19622 if (is_microcoded_insn (insn
))
19625 if (is_branch_slot_insn (insn
))
19629 case PROCESSOR_POWER6
:
19630 type
= get_attr_type (insn
);
19638 case TYPE_FPCOMPARE
:
19649 if (get_attr_dot (insn
) == DOT_NO
19650 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19655 if (get_attr_size (insn
) == SIZE_32
)
19663 case PROCESSOR_POWER7
:
19664 type
= get_attr_type (insn
);
19674 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19675 && get_attr_update (insn
) == UPDATE_YES
)
19680 if (get_attr_update (insn
) == UPDATE_YES
19681 && get_attr_indexed (insn
) == INDEXED_YES
)
19689 case PROCESSOR_POWER8
:
19690 type
= get_attr_type (insn
);
19702 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19703 && get_attr_update (insn
) == UPDATE_YES
)
19708 if (get_attr_update (insn
) == UPDATE_YES
19709 && get_attr_indexed (insn
) == INDEXED_YES
)
19724 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19725 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19728 is_costly_group (rtx
*group_insns
, rtx next_insn
)
19731 int issue_rate
= rs6000_issue_rate ();
19733 for (i
= 0; i
< issue_rate
; i
++)
19735 sd_iterator_def sd_it
;
19737 rtx insn
= group_insns
[i
];
19742 FOR_EACH_DEP (insn
, SD_LIST_RES_FORW
, sd_it
, dep
)
19744 rtx next
= DEP_CON (dep
);
19746 if (next
== next_insn
19747 && rs6000_is_costly_dependence (dep
, dep_cost (dep
), 0))
19755 /* Utility of the function redefine_groups.
19756 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19757 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19758 to keep it "far" (in a separate group) from GROUP_INSNS, following
19759 one of the following schemes, depending on the value of the flag
19760 -minsert_sched_nops = X:
19761 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19762 in order to force NEXT_INSN into a separate group.
19763 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19764 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19765 insertion (has a group just ended, how many vacant issue slots remain in the
19766 last group, and how many dispatch groups were encountered so far). */
19769 force_new_group (int sched_verbose
, FILE *dump
, rtx
*group_insns
,
19770 rtx_insn
*next_insn
, bool *group_end
, int can_issue_more
,
19775 int issue_rate
= rs6000_issue_rate ();
19776 bool end
= *group_end
;
19779 if (next_insn
== NULL_RTX
|| DEBUG_INSN_P (next_insn
))
19780 return can_issue_more
;
19782 if (rs6000_sched_insert_nops
> sched_finish_regroup_exact
)
19783 return can_issue_more
;
19785 force
= is_costly_group (group_insns
, next_insn
);
19787 return can_issue_more
;
19789 if (sched_verbose
> 6)
19790 fprintf (dump
,"force: group count = %d, can_issue_more = %d\n",
19791 *group_count
,can_issue_more
);
19793 if (rs6000_sched_insert_nops
== sched_finish_regroup_exact
)
19796 can_issue_more
= 0;
19798 /* Since only a branch can be issued in the last issue_slot, it is
19799 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19800 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19801 in this case the last nop will start a new group and the branch
19802 will be forced to the new group. */
19803 if (can_issue_more
&& !is_branch_slot_insn (next_insn
))
19806 /* Do we have a special group ending nop? */
19807 if (rs6000_tune
== PROCESSOR_POWER6
|| rs6000_tune
== PROCESSOR_POWER7
19808 || rs6000_tune
== PROCESSOR_POWER8
)
19810 nop
= gen_group_ending_nop ();
19811 emit_insn_before (nop
, next_insn
);
19812 can_issue_more
= 0;
19815 while (can_issue_more
> 0)
19818 emit_insn_before (nop
, next_insn
);
19826 if (rs6000_sched_insert_nops
< sched_finish_regroup_exact
)
19828 int n_nops
= rs6000_sched_insert_nops
;
19830 /* Nops can't be issued from the branch slot, so the effective
19831 issue_rate for nops is 'issue_rate - 1'. */
19832 if (can_issue_more
== 0)
19833 can_issue_more
= issue_rate
;
19835 if (can_issue_more
== 0)
19837 can_issue_more
= issue_rate
- 1;
19840 for (i
= 0; i
< issue_rate
; i
++)
19842 group_insns
[i
] = 0;
19849 emit_insn_before (nop
, next_insn
);
19850 if (can_issue_more
== issue_rate
- 1) /* new group begins */
19853 if (can_issue_more
== 0)
19855 can_issue_more
= issue_rate
- 1;
19858 for (i
= 0; i
< issue_rate
; i
++)
19860 group_insns
[i
] = 0;
19866 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19869 /* Is next_insn going to start a new group? */
19872 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
19873 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
19874 || (can_issue_more
< issue_rate
&&
19875 insn_terminates_group_p (next_insn
, previous_group
)));
19876 if (*group_end
&& end
)
19879 if (sched_verbose
> 6)
19880 fprintf (dump
, "done force: group count = %d, can_issue_more = %d\n",
19881 *group_count
, can_issue_more
);
19882 return can_issue_more
;
19885 return can_issue_more
;
19888 /* This function tries to synch the dispatch groups that the compiler "sees"
19889 with the dispatch groups that the processor dispatcher is expected to
19890 form in practice. It tries to achieve this synchronization by forcing the
19891 estimated processor grouping on the compiler (as opposed to the function
19892 'pad_goups' which tries to force the scheduler's grouping on the processor).
19894 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19895 examines the (estimated) dispatch groups that will be formed by the processor
19896 dispatcher. It marks these group boundaries to reflect the estimated
19897 processor grouping, overriding the grouping that the scheduler had marked.
19898 Depending on the value of the flag '-minsert-sched-nops' this function can
19899 force certain insns into separate groups or force a certain distance between
19900 them by inserting nops, for example, if there exists a "costly dependence"
19903 The function estimates the group boundaries that the processor will form as
19904 follows: It keeps track of how many vacant issue slots are available after
19905 each insn. A subsequent insn will start a new group if one of the following
19907 - no more vacant issue slots remain in the current dispatch group.
19908 - only the last issue slot, which is the branch slot, is vacant, but the next
19909 insn is not a branch.
19910 - only the last 2 or less issue slots, including the branch slot, are vacant,
19911 which means that a cracked insn (which occupies two issue slots) can't be
19912 issued in this group.
19913 - less than 'issue_rate' slots are vacant, and the next insn always needs to
19914 start a new group. */
19917 redefine_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
19920 rtx_insn
*insn
, *next_insn
;
19922 int can_issue_more
;
19925 int group_count
= 0;
19929 issue_rate
= rs6000_issue_rate ();
19930 group_insns
= XALLOCAVEC (rtx
, issue_rate
);
19931 for (i
= 0; i
< issue_rate
; i
++)
19933 group_insns
[i
] = 0;
19935 can_issue_more
= issue_rate
;
19937 insn
= get_next_active_insn (prev_head_insn
, tail
);
19940 while (insn
!= NULL_RTX
)
19942 slot
= (issue_rate
- can_issue_more
);
19943 group_insns
[slot
] = insn
;
19945 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
19946 if (insn_terminates_group_p (insn
, current_group
))
19947 can_issue_more
= 0;
19949 next_insn
= get_next_active_insn (insn
, tail
);
19950 if (next_insn
== NULL_RTX
)
19951 return group_count
+ 1;
19953 /* Is next_insn going to start a new group? */
19955 = (can_issue_more
== 0
19956 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
19957 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
19958 || (can_issue_more
< issue_rate
&&
19959 insn_terminates_group_p (next_insn
, previous_group
)));
19961 can_issue_more
= force_new_group (sched_verbose
, dump
, group_insns
,
19962 next_insn
, &group_end
, can_issue_more
,
19968 can_issue_more
= 0;
19969 for (i
= 0; i
< issue_rate
; i
++)
19971 group_insns
[i
] = 0;
19975 if (GET_MODE (next_insn
) == TImode
&& can_issue_more
)
19976 PUT_MODE (next_insn
, VOIDmode
);
19977 else if (!can_issue_more
&& GET_MODE (next_insn
) != TImode
)
19978 PUT_MODE (next_insn
, TImode
);
19981 if (can_issue_more
== 0)
19982 can_issue_more
= issue_rate
;
19985 return group_count
;
19988 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19989 dispatch group boundaries that the scheduler had marked. Pad with nops
19990 any dispatch groups which have vacant issue slots, in order to force the
19991 scheduler's grouping on the processor dispatcher. The function
19992 returns the number of dispatch groups found. */
19995 pad_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
19998 rtx_insn
*insn
, *next_insn
;
20001 int can_issue_more
;
20003 int group_count
= 0;
20005 /* Initialize issue_rate. */
20006 issue_rate
= rs6000_issue_rate ();
20007 can_issue_more
= issue_rate
;
20009 insn
= get_next_active_insn (prev_head_insn
, tail
);
20010 next_insn
= get_next_active_insn (insn
, tail
);
20012 while (insn
!= NULL_RTX
)
20015 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
20017 group_end
= (next_insn
== NULL_RTX
|| GET_MODE (next_insn
) == TImode
);
20019 if (next_insn
== NULL_RTX
)
20024 /* If the scheduler had marked group termination at this location
20025 (between insn and next_insn), and neither insn nor next_insn will
20026 force group termination, pad the group with nops to force group
20029 && (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
20030 && !insn_terminates_group_p (insn
, current_group
)
20031 && !insn_terminates_group_p (next_insn
, previous_group
))
20033 if (!is_branch_slot_insn (next_insn
))
20036 while (can_issue_more
)
20039 emit_insn_before (nop
, next_insn
);
20044 can_issue_more
= issue_rate
;
20049 next_insn
= get_next_active_insn (insn
, tail
);
20052 return group_count
;
20055 /* We're beginning a new block. Initialize data structures as necessary. */
20058 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
20059 int sched_verbose ATTRIBUTE_UNUSED
,
20060 int max_ready ATTRIBUTE_UNUSED
)
20062 last_scheduled_insn
= NULL
;
20063 load_store_pendulum
= 0;
20068 /* The following function is called at the end of scheduling BB.
20069 After reload, it inserts nops at insn group bundling. */
20072 rs6000_sched_finish (FILE *dump
, int sched_verbose
)
20077 fprintf (dump
, "=== Finishing schedule.\n");
20079 if (reload_completed
&& rs6000_sched_groups
)
20081 /* Do not run sched_finish hook when selective scheduling enabled. */
20082 if (sel_sched_p ())
20085 if (rs6000_sched_insert_nops
== sched_finish_none
)
20088 if (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
20089 n_groups
= pad_groups (dump
, sched_verbose
,
20090 current_sched_info
->prev_head
,
20091 current_sched_info
->next_tail
);
20093 n_groups
= redefine_groups (dump
, sched_verbose
,
20094 current_sched_info
->prev_head
,
20095 current_sched_info
->next_tail
);
20097 if (sched_verbose
>= 6)
20099 fprintf (dump
, "ngroups = %d\n", n_groups
);
20100 print_rtl (dump
, current_sched_info
->prev_head
);
20101 fprintf (dump
, "Done finish_sched\n");
20106 struct rs6000_sched_context
20108 short cached_can_issue_more
;
20109 rtx_insn
*last_scheduled_insn
;
20110 int load_store_pendulum
;
20115 typedef struct rs6000_sched_context rs6000_sched_context_def
;
20116 typedef rs6000_sched_context_def
*rs6000_sched_context_t
;
20118 /* Allocate store for new scheduling context. */
20120 rs6000_alloc_sched_context (void)
20122 return xmalloc (sizeof (rs6000_sched_context_def
));
20125 /* If CLEAN_P is true then initializes _SC with clean data,
20126 and from the global context otherwise. */
20128 rs6000_init_sched_context (void *_sc
, bool clean_p
)
20130 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
20134 sc
->cached_can_issue_more
= 0;
20135 sc
->last_scheduled_insn
= NULL
;
20136 sc
->load_store_pendulum
= 0;
20137 sc
->divide_cnt
= 0;
20138 sc
->vec_pairing
= 0;
20142 sc
->cached_can_issue_more
= cached_can_issue_more
;
20143 sc
->last_scheduled_insn
= last_scheduled_insn
;
20144 sc
->load_store_pendulum
= load_store_pendulum
;
20145 sc
->divide_cnt
= divide_cnt
;
20146 sc
->vec_pairing
= vec_pairing
;
20150 /* Sets the global scheduling context to the one pointed to by _SC. */
20152 rs6000_set_sched_context (void *_sc
)
20154 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
20156 gcc_assert (sc
!= NULL
);
20158 cached_can_issue_more
= sc
->cached_can_issue_more
;
20159 last_scheduled_insn
= sc
->last_scheduled_insn
;
20160 load_store_pendulum
= sc
->load_store_pendulum
;
20161 divide_cnt
= sc
->divide_cnt
;
20162 vec_pairing
= sc
->vec_pairing
;
20167 rs6000_free_sched_context (void *_sc
)
20169 gcc_assert (_sc
!= NULL
);
20175 rs6000_sched_can_speculate_insn (rtx_insn
*insn
)
20177 switch (get_attr_type (insn
))
20192 /* Length in units of the trampoline for entering a nested function. */
20195 rs6000_trampoline_size (void)
20199 switch (DEFAULT_ABI
)
20202 gcc_unreachable ();
20205 ret
= (TARGET_32BIT
) ? 12 : 24;
20209 gcc_assert (!TARGET_32BIT
);
20215 ret
= (TARGET_32BIT
) ? 40 : 48;
20222 /* Emit RTL insns to initialize the variable parts of a trampoline.
20223 FNADDR is an RTX for the address of the function's pure code.
20224 CXT is an RTX for the static chain value for the function. */
20227 rs6000_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
20229 int regsize
= (TARGET_32BIT
) ? 4 : 8;
20230 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
20231 rtx ctx_reg
= force_reg (Pmode
, cxt
);
20232 rtx addr
= force_reg (Pmode
, XEXP (m_tramp
, 0));
20234 switch (DEFAULT_ABI
)
20237 gcc_unreachable ();
20239 /* Under AIX, just build the 3 word function descriptor */
20242 rtx fnmem
, fn_reg
, toc_reg
;
20244 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS
)
20245 error ("you cannot take the address of a nested function if you use "
20246 "the %qs option", "-mno-pointers-to-nested-functions");
20248 fnmem
= gen_const_mem (Pmode
, force_reg (Pmode
, fnaddr
));
20249 fn_reg
= gen_reg_rtx (Pmode
);
20250 toc_reg
= gen_reg_rtx (Pmode
);
20252 /* Macro to shorten the code expansions below. */
20253 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20255 m_tramp
= replace_equiv_address (m_tramp
, addr
);
20257 emit_move_insn (fn_reg
, MEM_PLUS (fnmem
, 0));
20258 emit_move_insn (toc_reg
, MEM_PLUS (fnmem
, regsize
));
20259 emit_move_insn (MEM_PLUS (m_tramp
, 0), fn_reg
);
20260 emit_move_insn (MEM_PLUS (m_tramp
, regsize
), toc_reg
);
20261 emit_move_insn (MEM_PLUS (m_tramp
, 2*regsize
), ctx_reg
);
20267 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
20271 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__trampoline_setup"),
20272 LCT_NORMAL
, VOIDmode
,
20274 GEN_INT (rs6000_trampoline_size ()), SImode
,
20282 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20283 identifier as an argument, so the front end shouldn't look it up. */
20286 rs6000_attribute_takes_identifier_p (const_tree attr_id
)
20288 return is_attribute_p ("altivec", attr_id
);
20291 /* Handle the "altivec" attribute. The attribute may have
20292 arguments as follows:
20294 __attribute__((altivec(vector__)))
20295 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20296 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20298 and may appear more than once (e.g., 'vector bool char') in a
20299 given declaration. */
20302 rs6000_handle_altivec_attribute (tree
*node
,
20303 tree name ATTRIBUTE_UNUSED
,
20305 int flags ATTRIBUTE_UNUSED
,
20306 bool *no_add_attrs
)
20308 tree type
= *node
, result
= NULL_TREE
;
20312 = ((args
&& TREE_CODE (args
) == TREE_LIST
&& TREE_VALUE (args
)
20313 && TREE_CODE (TREE_VALUE (args
)) == IDENTIFIER_NODE
)
20314 ? *IDENTIFIER_POINTER (TREE_VALUE (args
))
20317 while (POINTER_TYPE_P (type
)
20318 || TREE_CODE (type
) == FUNCTION_TYPE
20319 || TREE_CODE (type
) == METHOD_TYPE
20320 || TREE_CODE (type
) == ARRAY_TYPE
)
20321 type
= TREE_TYPE (type
);
20323 mode
= TYPE_MODE (type
);
20325 /* Check for invalid AltiVec type qualifiers. */
20326 if (type
== long_double_type_node
)
20327 error ("use of %<long double%> in AltiVec types is invalid");
20328 else if (type
== boolean_type_node
)
20329 error ("use of boolean types in AltiVec types is invalid");
20330 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
20331 error ("use of %<complex%> in AltiVec types is invalid");
20332 else if (DECIMAL_FLOAT_MODE_P (mode
))
20333 error ("use of decimal floating-point types in AltiVec types is invalid");
20334 else if (!TARGET_VSX
)
20336 if (type
== long_unsigned_type_node
|| type
== long_integer_type_node
)
20339 error ("use of %<long%> in AltiVec types is invalid for "
20340 "64-bit code without %qs", "-mvsx");
20341 else if (rs6000_warn_altivec_long
)
20342 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20345 else if (type
== long_long_unsigned_type_node
20346 || type
== long_long_integer_type_node
)
20347 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20349 else if (type
== double_type_node
)
20350 error ("use of %<double%> in AltiVec types is invalid without %qs",
20354 switch (altivec_type
)
20357 unsigned_p
= TYPE_UNSIGNED (type
);
20361 result
= (unsigned_p
? unsigned_V1TI_type_node
: V1TI_type_node
);
20364 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
20367 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
20370 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
20373 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
20375 case E_SFmode
: result
= V4SF_type_node
; break;
20376 case E_DFmode
: result
= V2DF_type_node
; break;
20377 /* If the user says 'vector int bool', we may be handed the 'bool'
20378 attribute _before_ the 'vector' attribute, and so select the
20379 proper type in the 'b' case below. */
20380 case E_V4SImode
: case E_V8HImode
: case E_V16QImode
: case E_V4SFmode
:
20381 case E_V2DImode
: case E_V2DFmode
:
20389 case E_TImode
: case E_V1TImode
: result
= bool_V1TI_type_node
; break;
20390 case E_DImode
: case E_V2DImode
: result
= bool_V2DI_type_node
; break;
20391 case E_SImode
: case E_V4SImode
: result
= bool_V4SI_type_node
; break;
20392 case E_HImode
: case E_V8HImode
: result
= bool_V8HI_type_node
; break;
20393 case E_QImode
: case E_V16QImode
: result
= bool_V16QI_type_node
;
20400 case E_V8HImode
: result
= pixel_V8HI_type_node
;
20406 /* Propagate qualifiers attached to the element type
20407 onto the vector type. */
20408 if (result
&& result
!= type
&& TYPE_QUALS (type
))
20409 result
= build_qualified_type (result
, TYPE_QUALS (type
));
20411 *no_add_attrs
= true; /* No need to hang on to the attribute. */
20414 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
20419 /* AltiVec defines five built-in scalar types that serve as vector
20420 elements; we must teach the compiler how to mangle them. The 128-bit
20421 floating point mangling is target-specific as well. MMA defines
20422 two built-in types to be used as opaque vector types. */
20424 static const char *
20425 rs6000_mangle_type (const_tree type
)
20427 type
= TYPE_MAIN_VARIANT (type
);
20429 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
20430 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
20431 && TREE_CODE (type
) != OPAQUE_TYPE
)
20434 if (type
== bool_char_type_node
) return "U6__boolc";
20435 if (type
== bool_short_type_node
) return "U6__bools";
20436 if (type
== pixel_type_node
) return "u7__pixel";
20437 if (type
== bool_int_type_node
) return "U6__booli";
20438 if (type
== bool_long_long_type_node
) return "U6__boolx";
20440 if (type
== float128_type_node
|| type
== float64x_type_node
)
20443 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IBM_P (TYPE_MODE (type
)))
20445 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IEEE_P (TYPE_MODE (type
)))
20446 return "u9__ieee128";
20448 if (type
== vector_pair_type_node
)
20449 return "u13__vector_pair";
20450 if (type
== vector_quad_type_node
)
20451 return "u13__vector_quad";
20453 /* For all other types, use the default mangling. */
20457 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20458 struct attribute_spec.handler. */
20461 rs6000_handle_longcall_attribute (tree
*node
, tree name
,
20462 tree args ATTRIBUTE_UNUSED
,
20463 int flags ATTRIBUTE_UNUSED
,
20464 bool *no_add_attrs
)
20466 if (TREE_CODE (*node
) != FUNCTION_TYPE
20467 && TREE_CODE (*node
) != FIELD_DECL
20468 && TREE_CODE (*node
) != TYPE_DECL
)
20470 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
20472 *no_add_attrs
= true;
20478 /* Set longcall attributes on all functions declared when
20479 rs6000_default_long_calls is true. */
20481 rs6000_set_default_type_attributes (tree type
)
20483 if (rs6000_default_long_calls
20484 && FUNC_OR_METHOD_TYPE_P (type
))
20485 TYPE_ATTRIBUTES (type
) = tree_cons (get_identifier ("longcall"),
20487 TYPE_ATTRIBUTES (type
));
20490 darwin_set_default_type_attributes (type
);
20494 /* Return a reference suitable for calling a function with the
20495 longcall attribute. */
20498 rs6000_longcall_ref (rtx call_ref
, rtx arg
)
20500 /* System V adds '.' to the internal name, so skip them. */
20501 const char *call_name
= XSTR (call_ref
, 0);
20502 if (*call_name
== '.')
20504 while (*call_name
== '.')
20507 tree node
= get_identifier (call_name
);
20508 call_ref
= gen_rtx_SYMBOL_REF (VOIDmode
, IDENTIFIER_POINTER (node
));
20513 rtx base
= const0_rtx
;
20515 if (rs6000_pcrel_p ())
20517 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20518 rtx u
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20519 gen_rtvec (3, base
, call_ref
, arg
),
20520 UNSPECV_PLT_PCREL
);
20521 emit_insn (gen_rtx_SET (reg
, u
));
20525 if (DEFAULT_ABI
== ABI_ELFv2
)
20526 base
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
20530 base
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
20533 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20534 may be used by a function global entry point. For SysV4, r11
20535 is used by __glink_PLTresolve lazy resolver entry. */
20536 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20537 rtx hi
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (3, base
, call_ref
, arg
),
20539 rtx lo
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20540 gen_rtvec (3, reg
, call_ref
, arg
),
20542 emit_insn (gen_rtx_SET (reg
, hi
));
20543 emit_insn (gen_rtx_SET (reg
, lo
));
20547 return force_reg (Pmode
, call_ref
);
20550 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20551 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20554 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20555 struct attribute_spec.handler. */
20557 rs6000_handle_struct_attribute (tree
*node
, tree name
,
20558 tree args ATTRIBUTE_UNUSED
,
20559 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
20562 if (DECL_P (*node
))
20564 if (TREE_CODE (*node
) == TYPE_DECL
)
20565 type
= &TREE_TYPE (*node
);
20570 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
20571 || TREE_CODE (*type
) == UNION_TYPE
)))
20573 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
20574 *no_add_attrs
= true;
20577 else if ((is_attribute_p ("ms_struct", name
)
20578 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
20579 || ((is_attribute_p ("gcc_struct", name
)
20580 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
20582 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
20584 *no_add_attrs
= true;
20591 rs6000_ms_bitfield_layout_p (const_tree record_type
)
20593 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
20594 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
20595 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
20598 #ifdef USING_ELFOS_H
20600 /* A get_unnamed_section callback, used for switching to toc_section. */
20603 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
20605 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20606 && TARGET_MINIMAL_TOC
)
20608 if (!toc_initialized
)
20610 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20611 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20612 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LCTOC", 0);
20613 fprintf (asm_out_file
, "\t.tc ");
20614 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1[TC],");
20615 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20616 fprintf (asm_out_file
, "\n");
20618 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20619 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20620 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20621 fprintf (asm_out_file
, " = .+32768\n");
20622 toc_initialized
= 1;
20625 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20627 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20629 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20630 if (!toc_initialized
)
20632 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20633 toc_initialized
= 1;
20638 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20639 if (!toc_initialized
)
20641 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20642 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20643 fprintf (asm_out_file
, " = .+32768\n");
20644 toc_initialized
= 1;
20649 /* Implement TARGET_ASM_INIT_SECTIONS. */
20652 rs6000_elf_asm_init_sections (void)
20655 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op
, NULL
);
20658 = get_unnamed_section (SECTION_WRITE
, output_section_asm_op
,
20659 SDATA2_SECTION_ASM_OP
);
20662 /* Implement TARGET_SELECT_RTX_SECTION. */
20665 rs6000_elf_select_rtx_section (machine_mode mode
, rtx x
,
20666 unsigned HOST_WIDE_INT align
)
20668 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
20669 return toc_section
;
20671 return default_elf_select_rtx_section (mode
, x
, align
);
20674 /* For a SYMBOL_REF, set generic flags and then perform some
20675 target-specific processing.
20677 When the AIX ABI is requested on a non-AIX system, replace the
20678 function name with the real name (with a leading .) rather than the
20679 function descriptor name. This saves a lot of overriding code to
20680 read the prefixes. */
20682 static void rs6000_elf_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
20684 rs6000_elf_encode_section_info (tree decl
, rtx rtl
, int first
)
20686 default_encode_section_info (decl
, rtl
, first
);
20689 && TREE_CODE (decl
) == FUNCTION_DECL
20691 && DEFAULT_ABI
== ABI_AIX
)
20693 rtx sym_ref
= XEXP (rtl
, 0);
20694 size_t len
= strlen (XSTR (sym_ref
, 0));
20695 char *str
= XALLOCAVEC (char, len
+ 2);
20697 memcpy (str
+ 1, XSTR (sym_ref
, 0), len
+ 1);
20698 XSTR (sym_ref
, 0) = ggc_alloc_string (str
, len
+ 1);
/* Return true iff SECTION is exactly TEMPL, or TEMPL followed by a
   '.'-separated suffix (so ".sdata" matches ".sdata" and ".sdata.foo"
   but not ".sdatafoo").  */

static bool
compare_section_name (const char *section, const char *templ)
{
  size_t len = strlen (templ);
  if (strncmp (section, templ, len) != 0)
    return false;
  return section[len] == '\0' || section[len] == '.';
}
20713 rs6000_elf_in_small_data_p (const_tree decl
)
20715 if (rs6000_sdata
== SDATA_NONE
)
20718 /* We want to merge strings, so we never consider them small data. */
20719 if (TREE_CODE (decl
) == STRING_CST
)
20722 /* Functions are never in the small data area. */
20723 if (TREE_CODE (decl
) == FUNCTION_DECL
)
20726 if (VAR_P (decl
) && DECL_SECTION_NAME (decl
))
20728 const char *section
= DECL_SECTION_NAME (decl
);
20729 if (compare_section_name (section
, ".sdata")
20730 || compare_section_name (section
, ".sdata2")
20731 || compare_section_name (section
, ".gnu.linkonce.s")
20732 || compare_section_name (section
, ".sbss")
20733 || compare_section_name (section
, ".sbss2")
20734 || compare_section_name (section
, ".gnu.linkonce.sb")
20735 || strcmp (section
, ".PPC.EMB.sdata0") == 0
20736 || strcmp (section
, ".PPC.EMB.sbss0") == 0)
20741 /* If we are told not to put readonly data in sdata, then don't. */
20742 if (TREE_READONLY (decl
) && rs6000_sdata
!= SDATA_EABI
20743 && !rs6000_readonly_in_sdata
)
20746 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
20749 && size
<= g_switch_value
20750 /* If it's not public, and we're not going to reference it there,
20751 there's no need to put it in the small data section. */
20752 && (rs6000_sdata
!= SDATA_DATA
|| TREE_PUBLIC (decl
)))
20759 #endif /* USING_ELFOS_H */
20761 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20764 rs6000_use_blocks_for_constant_p (machine_mode mode
, const_rtx x
)
20766 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
);
20769 /* Do not place thread-local symbols refs in the object blocks. */
20772 rs6000_use_blocks_for_decl_p (const_tree decl
)
20774 return !DECL_THREAD_LOCAL_P (decl
);
20777 /* Return a REG that occurs in ADDR with coefficient 1.
20778 ADDR can be effectively incremented by incrementing REG.
20780 r0 is special and we must not select it as an address
20781 register by this routine since our caller will try to
20782 increment the returned register via an "la" instruction. */
20785 find_addr_reg (rtx addr
)
20787 while (GET_CODE (addr
) == PLUS
)
20789 if (REG_P (XEXP (addr
, 0))
20790 && REGNO (XEXP (addr
, 0)) != 0)
20791 addr
= XEXP (addr
, 0);
20792 else if (REG_P (XEXP (addr
, 1))
20793 && REGNO (XEXP (addr
, 1)) != 0)
20794 addr
= XEXP (addr
, 1);
20795 else if (CONSTANT_P (XEXP (addr
, 0)))
20796 addr
= XEXP (addr
, 1);
20797 else if (CONSTANT_P (XEXP (addr
, 1)))
20798 addr
= XEXP (addr
, 0);
20800 gcc_unreachable ();
20802 gcc_assert (REG_P (addr
) && REGNO (addr
) != 0);
20807 rs6000_fatal_bad_address (rtx op
)
20809 fatal_insn ("bad address", op
);
20814 vec
<branch_island
, va_gc
> *branch_islands
;
20816 /* Remember to generate a branch island for far calls to the given
20820 add_compiler_branch_island (tree label_name
, tree function_name
,
20823 branch_island bi
= {function_name
, label_name
, line_number
};
20824 vec_safe_push (branch_islands
, bi
);
20827 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
20828 already there or not. */
20831 no_previous_def (tree function_name
)
20836 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
20837 if (function_name
== bi
->function_name
)
20842 /* GET_PREV_LABEL gets the label name from the previous definition of
20846 get_prev_label (tree function_name
)
20851 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
20852 if (function_name
== bi
->function_name
)
20853 return bi
->label_name
;
20857 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20860 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
20862 unsigned int length
;
20863 char *symbol_name
, *lazy_ptr_name
;
20864 char *local_label_0
;
20865 static unsigned label
= 0;
20867 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20868 symb
= (*targetm
.strip_name_encoding
) (symb
);
20870 length
= strlen (symb
);
20871 symbol_name
= XALLOCAVEC (char, length
+ 32);
20872 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
20874 lazy_ptr_name
= XALLOCAVEC (char, length
+ 32);
20875 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name
, symb
, length
);
20879 switch_to_section (darwin_sections
[machopic_picsymbol_stub1_section
]);
20880 fprintf (file
, "\t.align 5\n");
20882 fprintf (file
, "%s:\n", stub
);
20883 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20886 local_label_0
= XALLOCAVEC (char, 16);
20887 sprintf (local_label_0
, "L%u$spb", label
);
20889 fprintf (file
, "\tmflr r0\n");
20890 fprintf (file
, "\tbcl 20,31,%s\n", local_label_0
);
20891 fprintf (file
, "%s:\n\tmflr r11\n", local_label_0
);
20892 fprintf (file
, "\taddis r11,r11,ha16(%s-%s)\n",
20893 lazy_ptr_name
, local_label_0
);
20894 fprintf (file
, "\tmtlr r0\n");
20895 fprintf (file
, "\t%s r12,lo16(%s-%s)(r11)\n",
20896 (TARGET_64BIT
? "ldu" : "lwzu"),
20897 lazy_ptr_name
, local_label_0
);
20898 fprintf (file
, "\tmtctr r12\n");
20899 fprintf (file
, "\tbctr\n");
20901 else /* mdynamic-no-pic or mkernel. */
20903 switch_to_section (darwin_sections
[machopic_symbol_stub1_section
]);
20904 fprintf (file
, "\t.align 4\n");
20906 fprintf (file
, "%s:\n", stub
);
20907 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20909 fprintf (file
, "\tlis r11,ha16(%s)\n", lazy_ptr_name
);
20910 fprintf (file
, "\t%s r12,lo16(%s)(r11)\n",
20911 (TARGET_64BIT
? "ldu" : "lwzu"),
20913 fprintf (file
, "\tmtctr r12\n");
20914 fprintf (file
, "\tbctr\n");
20917 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
20918 fprintf (file
, "%s:\n", lazy_ptr_name
);
20919 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20920 fprintf (file
, "%sdyld_stub_binding_helper\n",
20921 (TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t"));
20924 /* Legitimize PIC addresses. If the address is already
20925 position-independent, we return ORIG. Newly generated
20926 position-independent addresses go into a reg. This is REG if non
20927 zero, otherwise we allocate register(s) as necessary. */
20929 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
20932 rs6000_machopic_legitimize_pic_address (rtx orig
, machine_mode mode
,
20937 if (reg
== NULL
&& !reload_completed
)
20938 reg
= gen_reg_rtx (Pmode
);
20940 if (GET_CODE (orig
) == CONST
)
20944 if (GET_CODE (XEXP (orig
, 0)) == PLUS
20945 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
20948 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
20950 /* Use a different reg for the intermediate value, as
20951 it will be marked UNCHANGING. */
20952 reg_temp
= !can_create_pseudo_p () ? reg
: gen_reg_rtx (Pmode
);
20953 base
= rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 0),
20956 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 1),
20959 if (CONST_INT_P (offset
))
20961 if (SMALL_INT (offset
))
20962 return plus_constant (Pmode
, base
, INTVAL (offset
));
20963 else if (!reload_completed
)
20964 offset
= force_reg (Pmode
, offset
);
20967 rtx mem
= force_const_mem (Pmode
, orig
);
20968 return machopic_legitimize_pic_address (mem
, Pmode
, reg
);
20971 return gen_rtx_PLUS (Pmode
, base
, offset
);
20974 /* Fall back on generic machopic code. */
20975 return machopic_legitimize_pic_address (orig
, mode
, reg
);
20978 /* Output a .machine directive for the Darwin assembler, and call
20979 the generic start_file routine. */
20982 rs6000_darwin_file_start (void)
20984 static const struct
20988 HOST_WIDE_INT if_set
;
20990 { "ppc64", "ppc64", MASK_64BIT
},
20991 { "970", "ppc970", OPTION_MASK_PPC_GPOPT
| OPTION_MASK_MFCRF \
20992 | MASK_POWERPC64
},
20993 { "power4", "ppc970", 0 },
20994 { "G5", "ppc970", 0 },
20995 { "7450", "ppc7450", 0 },
20996 { "7400", "ppc7400", OPTION_MASK_ALTIVEC
},
20997 { "G4", "ppc7400", 0 },
20998 { "750", "ppc750", 0 },
20999 { "740", "ppc750", 0 },
21000 { "G3", "ppc750", 0 },
21001 { "604e", "ppc604e", 0 },
21002 { "604", "ppc604", 0 },
21003 { "603e", "ppc603", 0 },
21004 { "603", "ppc603", 0 },
21005 { "601", "ppc601", 0 },
21006 { NULL
, "ppc", 0 } };
21007 const char *cpu_id
= "";
21010 rs6000_file_start ();
21011 darwin_file_start ();
21013 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
21015 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
21016 cpu_id
= rs6000_default_cpu
;
21018 if (OPTION_SET_P (rs6000_cpu_index
))
21019 cpu_id
= processor_target_table
[rs6000_cpu_index
].name
;
21021 /* Look through the mapping array. Pick the first name that either
21022 matches the argument, has a bit set in IF_SET that is also set
21023 in the target flags, or has a NULL name. */
21026 while (mapping
[i
].arg
!= NULL
21027 && strcmp (mapping
[i
].arg
, cpu_id
) != 0
21028 && (mapping
[i
].if_set
& rs6000_isa_flags
) == 0)
21031 fprintf (asm_out_file
, "\t.machine %s\n", mapping
[i
].name
);
21034 #endif /* TARGET_MACHO */
21038 rs6000_elf_reloc_rw_mask (void)
21042 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
21048 /* Record an element in the table of global constructors. SYMBOL is
21049 a SYMBOL_REF of the function to be called; PRIORITY is a number
21050 between 0 and MAX_INIT_PRIORITY.
21052 This differs from default_named_section_asm_out_constructor in
21053 that we have special handling for -mrelocatable. */
21055 static void rs6000_elf_asm_out_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
21057 rs6000_elf_asm_out_constructor (rtx symbol
, int priority
)
21059 const char *section
= ".ctors";
21062 if (priority
!= DEFAULT_INIT_PRIORITY
)
21064 sprintf (buf
, ".ctors.%.5u",
21065 /* Invert the numbering so the linker puts us in the proper
21066 order; constructors are run from right to left, and the
21067 linker sorts in increasing order. */
21068 MAX_INIT_PRIORITY
- priority
);
21072 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
21073 assemble_align (POINTER_SIZE
);
21075 if (DEFAULT_ABI
== ABI_V4
21076 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
21078 fputs ("\t.long (", asm_out_file
);
21079 output_addr_const (asm_out_file
, symbol
);
21080 fputs (")@fixup\n", asm_out_file
);
21083 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
21086 static void rs6000_elf_asm_out_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
21088 rs6000_elf_asm_out_destructor (rtx symbol
, int priority
)
21090 const char *section
= ".dtors";
21093 if (priority
!= DEFAULT_INIT_PRIORITY
)
21095 sprintf (buf
, ".dtors.%.5u",
21096 /* Invert the numbering so the linker puts us in the proper
21097 order; constructors are run from right to left, and the
21098 linker sorts in increasing order. */
21099 MAX_INIT_PRIORITY
- priority
);
21103 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
21104 assemble_align (POINTER_SIZE
);
21106 if (DEFAULT_ABI
== ABI_V4
21107 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
21109 fputs ("\t.long (", asm_out_file
);
21110 output_addr_const (asm_out_file
, symbol
);
21111 fputs (")@fixup\n", asm_out_file
);
21114 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
21118 rs6000_elf_declare_function_name (FILE *file
, const char *name
, tree decl
)
21120 if (TARGET_64BIT
&& DEFAULT_ABI
!= ABI_ELFv2
)
21122 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file
);
21123 ASM_OUTPUT_LABEL (file
, name
);
21124 fputs (DOUBLE_INT_ASM_OP
, file
);
21125 rs6000_output_function_entry (file
, name
);
21126 fputs (",.TOC.@tocbase,0\n\t.previous\n", file
);
21129 fputs ("\t.size\t", file
);
21130 assemble_name (file
, name
);
21131 fputs (",24\n\t.type\t.", file
);
21132 assemble_name (file
, name
);
21133 fputs (",@function\n", file
);
21134 if (TREE_PUBLIC (decl
) && ! DECL_WEAK (decl
))
21136 fputs ("\t.globl\t.", file
);
21137 assemble_name (file
, name
);
21142 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
21143 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
21144 rs6000_output_function_entry (file
, name
);
21145 fputs (":\n", file
);
21150 if (DEFAULT_ABI
== ABI_V4
21151 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
21152 && !TARGET_SECURE_PLT
21153 && (!constant_pool_empty_p () || crtl
->profile
)
21154 && (uses_toc
= uses_TOC ()))
21159 switch_to_other_text_partition ();
21160 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
21162 fprintf (file
, "\t.long ");
21163 assemble_name (file
, toc_label_name
);
21166 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
21167 assemble_name (file
, buf
);
21170 switch_to_other_text_partition ();
21173 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
21174 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
21176 if (TARGET_CMODEL
== CMODEL_LARGE
21177 && rs6000_global_entry_point_prologue_needed_p ())
21181 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
21183 fprintf (file
, "\t.quad .TOC.-");
21184 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
21185 assemble_name (file
, buf
);
21189 if (DEFAULT_ABI
== ABI_AIX
)
21191 const char *desc_name
, *orig_name
;
21193 orig_name
= (*targetm
.strip_name_encoding
) (name
);
21194 desc_name
= orig_name
;
21195 while (*desc_name
== '.')
21198 if (TREE_PUBLIC (decl
))
21199 fprintf (file
, "\t.globl %s\n", desc_name
);
21201 fprintf (file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
21202 fprintf (file
, "%s:\n", desc_name
);
21203 fprintf (file
, "\t.long %s\n", orig_name
);
21204 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file
);
21205 fputs ("\t.long 0\n", file
);
21206 fprintf (file
, "\t.previous\n");
21208 ASM_OUTPUT_LABEL (file
, name
);
21211 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED
;
21213 rs6000_elf_file_end (void)
21215 #ifdef HAVE_AS_GNU_ATTRIBUTE
21216 /* ??? The value emitted depends on options active at file end.
21217 Assume anyone using #pragma or attributes that might change
21218 options knows what they are doing. */
21219 if ((TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
)
21220 && rs6000_passes_float
)
21224 if (TARGET_HARD_FLOAT
)
21228 if (rs6000_passes_long_double
)
21230 if (!TARGET_LONG_DOUBLE_128
)
21232 else if (TARGET_IEEEQUAD
)
21237 fprintf (asm_out_file
, "\t.gnu_attribute 4, %d\n", fp
);
21239 if (TARGET_32BIT
&& DEFAULT_ABI
== ABI_V4
)
21241 if (rs6000_passes_vector
)
21242 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
21243 (TARGET_ALTIVEC_ABI
? 2 : 1));
21244 if (rs6000_returns_struct
)
21245 fprintf (asm_out_file
, "\t.gnu_attribute 12, %d\n",
21246 aix_struct_return
? 2 : 1);
21249 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21250 if (TARGET_32BIT
|| DEFAULT_ABI
== ABI_ELFv2
)
21251 file_end_indicate_exec_stack ();
21254 if (flag_split_stack
)
21255 file_end_indicate_split_stack ();
21259 /* We have expanded a CPU builtin, so we need to emit a reference to
21260 the special symbol that LIBC uses to declare it supports the
21261 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
21262 switch_to_section (data_section
);
21263 fprintf (asm_out_file
, "\t.align %u\n", TARGET_32BIT
? 2 : 3);
21264 fprintf (asm_out_file
, "\t%s %s\n",
21265 TARGET_32BIT
? ".long" : ".quad", tcb_verification_symbol
);
21272 #ifndef HAVE_XCOFF_DWARF_EXTRAS
21273 #define HAVE_XCOFF_DWARF_EXTRAS 0
21277 /* Names of bss and data sections. These should be unique names for each
21278 compilation unit. */
21280 char *xcoff_bss_section_name
;
21281 char *xcoff_private_data_section_name
;
21282 char *xcoff_private_rodata_section_name
;
21283 char *xcoff_tls_data_section_name
;
21284 char *xcoff_read_only_section_name
;
21286 static enum unwind_info_type
21287 rs6000_xcoff_debug_unwind_info (void)
21293 rs6000_xcoff_asm_output_anchor (rtx symbol
)
21297 sprintf (buffer
, "$ + " HOST_WIDE_INT_PRINT_DEC
,
21298 SYMBOL_REF_BLOCK_OFFSET (symbol
));
21299 fprintf (asm_out_file
, "%s", SET_ASM_OP
);
21300 RS6000_OUTPUT_BASENAME (asm_out_file
, XSTR (symbol
, 0));
21301 fprintf (asm_out_file
, ",");
21302 RS6000_OUTPUT_BASENAME (asm_out_file
, buffer
);
21303 fprintf (asm_out_file
, "\n");
21307 rs6000_xcoff_asm_globalize_label (FILE *stream
, const char *name
)
21309 fputs (GLOBAL_ASM_OP
, stream
);
21310 RS6000_OUTPUT_BASENAME (stream
, name
);
21311 putc ('\n', stream
);
21314 /* A get_unnamed_decl callback, used for read-only sections. PTR
21315 points to the section string variable. */
21318 rs6000_xcoff_output_readonly_section_asm_op (const char *directive
)
21320 fprintf (asm_out_file
, "\t.csect %s[RO],%s\n",
21322 ? xcoff_private_rodata_section_name
21323 : xcoff_read_only_section_name
,
21324 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21327 /* Likewise for read-write sections. */
21330 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21332 fprintf (asm_out_file
, "\t.csect %s[RW],%s\n",
21333 xcoff_private_data_section_name
,
21334 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21338 rs6000_xcoff_output_tls_section_asm_op (const char *directive
)
21340 fprintf (asm_out_file
, "\t.csect %s[TL],%s\n",
21342 ? xcoff_private_data_section_name
21343 : xcoff_tls_data_section_name
,
21344 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21347 /* A get_unnamed_section callback, used for switching to toc_section. */
21350 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
21352 if (TARGET_MINIMAL_TOC
)
21354 /* toc_section is always selected at least once from
21355 rs6000_xcoff_file_start, so this is guaranteed to
21356 always be defined once and only once in each file. */
21357 if (!toc_initialized
)
21359 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file
);
21360 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file
);
21361 toc_initialized
= 1;
21363 fprintf (asm_out_file
, "\t.csect toc_table[RW]%s\n",
21364 (TARGET_32BIT
? "" : ",3"));
21367 fputs ("\t.toc\n", asm_out_file
);
21370 /* Implement TARGET_ASM_INIT_SECTIONS. */
21373 rs6000_xcoff_asm_init_sections (void)
21375 read_only_data_section
21376 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
21379 private_data_section
21380 = get_unnamed_section (SECTION_WRITE
,
21381 rs6000_xcoff_output_readwrite_section_asm_op
,
21384 read_only_private_data_section
21385 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
21389 = get_unnamed_section (SECTION_TLS
,
21390 rs6000_xcoff_output_tls_section_asm_op
,
21393 tls_private_data_section
21394 = get_unnamed_section (SECTION_TLS
,
21395 rs6000_xcoff_output_tls_section_asm_op
,
21399 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op
, NULL
);
21401 readonly_data_section
= read_only_data_section
;
21405 rs6000_xcoff_reloc_rw_mask (void)
21411 rs6000_xcoff_asm_named_section (const char *name
, unsigned int flags
,
21412 tree decl ATTRIBUTE_UNUSED
)
21415 static const char * const suffix
[7]
21416 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21418 if (flags
& SECTION_EXCLUDE
)
21420 else if (flags
& SECTION_DEBUG
)
21422 fprintf (asm_out_file
, "\t.dwsect %s\n", name
);
21425 else if (flags
& SECTION_CODE
)
21427 else if (flags
& SECTION_TLS
)
21429 if (flags
& SECTION_BSS
)
21434 else if (flags
& SECTION_WRITE
)
21436 if (flags
& SECTION_BSS
)
21444 fprintf (asm_out_file
, "\t.csect %s%s[%s],%u\n",
21445 (flags
& SECTION_CODE
) ? "." : "",
21446 name
, suffix
[smclass
], flags
& SECTION_ENTSIZE
);
21449 #define IN_NAMED_SECTION(DECL) \
21450 ((TREE_CODE (DECL) == FUNCTION_DECL || VAR_P (DECL)) \
21451 && DECL_SECTION_NAME (DECL) != NULL)
21454 rs6000_xcoff_select_section (tree decl
, int reloc
,
21455 unsigned HOST_WIDE_INT align
)
21457 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21459 if (align
> BIGGEST_ALIGNMENT
&& VAR_OR_FUNCTION_DECL_P (decl
))
21461 resolve_unique_section (decl
, reloc
, true);
21462 if (IN_NAMED_SECTION (decl
))
21463 return get_named_section (decl
, NULL
, reloc
);
21466 if (decl_readonly_section (decl
, reloc
))
21468 if (TREE_PUBLIC (decl
))
21469 return read_only_data_section
;
21471 return read_only_private_data_section
;
21476 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
21478 if (bss_initializer_p (decl
))
21479 return tls_comm_section
;
21480 else if (TREE_PUBLIC (decl
))
21481 return tls_data_section
;
21483 return tls_private_data_section
;
21487 if (TREE_PUBLIC (decl
))
21488 return data_section
;
21490 return private_data_section
;
21495 rs6000_xcoff_unique_section (tree decl
, int reloc ATTRIBUTE_UNUSED
)
21499 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
21500 name
= (*targetm
.strip_name_encoding
) (name
);
21501 set_decl_section_name (decl
, name
);
21504 /* Select section for constant in constant pool.
21506 On RS/6000, all constants are in the private read-only data area.
21507 However, if this is being placed in the TOC it must be output as a
21511 rs6000_xcoff_select_rtx_section (machine_mode mode
, rtx x
,
21512 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
21514 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
21515 return toc_section
;
21517 return read_only_private_data_section
;
21520 /* Remove any trailing [DS] or the like from the symbol name. */
21522 static const char *
21523 rs6000_xcoff_strip_name_encoding (const char *name
)
21528 len
= strlen (name
);
21529 if (name
[len
- 1] == ']')
21530 return ggc_alloc_string (name
, len
- 4);
21535 /* Section attributes. AIX is always PIC. */
21537 static unsigned int
21538 rs6000_xcoff_section_type_flags (tree decl
, const char *name
, int reloc
)
21540 unsigned int align
;
21541 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
21543 if (decl
&& DECL_P (decl
) && VAR_P (decl
) && bss_initializer_p (decl
))
21544 flags
|= SECTION_BSS
;
21546 /* Align to at least UNIT size. */
21547 if (!decl
|| !DECL_P (decl
))
21548 align
= MIN_UNITS_PER_WORD
;
21549 /* Align code CSECT to at least 32 bytes. */
21550 else if ((flags
& SECTION_CODE
) != 0)
21551 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
), 32);
21553 /* Increase alignment of large objects if not already stricter. */
21554 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
),
21555 int_size_in_bytes (TREE_TYPE (decl
)) > MIN_UNITS_PER_WORD
21556 ? UNITS_PER_FP_WORD
: MIN_UNITS_PER_WORD
);
21558 return flags
| (exact_log2 (align
) & SECTION_ENTSIZE
);
21561 /* Output at beginning of assembler file.
21563 Initialize the section names for the RS/6000 at this point.
21565 Specify filename, including full path, to assembler.
21567 We want to go into the TOC section so at least one .toc will be emitted.
21568 Also, in order to output proper .bs/.es pairs, we need at least one static
21569 [RW] section emitted.
21571 Finally, declare mcount when profiling to make the assembler happy. */
21574 rs6000_xcoff_file_start (void)
21576 rs6000_gen_section_name (&xcoff_bss_section_name
,
21577 main_input_filename
, ".bss_");
21578 rs6000_gen_section_name (&xcoff_private_data_section_name
,
21579 main_input_filename
, ".rw_");
21580 rs6000_gen_section_name (&xcoff_private_rodata_section_name
,
21581 main_input_filename
, ".rop_");
21582 rs6000_gen_section_name (&xcoff_read_only_section_name
,
21583 main_input_filename
, ".ro_");
21584 rs6000_gen_section_name (&xcoff_tls_data_section_name
,
21585 main_input_filename
, ".tls_");
21587 fputs ("\t.file\t", asm_out_file
);
21588 output_quoted_string (asm_out_file
, main_input_filename
);
21589 fputc ('\n', asm_out_file
);
21590 if (write_symbols
!= NO_DEBUG
)
21591 switch_to_section (private_data_section
);
21592 switch_to_section (toc_section
);
21593 switch_to_section (text_section
);
21595 fprintf (asm_out_file
, "\t.extern %s\n", RS6000_MCOUNT
);
21596 rs6000_file_start ();
21599 /* Output at end of assembler file.
21600 On the RS/6000, referencing data should automatically pull in text. */
21603 rs6000_xcoff_file_end (void)
21605 switch_to_section (text_section
);
21606 if (xcoff_tls_exec_model_detected
)
21608 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21609 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file
);
21611 fputs ("_section_.text:\n", asm_out_file
);
21612 switch_to_section (data_section
);
21613 fputs (TARGET_32BIT
21614 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21619 struct declare_alias_data
21622 bool function_descriptor
;
21625 /* Declare alias N. A helper function for for_node_and_aliases. */
21628 rs6000_declare_alias (struct symtab_node
*n
, void *d
)
21630 struct declare_alias_data
*data
= (struct declare_alias_data
*)d
;
21631 /* Main symbol is output specially, because varasm machinery does part of
21632 the job for us - we do not need to declare .globl/lglobs and such. */
21633 if (!n
->alias
|| n
->weakref
)
21636 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n
->decl
)))
21639 /* Prevent assemble_alias from trying to use .set pseudo operation
21640 that does not behave as expected by the middle-end. */
21641 TREE_ASM_WRITTEN (n
->decl
) = true;
21643 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n
->decl
));
21644 char *buffer
= (char *) alloca (strlen (name
) + 2);
21646 int dollar_inside
= 0;
21648 strcpy (buffer
, name
);
21649 p
= strchr (buffer
, '$');
21653 p
= strchr (p
+ 1, '$');
21655 if (TREE_PUBLIC (n
->decl
))
21657 if (!RS6000_WEAK
|| !DECL_WEAK (n
->decl
))
21659 if (dollar_inside
) {
21660 if (data
->function_descriptor
)
21661 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21662 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21664 if (data
->function_descriptor
)
21666 fputs ("\t.globl .", data
->file
);
21667 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21668 putc ('\n', data
->file
);
21670 fputs ("\t.globl ", data
->file
);
21671 assemble_name (data
->file
, buffer
);
21672 putc ('\n', data
->file
);
21674 #ifdef ASM_WEAKEN_DECL
21675 else if (DECL_WEAK (n
->decl
) && !data
->function_descriptor
)
21676 ASM_WEAKEN_DECL (data
->file
, n
->decl
, name
, NULL
);
21683 if (data
->function_descriptor
)
21684 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21685 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21687 if (data
->function_descriptor
)
21689 fputs ("\t.lglobl .", data
->file
);
21690 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21691 putc ('\n', data
->file
);
21693 fputs ("\t.lglobl ", data
->file
);
21694 assemble_name (data
->file
, buffer
);
21695 putc ('\n', data
->file
);
21697 if (data
->function_descriptor
)
21698 putc ('.', data
->file
);
21699 ASM_OUTPUT_LABEL (data
->file
, buffer
);
21704 #ifdef HAVE_GAS_HIDDEN
21705 /* Helper function to calculate visibility of a DECL
21706 and return the value as a const string. */
21708 static const char *
21709 rs6000_xcoff_visibility (tree decl
)
21711 static const char * const visibility_types
[] = {
21712 "", ",protected", ",hidden", ",internal"
21715 enum symbol_visibility vis
= DECL_VISIBILITY (decl
);
21716 return visibility_types
[vis
];
21721 /* This macro produces the initial definition of a function name.
21722 On the RS/6000, we need to place an extra '.' in the function name and
21723 output the function descriptor.
21724 Dollar signs are converted to underscores.
21726 The csect for the function will have already been created when
21727 text_section was selected. We do have to go back to that csect, however.
21729 The third and fourth parameters to the .function pseudo-op (16 and 044)
21730 are placeholders which no longer have any use.
21732 Because AIX assembler's .set command has unexpected semantics, we output
21733 all aliases as alternative labels in front of the definition. */
21736 rs6000_xcoff_declare_function_name (FILE *file
, const char *name
, tree decl
)
21738 char *buffer
= (char *) alloca (strlen (name
) + 1);
21740 int dollar_inside
= 0;
21741 struct declare_alias_data data
= {file
, false};
21743 strcpy (buffer
, name
);
21744 p
= strchr (buffer
, '$');
21748 p
= strchr (p
+ 1, '$');
21750 if (TREE_PUBLIC (decl
))
21752 if (!RS6000_WEAK
|| !DECL_WEAK (decl
))
21754 if (dollar_inside
) {
21755 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21756 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21758 fputs ("\t.globl .", file
);
21759 RS6000_OUTPUT_BASENAME (file
, buffer
);
21760 #ifdef HAVE_GAS_HIDDEN
21761 fputs (rs6000_xcoff_visibility (decl
), file
);
21768 if (dollar_inside
) {
21769 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21770 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21772 fputs ("\t.lglobl .", file
);
21773 RS6000_OUTPUT_BASENAME (file
, buffer
);
21777 fputs ("\t.csect ", file
);
21778 assemble_name (file
, buffer
);
21779 fputs (TARGET_32BIT
? "\n" : ",3\n", file
);
21781 ASM_OUTPUT_LABEL (file
, buffer
);
21783 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21785 fputs (TARGET_32BIT
? "\t.long ." : "\t.llong .", file
);
21786 RS6000_OUTPUT_BASENAME (file
, buffer
);
21787 fputs (", TOC[tc0], 0\n", file
);
21790 switch_to_section (function_section (decl
));
21792 ASM_OUTPUT_LABEL (file
, buffer
);
21794 data
.function_descriptor
= true;
21795 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21797 if (!DECL_IGNORED_P (decl
))
21799 if (dwarf_debuginfo_p ())
21801 name
= (*targetm
.strip_name_encoding
) (name
);
21802 fprintf (file
, "\t.function .%s,.%s,2,0\n", name
, name
);
21809 /* Output assembly language to globalize a symbol from a DECL,
21810 possibly with visibility. */
21813 rs6000_xcoff_asm_globalize_decl_name (FILE *stream
, tree decl
)
21815 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
21816 fputs (GLOBAL_ASM_OP
, stream
);
21817 assemble_name (stream
, name
);
21818 #ifdef HAVE_GAS_HIDDEN
21819 fputs (rs6000_xcoff_visibility (decl
), stream
);
21821 putc ('\n', stream
);
21824 /* Output assembly language to define a symbol as COMMON from a DECL,
21825 possibly with visibility. */
21828 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream
,
21829 tree decl ATTRIBUTE_UNUSED
,
21831 unsigned HOST_WIDE_INT size
,
21832 unsigned int align
)
21834 unsigned int align2
= 2;
21837 align
= DATA_ABI_ALIGNMENT (TREE_TYPE (decl
), DECL_ALIGN (decl
));
21840 align2
= floor_log2 (align
/ BITS_PER_UNIT
);
21844 if (! DECL_COMMON (decl
))
21846 /* Forget section. */
21849 /* Globalize TLS BSS. */
21850 if (TREE_PUBLIC (decl
) && DECL_THREAD_LOCAL_P (decl
))
21852 fputs (GLOBAL_ASM_OP
, stream
);
21853 assemble_name (stream
, name
);
21854 fputc ('\n', stream
);
21857 /* Switch to section and skip space. */
21858 fputs ("\t.csect ", stream
);
21859 assemble_name (stream
, name
);
21860 fprintf (stream
, ",%u\n", align2
);
21861 ASM_DECLARE_OBJECT_NAME (stream
, name
, decl
);
21862 ASM_OUTPUT_SKIP (stream
, size
? size
: 1);
21866 if (TREE_PUBLIC (decl
))
21869 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED
",%u" ,
21870 name
, size
, align2
);
21872 #ifdef HAVE_GAS_HIDDEN
21874 fputs (rs6000_xcoff_visibility (decl
), stream
);
21876 putc ('\n', stream
);
21880 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED
",%s,%u\n",
21881 (*targetm
.strip_name_encoding
) (name
), size
, name
, align2
);
21884 /* This macro produces the initial definition of a object (variable) name.
21885 Because AIX assembler's .set command has unexpected semantics, we output
21886 all aliases as alternative labels in front of the definition. */
21889 rs6000_xcoff_declare_object_name (FILE *file
, const char *name
, tree decl
)
21891 struct declare_alias_data data
= {file
, false};
21892 ASM_OUTPUT_LABEL (file
, name
);
21893 symtab_node::get_create (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21897 /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
21900 rs6000_asm_output_dwarf_pcrel (FILE *file
, int size
, const char *label
)
21902 fputs (integer_asm_op (size
, FALSE
), file
);
21903 assemble_name (file
, label
);
21904 fputs ("-$", file
);
21907 /* Output a symbol offset relative to the dbase for the current object.
21908 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21911 __gcc_unwind_dbase is embedded in all executables/libraries through
21912 libgcc/config/rs6000/crtdbase.S. */
21915 rs6000_asm_output_dwarf_datarel (FILE *file
, int size
, const char *label
)
21917 fputs (integer_asm_op (size
, FALSE
), file
);
21918 assemble_name (file
, label
);
21919 fputs("-__gcc_unwind_dbase", file
);
21924 rs6000_xcoff_encode_section_info (tree decl
, rtx rtl
, int first
)
21928 const char *symname
;
21930 default_encode_section_info (decl
, rtl
, first
);
21932 /* Careful not to prod global register variables. */
21935 symbol
= XEXP (rtl
, 0);
21936 if (!SYMBOL_REF_P (symbol
))
21939 flags
= SYMBOL_REF_FLAGS (symbol
);
21941 if (VAR_P (decl
) && DECL_THREAD_LOCAL_P (decl
))
21942 flags
&= ~SYMBOL_FLAG_HAS_BLOCK_INFO
;
21944 SYMBOL_REF_FLAGS (symbol
) = flags
;
21946 symname
= XSTR (symbol
, 0);
21948 /* Append CSECT mapping class, unless the symbol already is qualified.
21949 Aliases are implemented as labels, so the symbol name should not add
21950 a mapping class. */
21953 && VAR_OR_FUNCTION_DECL_P (decl
)
21954 && (symtab_node::get (decl
) == NULL
21955 || symtab_node::get (decl
)->alias
== 0)
21956 && symname
[strlen (symname
) - 1] != ']')
21958 const char *smclass
= NULL
;
21960 if (TREE_CODE (decl
) == FUNCTION_DECL
)
21962 else if (DECL_THREAD_LOCAL_P (decl
))
21964 if (bss_initializer_p (decl
))
21966 else if (flag_data_sections
)
21969 else if (DECL_EXTERNAL (decl
))
21971 else if (bss_initializer_p (decl
))
21973 else if (flag_data_sections
)
21975 /* This must exactly match the logic of select section. */
21976 if (decl_readonly_section (decl
, compute_reloc_for_var (decl
)))
21982 if (smclass
!= NULL
)
21984 char *newname
= XALLOCAVEC (char, strlen (symname
) + 5);
21986 strcpy (newname
, symname
);
21987 strcat (newname
, smclass
);
21988 XSTR (symbol
, 0) = ggc_strdup (newname
);
21992 #endif /* HAVE_AS_TLS */
21993 #endif /* TARGET_XCOFF */
21996 rs6000_asm_weaken_decl (FILE *stream
, tree decl
,
21997 const char *name
, const char *val
)
21999 fputs ("\t.weak\t", stream
);
22000 assemble_name (stream
, name
);
22001 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
22002 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
22004 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22006 fputs (rs6000_xcoff_visibility (decl
), stream
);
22008 fputs ("\n\t.weak\t.", stream
);
22009 RS6000_OUTPUT_BASENAME (stream
, name
);
22011 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22013 fputs (rs6000_xcoff_visibility (decl
), stream
);
22015 fputc ('\n', stream
);
22019 #ifdef ASM_OUTPUT_DEF
22020 ASM_OUTPUT_DEF (stream
, name
, val
);
22022 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
22023 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
22025 fputs ("\t.set\t.", stream
);
22026 RS6000_OUTPUT_BASENAME (stream
, name
);
22027 fputs (",.", stream
);
22028 RS6000_OUTPUT_BASENAME (stream
, val
);
22029 fputc ('\n', stream
);
22035 /* Return true if INSN should not be copied. */
22038 rs6000_cannot_copy_insn_p (rtx_insn
*insn
)
22040 return recog_memoized (insn
) >= 0
22041 && get_attr_cannot_copy (insn
);
22044 /* Compute a (partial) cost for rtx X. Return true if the complete
22045 cost has been computed, and false if subexpressions should be
22046 scanned. In either case, *TOTAL contains the cost result. */
22049 rs6000_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
22050 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
22052 int code
= GET_CODE (x
);
22056 /* On the RS/6000, if it is valid in the insn, it is free. */
22058 if (((outer_code
== SET
22059 || outer_code
== PLUS
22060 || outer_code
== MINUS
)
22061 && (satisfies_constraint_I (x
)
22062 || satisfies_constraint_L (x
)))
22063 || (outer_code
== AND
22064 && (satisfies_constraint_K (x
)
22066 ? satisfies_constraint_L (x
)
22067 : satisfies_constraint_J (x
))))
22068 || ((outer_code
== IOR
|| outer_code
== XOR
)
22069 && (satisfies_constraint_K (x
)
22071 ? satisfies_constraint_L (x
)
22072 : satisfies_constraint_J (x
))))
22073 || outer_code
== ASHIFT
22074 || outer_code
== ASHIFTRT
22075 || outer_code
== LSHIFTRT
22076 || outer_code
== ROTATE
22077 || outer_code
== ROTATERT
22078 || outer_code
== ZERO_EXTRACT
22079 || (outer_code
== MULT
22080 && satisfies_constraint_I (x
))
22081 || ((outer_code
== DIV
|| outer_code
== UDIV
22082 || outer_code
== MOD
|| outer_code
== UMOD
)
22083 && exact_log2 (INTVAL (x
)) >= 0)
22084 || (outer_code
== COMPARE
22085 && (satisfies_constraint_I (x
)
22086 || satisfies_constraint_K (x
)))
22087 || ((outer_code
== EQ
|| outer_code
== NE
)
22088 && (satisfies_constraint_I (x
)
22089 || satisfies_constraint_K (x
)
22091 ? satisfies_constraint_L (x
)
22092 : satisfies_constraint_J (x
))))
22093 || (outer_code
== GTU
22094 && satisfies_constraint_I (x
))
22095 || (outer_code
== LTU
22096 && satisfies_constraint_P (x
)))
22101 else if ((outer_code
== PLUS
22102 && reg_or_add_cint_operand (x
, mode
))
22103 || (outer_code
== MINUS
22104 && reg_or_sub_cint_operand (x
, mode
))
22105 || ((outer_code
== SET
22106 || outer_code
== IOR
22107 || outer_code
== XOR
)
22109 & ~ (unsigned HOST_WIDE_INT
) 0xffffffff) == 0))
22111 *total
= COSTS_N_INSNS (1);
22117 case CONST_WIDE_INT
:
22121 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22125 /* When optimizing for size, MEM should be slightly more expensive
22126 than generating address, e.g., (plus (reg) (const)).
22127 L1 cache latency is about two instructions. */
22128 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22129 if (rs6000_slow_unaligned_access (mode
, MEM_ALIGN (x
)))
22130 *total
+= COSTS_N_INSNS (100);
22139 if (FLOAT_MODE_P (mode
))
22140 *total
= rs6000_cost
->fp
;
22142 *total
= COSTS_N_INSNS (1);
22146 if (CONST_INT_P (XEXP (x
, 1))
22147 && satisfies_constraint_I (XEXP (x
, 1)))
22149 if (INTVAL (XEXP (x
, 1)) >= -256
22150 && INTVAL (XEXP (x
, 1)) <= 255)
22151 *total
= rs6000_cost
->mulsi_const9
;
22153 *total
= rs6000_cost
->mulsi_const
;
22155 else if (mode
== SFmode
)
22156 *total
= rs6000_cost
->fp
;
22157 else if (FLOAT_MODE_P (mode
))
22158 *total
= rs6000_cost
->dmul
;
22159 else if (mode
== DImode
)
22160 *total
= rs6000_cost
->muldi
;
22162 *total
= rs6000_cost
->mulsi
;
22166 if (mode
== SFmode
)
22167 *total
= rs6000_cost
->fp
;
22169 *total
= rs6000_cost
->dmul
;
22174 if (FLOAT_MODE_P (mode
))
22176 *total
= mode
== DFmode
? rs6000_cost
->ddiv
22177 : rs6000_cost
->sdiv
;
22184 if (CONST_INT_P (XEXP (x
, 1))
22185 && exact_log2 (INTVAL (XEXP (x
, 1))) >= 0)
22187 if (code
== DIV
|| code
== MOD
)
22189 *total
= COSTS_N_INSNS (2);
22192 *total
= COSTS_N_INSNS (1);
22196 if (GET_MODE (XEXP (x
, 1)) == DImode
)
22197 *total
= rs6000_cost
->divdi
;
22199 *total
= rs6000_cost
->divsi
;
22201 /* Add in shift and subtract for MOD unless we have a mod instruction. */
22202 if ((!TARGET_MODULO
22203 || (RS6000_DISABLE_SCALAR_MODULO
&& SCALAR_INT_MODE_P (mode
)))
22204 && (code
== MOD
|| code
== UMOD
))
22205 *total
+= COSTS_N_INSNS (2);
22209 *total
= COSTS_N_INSNS (TARGET_CTZ
? 1 : 4);
22213 *total
= COSTS_N_INSNS (4);
22217 *total
= COSTS_N_INSNS (TARGET_POPCNTD
? 1 : 6);
22221 *total
= COSTS_N_INSNS (TARGET_CMPB
? 2 : 6);
22225 if (outer_code
== AND
|| outer_code
== IOR
|| outer_code
== XOR
)
22228 *total
= COSTS_N_INSNS (1);
22232 if (CONST_INT_P (XEXP (x
, 1)))
22234 rtx left
= XEXP (x
, 0);
22235 rtx_code left_code
= GET_CODE (left
);
22237 /* rotate-and-mask: 1 insn. */
22238 if ((left_code
== ROTATE
22239 || left_code
== ASHIFT
22240 || left_code
== LSHIFTRT
)
22241 && rs6000_is_valid_shift_mask (XEXP (x
, 1), left
, mode
))
22243 *total
= rtx_cost (XEXP (left
, 0), mode
, left_code
, 0, speed
);
22244 if (!CONST_INT_P (XEXP (left
, 1)))
22245 *total
+= rtx_cost (XEXP (left
, 1), SImode
, left_code
, 1, speed
);
22246 *total
+= COSTS_N_INSNS (1);
22250 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22251 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
22252 if (rs6000_is_valid_and_mask (XEXP (x
, 1), mode
)
22253 || (val
& 0xffff) == val
22254 || (val
& 0xffff0000) == val
22255 || ((val
& 0xffff) == 0 && mode
== SImode
))
22257 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
22258 *total
+= COSTS_N_INSNS (1);
22263 if (rs6000_is_valid_2insn_and (XEXP (x
, 1), mode
))
22265 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
22266 *total
+= COSTS_N_INSNS (2);
22271 *total
= COSTS_N_INSNS (1);
22276 *total
= COSTS_N_INSNS (1);
22282 *total
= COSTS_N_INSNS (1);
22286 /* The EXTSWSLI instruction is a combined instruction. Don't count both
22287 the sign extend and shift separately within the insn. */
22288 if (TARGET_EXTSWSLI
&& mode
== DImode
22289 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
22290 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
)
22301 /* Handle mul_highpart. */
22302 if (outer_code
== TRUNCATE
22303 && GET_CODE (XEXP (x
, 0)) == MULT
)
22305 if (mode
== DImode
)
22306 *total
= rs6000_cost
->muldi
;
22308 *total
= rs6000_cost
->mulsi
;
22311 else if (outer_code
== AND
)
22314 *total
= COSTS_N_INSNS (1);
22319 if (MEM_P (XEXP (x
, 0)))
22322 *total
= COSTS_N_INSNS (1);
22328 if (!FLOAT_MODE_P (mode
))
22330 *total
= COSTS_N_INSNS (1);
22336 case UNSIGNED_FLOAT
:
22339 case FLOAT_TRUNCATE
:
22340 *total
= rs6000_cost
->fp
;
22344 if (mode
== DFmode
)
22345 *total
= rs6000_cost
->sfdf_convert
;
22347 *total
= rs6000_cost
->fp
;
22354 *total
= COSTS_N_INSNS (1);
22357 else if (FLOAT_MODE_P (mode
) && TARGET_PPC_GFXOPT
&& TARGET_HARD_FLOAT
)
22359 *total
= rs6000_cost
->fp
;
22368 /* Carry bit requires mode == Pmode.
22369 NEG or PLUS already counted so only add one. */
22371 && (outer_code
== NEG
|| outer_code
== PLUS
))
22373 *total
= COSTS_N_INSNS (1);
22381 if (outer_code
== SET
)
22383 if (XEXP (x
, 1) == const0_rtx
)
22385 *total
= COSTS_N_INSNS (2);
22390 *total
= COSTS_N_INSNS (3);
22395 if (outer_code
== COMPARE
)
22403 if (XINT (x
, 1) == UNSPECV_MMA_XXSETACCZ
)
22417 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
22420 rs6000_debug_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
22421 int opno
, int *total
, bool speed
)
22423 bool ret
= rs6000_rtx_costs (x
, mode
, outer_code
, opno
, total
, speed
);
22426 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22427 "opno = %d, total = %d, speed = %s, x:\n",
22428 ret
? "complete" : "scan inner",
22429 GET_MODE_NAME (mode
),
22430 GET_RTX_NAME (outer_code
),
22433 speed
? "true" : "false");
22441 rs6000_insn_cost (rtx_insn
*insn
, bool speed
)
22443 if (recog_memoized (insn
) < 0)
22446 /* If we are optimizing for size, just use the length. */
22448 return get_attr_length (insn
);
22450 /* Use the cost if provided. */
22451 int cost
= get_attr_cost (insn
);
22455 /* If the insn tells us how many insns there are, use that. Otherwise use
22456 the length/4. Adjust the insn length to remove the extra size that
22457 prefixed instructions take. */
22458 int n
= get_attr_num_insns (insn
);
22461 int length
= get_attr_length (insn
);
22462 if (get_attr_prefixed (insn
) == PREFIXED_YES
)
22465 ADJUST_INSN_LENGTH (insn
, adjust
);
22472 enum attr_type type
= get_attr_type (insn
);
22479 cost
= COSTS_N_INSNS (n
+ 1);
22483 switch (get_attr_size (insn
))
22486 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const9
;
22489 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const
;
22492 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi
;
22495 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->muldi
;
22498 gcc_unreachable ();
22502 switch (get_attr_size (insn
))
22505 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divsi
;
22508 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divdi
;
22511 gcc_unreachable ();
22516 cost
= n
* rs6000_cost
->fp
;
22519 cost
= n
* rs6000_cost
->dmul
;
22522 cost
= n
* rs6000_cost
->sdiv
;
22525 cost
= n
* rs6000_cost
->ddiv
;
22532 cost
= COSTS_N_INSNS (n
+ 2);
22536 cost
= COSTS_N_INSNS (n
);
22542 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22545 rs6000_debug_address_cost (rtx x
, machine_mode mode
,
22546 addr_space_t as
, bool speed
)
22548 int ret
= TARGET_ADDRESS_COST (x
, mode
, as
, speed
);
22550 fprintf (stderr
, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22551 ret
, speed
? "true" : "false");
22558 /* A C expression returning the cost of moving data from a register of class
22559 CLASS1 to one of CLASS2. */
22562 rs6000_register_move_cost (machine_mode mode
,
22563 reg_class_t from
, reg_class_t to
)
22566 reg_class_t rclass
;
22568 if (TARGET_DEBUG_COST
)
22571 /* If we have VSX, we can easily move between FPR or Altivec registers,
22572 otherwise we can only easily move within classes.
22573 Do this first so we give best-case answers for union classes
22574 containing both gprs and vsx regs. */
22575 HARD_REG_SET to_vsx
, from_vsx
;
22576 to_vsx
= reg_class_contents
[to
] & reg_class_contents
[VSX_REGS
];
22577 from_vsx
= reg_class_contents
[from
] & reg_class_contents
[VSX_REGS
];
22578 if (!hard_reg_set_empty_p (to_vsx
)
22579 && !hard_reg_set_empty_p (from_vsx
)
22581 || hard_reg_set_intersect_p (to_vsx
, from_vsx
)))
22583 int reg
= FIRST_FPR_REGNO
;
22585 || (TEST_HARD_REG_BIT (to_vsx
, FIRST_ALTIVEC_REGNO
)
22586 && TEST_HARD_REG_BIT (from_vsx
, FIRST_ALTIVEC_REGNO
)))
22587 reg
= FIRST_ALTIVEC_REGNO
;
22588 ret
= 2 * hard_regno_nregs (reg
, mode
);
22591 /* Moves from/to GENERAL_REGS. */
22592 else if ((rclass
= from
, reg_classes_intersect_p (to
, GENERAL_REGS
))
22593 || (rclass
= to
, reg_classes_intersect_p (from
, GENERAL_REGS
)))
22595 if (rclass
== FLOAT_REGS
|| rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
22597 if (TARGET_DIRECT_MOVE
)
22599 /* Keep the cost for direct moves above that for within
22600 a register class even if the actual processor cost is
22601 comparable. We do this because a direct move insn
22602 can't be a nop, whereas with ideal register
22603 allocation a move within the same class might turn
22604 out to be a nop. */
22605 if (rs6000_tune
== PROCESSOR_POWER9
22606 || rs6000_tune
== PROCESSOR_POWER10
)
22607 ret
= 3 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22609 ret
= 4 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22610 /* SFmode requires a conversion when moving between gprs
22612 if (mode
== SFmode
)
22616 ret
= (rs6000_memory_move_cost (mode
, rclass
, false)
22617 + rs6000_memory_move_cost (mode
, GENERAL_REGS
, false));
22620 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22622 else if (rclass
== CR_REGS
)
22625 /* For those processors that have slow LR/CTR moves, make them more
22626 expensive than memory in order to bias spills to memory .*/
22627 else if ((rs6000_tune
== PROCESSOR_POWER6
22628 || rs6000_tune
== PROCESSOR_POWER7
22629 || rs6000_tune
== PROCESSOR_POWER8
22630 || rs6000_tune
== PROCESSOR_POWER9
)
22631 && reg_class_subset_p (rclass
, SPECIAL_REGS
))
22632 ret
= 6 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22635 /* A move will cost one instruction per GPR moved. */
22636 ret
= 2 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22639 /* Everything else has to go through GENERAL_REGS. */
22641 ret
= (rs6000_register_move_cost (mode
, GENERAL_REGS
, to
)
22642 + rs6000_register_move_cost (mode
, from
, GENERAL_REGS
));
22644 if (TARGET_DEBUG_COST
)
22646 if (dbg_cost_ctrl
== 1)
22648 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22649 ret
, GET_MODE_NAME (mode
), reg_class_names
[from
],
22650 reg_class_names
[to
]);
22657 /* A C expressions returning the cost of moving data of MODE from a register to
22661 rs6000_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
22662 bool in ATTRIBUTE_UNUSED
)
22666 if (TARGET_DEBUG_COST
)
22669 if (reg_classes_intersect_p (rclass
, GENERAL_REGS
))
22670 ret
= 4 * hard_regno_nregs (0, mode
);
22671 else if ((reg_classes_intersect_p (rclass
, FLOAT_REGS
)
22672 || reg_classes_intersect_p (rclass
, VSX_REGS
)))
22673 ret
= 4 * hard_regno_nregs (32, mode
);
22674 else if (reg_classes_intersect_p (rclass
, ALTIVEC_REGS
))
22675 ret
= 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO
, mode
);
22677 ret
= 4 + rs6000_register_move_cost (mode
, rclass
, GENERAL_REGS
);
22679 if (TARGET_DEBUG_COST
)
22681 if (dbg_cost_ctrl
== 1)
22683 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22684 ret
, GET_MODE_NAME (mode
), reg_class_names
[rclass
], in
);
22691 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22693 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22694 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22695 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22696 move cost between GENERAL_REGS and VSX_REGS low.
22698 It might seem reasonable to use a union class. After all, if usage
22699 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22700 rather than memory. However, in cases where register pressure of
22701 both is high, like the cactus_adm spec test, allowing
22702 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22703 the first scheduling pass. This is partly due to an allocno of
22704 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22705 class, which gives too high a pressure for GENERAL_REGS and too low
22706 for VSX_REGS. So, force a choice of the subclass here.
22708 The best class is also the union if GENERAL_REGS and VSX_REGS have
22709 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22710 allocno class, since trying to narrow down the class by regno mode
22711 is prone to error. For example, SImode is allowed in VSX regs and
22712 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22713 it would be wrong to choose an allocno of GENERAL_REGS based on
22717 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED
,
22718 reg_class_t allocno_class
,
22719 reg_class_t best_class
)
22721 switch (allocno_class
)
22723 case GEN_OR_VSX_REGS
:
22724 /* best_class must be a subset of allocno_class. */
22725 gcc_checking_assert (best_class
== GEN_OR_VSX_REGS
22726 || best_class
== GEN_OR_FLOAT_REGS
22727 || best_class
== VSX_REGS
22728 || best_class
== ALTIVEC_REGS
22729 || best_class
== FLOAT_REGS
22730 || best_class
== GENERAL_REGS
22731 || best_class
== BASE_REGS
);
22732 /* Use best_class but choose wider classes when copying from the
22733 wider class to best_class is cheap. This mimics IRA choice
22734 of allocno class. */
22735 if (best_class
== BASE_REGS
)
22736 return GENERAL_REGS
;
22737 if (TARGET_VSX
&& best_class
== FLOAT_REGS
)
22742 if (best_class
== ALTIVEC_REGS
)
22743 return ALTIVEC_REGS
;
22749 return allocno_class
;
22752 /* Load up a constant. If the mode is a vector mode, splat the value across
22753 all of the vector elements. */
22756 rs6000_load_constant_and_splat (machine_mode mode
, REAL_VALUE_TYPE dconst
)
22760 if (mode
== SFmode
|| mode
== DFmode
)
22762 rtx d
= const_double_from_real_value (dconst
, mode
);
22763 reg
= force_reg (mode
, d
);
22765 else if (mode
== V4SFmode
)
22767 rtx d
= const_double_from_real_value (dconst
, SFmode
);
22768 rtvec v
= gen_rtvec (4, d
, d
, d
, d
);
22769 reg
= gen_reg_rtx (mode
);
22770 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
22772 else if (mode
== V2DFmode
)
22774 rtx d
= const_double_from_real_value (dconst
, DFmode
);
22775 rtvec v
= gen_rtvec (2, d
, d
);
22776 reg
= gen_reg_rtx (mode
);
22777 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
22780 gcc_unreachable ();
22785 /* Generate an FMA instruction. */
22788 rs6000_emit_madd (rtx target
, rtx m1
, rtx m2
, rtx a
)
22790 machine_mode mode
= GET_MODE (target
);
22793 dst
= expand_ternary_op (mode
, fma_optab
, m1
, m2
, a
, target
, 0);
22794 gcc_assert (dst
!= NULL
);
22797 emit_move_insn (target
, dst
);
22800 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22803 rs6000_emit_nmsub (rtx dst
, rtx m1
, rtx m2
, rtx a
)
22805 machine_mode mode
= GET_MODE (dst
);
22808 /* This is a tad more complicated, since the fnma_optab is for
22809 a different expression: fma(-m1, m2, a), which is the same
22810 thing except in the case of signed zeros.
22812 Fortunately we know that if FMA is supported that FNMSUB is
22813 also supported in the ISA. Just expand it directly. */
22815 gcc_assert (optab_handler (fma_optab
, mode
) != CODE_FOR_nothing
);
22817 r
= gen_rtx_NEG (mode
, a
);
22818 r
= gen_rtx_FMA (mode
, m1
, m2
, r
);
22819 r
= gen_rtx_NEG (mode
, r
);
22820 emit_insn (gen_rtx_SET (dst
, r
));
22823 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22824 add a reg_note saying that this was a division. Support both scalar and
22825 vector divide. Assumes no trapping math and finite arguments. */
22828 rs6000_emit_swdiv (rtx dst
, rtx n
, rtx d
, bool note_p
)
22830 machine_mode mode
= GET_MODE (dst
);
22831 rtx one
, x0
, e0
, x1
, xprev
, eprev
, xnext
, enext
, u
, v
;
22834 /* Low precision estimates guarantee 5 bits of accuracy. High
22835 precision estimates guarantee 14 bits of accuracy. SFmode
22836 requires 23 bits of accuracy. DFmode requires 52 bits of
22837 accuracy. Each pass at least doubles the accuracy, leading
22838 to the following. */
22839 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
22840 if (mode
== DFmode
|| mode
== V2DFmode
)
22843 enum insn_code code
= optab_handler (smul_optab
, mode
);
22844 insn_gen_fn gen_mul
= GEN_FCN (code
);
22846 gcc_assert (code
!= CODE_FOR_nothing
);
22848 one
= rs6000_load_constant_and_splat (mode
, dconst1
);
22850 /* x0 = 1./d estimate */
22851 x0
= gen_reg_rtx (mode
);
22852 emit_insn (gen_rtx_SET (x0
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, d
),
22855 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
22858 /* e0 = 1. - d * x0 */
22859 e0
= gen_reg_rtx (mode
);
22860 rs6000_emit_nmsub (e0
, d
, x0
, one
);
22862 /* x1 = x0 + e0 * x0 */
22863 x1
= gen_reg_rtx (mode
);
22864 rs6000_emit_madd (x1
, e0
, x0
, x0
);
22866 for (i
= 0, xprev
= x1
, eprev
= e0
; i
< passes
- 2;
22867 ++i
, xprev
= xnext
, eprev
= enext
) {
22869 /* enext = eprev * eprev */
22870 enext
= gen_reg_rtx (mode
);
22871 emit_insn (gen_mul (enext
, eprev
, eprev
));
22873 /* xnext = xprev + enext * xprev */
22874 xnext
= gen_reg_rtx (mode
);
22875 rs6000_emit_madd (xnext
, enext
, xprev
, xprev
);
22881 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
22883 /* u = n * xprev */
22884 u
= gen_reg_rtx (mode
);
22885 emit_insn (gen_mul (u
, n
, xprev
));
22887 /* v = n - (d * u) */
22888 v
= gen_reg_rtx (mode
);
22889 rs6000_emit_nmsub (v
, d
, u
, n
);
22891 /* dst = (v * xprev) + u */
22892 rs6000_emit_madd (dst
, v
, xprev
, u
);
22895 add_reg_note (get_last_insn (), REG_EQUAL
, gen_rtx_DIV (mode
, n
, d
));
22898 /* Goldschmidt's Algorithm for single/double-precision floating point
22899 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
22902 rs6000_emit_swsqrt (rtx dst
, rtx src
, bool recip
)
22904 machine_mode mode
= GET_MODE (src
);
22905 rtx e
= gen_reg_rtx (mode
);
22906 rtx g
= gen_reg_rtx (mode
);
22907 rtx h
= gen_reg_rtx (mode
);
22909 /* Low precision estimates guarantee 5 bits of accuracy. High
22910 precision estimates guarantee 14 bits of accuracy. SFmode
22911 requires 23 bits of accuracy. DFmode requires 52 bits of
22912 accuracy. Each pass at least doubles the accuracy, leading
22913 to the following. */
22914 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
22915 if (mode
== DFmode
|| mode
== V2DFmode
)
22920 enum insn_code code
= optab_handler (smul_optab
, mode
);
22921 insn_gen_fn gen_mul
= GEN_FCN (code
);
22923 gcc_assert (code
!= CODE_FOR_nothing
);
22925 mhalf
= rs6000_load_constant_and_splat (mode
, dconsthalf
);
22927 /* e = rsqrt estimate */
22928 emit_insn (gen_rtx_SET (e
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, src
),
22931 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
22934 rtx zero
= force_reg (mode
, CONST0_RTX (mode
));
22936 if (mode
== SFmode
)
22938 rtx target
= emit_conditional_move (e
, { GT
, src
, zero
, mode
},
22941 emit_move_insn (e
, target
);
22945 rtx cond
= gen_rtx_GT (VOIDmode
, e
, zero
);
22946 rs6000_emit_vector_cond_expr (e
, e
, zero
, cond
, src
, zero
);
22950 /* g = sqrt estimate. */
22951 emit_insn (gen_mul (g
, e
, src
));
22952 /* h = 1/(2*sqrt) estimate. */
22953 emit_insn (gen_mul (h
, e
, mhalf
));
22959 rtx t
= gen_reg_rtx (mode
);
22960 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
22961 /* Apply correction directly to 1/rsqrt estimate. */
22962 rs6000_emit_madd (dst
, e
, t
, e
);
22966 for (i
= 0; i
< passes
; i
++)
22968 rtx t1
= gen_reg_rtx (mode
);
22969 rtx g1
= gen_reg_rtx (mode
);
22970 rtx h1
= gen_reg_rtx (mode
);
22972 rs6000_emit_nmsub (t1
, g
, h
, mhalf
);
22973 rs6000_emit_madd (g1
, g
, t1
, g
);
22974 rs6000_emit_madd (h1
, h
, t1
, h
);
22979 /* Multiply by 2 for 1/rsqrt. */
22980 emit_insn (gen_add3_insn (dst
, h
, h
));
22985 rtx t
= gen_reg_rtx (mode
);
22986 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
22987 rs6000_emit_madd (dst
, g
, t
, g
);
22993 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22994 (Power7) targets. DST is the target, and SRC is the argument operand. */
22997 rs6000_emit_popcount (rtx dst
, rtx src
)
22999 machine_mode mode
= GET_MODE (dst
);
23002 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
23003 if (TARGET_POPCNTD
)
23005 if (mode
== SImode
)
23006 emit_insn (gen_popcntdsi2 (dst
, src
));
23008 emit_insn (gen_popcntddi2 (dst
, src
));
23012 tmp1
= gen_reg_rtx (mode
);
23014 if (mode
== SImode
)
23016 emit_insn (gen_popcntbsi2 (tmp1
, src
));
23017 tmp2
= expand_mult (SImode
, tmp1
, GEN_INT (0x01010101),
23019 tmp2
= force_reg (SImode
, tmp2
);
23020 emit_insn (gen_lshrsi3 (dst
, tmp2
, GEN_INT (24)));
23024 emit_insn (gen_popcntbdi2 (tmp1
, src
));
23025 tmp2
= expand_mult (DImode
, tmp1
,
23026 GEN_INT ((HOST_WIDE_INT
)
23027 0x01010101 << 32 | 0x01010101),
23029 tmp2
= force_reg (DImode
, tmp2
);
23030 emit_insn (gen_lshrdi3 (dst
, tmp2
, GEN_INT (56)));
23035 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
23036 target, and SRC is the argument operand. */
23039 rs6000_emit_parity (rtx dst
, rtx src
)
23041 machine_mode mode
= GET_MODE (dst
);
23044 tmp
= gen_reg_rtx (mode
);
23046 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
23049 if (mode
== SImode
)
23051 emit_insn (gen_popcntbsi2 (tmp
, src
));
23052 emit_insn (gen_paritysi2_cmpb (dst
, tmp
));
23056 emit_insn (gen_popcntbdi2 (tmp
, src
));
23057 emit_insn (gen_paritydi2_cmpb (dst
, tmp
));
23062 if (mode
== SImode
)
23064 /* Is mult+shift >= shift+xor+shift+xor? */
23065 if (rs6000_cost
->mulsi_const
>= COSTS_N_INSNS (3))
23067 rtx tmp1
, tmp2
, tmp3
, tmp4
;
23069 tmp1
= gen_reg_rtx (SImode
);
23070 emit_insn (gen_popcntbsi2 (tmp1
, src
));
23072 tmp2
= gen_reg_rtx (SImode
);
23073 emit_insn (gen_lshrsi3 (tmp2
, tmp1
, GEN_INT (16)));
23074 tmp3
= gen_reg_rtx (SImode
);
23075 emit_insn (gen_xorsi3 (tmp3
, tmp1
, tmp2
));
23077 tmp4
= gen_reg_rtx (SImode
);
23078 emit_insn (gen_lshrsi3 (tmp4
, tmp3
, GEN_INT (8)));
23079 emit_insn (gen_xorsi3 (tmp
, tmp3
, tmp4
));
23082 rs6000_emit_popcount (tmp
, src
);
23083 emit_insn (gen_andsi3 (dst
, tmp
, const1_rtx
));
23087 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
23088 if (rs6000_cost
->muldi
>= COSTS_N_INSNS (5))
23090 rtx tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
;
23092 tmp1
= gen_reg_rtx (DImode
);
23093 emit_insn (gen_popcntbdi2 (tmp1
, src
));
23095 tmp2
= gen_reg_rtx (DImode
);
23096 emit_insn (gen_lshrdi3 (tmp2
, tmp1
, GEN_INT (32)));
23097 tmp3
= gen_reg_rtx (DImode
);
23098 emit_insn (gen_xordi3 (tmp3
, tmp1
, tmp2
));
23100 tmp4
= gen_reg_rtx (DImode
);
23101 emit_insn (gen_lshrdi3 (tmp4
, tmp3
, GEN_INT (16)));
23102 tmp5
= gen_reg_rtx (DImode
);
23103 emit_insn (gen_xordi3 (tmp5
, tmp3
, tmp4
));
23105 tmp6
= gen_reg_rtx (DImode
);
23106 emit_insn (gen_lshrdi3 (tmp6
, tmp5
, GEN_INT (8)));
23107 emit_insn (gen_xordi3 (tmp
, tmp5
, tmp6
));
23110 rs6000_emit_popcount (tmp
, src
);
23111 emit_insn (gen_anddi3 (dst
, tmp
, const1_rtx
));
23115 /* Expand an Altivec constant permutation for little endian mode.
23116 OP0 and OP1 are the input vectors and TARGET is the output vector.
23117 SEL specifies the constant permutation vector.
23119 There are two issues: First, the two input operands must be
23120 swapped so that together they form a double-wide array in LE
23121 order. Second, the vperm instruction has surprising behavior
23122 in LE mode: it interprets the elements of the source vectors
23123 in BE mode ("left to right") and interprets the elements of
23124 the destination vector in LE mode ("right to left"). To
23125 correct for this, we must subtract each element of the permute
23126 control vector from 31.
23128 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
23129 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
23130 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
23131 serve as the permute control vector. Then, in BE mode,
23135 places the desired result in vr9. However, in LE mode the
23136 vector contents will be
23138 vr10 = 00000003 00000002 00000001 00000000
23139 vr11 = 00000007 00000006 00000005 00000004
23141 The result of the vperm using the same permute control vector is
23143 vr9 = 05000000 07000000 01000000 03000000
23145 That is, the leftmost 4 bytes of vr10 are interpreted as the
23146 source for the rightmost 4 bytes of vr9, and so on.
23148 If we change the permute control vector to
23150 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
23158 vr9 = 00000006 00000004 00000002 00000000. */
23161 altivec_expand_vec_perm_const_le (rtx target
, rtx op0
, rtx op1
,
23162 const vec_perm_indices
&sel
)
23166 rtx constv
, unspec
;
23168 /* Unpack and adjust the constant selector. */
23169 for (i
= 0; i
< 16; ++i
)
23171 unsigned int elt
= 31 - (sel
[i
] & 31);
23172 perm
[i
] = GEN_INT (elt
);
23175 /* Expand to a permute, swapping the inputs and using the
23176 adjusted selector. */
23178 op0
= force_reg (V16QImode
, op0
);
23180 op1
= force_reg (V16QImode
, op1
);
23182 constv
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, perm
));
23183 constv
= force_reg (V16QImode
, constv
);
23184 unspec
= gen_rtx_UNSPEC (V16QImode
, gen_rtvec (3, op1
, op0
, constv
),
23186 if (!REG_P (target
))
23188 rtx tmp
= gen_reg_rtx (V16QImode
);
23189 emit_move_insn (tmp
, unspec
);
23193 emit_move_insn (target
, unspec
);
23196 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
23197 permute control vector. But here it's not a constant, so we must
23198 generate a vector NAND or NOR to do the adjustment. */
23201 altivec_expand_vec_perm_le (rtx operands
[4])
23203 rtx notx
, iorx
, unspec
;
23204 rtx target
= operands
[0];
23205 rtx op0
= operands
[1];
23206 rtx op1
= operands
[2];
23207 rtx sel
= operands
[3];
23209 rtx norreg
= gen_reg_rtx (V16QImode
);
23210 machine_mode mode
= GET_MODE (target
);
23212 /* Get everything in regs so the pattern matches. */
23214 op0
= force_reg (mode
, op0
);
23216 op1
= force_reg (mode
, op1
);
23218 sel
= force_reg (V16QImode
, sel
);
23219 if (!REG_P (target
))
23220 tmp
= gen_reg_rtx (mode
);
23222 if (TARGET_P9_VECTOR
)
23224 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, sel
),
23229 /* Invert the selector with a VNAND if available, else a VNOR.
23230 The VNAND is preferred for future fusion opportunities. */
23231 notx
= gen_rtx_NOT (V16QImode
, sel
);
23232 iorx
= (TARGET_P8_VECTOR
23233 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
23234 : gen_rtx_AND (V16QImode
, notx
, notx
));
23235 emit_insn (gen_rtx_SET (norreg
, iorx
));
23237 /* Permute with operands reversed and adjusted selector. */
23238 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, norreg
),
23242 /* Copy into target, possibly by way of a register. */
23243 if (!REG_P (target
))
23245 emit_move_insn (tmp
, unspec
);
23249 emit_move_insn (target
, unspec
);
23252 /* Expand an Altivec constant permutation. Return true if we match
23253 an efficient implementation; false to fall back to VPERM.
23255 OP0 and OP1 are the input vectors and TARGET is the output vector.
23256 SEL specifies the constant permutation vector. */
23259 altivec_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
,
23260 const vec_perm_indices
&sel
)
23262 struct altivec_perm_insn
{
23263 HOST_WIDE_INT mask
;
23264 enum insn_code impl
;
23265 unsigned char perm
[16];
23267 static const struct altivec_perm_insn patterns
[] = {
23268 {OPTION_MASK_ALTIVEC
,
23269 CODE_FOR_altivec_vpkuhum_direct
,
23270 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
23271 {OPTION_MASK_ALTIVEC
,
23272 CODE_FOR_altivec_vpkuwum_direct
,
23273 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
23274 {OPTION_MASK_ALTIVEC
,
23275 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghb_direct
23276 : CODE_FOR_altivec_vmrglb_direct
,
23277 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
23278 {OPTION_MASK_ALTIVEC
,
23279 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghh_direct
23280 : CODE_FOR_altivec_vmrglh_direct
,
23281 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
23282 {OPTION_MASK_ALTIVEC
,
23283 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghw_direct_v4si
23284 : CODE_FOR_altivec_vmrglw_direct_v4si
,
23285 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
23286 {OPTION_MASK_ALTIVEC
,
23287 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglb_direct
23288 : CODE_FOR_altivec_vmrghb_direct
,
23289 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
23290 {OPTION_MASK_ALTIVEC
,
23291 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglh_direct
23292 : CODE_FOR_altivec_vmrghh_direct
,
23293 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23294 {OPTION_MASK_ALTIVEC
,
23295 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglw_direct_v4si
23296 : CODE_FOR_altivec_vmrghw_direct_v4si
,
23297 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23298 {OPTION_MASK_P8_VECTOR
,
23299 BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgew_v4sf_direct
23300 : CODE_FOR_p8_vmrgow_v4sf_direct
,
23301 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23302 {OPTION_MASK_P8_VECTOR
,
23303 BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgow_v4sf_direct
23304 : CODE_FOR_p8_vmrgew_v4sf_direct
,
23305 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23306 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23307 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23308 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23309 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23310 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23311 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23312 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23313 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23315 unsigned int i
, j
, elt
, which
;
23316 unsigned char perm
[16];
23320 /* Unpack the constant selector. */
23321 for (i
= which
= 0; i
< 16; ++i
)
23324 which
|= (elt
< 16 ? 1 : 2);
23328 /* Simplify the constant selector based on operands. */
23332 gcc_unreachable ();
23336 if (!rtx_equal_p (op0
, op1
))
23341 for (i
= 0; i
< 16; ++i
)
23353 /* Look for splat patterns. */
23358 for (i
= 0; i
< 16; ++i
)
23359 if (perm
[i
] != elt
)
23363 if (!BYTES_BIG_ENDIAN
)
23365 emit_insn (gen_altivec_vspltb_direct (target
, op0
, GEN_INT (elt
)));
23371 for (i
= 0; i
< 16; i
+= 2)
23372 if (perm
[i
] != elt
|| perm
[i
+ 1] != elt
+ 1)
23376 int field
= BYTES_BIG_ENDIAN
? elt
/ 2 : 7 - elt
/ 2;
23377 x
= gen_reg_rtx (V8HImode
);
23378 emit_insn (gen_altivec_vsplth_direct (x
, gen_lowpart (V8HImode
, op0
),
23380 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23387 for (i
= 0; i
< 16; i
+= 4)
23389 || perm
[i
+ 1] != elt
+ 1
23390 || perm
[i
+ 2] != elt
+ 2
23391 || perm
[i
+ 3] != elt
+ 3)
23395 int field
= BYTES_BIG_ENDIAN
? elt
/ 4 : 3 - elt
/ 4;
23396 x
= gen_reg_rtx (V4SImode
);
23397 emit_insn (gen_altivec_vspltw_direct (x
, gen_lowpart (V4SImode
, op0
),
23399 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23405 /* Look for merge and pack patterns. */
23406 for (j
= 0; j
< ARRAY_SIZE (patterns
); ++j
)
23410 if ((patterns
[j
].mask
& rs6000_isa_flags
) == 0)
23413 elt
= patterns
[j
].perm
[0];
23414 if (perm
[0] == elt
)
23416 else if (perm
[0] == elt
+ 16)
23420 for (i
= 1; i
< 16; ++i
)
23422 elt
= patterns
[j
].perm
[i
];
23424 elt
= (elt
>= 16 ? elt
- 16 : elt
+ 16);
23425 else if (one_vec
&& elt
>= 16)
23427 if (perm
[i
] != elt
)
23432 enum insn_code icode
= patterns
[j
].impl
;
23433 machine_mode omode
= insn_data
[icode
].operand
[0].mode
;
23434 machine_mode imode
= insn_data
[icode
].operand
[1].mode
;
23436 rtx perm_idx
= GEN_INT (0);
23437 if (icode
== CODE_FOR_vsx_xxpermdi_v16qi
)
23454 perm_idx
= GEN_INT (perm_val
);
23457 /* For little-endian, don't use vpkuwum and vpkuhum if the
23458 underlying vector type is not V4SI and V8HI, respectively.
23459 For example, using vpkuwum with a V8HI picks up the even
23460 halfwords (BE numbering) when the even halfwords (LE
23461 numbering) are what we need. */
23462 if (!BYTES_BIG_ENDIAN
23463 && icode
== CODE_FOR_altivec_vpkuwum_direct
23465 && GET_MODE (op0
) != V4SImode
)
23467 && GET_MODE (XEXP (op0
, 0)) != V4SImode
)))
23469 if (!BYTES_BIG_ENDIAN
23470 && icode
== CODE_FOR_altivec_vpkuhum_direct
23472 && GET_MODE (op0
) != V8HImode
)
23474 && GET_MODE (XEXP (op0
, 0)) != V8HImode
)))
23477 /* For little-endian, the two input operands must be swapped
23478 (or swapped back) to ensure proper right-to-left numbering
23480 if (swapped
^ !BYTES_BIG_ENDIAN
23481 && icode
!= CODE_FOR_vsx_xxpermdi_v16qi
)
23482 std::swap (op0
, op1
);
23483 if (imode
!= V16QImode
)
23485 op0
= gen_lowpart (imode
, op0
);
23486 op1
= gen_lowpart (imode
, op1
);
23488 if (omode
== V16QImode
)
23491 x
= gen_reg_rtx (omode
);
23492 if (icode
== CODE_FOR_vsx_xxpermdi_v16qi
)
23493 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
, perm_idx
));
23495 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
));
23496 if (omode
!= V16QImode
)
23497 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23502 if (!BYTES_BIG_ENDIAN
)
23504 altivec_expand_vec_perm_const_le (target
, op0
, op1
, sel
);
23511 /* Expand a VSX Permute Doubleword constant permutation.
23512 Return true if we match an efficient implementation. */
23515 rs6000_expand_vec_perm_const_1 (rtx target
, rtx op0
, rtx op1
,
23516 unsigned char perm0
, unsigned char perm1
)
23520 /* If both selectors come from the same operand, fold to single op. */
23521 if ((perm0
& 2) == (perm1
& 2))
23528 /* If both operands are equal, fold to simpler permutation. */
23529 if (rtx_equal_p (op0
, op1
))
23532 perm1
= (perm1
& 1) + 2;
23534 /* If the first selector comes from the second operand, swap. */
23535 else if (perm0
& 2)
23541 std::swap (op0
, op1
);
23543 /* If the second selector does not come from the second operand, fail. */
23544 else if ((perm1
& 2) == 0)
23548 if (target
!= NULL
)
23550 machine_mode vmode
, dmode
;
23553 vmode
= GET_MODE (target
);
23554 gcc_assert (GET_MODE_NUNITS (vmode
) == 2);
23555 dmode
= mode_for_vector (GET_MODE_INNER (vmode
), 4).require ();
23556 x
= gen_rtx_VEC_CONCAT (dmode
, op0
, op1
);
23557 v
= gen_rtvec (2, GEN_INT (perm0
), GEN_INT (perm1
));
23558 x
= gen_rtx_VEC_SELECT (vmode
, x
, gen_rtx_PARALLEL (VOIDmode
, v
));
23559 emit_insn (gen_rtx_SET (target
, x
));
23564 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23567 rs6000_vectorize_vec_perm_const (machine_mode vmode
, machine_mode op_mode
,
23568 rtx target
, rtx op0
, rtx op1
,
23569 const vec_perm_indices
&sel
)
23571 if (vmode
!= op_mode
)
23574 bool testing_p
= !target
;
23576 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23577 if (TARGET_ALTIVEC
&& testing_p
)
23582 rtx nop0
= force_reg (vmode
, op0
);
23588 op1
= force_reg (vmode
, op1
);
23590 /* Check for ps_merge* or xxpermdi insns. */
23591 if ((vmode
== V2DFmode
|| vmode
== V2DImode
) && VECTOR_MEM_VSX_P (vmode
))
23595 op0
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 1);
23596 op1
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 2);
23598 if (rs6000_expand_vec_perm_const_1 (target
, op0
, op1
, sel
[0], sel
[1]))
23602 if (TARGET_ALTIVEC
)
23604 /* Force the target-independent code to lower to V16QImode. */
23605 if (vmode
!= V16QImode
)
23607 if (altivec_expand_vec_perm_const (target
, op0
, op1
, sel
))
23614 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23615 OP0 and OP1 are the input vectors and TARGET is the output vector.
23616 PERM specifies the constant permutation vector. */
23619 rs6000_do_expand_vec_perm (rtx target
, rtx op0
, rtx op1
,
23620 machine_mode vmode
, const vec_perm_builder
&perm
)
23622 rtx x
= expand_vec_perm_const (vmode
, op0
, op1
, perm
, BLKmode
, target
);
23624 emit_move_insn (target
, x
);
23627 /* Expand an extract even operation. */
23630 rs6000_expand_extract_even (rtx target
, rtx op0
, rtx op1
)
23632 machine_mode vmode
= GET_MODE (target
);
23633 unsigned i
, nelt
= GET_MODE_NUNITS (vmode
);
23634 vec_perm_builder
perm (nelt
, nelt
, 1);
23636 for (i
= 0; i
< nelt
; i
++)
23637 perm
.quick_push (i
* 2);
23639 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, perm
);
23642 /* Expand a vector interleave operation. */
23645 rs6000_expand_interleave (rtx target
, rtx op0
, rtx op1
, bool highp
)
23647 machine_mode vmode
= GET_MODE (target
);
23648 unsigned i
, high
, nelt
= GET_MODE_NUNITS (vmode
);
23649 vec_perm_builder
perm (nelt
, nelt
, 1);
23651 high
= (highp
? 0 : nelt
/ 2);
23652 for (i
= 0; i
< nelt
/ 2; i
++)
23654 perm
.quick_push (i
+ high
);
23655 perm
.quick_push (i
+ nelt
+ high
);
23658 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, perm
);
23661 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
23663 rs6000_scale_v2df (rtx tgt
, rtx src
, int scale
)
23665 HOST_WIDE_INT
hwi_scale (scale
);
23666 REAL_VALUE_TYPE r_pow
;
23667 rtvec v
= rtvec_alloc (2);
23669 rtx scale_vec
= gen_reg_rtx (V2DFmode
);
23670 (void)real_powi (&r_pow
, DFmode
, &dconst2
, hwi_scale
);
23671 elt
= const_double_from_real_value (r_pow
, DFmode
);
23672 RTVEC_ELT (v
, 0) = elt
;
23673 RTVEC_ELT (v
, 1) = elt
;
23674 rs6000_expand_vector_init (scale_vec
, gen_rtx_PARALLEL (V2DFmode
, v
));
23675 emit_insn (gen_mulv2df3 (tgt
, src
, scale_vec
));
23678 /* Return an RTX representing where to find the function value of a
23679 function returning MODE. */
23681 rs6000_complex_function_value (machine_mode mode
)
23683 unsigned int regno
;
23685 machine_mode inner
= GET_MODE_INNER (mode
);
23686 unsigned int inner_bytes
= GET_MODE_UNIT_SIZE (mode
);
23688 if (TARGET_FLOAT128_TYPE
23690 || (mode
== TCmode
&& TARGET_IEEEQUAD
)))
23691 regno
= ALTIVEC_ARG_RETURN
;
23693 else if (FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23694 regno
= FP_ARG_RETURN
;
23698 regno
= GP_ARG_RETURN
;
23700 /* 32-bit is OK since it'll go in r3/r4. */
23701 if (TARGET_32BIT
&& inner_bytes
>= 4)
23702 return gen_rtx_REG (mode
, regno
);
23705 if (inner_bytes
>= 8)
23706 return gen_rtx_REG (mode
, regno
);
23708 r1
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
),
23710 r2
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
+ 1),
23711 GEN_INT (inner_bytes
));
23712 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r2
));
23715 /* Return an rtx describing a return value of MODE as a PARALLEL
23716 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23717 stride REG_STRIDE. */
23720 rs6000_parallel_return (machine_mode mode
,
23721 int n_elts
, machine_mode elt_mode
,
23722 unsigned int regno
, unsigned int reg_stride
)
23724 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
23727 for (i
= 0; i
< n_elts
; i
++)
23729 rtx r
= gen_rtx_REG (elt_mode
, regno
);
23730 rtx off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
23731 XVECEXP (par
, 0, i
) = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
23732 regno
+= reg_stride
;
23738 /* Target hook for TARGET_FUNCTION_VALUE.
23740 An integer value is in r3 and a floating-point value is in fp1,
23741 unless -msoft-float. */
23744 rs6000_function_value (const_tree valtype
,
23745 const_tree fn_decl_or_type ATTRIBUTE_UNUSED
,
23746 bool outgoing ATTRIBUTE_UNUSED
)
23749 unsigned int regno
;
23750 machine_mode elt_mode
;
23753 /* Special handling for structs in darwin64. */
23755 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype
), valtype
))
23757 CUMULATIVE_ARGS valcum
;
23761 valcum
.fregno
= FP_ARG_MIN_REG
;
23762 valcum
.vregno
= ALTIVEC_ARG_MIN_REG
;
23763 /* Do a trial code generation as if this were going to be passed as
23764 an argument; if any part goes in memory, we return NULL. */
23765 valret
= rs6000_darwin64_record_arg (&valcum
, valtype
, true, /* retval= */ true);
23768 /* Otherwise fall through to standard ABI rules. */
23771 mode
= TYPE_MODE (valtype
);
23773 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23774 if (rs6000_discover_homogeneous_aggregate (mode
, valtype
, &elt_mode
, &n_elts
))
23776 int first_reg
, n_regs
;
23778 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode
))
23780 /* _Decimal128 must use even/odd register pairs. */
23781 first_reg
= (elt_mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
23782 n_regs
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
23786 first_reg
= ALTIVEC_ARG_RETURN
;
23790 return rs6000_parallel_return (mode
, n_elts
, elt_mode
, first_reg
, n_regs
);
23793 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
23794 if (TARGET_32BIT
&& TARGET_POWERPC64
)
23803 int count
= GET_MODE_SIZE (mode
) / 4;
23804 return rs6000_parallel_return (mode
, count
, SImode
, GP_ARG_RETURN
, 1);
23807 if ((INTEGRAL_TYPE_P (valtype
)
23808 && GET_MODE_BITSIZE (mode
) < (TARGET_32BIT
? 32 : 64))
23809 || POINTER_TYPE_P (valtype
))
23810 mode
= TARGET_32BIT
? SImode
: DImode
;
23812 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23813 /* _Decimal128 must use an even/odd register pair. */
23814 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
23815 else if (SCALAR_FLOAT_TYPE_P (valtype
) && TARGET_HARD_FLOAT
23816 && !FLOAT128_VECTOR_P (mode
))
23817 regno
= FP_ARG_RETURN
;
23818 else if (TREE_CODE (valtype
) == COMPLEX_TYPE
23819 && targetm
.calls
.split_complex_arg
)
23820 return rs6000_complex_function_value (mode
);
23821 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23822 return register is used in both cases, and we won't see V2DImode/V2DFmode
23823 for pure altivec, combine the two cases. */
23824 else if ((VECTOR_TYPE_P (valtype
) || VECTOR_ALIGNMENT_P (mode
))
23825 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
23826 && ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
23827 regno
= ALTIVEC_ARG_RETURN
;
23829 regno
= GP_ARG_RETURN
;
23831 return gen_rtx_REG (mode
, regno
);
23834 /* Define how to find the value returned by a library function
23835 assuming the value has mode MODE. */
23837 rs6000_libcall_value (machine_mode mode
)
23839 unsigned int regno
;
23841 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
23842 if (TARGET_32BIT
&& TARGET_POWERPC64
&& mode
== DImode
)
23843 return rs6000_parallel_return (mode
, 2, SImode
, GP_ARG_RETURN
, 1);
23845 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23846 /* _Decimal128 must use an even/odd register pair. */
23847 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
23848 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
) && TARGET_HARD_FLOAT
)
23849 regno
= FP_ARG_RETURN
;
23850 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23851 return register is used in both cases, and we won't see V2DImode/V2DFmode
23852 for pure altivec, combine the two cases. */
23853 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
)
23854 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
)
23855 regno
= ALTIVEC_ARG_RETURN
;
23856 else if (COMPLEX_MODE_P (mode
) && targetm
.calls
.split_complex_arg
)
23857 return rs6000_complex_function_value (mode
);
23859 regno
= GP_ARG_RETURN
;
23861 return gen_rtx_REG (mode
, regno
);
23864 /* Compute register pressure classes. We implement the target hook to avoid
23865 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23866 lead to incorrect estimates of number of available registers and therefor
23867 increased register pressure/spill. */
23869 rs6000_compute_pressure_classes (enum reg_class
*pressure_classes
)
23874 pressure_classes
[n
++] = GENERAL_REGS
;
23875 if (TARGET_ALTIVEC
)
23876 pressure_classes
[n
++] = ALTIVEC_REGS
;
23878 pressure_classes
[n
++] = VSX_REGS
;
23881 if (TARGET_HARD_FLOAT
)
23882 pressure_classes
[n
++] = FLOAT_REGS
;
23884 pressure_classes
[n
++] = CR_REGS
;
23885 pressure_classes
[n
++] = SPECIAL_REGS
;
23890 /* Given FROM and TO register numbers, say whether this elimination is allowed.
23891 Frame pointer elimination is automatically handled.
23893 For the RS/6000, if frame pointer elimination is being done, we would like
23894 to convert ap into fp, not sp.
23896 We need r30 if -mminimal-toc was specified, and there are constant pool
23900 rs6000_can_eliminate (const int from
, const int to
)
23902 return (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
23903 ? ! frame_pointer_needed
23904 : from
== RS6000_PIC_OFFSET_TABLE_REGNUM
23905 ? ! TARGET_MINIMAL_TOC
|| TARGET_NO_TOC_OR_PCREL
23906 || constant_pool_empty_p ()
23910 /* Define the offset between two registers, FROM to be eliminated and its
23911 replacement TO, at the start of a routine. */
23913 rs6000_initial_elimination_offset (int from
, int to
)
23915 rs6000_stack_t
*info
= rs6000_stack_info ();
23916 HOST_WIDE_INT offset
;
23918 if (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
23919 offset
= info
->push_p
? 0 : -info
->total_size
;
23920 else if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
23922 offset
= info
->push_p
? 0 : -info
->total_size
;
23923 if (FRAME_GROWS_DOWNWARD
)
23924 offset
+= info
->fixed_size
+ info
->vars_size
+ info
->parm_size
;
23926 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
23927 offset
= FRAME_GROWS_DOWNWARD
23928 ? info
->fixed_size
+ info
->vars_size
+ info
->parm_size
23930 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
23931 offset
= info
->total_size
;
23932 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
23933 offset
= info
->push_p
? info
->total_size
: 0;
23934 else if (from
== RS6000_PIC_OFFSET_TABLE_REGNUM
)
23937 gcc_unreachable ();
23942 /* Fill in sizes of registers used by unwinder. */
23945 rs6000_init_dwarf_reg_sizes_extra (tree address
)
23947 if (TARGET_MACHO
&& ! TARGET_ALTIVEC
)
23950 machine_mode mode
= TYPE_MODE (char_type_node
);
23951 rtx addr
= expand_expr (address
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
23952 rtx mem
= gen_rtx_MEM (BLKmode
, addr
);
23953 rtx value
= gen_int_mode (16, mode
);
23955 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23956 The unwinder still needs to know the size of Altivec registers. */
23958 for (i
= FIRST_ALTIVEC_REGNO
; i
< LAST_ALTIVEC_REGNO
+1; i
++)
23960 int column
= DWARF_REG_TO_UNWIND_COLUMN
23961 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i
), true));
23962 HOST_WIDE_INT offset
= column
* GET_MODE_SIZE (mode
);
23964 emit_move_insn (adjust_address (mem
, mode
, offset
), value
);
23969 /* Map internal gcc register numbers to debug format register numbers.
23970 FORMAT specifies the type of debug register number to use:
23971 0 -- debug information, except for frame-related sections
23972 1 -- DWARF .debug_frame section
23973 2 -- DWARF .eh_frame section */
23976 rs6000_debugger_regno (unsigned int regno
, unsigned int format
)
23978 /* On some platforms, we use the standard DWARF register
23979 numbering for .debug_info and .debug_frame. */
23980 if ((format
== 0 && dwarf_debuginfo_p ()) || format
== 1)
23982 #ifdef RS6000_USE_DWARF_NUMBERING
23985 if (FP_REGNO_P (regno
))
23986 return regno
- FIRST_FPR_REGNO
+ 32;
23987 if (ALTIVEC_REGNO_P (regno
))
23988 return regno
- FIRST_ALTIVEC_REGNO
+ 1124;
23989 if (regno
== LR_REGNO
)
23991 if (regno
== CTR_REGNO
)
23993 if (regno
== CA_REGNO
)
23994 return 101; /* XER */
23995 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23996 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23997 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23998 to the DWARF reg for CR. */
23999 if (format
== 1 && regno
== CR2_REGNO
)
24001 if (CR_REGNO_P (regno
))
24002 return regno
- CR0_REGNO
+ 86;
24003 if (regno
== VRSAVE_REGNO
)
24005 if (regno
== VSCR_REGNO
)
24008 /* These do not make much sense. */
24009 if (regno
== FRAME_POINTER_REGNUM
)
24011 if (regno
== ARG_POINTER_REGNUM
)
24016 gcc_unreachable ();
24020 /* We use the GCC 7 (and before) internal number for non-DWARF debug
24021 information, and also for .eh_frame. */
24022 /* Translate the regnos to their numbers in GCC 7 (and before). */
24025 if (FP_REGNO_P (regno
))
24026 return regno
- FIRST_FPR_REGNO
+ 32;
24027 if (ALTIVEC_REGNO_P (regno
))
24028 return regno
- FIRST_ALTIVEC_REGNO
+ 77;
24029 if (regno
== LR_REGNO
)
24031 if (regno
== CTR_REGNO
)
24033 if (regno
== CA_REGNO
)
24034 return 76; /* XER */
24035 if (CR_REGNO_P (regno
))
24036 return regno
- CR0_REGNO
+ 68;
24037 if (regno
== VRSAVE_REGNO
)
24039 if (regno
== VSCR_REGNO
)
24042 if (regno
== FRAME_POINTER_REGNUM
)
24044 if (regno
== ARG_POINTER_REGNUM
)
24049 gcc_unreachable ();
24052 /* target hook eh_return_filter_mode */
24053 static scalar_int_mode
24054 rs6000_eh_return_filter_mode (void)
24056 return TARGET_32BIT
? SImode
: word_mode
;
24059 /* Target hook for translate_mode_attribute. */
24060 static machine_mode
24061 rs6000_translate_mode_attribute (machine_mode mode
)
24063 if ((FLOAT128_IEEE_P (mode
)
24064 && ieee128_float_type_node
== long_double_type_node
)
24065 || (FLOAT128_IBM_P (mode
)
24066 && ibm128_float_type_node
== long_double_type_node
))
24067 return COMPLEX_MODE_P (mode
) ? E_TCmode
: E_TFmode
;
24071 /* Target hook for scalar_mode_supported_p. */
24073 rs6000_scalar_mode_supported_p (scalar_mode mode
)
24075 /* -m32 does not support TImode. This is the default, from
24076 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
24077 same ABI as for -m32. But default_scalar_mode_supported_p allows
24078 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
24079 for -mpowerpc64. */
24080 if (TARGET_32BIT
&& mode
== TImode
)
24083 if (DECIMAL_FLOAT_MODE_P (mode
))
24084 return default_decimal_float_supported_p ();
24085 else if (TARGET_FLOAT128_TYPE
&& (mode
== KFmode
|| mode
== IFmode
))
24088 return default_scalar_mode_supported_p (mode
);
24091 /* Target hook for libgcc_floating_mode_supported_p. */
24094 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode
)
24103 /* We only return true for KFmode if IEEE 128-bit types are supported, and
24104 if long double does not use the IEEE 128-bit format. If long double
24105 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
24106 Because the code will not use KFmode in that case, there will be aborts
24107 because it can't find KFmode in the Floatn types. */
24109 return TARGET_FLOAT128_TYPE
&& !TARGET_IEEEQUAD
;
24116 /* Target hook for vector_mode_supported_p. */
24118 rs6000_vector_mode_supported_p (machine_mode mode
)
24120 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
24121 128-bit, the compiler might try to widen IEEE 128-bit to IBM
24123 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
) && !FLOAT128_IEEE_P (mode
))
24130 /* Target hook for floatn_mode. */
24131 static opt_scalar_float_mode
24132 rs6000_floatn_mode (int n
, bool extended
)
24142 if (TARGET_FLOAT128_TYPE
)
24143 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
24145 return opt_scalar_float_mode ();
24148 return opt_scalar_float_mode ();
24151 /* Those are the only valid _FloatNx types. */
24152 gcc_unreachable ();
24166 if (TARGET_FLOAT128_TYPE
)
24167 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
24169 return opt_scalar_float_mode ();
24172 return opt_scalar_float_mode ();
24178 /* Target hook for c_mode_for_suffix. */
24179 static machine_mode
24180 rs6000_c_mode_for_suffix (char suffix
)
24182 if (TARGET_FLOAT128_TYPE
)
24184 if (suffix
== 'q' || suffix
== 'Q')
24185 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
24187 /* At the moment, we are not defining a suffix for IBM extended double.
24188 If/when the default for -mabi=ieeelongdouble is changed, and we want
24189 to support __ibm128 constants in legacy library code, we may need to
24190 re-evalaute this decision. Currently, c-lex.cc only supports 'w' and
24191 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
24192 __float80 constants. */
24198 /* Target hook for invalid_arg_for_unprototyped_fn. */
24199 static const char *
24200 invalid_arg_for_unprototyped_fn (const_tree typelist
, const_tree funcdecl
, const_tree val
)
24202 return (!rs6000_darwin64_abi
24204 && VECTOR_TYPE_P (TREE_TYPE (val
))
24205 && (funcdecl
== NULL_TREE
24206 || (TREE_CODE (funcdecl
) == FUNCTION_DECL
24207 && DECL_BUILT_IN_CLASS (funcdecl
) != BUILT_IN_MD
)))
24208 ? N_("AltiVec argument passed to unprototyped function")
24212 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
24213 setup by using __stack_chk_fail_local hidden function instead of
24214 calling __stack_chk_fail directly. Otherwise it is better to call
24215 __stack_chk_fail directly. */
24217 static tree ATTRIBUTE_UNUSED
24218 rs6000_stack_protect_fail (void)
24220 return (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
24221 ? default_hidden_stack_protect_fail ()
24222 : default_external_stack_protect_fail ();
24225 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
24228 static unsigned HOST_WIDE_INT
24229 rs6000_asan_shadow_offset (void)
24231 return (unsigned HOST_WIDE_INT
) 1 << (TARGET_64BIT
? 41 : 29);
24235 /* Mask options that we want to support inside of attribute((target)) and
24236 #pragma GCC target operations. Note, we do not include things like
24237 64/32-bit, endianness, hard/soft floating point, etc. that would have
24238 different calling sequences. */
24240 struct rs6000_opt_mask
{
24241 const char *name
; /* option name */
24242 HOST_WIDE_INT mask
; /* mask to set */
24243 bool invert
; /* invert sense of mask */
24244 bool valid_target
; /* option is a target option */
24247 static struct rs6000_opt_mask
const rs6000_opt_masks
[] =
24249 { "altivec", OPTION_MASK_ALTIVEC
, false, true },
24250 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
,
24252 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR
,
24254 { "cmpb", OPTION_MASK_CMPB
, false, true },
24255 { "crypto", OPTION_MASK_CRYPTO
, false, true },
24256 { "direct-move", OPTION_MASK_DIRECT_MOVE
, false, true },
24257 { "dlmzb", OPTION_MASK_DLMZB
, false, true },
24258 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX
,
24260 { "float128", OPTION_MASK_FLOAT128_KEYWORD
, false, true },
24261 { "float128-hardware", OPTION_MASK_FLOAT128_HW
, false, true },
24262 { "fprnd", OPTION_MASK_FPRND
, false, true },
24263 { "power10", OPTION_MASK_POWER10
, false, true },
24264 { "hard-dfp", OPTION_MASK_DFP
, false, true },
24265 { "htm", OPTION_MASK_HTM
, false, true },
24266 { "isel", OPTION_MASK_ISEL
, false, true },
24267 { "mfcrf", OPTION_MASK_MFCRF
, false, true },
24268 { "mfpgpr", 0, false, true },
24269 { "mma", OPTION_MASK_MMA
, false, true },
24270 { "modulo", OPTION_MASK_MODULO
, false, true },
24271 { "mulhw", OPTION_MASK_MULHW
, false, true },
24272 { "multiple", OPTION_MASK_MULTIPLE
, false, true },
24273 { "pcrel", OPTION_MASK_PCREL
, false, true },
24274 { "pcrel-opt", OPTION_MASK_PCREL_OPT
, false, true },
24275 { "popcntb", OPTION_MASK_POPCNTB
, false, true },
24276 { "popcntd", OPTION_MASK_POPCNTD
, false, true },
24277 { "power8-fusion", OPTION_MASK_P8_FUSION
, false, true },
24278 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN
, false, true },
24279 { "power8-vector", OPTION_MASK_P8_VECTOR
, false, true },
24280 { "power9-minmax", OPTION_MASK_P9_MINMAX
, false, true },
24281 { "power9-misc", OPTION_MASK_P9_MISC
, false, true },
24282 { "power9-vector", OPTION_MASK_P9_VECTOR
, false, true },
24283 { "power10-fusion", OPTION_MASK_P10_FUSION
, false, true },
24284 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT
, false, true },
24285 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT
, false, true },
24286 { "prefixed", OPTION_MASK_PREFIXED
, false, true },
24287 { "quad-memory", OPTION_MASK_QUAD_MEMORY
, false, true },
24288 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC
, false, true },
24289 { "recip-precision", OPTION_MASK_RECIP_PRECISION
, false, true },
24290 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT
, false, true },
24291 { "string", 0, false, true },
24292 { "update", OPTION_MASK_NO_UPDATE
, true , true },
24293 { "vsx", OPTION_MASK_VSX
, false, true },
24294 #ifdef OPTION_MASK_64BIT
24296 { "aix64", OPTION_MASK_64BIT
, false, false },
24297 { "aix32", OPTION_MASK_64BIT
, true, false },
24299 { "64", OPTION_MASK_64BIT
, false, false },
24300 { "32", OPTION_MASK_64BIT
, true, false },
24303 #ifdef OPTION_MASK_EABI
24304 { "eabi", OPTION_MASK_EABI
, false, false },
24306 #ifdef OPTION_MASK_LITTLE_ENDIAN
24307 { "little", OPTION_MASK_LITTLE_ENDIAN
, false, false },
24308 { "big", OPTION_MASK_LITTLE_ENDIAN
, true, false },
24310 #ifdef OPTION_MASK_RELOCATABLE
24311 { "relocatable", OPTION_MASK_RELOCATABLE
, false, false },
24313 #ifdef OPTION_MASK_STRICT_ALIGN
24314 { "strict-align", OPTION_MASK_STRICT_ALIGN
, false, false },
24316 { "soft-float", OPTION_MASK_SOFT_FLOAT
, false, false },
24317 { "string", 0, false, false },
24320 /* Option variables that we want to support inside attribute((target)) and
24321 #pragma GCC target operations. */
24323 struct rs6000_opt_var
{
24324 const char *name
; /* option name */
24325 size_t global_offset
; /* offset of the option in global_options. */
24326 size_t target_offset
; /* offset of the option in target options. */
24329 static struct rs6000_opt_var
const rs6000_opt_vars
[] =
24332 offsetof (struct gcc_options
, x_TARGET_FRIZ
),
24333 offsetof (struct cl_target_option
, x_TARGET_FRIZ
), },
24334 { "avoid-indexed-addresses",
24335 offsetof (struct gcc_options
, x_TARGET_AVOID_XFORM
),
24336 offsetof (struct cl_target_option
, x_TARGET_AVOID_XFORM
) },
24338 offsetof (struct gcc_options
, x_rs6000_default_long_calls
),
24339 offsetof (struct cl_target_option
, x_rs6000_default_long_calls
), },
24340 { "optimize-swaps",
24341 offsetof (struct gcc_options
, x_rs6000_optimize_swaps
),
24342 offsetof (struct cl_target_option
, x_rs6000_optimize_swaps
), },
24343 { "allow-movmisalign",
24344 offsetof (struct gcc_options
, x_TARGET_ALLOW_MOVMISALIGN
),
24345 offsetof (struct cl_target_option
, x_TARGET_ALLOW_MOVMISALIGN
), },
24347 offsetof (struct gcc_options
, x_TARGET_SCHED_GROUPS
),
24348 offsetof (struct cl_target_option
, x_TARGET_SCHED_GROUPS
), },
24350 offsetof (struct gcc_options
, x_TARGET_ALWAYS_HINT
),
24351 offsetof (struct cl_target_option
, x_TARGET_ALWAYS_HINT
), },
24352 { "align-branch-targets",
24353 offsetof (struct gcc_options
, x_TARGET_ALIGN_BRANCH_TARGETS
),
24354 offsetof (struct cl_target_option
, x_TARGET_ALIGN_BRANCH_TARGETS
), },
24356 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
24357 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
24359 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
24360 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
24361 { "speculate-indirect-jumps",
24362 offsetof (struct gcc_options
, x_rs6000_speculate_indirect_jumps
),
24363 offsetof (struct cl_target_option
, x_rs6000_speculate_indirect_jumps
), },
24366 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24367 parsing. Return true if there were no errors. */
24370 rs6000_inner_target_options (tree args
, bool attr_p
)
24374 if (args
== NULL_TREE
)
24377 else if (TREE_CODE (args
) == STRING_CST
)
24379 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
24382 while ((q
= strtok (p
, ",")) != NULL
)
24384 bool error_p
= false;
24385 bool not_valid_p
= false;
24386 const char *cpu_opt
= NULL
;
24389 if (startswith (q
, "cpu="))
24391 int cpu_index
= rs6000_cpu_name_lookup (q
+4);
24392 if (cpu_index
>= 0)
24393 rs6000_cpu_index
= cpu_index
;
24400 else if (startswith (q
, "tune="))
24402 int tune_index
= rs6000_cpu_name_lookup (q
+5);
24403 if (tune_index
>= 0)
24404 rs6000_tune_index
= tune_index
;
24414 bool invert
= false;
24418 if (startswith (r
, "no-"))
24424 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_masks
); i
++)
24425 if (strcmp (r
, rs6000_opt_masks
[i
].name
) == 0)
24427 HOST_WIDE_INT mask
= rs6000_opt_masks
[i
].mask
;
24429 if (!rs6000_opt_masks
[i
].valid_target
)
24430 not_valid_p
= true;
24434 rs6000_isa_flags_explicit
|= mask
;
24436 /* VSX needs altivec, so -mvsx automagically sets
24437 altivec and disables -mavoid-indexed-addresses. */
24440 if (mask
== OPTION_MASK_VSX
)
24442 mask
|= OPTION_MASK_ALTIVEC
;
24443 TARGET_AVOID_XFORM
= 0;
24447 if (rs6000_opt_masks
[i
].invert
)
24451 rs6000_isa_flags
&= ~mask
;
24453 rs6000_isa_flags
|= mask
;
24458 if (error_p
&& !not_valid_p
)
24460 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_vars
); i
++)
24461 if (strcmp (r
, rs6000_opt_vars
[i
].name
) == 0)
24463 size_t j
= rs6000_opt_vars
[i
].global_offset
;
24464 *((int *) ((char *)&global_options
+ j
)) = !invert
;
24466 not_valid_p
= false;
24474 const char *eprefix
, *esuffix
;
24479 eprefix
= "__attribute__((__target__(";
24484 eprefix
= "#pragma GCC target ";
24489 error ("invalid cpu %qs for %s%qs%s", cpu_opt
, eprefix
,
24491 else if (not_valid_p
)
24492 error ("%s%qs%s is not allowed", eprefix
, q
, esuffix
);
24494 error ("%s%qs%s is invalid", eprefix
, q
, esuffix
);
24499 else if (TREE_CODE (args
) == TREE_LIST
)
24503 tree value
= TREE_VALUE (args
);
24506 bool ret2
= rs6000_inner_target_options (value
, attr_p
);
24510 args
= TREE_CHAIN (args
);
24512 while (args
!= NULL_TREE
);
24517 error ("attribute %<target%> argument not a string");
24524 /* Print out the target options as a list for -mdebug=target. */
24527 rs6000_debug_target_options (tree args
, const char *prefix
)
24529 if (args
== NULL_TREE
)
24530 fprintf (stderr
, "%s<NULL>", prefix
);
24532 else if (TREE_CODE (args
) == STRING_CST
)
24534 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
24537 while ((q
= strtok (p
, ",")) != NULL
)
24540 fprintf (stderr
, "%s\"%s\"", prefix
, q
);
24545 else if (TREE_CODE (args
) == TREE_LIST
)
24549 tree value
= TREE_VALUE (args
);
24552 rs6000_debug_target_options (value
, prefix
);
24555 args
= TREE_CHAIN (args
);
24557 while (args
!= NULL_TREE
);
24561 gcc_unreachable ();
24567 /* Hook to validate attribute((target("..."))). */
24570 rs6000_valid_attribute_p (tree fndecl
,
24571 tree
ARG_UNUSED (name
),
24575 struct cl_target_option cur_target
;
24578 tree new_target
, new_optimize
;
24579 tree func_optimize
;
24581 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
24583 if (TARGET_DEBUG_TARGET
)
24585 tree tname
= DECL_NAME (fndecl
);
24586 fprintf (stderr
, "\n==================== rs6000_valid_attribute_p:\n");
24588 fprintf (stderr
, "function: %.*s\n",
24589 (int) IDENTIFIER_LENGTH (tname
),
24590 IDENTIFIER_POINTER (tname
));
24592 fprintf (stderr
, "function: unknown\n");
24594 fprintf (stderr
, "args:");
24595 rs6000_debug_target_options (args
, " ");
24596 fprintf (stderr
, "\n");
24599 fprintf (stderr
, "flags: 0x%x\n", flags
);
24601 fprintf (stderr
, "--------------------\n");
24604 /* attribute((target("default"))) does nothing, beyond
24605 affecting multi-versioning. */
24606 if (TREE_VALUE (args
)
24607 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
24608 && TREE_CHAIN (args
) == NULL_TREE
24609 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
24612 old_optimize
= build_optimization_node (&global_options
,
24613 &global_options_set
);
24614 func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
24616 /* If the function changed the optimization levels as well as setting target
24617 options, start with the optimizations specified. */
24618 if (func_optimize
&& func_optimize
!= old_optimize
)
24619 cl_optimization_restore (&global_options
, &global_options_set
,
24620 TREE_OPTIMIZATION (func_optimize
));
24622 /* The target attributes may also change some optimization flags, so update
24623 the optimization options if necessary. */
24624 cl_target_option_save (&cur_target
, &global_options
, &global_options_set
);
24625 rs6000_cpu_index
= rs6000_tune_index
= -1;
24626 ret
= rs6000_inner_target_options (args
, true);
24628 /* Set up any additional state. */
24631 ret
= rs6000_option_override_internal (false);
24632 new_target
= build_target_option_node (&global_options
,
24633 &global_options_set
);
24638 new_optimize
= build_optimization_node (&global_options
,
24639 &global_options_set
);
24646 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
24648 if (old_optimize
!= new_optimize
)
24649 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
24652 cl_target_option_restore (&global_options
, &global_options_set
, &cur_target
);
24654 if (old_optimize
!= new_optimize
)
24655 cl_optimization_restore (&global_options
, &global_options_set
,
24656 TREE_OPTIMIZATION (old_optimize
));
24662 /* Hook to validate the current #pragma GCC target and set the state, and
24663 update the macros based on what was changed. If ARGS is NULL, then
24664 POP_TARGET is used to reset the options. */
24667 rs6000_pragma_target_parse (tree args
, tree pop_target
)
24669 tree prev_tree
= build_target_option_node (&global_options
,
24670 &global_options_set
);
24672 struct cl_target_option
*prev_opt
, *cur_opt
;
24673 HOST_WIDE_INT prev_flags
, cur_flags
, diff_flags
;
24675 if (TARGET_DEBUG_TARGET
)
24677 fprintf (stderr
, "\n==================== rs6000_pragma_target_parse\n");
24678 fprintf (stderr
, "args:");
24679 rs6000_debug_target_options (args
, " ");
24680 fprintf (stderr
, "\n");
24684 fprintf (stderr
, "pop_target:\n");
24685 debug_tree (pop_target
);
24688 fprintf (stderr
, "pop_target: <NULL>\n");
24690 fprintf (stderr
, "--------------------\n");
24695 cur_tree
= ((pop_target
)
24697 : target_option_default_node
);
24698 cl_target_option_restore (&global_options
, &global_options_set
,
24699 TREE_TARGET_OPTION (cur_tree
));
24703 rs6000_cpu_index
= rs6000_tune_index
= -1;
24704 if (!rs6000_inner_target_options (args
, false)
24705 || !rs6000_option_override_internal (false)
24706 || (cur_tree
= build_target_option_node (&global_options
,
24707 &global_options_set
))
24710 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
24711 fprintf (stderr
, "invalid pragma\n");
24717 target_option_current_node
= cur_tree
;
24718 rs6000_activate_target_options (target_option_current_node
);
24720 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24721 change the macros that are defined. */
24722 if (rs6000_target_modify_macros_ptr
)
24724 prev_opt
= TREE_TARGET_OPTION (prev_tree
);
24725 prev_flags
= prev_opt
->x_rs6000_isa_flags
;
24727 cur_opt
= TREE_TARGET_OPTION (cur_tree
);
24728 cur_flags
= cur_opt
->x_rs6000_isa_flags
;
24730 diff_flags
= (prev_flags
^ cur_flags
);
24732 if (diff_flags
!= 0)
24734 /* Delete old macros. */
24735 rs6000_target_modify_macros_ptr (false,
24736 prev_flags
& diff_flags
);
24738 /* Define new macros. */
24739 rs6000_target_modify_macros_ptr (true,
24740 cur_flags
& diff_flags
);
24748 /* Remember the last target of rs6000_set_current_function. */
24749 static GTY(()) tree rs6000_previous_fndecl
;
24751 /* Restore target's globals from NEW_TREE and invalidate the
24752 rs6000_previous_fndecl cache. */
24755 rs6000_activate_target_options (tree new_tree
)
24757 cl_target_option_restore (&global_options
, &global_options_set
,
24758 TREE_TARGET_OPTION (new_tree
));
24759 if (TREE_TARGET_GLOBALS (new_tree
))
24760 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
24761 else if (new_tree
== target_option_default_node
)
24762 restore_target_globals (&default_target_globals
);
24764 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
24765 rs6000_previous_fndecl
= NULL_TREE
;
24768 /* Establish appropriate back-end context for processing the function
24769 FNDECL. The argument might be NULL to indicate processing at top
24770 level, outside of any function scope. */
24772 rs6000_set_current_function (tree fndecl
)
24774 if (TARGET_DEBUG_TARGET
)
24776 fprintf (stderr
, "\n==================== rs6000_set_current_function");
24779 fprintf (stderr
, ", fndecl %s (%p)",
24780 (DECL_NAME (fndecl
)
24781 ? IDENTIFIER_POINTER (DECL_NAME (fndecl
))
24782 : "<unknown>"), (void *)fndecl
);
24784 if (rs6000_previous_fndecl
)
24785 fprintf (stderr
, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl
);
24787 fprintf (stderr
, "\n");
24790 /* Only change the context if the function changes. This hook is called
24791 several times in the course of compiling a function, and we don't want to
24792 slow things down too much or call target_reinit when it isn't safe. */
24793 if (fndecl
== rs6000_previous_fndecl
)
24797 if (rs6000_previous_fndecl
== NULL_TREE
)
24798 old_tree
= target_option_current_node
;
24799 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
))
24800 old_tree
= DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
);
24802 old_tree
= target_option_default_node
;
24805 if (fndecl
== NULL_TREE
)
24807 if (old_tree
!= target_option_current_node
)
24808 new_tree
= target_option_current_node
;
24810 new_tree
= NULL_TREE
;
24814 new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
24815 if (new_tree
== NULL_TREE
)
24816 new_tree
= target_option_default_node
;
24819 if (TARGET_DEBUG_TARGET
)
24823 fprintf (stderr
, "\nnew fndecl target specific options:\n");
24824 debug_tree (new_tree
);
24829 fprintf (stderr
, "\nold fndecl target specific options:\n");
24830 debug_tree (old_tree
);
24833 if (old_tree
!= NULL_TREE
|| new_tree
!= NULL_TREE
)
24834 fprintf (stderr
, "--------------------\n");
24837 if (new_tree
&& old_tree
!= new_tree
)
24838 rs6000_activate_target_options (new_tree
);
24841 rs6000_previous_fndecl
= fndecl
;
24845 /* Save the current options */
24848 rs6000_function_specific_save (struct cl_target_option
*ptr
,
24849 struct gcc_options
*opts
,
24850 struct gcc_options */
* opts_set */
)
24852 ptr
->x_rs6000_isa_flags
= opts
->x_rs6000_isa_flags
;
24853 ptr
->x_rs6000_isa_flags_explicit
= opts
->x_rs6000_isa_flags_explicit
;
24856 /* Restore the current options */
24859 rs6000_function_specific_restore (struct gcc_options
*opts
,
24860 struct gcc_options */
* opts_set */
,
24861 struct cl_target_option
*ptr
)
24864 opts
->x_rs6000_isa_flags
= ptr
->x_rs6000_isa_flags
;
24865 opts
->x_rs6000_isa_flags_explicit
= ptr
->x_rs6000_isa_flags_explicit
;
24866 (void) rs6000_option_override_internal (false);
24869 /* Print the current options */
24872 rs6000_function_specific_print (FILE *file
, int indent
,
24873 struct cl_target_option
*ptr
)
24875 rs6000_print_isa_options (file
, indent
, "Isa options set",
24876 ptr
->x_rs6000_isa_flags
);
24878 rs6000_print_isa_options (file
, indent
, "Isa options explicit",
24879 ptr
->x_rs6000_isa_flags_explicit
);
24882 /* Helper function to print the current isa or misc options on a line. */
24885 rs6000_print_options_internal (FILE *file
,
24887 const char *string
,
24888 HOST_WIDE_INT flags
,
24889 const char *prefix
,
24890 const struct rs6000_opt_mask
*opts
,
24891 size_t num_elements
)
24894 size_t start_column
= 0;
24896 size_t max_column
= 120;
24897 size_t prefix_len
= strlen (prefix
);
24898 size_t comma_len
= 0;
24899 const char *comma
= "";
24902 start_column
+= fprintf (file
, "%*s", indent
, "");
24906 fprintf (stderr
, DEBUG_FMT_S
, string
, "<none>");
24910 start_column
+= fprintf (stderr
, DEBUG_FMT_WX
, string
, flags
);
24912 /* Print the various mask options. */
24913 cur_column
= start_column
;
24914 for (i
= 0; i
< num_elements
; i
++)
24916 bool invert
= opts
[i
].invert
;
24917 const char *name
= opts
[i
].name
;
24918 const char *no_str
= "";
24919 HOST_WIDE_INT mask
= opts
[i
].mask
;
24920 size_t len
= comma_len
+ prefix_len
+ strlen (name
);
24924 if ((flags
& mask
) == 0)
24927 len
+= strlen ("no-");
24935 if ((flags
& mask
) != 0)
24938 len
+= strlen ("no-");
24945 if (cur_column
> max_column
)
24947 fprintf (stderr
, ", \\\n%*s", (int)start_column
, "");
24948 cur_column
= start_column
+ len
;
24952 fprintf (file
, "%s%s%s%s", comma
, prefix
, no_str
, name
);
24954 comma_len
= strlen (", ");
24957 fputs ("\n", file
);
24960 /* Helper function to print the current isa options on a line. */
24963 rs6000_print_isa_options (FILE *file
, int indent
, const char *string
,
24964 HOST_WIDE_INT flags
)
24966 rs6000_print_options_internal (file
, indent
, string
, flags
, "-m",
24967 &rs6000_opt_masks
[0],
24968 ARRAY_SIZE (rs6000_opt_masks
));
24971 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
24972 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24973 -mupper-regs-df, etc.).
24975 If the user used -mno-power8-vector, we need to turn off all of the implicit
24976 ISA 2.07 and 3.0 options that relate to the vector unit.
24978 If the user used -mno-power9-vector, we need to turn off all of the implicit
24979 ISA 3.0 options that relate to the vector unit.
24981 This function does not handle explicit options such as the user specifying
24982 -mdirect-move. These are handled in rs6000_option_override_internal, and
24983 the appropriate error is given if needed.
24985 We return a mask of all of the implicit options that should not be enabled
24988 static HOST_WIDE_INT
24989 rs6000_disable_incompatible_switches (void)
24991 HOST_WIDE_INT ignore_masks
= rs6000_isa_flags_explicit
;
24994 static const struct {
24995 const HOST_WIDE_INT no_flag
; /* flag explicitly turned off. */
24996 const HOST_WIDE_INT dep_flags
; /* flags that depend on this option. */
24997 const char *const name
; /* name of the switch. */
24999 { OPTION_MASK_P9_VECTOR
, OTHER_P9_VECTOR_MASKS
, "power9-vector" },
25000 { OPTION_MASK_P8_VECTOR
, OTHER_P8_VECTOR_MASKS
, "power8-vector" },
25001 { OPTION_MASK_VSX
, OTHER_VSX_VECTOR_MASKS
, "vsx" },
25002 { OPTION_MASK_ALTIVEC
, OTHER_ALTIVEC_MASKS
, "altivec" },
25005 for (i
= 0; i
< ARRAY_SIZE (flags
); i
++)
25007 HOST_WIDE_INT no_flag
= flags
[i
].no_flag
;
25009 if ((rs6000_isa_flags
& no_flag
) == 0
25010 && (rs6000_isa_flags_explicit
& no_flag
) != 0)
25012 HOST_WIDE_INT dep_flags
= flags
[i
].dep_flags
;
25013 HOST_WIDE_INT set_flags
= (rs6000_isa_flags_explicit
25019 for (j
= 0; j
< ARRAY_SIZE (rs6000_opt_masks
); j
++)
25020 if ((set_flags
& rs6000_opt_masks
[j
].mask
) != 0)
25022 set_flags
&= ~rs6000_opt_masks
[j
].mask
;
25023 error ("%<-mno-%s%> turns off %<-m%s%>",
25025 rs6000_opt_masks
[j
].name
);
25028 gcc_assert (!set_flags
);
25031 rs6000_isa_flags
&= ~dep_flags
;
25032 ignore_masks
|= no_flag
| dep_flags
;
25036 return ignore_masks
;
25040 /* Helper function for printing the function name when debugging. */
25042 static const char *
25043 get_decl_name (tree fn
)
25050 name
= DECL_NAME (fn
);
25052 return "<no-name>";
25054 return IDENTIFIER_POINTER (name
);
25057 /* Return the clone id of the target we are compiling code for in a target
25058 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
25059 the priority list for the target clones (ordered from lowest to
25063 rs6000_clone_priority (tree fndecl
)
25065 tree fn_opts
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
25066 HOST_WIDE_INT isa_masks
;
25067 int ret
= CLONE_DEFAULT
;
25068 tree attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (fndecl
));
25069 const char *attrs_str
= NULL
;
25071 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
25072 attrs_str
= TREE_STRING_POINTER (attrs
);
25074 /* Return priority zero for default function. Return the ISA needed for the
25075 function if it is not the default. */
25076 if (strcmp (attrs_str
, "default") != 0)
25078 if (fn_opts
== NULL_TREE
)
25079 fn_opts
= target_option_default_node
;
25081 if (!fn_opts
|| !TREE_TARGET_OPTION (fn_opts
))
25082 isa_masks
= rs6000_isa_flags
;
25084 isa_masks
= TREE_TARGET_OPTION (fn_opts
)->x_rs6000_isa_flags
;
25086 for (ret
= CLONE_MAX
- 1; ret
!= 0; ret
--)
25087 if ((rs6000_clone_map
[ret
].isa_mask
& isa_masks
) != 0)
25091 if (TARGET_DEBUG_TARGET
)
25092 fprintf (stderr
, "rs6000_get_function_version_priority (%s) => %d\n",
25093 get_decl_name (fndecl
), ret
);
25098 /* This compares the priority of target features in function DECL1 and DECL2.
25099 It returns positive value if DECL1 is higher priority, negative value if
25100 DECL2 is higher priority and 0 if they are the same. Note, priorities are
25101 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
25104 rs6000_compare_version_priority (tree decl1
, tree decl2
)
25106 int priority1
= rs6000_clone_priority (decl1
);
25107 int priority2
= rs6000_clone_priority (decl2
);
25108 int ret
= priority1
- priority2
;
25110 if (TARGET_DEBUG_TARGET
)
25111 fprintf (stderr
, "rs6000_compare_version_priority (%s, %s) => %d\n",
25112 get_decl_name (decl1
), get_decl_name (decl2
), ret
);
25117 /* Make a dispatcher declaration for the multi-versioned function DECL.
25118 Calls to DECL function will be replaced with calls to the dispatcher
25119 by the front-end. Returns the decl of the dispatcher function. */
25122 rs6000_get_function_versions_dispatcher (void *decl
)
25124 tree fn
= (tree
) decl
;
25125 struct cgraph_node
*node
= NULL
;
25126 struct cgraph_node
*default_node
= NULL
;
25127 struct cgraph_function_version_info
*node_v
= NULL
;
25128 struct cgraph_function_version_info
*first_v
= NULL
;
25130 tree dispatch_decl
= NULL
;
25132 struct cgraph_function_version_info
*default_version_info
= NULL
;
25133 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
25135 if (TARGET_DEBUG_TARGET
)
25136 fprintf (stderr
, "rs6000_get_function_versions_dispatcher (%s)\n",
25137 get_decl_name (fn
));
25139 node
= cgraph_node::get (fn
);
25140 gcc_assert (node
!= NULL
);
25142 node_v
= node
->function_version ();
25143 gcc_assert (node_v
!= NULL
);
25145 if (node_v
->dispatcher_resolver
!= NULL
)
25146 return node_v
->dispatcher_resolver
;
25148 /* Find the default version and make it the first node. */
25150 /* Go to the beginning of the chain. */
25151 while (first_v
->prev
!= NULL
)
25152 first_v
= first_v
->prev
;
25154 default_version_info
= first_v
;
25155 while (default_version_info
!= NULL
)
25157 const tree decl2
= default_version_info
->this_node
->decl
;
25158 if (is_function_default_version (decl2
))
25160 default_version_info
= default_version_info
->next
;
25163 /* If there is no default node, just return NULL. */
25164 if (default_version_info
== NULL
)
25167 /* Make default info the first node. */
25168 if (first_v
!= default_version_info
)
25170 default_version_info
->prev
->next
= default_version_info
->next
;
25171 if (default_version_info
->next
)
25172 default_version_info
->next
->prev
= default_version_info
->prev
;
25173 first_v
->prev
= default_version_info
;
25174 default_version_info
->next
= first_v
;
25175 default_version_info
->prev
= NULL
;
25178 default_node
= default_version_info
->this_node
;
25180 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
25181 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
25182 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
25183 "exports hardware capability bits");
25186 if (targetm
.has_ifunc_p ())
25188 struct cgraph_function_version_info
*it_v
= NULL
;
25189 struct cgraph_node
*dispatcher_node
= NULL
;
25190 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
25192 /* Right now, the dispatching is done via ifunc. */
25193 dispatch_decl
= make_dispatcher_decl (default_node
->decl
);
25194 TREE_NOTHROW (dispatch_decl
) = TREE_NOTHROW (fn
);
25196 dispatcher_node
= cgraph_node::get_create (dispatch_decl
);
25197 gcc_assert (dispatcher_node
!= NULL
);
25198 dispatcher_node
->dispatcher_function
= 1;
25199 dispatcher_version_info
25200 = dispatcher_node
->insert_new_function_version ();
25201 dispatcher_version_info
->next
= default_version_info
;
25202 dispatcher_node
->definition
= 1;
25204 /* Set the dispatcher for all the versions. */
25205 it_v
= default_version_info
;
25206 while (it_v
!= NULL
)
25208 it_v
->dispatcher_resolver
= dispatch_decl
;
25214 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
25215 "multiversioning needs %<ifunc%> which is not supported "
25220 return dispatch_decl
;
25223 /* Make the resolver function decl to dispatch the versions of a multi-
25224 versioned function, DEFAULT_DECL. Create an empty basic block in the
25225 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
25229 make_resolver_func (const tree default_decl
,
25230 const tree dispatch_decl
,
25231 basic_block
*empty_bb
)
25233 /* Make the resolver function static. The resolver function returns
25235 tree decl_name
= clone_function_name (default_decl
, "resolver");
25236 const char *resolver_name
= IDENTIFIER_POINTER (decl_name
);
25237 tree type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
25238 tree decl
= build_fn_decl (resolver_name
, type
);
25239 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
25241 DECL_NAME (decl
) = decl_name
;
25242 TREE_USED (decl
) = 1;
25243 DECL_ARTIFICIAL (decl
) = 1;
25244 DECL_IGNORED_P (decl
) = 0;
25245 TREE_PUBLIC (decl
) = 0;
25246 DECL_UNINLINABLE (decl
) = 1;
25248 /* Resolver is not external, body is generated. */
25249 DECL_EXTERNAL (decl
) = 0;
25250 DECL_EXTERNAL (dispatch_decl
) = 0;
25252 DECL_CONTEXT (decl
) = NULL_TREE
;
25253 DECL_INITIAL (decl
) = make_node (BLOCK
);
25254 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
25256 if (DECL_COMDAT_GROUP (default_decl
)
25257 || TREE_PUBLIC (default_decl
))
25259 /* In this case, each translation unit with a call to this
25260 versioned function will put out a resolver. Ensure it
25261 is comdat to keep just one copy. */
25262 DECL_COMDAT (decl
) = 1;
25263 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
25266 TREE_PUBLIC (dispatch_decl
) = 0;
25268 /* Build result decl and add to function_decl. */
25269 tree t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
25270 DECL_CONTEXT (t
) = decl
;
25271 DECL_ARTIFICIAL (t
) = 1;
25272 DECL_IGNORED_P (t
) = 1;
25273 DECL_RESULT (decl
) = t
;
25275 gimplify_function_tree (decl
);
25276 push_cfun (DECL_STRUCT_FUNCTION (decl
));
25277 *empty_bb
= init_lowered_empty_function (decl
, false,
25278 profile_count::uninitialized ());
25280 cgraph_node::add_new_function (decl
, true);
25281 symtab
->call_cgraph_insertion_hooks (cgraph_node::get_create (decl
));
25285 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
25286 DECL_ATTRIBUTES (dispatch_decl
)
25287 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
25289 cgraph_node::create_same_body_alias (dispatch_decl
, decl
);
25294 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25295 return a pointer to VERSION_DECL if we are running on a machine that
25296 supports the index CLONE_ISA hardware architecture bits. This function will
25297 be called during version dispatch to decide which function version to
25298 execute. It returns the basic block at the end, to which more conditions
25302 add_condition_to_bb (tree function_decl
, tree version_decl
,
25303 int clone_isa
, basic_block new_bb
)
25305 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
25307 gcc_assert (new_bb
!= NULL
);
25308 gimple_seq gseq
= bb_seq (new_bb
);
25311 tree convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
25312 build_fold_addr_expr (version_decl
));
25313 tree result_var
= create_tmp_var (ptr_type_node
);
25314 gimple
*convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
25315 gimple
*return_stmt
= gimple_build_return (result_var
);
25317 if (clone_isa
== CLONE_DEFAULT
)
25319 gimple_seq_add_stmt (&gseq
, convert_stmt
);
25320 gimple_seq_add_stmt (&gseq
, return_stmt
);
25321 set_bb_seq (new_bb
, gseq
);
25322 gimple_set_bb (convert_stmt
, new_bb
);
25323 gimple_set_bb (return_stmt
, new_bb
);
25328 tree bool_zero
= build_int_cst (bool_int_type_node
, 0);
25329 tree cond_var
= create_tmp_var (bool_int_type_node
);
25330 tree predicate_decl
= rs6000_builtin_decls
[(int) RS6000_BIF_CPU_SUPPORTS
];
25331 const char *arg_str
= rs6000_clone_map
[clone_isa
].name
;
25332 tree predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
25333 gimple
*call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
25334 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
25336 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
25337 gimple_set_bb (call_cond_stmt
, new_bb
);
25338 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
25340 gimple
*if_else_stmt
= gimple_build_cond (NE_EXPR
, cond_var
, bool_zero
,
25341 NULL_TREE
, NULL_TREE
);
25342 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
25343 gimple_set_bb (if_else_stmt
, new_bb
);
25344 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
25346 gimple_seq_add_stmt (&gseq
, convert_stmt
);
25347 gimple_seq_add_stmt (&gseq
, return_stmt
);
25348 set_bb_seq (new_bb
, gseq
);
25350 basic_block bb1
= new_bb
;
25351 edge e12
= split_block (bb1
, if_else_stmt
);
25352 basic_block bb2
= e12
->dest
;
25353 e12
->flags
&= ~EDGE_FALLTHRU
;
25354 e12
->flags
|= EDGE_TRUE_VALUE
;
25356 edge e23
= split_block (bb2
, return_stmt
);
25357 gimple_set_bb (convert_stmt
, bb2
);
25358 gimple_set_bb (return_stmt
, bb2
);
25360 basic_block bb3
= e23
->dest
;
25361 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
25364 make_edge (bb2
, EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
25370 /* This function generates the dispatch function for multi-versioned functions.
25371 DISPATCH_DECL is the function which will contain the dispatch logic.
25372 FNDECLS are the function choices for dispatch, and is a tree chain.
25373 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25374 code is generated. */
25377 dispatch_function_versions (tree dispatch_decl
,
25379 basic_block
*empty_bb
)
25383 vec
<tree
> *fndecls
;
25384 tree clones
[CLONE_MAX
];
25386 if (TARGET_DEBUG_TARGET
)
25387 fputs ("dispatch_function_versions, top\n", stderr
);
25389 gcc_assert (dispatch_decl
!= NULL
25390 && fndecls_p
!= NULL
25391 && empty_bb
!= NULL
);
25393 /* fndecls_p is actually a vector. */
25394 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
25396 /* At least one more version other than the default. */
25397 gcc_assert (fndecls
->length () >= 2);
25399 /* The first version in the vector is the default decl. */
25400 memset ((void *) clones
, '\0', sizeof (clones
));
25401 clones
[CLONE_DEFAULT
] = (*fndecls
)[0];
25403 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25404 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
25405 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
25406 recent glibc. If we ever need to call __builtin_cpu_init, we would need
25407 to insert the code here to do the call. */
25409 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
25411 int priority
= rs6000_clone_priority (ele
);
25412 if (!clones
[priority
])
25413 clones
[priority
] = ele
;
25416 for (ix
= CLONE_MAX
- 1; ix
>= 0; ix
--)
25419 if (TARGET_DEBUG_TARGET
)
25420 fprintf (stderr
, "dispatch_function_versions, clone %d, %s\n",
25421 ix
, get_decl_name (clones
[ix
]));
25423 *empty_bb
= add_condition_to_bb (dispatch_decl
, clones
[ix
], ix
,
25430 /* Generate the dispatching code body to dispatch multi-versioned function
25431 DECL. The target hook is called to process the "target" attributes and
25432 provide the code to dispatch the right function at run-time. NODE points
25433 to the dispatcher decl whose body will be created. */
25436 rs6000_generate_version_dispatcher_body (void *node_p
)
25439 basic_block empty_bb
;
25440 struct cgraph_node
*node
= (cgraph_node
*) node_p
;
25441 struct cgraph_function_version_info
*ninfo
= node
->function_version ();
25443 if (ninfo
->dispatcher_resolver
)
25444 return ninfo
->dispatcher_resolver
;
25446 /* node is going to be an alias, so remove the finalized bit. */
25447 node
->definition
= false;
25449 /* The first version in the chain corresponds to the default version. */
25450 ninfo
->dispatcher_resolver
= resolver
25451 = make_resolver_func (ninfo
->next
->this_node
->decl
, node
->decl
, &empty_bb
);
25453 if (TARGET_DEBUG_TARGET
)
25454 fprintf (stderr
, "rs6000_get_function_versions_dispatcher, %s\n",
25455 get_decl_name (resolver
));
25457 push_cfun (DECL_STRUCT_FUNCTION (resolver
));
25458 auto_vec
<tree
, 2> fn_ver_vec
;
25460 for (struct cgraph_function_version_info
*vinfo
= ninfo
->next
;
25462 vinfo
= vinfo
->next
)
25464 struct cgraph_node
*version
= vinfo
->this_node
;
25465 /* Check for virtual functions here again, as by this time it should
25466 have been determined if this function needs a vtable index or
25467 not. This happens for methods in derived classes that override
25468 virtual methods in base classes but are not explicitly marked as
25470 if (DECL_VINDEX (version
->decl
))
25471 sorry ("Virtual function multiversioning not supported");
25473 fn_ver_vec
.safe_push (version
->decl
);
25476 dispatch_function_versions (resolver
, &fn_ver_vec
, &empty_bb
);
25477 cgraph_edge::rebuild_edges ();
25482 /* Hook to decide if we need to scan function gimple statements to
25483 collect target specific information for inlining, and update the
25484 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25485 to predict which ISA feature is used at this time. Return true
25486 if we need to scan, otherwise return false. */
25489 rs6000_need_ipa_fn_target_info (const_tree decl
,
25490 unsigned int &info ATTRIBUTE_UNUSED
)
25492 tree target
= DECL_FUNCTION_SPECIFIC_TARGET (decl
);
25494 target
= target_option_default_node
;
25495 struct cl_target_option
*opts
= TREE_TARGET_OPTION (target
);
25497 /* See PR102059, we only handle HTM for now, so will only do
25498 the consequent scannings when HTM feature enabled. */
25499 if (opts
->x_rs6000_isa_flags
& OPTION_MASK_HTM
)
25505 /* Hook to update target specific information INFO for inlining by
25506 checking the given STMT. Return false if we don't need to scan
25507 any more, otherwise return true. */
25510 rs6000_update_ipa_fn_target_info (unsigned int &info
, const gimple
*stmt
)
25512 /* Assume inline asm can use any instruction features. */
25513 if (gimple_code (stmt
) == GIMPLE_ASM
)
25515 const char *asm_str
= gimple_asm_string (as_a
<const gasm
*> (stmt
));
25516 /* Ignore empty inline asm string. */
25517 if (strlen (asm_str
) > 0)
25518 /* Should set any bits we concerned, for now OPTION_MASK_HTM is
25519 the only bit we care about. */
25520 info
|= RS6000_FN_TARGET_INFO_HTM
;
25523 else if (gimple_code (stmt
) == GIMPLE_CALL
)
25525 tree fndecl
= gimple_call_fndecl (stmt
);
25526 if (fndecl
&& fndecl_built_in_p (fndecl
, BUILT_IN_MD
))
25528 enum rs6000_gen_builtins fcode
25529 = (enum rs6000_gen_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
25530 /* HTM bifs definitely exploit HTM insns. */
25531 if (bif_is_htm (rs6000_builtin_info
[fcode
]))
25533 info
|= RS6000_FN_TARGET_INFO_HTM
;
25542 /* Hook to determine if one function can safely inline another. */
25545 rs6000_can_inline_p (tree caller
, tree callee
)
25548 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
25549 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
25551 /* If the caller/callee has option attributes, then use them.
25552 Otherwise, use the command line options. */
25554 callee_tree
= target_option_default_node
;
25556 caller_tree
= target_option_default_node
;
25558 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
25559 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
25561 HOST_WIDE_INT callee_isa
= callee_opts
->x_rs6000_isa_flags
;
25562 HOST_WIDE_INT caller_isa
= caller_opts
->x_rs6000_isa_flags
;
25563 HOST_WIDE_INT explicit_isa
= callee_opts
->x_rs6000_isa_flags_explicit
;
25565 cgraph_node
*callee_node
= cgraph_node::get (callee
);
25566 if (ipa_fn_summaries
&& ipa_fn_summaries
->get (callee_node
) != NULL
)
25568 unsigned int info
= ipa_fn_summaries
->get (callee_node
)->target_info
;
25569 if ((info
& RS6000_FN_TARGET_INFO_HTM
) == 0)
25571 callee_isa
&= ~OPTION_MASK_HTM
;
25572 explicit_isa
&= ~OPTION_MASK_HTM
;
25576 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25578 callee_isa
&= ~(OPTION_MASK_P8_FUSION
| OPTION_MASK_P10_FUSION
);
25579 explicit_isa
&= ~(OPTION_MASK_P8_FUSION
| OPTION_MASK_P10_FUSION
);
25581 /* The callee's options must be a subset of the caller's options, i.e.
25582 a vsx function may inline an altivec function, but a no-vsx function
25583 must not inline a vsx function. However, for those options that the
25584 callee has explicitly enabled or disabled, then we must enforce that
25585 the callee's and caller's options match exactly; see PR70010. */
25586 if (((caller_isa
& callee_isa
) == callee_isa
)
25587 && (caller_isa
& explicit_isa
) == (callee_isa
& explicit_isa
))
25590 if (TARGET_DEBUG_TARGET
)
25591 fprintf (stderr
, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
25592 get_decl_name (caller
), get_decl_name (callee
),
25593 (ret
? "can" : "cannot"));
25598 /* Allocate a stack temp and fixup the address so it meets the particular
25599 memory requirements (either offetable or REG+REG addressing). */
25602 rs6000_allocate_stack_temp (machine_mode mode
,
25603 bool offsettable_p
,
25606 rtx stack
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
25607 rtx addr
= XEXP (stack
, 0);
25608 int strict_p
= reload_completed
;
25610 if (!legitimate_indirect_address_p (addr
, strict_p
))
25613 && !rs6000_legitimate_offset_address_p (mode
, addr
, strict_p
, true))
25614 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
25616 else if (reg_reg_p
&& !legitimate_indexed_address_p (addr
, strict_p
))
25617 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
25623 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25624 convert to such a form to deal with memory reference instructions
25625 like STFIWX and LDBRX that only take reg+reg addressing. */
25628 rs6000_force_indexed_or_indirect_mem (rtx x
)
25630 machine_mode mode
= GET_MODE (x
);
25632 gcc_assert (MEM_P (x
));
25633 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x
, mode
))
25635 rtx addr
= XEXP (x
, 0);
25636 if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
25638 rtx reg
= XEXP (addr
, 0);
25639 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (x
));
25640 rtx size_rtx
= GEN_INT ((GET_CODE (addr
) == PRE_DEC
) ? -size
: size
);
25641 gcc_assert (REG_P (reg
));
25642 emit_insn (gen_add3_insn (reg
, reg
, size_rtx
));
25645 else if (GET_CODE (addr
) == PRE_MODIFY
)
25647 rtx reg
= XEXP (addr
, 0);
25648 rtx expr
= XEXP (addr
, 1);
25649 gcc_assert (REG_P (reg
));
25650 gcc_assert (GET_CODE (expr
) == PLUS
);
25651 emit_insn (gen_add3_insn (reg
, XEXP (expr
, 0), XEXP (expr
, 1)));
25655 if (GET_CODE (addr
) == PLUS
)
25657 rtx op0
= XEXP (addr
, 0);
25658 rtx op1
= XEXP (addr
, 1);
25659 op0
= force_reg (Pmode
, op0
);
25660 op1
= force_reg (Pmode
, op1
);
25661 x
= replace_equiv_address (x
, gen_rtx_PLUS (Pmode
, op0
, op1
));
25664 x
= replace_equiv_address (x
, force_reg (Pmode
, addr
));
25670 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25672 On the RS/6000, all integer constants are acceptable, most won't be valid
25673 for particular insns, though. Only easy FP constants are acceptable. */
25676 rs6000_legitimate_constant_p (machine_mode mode
, rtx x
)
25678 if (TARGET_ELF
&& tls_referenced_p (x
))
25681 if (CONST_DOUBLE_P (x
))
25682 return easy_fp_constant (x
, mode
);
25684 if (GET_CODE (x
) == CONST_VECTOR
)
25685 return easy_vector_constant (x
, mode
);
25691 /* Implement TARGET_PRECOMPUTE_TLS_P.
25693 On the AIX, TLS symbols are in the TOC, which is maintained in the
25694 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25695 must be considered legitimate constants. */
25698 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
25700 return tls_referenced_p (x
);
25705 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25708 chain_already_loaded (rtx_insn
*last
)
25710 for (; last
!= NULL
; last
= PREV_INSN (last
))
25712 if (NONJUMP_INSN_P (last
))
25714 rtx patt
= PATTERN (last
);
25716 if (GET_CODE (patt
) == SET
)
25718 rtx lhs
= XEXP (patt
, 0);
25720 if (REG_P (lhs
) && REGNO (lhs
) == STATIC_CHAIN_REGNUM
)
25728 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25731 rs6000_call_aix (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25733 rtx func
= func_desc
;
25734 rtx toc_reg
= gen_rtx_REG (Pmode
, TOC_REGNUM
);
25735 rtx toc_load
= NULL_RTX
;
25736 rtx toc_restore
= NULL_RTX
;
25738 rtx abi_reg
= NULL_RTX
;
25742 bool is_pltseq_longcall
;
25745 tlsarg
= global_tlsarg
;
25747 /* Handle longcall attributes. */
25748 is_pltseq_longcall
= false;
25749 if ((INTVAL (cookie
) & CALL_LONG
) != 0
25750 && GET_CODE (func_desc
) == SYMBOL_REF
)
25752 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
25754 is_pltseq_longcall
= true;
25757 /* Handle indirect calls. */
25758 if (!SYMBOL_REF_P (func
)
25759 || (DEFAULT_ABI
== ABI_AIX
&& !SYMBOL_REF_FUNCTION_P (func
)))
25761 if (!rs6000_pcrel_p ())
25763 /* Save the TOC into its reserved slot before the call,
25764 and prepare to restore it after the call. */
25765 rtx stack_toc_offset
= GEN_INT (RS6000_TOC_SAVE_SLOT
);
25766 rtx stack_toc_unspec
= gen_rtx_UNSPEC (Pmode
,
25767 gen_rtvec (1, stack_toc_offset
),
25769 toc_restore
= gen_rtx_SET (toc_reg
, stack_toc_unspec
);
25771 /* Can we optimize saving the TOC in the prologue or
25772 do we need to do it at every call? */
25773 if (TARGET_SAVE_TOC_INDIRECT
&& !cfun
->calls_alloca
)
25774 cfun
->machine
->save_toc_in_prologue
= true;
25777 rtx stack_ptr
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
25778 rtx stack_toc_mem
= gen_frame_mem (Pmode
,
25779 gen_rtx_PLUS (Pmode
, stack_ptr
,
25780 stack_toc_offset
));
25781 MEM_VOLATILE_P (stack_toc_mem
) = 1;
25782 if (is_pltseq_longcall
)
25784 rtvec v
= gen_rtvec (3, toc_reg
, func_desc
, tlsarg
);
25785 rtx mark_toc_reg
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25786 emit_insn (gen_rtx_SET (stack_toc_mem
, mark_toc_reg
));
25789 emit_move_insn (stack_toc_mem
, toc_reg
);
25793 if (DEFAULT_ABI
== ABI_ELFv2
)
25795 /* A function pointer in the ELFv2 ABI is just a plain address, but
25796 the ABI requires it to be loaded into r12 before the call. */
25797 func_addr
= gen_rtx_REG (Pmode
, 12);
25798 emit_move_insn (func_addr
, func
);
25799 abi_reg
= func_addr
;
25800 /* Indirect calls via CTR are strongly preferred over indirect
25801 calls via LR, so move the address there. Needed to mark
25802 this insn for linker plt sequence editing too. */
25803 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25804 if (is_pltseq_longcall
)
25806 rtvec v
= gen_rtvec (3, abi_reg
, func_desc
, tlsarg
);
25807 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25808 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
25809 v
= gen_rtvec (2, func_addr
, func_desc
);
25810 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25813 emit_move_insn (func_addr
, abi_reg
);
25817 /* A function pointer under AIX is a pointer to a data area whose
25818 first word contains the actual address of the function, whose
25819 second word contains a pointer to its TOC, and whose third word
25820 contains a value to place in the static chain register (r11).
25821 Note that if we load the static chain, our "trampoline" need
25822 not have any executable code. */
25824 /* Load up address of the actual function. */
25825 func
= force_reg (Pmode
, func
);
25826 func_addr
= gen_reg_rtx (Pmode
);
25827 emit_move_insn (func_addr
, gen_rtx_MEM (Pmode
, func
));
25829 /* Indirect calls via CTR are strongly preferred over indirect
25830 calls via LR, so move the address there. */
25831 rtx ctr_reg
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25832 emit_move_insn (ctr_reg
, func_addr
);
25833 func_addr
= ctr_reg
;
25835 /* Prepare to load the TOC of the called function. Note that the
25836 TOC load must happen immediately before the actual call so
25837 that unwinding the TOC registers works correctly. See the
25838 comment in frob_update_context. */
25839 rtx func_toc_offset
= GEN_INT (GET_MODE_SIZE (Pmode
));
25840 rtx func_toc_mem
= gen_rtx_MEM (Pmode
,
25841 gen_rtx_PLUS (Pmode
, func
,
25843 toc_load
= gen_rtx_USE (VOIDmode
, func_toc_mem
);
25845 /* If we have a static chain, load it up. But, if the call was
25846 originally direct, the 3rd word has not been written since no
25847 trampoline has been built, so we ought not to load it, lest we
25848 override a static chain value. */
25849 if (!(GET_CODE (func_desc
) == SYMBOL_REF
25850 && SYMBOL_REF_FUNCTION_P (func_desc
))
25851 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25852 && !chain_already_loaded (get_current_sequence ()->next
->last
))
25854 rtx sc_reg
= gen_rtx_REG (Pmode
, STATIC_CHAIN_REGNUM
);
25855 rtx func_sc_offset
= GEN_INT (2 * GET_MODE_SIZE (Pmode
));
25856 rtx func_sc_mem
= gen_rtx_MEM (Pmode
,
25857 gen_rtx_PLUS (Pmode
, func
,
25859 emit_move_insn (sc_reg
, func_sc_mem
);
25866 /* No TOC register needed for calls from PC-relative callers. */
25867 if (!rs6000_pcrel_p ())
25868 /* Direct calls use the TOC: for local calls, the callee will
25869 assume the TOC register is set; for non-local calls, the
25870 PLT stub needs the TOC register. */
25875 /* Create the call. */
25876 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25877 if (value
!= NULL_RTX
)
25878 call
[0] = gen_rtx_SET (value
, call
[0]);
25879 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
25883 call
[n_call
++] = toc_load
;
25885 call
[n_call
++] = toc_restore
;
25887 call
[n_call
++] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
25889 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n_call
, call
));
25890 insn
= emit_call_insn (insn
);
25892 /* Mention all registers defined by the ABI to hold information
25893 as uses in CALL_INSN_FUNCTION_USAGE. */
25895 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
25898 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
25901 rs6000_sibcall_aix (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25905 rtx r12
= NULL_RTX
;
25906 rtx func_addr
= func_desc
;
25909 tlsarg
= global_tlsarg
;
25911 /* Handle longcall attributes. */
25912 if (INTVAL (cookie
) & CALL_LONG
&& SYMBOL_REF_P (func_desc
))
25914 /* PCREL can do a sibling call to a longcall function
25915 because we don't need to restore the TOC register. */
25916 gcc_assert (rs6000_pcrel_p ());
25917 func_desc
= rs6000_longcall_ref (func_desc
, tlsarg
);
25920 gcc_assert (INTVAL (cookie
) == 0);
25922 /* For ELFv2, r12 and CTR need to hold the function address
25923 for an indirect call. */
25924 if (GET_CODE (func_desc
) != SYMBOL_REF
&& DEFAULT_ABI
== ABI_ELFv2
)
25926 r12
= gen_rtx_REG (Pmode
, 12);
25927 emit_move_insn (r12
, func_desc
);
25928 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25929 emit_move_insn (func_addr
, r12
);
25932 /* Create the call. */
25933 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25934 if (value
!= NULL_RTX
)
25935 call
[0] = gen_rtx_SET (value
, call
[0]);
25937 call
[1] = simple_return_rtx
;
25939 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (2, call
));
25940 insn
= emit_call_insn (insn
);
25942 /* Note use of the TOC register. */
25943 if (!rs6000_pcrel_p ())
25944 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
),
25945 gen_rtx_REG (Pmode
, TOC_REGNUM
));
25947 /* Note use of r12. */
25949 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), r12
);
25952 /* Expand code to perform a call under the SYSV4 ABI. */
25955 rs6000_call_sysv (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25957 rtx func
= func_desc
;
25961 rtx abi_reg
= NULL_RTX
;
25965 tlsarg
= global_tlsarg
;
25967 /* Handle longcall attributes. */
25968 if ((INTVAL (cookie
) & CALL_LONG
) != 0
25969 && GET_CODE (func_desc
) == SYMBOL_REF
)
25971 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
25972 /* If the longcall was implemented as an inline PLT call using
25973 PLT unspecs then func will be REG:r11. If not, func will be
25974 a pseudo reg. The inline PLT call sequence supports lazy
25975 linking (and longcalls to functions in dlopen'd libraries).
25976 The other style of longcalls don't. The lazy linking entry
25977 to the dynamic symbol resolver requires r11 be the function
25978 address (as it is for linker generated PLT stubs). Ensure
25979 r11 stays valid to the bctrl by marking r11 used by the call. */
25984 /* Handle indirect calls. */
25985 if (GET_CODE (func
) != SYMBOL_REF
)
25987 func
= force_reg (Pmode
, func
);
25989 /* Indirect calls via CTR are strongly preferred over indirect
25990 calls via LR, so move the address there. That can't be left
25991 to reload because we want to mark every instruction in an
25992 inline PLT call sequence with a reloc, enabling the linker to
25993 edit the sequence back to a direct call when that makes sense. */
25994 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25997 rtvec v
= gen_rtvec (3, func
, func_desc
, tlsarg
);
25998 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25999 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
26000 v
= gen_rtvec (2, func_addr
, func_desc
);
26001 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26004 emit_move_insn (func_addr
, func
);
26009 /* Create the call. */
26010 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26011 if (value
!= NULL_RTX
)
26012 call
[0] = gen_rtx_SET (value
, call
[0]);
26014 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
26016 if (TARGET_SECURE_PLT
26018 && GET_CODE (func_addr
) == SYMBOL_REF
26019 && !SYMBOL_REF_LOCAL_P (func_addr
))
26020 call
[n
++] = gen_rtx_USE (VOIDmode
, pic_offset_table_rtx
);
26022 call
[n
++] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
26024 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n
, call
));
26025 insn
= emit_call_insn (insn
);
26027 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
26030 /* Expand code to perform a sibling call under the SysV4 ABI. */
26033 rs6000_sibcall_sysv (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
26035 rtx func
= func_desc
;
26039 rtx abi_reg
= NULL_RTX
;
26042 tlsarg
= global_tlsarg
;
26044 /* Handle longcall attributes. */
26045 if ((INTVAL (cookie
) & CALL_LONG
) != 0
26046 && GET_CODE (func_desc
) == SYMBOL_REF
)
26048 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
26049 /* If the longcall was implemented as an inline PLT call using
26050 PLT unspecs then func will be REG:r11. If not, func will be
26051 a pseudo reg. The inline PLT call sequence supports lazy
26052 linking (and longcalls to functions in dlopen'd libraries).
26053 The other style of longcalls don't. The lazy linking entry
26054 to the dynamic symbol resolver requires r11 be the function
26055 address (as it is for linker generated PLT stubs). Ensure
26056 r11 stays valid to the bctr by marking r11 used by the call. */
26061 /* Handle indirect calls. */
26062 if (GET_CODE (func
) != SYMBOL_REF
)
26064 func
= force_reg (Pmode
, func
);
26066 /* Indirect sibcalls must go via CTR. That can't be left to
26067 reload because we want to mark every instruction in an inline
26068 PLT call sequence with a reloc, enabling the linker to edit
26069 the sequence back to a direct call when that makes sense. */
26070 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26073 rtvec v
= gen_rtvec (3, func
, func_desc
, tlsarg
);
26074 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26075 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
26076 v
= gen_rtvec (2, func_addr
, func_desc
);
26077 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26080 emit_move_insn (func_addr
, func
);
26085 /* Create the call. */
26086 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26087 if (value
!= NULL_RTX
)
26088 call
[0] = gen_rtx_SET (value
, call
[0]);
26090 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
26091 call
[2] = simple_return_rtx
;
26093 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
26094 insn
= emit_call_insn (insn
);
26096 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
26101 /* Expand code to perform a call under the Darwin ABI.
26102 Modulo handling of mlongcall, this is much the same as sysv.
26103 if/when the longcall optimisation is removed, we could drop this
26104 code and use the sysv case (taking care to avoid the tls stuff).
26106 We can use this for sibcalls too, if needed. */
26109 rs6000_call_darwin_1 (rtx value
, rtx func_desc
, rtx tlsarg
,
26110 rtx cookie
, bool sibcall
)
26112 rtx func
= func_desc
;
26116 int cookie_val
= INTVAL (cookie
);
26117 bool make_island
= false;
26119 /* Handle longcall attributes, there are two cases for Darwin:
26120 1) Newer linkers are capable of synthesising any branch islands needed.
26121 2) We need a helper branch island synthesised by the compiler.
26122 The second case has mostly been retired and we don't use it for m64.
26123 In fact, it's is an optimisation, we could just indirect as sysv does..
26124 ... however, backwards compatibility for now.
26125 If we're going to use this, then we need to keep the CALL_LONG bit set,
26126 so that we can pick up the special insn form later. */
26127 if ((cookie_val
& CALL_LONG
) != 0
26128 && GET_CODE (func_desc
) == SYMBOL_REF
)
26130 /* FIXME: the longcall opt should not hang off this flag, it is most
26131 likely incorrect for kernel-mode code-generation. */
26132 if (darwin_symbol_stubs
&& TARGET_32BIT
)
26133 make_island
= true; /* Do nothing yet, retain the CALL_LONG flag. */
26136 /* The linker is capable of doing this, but the user explicitly
26137 asked for -mlongcall, so we'll do the 'normal' version. */
26138 func
= rs6000_longcall_ref (func_desc
, NULL_RTX
);
26139 cookie_val
&= ~CALL_LONG
; /* Handled, zap it. */
26143 /* Handle indirect calls. */
26144 if (GET_CODE (func
) != SYMBOL_REF
)
26146 func
= force_reg (Pmode
, func
);
26148 /* Indirect calls via CTR are strongly preferred over indirect
26149 calls via LR, and are required for indirect sibcalls, so move
26150 the address there. */
26151 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26152 emit_move_insn (func_addr
, func
);
26157 /* Create the call. */
26158 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26159 if (value
!= NULL_RTX
)
26160 call
[0] = gen_rtx_SET (value
, call
[0]);
26162 call
[1] = gen_rtx_USE (VOIDmode
, GEN_INT (cookie_val
));
26165 call
[2] = simple_return_rtx
;
26167 call
[2] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
26169 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
26170 insn
= emit_call_insn (insn
);
26171 /* Now we have the debug info in the insn, we can set up the branch island
26172 if we're using one. */
26175 tree funname
= get_identifier (XSTR (func_desc
, 0));
26177 if (no_previous_def (funname
))
26179 rtx label_rtx
= gen_label_rtx ();
26180 char *label_buf
, temp_buf
[256];
26181 ASM_GENERATE_INTERNAL_LABEL (temp_buf
, "L",
26182 CODE_LABEL_NUMBER (label_rtx
));
26183 label_buf
= temp_buf
[0] == '*' ? temp_buf
+ 1 : temp_buf
;
26184 tree labelname
= get_identifier (label_buf
);
26185 add_compiler_branch_island (labelname
, funname
,
26186 insn_line ((const rtx_insn
*)insn
));
26193 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
26194 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
26197 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, false);
26205 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
26206 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
26209 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, true);
26215 /* Return whether we should generate PC-relative code for FNDECL. */
26217 rs6000_fndecl_pcrel_p (const_tree fndecl
)
26219 if (DEFAULT_ABI
!= ABI_ELFv2
)
26222 struct cl_target_option
*opts
= target_opts_for_fn (fndecl
);
26224 return ((opts
->x_rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
26225 && TARGET_CMODEL
== CMODEL_MEDIUM
);
26228 /* Return whether we should generate PC-relative code for *FN. */
26230 rs6000_function_pcrel_p (struct function
*fn
)
26232 if (DEFAULT_ABI
!= ABI_ELFv2
)
26235 /* Optimize usual case. */
26237 return ((rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
26238 && TARGET_CMODEL
== CMODEL_MEDIUM
);
26240 return rs6000_fndecl_pcrel_p (fn
->decl
);
26243 /* Return whether we should generate PC-relative code for the current
26248 return (DEFAULT_ABI
== ABI_ELFv2
26249 && (rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
26250 && TARGET_CMODEL
== CMODEL_MEDIUM
);
26254 /* Given an address (ADDR), a mode (MODE), and what the format of the
26255 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26256 for the address. */
26259 address_to_insn_form (rtx addr
,
26261 enum non_prefixed_form non_prefixed_format
)
26263 /* Single register is easy. */
26264 if (REG_P (addr
) || SUBREG_P (addr
))
26265 return INSN_FORM_BASE_REG
;
26267 /* If the non prefixed instruction format doesn't support offset addressing,
26268 make sure only indexed addressing is allowed.
26270 We special case SDmode so that the register allocator does not try to move
26271 SDmode through GPR registers, but instead uses the 32-bit integer load and
26272 store instructions for the floating point registers. */
26273 if (non_prefixed_format
== NON_PREFIXED_X
|| (mode
== SDmode
&& TARGET_DFP
))
26275 if (GET_CODE (addr
) != PLUS
)
26276 return INSN_FORM_BAD
;
26278 rtx op0
= XEXP (addr
, 0);
26279 rtx op1
= XEXP (addr
, 1);
26280 if (!REG_P (op0
) && !SUBREG_P (op0
))
26281 return INSN_FORM_BAD
;
26283 if (!REG_P (op1
) && !SUBREG_P (op1
))
26284 return INSN_FORM_BAD
;
26286 return INSN_FORM_X
;
26289 /* Deal with update forms. */
26290 if (GET_RTX_CLASS (GET_CODE (addr
)) == RTX_AUTOINC
)
26291 return INSN_FORM_UPDATE
;
26293 /* Handle PC-relative symbols and labels. Check for both local and
26294 external symbols. Assume labels are always local. TLS symbols
26295 are not PC-relative for rs6000. */
26298 if (LABEL_REF_P (addr
))
26299 return INSN_FORM_PCREL_LOCAL
;
26301 if (SYMBOL_REF_P (addr
) && !SYMBOL_REF_TLS_MODEL (addr
))
26303 if (!SYMBOL_REF_LOCAL_P (addr
))
26304 return INSN_FORM_PCREL_EXTERNAL
;
26306 return INSN_FORM_PCREL_LOCAL
;
26310 if (GET_CODE (addr
) == CONST
)
26311 addr
= XEXP (addr
, 0);
26313 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26314 if (GET_CODE (addr
) == LO_SUM
)
26315 return INSN_FORM_LO_SUM
;
26317 /* Everything below must be an offset address of some form. */
26318 if (GET_CODE (addr
) != PLUS
)
26319 return INSN_FORM_BAD
;
26321 rtx op0
= XEXP (addr
, 0);
26322 rtx op1
= XEXP (addr
, 1);
26324 /* Check for indexed addresses. */
26325 if (REG_P (op1
) || SUBREG_P (op1
))
26327 if (REG_P (op0
) || SUBREG_P (op0
))
26328 return INSN_FORM_X
;
26330 return INSN_FORM_BAD
;
26333 if (!CONST_INT_P (op1
))
26334 return INSN_FORM_BAD
;
26336 HOST_WIDE_INT offset
= INTVAL (op1
);
26337 if (!SIGNED_INTEGER_34BIT_P (offset
))
26338 return INSN_FORM_BAD
;
26340 /* Check for local and external PC-relative addresses. Labels are always
26341 local. TLS symbols are not PC-relative for rs6000. */
26344 if (LABEL_REF_P (op0
))
26345 return INSN_FORM_PCREL_LOCAL
;
26347 if (SYMBOL_REF_P (op0
) && !SYMBOL_REF_TLS_MODEL (op0
))
26349 if (!SYMBOL_REF_LOCAL_P (op0
))
26350 return INSN_FORM_PCREL_EXTERNAL
;
26352 return INSN_FORM_PCREL_LOCAL
;
26356 /* If it isn't PC-relative, the address must use a base register. */
26357 if (!REG_P (op0
) && !SUBREG_P (op0
))
26358 return INSN_FORM_BAD
;
26360 /* Large offsets must be prefixed. */
26361 if (!SIGNED_INTEGER_16BIT_P (offset
))
26363 if (TARGET_PREFIXED
)
26364 return INSN_FORM_PREFIXED_NUMERIC
;
26366 return INSN_FORM_BAD
;
26369 /* We have a 16-bit offset, see what default instruction format to use. */
26370 if (non_prefixed_format
== NON_PREFIXED_DEFAULT
)
26372 unsigned size
= GET_MODE_SIZE (mode
);
26374 /* On 64-bit systems, assume 64-bit integers need to use DS form
26375 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26376 (for LXV and STXV). TImode is problematical in that its normal usage
26377 is expected to be GPRs where it wants a DS instruction format, but if
26378 it goes into the vector registers, it wants a DQ instruction
26380 if (TARGET_POWERPC64
&& size
>= 8 && GET_MODE_CLASS (mode
) == MODE_INT
)
26381 non_prefixed_format
= NON_PREFIXED_DS
;
26383 else if (TARGET_VSX
&& size
>= 16
26384 && (VECTOR_MODE_P (mode
) || VECTOR_ALIGNMENT_P (mode
)))
26385 non_prefixed_format
= NON_PREFIXED_DQ
;
26388 non_prefixed_format
= NON_PREFIXED_D
;
26391 /* Classify the D/DS/DQ-form addresses. */
26392 switch (non_prefixed_format
)
26394 /* Instruction format D, all 16 bits are valid. */
26395 case NON_PREFIXED_D
:
26396 return INSN_FORM_D
;
26398 /* Instruction format DS, bottom 2 bits must be 0. */
26399 case NON_PREFIXED_DS
:
26400 if ((offset
& 3) == 0)
26401 return INSN_FORM_DS
;
26403 else if (TARGET_PREFIXED
)
26404 return INSN_FORM_PREFIXED_NUMERIC
;
26407 return INSN_FORM_BAD
;
26409 /* Instruction format DQ, bottom 4 bits must be 0. */
26410 case NON_PREFIXED_DQ
:
26411 if ((offset
& 15) == 0)
26412 return INSN_FORM_DQ
;
26414 else if (TARGET_PREFIXED
)
26415 return INSN_FORM_PREFIXED_NUMERIC
;
26418 return INSN_FORM_BAD
;
26424 return INSN_FORM_BAD
;
26427 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26428 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26429 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26430 a D-form or DS-form instruction. X-form and base_reg are always
26433 address_is_non_pfx_d_or_x (rtx addr
, machine_mode mode
,
26434 enum non_prefixed_form non_prefixed_format
)
26436 enum insn_form result_form
;
26438 result_form
= address_to_insn_form (addr
, mode
, non_prefixed_format
);
26440 switch (non_prefixed_format
)
26442 case NON_PREFIXED_D
:
26443 switch (result_form
)
26448 case INSN_FORM_BASE_REG
:
26454 case NON_PREFIXED_DS
:
26455 switch (result_form
)
26459 case INSN_FORM_BASE_REG
:
26471 /* Return true if an REG with a given MODE is loaded from or stored into a MEM
26472 location uses a non-prefixed D/DS/DQ-form address. This is used to validate
26473 the load or store with the PCREL_OPT optimization to make sure it is an
26474 instruction that can be optimized.
26476 We need to specify the MODE separately from the REG to allow for loads that
26477 include zero/sign/float extension. */
26480 pcrel_opt_valid_mem_p (rtx reg
, machine_mode mode
, rtx mem
)
26482 /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
26483 PCREL_OPT optimization. */
26484 enum non_prefixed_form non_prefixed
= reg_to_non_prefixed (reg
, mode
);
26485 if (non_prefixed
== NON_PREFIXED_X
)
26488 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26489 rtx addr
= XEXP (mem
, 0);
26490 enum insn_form iform
= address_to_insn_form (addr
, mode
, non_prefixed
);
26491 return (iform
== INSN_FORM_BASE_REG
26492 || iform
== INSN_FORM_D
26493 || iform
== INSN_FORM_DS
26494 || iform
== INSN_FORM_DQ
);
26497 /* Helper function to see if we're potentially looking at lfs/stfs.
26498 - PARALLEL containing a SET and a CLOBBER
26500 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26501 - CLOBBER is a V4SF
26503 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26508 is_lfs_stfs_insn (rtx_insn
*insn
)
26510 rtx pattern
= PATTERN (insn
);
26511 if (GET_CODE (pattern
) != PARALLEL
)
26514 /* This should be a parallel with exactly one set and one clobber. */
26515 if (XVECLEN (pattern
, 0) != 2)
26518 rtx set
= XVECEXP (pattern
, 0, 0);
26519 if (GET_CODE (set
) != SET
)
26522 rtx clobber
= XVECEXP (pattern
, 0, 1);
26523 if (GET_CODE (clobber
) != CLOBBER
)
26526 /* All we care is that the destination of the SET is a mem:SI,
26527 the source should be an UNSPEC_SI_FROM_SF, and the clobber
26528 should be a scratch:V4SF. */
26530 rtx dest
= SET_DEST (set
);
26531 rtx src
= SET_SRC (set
);
26532 rtx scratch
= SET_DEST (clobber
);
26534 if (GET_CODE (src
) != UNSPEC
)
26538 if (XINT (src
, 1) == UNSPEC_SI_FROM_SF
26539 && GET_CODE (dest
) == MEM
&& GET_MODE (dest
) == SImode
26540 && GET_CODE (scratch
) == SCRATCH
&& GET_MODE (scratch
) == V4SFmode
)
26544 if (XINT (src
, 1) == UNSPEC_SF_FROM_SI
26545 && GET_CODE (dest
) == REG
&& GET_MODE (dest
) == SFmode
26546 && GET_CODE (scratch
) == SCRATCH
&& GET_MODE (scratch
) == DImode
)
26552 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26553 instruction format (D/DS/DQ) used for offset memory. */
26555 enum non_prefixed_form
26556 reg_to_non_prefixed (rtx reg
, machine_mode mode
)
26558 /* If it isn't a register, use the defaults. */
26559 if (!REG_P (reg
) && !SUBREG_P (reg
))
26560 return NON_PREFIXED_DEFAULT
;
26562 unsigned int r
= reg_or_subregno (reg
);
26564 /* If we have a pseudo, use the default instruction format. */
26565 if (!HARD_REGISTER_NUM_P (r
))
26566 return NON_PREFIXED_DEFAULT
;
26568 unsigned size
= GET_MODE_SIZE (mode
);
26570 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
26571 128-bit floating point, and 128-bit integers. Before power9, only indexed
26572 addressing was available for vectors. */
26573 if (FP_REGNO_P (r
))
26575 if (mode
== SFmode
|| size
== 8 || FLOAT128_2REG_P (mode
))
26576 return NON_PREFIXED_D
;
26579 return NON_PREFIXED_X
;
26581 else if (TARGET_VSX
&& size
>= 16
26582 && (VECTOR_MODE_P (mode
)
26583 || VECTOR_ALIGNMENT_P (mode
)
26584 || mode
== TImode
|| mode
== CTImode
))
26585 return (TARGET_P9_VECTOR
) ? NON_PREFIXED_DQ
: NON_PREFIXED_X
;
26588 return NON_PREFIXED_DEFAULT
;
26591 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
26592 128-bit floating point, and 128-bit integers. Before power9, only indexed
26593 addressing was available. */
26594 else if (ALTIVEC_REGNO_P (r
))
26596 if (!TARGET_P9_VECTOR
)
26597 return NON_PREFIXED_X
;
26599 if (mode
== SFmode
|| size
== 8 || FLOAT128_2REG_P (mode
))
26600 return NON_PREFIXED_DS
;
26603 return NON_PREFIXED_X
;
26605 else if (TARGET_VSX
&& size
>= 16
26606 && (VECTOR_MODE_P (mode
)
26607 || VECTOR_ALIGNMENT_P (mode
)
26608 || mode
== TImode
|| mode
== CTImode
))
26609 return NON_PREFIXED_DQ
;
26612 return NON_PREFIXED_DEFAULT
;
26615 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
26616 otherwise. Assume that any other register, such as LR, CRs, etc. will go
26617 through the GPR registers for memory operations. */
26618 else if (TARGET_POWERPC64
&& size
>= 8)
26619 return NON_PREFIXED_DS
;
26621 return NON_PREFIXED_D
;
26625 /* Whether a load instruction is a prefixed instruction. This is called from
26626 the prefixed attribute processing. */
26629 prefixed_load_p (rtx_insn
*insn
)
26631 /* Validate the insn to make sure it is a normal load insn. */
26632 extract_insn_cached (insn
);
26633 if (recog_data
.n_operands
< 2)
26636 rtx reg
= recog_data
.operand
[0];
26637 rtx mem
= recog_data
.operand
[1];
26639 if (!REG_P (reg
) && !SUBREG_P (reg
))
26645 /* Prefixed load instructions do not support update or indexed forms. */
26646 if (get_attr_indexed (insn
) == INDEXED_YES
26647 || get_attr_update (insn
) == UPDATE_YES
)
26650 /* LWA uses the DS format instead of the D format that LWZ uses. */
26651 enum non_prefixed_form non_prefixed
;
26652 machine_mode reg_mode
= GET_MODE (reg
);
26653 machine_mode mem_mode
= GET_MODE (mem
);
26655 if (mem_mode
== SImode
&& reg_mode
== DImode
26656 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
26657 non_prefixed
= NON_PREFIXED_DS
;
26660 non_prefixed
= reg_to_non_prefixed (reg
, mem_mode
);
26662 if (non_prefixed
== NON_PREFIXED_X
&& is_lfs_stfs_insn (insn
))
26663 return address_is_prefixed (XEXP (mem
, 0), mem_mode
, NON_PREFIXED_DEFAULT
);
26665 return address_is_prefixed (XEXP (mem
, 0), mem_mode
, non_prefixed
);
26668 /* Whether a store instruction is a prefixed instruction. This is called from
26669 the prefixed attribute processing. */
26672 prefixed_store_p (rtx_insn
*insn
)
26674 /* Validate the insn to make sure it is a normal store insn. */
26675 extract_insn_cached (insn
);
26676 if (recog_data
.n_operands
< 2)
26679 rtx mem
= recog_data
.operand
[0];
26680 rtx reg
= recog_data
.operand
[1];
26682 if (!REG_P (reg
) && !SUBREG_P (reg
))
26688 /* Prefixed store instructions do not support update or indexed forms. */
26689 if (get_attr_indexed (insn
) == INDEXED_YES
26690 || get_attr_update (insn
) == UPDATE_YES
)
26693 machine_mode mem_mode
= GET_MODE (mem
);
26694 rtx addr
= XEXP (mem
, 0);
26695 enum non_prefixed_form non_prefixed
= reg_to_non_prefixed (reg
, mem_mode
);
26697 /* Need to make sure we aren't looking at a stfs which doesn't look
26698 like the other things reg_to_non_prefixed/address_is_prefixed
26700 if (non_prefixed
== NON_PREFIXED_X
&& is_lfs_stfs_insn (insn
))
26701 return address_is_prefixed (addr
, mem_mode
, NON_PREFIXED_DEFAULT
);
26703 return address_is_prefixed (addr
, mem_mode
, non_prefixed
);
26706 /* Whether a load immediate or add instruction is a prefixed instruction. This
26707 is called from the prefixed attribute processing. */
26710 prefixed_paddi_p (rtx_insn
*insn
)
26712 rtx set
= single_set (insn
);
26716 rtx dest
= SET_DEST (set
);
26717 rtx src
= SET_SRC (set
);
26719 if (!REG_P (dest
) && !SUBREG_P (dest
))
26722 /* Is this a load immediate that can't be done with a simple ADDI or
26724 if (CONST_INT_P (src
))
26725 return (satisfies_constraint_eI (src
)
26726 && !satisfies_constraint_I (src
)
26727 && !satisfies_constraint_L (src
));
26729 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26731 if (GET_CODE (src
) == PLUS
)
26733 rtx op1
= XEXP (src
, 1);
26735 return (CONST_INT_P (op1
)
26736 && satisfies_constraint_eI (op1
)
26737 && !satisfies_constraint_I (op1
)
26738 && !satisfies_constraint_L (op1
));
26741 /* If not, is it a load of a PC-relative address? */
26742 if (!TARGET_PCREL
|| GET_MODE (dest
) != Pmode
)
26745 if (!SYMBOL_REF_P (src
) && !LABEL_REF_P (src
) && GET_CODE (src
) != CONST
)
26748 enum insn_form iform
= address_to_insn_form (src
, Pmode
,
26749 NON_PREFIXED_DEFAULT
);
26751 return (iform
== INSN_FORM_PCREL_EXTERNAL
|| iform
== INSN_FORM_PCREL_LOCAL
);
26754 /* Whether the next instruction needs a 'p' prefix issued before the
26755 instruction is printed out. */
26756 static bool prepend_p_to_next_insn
;
26758 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26759 outputting the assembler code. On the PowerPC, we remember if the current
26760 insn is a prefixed insn where we need to emit a 'p' before the insn.
26762 In addition, if the insn is part of a PC-relative reference to an external
26763 label optimization, this is recorded also. */
26765 rs6000_final_prescan_insn (rtx_insn
*insn
, rtx
[], int)
26767 prepend_p_to_next_insn
= (get_attr_maybe_prefixed (insn
)
26768 == MAYBE_PREFIXED_YES
26769 && get_attr_prefixed (insn
) == PREFIXED_YES
);
26773 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
26774 We use it to emit a 'p' for prefixed insns that is set in
26775 FINAL_PRESCAN_INSN. */
26777 rs6000_asm_output_opcode (FILE *stream
)
26779 if (prepend_p_to_next_insn
)
26781 fprintf (stream
, "p");
26783 /* Reset the flag in the case where there are separate insn lines in the
26784 sequence, so the 'p' is only emitted for the first line. This shows up
26785 when we are doing the PCREL_OPT optimization, in that the label created
26786 with %r<n> would have a leading 'p' printed. */
26787 prepend_p_to_next_insn
= false;
26793 /* Emit the relocation to tie the next instruction to a previous instruction
26794 that loads up an external address. This is used to do the PCREL_OPT
26795 optimization. Note, the label is generated after the PLD of the got
26796 pc-relative address to allow for the assembler to insert NOPs before the PLD
26797 instruction. The operand is a constant integer that is the label
26801 output_pcrel_opt_reloc (rtx label_num
)
26803 rtx operands
[1] = { label_num
};
26804 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
26808 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26809 should be adjusted to reflect any required changes. This macro is used when
26810 there is some systematic length adjustment required that would be difficult
26811 to express in the length attribute.
26813 In the PowerPC, we use this to adjust the length of an instruction if one or
26814 more prefixed instructions are generated, using the attribute
26815 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
26816 hardware requires that a prefied instruciton does not cross a 64-byte
26817 boundary. This means the compiler has to assume the length of the first
26818 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
26819 already set for the non-prefixed instruction, we just need to udpate for the
26823 rs6000_adjust_insn_length (rtx_insn
*insn
, int length
)
26825 if (TARGET_PREFIXED
&& NONJUMP_INSN_P (insn
))
26827 rtx pattern
= PATTERN (insn
);
26828 if (GET_CODE (pattern
) != USE
&& GET_CODE (pattern
) != CLOBBER
26829 && get_attr_prefixed (insn
) == PREFIXED_YES
)
26831 int num_prefixed
= get_attr_max_prefixed_insns (insn
);
26832 length
+= 4 * (num_prefixed
+ 1);
/* Use hidden linkonce sections for the 476 link-stack thunk only when the
   assembler supports .hidden; otherwise fall back to a local label.  */
#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif
26846 /* Fills in the label name that should be used for a 476 link stack thunk. */
26849 get_ppc476_thunk_name (char name
[32])
26851 gcc_assert (TARGET_LINK_STACK
);
26853 if (USE_HIDDEN_LINKONCE
)
26854 sprintf (name
, "__ppc476.get_thunk");
26856 ASM_GENERATE_INTERNAL_LABEL (name
, "LPPC476_", 0);
26859 /* This function emits the simple thunk routine that is used to preserve
26860 the link stack on the 476 cpu. */
26862 static void rs6000_code_end (void) ATTRIBUTE_UNUSED
;
26864 rs6000_code_end (void)
26869 if (!TARGET_LINK_STACK
)
26872 get_ppc476_thunk_name (name
);
26874 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
, get_identifier (name
),
26875 build_function_type_list (void_type_node
, NULL_TREE
));
26876 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
26877 NULL_TREE
, void_type_node
);
26878 TREE_PUBLIC (decl
) = 1;
26879 TREE_STATIC (decl
) = 1;
26882 if (USE_HIDDEN_LINKONCE
&& !TARGET_XCOFF
)
26884 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
26885 targetm
.asm_out
.unique_section (decl
, 0);
26886 switch_to_section (get_named_section (decl
, NULL
, 0));
26887 DECL_WEAK (decl
) = 1;
26888 ASM_WEAKEN_DECL (asm_out_file
, decl
, name
, 0);
26889 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
26890 targetm
.asm_out
.assemble_visibility (decl
, VISIBILITY_HIDDEN
);
26891 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
26896 switch_to_section (text_section
);
26897 ASM_OUTPUT_LABEL (asm_out_file
, name
);
26900 DECL_INITIAL (decl
) = make_node (BLOCK
);
26901 current_function_decl
= decl
;
26902 allocate_struct_function (decl
, false);
26903 init_function_start (decl
);
26904 first_function_block_is_cold
= false;
26905 /* Make sure unwind info is emitted for the thunk if needed. */
26906 final_start_function (emit_barrier (), asm_out_file
, 1);
26908 fputs ("\tblr\n", asm_out_file
);
26910 final_end_function ();
26911 init_insn_lengths ();
26912 free_after_compilation (cfun
);
26914 current_function_decl
= NULL
;
26917 /* Add r30 to hard reg set if the prologue sets it up and it is not
26918 pic_offset_table_rtx. */
26921 rs6000_set_up_by_prologue (struct hard_reg_set_container
*set
)
26923 if (!TARGET_SINGLE_PIC_BASE
26925 && TARGET_MINIMAL_TOC
26926 && !constant_pool_empty_p ())
26927 add_to_hard_reg_set (&set
->set
, Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
26928 if (cfun
->machine
->split_stack_argp_used
)
26929 add_to_hard_reg_set (&set
->set
, Pmode
, 12);
26931 /* Make sure the hard reg set doesn't include r2, which was possibly added
26932 via PIC_OFFSET_TABLE_REGNUM. */
26934 remove_from_hard_reg_set (&set
->set
, Pmode
, TOC_REGNUM
);
26938 /* Helper function for rs6000_split_logical to emit a logical instruction after
26939 spliting the operation to single GPR registers.
26941 DEST is the destination register.
26942 OP1 and OP2 are the input source registers.
26943 CODE is the base operation (AND, IOR, XOR, NOT).
26944 MODE is the machine mode.
26945 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26946 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26947 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26950 rs6000_split_logical_inner (rtx dest
,
26953 enum rtx_code code
,
26955 bool complement_final_p
,
26956 bool complement_op1_p
,
26957 bool complement_op2_p
)
26961 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
26962 if (op2
&& CONST_INT_P (op2
)
26963 && (mode
== SImode
|| (mode
== DImode
&& TARGET_POWERPC64
))
26964 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
26966 HOST_WIDE_INT mask
= GET_MODE_MASK (mode
);
26967 HOST_WIDE_INT value
= INTVAL (op2
) & mask
;
26969 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26974 emit_insn (gen_rtx_SET (dest
, const0_rtx
));
26978 else if (value
== mask
)
26980 if (!rtx_equal_p (dest
, op1
))
26981 emit_insn (gen_rtx_SET (dest
, op1
));
26986 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
26987 into separate ORI/ORIS or XORI/XORIS instrucitons. */
26988 else if (code
== IOR
|| code
== XOR
)
26992 if (!rtx_equal_p (dest
, op1
))
26993 emit_insn (gen_rtx_SET (dest
, op1
));
26999 if (code
== AND
&& mode
== SImode
27000 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
27002 emit_insn (gen_andsi3 (dest
, op1
, op2
));
27006 if (complement_op1_p
)
27007 op1
= gen_rtx_NOT (mode
, op1
);
27009 if (complement_op2_p
)
27010 op2
= gen_rtx_NOT (mode
, op2
);
27012 /* For canonical RTL, if only one arm is inverted it is the first. */
27013 if (!complement_op1_p
&& complement_op2_p
)
27014 std::swap (op1
, op2
);
27016 bool_rtx
= ((code
== NOT
)
27017 ? gen_rtx_NOT (mode
, op1
)
27018 : gen_rtx_fmt_ee (code
, mode
, op1
, op2
));
27020 if (complement_final_p
)
27021 bool_rtx
= gen_rtx_NOT (mode
, bool_rtx
);
27023 emit_insn (gen_rtx_SET (dest
, bool_rtx
));
27026 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
27027 operations are split immediately during RTL generation to allow for more
27028 optimizations of the AND/IOR/XOR.
27030 OPERANDS is an array containing the destination and two input operands.
27031 CODE is the base operation (AND, IOR, XOR, NOT).
27032 MODE is the machine mode.
27033 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27034 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27035 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
27036 CLOBBER_REG is either NULL or a scratch register of type CC to allow
27037 formation of the AND instructions. */
27040 rs6000_split_logical_di (rtx operands
[3],
27041 enum rtx_code code
,
27042 bool complement_final_p
,
27043 bool complement_op1_p
,
27044 bool complement_op2_p
)
27046 const HOST_WIDE_INT lower_32bits
= HOST_WIDE_INT_C(0xffffffff);
27047 const HOST_WIDE_INT upper_32bits
= ~ lower_32bits
;
27048 const HOST_WIDE_INT sign_bit
= HOST_WIDE_INT_C(0x80000000);
27049 enum hi_lo
{ hi
= 0, lo
= 1 };
27050 rtx op0_hi_lo
[2], op1_hi_lo
[2], op2_hi_lo
[2];
27053 op0_hi_lo
[hi
] = gen_highpart (SImode
, operands
[0]);
27054 op1_hi_lo
[hi
] = gen_highpart (SImode
, operands
[1]);
27055 op0_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[0]);
27056 op1_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[1]);
27059 op2_hi_lo
[hi
] = op2_hi_lo
[lo
] = NULL_RTX
;
27062 if (!CONST_INT_P (operands
[2]))
27064 op2_hi_lo
[hi
] = gen_highpart_mode (SImode
, DImode
, operands
[2]);
27065 op2_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[2]);
27069 HOST_WIDE_INT value
= INTVAL (operands
[2]);
27070 HOST_WIDE_INT value_hi_lo
[2];
27072 gcc_assert (!complement_final_p
);
27073 gcc_assert (!complement_op1_p
);
27074 gcc_assert (!complement_op2_p
);
27076 value_hi_lo
[hi
] = value
>> 32;
27077 value_hi_lo
[lo
] = value
& lower_32bits
;
27079 for (i
= 0; i
< 2; i
++)
27081 HOST_WIDE_INT sub_value
= value_hi_lo
[i
];
27083 if (sub_value
& sign_bit
)
27084 sub_value
|= upper_32bits
;
27086 op2_hi_lo
[i
] = GEN_INT (sub_value
);
27088 /* If this is an AND instruction, check to see if we need to load
27089 the value in a register. */
27090 if (code
== AND
&& sub_value
!= -1 && sub_value
!= 0
27091 && !and_operand (op2_hi_lo
[i
], SImode
))
27092 op2_hi_lo
[i
] = force_reg (SImode
, op2_hi_lo
[i
]);
27097 for (i
= 0; i
< 2; i
++)
27099 /* Split large IOR/XOR operations. */
27100 if ((code
== IOR
|| code
== XOR
)
27101 && CONST_INT_P (op2_hi_lo
[i
])
27102 && !complement_final_p
27103 && !complement_op1_p
27104 && !complement_op2_p
27105 && !logical_const_operand (op2_hi_lo
[i
], SImode
))
27107 HOST_WIDE_INT value
= INTVAL (op2_hi_lo
[i
]);
27108 HOST_WIDE_INT hi_16bits
= value
& HOST_WIDE_INT_C(0xffff0000);
27109 HOST_WIDE_INT lo_16bits
= value
& HOST_WIDE_INT_C(0x0000ffff);
27110 rtx tmp
= gen_reg_rtx (SImode
);
27112 /* Make sure the constant is sign extended. */
27113 if ((hi_16bits
& sign_bit
) != 0)
27114 hi_16bits
|= upper_32bits
;
27116 rs6000_split_logical_inner (tmp
, op1_hi_lo
[i
], GEN_INT (hi_16bits
),
27117 code
, SImode
, false, false, false);
27119 rs6000_split_logical_inner (op0_hi_lo
[i
], tmp
, GEN_INT (lo_16bits
),
27120 code
, SImode
, false, false, false);
27123 rs6000_split_logical_inner (op0_hi_lo
[i
], op1_hi_lo
[i
], op2_hi_lo
[i
],
27124 code
, SImode
, complement_final_p
,
27125 complement_op1_p
, complement_op2_p
);
27131 /* Split the insns that make up boolean operations operating on multiple GPR
27132 registers. The boolean MD patterns ensure that the inputs either are
27133 exactly the same as the output registers, or there is no overlap.
27135 OPERANDS is an array containing the destination and two input operands.
27136 CODE is the base operation (AND, IOR, XOR, NOT).
27137 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27138 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27139 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27142 rs6000_split_logical (rtx operands
[3],
27143 enum rtx_code code
,
27144 bool complement_final_p
,
27145 bool complement_op1_p
,
27146 bool complement_op2_p
)
27148 machine_mode mode
= GET_MODE (operands
[0]);
27149 machine_mode sub_mode
;
27151 int sub_size
, regno0
, regno1
, nregs
, i
;
27153 /* If this is DImode, use the specialized version that can run before
27154 register allocation. */
27155 if (mode
== DImode
&& !TARGET_POWERPC64
)
27157 rs6000_split_logical_di (operands
, code
, complement_final_p
,
27158 complement_op1_p
, complement_op2_p
);
27164 op2
= (code
== NOT
) ? NULL_RTX
: operands
[2];
27165 sub_mode
= (TARGET_POWERPC64
) ? DImode
: SImode
;
27166 sub_size
= GET_MODE_SIZE (sub_mode
);
27167 regno0
= REGNO (op0
);
27168 regno1
= REGNO (op1
);
27170 gcc_assert (reload_completed
);
27171 gcc_assert (IN_RANGE (regno0
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
27172 gcc_assert (IN_RANGE (regno1
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
27174 nregs
= rs6000_hard_regno_nregs
[(int)mode
][regno0
];
27175 gcc_assert (nregs
> 1);
27177 if (op2
&& REG_P (op2
))
27178 gcc_assert (IN_RANGE (REGNO (op2
), FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
27180 for (i
= 0; i
< nregs
; i
++)
27182 int offset
= i
* sub_size
;
27183 rtx sub_op0
= simplify_subreg (sub_mode
, op0
, mode
, offset
);
27184 rtx sub_op1
= simplify_subreg (sub_mode
, op1
, mode
, offset
);
27185 rtx sub_op2
= ((code
== NOT
)
27187 : simplify_subreg (sub_mode
, op2
, mode
, offset
));
27189 rs6000_split_logical_inner (sub_op0
, sub_op1
, sub_op2
, code
, sub_mode
,
27190 complement_final_p
, complement_op1_p
,
27197 /* Emit instructions to move SRC to DST. Called by splitters for
27198 multi-register moves. It will emit at most one instruction for
27199 each register that is accessed; that is, it won't emit li/lis pairs
27200 (or equivalent for 64-bit code). One of SRC or DST must be a hard
27204 rs6000_split_multireg_move (rtx dst
, rtx src
)
27206 /* The register number of the first register being moved. */
27208 /* The mode that is to be moved. */
27210 /* The mode that the move is being done in, and its size. */
27211 machine_mode reg_mode
;
27213 /* The number of registers that will be moved. */
27216 reg
= REG_P (dst
) ? REGNO (dst
) : REGNO (src
);
27217 mode
= GET_MODE (dst
);
27218 nregs
= hard_regno_nregs (reg
, mode
);
27220 /* If we have a vector quad register for MMA, and this is a load or store,
27221 see if we can use vector paired load/stores. */
27222 if (mode
== XOmode
&& TARGET_MMA
27223 && (MEM_P (dst
) || MEM_P (src
)))
27228 /* If we have a vector pair/quad mode, split it into two/four separate
27230 else if (mode
== OOmode
|| mode
== XOmode
)
27231 reg_mode
= V1TImode
;
27232 else if (FP_REGNO_P (reg
))
27233 reg_mode
= DECIMAL_FLOAT_MODE_P (mode
) ? DDmode
:
27234 (TARGET_HARD_FLOAT
? DFmode
: SFmode
);
27235 else if (ALTIVEC_REGNO_P (reg
))
27236 reg_mode
= V16QImode
;
27238 reg_mode
= word_mode
;
27239 reg_mode_size
= GET_MODE_SIZE (reg_mode
);
27241 gcc_assert (reg_mode_size
* nregs
== GET_MODE_SIZE (mode
));
27243 /* TDmode residing in FP registers is special, since the ISA requires that
27244 the lower-numbered word of a register pair is always the most significant
27245 word, even in little-endian mode. This does not match the usual subreg
27246 semantics, so we cannnot use simplify_gen_subreg in those cases. Access
27247 the appropriate constituent registers "by hand" in little-endian mode.
27249 Note we do not need to check for destructive overlap here since TDmode
27250 can only reside in even/odd register pairs. */
27251 if (FP_REGNO_P (reg
) && DECIMAL_FLOAT_MODE_P (mode
) && !BYTES_BIG_ENDIAN
)
27256 for (i
= 0; i
< nregs
; i
++)
27258 if (REG_P (src
) && FP_REGNO_P (REGNO (src
)))
27259 p_src
= gen_rtx_REG (reg_mode
, REGNO (src
) + nregs
- 1 - i
);
27261 p_src
= simplify_gen_subreg (reg_mode
, src
, mode
,
27262 i
* reg_mode_size
);
27264 if (REG_P (dst
) && FP_REGNO_P (REGNO (dst
)))
27265 p_dst
= gen_rtx_REG (reg_mode
, REGNO (dst
) + nregs
- 1 - i
);
27267 p_dst
= simplify_gen_subreg (reg_mode
, dst
, mode
,
27268 i
* reg_mode_size
);
27270 emit_insn (gen_rtx_SET (p_dst
, p_src
));
27276 /* The __vector_pair and __vector_quad modes are multi-register
27277 modes, so if we have to load or store the registers, we have to be
27278 careful to properly swap them if we're in little endian mode
27279 below. This means the last register gets the first memory
27280 location. We also need to be careful of using the right register
27281 numbers if we are splitting XO to OO. */
27282 if (mode
== OOmode
|| mode
== XOmode
)
27284 nregs
= hard_regno_nregs (reg
, mode
);
27285 int reg_mode_nregs
= hard_regno_nregs (reg
, reg_mode
);
27288 unsigned offset
= 0;
27289 unsigned size
= GET_MODE_SIZE (reg_mode
);
27291 /* If we are reading an accumulator register, we have to
27292 deprime it before we can access it. */
27294 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27295 emit_insn (gen_mma_xxmfacc (src
, src
));
27297 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
27300 = WORDS_BIG_ENDIAN
? i
: (nregs
- reg_mode_nregs
- i
);
27301 rtx dst2
= adjust_address (dst
, reg_mode
, offset
);
27302 rtx src2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
27304 emit_insn (gen_rtx_SET (dst2
, src2
));
27312 unsigned offset
= 0;
27313 unsigned size
= GET_MODE_SIZE (reg_mode
);
27315 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
27318 = WORDS_BIG_ENDIAN
? i
: (nregs
- reg_mode_nregs
- i
);
27319 rtx dst2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
27320 rtx src2
= adjust_address (src
, reg_mode
, offset
);
27322 emit_insn (gen_rtx_SET (dst2
, src2
));
27325 /* If we are writing an accumulator register, we have to
27326 prime it after we've written it. */
27328 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27329 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27334 if (GET_CODE (src
) == UNSPEC
27335 || GET_CODE (src
) == UNSPEC_VOLATILE
)
27337 gcc_assert (XINT (src
, 1) == UNSPEC_VSX_ASSEMBLE
27338 || XINT (src
, 1) == UNSPECV_MMA_ASSEMBLE
);
27339 gcc_assert (REG_P (dst
));
27340 if (GET_MODE (src
) == XOmode
)
27341 gcc_assert (FP_REGNO_P (REGNO (dst
)));
27342 if (GET_MODE (src
) == OOmode
)
27343 gcc_assert (VSX_REGNO_P (REGNO (dst
)));
27345 int nvecs
= XVECLEN (src
, 0);
27346 for (int i
= 0; i
< nvecs
; i
++)
27349 int regno
= reg
+ i
;
27351 if (WORDS_BIG_ENDIAN
)
27353 op
= XVECEXP (src
, 0, i
);
27355 /* If we are loading an even VSX register and the memory location
27356 is adjacent to the next register's memory location (if any),
27357 then we can load them both with one LXVP instruction. */
27358 if ((regno
& 1) == 0)
27360 rtx op2
= XVECEXP (src
, 0, i
+ 1);
27361 if (adjacent_mem_locations (op
, op2
) == op
)
27363 op
= adjust_address (op
, OOmode
, 0);
27364 /* Skip the next register, since we're going to
27365 load it together with this register. */
27372 op
= XVECEXP (src
, 0, nvecs
- i
- 1);
27374 /* If we are loading an even VSX register and the memory location
27375 is adjacent to the next register's memory location (if any),
27376 then we can load them both with one LXVP instruction. */
27377 if ((regno
& 1) == 0)
27379 rtx op2
= XVECEXP (src
, 0, nvecs
- i
- 2);
27380 if (adjacent_mem_locations (op2
, op
) == op2
)
27382 op
= adjust_address (op2
, OOmode
, 0);
27383 /* Skip the next register, since we're going to
27384 load it together with this register. */
27390 rtx dst_i
= gen_rtx_REG (GET_MODE (op
), regno
);
27391 emit_insn (gen_rtx_SET (dst_i
, op
));
27394 /* We are writing an accumulator register, so we have to
27395 prime it after we've written it. */
27396 if (GET_MODE (src
) == XOmode
)
27397 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27402 /* Register -> register moves can use common code. */
27405 if (REG_P (src
) && REG_P (dst
) && (REGNO (src
) < REGNO (dst
)))
27407 /* If we are reading an accumulator register, we have to
27408 deprime it before we can access it. */
27410 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27411 emit_insn (gen_mma_xxmfacc (src
, src
));
27413 /* Move register range backwards, if we might have destructive
27416 /* XO/OO are opaque so cannot use subregs. */
27417 if (mode
== OOmode
|| mode
== XOmode
)
27419 for (i
= nregs
- 1; i
>= 0; i
--)
27421 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + i
);
27422 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + i
);
27423 emit_insn (gen_rtx_SET (dst_i
, src_i
));
27428 for (i
= nregs
- 1; i
>= 0; i
--)
27429 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
27430 i
* reg_mode_size
),
27431 simplify_gen_subreg (reg_mode
, src
, mode
,
27432 i
* reg_mode_size
)));
27435 /* If we are writing an accumulator register, we have to
27436 prime it after we've written it. */
27438 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27439 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27445 bool used_update
= false;
27446 rtx restore_basereg
= NULL_RTX
;
27448 if (MEM_P (src
) && INT_REGNO_P (reg
))
27452 if (GET_CODE (XEXP (src
, 0)) == PRE_INC
27453 || GET_CODE (XEXP (src
, 0)) == PRE_DEC
)
27456 breg
= XEXP (XEXP (src
, 0), 0);
27457 delta_rtx
= (GET_CODE (XEXP (src
, 0)) == PRE_INC
27458 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src
)))
27459 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src
))));
27460 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
27461 src
= replace_equiv_address (src
, breg
);
27463 else if (! rs6000_offsettable_memref_p (src
, reg_mode
, true))
27465 if (GET_CODE (XEXP (src
, 0)) == PRE_MODIFY
)
27467 rtx basereg
= XEXP (XEXP (src
, 0), 0);
27470 rtx ndst
= simplify_gen_subreg (reg_mode
, dst
, mode
, 0);
27471 emit_insn (gen_rtx_SET (ndst
,
27472 gen_rtx_MEM (reg_mode
,
27474 used_update
= true;
27477 emit_insn (gen_rtx_SET (basereg
,
27478 XEXP (XEXP (src
, 0), 1)));
27479 src
= replace_equiv_address (src
, basereg
);
27483 rtx basereg
= gen_rtx_REG (Pmode
, reg
);
27484 emit_insn (gen_rtx_SET (basereg
, XEXP (src
, 0)));
27485 src
= replace_equiv_address (src
, basereg
);
27489 breg
= XEXP (src
, 0);
27490 if (GET_CODE (breg
) == PLUS
|| GET_CODE (breg
) == LO_SUM
)
27491 breg
= XEXP (breg
, 0);
27493 /* If the base register we are using to address memory is
27494 also a destination reg, then change that register last. */
27496 && REGNO (breg
) >= REGNO (dst
)
27497 && REGNO (breg
) < REGNO (dst
) + nregs
)
27498 j
= REGNO (breg
) - REGNO (dst
);
27500 else if (MEM_P (dst
) && INT_REGNO_P (reg
))
27504 if (GET_CODE (XEXP (dst
, 0)) == PRE_INC
27505 || GET_CODE (XEXP (dst
, 0)) == PRE_DEC
)
27508 breg
= XEXP (XEXP (dst
, 0), 0);
27509 delta_rtx
= (GET_CODE (XEXP (dst
, 0)) == PRE_INC
27510 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst
)))
27511 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst
))));
27513 /* We have to update the breg before doing the store.
27514 Use store with update, if available. */
27518 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
27519 emit_insn (TARGET_32BIT
27520 ? (TARGET_POWERPC64
27521 ? gen_movdi_si_update (breg
, breg
, delta_rtx
, nsrc
)
27522 : gen_movsi_si_update (breg
, breg
, delta_rtx
, nsrc
))
27523 : gen_movdi_di_update (breg
, breg
, delta_rtx
, nsrc
));
27524 used_update
= true;
27527 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
27528 dst
= replace_equiv_address (dst
, breg
);
27530 else if (!rs6000_offsettable_memref_p (dst
, reg_mode
, true)
27531 && GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
27533 if (GET_CODE (XEXP (dst
, 0)) == PRE_MODIFY
)
27535 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
27538 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
27539 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode
,
27542 used_update
= true;
27545 emit_insn (gen_rtx_SET (basereg
,
27546 XEXP (XEXP (dst
, 0), 1)));
27547 dst
= replace_equiv_address (dst
, basereg
);
27551 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
27552 rtx offsetreg
= XEXP (XEXP (dst
, 0), 1);
27553 gcc_assert (GET_CODE (XEXP (dst
, 0)) == PLUS
27555 && REG_P (offsetreg
)
27556 && REGNO (basereg
) != REGNO (offsetreg
));
27557 if (REGNO (basereg
) == 0)
27559 rtx tmp
= offsetreg
;
27560 offsetreg
= basereg
;
27563 emit_insn (gen_add3_insn (basereg
, basereg
, offsetreg
));
27564 restore_basereg
= gen_sub3_insn (basereg
, basereg
, offsetreg
);
27565 dst
= replace_equiv_address (dst
, basereg
);
27568 else if (GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
27569 gcc_assert (rs6000_offsettable_memref_p (dst
, reg_mode
, true));
27572 /* If we are reading an accumulator register, we have to
27573 deprime it before we can access it. */
27574 if (TARGET_MMA
&& REG_P (src
)
27575 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27576 emit_insn (gen_mma_xxmfacc (src
, src
));
27578 for (i
= 0; i
< nregs
; i
++)
27580 /* Calculate index to next subword. */
27585 /* If compiler already emitted move of first word by
27586 store with update, no need to do anything. */
27587 if (j
== 0 && used_update
)
27590 /* XO/OO are opaque so cannot use subregs. */
27591 if (mode
== OOmode
|| mode
== XOmode
)
27593 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + j
);
27594 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + j
);
27595 emit_insn (gen_rtx_SET (dst_i
, src_i
));
27598 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
27599 j
* reg_mode_size
),
27600 simplify_gen_subreg (reg_mode
, src
, mode
,
27601 j
* reg_mode_size
)));
27604 /* If we are writing an accumulator register, we have to
27605 prime it after we've written it. */
27606 if (TARGET_MMA
&& REG_P (dst
)
27607 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27608 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27610 if (restore_basereg
!= NULL_RTX
)
27611 emit_insn (restore_basereg
);
27615 /* Return true if the peephole2 can combine a load involving a combination of
27616 an addis instruction and a load with an offset that can be fused together on
27620 fusion_gpr_load_p (rtx addis_reg
, /* register set via addis. */
27621 rtx addis_value
, /* addis value. */
27622 rtx target
, /* target register that is loaded. */
27623 rtx mem
) /* bottom part of the memory addr. */
27628 /* Validate arguments. */
27629 if (!base_reg_operand (addis_reg
, GET_MODE (addis_reg
)))
27632 if (!base_reg_operand (target
, GET_MODE (target
)))
27635 if (!fusion_gpr_addis (addis_value
, GET_MODE (addis_value
)))
27638 /* Allow sign/zero extension. */
27639 if (GET_CODE (mem
) == ZERO_EXTEND
27640 || (GET_CODE (mem
) == SIGN_EXTEND
&& TARGET_P8_FUSION_SIGN
))
27641 mem
= XEXP (mem
, 0);
27646 if (!fusion_gpr_mem_load (mem
, GET_MODE (mem
)))
27649 addr
= XEXP (mem
, 0); /* either PLUS or LO_SUM. */
27650 if (GET_CODE (addr
) != PLUS
&& GET_CODE (addr
) != LO_SUM
)
27653 /* Validate that the register used to load the high value is either the
27654 register being loaded, or we can safely replace its use.
27656 This function is only called from the peephole2 pass and we assume that
27657 there are 2 instructions in the peephole (addis and load), so we want to
27658 check if the target register was not used in the memory address and the
27659 register to hold the addis result is dead after the peephole. */
27660 if (REGNO (addis_reg
) != REGNO (target
))
27662 if (reg_mentioned_p (target
, mem
))
27665 if (!peep2_reg_dead_p (2, addis_reg
))
27668 /* If the target register being loaded is the stack pointer, we must
27669 avoid loading any other value into it, even temporarily. */
27670 if (REG_P (target
) && REGNO (target
) == STACK_POINTER_REGNUM
)
27674 base_reg
= XEXP (addr
, 0);
27675 return REGNO (addis_reg
) == REGNO (base_reg
);
27678 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27679 sequence. We adjust the addis register to use the target register. If the
27680 load sign extends, we adjust the code to do the zero extending load, and an
27681 explicit sign extension later since the fusion only covers zero extending
27685 operands[0] register set with addis (to be replaced with target)
27686 operands[1] value set via addis
27687 operands[2] target register being loaded
27688 operands[3] D-form memory reference using operands[0]. */
27691 expand_fusion_gpr_load (rtx
*operands
)
27693 rtx addis_value
= operands
[1];
27694 rtx target
= operands
[2];
27695 rtx orig_mem
= operands
[3];
27696 rtx new_addr
, new_mem
, orig_addr
, offset
;
27697 enum rtx_code plus_or_lo_sum
;
27698 machine_mode target_mode
= GET_MODE (target
);
27699 machine_mode extend_mode
= target_mode
;
27700 machine_mode ptr_mode
= Pmode
;
27701 enum rtx_code extend
= UNKNOWN
;
27703 if (GET_CODE (orig_mem
) == ZERO_EXTEND
27704 || (TARGET_P8_FUSION_SIGN
&& GET_CODE (orig_mem
) == SIGN_EXTEND
))
27706 extend
= GET_CODE (orig_mem
);
27707 orig_mem
= XEXP (orig_mem
, 0);
27708 target_mode
= GET_MODE (orig_mem
);
27711 gcc_assert (MEM_P (orig_mem
));
27713 orig_addr
= XEXP (orig_mem
, 0);
27714 plus_or_lo_sum
= GET_CODE (orig_addr
);
27715 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
27717 offset
= XEXP (orig_addr
, 1);
27718 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
27719 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
27721 if (extend
!= UNKNOWN
)
27722 new_mem
= gen_rtx_fmt_e (ZERO_EXTEND
, extend_mode
, new_mem
);
27724 new_mem
= gen_rtx_UNSPEC (extend_mode
, gen_rtvec (1, new_mem
),
27725 UNSPEC_FUSION_GPR
);
27726 emit_insn (gen_rtx_SET (target
, new_mem
));
27728 if (extend
== SIGN_EXTEND
)
27730 int sub_off
= ((BYTES_BIG_ENDIAN
)
27731 ? GET_MODE_SIZE (extend_mode
) - GET_MODE_SIZE (target_mode
)
27734 = simplify_subreg (target_mode
, target
, extend_mode
, sub_off
);
27736 emit_insn (gen_rtx_SET (target
,
27737 gen_rtx_SIGN_EXTEND (extend_mode
, sign_reg
)));
27743 /* Emit the addis instruction that will be part of a fused instruction
27747 emit_fusion_addis (rtx target
, rtx addis_value
)
27750 const char *addis_str
= NULL
;
27752 /* Emit the addis instruction. */
27753 fuse_ops
[0] = target
;
27754 if (satisfies_constraint_L (addis_value
))
27756 fuse_ops
[1] = addis_value
;
27757 addis_str
= "lis %0,%v1";
27760 else if (GET_CODE (addis_value
) == PLUS
)
27762 rtx op0
= XEXP (addis_value
, 0);
27763 rtx op1
= XEXP (addis_value
, 1);
27765 if (REG_P (op0
) && CONST_INT_P (op1
)
27766 && satisfies_constraint_L (op1
))
27770 addis_str
= "addis %0,%1,%v2";
27774 else if (GET_CODE (addis_value
) == HIGH
)
27776 rtx value
= XEXP (addis_value
, 0);
27777 if (GET_CODE (value
) == UNSPEC
&& XINT (value
, 1) == UNSPEC_TOCREL
)
27779 fuse_ops
[1] = XVECEXP (value
, 0, 0); /* symbol ref. */
27780 fuse_ops
[2] = XVECEXP (value
, 0, 1); /* TOC register. */
27782 addis_str
= "addis %0,%2,%1@toc@ha";
27784 else if (TARGET_XCOFF
)
27785 addis_str
= "addis %0,%1@u(%2)";
27788 gcc_unreachable ();
27791 else if (GET_CODE (value
) == PLUS
)
27793 rtx op0
= XEXP (value
, 0);
27794 rtx op1
= XEXP (value
, 1);
27796 if (GET_CODE (op0
) == UNSPEC
27797 && XINT (op0
, 1) == UNSPEC_TOCREL
27798 && CONST_INT_P (op1
))
27800 fuse_ops
[1] = XVECEXP (op0
, 0, 0); /* symbol ref. */
27801 fuse_ops
[2] = XVECEXP (op0
, 0, 1); /* TOC register. */
27804 addis_str
= "addis %0,%2,%1+%3@toc@ha";
27806 else if (TARGET_XCOFF
)
27807 addis_str
= "addis %0,%1+%3@u(%2)";
27810 gcc_unreachable ();
27814 else if (satisfies_constraint_L (value
))
27816 fuse_ops
[1] = value
;
27817 addis_str
= "lis %0,%v1";
27820 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (value
))
27822 fuse_ops
[1] = value
;
27823 addis_str
= "lis %0,%1@ha";
27828 fatal_insn ("Could not generate addis value for fusion", addis_value
);
27830 output_asm_insn (addis_str
, fuse_ops
);
27833 /* Emit a D-form load or store instruction that is the second instruction
27834 of a fusion sequence. */
27837 emit_fusion_load (rtx load_reg
, rtx addis_reg
, rtx offset
, const char *insn_str
)
27840 char insn_template
[80];
27842 fuse_ops
[0] = load_reg
;
27843 fuse_ops
[1] = addis_reg
;
27845 if (CONST_INT_P (offset
) && satisfies_constraint_I (offset
))
27847 sprintf (insn_template
, "%s %%0,%%2(%%1)", insn_str
);
27848 fuse_ops
[2] = offset
;
27849 output_asm_insn (insn_template
, fuse_ops
);
27852 else if (GET_CODE (offset
) == UNSPEC
27853 && XINT (offset
, 1) == UNSPEC_TOCREL
)
27856 sprintf (insn_template
, "%s %%0,%%2@toc@l(%%1)", insn_str
);
27858 else if (TARGET_XCOFF
)
27859 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
27862 gcc_unreachable ();
27864 fuse_ops
[2] = XVECEXP (offset
, 0, 0);
27865 output_asm_insn (insn_template
, fuse_ops
);
27868 else if (GET_CODE (offset
) == PLUS
27869 && GET_CODE (XEXP (offset
, 0)) == UNSPEC
27870 && XINT (XEXP (offset
, 0), 1) == UNSPEC_TOCREL
27871 && CONST_INT_P (XEXP (offset
, 1)))
27873 rtx tocrel_unspec
= XEXP (offset
, 0);
27875 sprintf (insn_template
, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str
);
27877 else if (TARGET_XCOFF
)
27878 sprintf (insn_template
, "%s %%0,%%2+%%3@l(%%1)", insn_str
);
27881 gcc_unreachable ();
27883 fuse_ops
[2] = XVECEXP (tocrel_unspec
, 0, 0);
27884 fuse_ops
[3] = XEXP (offset
, 1);
27885 output_asm_insn (insn_template
, fuse_ops
);
27888 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (offset
))
27890 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
27892 fuse_ops
[2] = offset
;
27893 output_asm_insn (insn_template
, fuse_ops
);
27897 fatal_insn ("Unable to generate load/store offset for fusion", offset
);
27902 /* Given an address, convert it into the addis and load offset parts. Addresses
27903 created during the peephole2 process look like:
27904 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
27905 (unspec [(...)] UNSPEC_TOCREL)) */
27908 fusion_split_address (rtx addr
, rtx
*p_hi
, rtx
*p_lo
)
27912 if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
27914 hi
= XEXP (addr
, 0);
27915 lo
= XEXP (addr
, 1);
27918 gcc_unreachable ();
27924 /* Return a string to fuse an addis instruction with a gpr load to the same
27925 register that we loaded up the addis instruction. The address that is used
27926 is the logical address that was formed during peephole2:
27927 (lo_sum (high) (low-part))
27929 The code is complicated, so we call output_asm_insn directly, and just
27933 emit_fusion_gpr_load (rtx target
, rtx mem
)
27938 const char *load_str
= NULL
;
27941 if (GET_CODE (mem
) == ZERO_EXTEND
)
27942 mem
= XEXP (mem
, 0);
27944 gcc_assert (REG_P (target
) && MEM_P (mem
));
27946 addr
= XEXP (mem
, 0);
27947 fusion_split_address (addr
, &addis_value
, &load_offset
);
27949 /* Now emit the load instruction to the same register. */
27950 mode
= GET_MODE (mem
);
27968 gcc_assert (TARGET_POWERPC64
);
27973 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target
, mem
));
27976 /* Emit the addis instruction. */
27977 emit_fusion_addis (target
, addis_value
);
27979 /* Emit the D-form load instruction. */
27980 emit_fusion_load (target
, target
, load_offset
, load_str
);
27985 /* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
27986 ignores it then. */
27987 static GTY(()) tree atomic_hold_decl
;
27988 static GTY(()) tree atomic_clear_decl
;
27989 static GTY(()) tree atomic_update_decl
;
27991 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
27993 rs6000_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
27995 if (!TARGET_HARD_FLOAT
)
27997 #ifdef RS6000_GLIBC_ATOMIC_FENV
27998 if (atomic_hold_decl
== NULL_TREE
)
28001 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
28002 get_identifier ("__atomic_feholdexcept"),
28003 build_function_type_list (void_type_node
,
28004 double_ptr_type_node
,
28006 TREE_PUBLIC (atomic_hold_decl
) = 1;
28007 DECL_EXTERNAL (atomic_hold_decl
) = 1;
28010 if (atomic_clear_decl
== NULL_TREE
)
28013 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
28014 get_identifier ("__atomic_feclearexcept"),
28015 build_function_type_list (void_type_node
,
28017 TREE_PUBLIC (atomic_clear_decl
) = 1;
28018 DECL_EXTERNAL (atomic_clear_decl
) = 1;
28021 tree const_double
= build_qualified_type (double_type_node
,
28023 tree const_double_ptr
= build_pointer_type (const_double
);
28024 if (atomic_update_decl
== NULL_TREE
)
28027 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
28028 get_identifier ("__atomic_feupdateenv"),
28029 build_function_type_list (void_type_node
,
28032 TREE_PUBLIC (atomic_update_decl
) = 1;
28033 DECL_EXTERNAL (atomic_update_decl
) = 1;
28036 tree fenv_var
= create_tmp_var_raw (double_type_node
);
28037 TREE_ADDRESSABLE (fenv_var
) = 1;
28038 tree fenv_addr
= build1 (ADDR_EXPR
, double_ptr_type_node
,
28039 build4 (TARGET_EXPR
, double_type_node
, fenv_var
,
28040 void_node
, NULL_TREE
, NULL_TREE
));
28042 *hold
= build_call_expr (atomic_hold_decl
, 1, fenv_addr
);
28043 *clear
= build_call_expr (atomic_clear_decl
, 0);
28044 *update
= build_call_expr (atomic_update_decl
, 1,
28045 fold_convert (const_double_ptr
, fenv_addr
));
28050 tree mffs
= rs6000_builtin_decls
[RS6000_BIF_MFFS
];
28051 tree mtfsf
= rs6000_builtin_decls
[RS6000_BIF_MTFSF
];
28052 tree call_mffs
= build_call_expr (mffs
, 0);
28054 /* Generates the equivalent of feholdexcept (&fenv_var)
28056 *fenv_var = __builtin_mffs ();
28058 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
28059 __builtin_mtfsf (0xff, fenv_hold); */
28061 /* Mask to clear everything except for the rounding modes and non-IEEE
28062 arithmetic flag. */
28063 const unsigned HOST_WIDE_INT hold_exception_mask
28064 = HOST_WIDE_INT_C (0xffffffff00000007);
28066 tree fenv_var
= create_tmp_var_raw (double_type_node
);
28068 tree hold_mffs
= build4 (TARGET_EXPR
, double_type_node
, fenv_var
, call_mffs
,
28069 NULL_TREE
, NULL_TREE
);
28071 tree fenv_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_var
);
28072 tree fenv_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
28073 build_int_cst (uint64_type_node
,
28074 hold_exception_mask
));
28076 tree fenv_hold_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
28079 tree hold_mtfsf
= build_call_expr (mtfsf
, 2,
28080 build_int_cst (unsigned_type_node
, 0xff),
28083 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, hold_mffs
, hold_mtfsf
);
28085 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
28087 double fenv_clear = __builtin_mffs ();
28088 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
28089 __builtin_mtfsf (0xff, fenv_clear); */
28091 /* Mask to clear everything except for the rounding modes and non-IEEE
28092 arithmetic flag. */
28093 const unsigned HOST_WIDE_INT clear_exception_mask
28094 = HOST_WIDE_INT_C (0xffffffff00000000);
28096 tree fenv_clear
= create_tmp_var_raw (double_type_node
);
28098 tree clear_mffs
= build4 (TARGET_EXPR
, double_type_node
, fenv_clear
,
28099 call_mffs
, NULL_TREE
, NULL_TREE
);
28101 tree fenv_clean_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_clear
);
28102 tree fenv_clear_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
,
28104 build_int_cst (uint64_type_node
,
28105 clear_exception_mask
));
28107 tree fenv_clear_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
28108 fenv_clear_llu_and
);
28110 tree clear_mtfsf
= build_call_expr (mtfsf
, 2,
28111 build_int_cst (unsigned_type_node
, 0xff),
28114 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, clear_mffs
, clear_mtfsf
);
28116 /* Generates the equivalent of feupdateenv (&fenv_var)
28118 double old_fenv = __builtin_mffs ();
28119 double fenv_update;
28120 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
28121 (*(uint64_t*)fenv_var 0x1ff80fff);
28122 __builtin_mtfsf (0xff, fenv_update); */
28124 const unsigned HOST_WIDE_INT update_exception_mask
28125 = HOST_WIDE_INT_C (0xffffffff1fffff00);
28126 const unsigned HOST_WIDE_INT new_exception_mask
28127 = HOST_WIDE_INT_C (0x1ff80fff);
28129 tree old_fenv
= create_tmp_var_raw (double_type_node
);
28130 tree update_mffs
= build4 (TARGET_EXPR
, double_type_node
, old_fenv
,
28131 call_mffs
, NULL_TREE
, NULL_TREE
);
28133 tree old_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, old_fenv
);
28134 tree old_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, old_llu
,
28135 build_int_cst (uint64_type_node
,
28136 update_exception_mask
));
28138 tree new_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
28139 build_int_cst (uint64_type_node
,
28140 new_exception_mask
));
28142 tree new_llu_mask
= build2 (BIT_IOR_EXPR
, uint64_type_node
,
28143 old_llu_and
, new_llu_and
);
28145 tree fenv_update_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
28148 tree update_mtfsf
= build_call_expr (mtfsf
, 2,
28149 build_int_cst (unsigned_type_node
, 0xff),
28150 fenv_update_mtfsf
);
28152 *update
= build2 (COMPOUND_EXPR
, void_type_node
, update_mffs
, update_mtfsf
);
28156 rs6000_generate_float2_double_code (rtx dst
, rtx src1
, rtx src2
)
28158 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28160 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
28161 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
28163 /* The destination of the vmrgew instruction layout is:
28164 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
28165 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28166 vmrgew instruction will be correct. */
28167 if (BYTES_BIG_ENDIAN
)
28169 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0
, src1
, src2
,
28171 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1
, src1
, src2
,
28176 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
28177 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
28180 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
28181 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
28183 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2
, rtx_tmp0
));
28184 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3
, rtx_tmp1
));
28186 if (BYTES_BIG_ENDIAN
)
28187 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
28189 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
28193 rs6000_generate_float2_code (bool signed_convert
, rtx dst
, rtx src1
, rtx src2
)
28195 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28197 rtx_tmp0
= gen_reg_rtx (V2DImode
);
28198 rtx_tmp1
= gen_reg_rtx (V2DImode
);
28200 /* The destination of the vmrgew instruction layout is:
28201 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
28202 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28203 vmrgew instruction will be correct. */
28204 if (BYTES_BIG_ENDIAN
)
28206 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
28207 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
28211 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
28212 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
28215 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
28216 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
28218 if (signed_convert
)
28220 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2
, rtx_tmp0
));
28221 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3
, rtx_tmp1
));
28225 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2
, rtx_tmp0
));
28226 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3
, rtx_tmp1
));
28229 if (BYTES_BIG_ENDIAN
)
28230 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
28232 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
28236 rs6000_generate_vsigned2_code (bool signed_convert
, rtx dst
, rtx src1
,
28239 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28241 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
28242 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
28244 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
28245 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
28247 rtx_tmp2
= gen_reg_rtx (V4SImode
);
28248 rtx_tmp3
= gen_reg_rtx (V4SImode
);
28250 if (signed_convert
)
28252 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2
, rtx_tmp0
));
28253 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3
, rtx_tmp1
));
28257 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2
, rtx_tmp0
));
28258 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3
, rtx_tmp1
));
28261 emit_insn (gen_p8_vmrgew_v4si (dst
, rtx_tmp2
, rtx_tmp3
));
28264 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
28267 rs6000_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
28268 optimization_type opt_type
)
28273 return (opt_type
== OPTIMIZE_FOR_SPEED
28274 && RS6000_RECIP_AUTO_RSQRTE_P (mode1
));
28281 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28283 static HOST_WIDE_INT
28284 rs6000_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
28286 if (TREE_CODE (exp
) == STRING_CST
28287 && (STRICT_ALIGNMENT
|| !optimize_size
))
28288 return MAX (align
, BITS_PER_WORD
);
28292 /* Implement TARGET_STARTING_FRAME_OFFSET. */
28294 static HOST_WIDE_INT
28295 rs6000_starting_frame_offset (void)
28297 if (FRAME_GROWS_DOWNWARD
)
28299 return RS6000_STARTING_FRAME_OFFSET
;
28302 /* Internal function to return the built-in function id for the complex
28303 multiply operation for a given mode. */
28305 static inline built_in_function
28306 complex_multiply_builtin_code (machine_mode mode
)
28308 gcc_assert (IN_RANGE (mode
, MIN_MODE_COMPLEX_FLOAT
, MAX_MODE_COMPLEX_FLOAT
));
28309 int func
= BUILT_IN_COMPLEX_MUL_MIN
+ mode
- MIN_MODE_COMPLEX_FLOAT
;
28310 return (built_in_function
) func
;
28313 /* Internal function to return the built-in function id for the complex divide
28314 operation for a given mode. */
28316 static inline built_in_function
28317 complex_divide_builtin_code (machine_mode mode
)
28319 gcc_assert (IN_RANGE (mode
, MIN_MODE_COMPLEX_FLOAT
, MAX_MODE_COMPLEX_FLOAT
));
28320 int func
= BUILT_IN_COMPLEX_DIV_MIN
+ mode
- MIN_MODE_COMPLEX_FLOAT
;
28321 return (built_in_function
) func
;
28324 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
28325 function names from <foo>l to <foo>f128 if the default long double type is
28326 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
28327 include file switches the names on systems that support long double as IEEE
28328 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28329 In the future, glibc will export names like __ieee128_sinf128 and we can
28330 switch to using those instead of using sinf128, which pollutes the user's
28333 This will switch the names for Fortran math functions as well (which doesn't
28334 use math.h). However, Fortran needs other changes to the compiler and
28335 library before you can switch the real*16 type at compile time.
28337 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28338 only do this transformation if the __float128 type is enabled. This
28339 prevents us from doing the transformation on older 32-bit ports that might
28340 have enabled using IEEE 128-bit floating point as the default long double
28343 We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
28344 function names used for complex multiply and divide to the appropriate
28348 rs6000_mangle_decl_assembler_name (tree decl
, tree id
)
28350 /* Handle complex multiply/divide. For IEEE 128-bit, use __mulkc3 or
28351 __divkc3 and for IBM 128-bit use __multc3 and __divtc3. */
28352 if (TARGET_FLOAT128_TYPE
28353 && TREE_CODE (decl
) == FUNCTION_DECL
28354 && DECL_IS_UNDECLARED_BUILTIN (decl
)
28355 && DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
28357 built_in_function id
= DECL_FUNCTION_CODE (decl
);
28358 const char *newname
= NULL
;
28360 if (id
== complex_multiply_builtin_code (KCmode
))
28361 newname
= "__mulkc3";
28363 else if (id
== complex_multiply_builtin_code (ICmode
))
28364 newname
= "__multc3";
28366 else if (id
== complex_multiply_builtin_code (TCmode
))
28367 newname
= (TARGET_IEEEQUAD
) ? "__mulkc3" : "__multc3";
28369 else if (id
== complex_divide_builtin_code (KCmode
))
28370 newname
= "__divkc3";
28372 else if (id
== complex_divide_builtin_code (ICmode
))
28373 newname
= "__divtc3";
28375 else if (id
== complex_divide_builtin_code (TCmode
))
28376 newname
= (TARGET_IEEEQUAD
) ? "__divkc3" : "__divtc3";
28380 if (TARGET_DEBUG_BUILTIN
)
28381 fprintf (stderr
, "Map complex mul/div => %s\n", newname
);
28383 return get_identifier (newname
);
28387 /* Map long double built-in functions if long double is IEEE 128-bit. */
28388 if (TARGET_FLOAT128_TYPE
&& TARGET_IEEEQUAD
&& TARGET_LONG_DOUBLE_128
28389 && TREE_CODE (decl
) == FUNCTION_DECL
28390 && DECL_IS_UNDECLARED_BUILTIN (decl
)
28391 && DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
28393 size_t len
= IDENTIFIER_LENGTH (id
);
28394 const char *name
= IDENTIFIER_POINTER (id
);
28395 char *newname
= NULL
;
28397 /* See if it is one of the built-in functions with an unusual name. */
28398 switch (DECL_FUNCTION_CODE (decl
))
28400 case BUILT_IN_DREML
:
28401 newname
= xstrdup ("__remainderieee128");
28404 case BUILT_IN_GAMMAL
:
28405 newname
= xstrdup ("__lgammaieee128");
28408 case BUILT_IN_GAMMAL_R
:
28409 case BUILT_IN_LGAMMAL_R
:
28410 newname
= xstrdup ("__lgammaieee128_r");
28413 case BUILT_IN_NEXTTOWARD
:
28414 newname
= xstrdup ("__nexttoward_to_ieee128");
28417 case BUILT_IN_NEXTTOWARDF
:
28418 newname
= xstrdup ("__nexttowardf_to_ieee128");
28421 case BUILT_IN_NEXTTOWARDL
:
28422 newname
= xstrdup ("__nexttowardieee128");
28425 case BUILT_IN_POW10L
:
28426 newname
= xstrdup ("__exp10ieee128");
28429 case BUILT_IN_SCALBL
:
28430 newname
= xstrdup ("__scalbieee128");
28433 case BUILT_IN_SIGNIFICANDL
:
28434 newname
= xstrdup ("__significandieee128");
28437 case BUILT_IN_SINCOSL
:
28438 newname
= xstrdup ("__sincosieee128");
28445 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28448 size_t printf_len
= strlen ("printf");
28449 size_t scanf_len
= strlen ("scanf");
28450 size_t printf_chk_len
= strlen ("printf_chk");
28452 if (len
>= printf_len
28453 && strcmp (name
+ len
- printf_len
, "printf") == 0)
28454 newname
= xasprintf ("__%sieee128", name
);
28456 else if (len
>= scanf_len
28457 && strcmp (name
+ len
- scanf_len
, "scanf") == 0)
28458 newname
= xasprintf ("__isoc99_%sieee128", name
);
28460 else if (len
>= printf_chk_len
28461 && strcmp (name
+ len
- printf_chk_len
, "printf_chk") == 0)
28462 newname
= xasprintf ("%sieee128", name
);
28464 else if (name
[len
- 1] == 'l')
28466 bool uses_ieee128_p
= false;
28467 tree type
= TREE_TYPE (decl
);
28468 machine_mode ret_mode
= TYPE_MODE (type
);
28470 /* See if the function returns a IEEE 128-bit floating point type or
28472 if (ret_mode
== TFmode
|| ret_mode
== TCmode
)
28473 uses_ieee128_p
= true;
28476 function_args_iterator args_iter
;
28479 /* See if the function passes a IEEE 128-bit floating point type
28480 or complex type. */
28481 FOREACH_FUNCTION_ARGS (type
, arg
, args_iter
)
28483 machine_mode arg_mode
= TYPE_MODE (arg
);
28484 if (arg_mode
== TFmode
|| arg_mode
== TCmode
)
28486 uses_ieee128_p
= true;
28492 /* If we passed or returned an IEEE 128-bit floating point type,
28493 change the name. Use __<name>ieee128, instead of <name>l. */
28494 if (uses_ieee128_p
)
28495 newname
= xasprintf ("__%.*sieee128", (int)(len
- 1), name
);
28501 if (TARGET_DEBUG_BUILTIN
)
28502 fprintf (stderr
, "Map %s => %s\n", name
, newname
);
28504 id
= get_identifier (newname
);
28512 /* Predict whether the given loop in gimple will be transformed in the RTL
28513 doloop_optimize pass. */
28516 rs6000_predict_doloop_p (struct loop
*loop
)
28520 /* On rs6000, targetm.can_use_doloop_p is actually
28521 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28522 if (loop
->inner
!= NULL
)
28524 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
28525 fprintf (dump_file
, "Predict doloop failure due to"
28526 " loop nesting.\n");
28533 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28535 static machine_mode
28536 rs6000_preferred_doloop_mode (machine_mode
)
28541 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28544 rs6000_cannot_substitute_mem_equiv_p (rtx mem
)
28546 gcc_assert (MEM_P (mem
));
28548 /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
28549 type addresses, so don't allow MEMs with those address types to be
28550 substituted as an equivalent expression. See PR93974 for details. */
28551 if (GET_CODE (XEXP (mem
, 0)) == AND
)
28557 /* Implement TARGET_INVALID_CONVERSION. */
28559 static const char *
28560 rs6000_invalid_conversion (const_tree fromtype
, const_tree totype
)
28562 /* Make sure we're working with the canonical types. */
28563 if (TYPE_CANONICAL (fromtype
) != NULL_TREE
)
28564 fromtype
= TYPE_CANONICAL (fromtype
);
28565 if (TYPE_CANONICAL (totype
) != NULL_TREE
)
28566 totype
= TYPE_CANONICAL (totype
);
28568 machine_mode frommode
= TYPE_MODE (fromtype
);
28569 machine_mode tomode
= TYPE_MODE (totype
);
28571 if (frommode
!= tomode
)
28573 /* Do not allow conversions to/from XOmode and OOmode types. */
28574 if (frommode
== XOmode
)
28575 return N_("invalid conversion from type %<__vector_quad%>");
28576 if (tomode
== XOmode
)
28577 return N_("invalid conversion to type %<__vector_quad%>");
28578 if (frommode
== OOmode
)
28579 return N_("invalid conversion from type %<__vector_pair%>");
28580 if (tomode
== OOmode
)
28581 return N_("invalid conversion to type %<__vector_pair%>");
28584 /* Conversion allowed. */
28588 /* Convert a SFmode constant to the integer bit pattern. */
28591 rs6000_const_f32_to_i32 (rtx operand
)
28594 const struct real_value
*rv
= CONST_DOUBLE_REAL_VALUE (operand
);
28596 gcc_assert (GET_MODE (operand
) == SFmode
);
28597 REAL_VALUE_TO_TARGET_SINGLE (*rv
, value
);
28602 rs6000_emit_xxspltidp_v2df (rtx dst
, long value
)
28604 if (((value
& 0x7F800000) == 0) && ((value
& 0x7FFFFF) != 0))
28605 inform (input_location
,
28606 "the result for the xxspltidp instruction "
28607 "is undefined for subnormal input values");
28608 emit_insn( gen_xxspltidp_v2df_inst (dst
, GEN_INT (value
)));
28611 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28614 rs6000_gen_pic_addr_diff_vec (void)
28616 return rs6000_relative_jumptables
;
28620 rs6000_output_addr_vec_elt (FILE *file
, int value
)
28622 const char *directive
= TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t";
28625 fprintf (file
, "%s", directive
);
28626 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", value
);
28627 assemble_name (file
, buf
);
28628 fprintf (file
, "\n");
28632 /* Copy an integer constant to the vector constant structure. */
28635 constant_int_to_128bit_vector (rtx op
,
28638 vec_const_128bit_type
*info
)
28640 unsigned HOST_WIDE_INT uvalue
= UINTVAL (op
);
28641 unsigned bitsize
= GET_MODE_BITSIZE (mode
);
28643 for (int shift
= bitsize
- 8; shift
>= 0; shift
-= 8)
28644 info
->bytes
[byte_num
++] = (uvalue
>> shift
) & 0xff;
28647 /* Copy a floating point constant to the vector constant structure. */
28650 constant_fp_to_128bit_vector (rtx op
,
28653 vec_const_128bit_type
*info
)
28655 unsigned bitsize
= GET_MODE_BITSIZE (mode
);
28656 unsigned num_words
= bitsize
/ 32;
28657 const REAL_VALUE_TYPE
*rtype
= CONST_DOUBLE_REAL_VALUE (op
);
28658 long real_words
[VECTOR_128BIT_WORDS
];
28660 /* Make sure we don't overflow the real_words array and that it is
28661 filled completely. */
28662 gcc_assert (num_words
<= VECTOR_128BIT_WORDS
&& (bitsize
% 32) == 0);
28664 real_to_target (real_words
, rtype
, mode
);
28666 /* Iterate over each 32-bit word in the floating point constant. The
28667 real_to_target function puts out words in target endian fashion. We need
28668 to arrange the order so that the bytes are written in big endian order. */
28669 for (unsigned num
= 0; num
< num_words
; num
++)
28671 unsigned endian_num
= (BYTES_BIG_ENDIAN
28673 : num_words
- 1 - num
);
28675 unsigned uvalue
= real_words
[endian_num
];
28676 for (int shift
= 32 - 8; shift
>= 0; shift
-= 8)
28677 info
->bytes
[byte_num
++] = (uvalue
>> shift
) & 0xff;
28680 /* Mark that this constant involves floating point. */
28681 info
->fp_constant_p
= true;
28684 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28687 Break out the constant out to bytes, half words, words, and double words.
28688 Return true if we have successfully converted the constant.
28690 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28691 constants. Integer and floating point scalar constants are splatted to fill
28695 vec_const_128bit_to_bytes (rtx op
,
28697 vec_const_128bit_type
*info
)
28699 /* Initialize the constant structure. */
28700 memset ((void *)info
, 0, sizeof (vec_const_128bit_type
));
28702 /* Assume CONST_INTs are DImode. */
28703 if (mode
== VOIDmode
)
28704 mode
= CONST_INT_P (op
) ? DImode
: GET_MODE (op
);
28706 if (mode
== VOIDmode
)
28709 unsigned size
= GET_MODE_SIZE (mode
);
28710 bool splat_p
= false;
28712 if (size
> VECTOR_128BIT_BYTES
)
28715 /* Set up the bits. */
28716 switch (GET_CODE (op
))
28718 /* Integer constants, default to double word. */
28721 constant_int_to_128bit_vector (op
, mode
, 0, info
);
28726 /* Floating point constants. */
28729 /* Fail if the floating point constant is the wrong mode. */
28730 if (GET_MODE (op
) != mode
)
28733 /* SFmode stored as scalars are stored in DFmode format. */
28734 if (mode
== SFmode
)
28737 size
= GET_MODE_SIZE (DFmode
);
28740 constant_fp_to_128bit_vector (op
, mode
, 0, info
);
28745 /* Vector constants, iterate over each element. On little endian
28746 systems, we have to reverse the element numbers. */
28749 /* Fail if the vector constant is the wrong mode or size. */
28750 if (GET_MODE (op
) != mode
28751 || GET_MODE_SIZE (mode
) != VECTOR_128BIT_BYTES
)
28754 machine_mode ele_mode
= GET_MODE_INNER (mode
);
28755 size_t ele_size
= GET_MODE_SIZE (ele_mode
);
28756 size_t nunits
= GET_MODE_NUNITS (mode
);
28758 for (size_t num
= 0; num
< nunits
; num
++)
28760 rtx ele
= CONST_VECTOR_ELT (op
, num
);
28761 size_t byte_num
= (BYTES_BIG_ENDIAN
28763 : nunits
- 1 - num
) * ele_size
;
28765 if (CONST_INT_P (ele
))
28766 constant_int_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
28767 else if (CONST_DOUBLE_P (ele
))
28768 constant_fp_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
28776 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
28777 Since we are duplicating the element, we don't have to worry about
28779 case VEC_DUPLICATE
:
28781 /* Fail if the vector duplicate is the wrong mode or size. */
28782 if (GET_MODE (op
) != mode
28783 || GET_MODE_SIZE (mode
) != VECTOR_128BIT_BYTES
)
28786 machine_mode ele_mode
= GET_MODE_INNER (mode
);
28787 size_t ele_size
= GET_MODE_SIZE (ele_mode
);
28788 rtx ele
= XEXP (op
, 0);
28789 size_t nunits
= GET_MODE_NUNITS (mode
);
28791 if (!CONST_INT_P (ele
) && !CONST_DOUBLE_P (ele
))
28794 for (size_t num
= 0; num
< nunits
; num
++)
28796 size_t byte_num
= num
* ele_size
;
28798 if (CONST_INT_P (ele
))
28799 constant_int_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
28801 constant_fp_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
28807 /* Any thing else, just return failure. */
28812 /* Splat the constant to fill 128 bits if desired. */
28813 if (splat_p
&& size
< VECTOR_128BIT_BYTES
)
28815 if ((VECTOR_128BIT_BYTES
% size
) != 0)
28818 for (size_t offset
= size
;
28819 offset
< VECTOR_128BIT_BYTES
;
28821 memcpy ((void *) &info
->bytes
[offset
],
28822 (void *) &info
->bytes
[0],
28826 /* Remember original size. */
28827 info
->original_size
= size
;
28829 /* Determine if the bytes are all the same. */
28830 unsigned char first_byte
= info
->bytes
[0];
28831 info
->all_bytes_same
= true;
28832 for (size_t i
= 1; i
< VECTOR_128BIT_BYTES
; i
++)
28833 if (first_byte
!= info
->bytes
[i
])
28835 info
->all_bytes_same
= false;
28839 /* Pack half words together & determine if all of the half words are the
28841 for (size_t i
= 0; i
< VECTOR_128BIT_HALF_WORDS
; i
++)
28842 info
->half_words
[i
] = ((info
->bytes
[i
* 2] << 8)
28843 | info
->bytes
[(i
* 2) + 1]);
28845 unsigned short first_hword
= info
->half_words
[0];
28846 info
->all_half_words_same
= true;
28847 for (size_t i
= 1; i
< VECTOR_128BIT_HALF_WORDS
; i
++)
28848 if (first_hword
!= info
->half_words
[i
])
28850 info
->all_half_words_same
= false;
28854 /* Pack words together & determine if all of the words are the same. */
28855 for (size_t i
= 0; i
< VECTOR_128BIT_WORDS
; i
++)
28856 info
->words
[i
] = ((info
->bytes
[i
* 4] << 24)
28857 | (info
->bytes
[(i
* 4) + 1] << 16)
28858 | (info
->bytes
[(i
* 4) + 2] << 8)
28859 | info
->bytes
[(i
* 4) + 3]);
28861 info
->all_words_same
28862 = (info
->words
[0] == info
->words
[1]
28863 && info
->words
[0] == info
->words
[2]
28864 && info
->words
[0] == info
->words
[3]);
28866 /* Pack double words together & determine if all of the double words are the
28868 for (size_t i
= 0; i
< VECTOR_128BIT_DOUBLE_WORDS
; i
++)
28870 unsigned HOST_WIDE_INT d_word
= 0;
28871 for (size_t j
= 0; j
< 8; j
++)
28872 d_word
= (d_word
<< 8) | info
->bytes
[(i
* 8) + j
];
28874 info
->double_words
[i
] = d_word
;
28877 info
->all_double_words_same
28878 = (info
->double_words
[0] == info
->double_words
[1]);
28883 /* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
28884 if the LXVKQ instruction cannot be used. Otherwise return the immediate
28885 value to be used with the LXVKQ instruction. */
28888 constant_generates_lxvkq (vec_const_128bit_type
*vsx_const
)
28890 /* Is the instruction supported with power10 code generation, IEEE 128-bit
28891 floating point hardware and VSX registers are available. */
28892 if (!TARGET_IEEE128_CONSTANT
|| !TARGET_FLOAT128_HW
|| !TARGET_POWER10
28896 /* All of the constants that are generated by LXVKQ have the bottom 3 words
28898 if (vsx_const
->words
[1] != 0
28899 || vsx_const
->words
[2] != 0
28900 || vsx_const
->words
[3] != 0)
28903 /* See if we have a match for the first word. */
28904 switch (vsx_const
->words
[0])
28906 case 0x3FFF0000U
: return 1; /* IEEE 128-bit +1.0. */
28907 case 0x40000000U
: return 2; /* IEEE 128-bit +2.0. */
28908 case 0x40008000U
: return 3; /* IEEE 128-bit +3.0. */
28909 case 0x40010000U
: return 4; /* IEEE 128-bit +4.0. */
28910 case 0x40014000U
: return 5; /* IEEE 128-bit +5.0. */
28911 case 0x40018000U
: return 6; /* IEEE 128-bit +6.0. */
28912 case 0x4001C000U
: return 7; /* IEEE 128-bit +7.0. */
28913 case 0x7FFF0000U
: return 8; /* IEEE 128-bit +Infinity. */
28914 case 0x7FFF8000U
: return 9; /* IEEE 128-bit quiet NaN. */
28915 case 0x80000000U
: return 16; /* IEEE 128-bit -0.0. */
28916 case 0xBFFF0000U
: return 17; /* IEEE 128-bit -1.0. */
28917 case 0xC0000000U
: return 18; /* IEEE 128-bit -2.0. */
28918 case 0xC0008000U
: return 19; /* IEEE 128-bit -3.0. */
28919 case 0xC0010000U
: return 20; /* IEEE 128-bit -4.0. */
28920 case 0xC0014000U
: return 21; /* IEEE 128-bit -5.0. */
28921 case 0xC0018000U
: return 22; /* IEEE 128-bit -6.0. */
28922 case 0xC001C000U
: return 23; /* IEEE 128-bit -7.0. */
28923 case 0xFFFF0000U
: return 24; /* IEEE 128-bit -Infinity. */
28925 /* anything else cannot be loaded. */
28933 /* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if
28934 the XXSPLTIW instruction cannot be used. Otherwise return the immediate
28935 value to be used with the XXSPLTIW instruction. */
28938 constant_generates_xxspltiw (vec_const_128bit_type
*vsx_const
)
28940 if (!TARGET_SPLAT_WORD_CONSTANT
|| !TARGET_PREFIXED
|| !TARGET_VSX
)
28943 if (!vsx_const
->all_words_same
)
28946 /* If we can use XXSPLTIB, don't generate XXSPLTIW. */
28947 if (vsx_const
->all_bytes_same
)
28950 /* See if we can use VSPLTISH or VSPLTISW. */
28951 if (vsx_const
->all_half_words_same
)
28953 short sign_h_word
= vsx_const
->half_words
[0];
28954 if (EASY_VECTOR_15 (sign_h_word
))
28958 int sign_word
= vsx_const
->words
[0];
28959 if (EASY_VECTOR_15 (sign_word
))
28962 return vsx_const
->words
[0];
28965 /* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if
28966 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate
28967 value to be used with the XXSPLTIDP instruction. */
28970 constant_generates_xxspltidp (vec_const_128bit_type
*vsx_const
)
28972 if (!TARGET_SPLAT_FLOAT_CONSTANT
|| !TARGET_PREFIXED
|| !TARGET_VSX
)
28975 /* Reject if the two 64-bit segments are not the same. */
28976 if (!vsx_const
->all_double_words_same
)
28979 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
28980 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */
28981 if (vsx_const
->all_bytes_same
28982 || vsx_const
->all_half_words_same
28983 || vsx_const
->all_words_same
)
28986 unsigned HOST_WIDE_INT value
= vsx_const
->double_words
[0];
28988 /* Avoid values that look like DFmode NaN's, except for the normal NaN bit
28989 pattern and the signalling NaN bit pattern. Recognize infinity and
28990 negative infinity. */
28992 /* Bit representation of DFmode normal quiet NaN. */
28993 #define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000)
28995 /* Bit representation of DFmode normal signaling NaN. */
28996 #define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000)
28998 /* Bit representation of DFmode positive infinity. */
28999 #define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000)
29001 /* Bit representation of DFmode negative infinity. */
29002 #define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000)
29004 if (value
!= RS6000_CONST_DF_NAN
29005 && value
!= RS6000_CONST_DF_NANS
29006 && value
!= RS6000_CONST_DF_INF
29007 && value
!= RS6000_CONST_DF_NEG_INF
)
29009 /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
29010 the exponent, and 52 bits for the mantissa (not counting the hidden
29011 bit used for normal numbers). NaN values have the exponent set to all
29012 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */
29014 int df_exponent
= (value
>> 52) & 0x7ff;
29015 unsigned HOST_WIDE_INT
29016 df_mantissa
= value
& ((HOST_WIDE_INT_1U
<< 52) - HOST_WIDE_INT_1U
);
29018 if (df_exponent
== 0x7ff && df_mantissa
!= 0) /* other NaNs. */
29021 /* Avoid values that are DFmode subnormal values. Subnormal numbers have
29022 the exponent all 0 bits, and the mantissa non-zero. If the value is
29023 subnormal, then the hidden bit in the mantissa is not set. */
29024 if (df_exponent
== 0 && df_mantissa
!= 0) /* subnormal. */
29028 /* Change the representation to DFmode constant. */
29029 long df_words
[2] = { vsx_const
->words
[0], vsx_const
->words
[1] };
29031 /* real_from_target takes the target words in target order. */
29032 if (!BYTES_BIG_ENDIAN
)
29033 std::swap (df_words
[0], df_words
[1]);
29035 REAL_VALUE_TYPE rv_type
;
29036 real_from_target (&rv_type
, df_words
, DFmode
);
29038 const REAL_VALUE_TYPE
*rv
= &rv_type
;
29040 /* Validate that the number can be stored as a SFmode value. */
29041 if (!exact_real_truncate (SFmode
, rv
))
29044 /* Validate that the number is not a SFmode subnormal value (exponent is 0,
29045 mantissa field is non-zero) which is undefined for the XXSPLTIDP
29048 real_to_target (&sf_value
, rv
, SFmode
);
29050 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
29051 and 23 bits for the mantissa. Subnormal numbers have the exponent all
29052 0 bits, and the mantissa non-zero. */
29053 long sf_exponent
= (sf_value
>> 23) & 0xFF;
29054 long sf_mantissa
= sf_value
& 0x7FFFFF;
29056 if (sf_exponent
== 0 && sf_mantissa
!= 0)
29059 /* Return the immediate to be used. */
29063 /* Now we have only two opaque types, they are __vector_quad and
29064 __vector_pair built-in types. They are target specific and
29065 only available when MMA is supported. With MMA supported, it
29066 simply returns true, otherwise it checks if the given gimple
29067 STMT is an assignment, asm or call stmt and uses either of
29068 these two opaque types unexpectedly, if yes, it would raise
29069 an error message and returns true, otherwise it returns false. */
29072 rs6000_opaque_type_invalid_use_p (gimple
*stmt
)
29077 /* If the given TYPE is one MMA opaque type, emit the corresponding
29078 error messages and return true, otherwise return false. */
29079 auto check_and_error_invalid_use
= [](tree type
)
29081 tree mv
= TYPE_MAIN_VARIANT (type
);
29082 if (mv
== vector_quad_type_node
)
29084 error ("type %<__vector_quad%> requires the %qs option", "-mmma");
29087 else if (mv
== vector_pair_type_node
)
29089 error ("type %<__vector_pair%> requires the %qs option", "-mmma");
29097 /* The usage of MMA opaque types is very limited for now,
29098 to check with gassign, gasm and gcall is enough so far. */
29099 if (gassign
*ga
= dyn_cast
<gassign
*> (stmt
))
29101 tree lhs
= gimple_assign_lhs (ga
);
29102 tree type
= TREE_TYPE (lhs
);
29103 if (check_and_error_invalid_use (type
))
29106 else if (gasm
*gs
= dyn_cast
<gasm
*> (stmt
))
29108 unsigned ninputs
= gimple_asm_ninputs (gs
);
29109 for (unsigned i
= 0; i
< ninputs
; i
++)
29111 tree op
= gimple_asm_input_op (gs
, i
);
29112 tree val
= TREE_VALUE (op
);
29113 tree type
= TREE_TYPE (val
);
29114 if (check_and_error_invalid_use (type
))
29117 unsigned noutputs
= gimple_asm_noutputs (gs
);
29118 for (unsigned i
= 0; i
< noutputs
; i
++)
29120 tree op
= gimple_asm_output_op (gs
, i
);
29121 tree val
= TREE_VALUE (op
);
29122 tree type
= TREE_TYPE (val
);
29123 if (check_and_error_invalid_use (type
))
29127 else if (gcall
*gc
= dyn_cast
<gcall
*> (stmt
))
29129 unsigned nargs
= gimple_call_num_args (gc
);
29130 for (unsigned i
= 0; i
< nargs
; i
++)
29132 tree arg
= gimple_call_arg (gc
, i
);
29133 tree type
= TREE_TYPE (arg
);
29134 if (check_and_error_invalid_use (type
))
29143 struct gcc_target targetm
= TARGET_INITIALIZER
;
29145 #include "gt-rs6000.h"