// SPDX-License-Identifier: GPL-3.0-or-later
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2023 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "print-tree.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "sched-int.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimple-walk.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "tm-constrs.h"
#include "target-globals.h"
#include "tree-vector-builder.h"
#include "tree-pass.h"
#include "symbol-summary.h"
#include "ipa-fnsummary.h"
#include "case-cfn-macros.h"
#include "rs6000-internal.h"

/* This file should be included last.  */
#include "target-def.h"
extern tree rs6000_builtin_mask_for_load (void);
extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
extern tree rs6000_builtin_reciprocal (tree);
/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif
/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;
/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Track use of r13 in 64bit AIX TLS.  */
static bool xcoff_tls_exec_model_detected = false;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;
#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif
/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;
static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
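
/* Illustrative note (not from the original sources): RECIP_LOW_PRECISION is
   just RECIP_ALL with the two double-precision rsqrt bits cleared, e.g.

     RECIP_ALL           = 0x0ff
     RECIP_DF_RSQRT      = 0x020
     RECIP_V2DF_RSQRT    = 0x080
     RECIP_LOW_PRECISION = 0x0ff & ~(0x020 | 0x080) = 0x05f

   so low-precision machines still get every divide estimate and the
   single-precision rsqrt estimates.  */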
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
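
/* Illustrative sketch (not from the original sources): the option-override
   code is expected to walk this table for each comma-separated item of a
   -mrecip= string, so a hypothetical "-mrecip=divd,rsqrtf" would accumulate

     RECIP_DF_DIV | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_V4SF_RSQRT

   into the reciprocal mask before rs6000_recip_bits[] is filled in.  */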
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,			/* ISA 3.1 (power10).  */
  CLONE_MAX
};
/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0, "" },					/* Default options.  */
  { OPTION_MASK_CMPB, "arch_2_05" },		/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD, "arch_2_06" },		/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR, "arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR, "arch_3_00" },	/* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10, "arch_3_1" },		/* ISA 3.1 (power10).  */
};
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.cc) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.cc, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,			/* General purpose registers.  */
  RELOAD_REG_FPR,			/* Traditional floating point regs.  */
  RELOAD_REG_VMX,			/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,			/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   classes.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;			/* Register class name.  */
  int reg;				/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
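
/* Illustrative note (not from the original sources): a typical addr_mask
   entry ORs several of the bits above, e.g. a GPR entry for DImode might be

     RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET
       | RELOAD_REG_PRE_INCDEC | RELOAD_REG_PRE_MODIFY

   while a VSX vector mode would typically have RELOAD_REG_VALID,
   RELOAD_REG_INDEXED and (on newer ISAs) RELOAD_REG_QUAD_OFFSET set.  */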
/* Register type masks based on the type of valid addressing modes.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
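
/* Illustrative sketch (not from the original sources): the address
   legitimation and reload code later in this file is expected to consult
   these helpers along the lines of

     if (GET_CODE (addr) == PRE_INC && !mode_supports_pre_incdec_p (mode))
       ... reject the address ...
     if (mode_supports_dq_form (mode) && (offset & 15) != 0)
       ... reject the offset for DQ-form instructions ...

   i.e. they simply test bits that were precomputed into reg_addr[].  */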
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */
bool
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;

		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }

  return store_data_bypass_p (out_insn, in_insn);
}
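
/* Illustrative note (not from the original sources): helpers like this one
   are normally referenced from define_bypass conditions in the scheduling
   descriptions (the power*.md files), e.g. something along the lines of

     (define_bypass 2 "some_store_producer" "some_store_consumer"
       "rs6000_store_data_bypass_p")

   so returning false here merely disables the bypass latency adjustment
   instead of letting the generic helper assert.  */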
/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32-bit processors.  */
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on RS64A processors.  */
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC440 processors.  */
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC604 processors.  */
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),    /* mulsi_const */
  COSTS_N_INSNS (6/2),    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),   /* divsi */
  COSTS_N_INSNS (70/2),   /* divdi */
  COSTS_N_INSNS (10/2),   /* fp */
  COSTS_N_INSNS (10/2),   /* dmul */
  COSTS_N_INSNS (74/2),   /* sdiv */
  COSTS_N_INSNS (74/2),   /* ddiv */
  128,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC8540 processors.  */
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPCE500MC64 processors.  */
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on AppliedMicro Titan processors.  */
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on POWER7 processors.  */
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};
/* Instruction costs on POWER10 processors.  */
struct processor_costs power10_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (2),	/* fp */
  COSTS_N_INSNS (2),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (13),	/* ddiv */
  128,			/* cache line size */
  16,			/* prefetch streams */
  COSTS_N_INSNS (2),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
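
/* Illustrative note (not from the original sources): rs6000_cost is expected
   to be pointed at one of the tables above while -mtune is processed, e.g.
   something like

     case PROCESSOR_POWER9:
       rs6000_cost = &power9_cost;
       break;

   in the option-override code, after which the rtx_costs and insn_cost hooks
   read their latencies from it.  */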
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool,
					       code_helper = ERROR_MARK);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = nullptr);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
bool easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static tree get_prev_label (tree);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							    machine_mode,
							    rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							    enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",

      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",

      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",

      "lr", "ctr", "ca", "ap",

      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  /* vrsave vscr sfp */
      "vrsave", "vscr", "sfp",
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",

   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",

   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",

   "lr", "ctr", "ca", "ap",

  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
/* Table of valid machine attributes.  */

static const attribute_spec rs6000_gnu_attributes[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
};

static const scoped_attribute_specs rs6000_gnu_attribute_table =
{
  "gnu", { rs6000_gnu_attributes }
};

static const scoped_attribute_specs *const rs6000_attribute_table[] =
{
  &rs6000_gnu_attribute_table
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"

#else
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif
/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
  rs6000_print_patchable_function_entry

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  rs6000_libgcc_floating_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
#undef TARGET_DOLOOP_COST_FOR_GENERIC
#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000

#undef TARGET_DOLOOP_COST_FOR_ADDRESS
#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000

#undef TARGET_PREFERRED_DOLOOP_MODE
#define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1692 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1693 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1695 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1696 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1697 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1698 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1699 #undef TARGET_UNWIND_WORD_MODE
1700 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1702 #undef TARGET_OFFLOAD_OPTIONS
1703 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1705 #undef TARGET_C_MODE_FOR_SUFFIX
1706 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1708 #undef TARGET_INVALID_BINARY_OP
1709 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1711 #undef TARGET_OPTAB_SUPPORTED_P
1712 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1714 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1715 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1717 #undef TARGET_COMPARE_VERSION_PRIORITY
1718 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1720 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1721 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1722 rs6000_generate_version_dispatcher_body
1724 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1725 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1726 rs6000_get_function_versions_dispatcher
1728 #undef TARGET_OPTION_FUNCTION_VERSIONS
1729 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1731 #undef TARGET_HARD_REGNO_NREGS
1732 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1733 #undef TARGET_HARD_REGNO_MODE_OK
1734 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1736 #undef TARGET_MODES_TIEABLE_P
1737 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1739 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1740 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1741 rs6000_hard_regno_call_part_clobbered
1743 #undef TARGET_SLOW_UNALIGNED_ACCESS
1744 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1746 #undef TARGET_CAN_CHANGE_MODE_CLASS
1747 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1749 #undef TARGET_CONSTANT_ALIGNMENT
1750 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1752 #undef TARGET_STARTING_FRAME_OFFSET
1753 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1755 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1756 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1758 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1759 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1761 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1762 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1763 rs6000_cannot_substitute_mem_equiv_p
1765 #undef TARGET_INVALID_CONVERSION
1766 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1768 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1769 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1771 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1772 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1774 #undef TARGET_CONST_ANCHOR
1775 #define TARGET_CONST_ANCHOR 0x8000
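/* Explanatory note (illustrative, not from the original source): the 0x8000
   anchor lets CSE re-derive nearby constants with a single signed 16-bit
   addi.  For example, once 0x12348000 is in a register, the hypothetical
   constant 0x12345678 can be formed from it by adding -0x2988 instead of
   being synthesized from scratch.  */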
1779 /* Processor table.  */
struct rs6000_ptt
{
1782 const char *const name;	/* Canonical processor name.  */
1783 const enum processor_type processor;	/* Processor type enum value.  */
1784 const HOST_WIDE_INT target_enable;	/* Target flags to enable.  */
};
1787 static struct rs6000_ptt
const processor_target_table[] =
{
1789 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1790 #include "rs6000-cpus.def"
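/* Illustrative sketch (the concrete entry below is hypothetical, not copied
   from rs6000-cpus.def): each RS6000_CPU line in that file, e.g.

     RS6000_CPU ("power9", PROCESSOR_POWER9, MASK_POWERPC64 | ISA_3_0_MASKS_SERVER)

   expands via the macro above into one table initializer,
   { "power9", PROCESSOR_POWER9, MASK_POWERPC64 | ISA_3_0_MASKS_SERVER },
   so processor_target_table is generated entirely from the .def file.  */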
1794 /* Look up a processor name for -mcpu=xxx and -mtune=xxx.  Return -1 if the
name is invalid. */
1798 rs6000_cpu_name_lookup (const char *name)
1804 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1805 if (! strcmp (name, processor_target_table[i].name))
1813 /* Return number of consecutive hard regs needed starting at reg REGNO
1814 to hold something of mode MODE.
1815 This is ordinarily the length in words of a value of mode MODE
1816 but can be less for certain modes in special long registers.
1818 POWER and PowerPC GPRs hold 32 bits worth;
1819 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1822 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1824 unsigned HOST_WIDE_INT reg_size;
1826 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1827 128-bit floating point that can go in vector registers, which has VSX
1828 memory addressing. */
1829 if (FP_REGNO_P (regno))
1830 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1831 ? UNITS_PER_VSX_WORD
1832 : UNITS_PER_FP_WORD);
1834 else if (ALTIVEC_REGNO_P (regno))
1835 reg_size = UNITS_PER_ALTIVEC_WORD;
1838 reg_size = UNITS_PER_WORD;
1840 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
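/* Worked example (illustrative, not part of the original source): a V2DFmode
   value is 16 bytes.  In an FPR with VSX memory addressing, reg_size is
   UNITS_PER_VSX_WORD (16), so (16 + 16 - 1) / 16 = 1 register; spread across
   32-bit GPRs, reg_size is UNITS_PER_WORD (4), so (16 + 4 - 1) / 4 = 4
   registers.  */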
1843 /* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
1846 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1848 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1850 if (COMPLEX_MODE_P (mode))
1851 mode = GET_MODE_INNER (mode);
1853 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1856 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1858 /* MMA accumulator modes need FPR registers divisible by 4. */
1860 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1862 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1863 register combinations, and use PTImode where we need to deal with quad
1864 word memory operations. Don't allow quad words in the argument or frame
1865 pointer registers, just registers 0..31. */
1866 if (mode == PTImode)
1867 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1868 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1869 && ((regno & 1) == 0));
1871 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1872 implementations. Don't allow an item to be split between a FP register
1873 and an Altivec register. Allow TImode in all VSX registers if the user
1875 if (TARGET_VSX && VSX_REGNO_P (regno)
1876 && (VECTOR_MEM_VSX_P (mode)
1877 || VECTOR_ALIGNMENT_P (mode)
1878 || reg_addr[mode].scalar_in_vmx_p
1880 || (TARGET_VADDUQM && mode == V1TImode)))
1882 if (FP_REGNO_P (regno))
1883 return FP_REGNO_P (last_regno);
1885 if (ALTIVEC_REGNO_P (regno))
1887 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1890 return ALTIVEC_REGNO_P (last_regno);
1894 /* The GPRs can hold any mode, but values bigger than one register
1895 cannot go past R31. */
1896 if (INT_REGNO_P (regno))
1897 return INT_REGNO_P (last_regno);
1899 /* The float registers (except for VSX vector modes) can only hold floating
1900 modes and DImode. */
1901 if (FP_REGNO_P (regno))
1903 if (VECTOR_ALIGNMENT_P (mode))
1906 if (SCALAR_FLOAT_MODE_P (mode)
1907 && (mode != TDmode || (regno % 2) == 0)
1908 && FP_REGNO_P (last_regno))
1911 if (GET_MODE_CLASS (mode) == MODE_INT)
1913 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1916 if (TARGET_POPCNTD && mode == SImode)
1919 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1926 /* The CR register can only hold CC modes. */
1927 if (CR_REGNO_P (regno))
1928 return GET_MODE_CLASS (mode) == MODE_CC;
1930 if (CA_REGNO_P (regno))
1931 return mode == Pmode || mode == SImode;
1933 /* AltiVec only in AltiVec registers. */
1934 if (ALTIVEC_REGNO_P (regno))
1935 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1936 || mode == V1TImode);
1938 /* We cannot put non-VSX TImode or PTImode anywhere except general registers,
1939 and it must be able to fit within the register set. */
1941 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1944 /* Implement TARGET_HARD_REGNO_NREGS. */
1947 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1949 return rs6000_hard_regno_nregs[mode][regno];
1952 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1955 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1957 return rs6000_hard_regno_mode_ok_p[mode][regno];
1960 /* Implement TARGET_MODES_TIEABLE_P.
1962 PTImode cannot tie with other modes because PTImode is restricted to even
1963 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1966 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1967 registers) or XOmode (vector quad, restricted to FPR registers divisible
1968 by 4) to tie with other modes.
1970 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1971 128-bit floating point on VSX systems ties with other vectors. */
1974 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1976 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1977 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1978 return mode1 == mode2;
1980 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1981 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1982 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1985 if (SCALAR_FLOAT_MODE_P (mode1))
1986 return SCALAR_FLOAT_MODE_P (mode2);
1987 if (SCALAR_FLOAT_MODE_P (mode2))
1990 if (GET_MODE_CLASS (mode1) == MODE_CC)
1991 return GET_MODE_CLASS (mode2) == MODE_CC;
1992 if (GET_MODE_CLASS (mode2) == MODE_CC)
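/* Illustrative example (not part of the original source): under these rules
   SFmode and DFmode tie (both scalar float), V4SImode and V2DFmode tie (both
   Altivec/VSX vector modes), but TImode never ties with PTImode because the
   first check only lets PTImode tie with itself.  */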
1998 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2001 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
2006 && GET_MODE_SIZE (mode) > 4
2007 && INT_REGNO_P (regno))
2011 && FP_REGNO_P (regno)
2012 && GET_MODE_SIZE (mode) > 8
2013 && !FLOAT128_2REG_P (mode))
2019 /* Print interesting facts about registers. */
2021 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2025 for (r = first_regno; r <= last_regno; ++r)
2027 const char *comma = "";
2030 if (first_regno == last_regno)
2031 fprintf (stderr, "%s:\t", reg_name);
2033 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2036 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2037 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2041 fprintf (stderr, ",\n\t");
2046 if (rs6000_hard_regno_nregs[m][r] > 1)
2047 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2048 rs6000_hard_regno_nregs[m][r]);
2050 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2055 if (call_used_or_fixed_reg_p (r))
2059 fprintf (stderr, ",\n\t");
2064 len += fprintf (stderr, "%s%s", comma, "call-used");
2072 fprintf (stderr, ",\n\t");
2077 len += fprintf (stderr, "%s%s", comma, "fixed");
2083 fprintf (stderr, ",\n\t");
2087 len += fprintf (stderr, "%sreg-class = %s", comma,
2088 reg_class_names[(int)rs6000_regno_regclass[r]]);
2093 fprintf (stderr, ",\n\t");
2097 fprintf (stderr, "%sregno = %d\n", comma, r);
2102 rs6000_debug_vector_unit (enum rs6000_vector v)
2108 case VECTOR_NONE: ret = "none"; break;
2109 case VECTOR_ALTIVEC: ret = "altivec"; break;
2110 case VECTOR_VSX: ret = "vsx"; break;
2111 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2112 default: ret = "unknown"; break;
2118 /* Inner function printing just the address mask for a particular reload
2120 DEBUG_FUNCTION char *
2121 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2126 if ((mask & RELOAD_REG_VALID) != 0)
2128 else if (keep_spaces)
2131 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2133 else if (keep_spaces)
2136 if ((mask & RELOAD_REG_INDEXED) != 0)
2138 else if (keep_spaces)
2141 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2143 else if ((mask & RELOAD_REG_OFFSET) != 0)
2145 else if (keep_spaces)
2148 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2150 else if (keep_spaces)
2153 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2155 else if (keep_spaces)
2158 if ((mask & RELOAD_REG_AND_M16) != 0)
2160 else if (keep_spaces)
2168 /* Print the address masks in a human readable fashion. */
2170 rs6000_debug_print_mode (ssize_t m)
2175 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2176 for (rc = 0; rc < N_RELOAD_REG; rc++)
2177 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2178 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2180 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2181 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2183 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2184 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2185 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2189 spaces += strlen (" Reload=sl");
2191 if (reg_addr[m].scalar_in_vmx_p)
2193 fprintf (stderr, "%*s Upper=y", spaces, "");
2197 spaces += strlen (" Upper=y");
2199 if (rs6000_vector_unit[m] != VECTOR_NONE
2200 || rs6000_vector_mem[m] != VECTOR_NONE)
2202 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2204 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2205 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2208 fputs ("\n", stderr);
2211 #define DEBUG_FMT_ID "%-32s= "
2212 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2213 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2214 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
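/* Usage note (illustrative, not from the original source): DEBUG_FMT_ID
   left-justifies the key in a 32-character field, so a call such as
   fprintf (stderr, DEBUG_FMT_S, "abi", "elfv2");
   prints the key, padding spaces, then "= elfv2" on one line, keeping the
   -mdebug=reg output aligned in columns.  */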
2216 /* Print various interesting information with -mdebug=reg. */
2218 rs6000_debug_reg_global (void)
2220 static const char *const tf[2] = { "false", "true" };
2221 const char *nl = (const char *)0;
2224 char costly_num[20];
2226 char flags_buffer[40];
2227 const char *costly_str;
2228 const char *nop_str;
2229 const char *trace_str;
2230 const char *abi_str;
2231 const char *cmodel_str;
2232 struct cl_target_option cl_opts;
2234 /* Modes we want tieable information on. */
2235 static const machine_mode print_tieable_modes[] = {
2274 /* Virtual regs we are interested in. */
2275 const static struct {
2276 int regno; /* register number. */
2277 const char *name; /* register name. */
2278 } virtual_regs[] = {
2279 { STACK_POINTER_REGNUM, "stack pointer:" },
2280 { TOC_REGNUM, "toc: " },
2281 { STATIC_CHAIN_REGNUM, "static chain: " },
2282 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2283 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2284 { ARG_POINTER_REGNUM, "arg pointer: " },
2285 { FRAME_POINTER_REGNUM, "frame pointer:" },
2286 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2287 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2288 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2289 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2290 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2291 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2292 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2293 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2294 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2297 fputs ("\nHard register information:\n", stderr);
2298 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2299 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2300 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2303 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2304 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2305 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2306 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2307 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2308 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2310 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2311 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2312 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2316 "d reg_class = %s\n"
2317 "v reg_class = %s\n"
2318 "wa reg_class = %s\n"
2319 "we reg_class = %s\n"
2320 "wr reg_class = %s\n"
2321 "wx reg_class = %s\n"
2322 "wA reg_class = %s\n"
2324 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2325 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2326 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2327 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2328 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2329 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2330 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2333 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2334 rs6000_debug_print_mode (m);
2336 fputs ("\n", stderr);
2338 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2340 machine_mode mode1 = print_tieable_modes[m1];
2341 bool first_time = true;
2343 nl = (const char *)0;
2344 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2346 machine_mode mode2 = print_tieable_modes[m2];
2347 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2351 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2356 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2361 fputs ("\n", stderr);
2367 if (rs6000_recip_control)
2369 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2371 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2372 if (rs6000_recip_bits[m])
2375 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2377 (RS6000_RECIP_AUTO_RE_P (m)
2379 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2380 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2382 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2385 fputs ("\n", stderr);
2388 if (rs6000_cpu_index >= 0)
2390 const char *name = processor_target_table[rs6000_cpu_index].name;
2392 = processor_target_table[rs6000_cpu_index].target_enable;
2394 sprintf (flags_buffer, "-mcpu=%s flags", name);
2395 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2398 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2400 if (rs6000_tune_index >= 0)
2402 const char *name = processor_target_table[rs6000_tune_index].name;
2404 = processor_target_table[rs6000_tune_index].target_enable;
2406 sprintf (flags_buffer, "-mtune=%s flags", name);
2407 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2410 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2412 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2413 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2416 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2417 rs6000_isa_flags_explicit);
2419 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2421 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2422 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2424 switch (rs6000_sched_costly_dep)
2426 case max_dep_latency:
2427 costly_str = "max_dep_latency";
2431 costly_str = "no_dep_costly";
2434 case all_deps_costly:
2435 costly_str = "all_deps_costly";
2438 case true_store_to_load_dep_costly:
2439 costly_str = "true_store_to_load_dep_costly";
2442 case store_to_load_dep_costly:
2443 costly_str = "store_to_load_dep_costly";
2447 costly_str = costly_num;
2448 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2452 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2454 switch (rs6000_sched_insert_nops)
2456 case sched_finish_regroup_exact:
2457 nop_str = "sched_finish_regroup_exact";
2460 case sched_finish_pad_groups:
2461 nop_str = "sched_finish_pad_groups";
2464 case sched_finish_none:
2465 nop_str = "sched_finish_none";
2470 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2474 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2476 switch (rs6000_sdata)
2483 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2487 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2491 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2496 switch (rs6000_traceback)
2498 case traceback_default: trace_str = "default"; break;
2499 case traceback_none: trace_str = "none"; break;
2500 case traceback_part: trace_str = "part"; break;
2501 case traceback_full: trace_str = "full"; break;
2502 default: trace_str = "unknown"; break;
2505 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2507 switch (rs6000_current_cmodel)
2509 case CMODEL_SMALL: cmodel_str = "small"; break;
2510 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2511 case CMODEL_LARGE: cmodel_str = "large"; break;
2512 default: cmodel_str = "unknown"; break;
2515 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2517 switch (rs6000_current_abi)
2519 case ABI_NONE: abi_str = "none"; break;
2520 case ABI_AIX: abi_str = "aix"; break;
2521 case ABI_ELFv2: abi_str = "ELFv2"; break;
2522 case ABI_V4: abi_str = "V4"; break;
2523 case ABI_DARWIN: abi_str = "darwin"; break;
2524 default: abi_str = "unknown"; break;
2527 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2529 if (rs6000_altivec_abi)
2530 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2532 if (rs6000_aix_extabi)
2533 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");
2535 if (rs6000_darwin64_abi)
2536 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2538 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2539 (TARGET_SOFT_FLOAT ? "true" : "false"));
2541 if (TARGET_LINK_STACK)
2542 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2544 if (TARGET_P8_FUSION)
2548 strcpy (options, "power8");
2549 if (TARGET_P8_FUSION_SIGN)
2550 strcat (options, ", sign");
2552 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2555 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2556 TARGET_SECURE_PLT ? "secure" : "bss");
2557 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2558 aix_struct_return ? "aix" : "sysv");
2559 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2560 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2561 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2562 tf[!!rs6000_align_branch_targets]);
2563 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2564 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2565 rs6000_long_double_type_size);
2566 if (rs6000_long_double_type_size > 64)
2568 fprintf (stderr, DEBUG_FMT_S, "long double type",
2569 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2570 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2571 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2573 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2574 (int)rs6000_sched_restricted_insns_priority);
2575 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2578 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2579 (int)TARGET_FLOAT128_ENABLE_TYPE);
2582 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2583 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2585 if (TARGET_DIRECT_MOVE_128)
2586 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2587 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2591 /* Update the addr mask bits in reg_addr to help secondary reload and the
2592 legitimate-address machinery figure out the appropriate addressing to use. */
2596 rs6000_setup_reg_addr_masks (void)
2598 ssize_t rc, reg, m, nregs;
2599 addr_mask_type any_addr_mask, addr_mask;
2601 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2603 machine_mode m2 = (machine_mode) m;
2604 bool complex_p = false;
2605 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2608 if (COMPLEX_MODE_P (m2))
2611 m2 = GET_MODE_INNER (m2);
2614 msize = GET_MODE_SIZE (m2);
2616 /* SDmode is special in that we want to access it only via REG+REG
2617 addressing on power7 and above, since we want to use the LFIWZX and
2618 STFIWX instructions to load and store it. */
2619 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2622 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2625 reg = reload_reg_map[rc].reg;
2627 /* Can mode values go in the GPR/FPR/Altivec registers? */
2628 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2630 bool small_int_vsx_p = (small_int_p
2631 && (rc == RELOAD_REG_FPR
2632 || rc == RELOAD_REG_VMX));
2634 nregs = rs6000_hard_regno_nregs[m][reg];
2635 addr_mask |= RELOAD_REG_VALID;
2637 /* Indicate if the mode takes more than 1 physical register. If
2638 it takes a single register, indicate it can do REG+REG
2639 addressing. Small integers in VSX registers can only do
2640 REG+REG addressing. */
2641 if (small_int_vsx_p)
2642 addr_mask |= RELOAD_REG_INDEXED;
2643 else if (nregs > 1 || m == BLKmode || complex_p)
2644 addr_mask |= RELOAD_REG_MULTIPLE;
2646 addr_mask |= RELOAD_REG_INDEXED;
2648 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2649 addressing. If we allow scalars into Altivec registers,
2650 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2652 For VSX systems, we don't allow update addressing for
2653 DFmode/SFmode if those registers can go in both the
2654 traditional floating point registers and Altivec registers.
2655 The load/store instructions for the Altivec registers do not
2656 have update forms. If we allowed update addressing, it seems
2657 to break IV-OPT code using floating point if the index type is
2658 int instead of long (PR target/81550 and target/84042). */
2661 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2663 && !VECTOR_MODE_P (m2)
2664 && !VECTOR_ALIGNMENT_P (m2)
2666 && (m != E_DFmode || !TARGET_VSX)
2667 && (m != E_SFmode || !TARGET_P8_VECTOR)
2668 && !small_int_vsx_p)
2670 addr_mask |= RELOAD_REG_PRE_INCDEC;
2672 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2673 we don't allow PRE_MODIFY for some multi-register
2678 addr_mask |= RELOAD_REG_PRE_MODIFY;
2682 if (TARGET_POWERPC64)
2683 addr_mask |= RELOAD_REG_PRE_MODIFY;
2688 if (TARGET_HARD_FLOAT)
2689 addr_mask |= RELOAD_REG_PRE_MODIFY;
2695 /* GPR and FPR registers can do REG+OFFSET addressing, except
2696 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2697 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2698 if ((addr_mask != 0) && !indexed_only_p
2700 && (rc == RELOAD_REG_GPR
2701 || ((msize == 8 || m2 == SFmode)
2702 && (rc == RELOAD_REG_FPR
2703 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2704 addr_mask |= RELOAD_REG_OFFSET;
2706 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2707 instructions are enabled. The offset for 128-bit VSX registers is
2708 only 12 bits. While GPRs can handle the full offset range, VSX
2709 registers can only handle the restricted range. */
2710 else if ((addr_mask != 0) && !indexed_only_p
2711 && msize == 16 && TARGET_P9_VECTOR
2712 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2713 || (m2 == TImode && TARGET_VSX)))
2715 addr_mask |= RELOAD_REG_OFFSET;
2716 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2717 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2720 /* Vector pairs can do both indexed and offset loads if the
2721 instructions are enabled, otherwise they can only do offset loads
2722 since it will be broken into two vector moves. Vector quads can
2723 only do offset loads. */
2724 else if ((addr_mask != 0) && TARGET_MMA
2725 && (m2 == OOmode || m2 == XOmode))
2727 addr_mask |= RELOAD_REG_OFFSET;
2728 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2730 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2732 addr_mask |= RELOAD_REG_INDEXED;
2736 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2737 addressing on 128-bit types. */
2738 if (rc == RELOAD_REG_VMX && msize == 16
2739 && (addr_mask & RELOAD_REG_VALID) != 0)
2740 addr_mask |= RELOAD_REG_AND_M16;
2742 reg_addr[m].addr_mask[rc] = addr_mask;
2743 any_addr_mask |= addr_mask;
2746 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
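/* Illustrative summary (not from the original source): on a 64-bit VSX
   target, reg_addr[DFmode].addr_mask[RELOAD_REG_GPR] computed above would
   typically include RELOAD_REG_VALID, RELOAD_REG_INDEXED and
   RELOAD_REG_OFFSET (plus the update-form bits where allowed), and the
   RELOAD_REG_ANY entry is simply the union of the per-class GPR/FPR/VMX
   masks.  */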
2751 /* Initialize the various global tables that are based on register size. */
2753 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2759 /* Precalculate REGNO_REG_CLASS. */
2760 rs6000_regno_regclass[0] = GENERAL_REGS;
2761 for (r = 1; r < 32; ++r)
2762 rs6000_regno_regclass[r] = BASE_REGS;
2764 for (r = 32; r < 64; ++r)
2765 rs6000_regno_regclass[r] = FLOAT_REGS;
2767 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2768 rs6000_regno_regclass[r] = NO_REGS;
2770 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2771 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2773 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2774 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2775 rs6000_regno_regclass[r] = CR_REGS;
2777 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2778 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2779 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2780 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2781 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2782 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2783 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2785 /* Precalculate register class to simpler reload register class. We don't
2786 need all of the register classes that are combinations of different
2787 classes, just the simple ones that have constraint letters. */
2788 for (c = 0; c < N_REG_CLASSES; c++)
2789 reg_class_to_reg_type[c] = NO_REG_TYPE;
2791 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2792 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2793 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2794 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2795 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2796 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2797 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2798 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2799 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2800 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2804 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2805 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2809 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2810 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2813 /* Precalculate the valid memory formats as well as the vector information,
2814 this must be set up before the rs6000_hard_regno_nregs_internal calls
2816 gcc_assert ((int)VECTOR_NONE == 0);
2817 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2818 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2820 gcc_assert ((int)CODE_FOR_nothing == 0);
2821 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2823 gcc_assert ((int)NO_REGS == 0);
2824 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2826 /* The VSX hardware allows native alignment for vectors, but we control
2827 whether the compiler believes it can use native alignment or still uses
128-bit alignment. */
2828 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2839 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2840 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2841 if (TARGET_FLOAT128_TYPE)
2843 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2844 rs6000_vector_align[KFmode] = 128;
2846 if (FLOAT128_IEEE_P (TFmode))
2848 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2849 rs6000_vector_align[TFmode] = 128;
2853 /* V2DF mode, VSX only. */
2856 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2857 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2858 rs6000_vector_align[V2DFmode] = align64;
2861 /* V4SF mode, either VSX or Altivec. */
2864 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2865 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2866 rs6000_vector_align[V4SFmode] = align32;
2868 else if (TARGET_ALTIVEC)
2870 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2871 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2872 rs6000_vector_align[V4SFmode] = align32;
2875 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2879 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2880 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2881 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2882 rs6000_vector_align[V4SImode] = align32;
2883 rs6000_vector_align[V8HImode] = align32;
2884 rs6000_vector_align[V16QImode] = align32;
2888 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2889 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2890 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2894 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2895 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2896 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2900 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2901 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2904 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2905 rs6000_vector_unit[V2DImode]
2906 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2907 rs6000_vector_align[V2DImode] = align64;
2909 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2910 rs6000_vector_unit[V1TImode]
2911 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2912 rs6000_vector_align[V1TImode] = 128;
2915 /* DFmode, see if we want to use the VSX unit. Memory is handled
2916 differently, so don't set rs6000_vector_mem. */
2919 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2920 rs6000_vector_align[DFmode] = 64;
2923 /* SFmode, see if we want to use the VSX unit. */
2924 if (TARGET_P8_VECTOR)
2926 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2927 rs6000_vector_align[SFmode] = 32;
2930 /* Allow TImode in VSX register and set the VSX memory macros. */
2933 rs6000_vector_mem[TImode] = VECTOR_VSX;
2934 rs6000_vector_align[TImode] = align64;
2937 /* Add support for vector pairs and vector quad registers. */
2940 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2941 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2942 rs6000_vector_align[OOmode] = 256;
2944 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2945 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2946 rs6000_vector_align[XOmode] = 512;
2949 /* Register class constraints for the constraints that depend on compile
2950 switches. When the VSX code was added, different constraints were added
2951 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2952 of the VSX registers are used. The register classes for scalar floating
2953 point types are set based on whether we allow that type into the upper
2954 (Altivec) registers. GCC has register classes to target the Altivec
2955 registers for load/store operations, to select using a VSX memory
2956 operation instead of the traditional floating point operation. The
2959 d - Register class to use with traditional DFmode instructions.
2960 v - Altivec register.
2961 wa - Any VSX register.
2962 wc - Reserved to represent individual CR bits (used in LLVM).
2963 wn - always NO_REGS.
2964 wr - GPR if 64-bit mode is permitted.
2965 wx - Float register if we can do 32-bit int stores. */
2967 if (TARGET_HARD_FLOAT)
2968 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;
2970 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2972 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2974 if (TARGET_POWERPC64)
2976 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2977 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2981 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2983 /* Support for new direct moves (ISA 3.0 + 64bit). */
2984 if (TARGET_DIRECT_MOVE_128)
2985 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2987 /* Set up the reload helper and direct move functions. */
2988 if (TARGET_VSX
|| TARGET_ALTIVEC
)
2992 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_di_store
;
2993 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_di_load
;
2994 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_di_store
;
2995 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_di_load
;
2996 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_di_store
;
2997 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_di_load
;
2998 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_di_store
;
2999 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_di_load
;
3000 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_di_store
;
3001 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_di_load
;
3002 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_di_store
;
3003 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_di_load
;
3004 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_di_store
;
3005 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_di_load
;
3006 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_di_store
;
3007 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_di_load
;
3008 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_di_store
;
3009 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_di_load
;
3010 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_di_store
;
3011 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_di_load
;
3013 if (FLOAT128_VECTOR_P (KFmode
))
3015 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_di_store
;
3016 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_di_load
;
3019 if (FLOAT128_VECTOR_P (TFmode
))
3021 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_di_store
;
3022 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_di_load
;
3025 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3027 if (TARGET_NO_SDMODE_STACK
)
3029 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_di_store
;
3030 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_di_load
;
3035 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_di_store
;
3036 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_di_load
;
3039 if (TARGET_DIRECT_MOVE
&& !TARGET_DIRECT_MOVE_128
)
3041 reg_addr
[TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxti
;
3042 reg_addr
[V1TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv1ti
;
3043 reg_addr
[V2DFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2df
;
3044 reg_addr
[V2DImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2di
;
3045 reg_addr
[V4SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4sf
;
3046 reg_addr
[V4SImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4si
;
3047 reg_addr
[V8HImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv8hi
;
3048 reg_addr
[V16QImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv16qi
;
3049 reg_addr
[SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxsf
;
3051 reg_addr
[TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprti
;
3052 reg_addr
[V1TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv1ti
;
3053 reg_addr
[V2DFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2df
;
3054 reg_addr
[V2DImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2di
;
3055 reg_addr
[V4SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4sf
;
3056 reg_addr
[V4SImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4si
;
3057 reg_addr
[V8HImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv8hi
;
3058 reg_addr
[V16QImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv16qi
;
3059 reg_addr
[SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprsf
;
3061 if (FLOAT128_VECTOR_P (KFmode
))
3063 reg_addr
[KFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxkf
;
3064 reg_addr
[KFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprkf
;
3067 if (FLOAT128_VECTOR_P (TFmode
))
3069 reg_addr
[TFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxtf
;
3070 reg_addr
[TFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprtf
;
3075 reg_addr
[OOmode
].reload_store
= CODE_FOR_reload_oo_di_store
;
3076 reg_addr
[OOmode
].reload_load
= CODE_FOR_reload_oo_di_load
;
3077 reg_addr
[XOmode
].reload_store
= CODE_FOR_reload_xo_di_store
;
3078 reg_addr
[XOmode
].reload_load
= CODE_FOR_reload_xo_di_load
;
3084 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_si_store
;
3085 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_si_load
;
3086 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_si_store
;
3087 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_si_load
;
3088 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_si_store
;
3089 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_si_load
;
3090 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_si_store
;
3091 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_si_load
;
3092 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_si_store
;
3093 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_si_load
;
3094 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_si_store
;
3095 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_si_load
;
3096 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_si_store
;
3097 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_si_load
;
3098 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_si_store
;
3099 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_si_load
;
3100 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_si_store
;
3101 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_si_load
;
3102 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_si_store
;
3103 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_si_load
;
3105 if (FLOAT128_VECTOR_P (KFmode
))
3107 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_si_store
;
3108 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_si_load
;
3111 if (FLOAT128_IEEE_P (TFmode
))
3113 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_si_store
;
3114 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_si_load
;
3117 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3119 if (TARGET_NO_SDMODE_STACK
)
3121 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_si_store
;
3122 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_si_load
;
3127 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_si_store
;
3128 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_si_load
;
3131 if (TARGET_DIRECT_MOVE
)
3133 reg_addr
[DImode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdi
;
3134 reg_addr
[DDmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdd
;
3135 reg_addr
[DFmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdf
;
3139 reg_addr
[DFmode
].scalar_in_vmx_p
= true;
3140 reg_addr
[DImode
].scalar_in_vmx_p
= true;
3142 if (TARGET_P8_VECTOR
)
3144 reg_addr
[SFmode
].scalar_in_vmx_p
= true;
3145 reg_addr
[SImode
].scalar_in_vmx_p
= true;
3147 if (TARGET_P9_VECTOR
)
3149 reg_addr
[HImode
].scalar_in_vmx_p
= true;
3150 reg_addr
[QImode
].scalar_in_vmx_p
= true;
3155 /* Precalculate HARD_REGNO_NREGS. */
3156 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3157 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3158 rs6000_hard_regno_nregs
[m
][r
]
3159 = rs6000_hard_regno_nregs_internal (r
, (machine_mode
) m
);
3161 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3162 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3163 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3164 rs6000_hard_regno_mode_ok_p
[m
][r
]
3165 = rs6000_hard_regno_mode_ok_uncached (r
, (machine_mode
) m
);
3167 /* Precalculate CLASS_MAX_NREGS sizes. */
3168 for (c = 0; c < LIM_REG_CLASSES; ++c)
3172 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3173 reg_size = UNITS_PER_VSX_WORD;
3175 else if (c == ALTIVEC_REGS)
3176 reg_size = UNITS_PER_ALTIVEC_WORD;
3178 else if (c == FLOAT_REGS)
3179 reg_size = UNITS_PER_FP_WORD;
3182 reg_size = UNITS_PER_WORD;
3184 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3186 machine_mode m2 = (machine_mode)m;
3187 int reg_size2 = reg_size;
3189 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3191 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3192 reg_size2 = UNITS_PER_FP_WORD;
3194 rs6000_class_max_nregs[m][c]
3195 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
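/* Worked example (illustrative, not from the original source): for IFmode
   (IBM double-double, 16 bytes) in a VSX register class, reg_size2 is forced
   back to UNITS_PER_FP_WORD (8), so the class maximum is (16 + 8 - 1) / 8 = 2
   registers, matching the fact that IBM 128-bit floating point always
   occupies two FPRs.  */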
3199 /* Calculate which modes to automatically generate code to use the
3200 reciprocal divide and square root instructions. In the future, possibly
3201 automatically generate the instructions even if the user did not specify
3202 -mrecip. The older machines' double-precision reciprocal sqrt estimate is
3203 not accurate enough. */
3204 memset (rs6000_recip_bits
, 0, sizeof (rs6000_recip_bits
));
3206 rs6000_recip_bits
[SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3208 rs6000_recip_bits
[DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3209 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3210 rs6000_recip_bits
[V4SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3211 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3212 rs6000_recip_bits
[V2DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3214 if (TARGET_FRSQRTES
)
3215 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3217 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3218 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3219 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3220 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3221 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3223 if (rs6000_recip_control
)
3225 if (!flag_finite_math_only
)
3226 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3228 if (flag_trapping_math
)
3229 warning (0, "%qs requires %qs or %qs", "-mrecip",
3230 "-fno-trapping-math", "-ffast-math");
3231 if (!flag_reciprocal_math
)
3232 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3234 if (flag_finite_math_only
&& !flag_trapping_math
&& flag_reciprocal_math
)
3236 if (RS6000_RECIP_HAVE_RE_P (SFmode
)
3237 && (rs6000_recip_control
& RECIP_SF_DIV
) != 0)
3238 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3240 if (RS6000_RECIP_HAVE_RE_P (DFmode
)
3241 && (rs6000_recip_control
& RECIP_DF_DIV
) != 0)
3242 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3244 if (RS6000_RECIP_HAVE_RE_P (V4SFmode
)
3245 && (rs6000_recip_control
& RECIP_V4SF_DIV
) != 0)
3246 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3248 if (RS6000_RECIP_HAVE_RE_P (V2DFmode
)
3249 && (rs6000_recip_control
& RECIP_V2DF_DIV
) != 0)
3250 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3252 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode
)
3253 && (rs6000_recip_control
& RECIP_SF_RSQRT
) != 0)
3254 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3256 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode
)
3257 && (rs6000_recip_control
& RECIP_DF_RSQRT
) != 0)
3258 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3260 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode
)
3261 && (rs6000_recip_control
& RECIP_V4SF_RSQRT
) != 0)
3262 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3264 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode
)
3265 && (rs6000_recip_control
& RECIP_V2DF_RSQRT
) != 0)
3266 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3270 /* Update the addr mask bits in reg_addr to help secondary reload and the
3271 legitimate-address machinery figure out the appropriate addressing to use. */
3273 rs6000_setup_reg_addr_masks ();
3275 if (global_init_p
|| TARGET_DEBUG_TARGET
)
3277 if (TARGET_DEBUG_REG
)
3278 rs6000_debug_reg_global ();
3280 if (TARGET_DEBUG_COST
|| TARGET_DEBUG_REG
)
3282 "SImode variable mult cost = %d\n"
3283 "SImode constant mult cost = %d\n"
3284 "SImode short constant mult cost = %d\n"
3285 "DImode multiplication cost = %d\n"
3286 "SImode division cost = %d\n"
3287 "DImode division cost = %d\n"
3288 "Simple fp operation cost = %d\n"
3289 "DFmode multiplication cost = %d\n"
3290 "SFmode division cost = %d\n"
3291 "DFmode division cost = %d\n"
3292 "cache line size = %d\n"
3293 "l1 cache size = %d\n"
3294 "l2 cache size = %d\n"
3295 "simultaneous prefetches = %d\n"
3298 rs6000_cost
->mulsi_const
,
3299 rs6000_cost
->mulsi_const9
,
3307 rs6000_cost
->cache_line_size
,
3308 rs6000_cost
->l1_cache_size
,
3309 rs6000_cost
->l2_cache_size
,
3310 rs6000_cost
->simultaneous_prefetches
);
3315 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3318 darwin_rs6000_override_options (void)
3320 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3322 rs6000_altivec_abi = 1;
3323 TARGET_ALTIVEC_VRSAVE = 1;
3324 rs6000_current_abi = ABI_DARWIN;
3326 if (DEFAULT_ABI == ABI_DARWIN
3328 darwin_one_byte_bool = 1;
3330 if (TARGET_64BIT && ! TARGET_POWERPC64)
3332 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3333 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3336 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3337 optimisation, and will not work with the most generic case (where the
3338 symbol is undefined external, but there is no symbol stub). */
3340 rs6000_default_long_calls = 0;
3342 /* ld_classic is (so far) still used for kernel (static) code, and supports
3343 the JBSR longcall / branch islands. */
3346 rs6000_default_long_calls = 1;
3348 /* Allow a kext author to do -mkernel -mhard-float. */
3349 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3350 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3353 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3355 if (!flag_mkernel && !flag_apple_kext
3357 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3358 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3360 /* Unless the user (not the configurer) has explicitly overridden
3361 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3362 G4 unless targeting the kernel. */
3365 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3366 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3367 && ! OPTION_SET_P (rs6000_cpu_index))
3369 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3374 /* If not otherwise specified by a target, make 'long double' equivalent to
3377 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3378 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3381 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3382 to clobber the XER[CA] bit because clobbering that bit without telling
3383 the compiler worked just fine with versions of GCC before GCC 5, and
3384 breaking a lot of older code in ways that are hard to track down is
3385 not such a great idea. */
3388 rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
3389 vec<machine_mode> & /*input_modes*/,
3390 vec<const char *> & /*constraints*/,
3391 vec<rtx> &/*uses*/, vec<rtx> &clobbers,
3392 HARD_REG_SET &clobbered_regs, location_t /*loc*/)
3394 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3395 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
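/* Illustrative example (hypothetical user code, not part of the original
   source): an asm such as

     asm ("addic %0,%1,1" : "=r" (out) : "r" (in));

   modifies the carry bit XER[CA] without declaring it; because of this hook
   the compiler conservatively assumes every asm clobbers CA, so such code
   keeps working as it did before GCC 5.  */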
3399 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE,
3400 but it is called when the optimize level is changed via an attribute or
3401 pragma or when it is reset at the end of the code affected by the
3402 attribute or pragma. It is not called at the beginning of compilation
3403 (when TARGET_OPTION_OVERRIDE is called), so if you want these
3404 actions performed then as well, TARGET_OPTION_OVERRIDE should call
3405 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3408 rs6000_override_options_after_change (void)
3410 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3411 turns -frename-registers on. */
3412 if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
3413 || (OPTION_SET_P (flag_unroll_all_loops)
3414 && flag_unroll_all_loops))
3416 if (!OPTION_SET_P (unroll_only_small_loops))
3417 unroll_only_small_loops = 0;
3418 if (!OPTION_SET_P (flag_rename_registers))
3419 flag_rename_registers = 1;
3420 if (!OPTION_SET_P (flag_cunroll_grow_size))
3421 flag_cunroll_grow_size = 1;
3423 else if (!OPTION_SET_P (flag_cunroll_grow_size))
3424 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3426 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3427 if (rs6000_rop_protect)
3428 flag_shrink_wrap = 0;
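/* Usage sketch (illustrative, hypothetical user code): this hook runs, for
   example, when a function is compiled under

     #pragma GCC optimize ("unroll-loops")

   and again when the pragma's effect ends, so the unrolling-related flags
   above track the currently active optimization options rather than only the
   command line.  */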
3431 #ifdef TARGET_USES_LINUX64_OPT
3433 rs6000_linux64_override_options ()
3435 if (!OPTION_SET_P (rs6000_alignment_flags
))
3436 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
3437 if (rs6000_isa_flags
& OPTION_MASK_64BIT
)
3439 if (DEFAULT_ABI
!= ABI_AIX
)
3441 rs6000_current_abi
= ABI_AIX
;
3442 error (INVALID_64BIT
, "call");
3444 dot_symbols
= !strcmp (rs6000_abi_name
, "aixdesc");
3445 if (ELFv2_ABI_CHECK
)
3447 rs6000_current_abi
= ABI_ELFv2
;
3449 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3451 if (rs6000_isa_flags
& OPTION_MASK_RELOCATABLE
)
3453 rs6000_isa_flags
&= ~OPTION_MASK_RELOCATABLE
;
3454 error (INVALID_64BIT
, "relocatable");
3456 if (rs6000_isa_flags
& OPTION_MASK_EABI
)
3458 rs6000_isa_flags
&= ~OPTION_MASK_EABI
;
3459 error (INVALID_64BIT
, "eabi");
3461 if (TARGET_PROTOTYPE
)
3463 target_prototype
= 0;
3464 error (INVALID_64BIT
, "prototype");
3466 if ((rs6000_isa_flags
& OPTION_MASK_POWERPC64
) == 0)
3468 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3469 error ("%<-m64%> requires a PowerPC64 cpu");
3471 if (!OPTION_SET_P (rs6000_current_cmodel
))
3472 SET_CMODEL (CMODEL_MEDIUM
);
3473 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MINIMAL_TOC
) != 0)
3475 if (OPTION_SET_P (rs6000_current_cmodel
)
3476 && rs6000_current_cmodel
!= CMODEL_SMALL
)
3477 error ("%<-mcmodel%> incompatible with other toc options");
3478 if (TARGET_MINIMAL_TOC
)
3479 SET_CMODEL (CMODEL_SMALL
);
3480 else if (TARGET_PCREL
3481 || (PCREL_SUPPORTED_BY_OS
3482 && (rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) == 0))
3483 /* Ignore -mno-minimal-toc. */
3486 SET_CMODEL (CMODEL_SMALL
);
3488 if (rs6000_current_cmodel
!= CMODEL_SMALL
)
3490 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC
))
3491 TARGET_NO_FP_IN_TOC
= rs6000_current_cmodel
== CMODEL_MEDIUM
;
3492 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC
))
3493 TARGET_NO_SUM_IN_TOC
= 0;
3495 if (TARGET_PLTSEQ
&& DEFAULT_ABI
!= ABI_ELFv2
)
3497 if (OPTION_SET_P (rs6000_pltseq
))
3498 warning (0, "%qs unsupported for this ABI",
3500 rs6000_pltseq
= false;
3503 else if (TARGET_64BIT
)
3504 error (INVALID_32BIT
, "32");
3507 if (TARGET_PROFILE_KERNEL
)
3510 error (INVALID_32BIT
, "profile-kernel");
3512 if (OPTION_SET_P (rs6000_current_cmodel
))
3514 SET_CMODEL (CMODEL_SMALL
);
3515 error (INVALID_32BIT
, "cmodel");
3521 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3522 This support is only in little endian GLIBC 2.32 or newer. */
3524 glibc_supports_ieee_128bit (void)
3527 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3528 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3530 #endif /* OPTION_GLIBC. */
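/* Worked example (illustrative): for glibc 2.32 the test computes
   2 * 1000 + 32 = 2032 >= 2032, so IEEE 128-bit long double is considered
   supported; for glibc 2.17 it computes 2017, which fails the check.  */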
3535 /* Override command line options.
3537 Combine build-specific configuration information with options
3538 specified on the command line to set various state variables which
3539 influence code generation, optimization, and expansion of built-in
3540 functions. Assure that command-line configuration preferences are
3541 compatible with each other and with the build configuration; issue
3542 warnings while adjusting configuration or error messages while
3543 rejecting configuration.
3545 Upon entry to this function:
3547 This function is called once at the beginning of
3548 compilation, and then again at the start and end of compiling
3549 each section of code that has a different configuration, as
3550 indicated, for example, by adding the
3552 __attribute__((__target__("cpu=power9")))
3554 qualifier to a function definition or, for example, by bracketing
3557 #pragma GCC target("altivec")
3561 #pragma GCC reset_options
3563 directives. Parameter global_init_p is true for the initial
3564 invocation, which initializes global variables, and false for all
3565 subsequent invocations.
3568 Various global state information is assumed to be valid. This
3569 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3570 default CPU specified at build configure time, TARGET_DEFAULT,
3571 representing the default set of option flags for the default
3572 target, and OPTION_SET_P (rs6000_isa_flags), representing
3573 which options were requested on the command line.
3575 Upon return from this function:
3577 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3578 was set by name on the command line. Additionally, if certain
3579 attributes are automatically enabled or disabled by this function
3580 in order to assure compatibility between options and
3581 configuration, the flags associated with those attributes are
3582 also set. By setting these "explicit bits", we avoid the risk
3583 that other code might accidentally overwrite these particular
3584 attributes with "default values".
3586 The various bits of rs6000_isa_flags are set to indicate the
3587 target options that have been selected for the most current
3588 compilation efforts. This has the effect of also turning on the
3589 associated TARGET_XXX values since these are macros which are
3590 generally defined to test the corresponding bit of the
3591 rs6000_isa_flags variable.
3593 Various other global variables and fields of global structures
3594 (over 50 in all) are initialized to reflect the desired options
3595 for the most current compilation efforts. */
3598 rs6000_option_override_internal (bool global_init_p
)
3602 HOST_WIDE_INT set_masks
;
3603 HOST_WIDE_INT ignore_masks
;
3606 struct cl_target_option
*main_target_opt
3607 = ((global_init_p
|| target_option_default_node
== NULL
)
3608 ? NULL
: TREE_TARGET_OPTION (target_option_default_node
));
3610 /* Print defaults. */
3611 if ((TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
) && global_init_p
)
3612 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
3614 /* Remember the explicit arguments. */
3616 rs6000_isa_flags_explicit
= OPTION_SET_P (rs6000_isa_flags
);
3618 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3619 library functions, so warn about it. The flag may be useful for
3620 performance studies from time to time though, so don't disable it
3622 if (OPTION_SET_P (rs6000_alignment_flags
)
3623 && rs6000_alignment_flags
== MASK_ALIGN_POWER
3624 && DEFAULT_ABI
== ABI_DARWIN
3626 warning (0, "%qs is not supported for 64-bit Darwin;"
3627 " it is incompatible with the installed C and C++ libraries",
3630 /* Numerous experiments show that IRA-based loop pressure
3631 calculation works better for RTL loop invariant motion on targets
3632 with enough (>= 32) registers. It is an expensive optimization,
3633 so it is enabled only when optimizing for peak performance. */
3634 if (optimize
>= 3 && global_init_p
3635 && !OPTION_SET_P (flag_ira_loop_pressure
))
3636 flag_ira_loop_pressure
= 1;
3638 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3639 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3640 options were already specified. */
3641 if (flag_sanitize
& SANITIZE_USER_ADDRESS
3642 && !OPTION_SET_P (flag_asynchronous_unwind_tables
))
3643 flag_asynchronous_unwind_tables
= 1;
3645 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3646 loop unroller is active. It is only checked during unrolling, so
3647 we can just set it on by default. */
3648 if (!OPTION_SET_P (flag_variable_expansion_in_unroller
))
3649 flag_variable_expansion_in_unroller
= 1;
3651 /* Set the pointer size. */
3654 rs6000_pmode
= DImode
;
3655 rs6000_pointer_size
= 64;
3659 rs6000_pmode
= SImode
;
3660 rs6000_pointer_size
= 32;
3663 /* Some OSs don't support saving Altivec registers. On those OSs, we don't
3664 touch the OPTION_MASK_ALTIVEC settings; if the user wants it, the user
3665 must explicitly specify it and we won't interfere with the user's
3668 set_masks
= POWERPC_MASKS
;
3669 #ifdef OS_MISSING_ALTIVEC
3670 if (OS_MISSING_ALTIVEC
)
3671 set_masks
&= ~(OPTION_MASK_ALTIVEC
| OPTION_MASK_VSX
3672 | OTHER_VSX_VECTOR_MASKS
);
  /* Don't override by the processor default if given explicitly.  */
  set_masks &= ~rs6000_isa_flags_explicit;
  /* Without option powerpc64 specified explicitly, we need to ensure that
     powerpc64 is always enabled for 64 bit here, otherwise some following
     checks can use an unexpected TARGET_POWERPC64 value.  */
  if (!(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64)
      && TARGET_64BIT)
    {
      rs6000_isa_flags |= OPTION_MASK_POWERPC64;
      /* Need to stop powerpc64 from being unset in later processing,
	 so clear it in set_masks.  But as PR108240 shows, to keep it
	 consistent with before, we want to make this only if 64 bit
	 is enabled explicitly.  This is a hack, revisit this later.  */
      if (rs6000_isa_flags_explicit & OPTION_MASK_64BIT)
	set_masks &= ~OPTION_MASK_POWERPC64;
    }
  /* Process the -mcpu=<xxx> and -mtune=<xxx> argument.  If the user changed
     the cpu in a target attribute or pragma, but did not specify a tuning
     option, use the cpu for the tuning option rather than the option specified
     with -mtune on the command line.  Process a '--with-cpu' configuration
     request as an implicit --cpu.  */
  if (rs6000_cpu_index >= 0)
    cpu_index = rs6000_cpu_index;
  else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
    cpu_index = main_target_opt->x_rs6000_cpu_index;
  else if (OPTION_TARGET_CPU_DEFAULT)
    cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
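  /* Illustrative note (not part of the original code): the chain above gives
     precedence to an explicitly selected -mcpu=<xxx>, then to the cpu
     recorded in the default target-option node, and finally to the
     configure-time default (OPTION_TARGET_CPU_DEFAULT, i.e. a
     "--with-cpu=<xxx>" build).  A plain "gcc -mcpu=power10" therefore takes
     the first branch.  */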
  /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
     compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
     with those from the cpu, except for options that were explicitly set.  If
     we don't have a cpu, do not override the target bits set in
     TARGET_DEFAULT.  */
  if (cpu_index >= 0)
    {
      rs6000_cpu_index = cpu_index;
      rs6000_isa_flags &= ~set_masks;
      rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
			   & set_masks);
    }
  else
    {
      /* If no -mcpu=<xxx>, inherit any default options that were cleared via
	 POWERPC_MASKS.  Originally, TARGET_DEFAULT was used to initialize
	 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook.  When we
	 switched to using rs6000_isa_flags, we need to do the initialization
	 here.

	 If there is a TARGET_DEFAULT, use that.  Otherwise fall back to using
	 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults.  */
      HOST_WIDE_INT flags;
      if (TARGET_DEFAULT)
	flags = TARGET_DEFAULT;
      else
	{
	  /* PowerPC 64-bit LE requires at least ISA 2.07.  */
	  const char *default_cpu = (!TARGET_POWERPC64
				     ? "powerpc"
				     : (BYTES_BIG_ENDIAN
					? "powerpc64"
					: "powerpc64le"));
	  int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
	  flags = processor_target_table[default_cpu_index].target_enable;
	}
      rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
    }
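  /* Minimal sketch of the defaulting math above (illustrative only, assuming
     a hypothetical explicit "-mno-altivec" on the command line):

       rs6000_isa_flags_explicit contains OPTION_MASK_ALTIVEC;
       rs6000_isa_flags |= flags & ~rs6000_isa_flags_explicit;

     every default bit is ORed in except the ones the user already decided,
     so the explicit -mno-altivec is never overridden here.  */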
3743 /* Don't expect powerpc64 enabled on those OSes with OS_MISSING_POWERPC64,
3744 since they do not save and restore the high half of the GPRs correctly
3745 in all cases. If the user explicitly specifies it, we won't interfere
3746 with the user's specification. */
3747 #ifdef OS_MISSING_POWERPC64
3748 if (OS_MISSING_POWERPC64
3751 && !(rs6000_isa_flags_explicit
& OPTION_MASK_POWERPC64
))
3752 rs6000_isa_flags
&= ~OPTION_MASK_POWERPC64
;
  if (rs6000_tune_index >= 0)
    tune_index = rs6000_tune_index;
  else if (cpu_index >= 0)
    rs6000_tune_index = tune_index = cpu_index;
3762 enum processor_type tune_proc
3763 = (TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
);
3766 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
3767 if (processor_target_table
[i
].processor
== tune_proc
)
3775 rs6000_cpu
= processor_target_table
[cpu_index
].processor
;
3777 rs6000_cpu
= TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
;
3779 gcc_assert (tune_index
>= 0);
3780 rs6000_tune
= processor_target_table
[tune_index
].processor
;
3782 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
3783 || rs6000_cpu
== PROCESSOR_PPCE500MC
|| rs6000_cpu
== PROCESSOR_PPCE500MC64
3784 || rs6000_cpu
== PROCESSOR_PPCE5500
)
3787 error ("AltiVec not supported in this target");
3790 /* If we are optimizing big endian systems for space, use the load/store
3791 multiple instructions. */
3792 if (BYTES_BIG_ENDIAN
&& optimize_size
)
3793 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
;
3795 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3796 because the hardware doesn't support the instructions used in little
3797 endian mode, and causes an alignment trap. The 750 does not cause an
3798 alignment trap (except when the target is unaligned). */
3800 if (!BYTES_BIG_ENDIAN
&& rs6000_cpu
!= PROCESSOR_PPC750
&& TARGET_MULTIPLE
)
3802 rs6000_isa_flags
&= ~OPTION_MASK_MULTIPLE
;
3803 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
) != 0)
3804 warning (0, "%qs is not supported on little endian systems",
3808 /* If little-endian, default to -mstrict-align on older processors.
3809 Testing for direct_move matches power8 and later. */
3810 if (!BYTES_BIG_ENDIAN
3811 && !(processor_target_table
[tune_index
].target_enable
3812 & OPTION_MASK_DIRECT_MOVE
))
3813 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_STRICT_ALIGN
;
3815 /* Add some warnings for VSX. */
3818 const char *msg
= NULL
;
3819 if (!TARGET_HARD_FLOAT
)
3821 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3822 msg
= N_("%<-mvsx%> requires hardware floating point");
3825 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
3826 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3829 else if (TARGET_AVOID_XFORM
> 0)
3830 msg
= N_("%<-mvsx%> needs indexed addressing");
3831 else if (!TARGET_ALTIVEC
&& (rs6000_isa_flags_explicit
3832 & OPTION_MASK_ALTIVEC
))
3834 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3835 msg
= N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3837 msg
= N_("%<-mno-altivec%> disables vsx");
3843 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
3844 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3848 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3849 the -mcpu setting to enable options that conflict. */
3850 if ((!TARGET_HARD_FLOAT
|| !TARGET_ALTIVEC
|| !TARGET_VSX
)
3851 && (rs6000_isa_flags_explicit
& (OPTION_MASK_SOFT_FLOAT
3852 | OPTION_MASK_ALTIVEC
3853 | OPTION_MASK_VSX
)) != 0)
3854 rs6000_isa_flags
&= ~((OPTION_MASK_P8_VECTOR
| OPTION_MASK_CRYPTO
3855 | OPTION_MASK_DIRECT_MOVE
)
3856 & ~rs6000_isa_flags_explicit
);
3858 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
3859 rs6000_print_isa_options (stderr
, 0, "before defaults", rs6000_isa_flags
);
3861 #ifdef XCOFF_DEBUGGING_INFO
3862 /* For AIX default to 64-bit DWARF. */
3863 if (!OPTION_SET_P (dwarf_offset_size
))
3864 dwarf_offset_size
= POINTER_SIZE_UNITS
;
3867 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3868 off all of the options that depend on those flags. */
3869 ignore_masks
= rs6000_disable_incompatible_switches ();
3871 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3872 unless the user explicitly used the -mno-<option> to disable the code. */
3873 if (TARGET_P9_VECTOR
|| TARGET_MODULO
|| TARGET_P9_MISC
)
3874 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3875 else if (TARGET_P9_MINMAX
)
3879 if (cpu_index
== PROCESSOR_POWER9
)
3881 /* legacy behavior: allow -mcpu=power9 with certain
3882 capabilities explicitly disabled. */
3883 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3886 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3887 "for <xxx> less than power9", "-mcpu");
3889 else if ((ISA_3_0_MASKS_SERVER
& rs6000_isa_flags_explicit
)
3890 != (ISA_3_0_MASKS_SERVER
& rs6000_isa_flags
3891 & rs6000_isa_flags_explicit
))
3892 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3893 were explicitly cleared. */
3894 error ("%qs incompatible with explicitly disabled options",
3897 rs6000_isa_flags
|= ISA_3_0_MASKS_SERVER
;
3899 else if (TARGET_P8_VECTOR
|| TARGET_DIRECT_MOVE
|| TARGET_CRYPTO
)
3900 rs6000_isa_flags
|= (ISA_2_7_MASKS_SERVER
& ~ignore_masks
);
3901 else if (TARGET_VSX
)
3902 rs6000_isa_flags
|= (ISA_2_6_MASKS_SERVER
& ~ignore_masks
);
3903 else if (TARGET_POPCNTD
)
3904 rs6000_isa_flags
|= (ISA_2_6_MASKS_EMBEDDED
& ~ignore_masks
);
3905 else if (TARGET_DFP
)
3906 rs6000_isa_flags
|= (ISA_2_5_MASKS_SERVER
& ~ignore_masks
);
3907 else if (TARGET_CMPB
)
3908 rs6000_isa_flags
|= (ISA_2_5_MASKS_EMBEDDED
& ~ignore_masks
);
3909 else if (TARGET_FPRND
)
3910 rs6000_isa_flags
|= (ISA_2_4_MASKS
& ~ignore_masks
);
3911 else if (TARGET_POPCNTB
)
3912 rs6000_isa_flags
|= (ISA_2_2_MASKS
& ~ignore_masks
);
3913 else if (TARGET_ALTIVEC
)
3914 rs6000_isa_flags
|= (OPTION_MASK_PPC_GFXOPT
& ~ignore_masks
);
3916 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3917 target attribute or pragma which automatically enables both options,
3918 unless the altivec ABI was set. This is set by default for 64-bit, but
3919 not for 32-bit. Don't move this before the above code using ignore_masks,
3920 since it can reset the cleared VSX/ALTIVEC flag again. */
3921 if (main_target_opt
&& !main_target_opt
->x_rs6000_altivec_abi
)
3922 rs6000_isa_flags
&= ~((OPTION_MASK_VSX
| OPTION_MASK_ALTIVEC
)
3923 & ~rs6000_isa_flags_explicit
);
3925 if (TARGET_CRYPTO
&& !TARGET_ALTIVEC
)
3927 if (rs6000_isa_flags_explicit
& OPTION_MASK_CRYPTO
)
3928 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3929 rs6000_isa_flags
&= ~OPTION_MASK_CRYPTO
;
3932 if (!TARGET_FPRND
&& TARGET_VSX
)
3934 if (rs6000_isa_flags_explicit
& OPTION_MASK_FPRND
)
3935 /* TARGET_VSX = 1 implies Power 7 and newer */
3936 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3937 rs6000_isa_flags
&= ~OPTION_MASK_FPRND
;
3940 if (TARGET_DIRECT_MOVE
&& !TARGET_VSX
)
3942 if (rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
)
3943 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3944 rs6000_isa_flags
&= ~OPTION_MASK_DIRECT_MOVE
;
3947 if (TARGET_P8_VECTOR
&& !TARGET_ALTIVEC
)
3949 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
3950 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3951 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
3954 if (TARGET_P8_VECTOR
&& !TARGET_VSX
)
3956 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
3957 && (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
))
3958 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3959 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
) == 0)
3961 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
3962 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3963 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
3967 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3969 rs6000_isa_flags
|= OPTION_MASK_VSX
;
3970 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3974 if (TARGET_DFP
&& !TARGET_HARD_FLOAT
)
3976 if (rs6000_isa_flags_explicit
& OPTION_MASK_DFP
)
3977 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3978 rs6000_isa_flags
&= ~OPTION_MASK_DFP
;
  /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
     silently turn off quad memory mode.  */
3983 if ((TARGET_QUAD_MEMORY
|| TARGET_QUAD_MEMORY_ATOMIC
) && !TARGET_POWERPC64
)
3985 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
3986 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3988 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) != 0)
3989 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3991 rs6000_isa_flags
&= ~(OPTION_MASK_QUAD_MEMORY
3992 | OPTION_MASK_QUAD_MEMORY_ATOMIC
);
3995 /* Non-atomic quad memory load/store are disabled for little endian, since
3996 the words are reversed, but atomic operations can still be done by
3997 swapping the words. */
3998 if (TARGET_QUAD_MEMORY
&& !WORDS_BIG_ENDIAN
)
4000 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
4001 warning (0, N_("%<-mquad-memory%> is not available in little endian "
4004 rs6000_isa_flags
&= ~OPTION_MASK_QUAD_MEMORY
;
  /* Assume that if the user asked for normal quad memory instructions, they
     want the atomic versions as well, unless they explicitly told us not to
     use quad word atomic instructions.  */
4010 if (TARGET_QUAD_MEMORY
4011 && !TARGET_QUAD_MEMORY_ATOMIC
4012 && ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) == 0))
4013 rs6000_isa_flags
|= OPTION_MASK_QUAD_MEMORY_ATOMIC
;
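  /* Illustrative example (not from the original sources): on a 64-bit
     big-endian target, "-mquad-memory" alone now behaves like
     "-mquad-memory -mquad-memory-atomic", whereas
     "-mquad-memory -mno-quad-memory-atomic" keeps the atomic form disabled
     because the explicit bit blocks the implication above.  */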
4015 /* If we can shrink-wrap the TOC register save separately, then use
4016 -msave-toc-indirect unless explicitly disabled. */
4017 if ((rs6000_isa_flags_explicit
& OPTION_MASK_SAVE_TOC_INDIRECT
) == 0
4018 && flag_shrink_wrap_separate
4019 && optimize_function_for_speed_p (cfun
))
4020 rs6000_isa_flags
|= OPTION_MASK_SAVE_TOC_INDIRECT
;
4022 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4023 generating power8 instructions. Power9 does not optimize power8 fusion
4025 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
))
4027 if (processor_target_table
[tune_index
].processor
== PROCESSOR_POWER8
)
4028 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4030 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4033 /* Setting additional fusion flags turns on base fusion. */
4034 if (!TARGET_P8_FUSION
&& TARGET_P8_FUSION_SIGN
)
4036 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
)
4038 if (TARGET_P8_FUSION_SIGN
)
4039 error ("%qs requires %qs", "-mpower8-fusion-sign",
4042 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4045 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4048 /* Power8 does not fuse sign extended loads with the addis. If we are
4049 optimizing at high levels for speed, convert a sign extended load into a
4050 zero extending load, and an explicit sign extension. */
4051 if (TARGET_P8_FUSION
4052 && !(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION_SIGN
)
4053 && optimize_function_for_speed_p (cfun
)
4055 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION_SIGN
;
4057 /* ISA 3.0 vector instructions include ISA 2.07. */
4058 if (TARGET_P9_VECTOR
&& !TARGET_P8_VECTOR
)
4060 /* We prefer to not mention undocumented options in
4061 error messages. However, if users have managed to select
4062 power9-vector without selecting power8-vector, they
4063 already know about undocumented flags. */
4064 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) &&
4065 (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
))
4066 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4067 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) == 0)
4069 rs6000_isa_flags
&= ~OPTION_MASK_P9_VECTOR
;
4070 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4071 rs6000_isa_flags_explicit
|= OPTION_MASK_P9_VECTOR
;
4075 /* OPTION_MASK_P9_VECTOR is explicit and
4076 OPTION_MASK_P8_VECTOR is not explicit. */
4077 rs6000_isa_flags
|= OPTION_MASK_P8_VECTOR
;
4078 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
4082 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
4083 support. If we only have ISA 2.06 support, and the user did not specify
4084 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4085 but we don't enable the full vectorization support */
4086 if (TARGET_ALLOW_MOVMISALIGN
== -1 && TARGET_P8_VECTOR
&& TARGET_DIRECT_MOVE
)
4087 TARGET_ALLOW_MOVMISALIGN
= 1;
4089 else if (TARGET_ALLOW_MOVMISALIGN
&& !TARGET_VSX
)
4091 if (TARGET_ALLOW_MOVMISALIGN
> 0
4092 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN
))
4093 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4095 TARGET_ALLOW_MOVMISALIGN
= 0;
4098 /* Determine when unaligned vector accesses are permitted, and when
4099 they are preferred over masked Altivec loads. Note that if
4100 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4101 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4103 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4107 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4108 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4110 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4113 else if (!TARGET_ALLOW_MOVMISALIGN
)
4115 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4116 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4117 "-mallow-movmisalign");
4119 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4123 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
))
4125 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4126 rs6000_isa_flags
|= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
;
4128 rs6000_isa_flags
&= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
;
4131 /* Use long double size to select the appropriate long double. We use
4132 TYPE_PRECISION to differentiate the 3 different long double types. We map
4133 128 into the precision used for TFmode. */
4134 int default_long_double_size
= (RS6000_DEFAULT_LONG_DOUBLE_SIZE
== 64
4136 : FLOAT_PRECISION_TFmode
);
4138 /* Set long double size before the IEEE 128-bit tests. */
4139 if (!OPTION_SET_P (rs6000_long_double_type_size
))
4141 if (main_target_opt
!= NULL
4142 && (main_target_opt
->x_rs6000_long_double_type_size
4143 != default_long_double_size
))
4144 error ("target attribute or pragma changes %<long double%> size");
4146 rs6000_long_double_type_size
= default_long_double_size
;
4148 else if (rs6000_long_double_type_size
== FLOAT_PRECISION_TFmode
)
4149 ; /* The option value can be seen when cl_target_option_restore is called. */
4150 else if (rs6000_long_double_type_size
== 128)
4151 rs6000_long_double_type_size
= FLOAT_PRECISION_TFmode
;
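  /* Illustrative summary (not part of the original code): a size of 64 makes
     "long double" the same as "double", while 128 selects TFmode precision;
     whether TFmode is then the IBM double-double or the IEEE 128-bit format
     is decided by rs6000_ieeequad below.  */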
4153 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4154 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4155 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4156 those systems will not pick up this default. Warn if the user changes the
4157 default unless -Wno-psabi. */
4158 if (!OPTION_SET_P (rs6000_ieeequad
))
4159 rs6000_ieeequad
= TARGET_IEEEQUAD_DEFAULT
;
4161 else if (TARGET_LONG_DOUBLE_128
)
4163 if (global_options
.x_rs6000_ieeequad
4164 && (!TARGET_POPCNTD
|| !TARGET_VSX
))
4165 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4167 if (rs6000_ieeequad
!= TARGET_IEEEQUAD_DEFAULT
)
4169 /* Determine if the user can change the default long double type at
4170 compilation time. You need GLIBC 2.32 or newer to be able to
4171 change the long double type. Only issue one warning. */
4172 static bool warned_change_long_double
;
4174 if (!warned_change_long_double
&& !glibc_supports_ieee_128bit ())
4176 warned_change_long_double
= true;
4177 if (TARGET_IEEEQUAD
)
4178 warning (OPT_Wpsabi
, "Using IEEE extended precision "
4181 warning (OPT_Wpsabi
, "Using IBM extended precision "
  /* Enable the default support for IEEE 128-bit floating point on Linux VSX
     systems.  In GCC 7, we would enable the IEEE 128-bit floating point
     infrastructure (-mfloat128-type) but not enable the actual __float128 type
     unless the user used the explicit -mfloat128.  In GCC 8, we enable both
     the keyword as well as the type.  */
  TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4194 /* IEEE 128-bit floating point requires VSX support. */
4195 if (TARGET_FLOAT128_KEYWORD
)
4199 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) != 0)
4200 error ("%qs requires VSX support", "-mfloat128");
4202 TARGET_FLOAT128_TYPE
= 0;
4203 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_KEYWORD
4204 | OPTION_MASK_FLOAT128_HW
);
4206 else if (!TARGET_FLOAT128_TYPE
)
4208 TARGET_FLOAT128_TYPE
= 1;
4209 warning (0, "The %<-mfloat128%> option may not be fully supported");
4213 /* Enable the __float128 keyword under Linux by default. */
4214 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_KEYWORD
4215 && (rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) == 0)
4216 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_KEYWORD
;
  /* If we are supporting the float128 type and have full ISA 3.0 support,
     enable -mfloat128-hardware by default.  However, don't enable the
     __float128 keyword if it was explicitly turned off.  64-bit mode is needed
     because sometimes the compiler wants to put things in an integer
     container, and if we don't have __int128 support, it is impossible.  */
4223 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_HW
&& TARGET_64BIT
4224 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) == ISA_3_0_MASKS_IEEE
4225 && !(rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
))
4226 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_HW
;
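  /* Illustrative example (an assumption about a typical configuration, not
     from the original sources): on a 64-bit Linux VSX target with
     -mcpu=power9, the defaulting above ends up enabling the __float128 type,
     the keyword, and -mfloat128-hardware, unless any of them were explicitly
     disabled.  */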
4228 if (TARGET_FLOAT128_HW
4229 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) != ISA_3_0_MASKS_IEEE
)
4231 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4232 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4234 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4237 if (TARGET_FLOAT128_HW
&& !TARGET_64BIT
)
4239 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4240 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4242 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4245 /* Enable -mprefixed by default on power10 systems. */
4246 if (TARGET_POWER10
&& (rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED
) == 0)
4247 rs6000_isa_flags
|= OPTION_MASK_PREFIXED
;
4249 /* -mprefixed requires -mcpu=power10 (or later). */
4250 else if (TARGET_PREFIXED
&& !TARGET_POWER10
)
4252 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED
) != 0)
4253 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4255 rs6000_isa_flags
&= ~OPTION_MASK_PREFIXED
;
4258 /* -mpcrel requires prefixed load/store addressing. */
4259 if (TARGET_PCREL
&& !TARGET_PREFIXED
)
4261 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4262 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4264 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4267 /* Print the options after updating the defaults. */
4268 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4269 rs6000_print_isa_options (stderr
, 0, "after defaults", rs6000_isa_flags
);
4271 /* E500mc does "better" if we inline more aggressively. Respect the
4272 user's opinion, though. */
4273 if (rs6000_block_move_inline_limit
== 0
4274 && (rs6000_tune
== PROCESSOR_PPCE500MC
4275 || rs6000_tune
== PROCESSOR_PPCE500MC64
4276 || rs6000_tune
== PROCESSOR_PPCE5500
4277 || rs6000_tune
== PROCESSOR_PPCE6500
))
4278 rs6000_block_move_inline_limit
= 128;
4280 /* store_one_arg depends on expand_block_move to handle at least the
4281 size of reg_parm_stack_space. */
4282 if (rs6000_block_move_inline_limit
< (TARGET_POWERPC64
? 64 : 32))
4283 rs6000_block_move_inline_limit
= (TARGET_POWERPC64
? 64 : 32);
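  /* Worked example (illustrative): with e500mc/e5500/e6500 tuning and no
     explicit -mblock-move-inline-limit=, the limit becomes 128 above; any
     64-bit configuration is additionally clamped up to at least 64 bytes so
     that expand_block_move covers reg_parm_stack_space as store_one_arg
     expects.  */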
4287 /* If the appropriate debug option is enabled, replace the target hooks
4288 with debug versions that call the real version and then prints
4289 debugging information. */
4290 if (TARGET_DEBUG_COST
)
4292 targetm
.rtx_costs
= rs6000_debug_rtx_costs
;
4293 targetm
.address_cost
= rs6000_debug_address_cost
;
4294 targetm
.sched
.adjust_cost
= rs6000_debug_adjust_cost
;
4297 if (TARGET_DEBUG_ADDR
)
4299 targetm
.legitimate_address_p
= rs6000_debug_legitimate_address_p
;
4300 targetm
.legitimize_address
= rs6000_debug_legitimize_address
;
4301 rs6000_secondary_reload_class_ptr
4302 = rs6000_debug_secondary_reload_class
;
4303 targetm
.secondary_memory_needed
4304 = rs6000_debug_secondary_memory_needed
;
4305 targetm
.can_change_mode_class
4306 = rs6000_debug_can_change_mode_class
;
4307 rs6000_preferred_reload_class_ptr
4308 = rs6000_debug_preferred_reload_class
;
4309 rs6000_mode_dependent_address_ptr
4310 = rs6000_debug_mode_dependent_address
;
4313 if (rs6000_veclibabi_name
)
4315 if (strcmp (rs6000_veclibabi_name
, "mass") == 0)
4316 rs6000_veclib_handler
= rs6000_builtin_vectorized_libmass
;
4319 error ("unknown vectorization library ABI type in "
4320 "%<-mveclibabi=%s%>", rs6000_veclibabi_name
);
4326 /* Enable Altivec ABI for AIX -maltivec. */
4328 && (TARGET_ALTIVEC
|| TARGET_VSX
)
4329 && !OPTION_SET_P (rs6000_altivec_abi
))
4331 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
4332 error ("target attribute or pragma changes AltiVec ABI");
4334 rs6000_altivec_abi
= 1;
4337 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4338 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4339 be explicitly overridden in either case. */
4342 if (!OPTION_SET_P (rs6000_altivec_abi
)
4343 && (TARGET_64BIT
|| TARGET_ALTIVEC
|| TARGET_VSX
))
4345 if (main_target_opt
!= NULL
&&
4346 !main_target_opt
->x_rs6000_altivec_abi
)
4347 error ("target attribute or pragma changes AltiVec ABI");
4349 rs6000_altivec_abi
= 1;
4353 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4354 So far, the only darwin64 targets are also MACH-O. */
4356 && DEFAULT_ABI
== ABI_DARWIN
4359 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_darwin64_abi
)
4360 error ("target attribute or pragma changes darwin64 ABI");
4363 rs6000_darwin64_abi
= 1;
4364 /* Default to natural alignment, for better performance. */
4365 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
4369 /* Place FP constants in the constant pool instead of TOC
4370 if section anchors enabled. */
4371 if (flag_section_anchors
4372 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC
))
4373 TARGET_NO_FP_IN_TOC
= 1;
4375 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4376 rs6000_print_isa_options (stderr
, 0, "before subtarget", rs6000_isa_flags
);
4378 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4379 SUBTARGET_OVERRIDE_OPTIONS
;
4381 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4382 SUBSUBTARGET_OVERRIDE_OPTIONS
;
4384 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4385 SUB3TARGET_OVERRIDE_OPTIONS
;
4388 /* If the ABI has support for PC-relative relocations, enable it by default.
4389 This test depends on the sub-target tests above setting the code model to
4390 medium for ELF v2 systems. */
4391 if (PCREL_SUPPORTED_BY_OS
4392 && (rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) == 0)
4393 rs6000_isa_flags
|= OPTION_MASK_PCREL
;
4395 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4396 after the subtarget override options are done. */
4397 else if (TARGET_PCREL
&& TARGET_CMODEL
!= CMODEL_MEDIUM
)
4399 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4400 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4402 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4405 /* Enable -mmma by default on power10 systems. */
4406 if (TARGET_POWER10
&& (rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) == 0)
4407 rs6000_isa_flags
|= OPTION_MASK_MMA
;
4409 /* Turn off vector pair/mma options on non-power10 systems. */
4410 else if (!TARGET_POWER10
&& TARGET_MMA
)
4412 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) != 0)
4413 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4415 rs6000_isa_flags
&= ~OPTION_MASK_MMA
;
4418 /* Enable power10 fusion if we are tuning for power10, even if we aren't
4419 generating power10 instructions. */
4420 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P10_FUSION
))
4422 if (rs6000_tune
== PROCESSOR_POWER10
)
4423 rs6000_isa_flags
|= OPTION_MASK_P10_FUSION
;
4425 rs6000_isa_flags
&= ~OPTION_MASK_P10_FUSION
;
4428 /* MMA requires SIMD support as ISA 3.1 claims and our implementation
4429 such as "*movoo" uses vector pair access which use VSX registers.
4430 So make MMA require VSX support here. */
4431 if (TARGET_MMA
&& !TARGET_VSX
)
4433 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) != 0)
4434 error ("%qs requires %qs", "-mmma", "-mvsx");
4435 rs6000_isa_flags
&= ~OPTION_MASK_MMA
;
4438 if (!TARGET_PCREL
&& TARGET_PCREL_OPT
)
4439 rs6000_isa_flags
&= ~OPTION_MASK_PCREL_OPT
;
4441 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4442 rs6000_print_isa_options (stderr
, 0, "after subtarget", rs6000_isa_flags
);
4444 rs6000_always_hint
= (rs6000_tune
!= PROCESSOR_POWER4
4445 && rs6000_tune
!= PROCESSOR_POWER5
4446 && rs6000_tune
!= PROCESSOR_POWER6
4447 && rs6000_tune
!= PROCESSOR_POWER7
4448 && rs6000_tune
!= PROCESSOR_POWER8
4449 && rs6000_tune
!= PROCESSOR_POWER9
4450 && rs6000_tune
!= PROCESSOR_POWER10
4451 && rs6000_tune
!= PROCESSOR_PPCA2
4452 && rs6000_tune
!= PROCESSOR_CELL
4453 && rs6000_tune
!= PROCESSOR_PPC476
);
4454 rs6000_sched_groups
= (rs6000_tune
== PROCESSOR_POWER4
4455 || rs6000_tune
== PROCESSOR_POWER5
4456 || rs6000_tune
== PROCESSOR_POWER7
4457 || rs6000_tune
== PROCESSOR_POWER8
);
4458 rs6000_align_branch_targets
= (rs6000_tune
== PROCESSOR_POWER4
4459 || rs6000_tune
== PROCESSOR_POWER5
4460 || rs6000_tune
== PROCESSOR_POWER6
4461 || rs6000_tune
== PROCESSOR_POWER7
4462 || rs6000_tune
== PROCESSOR_POWER8
4463 || rs6000_tune
== PROCESSOR_POWER9
4464 || rs6000_tune
== PROCESSOR_POWER10
4465 || rs6000_tune
== PROCESSOR_PPCE500MC
4466 || rs6000_tune
== PROCESSOR_PPCE500MC64
4467 || rs6000_tune
== PROCESSOR_PPCE5500
4468 || rs6000_tune
== PROCESSOR_PPCE6500
);
4470 /* Allow debug switches to override the above settings. These are set to -1
4471 in rs6000.opt to indicate the user hasn't directly set the switch. */
4472 if (TARGET_ALWAYS_HINT
>= 0)
4473 rs6000_always_hint
= TARGET_ALWAYS_HINT
;
4475 if (TARGET_SCHED_GROUPS
>= 0)
4476 rs6000_sched_groups
= TARGET_SCHED_GROUPS
;
4478 if (TARGET_ALIGN_BRANCH_TARGETS
>= 0)
4479 rs6000_align_branch_targets
= TARGET_ALIGN_BRANCH_TARGETS
;
4481 rs6000_sched_restricted_insns_priority
4482 = (rs6000_sched_groups
? 1 : 0);
4484 /* Handle -msched-costly-dep option. */
4485 rs6000_sched_costly_dep
4486 = (rs6000_sched_groups
? true_store_to_load_dep_costly
: no_dep_costly
);
4488 if (rs6000_sched_costly_dep_str
)
4490 if (! strcmp (rs6000_sched_costly_dep_str
, "no"))
4491 rs6000_sched_costly_dep
= no_dep_costly
;
4492 else if (! strcmp (rs6000_sched_costly_dep_str
, "all"))
4493 rs6000_sched_costly_dep
= all_deps_costly
;
4494 else if (! strcmp (rs6000_sched_costly_dep_str
, "true_store_to_load"))
4495 rs6000_sched_costly_dep
= true_store_to_load_dep_costly
;
4496 else if (! strcmp (rs6000_sched_costly_dep_str
, "store_to_load"))
4497 rs6000_sched_costly_dep
= store_to_load_dep_costly
;
4499 rs6000_sched_costly_dep
= ((enum rs6000_dependence_cost
)
4500 atoi (rs6000_sched_costly_dep_str
));
4503 /* Handle -minsert-sched-nops option. */
4504 rs6000_sched_insert_nops
4505 = (rs6000_sched_groups
? sched_finish_regroup_exact
: sched_finish_none
);
4507 if (rs6000_sched_insert_nops_str
)
4509 if (! strcmp (rs6000_sched_insert_nops_str
, "no"))
4510 rs6000_sched_insert_nops
= sched_finish_none
;
4511 else if (! strcmp (rs6000_sched_insert_nops_str
, "pad"))
4512 rs6000_sched_insert_nops
= sched_finish_pad_groups
;
4513 else if (! strcmp (rs6000_sched_insert_nops_str
, "regroup_exact"))
4514 rs6000_sched_insert_nops
= sched_finish_regroup_exact
;
4516 rs6000_sched_insert_nops
= ((enum rs6000_nop_insertion
)
4517 atoi (rs6000_sched_insert_nops_str
));
4520 /* Handle stack protector */
4521 if (!OPTION_SET_P (rs6000_stack_protector_guard
))
4522 #ifdef TARGET_THREAD_SSP_OFFSET
4523 rs6000_stack_protector_guard
= SSP_TLS
;
4525 rs6000_stack_protector_guard
= SSP_GLOBAL
;
4528 #ifdef TARGET_THREAD_SSP_OFFSET
4529 rs6000_stack_protector_guard_offset
= TARGET_THREAD_SSP_OFFSET
;
4530 rs6000_stack_protector_guard_reg
= TARGET_64BIT
? 13 : 2;
4533 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str
))
4536 const char *str
= rs6000_stack_protector_guard_offset_str
;
4539 long offset
= strtol (str
, &endp
, 0);
4540 if (!*str
|| *endp
|| errno
)
4541 error ("%qs is not a valid number in %qs", str
,
4542 "-mstack-protector-guard-offset=");
4544 if (!IN_RANGE (offset
, -0x8000, 0x7fff)
4545 || (TARGET_64BIT
&& (offset
& 3)))
4546 error ("%qs is not a valid offset in %qs", str
,
4547 "-mstack-protector-guard-offset=");
4549 rs6000_stack_protector_guard_offset
= offset
;
4552 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str
))
4554 const char *str
= rs6000_stack_protector_guard_reg_str
;
4555 int reg
= decode_reg_name (str
);
4557 if (!IN_RANGE (reg
, 1, 31))
4558 error ("%qs is not a valid base register in %qs", str
,
4559 "-mstack-protector-guard-reg=");
4561 rs6000_stack_protector_guard_reg
= reg
;
4564 if (rs6000_stack_protector_guard
== SSP_TLS
4565 && !IN_RANGE (rs6000_stack_protector_guard_reg
, 1, 31))
4566 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4570 #ifdef TARGET_REGNAMES
4571 /* If the user desires alternate register names, copy in the
4572 alternate names now. */
4573 if (TARGET_REGNAMES
)
4574 memcpy (rs6000_reg_names
, alt_reg_names
, sizeof (rs6000_reg_names
));
4577 /* Set aix_struct_return last, after the ABI is determined.
4578 If -maix-struct-return or -msvr4-struct-return was explicitly
4579 used, don't override with the ABI default. */
4580 if (!OPTION_SET_P (aix_struct_return
))
4581 aix_struct_return
= (DEFAULT_ABI
!= ABI_V4
|| DRAFT_V4_STRUCT_RET
);
4584 /* IBM XL compiler defaults to unsigned bitfields. */
4585 if (TARGET_XL_COMPAT
)
4586 flag_signed_bitfields
= 0;
4589 if (TARGET_LONG_DOUBLE_128
&& !TARGET_IEEEQUAD
)
4590 REAL_MODE_FORMAT (TFmode
) = &ibm_extended_format
;
4592 ASM_GENERATE_INTERNAL_LABEL (toc_label_name
, "LCTOC", 1);
4594 /* We can only guarantee the availability of DI pseudo-ops when
4595 assembling for 64-bit targets. */
4598 targetm
.asm_out
.aligned_op
.di
= NULL
;
4599 targetm
.asm_out
.unaligned_op
.di
= NULL
;
4603 /* Set branch target alignment, if not optimizing for size. */
4606 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4607 aligned 8byte to avoid misprediction by the branch predictor. */
4608 if (rs6000_tune
== PROCESSOR_TITAN
4609 || rs6000_tune
== PROCESSOR_CELL
)
4611 if (flag_align_functions
&& !str_align_functions
)
4612 str_align_functions
= "8";
4613 if (flag_align_jumps
&& !str_align_jumps
)
4614 str_align_jumps
= "8";
4615 if (flag_align_loops
&& !str_align_loops
)
4616 str_align_loops
= "8";
4618 if (rs6000_align_branch_targets
)
4620 if (flag_align_functions
&& !str_align_functions
)
4621 str_align_functions
= "16";
4622 if (flag_align_jumps
&& !str_align_jumps
)
4623 str_align_jumps
= "16";
4624 if (flag_align_loops
&& !str_align_loops
)
4626 can_override_loop_align
= 1;
4627 str_align_loops
= "16";
4632 /* Arrange to save and restore machine status around nested functions. */
4633 init_machine_status
= rs6000_init_machine_status
;
4635 /* We should always be splitting complex arguments, but we can't break
4636 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4637 if (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
)
4638 targetm
.calls
.split_complex_arg
= NULL
;
4640 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4641 if (DEFAULT_ABI
== ABI_AIX
)
4642 targetm
.calls
.custom_function_descriptors
= 0;
4645 /* Initialize rs6000_cost with the appropriate target costs. */
4647 rs6000_cost
= TARGET_POWERPC64
? &size64_cost
: &size32_cost
;
4649 switch (rs6000_tune
)
4651 case PROCESSOR_RS64A
:
4652 rs6000_cost
= &rs64a_cost
;
4655 case PROCESSOR_MPCCORE
:
4656 rs6000_cost
= &mpccore_cost
;
4659 case PROCESSOR_PPC403
:
4660 rs6000_cost
= &ppc403_cost
;
4663 case PROCESSOR_PPC405
:
4664 rs6000_cost
= &ppc405_cost
;
4667 case PROCESSOR_PPC440
:
4668 rs6000_cost
= &ppc440_cost
;
4671 case PROCESSOR_PPC476
:
4672 rs6000_cost
= &ppc476_cost
;
4675 case PROCESSOR_PPC601
:
4676 rs6000_cost
= &ppc601_cost
;
4679 case PROCESSOR_PPC603
:
4680 rs6000_cost
= &ppc603_cost
;
4683 case PROCESSOR_PPC604
:
4684 rs6000_cost
= &ppc604_cost
;
4687 case PROCESSOR_PPC604e
:
4688 rs6000_cost
= &ppc604e_cost
;
4691 case PROCESSOR_PPC620
:
4692 rs6000_cost
= &ppc620_cost
;
4695 case PROCESSOR_PPC630
:
4696 rs6000_cost
= &ppc630_cost
;
4699 case PROCESSOR_CELL
:
4700 rs6000_cost
= &ppccell_cost
;
4703 case PROCESSOR_PPC750
:
4704 case PROCESSOR_PPC7400
:
4705 rs6000_cost
= &ppc750_cost
;
4708 case PROCESSOR_PPC7450
:
4709 rs6000_cost
= &ppc7450_cost
;
4712 case PROCESSOR_PPC8540
:
4713 case PROCESSOR_PPC8548
:
4714 rs6000_cost
= &ppc8540_cost
;
4717 case PROCESSOR_PPCE300C2
:
4718 case PROCESSOR_PPCE300C3
:
4719 rs6000_cost
= &ppce300c2c3_cost
;
4722 case PROCESSOR_PPCE500MC
:
4723 rs6000_cost
= &ppce500mc_cost
;
4726 case PROCESSOR_PPCE500MC64
:
4727 rs6000_cost
= &ppce500mc64_cost
;
4730 case PROCESSOR_PPCE5500
:
4731 rs6000_cost
= &ppce5500_cost
;
4734 case PROCESSOR_PPCE6500
:
4735 rs6000_cost
= &ppce6500_cost
;
4738 case PROCESSOR_TITAN
:
4739 rs6000_cost
= &titan_cost
;
4742 case PROCESSOR_POWER4
:
4743 case PROCESSOR_POWER5
:
4744 rs6000_cost
= &power4_cost
;
4747 case PROCESSOR_POWER6
:
4748 rs6000_cost
= &power6_cost
;
4751 case PROCESSOR_POWER7
:
4752 rs6000_cost
= &power7_cost
;
4755 case PROCESSOR_POWER8
:
4756 rs6000_cost
= &power8_cost
;
4759 case PROCESSOR_POWER9
:
4760 rs6000_cost
= &power9_cost
;
4763 case PROCESSOR_POWER10
:
4764 rs6000_cost
= &power10_cost
;
4767 case PROCESSOR_PPCA2
:
4768 rs6000_cost
= &ppca2_cost
;
4777 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4778 param_simultaneous_prefetches
,
4779 rs6000_cost
->simultaneous_prefetches
);
4780 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4781 param_l1_cache_size
,
4782 rs6000_cost
->l1_cache_size
);
4783 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4784 param_l1_cache_line_size
,
4785 rs6000_cost
->cache_line_size
);
4786 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4787 param_l2_cache_size
,
4788 rs6000_cost
->l2_cache_size
);
4790 /* Increase loop peeling limits based on performance analysis. */
4791 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4792 param_max_peeled_insns
, 400);
4793 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4794 param_max_completely_peeled_insns
, 400);
4796 /* The lxvl/stxvl instructions don't perform well before Power10. */
4798 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4799 param_vect_partial_vector_usage
, 1);
4801 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4802 param_vect_partial_vector_usage
, 0);
4804 /* Use the 'model' -fsched-pressure algorithm by default. */
4805 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4806 param_sched_pressure_algorithm
,
4807 SCHED_PRESSURE_MODEL
);
4809 /* If using typedef char *va_list, signal that
4810 __builtin_va_start (&ap, 0) can be optimized to
4811 ap = __builtin_next_arg (0). */
4812 if (DEFAULT_ABI
!= ABI_V4
)
4813 targetm
.expand_builtin_va_start
= NULL
;
4816 rs6000_override_options_after_change ();
4818 /* If not explicitly specified via option, decide whether to generate indexed
4819 load/store instructions. A value of -1 indicates that the
4820 initial value of this variable has not been overwritten. During
4821 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4822 if (TARGET_AVOID_XFORM
== -1)
4823 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4824 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4825 need indexed accesses and the type used is the scalar type of the element
4826 being loaded or stored. */
4827 TARGET_AVOID_XFORM
= (rs6000_tune
== PROCESSOR_POWER6
&& TARGET_CMPB
4828 && !TARGET_ALTIVEC
);
4830 /* Set the -mrecip options. */
4831 if (rs6000_recip_name
)
4833 char *p
= ASTRDUP (rs6000_recip_name
);
4835 unsigned int mask
, i
;
4838 while ((q
= strtok (p
, ",")) != NULL
)
4849 if (!strcmp (q
, "default"))
4850 mask
= ((TARGET_RECIP_PRECISION
)
4851 ? RECIP_HIGH_PRECISION
: RECIP_LOW_PRECISION
);
4854 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4855 if (!strcmp (q
, recip_options
[i
].string
))
4857 mask
= recip_options
[i
].mask
;
4861 if (i
== ARRAY_SIZE (recip_options
))
4863 error ("unknown option for %<%s=%s%>", "-mrecip", q
);
4871 rs6000_recip_control
&= ~mask
;
4873 rs6000_recip_control
|= mask
;
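      /* Hedged usage sketch (not from the original sources): a string such as
	 "-mrecip=rsqrtf,!divd" is split on ',' by the strtok loop above; each
	 token is matched against recip_options[], "default" expands to the
	 precision-dependent mask, and unknown names reach the error above.
	 The tokens named here are only examples of documented -mrecip values,
	 not a definitive list.  */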
4877 /* Initialize all of the registers. */
4878 rs6000_init_hard_regno_mode_ok (global_init_p
);
4880 /* Save the initial options in case the user does function specific options */
4882 target_option_default_node
= target_option_current_node
4883 = build_target_option_node (&global_options
, &global_options_set
);
4885 /* If not explicitly specified via option, decide whether to generate the
4886 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4887 if (TARGET_LINK_STACK
== -1)
4888 SET_TARGET_LINK_STACK (rs6000_tune
== PROCESSOR_PPC476
&& flag_pic
);
4890 /* Deprecate use of -mno-speculate-indirect-jumps. */
4891 if (!rs6000_speculate_indirect_jumps
)
4892 warning (0, "%qs is deprecated and not recommended in any circumstances",
4893 "-mno-speculate-indirect-jumps");
4898 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4899 define the target cpu type. */
4902 rs6000_option_override (void)
4904 (void) rs6000_option_override_internal (true);
4908 /* Implement LOOP_ALIGN. */
4910 rs6000_loop_align (rtx label
)
4915 /* Don't override loop alignment if -falign-loops was specified. */
4916 if (!can_override_loop_align
)
4919 bb
= BLOCK_FOR_INSN (label
);
4920 ninsns
= num_loop_insns(bb
->loop_father
);
4922 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4923 if (ninsns
> 4 && ninsns
<= 8
4924 && (rs6000_tune
== PROCESSOR_POWER4
4925 || rs6000_tune
== PROCESSOR_POWER5
4926 || rs6000_tune
== PROCESSOR_POWER6
4927 || rs6000_tune
== PROCESSOR_POWER7
4928 || rs6000_tune
== PROCESSOR_POWER8
))
4929 return align_flags (5);
/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N iterations.  This routine does not determine how many
   iterations are required to reach the desired alignment.  */
4939 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
4946 if (rs6000_alignment_flags
== MASK_ALIGN_NATURAL
)
4949 if (rs6000_alignment_flags
== MASK_ALIGN_POWER
)
  /* Assume that all other types are naturally aligned.  CHECKME!  */
4964 /* Return true if the vector misalignment factor is supported by the
4967 rs6000_builtin_support_vector_misalignment (machine_mode mode
,
4974 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4977 /* Return if movmisalign pattern is not supported for this mode. */
4978 if (optab_handler (movmisalign_optab
, mode
) == CODE_FOR_nothing
)
4981 if (misalignment
== -1)
4983 /* Misalignment factor is unknown at compile time but we know
4984 it's word aligned. */
4985 if (rs6000_vector_alignment_reachable (type
, is_packed
))
4987 int element_size
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
4989 if (element_size
== 64 || element_size
== 32)
4996 /* VSX supports word-aligned vector. */
4997 if (misalignment
% 4 == 0)
5003 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5005 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
5006 tree vectype
, int misalign
)
5011 switch (type_of_cost
)
5019 case cond_branch_not_taken
:
5023 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5027 /* Power7 has only one permute unit, make it a bit expensive. */
5028 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5033 case vec_promote_demote
:
5034 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5035 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5040 case cond_branch_taken
:
5043 case unaligned_load
:
5044 case vector_gather_load
:
5045 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5046 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5049 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5051 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5052 /* See PR102767, consider V1TI to keep consistency. */
5053 if (elements
== 2 || elements
== 1)
5054 /* Double word aligned. */
5062 /* Double word aligned. */
5066 /* Unknown misalignment. */
5079 /* Misaligned loads are not supported. */
5082 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5085 case unaligned_store
:
5086 case vector_scatter_store
:
5087 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5090 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5092 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5093 /* See PR102767, consider V1TI to keep consistency. */
5094 if (elements
== 2 || elements
== 1)
5095 /* Double word aligned. */
5103 /* Double word aligned. */
5107 /* Unknown misalignment. */
5120 /* Misaligned stores are not supported. */
5126 /* This is a rough approximation assuming non-constant elements
5127 constructed into a vector via element insertion. FIXME:
5128 vec_construct is not granular enough for uniformly good
5129 decisions. If the initialization is a splat, this is
5130 cheaper than we estimate. Improve this someday. */
5131 elem_type
= TREE_TYPE (vectype
);
5132 /* 32-bit vectors loaded into registers are stored as double
5133 precision, so we need 2 permutes, 2 converts, and 1 merge
5134 to construct a vector of short floats from them. */
5135 if (SCALAR_FLOAT_TYPE_P (elem_type
)
5136 && TYPE_PRECISION (elem_type
) == 32)
5138 /* On POWER9, integer vector types are built up in GPRs and then
5139 use a direct move (2 cycles). For POWER8 this is even worse,
5140 as we need two direct moves and a merge, and the direct moves
5142 else if (INTEGRAL_TYPE_P (elem_type
))
5144 if (TARGET_P9_VECTOR
)
5145 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 2;
5147 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 5;
5150 /* V2DFmode doesn't need a direct move. */
5158 /* Implement targetm.vectorize.preferred_simd_mode. */
5161 rs6000_preferred_simd_mode (scalar_mode mode
)
5163 opt_machine_mode vmode
= mode_for_vector (mode
, 16 / GET_MODE_SIZE (mode
));
5165 if (vmode
.exists () && !VECTOR_MEM_NONE_P (vmode
.require ()))
5166 return vmode
.require ();
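  /* Illustrative examples (not part of the original code): with 16-byte VSX
     vectors, SFmode (4 bytes) maps to V4SFmode and DFmode (8 bytes) to
     V2DFmode, provided the resulting vector mode exists and is supported in
     memory (i.e. !VECTOR_MEM_NONE_P).  */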
5171 class rs6000_cost_data
: public vector_costs
5174 using vector_costs::vector_costs
;
5176 unsigned int add_stmt_cost (int count
, vect_cost_for_stmt kind
,
5177 stmt_vec_info stmt_info
, slp_tree
, tree vectype
,
5179 vect_cost_model_location where
) override
;
5180 void finish_cost (const vector_costs
*) override
;
5183 void update_target_cost_per_stmt (vect_cost_for_stmt
, stmt_vec_info
,
5184 vect_cost_model_location
, unsigned int);
5185 void density_test (loop_vec_info
);
5186 void adjust_vect_cost_per_loop (loop_vec_info
);
5187 unsigned int determine_suggested_unroll_factor (loop_vec_info
);
5189 /* Total number of vectorized stmts (loop only). */
5190 unsigned m_nstmts
= 0;
5191 /* Total number of loads (loop only). */
5192 unsigned m_nloads
= 0;
5193 /* Total number of stores (loop only). */
5194 unsigned m_nstores
= 0;
5195 /* Reduction factor for suggesting unroll factor (loop only). */
5196 unsigned m_reduc_factor
= 0;
5197 /* Possible extra penalized cost on vector construction (loop only). */
5198 unsigned m_extra_ctor_cost
= 0;
5199 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5200 instruction is needed by the vectorization. */
5201 bool m_vect_nonmem
= false;
5202 /* If this loop gets vectorized with emulated gather load. */
5203 bool m_gather_load
= false;
5206 /* Test for likely overcommitment of vector hardware resources. If a
5207 loop iteration is relatively large, and too large a percentage of
5208 instructions in the loop are vectorized, the cost model may not
5209 adequately reflect delays from unavailable vector resources.
5210 Penalize the loop body cost for this case. */
5213 rs6000_cost_data::density_test (loop_vec_info loop_vinfo
)
5215 /* This density test only cares about the cost of vector version of the
5216 loop, so immediately return if we are passed costing for the scalar
5217 version (namely computing single scalar iteration cost). */
5218 if (m_costing_for_scalar
)
5221 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5222 basic_block
*bbs
= get_loop_body (loop
);
5223 int nbbs
= loop
->num_nodes
;
5224 int vec_cost
= m_costs
[vect_body
], not_vec_cost
= 0;
5226 for (int i
= 0; i
< nbbs
; i
++)
5228 basic_block bb
= bbs
[i
];
5229 gimple_stmt_iterator gsi
;
5231 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5233 gimple
*stmt
= gsi_stmt (gsi
);
5234 if (is_gimple_debug (stmt
))
5237 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
5239 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5240 && !STMT_VINFO_IN_PATTERN_P (stmt_info
))
5246 int density_pct
= (vec_cost
* 100) / (vec_cost
+ not_vec_cost
);
5248 if (density_pct
> rs6000_density_pct_threshold
5249 && vec_cost
+ not_vec_cost
> rs6000_density_size_threshold
)
5251 m_costs
[vect_body
] = vec_cost
* (100 + rs6000_density_penalty
) / 100;
5252 if (dump_enabled_p ())
5253 dump_printf_loc (MSG_NOTE
, vect_location
,
5254 "density %d%%, cost %d exceeds threshold, penalizing "
5255 "loop body cost by %u%%\n", density_pct
,
5256 vec_cost
+ not_vec_cost
, rs6000_density_penalty
);
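      /* Worked example (illustrative numbers only): with vec_cost = 180 and
	 not_vec_cost = 20, density_pct is 90%; if rs6000_density_penalty were
	 25, the vect_body cost above would become 180 * 125 / 100 = 225.  */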
5259 /* Check whether we need to penalize the body cost to account
5260 for excess strided or elementwise loads. */
5261 if (m_extra_ctor_cost
> 0)
5263 gcc_assert (m_nloads
<= m_nstmts
);
5264 unsigned int load_pct
= (m_nloads
* 100) / m_nstmts
;
      /* It's likely to be bounded by latency and execution resources from
	 many scalar loads which are strided or elementwise loads into a
	 vector if both conditions below are found:
	   1. there are many loads, so it's easy to end up with a long wait
	      for the loaded data;
	   2. loads are a big proportion of all vectorized statements, so
	      it's not easy to schedule other statements to spread among
	      the loads.
	 One typical case is the innermost loop of the hotspot of SPEC2017
	 503.bwaves_r without loop interchange.  */
5276 if (m_nloads
> (unsigned int) rs6000_density_load_num_threshold
5277 && load_pct
> (unsigned int) rs6000_density_load_pct_threshold
)
5279 m_costs
[vect_body
] += m_extra_ctor_cost
;
5280 if (dump_enabled_p ())
5281 dump_printf_loc (MSG_NOTE
, vect_location
,
5282 "Found %u loads and "
5283 "load pct. %u%% exceed "
5285 "penalizing loop body "
5286 "cost by extra cost %u "
5294 /* Implement targetm.vectorize.create_costs. */
5296 static vector_costs
*
5297 rs6000_vectorize_create_costs (vec_info
*vinfo
, bool costing_for_scalar
)
5299 return new rs6000_cost_data (vinfo
, costing_for_scalar
);
/* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
   For some statements, we would like to further fine-tune the cost on top of
   the rs6000_builtin_vectorization_cost handling, which doesn't have any
   information on statement operation codes etc.  One typical case here is
   COND_EXPR; it costs the same as a simple FXU instruction when evaluated
   for the scalar cost, but it should be priced more highly since it gets
   transformed to either compare + branch or compare + isel instructions.  */
5311 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind
,
5312 struct _stmt_vec_info
*stmt_info
)
5314 if (kind
== scalar_stmt
&& stmt_info
&& stmt_info
->stmt
5315 && gimple_code (stmt_info
->stmt
) == GIMPLE_ASSIGN
)
5317 tree_code subcode
= gimple_assign_rhs_code (stmt_info
->stmt
);
5318 if (subcode
== COND_EXPR
)
5325 /* Helper function for add_stmt_cost. Check each statement cost
5326 entry, gather information and update the target_cost fields
5329 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind
,
5330 stmt_vec_info stmt_info
,
5331 vect_cost_model_location where
,
5332 unsigned int orig_count
)
5335 /* Check whether we're doing something other than just a copy loop.
5336 Not all such loops may be profitably vectorized; see
5337 rs6000_finish_cost. */
5338 if (kind
== vec_to_scalar
5340 || kind
== vec_promote_demote
5341 || kind
== vec_construct
5342 || kind
== scalar_to_vec
5343 || (where
== vect_body
&& kind
== vector_stmt
))
5344 m_vect_nonmem
= true;
5346 /* Gather some information when we are costing the vectorized instruction
5347 for the statements located in a loop body. */
5348 if (!m_costing_for_scalar
5349 && is_a
<loop_vec_info
> (m_vinfo
)
5350 && where
== vect_body
)
5352 m_nstmts
+= orig_count
;
5354 if (kind
== scalar_load
5355 || kind
== vector_load
5356 || kind
== unaligned_load
5357 || kind
== vector_gather_load
)
5359 m_nloads
+= orig_count
;
5360 if (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5361 m_gather_load
= true;
5363 else if (kind
== scalar_store
5364 || kind
== vector_store
5365 || kind
== unaligned_store
5366 || kind
== vector_scatter_store
)
5367 m_nstores
+= orig_count
;
5368 else if ((kind
== scalar_stmt
5369 || kind
== vector_stmt
5370 || kind
== vec_to_scalar
)
5372 && vect_is_reduction (stmt_info
))
5374 /* Loop body contains normal int or fp operations and epilogue
5375 contains vector reduction. For simplicity, we assume int
5376 operation takes one cycle and fp operation takes one more. */
5377 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
5378 bool is_float
= FLOAT_TYPE_P (TREE_TYPE (lhs
));
5379 unsigned int basic_cost
= is_float
? 2 : 1;
5380 m_reduc_factor
= MAX (basic_cost
* orig_count
, m_reduc_factor
);
      /* Power processors do not currently have instructions for strided
	 and elementwise loads, and instead we must generate multiple
	 scalar loads.  This leads to undercounting of the cost.  We
	 account for this by scaling the construction cost by the number
	 of elements involved, and saving this as extra cost that we may
	 or may not need to apply.  When finalizing the cost of the loop,
	 the extra penalty is applied when the load density heuristics
	 are satisfied.  */
5391 if (kind
== vec_construct
&& stmt_info
5392 && STMT_VINFO_TYPE (stmt_info
) == load_vec_info_type
5393 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_ELEMENTWISE
5394 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_STRIDED_SLP
))
5396 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5397 unsigned int nunits
= vect_nunits_for_cost (vectype
);
	  /* As PR103702 shows, it's possible that the vectorizer wants to do
	     costing for only one unit here; there's no need to apply any
	     penalization for that, so simply return early here.  */
5403 /* i386 port adopts nunits * stmt_cost as the penalized cost
5404 for this kind of penalization, we used to follow it but
5405 found it could result in an unreliable body cost especially
5406 for V16QI/V8HI modes. To make it better, we choose this
5407 new heuristic: for each scalar load, we use 2 as penalized
5408 cost for the case with 2 nunits and use 1 for the other
5409 cases. It's without much supporting theory, mainly
5410 concluded from the broad performance evaluations on Power8,
5411 Power9 and Power10. One possibly related point is that:
5412 vector construction for more units would use more insns,
	     it has more chances to schedule them better (even run in
	     parallel when enough units are available at that time), so
	     it seems reasonable not to penalize them that much.  */
5416 unsigned int adjusted_cost
= (nunits
== 2) ? 2 : 1;
5417 unsigned int extra_cost
= nunits
* adjusted_cost
;
5418 m_extra_ctor_cost
+= extra_cost
;
5424 rs6000_cost_data::add_stmt_cost (int count
, vect_cost_for_stmt kind
,
5425 stmt_vec_info stmt_info
, slp_tree
,
5426 tree vectype
, int misalign
,
5427 vect_cost_model_location where
)
5429 unsigned retval
= 0;
5431 if (flag_vect_cost_model
)
5433 int stmt_cost
= rs6000_builtin_vectorization_cost (kind
, vectype
,
5435 stmt_cost
+= rs6000_adjust_vect_cost_per_stmt (kind
, stmt_info
);
5436 /* Statements in an inner loop relative to the loop being
5437 vectorized are weighted more heavily. The value here is
5438 arbitrary and could potentially be improved with analysis. */
5439 unsigned int orig_count
= count
;
5440 retval
= adjust_cost_for_freq (stmt_info
, where
, count
* stmt_cost
);
5441 m_costs
[where
] += retval
;
5443 update_target_cost_per_stmt (kind
, stmt_info
, where
, orig_count
);
5449 /* For some target specific vectorization cost which can't be handled per stmt,
5450 we check the requisite conditions and adjust the vectorization cost
5451 accordingly if satisfied. One typical example is to model shift cost for
5452 vector with length by counting number of required lengths under condition
5453 LOOP_VINFO_FULLY_WITH_LENGTH_P. */
5456 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo
)
5458 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
))
5460 rgroup_controls
*rgc
;
5461 unsigned int num_vectors_m1
;
5462 unsigned int shift_cnt
= 0;
5463 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo
), num_vectors_m1
, rgc
)
5465 /* Each length needs one shift to fill into bits 0-7. */
5466 shift_cnt
+= num_vectors_m1
+ 1;
5468 add_stmt_cost (shift_cnt
, scalar_stmt
, NULL
, NULL
,
5469 NULL_TREE
, 0, vect_body
);
/* Determine suggested unroll factor by considering some below factors:

    - unroll option/pragma which can disable unrolling for this loop;
    - simple hardware resource model for non memory vector insns;
    - aggressive heuristics when iteration count is unknown:
       - reduction case to break cross iteration dependency;
       - emulated gather load;
    - estimated iteration count when iteration count is unknown.  */

unsigned int
rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Don't unroll if it's specified explicitly not to be unrolled.  */
  if (loop->unroll == 1
      || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
      || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
    return 1;

  unsigned int nstmts_nonldst = m_nstmts - m_nloads - m_nstores;
  /* Don't unroll if there are no vector instructions apart from memory
     accesses.  */
  if (nstmts_nonldst == 0)
    return 1;

  /* Consider breaking cross iteration dependency for reduction.  */
  unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;

  /* Use a simple hardware resource model for how many non ld/st vector
     instructions can be issued per cycle.  */
  unsigned int issue_width = rs6000_vect_unroll_issue;
  unsigned int uf = CEIL (reduc_factor * issue_width, nstmts_nonldst);
  uf = MIN ((unsigned int) rs6000_vect_unroll_limit, uf);
  /* Make sure it is a power of 2.  */
  uf = 1 << ceil_log2 (uf);
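  /* Worked example of the computation above (illustrative only): with
     reduc_factor == 2, rs6000_vect_unroll_issue == 4 and three non load/store
     vector statements, uf = CEIL (2 * 4, 3) = 3, which, assuming the limit
     does not clamp it further, is then rounded up to the next power of two,
     giving a suggested unroll factor of 4.  */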
  /* If the iteration count is known, the costing would be exact enough,
     don't worry it could be worse.  */
  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    return uf;

  /* Inspired by SPEC2017 parest_r, we want to aggressively unroll the
     loop if either condition is satisfied:
       - reduction factor exceeds the threshold;
       - emulated gather load adopted.  */
  if (reduc_factor > (unsigned int) rs6000_vect_unroll_reduc_threshold
      || m_gather_load)
    return uf;

  /* Check if we can conclude it's good to unroll from the estimated
     iteration count.  */
  HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
  unsigned int vf = vect_vf_for_cost (loop_vinfo);
  unsigned int unrolled_vf = vf * uf;
  if (est_niter == -1 || est_niter < unrolled_vf)
    /* When the estimated iteration of this loop is unknown, it's possible
       that we are able to vectorize this loop with the original VF but fail
       to vectorize it with the unrolled VF any more if the actual iteration
       count is in between.  */
    return 1;
  else
    {
      unsigned int epil_niter_unr = est_niter % unrolled_vf;
      unsigned int epil_niter = est_niter % vf;
      /* Even if we have partial vector support, it can still be inefficient
         to calculate the length when the iteration count is unknown, so
         only expect it's good to unroll when the epilogue iteration count
         is not bigger than VF (only one time length calculation).  */
      if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
          && epil_niter_unr <= vf)
        return uf;
      /* Without partial vector support, conservatively unroll this when
         the epilogue iteration count is less than the original one
         (epilogue execution time wouldn't be longer than before).  */
      else if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
               && epil_niter_unr <= epil_niter)
        return uf;
    }

  return 1;
}
void
rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
{
  if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
    {
      adjust_vect_cost_per_loop (loop_vinfo);
      density_test (loop_vinfo);

      /* Don't vectorize minimum-vectorization-factor, simple copy loops
         that require versioning for any reason.  The vectorization is at
         best a wash inside the loop, and the versioning checks make
         profitability highly unlikely and potentially quite harmful.  */
      if (!m_vect_nonmem
          && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
          && LOOP_REQUIRES_VERSIONING (loop_vinfo))
        m_costs[vect_body] += 10000;

      m_suggested_unroll_factor
        = determine_suggested_unroll_factor (loop_vinfo);
    }

  vector_costs::finish_cost (scalar_costs);
}
/* Implement targetm.loop_unroll_adjust.  */

static unsigned
rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  if (unroll_only_small_loops)
    {
      /* TODO: These are hardcoded values right now.  We probably should use
         a PARAM here.  */
      if (loop->ninsns <= 6)
        return MIN (4, nunroll);
      if (loop->ninsns <= 10)
        return MIN (2, nunroll);

      return 0;
    }

  return nunroll;
}
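/* Illustrative behaviour of the hook above (added for clarity): when
   unroll_only_small_loops is set, a 5-insn loop asked to unroll 8 times is
   capped at MIN (4, 8) = 4, and a 9-insn loop at MIN (2, 8) = 2.  */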
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.

   Implement targetm.vectorize.builtin_vectorized_function.  */

static tree
rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
                                    tree type_in)
{
  machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TARGET_DEBUG_BUILTIN)
    fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
             combined_fn_name (combined_fn (fn)),
             GET_MODE_NAME (TYPE_MODE (type_out)),
             GET_MODE_NAME (TYPE_MODE (type_in)));

  /* TODO: Should this be gcc_assert?  */
  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  switch (fn)
    {
    CASE_CFN_COPYSIGN:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
          && out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
      break;
    CASE_CFN_CEIL:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
          && out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_VRFIP];
      break;
    CASE_CFN_FLOOR:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
          && out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_VRFIM];
      break;
    CASE_CFN_FMA:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
          && out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
      break;
    CASE_CFN_TRUNC:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
          && out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
      break;
    CASE_CFN_NEARBYINT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
          && flag_unsafe_math_optimizations
          && out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
          && flag_unsafe_math_optimizations
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
      break;
    CASE_CFN_RINT:
      if (VECTOR_UNIT_VSX_P (V2DFmode)
          && !flag_trapping_math
          && out_mode == DFmode && out_n == 2
          && in_mode == DFmode && in_n == 2)
        return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
      if (VECTOR_UNIT_VSX_P (V4SFmode)
          && !flag_trapping_math
          && out_mode == SFmode && out_n == 4
          && in_mode == SFmode && in_n == 4)
        return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
      break;
    default:
      break;
    }

  /* Generate calls to libmass if appropriate.  */
  if (rs6000_veclib_handler)
    return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);

  return NULL_TREE;
}
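/* Illustrative use of the mapping above (added for clarity): when the
   vectorizer asks for a vector version of ceil with type_out and type_in
   both V2DF on a VSX target, this hook returns the decl for
   RS6000_BIF_XVRDPIP, i.e. the xvrdpip (round to +infinity) instruction.  */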
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
   library with vectorized intrinsics.  */

static tree
rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
                                   tree type_in)
{
  char name[32];
  const char *suffix = NULL;
  tree fntype, new_fndecl, bdecl = NULL_TREE;
  int n_args = 1;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* Libmass is suitable for unsafe math only as it does not correctly support
     parts of IEEE with the required precision such as denormals.  Only support
     it if we have VSX to use the simd d2 or f4 functions.
     XXX: Add variable length support.  */
  if (!flag_unsafe_math_optimizations || !TARGET_VSX)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  if (el_mode == DFmode && n == 2)
    {
      bdecl = mathfn_built_in (double_type_node, fn);
      suffix = "d2";	/* pow -> powd2 */
    }
  else if (el_mode == SFmode && n == 4)
    {
      bdecl = mathfn_built_in (float_type_node, fn);
      suffix = "4";	/* powf -> powf4 */
    }
  else
    return NULL_TREE;

  gcc_assert (suffix != NULL);
  bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));

  strcpy (name, bname + strlen ("__builtin_"));
  strcat (name, suffix);

  if (n_args == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else if (n_args == 2)
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
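/* Example of the name construction above (added for clarity): for a V2DF
   pow, bdecl is __builtin_pow, the "__builtin_" prefix is stripped and the
   "d2" suffix appended, so the emitted call is to the libmass routine
   powd2.  */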
5846 /* Default CPU string for rs6000*_file_start functions. */
5847 static const char *rs6000_default_cpu
;
5849 #ifdef USING_ELFOS_H
5850 const char *rs6000_machine
;
5853 rs6000_machine_from_flags (void)
5856 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
)
5858 if (rs6000_cpu
== PROCESSOR_PPC8540
|| rs6000_cpu
== PROCESSOR_PPC8548
)
5860 if (rs6000_cpu
== PROCESSOR_PPCE500MC
)
5862 if (rs6000_cpu
== PROCESSOR_PPCE500MC64
)
5864 if (rs6000_cpu
== PROCESSOR_PPCE5500
)
5866 if (rs6000_cpu
== PROCESSOR_PPCE6500
)
5870 if (rs6000_cpu
== PROCESSOR_PPC403
)
5872 if (rs6000_cpu
== PROCESSOR_PPC405
)
5874 if (rs6000_cpu
== PROCESSOR_PPC440
)
5876 if (rs6000_cpu
== PROCESSOR_PPC476
)
5880 if (rs6000_cpu
== PROCESSOR_PPCA2
)
5884 if (rs6000_cpu
== PROCESSOR_CELL
)
5888 if (rs6000_cpu
== PROCESSOR_TITAN
)
5891 /* 500 series and 800 series */
5892 if (rs6000_cpu
== PROCESSOR_MPCCORE
)
5896 /* This (and ppc64 below) are disabled here (for now at least) because
5897 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
5898 are #define'd as some of these. Untangling that is a job for later. */
5900 /* 600 series and 700 series, "classic" */
5901 if (rs6000_cpu
== PROCESSOR_PPC601
|| rs6000_cpu
== PROCESSOR_PPC603
5902 || rs6000_cpu
== PROCESSOR_PPC604
|| rs6000_cpu
== PROCESSOR_PPC604e
5903 || rs6000_cpu
== PROCESSOR_PPC750
)
5907 /* Classic with AltiVec, "G4" */
5908 if (rs6000_cpu
== PROCESSOR_PPC7400
|| rs6000_cpu
== PROCESSOR_PPC7450
)
5912 /* The older 64-bit CPUs */
5913 if (rs6000_cpu
== PROCESSOR_PPC620
|| rs6000_cpu
== PROCESSOR_PPC630
5914 || rs6000_cpu
== PROCESSOR_RS64A
)
5918 HOST_WIDE_INT flags
= rs6000_isa_flags
;
5920 /* Disable the flags that should never influence the .machine selection. */
5921 flags
&= ~(OPTION_MASK_PPC_GFXOPT
| OPTION_MASK_PPC_GPOPT
| OPTION_MASK_ISEL
);
5923 if ((flags
& (ISA_3_1_MASKS_SERVER
& ~ISA_3_0_MASKS_SERVER
)) != 0)
5925 if ((flags
& (ISA_3_0_MASKS_SERVER
& ~ISA_2_7_MASKS_SERVER
)) != 0)
5927 if ((flags
& (ISA_2_7_MASKS_SERVER
& ~ISA_2_6_MASKS_SERVER
)) != 0)
5929 if ((flags
& (ISA_2_6_MASKS_SERVER
& ~ISA_2_5_MASKS_SERVER
)) != 0)
5931 if ((flags
& (ISA_2_5_MASKS_SERVER
& ~ISA_2_4_MASKS
)) != 0)
5933 if ((flags
& (ISA_2_4_MASKS
& ~ISA_2_1_MASKS
)) != 0)
5935 if ((flags
& ISA_2_1_MASKS
) != 0)
5937 if ((flags
& OPTION_MASK_POWERPC64
) != 0)
static void
emit_asm_machine (void)
{
  fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
}
5949 /* Do anything needed at the start of the asm file. */
5952 rs6000_file_start (void)
5955 const char *start
= buffer
;
5956 FILE *file
= asm_out_file
;
5958 rs6000_default_cpu
= TARGET_CPU_DEFAULT
;
5960 default_file_start ();
5962 if (flag_verbose_asm
)
5964 sprintf (buffer
, "\n%s rs6000/powerpc options:", ASM_COMMENT_START
);
5966 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
5968 fprintf (file
, "%s --with-cpu=%s", start
, rs6000_default_cpu
);
5972 if (OPTION_SET_P (rs6000_cpu_index
))
5974 fprintf (file
, "%s -mcpu=%s", start
,
5975 processor_target_table
[rs6000_cpu_index
].name
);
5979 if (OPTION_SET_P (rs6000_tune_index
))
5981 fprintf (file
, "%s -mtune=%s", start
,
5982 processor_target_table
[rs6000_tune_index
].name
);
5986 if (PPC405_ERRATUM77
)
5988 fprintf (file
, "%s PPC405CR_ERRATUM77", start
);
5992 #ifdef USING_ELFOS_H
5993 switch (rs6000_sdata
)
5995 case SDATA_NONE
: fprintf (file
, "%s -msdata=none", start
); start
= ""; break;
5996 case SDATA_DATA
: fprintf (file
, "%s -msdata=data", start
); start
= ""; break;
5997 case SDATA_SYSV
: fprintf (file
, "%s -msdata=sysv", start
); start
= ""; break;
5998 case SDATA_EABI
: fprintf (file
, "%s -msdata=eabi", start
); start
= ""; break;
6001 if (rs6000_sdata
&& g_switch_value
)
6003 fprintf (file
, "%s -G %d", start
,
6013 #ifdef USING_ELFOS_H
6014 rs6000_machine
= rs6000_machine_from_flags ();
6015 emit_asm_machine ();
6018 if (DEFAULT_ABI
== ABI_ELFv2
)
6019 fprintf (file
, "\t.abiversion 2\n");
/* Return nonzero if this function is known to have a null epilogue.  */

int
direct_return (void)
{
  if (reload_completed)
    {
      rs6000_stack_t *info = rs6000_stack_info ();

      if (info->first_gp_reg_save == 32
          && info->first_fp_reg_save == 64
          && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
          && ! info->lr_save_p
          && ! info->cr_save_p
          && info->vrsave_size == 0
          && ! info->push_p)
        return 1;
    }

  return 0;
}
/* Helper for num_insns_constant.  Calculate number of instructions to
   load VALUE to a single gpr using combinations of addi, addis, ori,
   oris, sldi and rldimi instructions.  */

static int
num_insns_constant_gpr (HOST_WIDE_INT value)
{
  /* signed constant loadable with addi */
  if (SIGNED_INTEGER_16BIT_P (value))
    return 1;

  /* constant loadable with addis */
  else if ((value & 0xffff) == 0
           && (value >> 31 == -1 || value >> 31 == 0))
    return 1;

  /* PADDI can support up to 34 bit signed integers.  */
  else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
    return 1;

  else if (TARGET_POWERPC64)
    {
      int num_insns = -1;
      rs6000_emit_set_long_const (nullptr, value, &num_insns);
      return num_insns;
    }

  else
    return 2;
}
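/* Examples for the classification above (added for clarity): 0x7fff fits in a
   signed 16-bit immediate, so it is a single addi; 0x12340000 has zero low
   bits and a sign-extendable high half, so it is a single addis (lis);
   anything wider needs either a prefixed paddi or a multi-instruction 64-bit
   sequence.  */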
/* Helper for num_insns_constant.  Allow constants formed by the
   num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
   and handle modes that require multiple gprs.  */

static int
num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
{
  int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  int total = 0;
  while (nregs-- > 0)
    {
      HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
      int insns = num_insns_constant_gpr (low);
      if (insns > 2
          /* We won't get more than 2 from num_insns_constant_gpr
             except when TARGET_POWERPC64 and mode is DImode or
             wider, so the register mode must be DImode.  */
          && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
        insns = 2;
      total += insns;
      /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
         it all at once would be UB.  */
      value >>= (BITS_PER_WORD - 1);
      value >>= 1;
    }

  return total;
}
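/* Worked example (added for clarity): a TImode constant on a 64-bit target
   occupies two 64-bit gprs, so its cost is the sum of the
   num_insns_constant_gpr costs of its two doublewords, with any doubleword
   that is a valid mask constant capped at two instructions (li -1 plus one
   rotate-and-mask).  */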
6104 /* Return the number of instructions it takes to form a constant in as
6105 many gprs are needed for MODE. */
6108 num_insns_constant (rtx op
, machine_mode mode
)
6112 switch (GET_CODE (op
))
6118 case CONST_WIDE_INT
:
6121 for (int i
= 0; i
< CONST_WIDE_INT_NUNITS (op
); i
++)
6122 insns
+= num_insns_constant_multi (CONST_WIDE_INT_ELT (op
, i
),
6129 const struct real_value
*rv
= CONST_DOUBLE_REAL_VALUE (op
);
6131 if (mode
== SFmode
|| mode
== SDmode
)
6136 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv
, l
);
6138 REAL_VALUE_TO_TARGET_SINGLE (*rv
, l
);
6139 /* See the first define_split in rs6000.md handling a
6140 const_double_operand. */
6144 else if (mode
== DFmode
|| mode
== DDmode
)
6149 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv
, l
);
6151 REAL_VALUE_TO_TARGET_DOUBLE (*rv
, l
);
6153 /* See the second (32-bit) and third (64-bit) define_split
6154 in rs6000.md handling a const_double_operand. */
6155 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 1] << 32;
6156 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffffUL
;
6159 else if (mode
== TFmode
|| mode
== TDmode
6160 || mode
== KFmode
|| mode
== IFmode
)
6166 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv
, l
);
6168 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv
, l
);
6170 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 3] << 32;
6171 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 2] & 0xffffffffUL
;
6172 insns
= num_insns_constant_multi (val
, DImode
);
6173 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 2 : 1] << 32;
6174 val
|= l
[WORDS_BIG_ENDIAN
? 3 : 0] & 0xffffffffUL
;
6175 insns
+= num_insns_constant_multi (val
, DImode
);
6187 return num_insns_constant_multi (val
, mode
);
/* Interpret element ELT of the CONST_VECTOR OP as an integer value.
   If the mode of OP is MODE_VECTOR_INT, this simply returns the
   corresponding element of the vector, but for V4SFmode, the
   corresponding "float" is interpreted as an SImode integer.  */

static HOST_WIDE_INT
const_vector_elt_as_int (rtx op, unsigned int elt)
{
  rtx tmp;

  /* We can't handle V2DImode and V2DFmode vector constants here yet.  */
  gcc_assert (GET_MODE (op) != V2DImode
              && GET_MODE (op) != V2DFmode);

  tmp = CONST_VECTOR_ELT (op, elt);
  if (GET_MODE (op) == V4SFmode)
    tmp = gen_lowpart (SImode, tmp);
  return INTVAL (tmp);
}
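/* Example of the reinterpretation above (added for clarity): the V4SF element
   1.0f is returned as the SImode bit pattern 0x3f800000, so the splat
   matchers below work purely on integer values.  */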
6210 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6211 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6212 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6213 all items are set to the same value and contain COPIES replicas of the
6214 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6215 operand and the others are set to the value of the operand's msb. */
6218 vspltis_constant (rtx op
, unsigned step
, unsigned copies
)
6220 machine_mode mode
= GET_MODE (op
);
6221 machine_mode inner
= GET_MODE_INNER (mode
);
6229 HOST_WIDE_INT splat_val
;
6230 HOST_WIDE_INT msb_val
;
6232 if (mode
== V2DImode
|| mode
== V2DFmode
|| mode
== V1TImode
)
6235 nunits
= GET_MODE_NUNITS (mode
);
6236 bitsize
= GET_MODE_BITSIZE (inner
);
6237 mask
= GET_MODE_MASK (inner
);
6239 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6241 msb_val
= val
>= 0 ? 0 : -1;
6243 if (val
== 0 && step
> 1)
6245 /* Special case for loading most significant bit with step > 1.
6246 In that case, match 0s in all but step-1s elements, where match
6248 for (i
= 1; i
< nunits
; ++i
)
6250 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6251 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6252 if ((i
& (step
- 1)) == step
- 1)
6254 if (!EASY_VECTOR_MSB (elt_val
, inner
))
6264 /* Construct the value to be splatted, if possible. If not, return 0. */
6265 for (i
= 2; i
<= copies
; i
*= 2)
6267 HOST_WIDE_INT small_val
;
6269 small_val
= splat_val
>> bitsize
;
6271 if (splat_val
!= ((HOST_WIDE_INT
)
6272 ((unsigned HOST_WIDE_INT
) small_val
<< bitsize
)
6273 | (small_val
& mask
)))
6275 splat_val
= small_val
;
6276 inner
= smallest_int_mode_for_size (bitsize
);
6279 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6280 if (EASY_VECTOR_15 (splat_val
))
6283 /* Also check if we can splat, and then add the result to itself. Do so if
6284 the value is positive, of if the splat instruction is using OP's mode;
6285 for splat_val < 0, the splat and the add should use the same mode. */
6286 else if (EASY_VECTOR_15_ADD_SELF (splat_val
)
6287 && (splat_val
>= 0 || (step
== 1 && copies
== 1)))
6290 /* Also check if are loading up the most significant bit which can be done by
6291 loading up -1 and shifting the value left by -1. Only do this for
6292 step 1 here, for larger steps it is done earlier. */
6293 else if (EASY_VECTOR_MSB (splat_val
, inner
) && step
== 1)
6299 /* Check if VAL is present in every STEP-th element, and the
6300 other elements are filled with its most significant bit. */
6301 for (i
= 1; i
< nunits
; ++i
)
6303 HOST_WIDE_INT desired_val
;
6304 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6305 if ((i
& (step
- 1)) == 0)
6308 desired_val
= msb_val
;
6310 if (desired_val
!= const_vector_elt_as_int (op
, elt
))
6317 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
6318 instruction, filling in the bottom elements with 0 or -1.
6320 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6321 for the number of zeroes to shift in, or negative for the number of 0xff
6324 OP is a CONST_VECTOR. */
6327 vspltis_shifted (rtx op
)
6329 machine_mode mode
= GET_MODE (op
);
6330 machine_mode inner
= GET_MODE_INNER (mode
);
6338 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
)
6341 /* We need to create pseudo registers to do the shift, so don't recognize
6342 shift vector constants after reload. Don't match it even before RA
6343 after split1 is done, because there won't be further splitting pass
6344 before RA to do the splitting. */
6345 if (!can_create_pseudo_p ()
6346 || (cfun
->curr_properties
& PROP_rtl_split_insns
))
6349 nunits
= GET_MODE_NUNITS (mode
);
6350 mask
= GET_MODE_MASK (inner
);
6352 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? 0 : nunits
- 1);
6354 /* Check if the value can really be the operand of a vspltis[bhw]. */
6355 if (EASY_VECTOR_15 (val
))
6358 /* Also check if we are loading up the most significant bit which can be done
6359 by loading up -1 and shifting the value left by -1. */
6360 else if (EASY_VECTOR_MSB (val
, inner
))
6366 /* Check if VAL is present in every STEP-th element until we find elements
6367 that are 0 or all 1 bits. */
6368 for (i
= 1; i
< nunits
; ++i
)
6370 unsigned elt
= BYTES_BIG_ENDIAN
? i
: nunits
- 1 - i
;
6371 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6373 /* If the value isn't the splat value, check for the remaining elements
6379 for (j
= i
+1; j
< nunits
; ++j
)
6381 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6382 if (const_vector_elt_as_int (op
, elt2
) != 0)
6386 return (nunits
- i
) * GET_MODE_SIZE (inner
);
6389 else if ((elt_val
& mask
) == mask
)
6391 for (j
= i
+1; j
< nunits
; ++j
)
6393 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6394 if ((const_vector_elt_as_int (op
, elt2
) & mask
) != mask
)
6398 return -((nunits
- i
) * GET_MODE_SIZE (inner
));
6406 /* If all elements are equal, we don't need to do VSLDOI. */
6411 /* Return non-zero (element mode byte size) if OP is of the given MODE
6412 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6415 easy_altivec_constant (rtx op
, machine_mode mode
)
6417 unsigned step
, copies
;
6419 if (mode
== VOIDmode
)
6420 mode
= GET_MODE (op
);
6421 else if (mode
!= GET_MODE (op
))
6424 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6426 if (mode
== V2DFmode
)
6427 return zero_constant (op
, mode
) ? 8 : 0;
6429 else if (mode
== V2DImode
)
6431 if (!CONST_INT_P (CONST_VECTOR_ELT (op
, 0))
6432 || !CONST_INT_P (CONST_VECTOR_ELT (op
, 1)))
6435 if (zero_constant (op
, mode
))
6438 if (INTVAL (CONST_VECTOR_ELT (op
, 0)) == -1
6439 && INTVAL (CONST_VECTOR_ELT (op
, 1)) == -1)
6445 /* V1TImode is a special container for TImode. Ignore for now. */
6446 else if (mode
== V1TImode
)
6449 /* Start with a vspltisw. */
6450 step
= GET_MODE_NUNITS (mode
) / 4;
6453 if (vspltis_constant (op
, step
, copies
))
6456 /* Then try with a vspltish. */
6462 if (vspltis_constant (op
, step
, copies
))
6465 /* And finally a vspltisb. */
6471 if (vspltis_constant (op
, step
, copies
))
6474 if (vspltis_shifted (op
) != 0)
6475 return GET_MODE_SIZE (GET_MODE_INNER (mode
));
6480 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6481 result is OP. Abort if it is not possible. */
6484 gen_easy_altivec_constant (rtx op
)
6486 machine_mode mode
= GET_MODE (op
);
6487 int nunits
= GET_MODE_NUNITS (mode
);
6488 rtx val
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6489 unsigned step
= nunits
/ 4;
6490 unsigned copies
= 1;
6492 /* Start with a vspltisw. */
6493 if (vspltis_constant (op
, step
, copies
))
6494 return gen_rtx_VEC_DUPLICATE (V4SImode
, gen_lowpart (SImode
, val
));
6496 /* Then try with a vspltish. */
6502 if (vspltis_constant (op
, step
, copies
))
6503 return gen_rtx_VEC_DUPLICATE (V8HImode
, gen_lowpart (HImode
, val
));
6505 /* And finally a vspltisb. */
6511 if (vspltis_constant (op
, step
, copies
))
6512 return gen_rtx_VEC_DUPLICATE (V16QImode
, gen_lowpart (QImode
, val
));
6517 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6518 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6520 Return the number of instructions needed (1 or 2) into the address pointed
6523 Return the constant that is being split via CONSTANT_PTR. */
6526 xxspltib_constant_p (rtx op
,
6531 size_t nunits
= GET_MODE_NUNITS (mode
);
6533 HOST_WIDE_INT value
;
6536 /* Set the returned values to out of bound values. */
6537 *num_insns_ptr
= -1;
6538 *constant_ptr
= 256;
6540 if (!TARGET_P9_VECTOR
)
6543 if (mode
== VOIDmode
)
6544 mode
= GET_MODE (op
);
6546 else if (mode
!= GET_MODE (op
) && GET_MODE (op
) != VOIDmode
)
6549 /* Handle (vec_duplicate <constant>). */
6550 if (GET_CODE (op
) == VEC_DUPLICATE
)
6552 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6553 && mode
!= V2DImode
)
6556 element
= XEXP (op
, 0);
6557 if (!CONST_INT_P (element
))
6560 value
= INTVAL (element
);
6561 if (!IN_RANGE (value
, -128, 127))
6565 /* Handle (const_vector [...]). */
6566 else if (GET_CODE (op
) == CONST_VECTOR
)
6568 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6569 && mode
!= V2DImode
)
6572 element
= CONST_VECTOR_ELT (op
, 0);
6573 if (!CONST_INT_P (element
))
6576 value
= INTVAL (element
);
6577 if (!IN_RANGE (value
, -128, 127))
6580 for (i
= 1; i
< nunits
; i
++)
6582 element
= CONST_VECTOR_ELT (op
, i
);
6583 if (!CONST_INT_P (element
))
6586 if (value
!= INTVAL (element
))
6591 /* Handle integer constants being loaded into the upper part of the VSX
6592 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6593 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6594 else if (CONST_INT_P (op
))
6596 if (!SCALAR_INT_MODE_P (mode
))
6599 value
= INTVAL (op
);
6600 if (!IN_RANGE (value
, -128, 127))
6603 if (!IN_RANGE (value
, -1, 0))
6605 if (!(reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
))
6608 if (EASY_VECTOR_15 (value
))
6616 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6617 sign extend. Special case 0/-1 to allow getting any VSX register instead
6618 of an Altivec register. */
6619 if ((mode
== V4SImode
|| mode
== V8HImode
) && !IN_RANGE (value
, -1, 0)
6620 && EASY_VECTOR_15 (value
))
6623 /* Return # of instructions and the constant byte for XXSPLTIB. */
6624 if (mode
== V16QImode
)
6627 else if (IN_RANGE (value
, -1, 0))
6630 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6631 single XXSPLTIW or XXSPLTIDP instruction. */
6632 else if (vsx_prefixed_constant (op
, mode
))
6635 /* Return XXSPLITB followed by a sign extend operation to convert the
6636 constant to V8HImode or V4SImode. */
6640 *constant_ptr
= (int) value
;
6644 /* Return true if OP mode is V2DI and can be synthesized with ISA 2.07
6645 instructions vupkhsw and vspltisw.
6647 Return the constant that is being split via CONSTANT_PTR. */
6650 vspltisw_vupkhsw_constant_p (rtx op
, machine_mode mode
, int *constant_ptr
)
6652 HOST_WIDE_INT value
;
6655 if (!TARGET_P8_VECTOR
)
6658 if (mode
!= V2DImode
)
6661 if (!const_vec_duplicate_p (op
, &elt
))
6664 value
= INTVAL (elt
);
6665 if (value
== 0 || value
== 1
6666 || !EASY_VECTOR_15 (value
))
6670 *constant_ptr
= (int) value
;
6675 output_vec_const_move (rtx
*operands
)
6683 mode
= GET_MODE (dest
);
6687 bool dest_vmx_p
= ALTIVEC_REGNO_P (REGNO (dest
));
6688 int xxspltib_value
= 256;
6691 if (zero_constant (vec
, mode
))
6693 if (TARGET_P9_VECTOR
)
6694 return "xxspltib %x0,0";
6696 else if (dest_vmx_p
)
6697 return "vspltisw %0,0";
6700 return "xxlxor %x0,%x0,%x0";
6703 if (all_ones_constant (vec
, mode
))
6705 if (TARGET_P9_VECTOR
)
6706 return "xxspltib %x0,255";
6708 else if (dest_vmx_p
)
6709 return "vspltisw %0,-1";
6711 else if (TARGET_P8_VECTOR
)
6712 return "xxlorc %x0,%x0,%x0";
6718 vec_const_128bit_type vsx_const
;
6719 if (TARGET_POWER10
&& vec_const_128bit_to_bytes (vec
, mode
, &vsx_const
))
6721 unsigned imm
= constant_generates_lxvkq (&vsx_const
);
6724 operands
[2] = GEN_INT (imm
);
6725 return "lxvkq %x0,%2";
6728 imm
= constant_generates_xxspltiw (&vsx_const
);
6731 operands
[2] = GEN_INT (imm
);
6732 return "xxspltiw %x0,%2";
6735 imm
= constant_generates_xxspltidp (&vsx_const
);
6738 operands
[2] = GEN_INT (imm
);
6739 return "xxspltidp %x0,%2";
6743 if (TARGET_P9_VECTOR
6744 && xxspltib_constant_p (vec
, mode
, &num_insns
, &xxspltib_value
))
6748 operands
[2] = GEN_INT (xxspltib_value
& 0xff);
6749 return "xxspltib %x0,%2";
6760 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest
)));
6761 if (zero_constant (vec
, mode
))
6762 return "vspltisw %0,0";
6764 if (all_ones_constant (vec
, mode
))
6765 return "vspltisw %0,-1";
6767 /* Do we need to construct a value using VSLDOI? */
6768 shift
= vspltis_shifted (vec
);
6772 splat_vec
= gen_easy_altivec_constant (vec
);
6773 gcc_assert (GET_CODE (splat_vec
) == VEC_DUPLICATE
);
6774 operands
[1] = XEXP (splat_vec
, 0);
6775 if (!EASY_VECTOR_15 (INTVAL (operands
[1])))
6778 switch (GET_MODE (splat_vec
))
6781 return "vspltisw %0,%1";
6784 return "vspltish %0,%1";
6787 return "vspltisb %0,%1";
6797 /* Initialize vector TARGET to VALS. */
6800 rs6000_expand_vector_init (rtx target
, rtx vals
)
6802 machine_mode mode
= GET_MODE (target
);
6803 machine_mode inner_mode
= GET_MODE_INNER (mode
);
6804 unsigned int n_elts
= GET_MODE_NUNITS (mode
);
6805 int n_var
= 0, one_var
= -1;
6806 bool all_same
= true, all_const_zero
= true;
6810 for (i
= 0; i
< n_elts
; ++i
)
6812 x
= XVECEXP (vals
, 0, i
);
6813 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
6814 ++n_var
, one_var
= i
;
6815 else if (x
!= CONST0_RTX (inner_mode
))
6816 all_const_zero
= false;
6818 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
6824 rtx const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
6825 bool int_vector_p
= (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
);
6826 if ((int_vector_p
|| TARGET_VSX
) && all_const_zero
)
6828 /* Zero register. */
6829 emit_move_insn (target
, CONST0_RTX (mode
));
6832 else if (int_vector_p
&& easy_vector_constant (const_vec
, mode
))
6834 /* Splat immediate. */
6835 emit_insn (gen_rtx_SET (target
, const_vec
));
6840 /* Load from constant pool. */
6841 emit_move_insn (target
, const_vec
);
6846 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6847 if (VECTOR_MEM_VSX_P (mode
) && (mode
== V2DFmode
|| mode
== V2DImode
))
6851 size_t num_elements
= all_same
? 1 : 2;
6852 for (i
= 0; i
< num_elements
; i
++)
6854 op
[i
] = XVECEXP (vals
, 0, i
);
6855 /* Just in case there is a SUBREG with a smaller mode, do a
6857 if (GET_MODE (op
[i
]) != inner_mode
)
6859 rtx tmp
= gen_reg_rtx (inner_mode
);
6860 convert_move (tmp
, op
[i
], 0);
6863 /* Allow load with splat double word. */
6864 else if (MEM_P (op
[i
]))
6867 op
[i
] = force_reg (inner_mode
, op
[i
]);
6869 else if (!REG_P (op
[i
]))
6870 op
[i
] = force_reg (inner_mode
, op
[i
]);
6875 if (mode
== V2DFmode
)
6876 emit_insn (gen_vsx_splat_v2df (target
, op
[0]));
6878 emit_insn (gen_vsx_splat_v2di (target
, op
[0]));
6882 if (mode
== V2DFmode
)
6883 emit_insn (gen_vsx_concat_v2df (target
, op
[0], op
[1]));
6885 emit_insn (gen_vsx_concat_v2di (target
, op
[0], op
[1]));
6890 /* Special case initializing vector int if we are on 64-bit systems with
6891 direct move or we have the ISA 3.0 instructions. */
6892 if (mode
== V4SImode
&& VECTOR_MEM_VSX_P (V4SImode
)
6893 && TARGET_DIRECT_MOVE_64BIT
)
6897 rtx element0
= XVECEXP (vals
, 0, 0);
6898 if (MEM_P (element0
))
6899 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6901 element0
= force_reg (SImode
, element0
);
6903 if (TARGET_P9_VECTOR
)
6904 emit_insn (gen_vsx_splat_v4si (target
, element0
));
6907 rtx tmp
= gen_reg_rtx (DImode
);
6908 emit_insn (gen_zero_extendsidi2 (tmp
, element0
));
6909 emit_insn (gen_vsx_splat_v4si_di (target
, tmp
));
6918 for (i
= 0; i
< 4; i
++)
6919 elements
[i
] = force_reg (SImode
, XVECEXP (vals
, 0, i
));
6921 emit_insn (gen_vsx_init_v4si (target
, elements
[0], elements
[1],
6922 elements
[2], elements
[3]));
6927 /* With single precision floating point on VSX, know that internally single
6928 precision is actually represented as a double, and either make 2 V2DF
6929 vectors, and convert these vectors to single precision, or do one
6930 conversion, and splat the result to the other elements. */
6931 if (mode
== V4SFmode
&& VECTOR_MEM_VSX_P (V4SFmode
))
6935 rtx element0
= XVECEXP (vals
, 0, 0);
6937 if (TARGET_P9_VECTOR
)
6939 if (MEM_P (element0
))
6940 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6942 emit_insn (gen_vsx_splat_v4sf (target
, element0
));
6947 rtx freg
= gen_reg_rtx (V4SFmode
);
6948 rtx sreg
= force_reg (SFmode
, element0
);
6949 rtx cvt
= (TARGET_XSCVDPSPN
6950 ? gen_vsx_xscvdpspn_scalar (freg
, sreg
)
6951 : gen_vsx_xscvdpsp_scalar (freg
, sreg
));
6954 emit_insn (gen_vsx_xxspltw_v4sf_direct (target
, freg
,
6960 if (TARGET_P8_VECTOR
&& TARGET_POWERPC64
)
6966 for (i
= 0; i
< 4; i
++)
6968 tmp_si
[i
] = gen_reg_rtx (SImode
);
6969 tmp_di
[i
] = gen_reg_rtx (DImode
);
6970 mrg_di
[i
] = gen_reg_rtx (DImode
);
6971 tmp_sf
[i
] = force_reg (SFmode
, XVECEXP (vals
, 0, i
));
6972 emit_insn (gen_movsi_from_sf (tmp_si
[i
], tmp_sf
[i
]));
6973 emit_insn (gen_zero_extendsidi2 (tmp_di
[i
], tmp_si
[i
]));
6976 if (!BYTES_BIG_ENDIAN
)
6978 std::swap (tmp_di
[0], tmp_di
[1]);
6979 std::swap (tmp_di
[2], tmp_di
[3]);
6982 emit_insn (gen_ashldi3 (mrg_di
[0], tmp_di
[0], GEN_INT (32)));
6983 emit_insn (gen_iordi3 (mrg_di
[1], mrg_di
[0], tmp_di
[1]));
6984 emit_insn (gen_ashldi3 (mrg_di
[2], tmp_di
[2], GEN_INT (32)));
6985 emit_insn (gen_iordi3 (mrg_di
[3], mrg_di
[2], tmp_di
[3]));
6987 rtx tmp_v2di
= gen_reg_rtx (V2DImode
);
6988 emit_insn (gen_vsx_concat_v2di (tmp_v2di
, mrg_di
[1], mrg_di
[3]));
6989 emit_move_insn (target
, gen_lowpart (V4SFmode
, tmp_v2di
));
6993 rtx dbl_even
= gen_reg_rtx (V2DFmode
);
6994 rtx dbl_odd
= gen_reg_rtx (V2DFmode
);
6995 rtx flt_even
= gen_reg_rtx (V4SFmode
);
6996 rtx flt_odd
= gen_reg_rtx (V4SFmode
);
6997 rtx op0
= force_reg (SFmode
, XVECEXP (vals
, 0, 0));
6998 rtx op1
= force_reg (SFmode
, XVECEXP (vals
, 0, 1));
6999 rtx op2
= force_reg (SFmode
, XVECEXP (vals
, 0, 2));
7000 rtx op3
= force_reg (SFmode
, XVECEXP (vals
, 0, 3));
7002 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op1
));
7003 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op2
, op3
));
7004 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
7005 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
7006 rs6000_expand_extract_even (target
, flt_even
, flt_odd
);
7012 /* Special case initializing vector short/char that are splats if we are on
7013 64-bit systems with direct move. */
7014 if (all_same
&& TARGET_DIRECT_MOVE_64BIT
7015 && (mode
== V16QImode
|| mode
== V8HImode
))
7017 rtx op0
= XVECEXP (vals
, 0, 0);
7018 rtx di_tmp
= gen_reg_rtx (DImode
);
7021 op0
= force_reg (GET_MODE_INNER (mode
), op0
);
7023 if (mode
== V16QImode
)
7025 emit_insn (gen_zero_extendqidi2 (di_tmp
, op0
));
7026 emit_insn (gen_vsx_vspltb_di (target
, di_tmp
));
7030 if (mode
== V8HImode
)
7032 emit_insn (gen_zero_extendhidi2 (di_tmp
, op0
));
7033 emit_insn (gen_vsx_vsplth_di (target
, di_tmp
));
7038 /* Store value to stack temp. Load vector element. Splat. However, splat
7039 of 64-bit items is not supported on Altivec. */
7040 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
7042 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7043 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0),
7044 XVECEXP (vals
, 0, 0));
7045 x
= gen_rtx_UNSPEC (VOIDmode
,
7046 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7047 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7049 gen_rtx_SET (target
, mem
),
7051 x
= gen_rtx_VEC_SELECT (inner_mode
, target
,
7052 gen_rtx_PARALLEL (VOIDmode
,
7053 gen_rtvec (1, const0_rtx
)));
7054 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
7058 /* One field is non-constant. Load constant then overwrite
7062 rtx copy
= copy_rtx (vals
);
7064 /* Load constant part of vector, substitute neighboring value for
7066 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
7067 rs6000_expand_vector_init (target
, copy
);
7069 /* Insert variable. */
7070 rs6000_expand_vector_set (target
, XVECEXP (vals
, 0, one_var
),
7075 if (TARGET_DIRECT_MOVE
&& (mode
== V16QImode
|| mode
== V8HImode
))
7078 /* Force the values into word_mode registers. */
7079 for (i
= 0; i
< n_elts
; i
++)
7081 rtx tmp
= force_reg (inner_mode
, XVECEXP (vals
, 0, i
));
7082 machine_mode tmode
= TARGET_POWERPC64
? DImode
: SImode
;
7083 op
[i
] = simplify_gen_subreg (tmode
, tmp
, inner_mode
, 0);
7086 /* Take unsigned char big endianness on 64bit as example for below
7087 construction, the input values are: A, B, C, D, ..., O, P. */
7089 if (TARGET_DIRECT_MOVE_128
)
7091 /* Move to VSX register with vec_concat, each has 2 values.
7092 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
7093 vr1[1] = { xxxxxxxC, xxxxxxxD };
7095 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
7097 for (i
= 0; i
< n_elts
/ 2; i
++)
7099 vr1
[i
] = gen_reg_rtx (V2DImode
);
7100 emit_insn (gen_vsx_concat_v2di (vr1
[i
], op
[i
* 2],
7104 /* Pack vectors with 2 values into vectors with 4 values.
7105 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
7106 vr2[1] = { xxxExxxF, xxxGxxxH };
7107 vr2[1] = { xxxIxxxJ, xxxKxxxL };
7108 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
7110 for (i
= 0; i
< n_elts
/ 4; i
++)
7112 vr2
[i
] = gen_reg_rtx (V4SImode
);
7113 emit_insn (gen_altivec_vpkudum (vr2
[i
], vr1
[i
* 2],
7117 /* Pack vectors with 4 values into vectors with 8 values.
7118 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7119 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7121 for (i
= 0; i
< n_elts
/ 8; i
++)
7123 vr3
[i
] = gen_reg_rtx (V8HImode
);
7124 emit_insn (gen_altivec_vpkuwum (vr3
[i
], vr2
[i
* 2],
7128 /* If it's V8HImode, it's done and return it. */
7129 if (mode
== V8HImode
)
7131 emit_insn (gen_rtx_SET (target
, vr3
[0]));
7135 /* Pack vectors with 8 values into 16 values. */
7136 rtx res
= gen_reg_rtx (V16QImode
);
7137 emit_insn (gen_altivec_vpkuhum (res
, vr3
[0], vr3
[1]));
7138 emit_insn (gen_rtx_SET (target
, res
));
7142 rtx (*merge_v16qi
) (rtx
, rtx
, rtx
) = NULL
;
7143 rtx (*merge_v8hi
) (rtx
, rtx
, rtx
) = NULL
;
7144 rtx (*merge_v4si
) (rtx
, rtx
, rtx
) = NULL
;
7147 /* Set up some common gen routines and values. */
7148 if (BYTES_BIG_ENDIAN
)
7150 if (mode
== V16QImode
)
7152 merge_v16qi
= gen_altivec_vmrghb
;
7153 merge_v8hi
= gen_altivec_vmrglh
;
7156 merge_v8hi
= gen_altivec_vmrghh
;
7158 merge_v4si
= gen_altivec_vmrglw
;
7159 perm_idx
= GEN_INT (3);
7163 if (mode
== V16QImode
)
7165 merge_v16qi
= gen_altivec_vmrglb
;
7166 merge_v8hi
= gen_altivec_vmrghh
;
7169 merge_v8hi
= gen_altivec_vmrglh
;
7171 merge_v4si
= gen_altivec_vmrghw
;
7172 perm_idx
= GEN_INT (0);
7175 /* Move to VSX register with direct move.
7176 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7177 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7179 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7181 for (i
= 0; i
< n_elts
; i
++)
7183 vr_qi
[i
] = gen_reg_rtx (V16QImode
);
7184 if (TARGET_POWERPC64
)
7185 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi
[i
], op
[i
]));
7187 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi
[i
], op
[i
]));
7190 /* Merge/move to vector short.
7191 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7192 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7194 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7196 for (i
= 0; i
< 8; i
++)
7199 if (mode
== V16QImode
)
7201 tmp
= gen_reg_rtx (V16QImode
);
7202 emit_insn (merge_v16qi (tmp
, vr_qi
[2 * i
], vr_qi
[2 * i
+ 1]));
7204 vr_hi
[i
] = gen_reg_rtx (V8HImode
);
7205 emit_move_insn (vr_hi
[i
], gen_lowpart (V8HImode
, tmp
));
7208 /* Merge vector short to vector int.
7209 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7210 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7212 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7214 for (i
= 0; i
< 4; i
++)
7216 rtx tmp
= gen_reg_rtx (V8HImode
);
7217 emit_insn (merge_v8hi (tmp
, vr_hi
[2 * i
], vr_hi
[2 * i
+ 1]));
7218 vr_si
[i
] = gen_reg_rtx (V4SImode
);
7219 emit_move_insn (vr_si
[i
], gen_lowpart (V4SImode
, tmp
));
7222 /* Merge vector int to vector long.
7223 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7224 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7226 for (i
= 0; i
< 2; i
++)
7228 rtx tmp
= gen_reg_rtx (V4SImode
);
7229 emit_insn (merge_v4si (tmp
, vr_si
[2 * i
], vr_si
[2 * i
+ 1]));
7230 vr_di
[i
] = gen_reg_rtx (V2DImode
);
7231 emit_move_insn (vr_di
[i
], gen_lowpart (V2DImode
, tmp
));
7234 rtx res
= gen_reg_rtx (V2DImode
);
7235 emit_insn (gen_vsx_xxpermdi_v2di (res
, vr_di
[0], vr_di
[1], perm_idx
));
7236 emit_insn (gen_rtx_SET (target
, gen_lowpart (mode
, res
)));
7242 /* Construct the vector in memory one field at a time
7243 and load the whole vector. */
7244 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7245 for (i
= 0; i
< n_elts
; i
++)
7246 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
7247 i
* GET_MODE_SIZE (inner_mode
)),
7248 XVECEXP (vals
, 0, i
));
7249 emit_move_insn (target
, mem
);
7252 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7253 is variable and also counts by vector element size for p9 and above. */
7256 rs6000_expand_vector_set_var_p9 (rtx target
, rtx val
, rtx idx
)
7258 machine_mode mode
= GET_MODE (target
);
7260 gcc_assert (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (idx
));
7262 machine_mode inner_mode
= GET_MODE (val
);
7264 int width
= GET_MODE_SIZE (inner_mode
);
7266 gcc_assert (width
>= 1 && width
<= 8);
7268 int shift
= exact_log2 (width
);
7270 machine_mode idx_mode
= GET_MODE (idx
);
7272 machine_mode shift_mode
;
7273 /* Gen function pointers for shifting left and generation of permutation
7275 rtx (*gen_ashl
) (rtx
, rtx
, rtx
);
7276 rtx (*gen_pcvr1
) (rtx
, rtx
);
7277 rtx (*gen_pcvr2
) (rtx
, rtx
);
7279 if (TARGET_POWERPC64
)
7281 shift_mode
= DImode
;
7282 gen_ashl
= gen_ashldi3
;
7283 gen_pcvr1
= BYTES_BIG_ENDIAN
? gen_altivec_lvsl_reg_di
7284 : gen_altivec_lvsr_reg_di
;
7285 gen_pcvr2
= BYTES_BIG_ENDIAN
? gen_altivec_lvsr_reg_di
7286 : gen_altivec_lvsl_reg_di
;
7290 shift_mode
= SImode
;
7291 gen_ashl
= gen_ashlsi3
;
7292 gen_pcvr1
= BYTES_BIG_ENDIAN
? gen_altivec_lvsl_reg_si
7293 : gen_altivec_lvsr_reg_si
;
7294 gen_pcvr2
= BYTES_BIG_ENDIAN
? gen_altivec_lvsr_reg_si
7295 : gen_altivec_lvsl_reg_si
;
7297 /* Generate the IDX for permute shift, width is the vector element size.
7298 idx = idx * width. */
7299 rtx tmp
= gen_reg_rtx (shift_mode
);
7300 idx
= convert_modes (shift_mode
, idx_mode
, idx
, 1);
7302 emit_insn (gen_ashl (tmp
, idx
, GEN_INT (shift
)));
7304 /* Generate one permutation control vector used for rotating the element
7305 at to-insert position to element zero in target vector. lvsl is
7306 used for big endianness while lvsr is used for little endianness:
7307 lvs[lr] v1,0,idx. */
7308 rtx pcvr1
= gen_reg_rtx (V16QImode
);
7309 emit_insn (gen_pcvr1 (pcvr1
, tmp
));
7311 rtx sub_target
= simplify_gen_subreg (V16QImode
, target
, mode
, 0);
7312 rtx perm1
= gen_altivec_vperm_v8hiv16qi (sub_target
, sub_target
, sub_target
,
7316 /* Insert val into element 0 of target vector. */
7317 rs6000_expand_vector_set (target
, val
, const0_rtx
);
7319 /* Rotate back with a reversed permutation control vector generated from:
7320 lvs[rl] v2,0,idx. */
7321 rtx pcvr2
= gen_reg_rtx (V16QImode
);
7322 emit_insn (gen_pcvr2 (pcvr2
, tmp
));
7324 rtx perm2
= gen_altivec_vperm_v8hiv16qi (sub_target
, sub_target
, sub_target
,
7329 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7330 is variable and also counts by vector element size for p7 & p8. */
7333 rs6000_expand_vector_set_var_p7 (rtx target
, rtx val
, rtx idx
)
7335 machine_mode mode
= GET_MODE (target
);
7337 gcc_assert (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (idx
));
7339 machine_mode inner_mode
= GET_MODE (val
);
7340 HOST_WIDE_INT mode_mask
= GET_MODE_MASK (inner_mode
);
7342 int width
= GET_MODE_SIZE (inner_mode
);
7343 gcc_assert (width
>= 1 && width
<= 4);
7345 int shift
= exact_log2 (width
);
7347 machine_mode idx_mode
= GET_MODE (idx
);
7349 machine_mode shift_mode
;
7350 rtx (*gen_ashl
)(rtx
, rtx
, rtx
);
7351 rtx (*gen_add
)(rtx
, rtx
, rtx
);
7352 rtx (*gen_sub
)(rtx
, rtx
, rtx
);
7353 rtx (*gen_lvsl
)(rtx
, rtx
);
7355 if (TARGET_POWERPC64
)
7357 shift_mode
= DImode
;
7358 gen_ashl
= gen_ashldi3
;
7359 gen_add
= gen_adddi3
;
7360 gen_sub
= gen_subdi3
;
7361 gen_lvsl
= gen_altivec_lvsl_reg_di
;
7365 shift_mode
= SImode
;
7366 gen_ashl
= gen_ashlsi3
;
7367 gen_add
= gen_addsi3
;
7368 gen_sub
= gen_subsi3
;
7369 gen_lvsl
= gen_altivec_lvsl_reg_si
;
7372 /* idx = idx * width. */
7373 rtx tmp
= gen_reg_rtx (shift_mode
);
7374 idx
= convert_modes (shift_mode
, idx_mode
, idx
, 1);
7376 emit_insn (gen_ashl (tmp
, idx
, GEN_INT (shift
)));
7378 /* For LE: idx = idx + 8. */
7379 if (!BYTES_BIG_ENDIAN
)
7380 emit_insn (gen_add (tmp
, tmp
, GEN_INT (8)));
7382 emit_insn (gen_sub (tmp
, GEN_INT (24 - width
), tmp
));
7385 DImode: 0xffffffffffffffff0000000000000000
7386 SImode: 0x00000000ffffffff0000000000000000
7387 HImode: 0x000000000000ffff0000000000000000.
7388 QImode: 0x00000000000000ff0000000000000000. */
7389 rtx mask
= gen_reg_rtx (V16QImode
);
7390 rtx mask_v2di
= gen_reg_rtx (V2DImode
);
7391 rtvec v
= rtvec_alloc (2);
7392 if (!BYTES_BIG_ENDIAN
)
7394 RTVEC_ELT (v
, 0) = gen_rtx_CONST_INT (DImode
, 0);
7395 RTVEC_ELT (v
, 1) = gen_rtx_CONST_INT (DImode
, mode_mask
);
7399 RTVEC_ELT (v
, 0) = gen_rtx_CONST_INT (DImode
, mode_mask
);
7400 RTVEC_ELT (v
, 1) = gen_rtx_CONST_INT (DImode
, 0);
7402 emit_insn (gen_vec_initv2didi (mask_v2di
, gen_rtx_PARALLEL (V2DImode
, v
)));
7403 rtx sub_mask
= simplify_gen_subreg (V16QImode
, mask_v2di
, V2DImode
, 0);
7404 emit_insn (gen_rtx_SET (mask
, sub_mask
));
7406 /* mtvsrd[wz] f0,tmp_val. */
7407 rtx tmp_val
= gen_reg_rtx (SImode
);
7408 if (inner_mode
== E_SFmode
)
7409 if (TARGET_DIRECT_MOVE_64BIT
)
7410 emit_insn (gen_movsi_from_sf (tmp_val
, val
));
7413 rtx stack
= rs6000_allocate_stack_temp (SFmode
, false, true);
7414 emit_insn (gen_movsf_hardfloat (stack
, val
));
7415 rtx stack2
= copy_rtx (stack
);
7416 PUT_MODE (stack2
, SImode
);
7417 emit_move_insn (tmp_val
, stack2
);
7420 tmp_val
= force_reg (SImode
, val
);
7422 rtx val_v16qi
= gen_reg_rtx (V16QImode
);
7423 rtx val_v2di
= gen_reg_rtx (V2DImode
);
7424 rtvec vec_val
= rtvec_alloc (2);
7425 if (!BYTES_BIG_ENDIAN
)
7427 RTVEC_ELT (vec_val
, 0) = gen_rtx_CONST_INT (DImode
, 0);
7428 RTVEC_ELT (vec_val
, 1) = tmp_val
;
7432 RTVEC_ELT (vec_val
, 0) = tmp_val
;
7433 RTVEC_ELT (vec_val
, 1) = gen_rtx_CONST_INT (DImode
, 0);
7436 gen_vec_initv2didi (val_v2di
, gen_rtx_PARALLEL (V2DImode
, vec_val
)));
7437 rtx sub_val
= simplify_gen_subreg (V16QImode
, val_v2di
, V2DImode
, 0);
7438 emit_insn (gen_rtx_SET (val_v16qi
, sub_val
));
7440 /* lvsl 13,0,idx. */
7441 rtx pcv
= gen_reg_rtx (V16QImode
);
7442 emit_insn (gen_lvsl (pcv
, tmp
));
7444 /* vperm 1,1,1,13. */
7445 /* vperm 0,0,0,13. */
7446 rtx val_perm
= gen_reg_rtx (V16QImode
);
7447 rtx mask_perm
= gen_reg_rtx (V16QImode
);
7448 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm
, val_v16qi
, val_v16qi
, pcv
));
7449 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm
, mask
, mask
, pcv
));
7451 rtx target_v16qi
= simplify_gen_subreg (V16QImode
, target
, mode
, 0);
7453 /* xxsel 34,34,32,33. */
7455 gen_vector_select_v16qi (target_v16qi
, target_v16qi
, val_perm
, mask_perm
));
7458 /* Set field ELT_RTX of TARGET to VAL. */
7461 rs6000_expand_vector_set (rtx target
, rtx val
, rtx elt_rtx
)
7463 machine_mode mode
= GET_MODE (target
);
7464 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7465 rtx reg
= gen_reg_rtx (mode
);
7467 int width
= GET_MODE_SIZE (inner_mode
);
7470 val
= force_reg (GET_MODE (val
), val
);
7472 if (VECTOR_MEM_VSX_P (mode
))
7474 if (!CONST_INT_P (elt_rtx
))
7476 /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
7477 when elt_rtx is variable. */
7478 if ((TARGET_P9_VECTOR
&& TARGET_POWERPC64
) || width
== 8)
7480 rs6000_expand_vector_set_var_p9 (target
, val
, elt_rtx
);
7483 else if (TARGET_VSX
)
7485 rs6000_expand_vector_set_var_p7 (target
, val
, elt_rtx
);
7489 gcc_assert (CONST_INT_P (elt_rtx
));
7492 rtx insn
= NULL_RTX
;
7494 if (mode
== V2DFmode
)
7495 insn
= gen_vsx_set_v2df (target
, target
, val
, elt_rtx
);
7497 else if (mode
== V2DImode
)
7498 insn
= gen_vsx_set_v2di (target
, target
, val
, elt_rtx
);
7500 else if (TARGET_P9_VECTOR
&& TARGET_POWERPC64
)
7502 if (mode
== V4SImode
)
7503 insn
= gen_vsx_set_v4si_p9 (target
, target
, val
, elt_rtx
);
7504 else if (mode
== V8HImode
)
7505 insn
= gen_vsx_set_v8hi_p9 (target
, target
, val
, elt_rtx
);
7506 else if (mode
== V16QImode
)
7507 insn
= gen_vsx_set_v16qi_p9 (target
, target
, val
, elt_rtx
);
7508 else if (mode
== V4SFmode
)
7509 insn
= gen_vsx_set_v4sf_p9 (target
, target
, val
, elt_rtx
);
7519 /* Simplify setting single element vectors like V1TImode. */
7520 if (GET_MODE_SIZE (mode
) == GET_MODE_SIZE (inner_mode
)
7521 && INTVAL (elt_rtx
) == 0)
7523 emit_move_insn (target
, gen_lowpart (mode
, val
));
7527 /* Load single variable value. */
7528 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7529 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0), val
);
7530 x
= gen_rtx_UNSPEC (VOIDmode
,
7531 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7532 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7534 gen_rtx_SET (reg
, mem
),
7537 /* Linear sequence. */
7538 mask
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
7539 for (i
= 0; i
< 16; ++i
)
7540 XVECEXP (mask
, 0, i
) = GEN_INT (i
);
7542 /* Set permute mask to insert element into target. */
7543 for (i
= 0; i
< width
; ++i
)
7544 XVECEXP (mask
, 0, INTVAL (elt_rtx
) * width
+ i
) = GEN_INT (i
+ 0x10);
7545 x
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (mask
, 0));
7547 if (BYTES_BIG_ENDIAN
)
7548 x
= gen_rtx_UNSPEC (mode
,
7549 gen_rtvec (3, target
, reg
,
7550 force_reg (V16QImode
, x
)),
7554 if (TARGET_P9_VECTOR
)
7555 x
= gen_rtx_UNSPEC (mode
,
7556 gen_rtvec (3, reg
, target
,
7557 force_reg (V16QImode
, x
)),
7561 /* Invert selector. We prefer to generate VNAND on P8 so
7562 that future fusion opportunities can kick in, but must
7563 generate VNOR elsewhere. */
7564 rtx notx
= gen_rtx_NOT (V16QImode
, force_reg (V16QImode
, x
));
7565 rtx iorx
= (TARGET_P8_VECTOR
7566 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
7567 : gen_rtx_AND (V16QImode
, notx
, notx
));
7568 rtx tmp
= gen_reg_rtx (V16QImode
);
7569 emit_insn (gen_rtx_SET (tmp
, iorx
));
7571 /* Permute with operands reversed and adjusted selector. */
7572 x
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, reg
, target
, tmp
),
7577 emit_insn (gen_rtx_SET (target
, x
));
7580 /* Extract field ELT from VEC into TARGET. */
7583 rs6000_expand_vector_extract (rtx target
, rtx vec
, rtx elt
)
7585 machine_mode mode
= GET_MODE (vec
);
7586 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7589 if (VECTOR_MEM_VSX_P (mode
) && CONST_INT_P (elt
))
7596 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7599 emit_insn (gen_vsx_extract_v2df (target
, vec
, elt
));
7602 emit_insn (gen_vsx_extract_v2di (target
, vec
, elt
));
7605 emit_insn (gen_vsx_extract_v4sf (target
, vec
, elt
));
7608 if (TARGET_DIRECT_MOVE_64BIT
)
7610 emit_insn (gen_vsx_extract_v16qi (target
, vec
, elt
));
7616 if (TARGET_DIRECT_MOVE_64BIT
)
7618 emit_insn (gen_vsx_extract_v8hi (target
, vec
, elt
));
7624 if (TARGET_DIRECT_MOVE_64BIT
)
7626 emit_insn (gen_vsx_extract_v4si (target
, vec
, elt
));
7632 else if (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (elt
)
7633 && TARGET_DIRECT_MOVE_64BIT
)
7635 if (GET_MODE (elt
) != DImode
)
7637 rtx tmp
= gen_reg_rtx (DImode
);
7638 convert_move (tmp
, elt
, 0);
7641 else if (!REG_P (elt
))
7642 elt
= force_reg (DImode
, elt
);
7647 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7651 emit_insn (gen_vsx_extract_v2df_var (target
, vec
, elt
));
7655 emit_insn (gen_vsx_extract_v2di_var (target
, vec
, elt
));
7659 emit_insn (gen_vsx_extract_v4sf_var (target
, vec
, elt
));
7663 emit_insn (gen_vsx_extract_v4si_var (target
, vec
, elt
));
7667 emit_insn (gen_vsx_extract_v8hi_var (target
, vec
, elt
));
7671 emit_insn (gen_vsx_extract_v16qi_var (target
, vec
, elt
));
7679 /* Allocate mode-sized buffer. */
7680 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7682 emit_move_insn (mem
, vec
);
7683 if (CONST_INT_P (elt
))
7685 int modulo_elt
= INTVAL (elt
) % GET_MODE_NUNITS (mode
);
7687 /* Add offset to field within buffer matching vector element. */
7688 mem
= adjust_address_nv (mem
, inner_mode
,
7689 modulo_elt
* GET_MODE_SIZE (inner_mode
));
7690 emit_move_insn (target
, adjust_address_nv (mem
, inner_mode
, 0));
7694 unsigned int ele_size
= GET_MODE_SIZE (inner_mode
);
7695 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (mode
) - 1);
7697 elt
= gen_rtx_AND (Pmode
, elt
, num_ele_m1
);
7699 elt
= gen_rtx_MULT (Pmode
, elt
, GEN_INT (ele_size
));
7700 rtx new_addr
= gen_rtx_PLUS (Pmode
, XEXP (mem
, 0), elt
);
7701 new_addr
= change_address (mem
, inner_mode
, new_addr
);
7702 emit_move_insn (target
, new_addr
);
7706 /* Return the offset within a memory object (MEM) of a vector type to a given
7707 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7708 the element is constant, we return a constant integer.
7710 Otherwise, we use a base register temporary to calculate the offset after
7711 masking it to fit within the bounds of the vector and scaling it. The
7712 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7713 built-in function. */
7716 get_vector_offset (rtx mem
, rtx element
, rtx base_tmp
, unsigned scalar_size
)
7718 if (CONST_INT_P (element
))
7719 return GEN_INT (INTVAL (element
) * scalar_size
);
7721 /* All insns should use the 'Q' constraint (address is a single register) if
7722 the element number is not a constant. */
7723 gcc_assert (satisfies_constraint_Q (mem
));
7725 /* Mask the element to make sure the element number is between 0 and the
7726 maximum number of elements - 1 so that we don't generate an address
7727 outside the vector. */
7728 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (GET_MODE (mem
)) - 1);
7729 rtx and_op
= gen_rtx_AND (Pmode
, element
, num_ele_m1
);
7730 emit_insn (gen_rtx_SET (base_tmp
, and_op
));
7732 /* Shift the element to get the byte offset from the element number. */
7733 int shift
= exact_log2 (scalar_size
);
7734 gcc_assert (shift
>= 0);
7738 rtx shift_op
= gen_rtx_ASHIFT (Pmode
, base_tmp
, GEN_INT (shift
));
7739 emit_insn (gen_rtx_SET (base_tmp
, shift_op
));
/* Helper function to update PC-relative addresses when we are adjusting a
   memory address (ADDR) to a vector to point to a scalar field within the
   vector with a constant offset (ELEMENT_OFFSET).  If the address is not
   valid, we can use the base register temporary (BASE_TMP) to form the
   address.  */

static rtx
adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
{
  rtx new_addr = NULL;

  gcc_assert (CONST_INT_P (element_offset));

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);

      if (CONST_INT_P (op1))
	{
	  HOST_WIDE_INT offset
	    = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);

	  if (offset == 0)
	    new_addr = op0;
	  else
	    {
	      rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
	      new_addr = gen_rtx_CONST (Pmode, plus);
	    }
	}
      else
	{
	  emit_move_insn (base_tmp, addr);
	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
	}
    }

  else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
    {
      rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
      new_addr = gen_rtx_CONST (Pmode, plus);
    }

  else
    gcc_unreachable ();

  return new_addr;
}
/* Adjust a memory address (MEM) of a vector type to point to a scalar field
   within the vector (ELEMENT) with a mode (SCALAR_MODE).  Use a base register
   temporary (BASE_TMP) to fixup the address.  Return the new memory address
   that is valid for reads or writes to a given register (SCALAR_REG).

   This function is expected to be called after reload is completed when we are
   splitting insns.  The temporary BASE_TMP might be set multiple times with
   this code.  */

static rtx
rs6000_adjust_vec_address (rtx scalar_reg,
			   rtx mem,
			   rtx element,
			   rtx base_tmp,
			   machine_mode scalar_mode)
{
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  rtx addr = XEXP (mem, 0);
  rtx new_addr;

  gcc_assert (!reg_mentioned_p (base_tmp, addr));
  gcc_assert (!reg_mentioned_p (base_tmp, element));

  /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
  gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);

  /* Calculate what we need to add to the address to get the element
     address.  */
  rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);

  /* Create the new address pointing to the element within the vector.  If we
     are adding 0, we don't have to change the address.  */
  if (element_offset == const0_rtx)
    new_addr = addr;

  /* A simple indirect address can be converted into a reg + offset
     address.  */
  else if (REG_P (addr) || SUBREG_P (addr))
    new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);

  /* For references to local static variables, fold a constant offset into the
     address.  */
  else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
    new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);

  /* Optimize D-FORM addresses with constant offset with a constant element, to
     include the element offset in the address directly.  */
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);

      gcc_assert (REG_P (op0) || SUBREG_P (op0));
      if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
	{
	  /* op0 should never be r0, because r0+offset is not valid.  But it
	     doesn't hurt to make sure it is not r0.  */
	  gcc_assert (reg_or_subregno (op0) != 0);

	  /* D-FORM address with constant element number.  */
	  HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
	  rtx offset_rtx = GEN_INT (offset);
	  new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
	}
      else
	{
	  /* If we don't have a D-FORM address with a constant element number,
	     add the two elements in the current address.  Then add the offset.

	     Previously, we tried to add the offset to OP1 and change the
	     address to an X-FORM format adding OP0 and BASE_TMP, but it became
	     complicated because we had to verify that op1 was not GPR0 and we
	     had a constant element offset (due to the way ADDI is defined).
	     By doing the add of OP0 and OP1 first, and then adding in the
	     offset, it has the benefit that if D-FORM instructions are
	     allowed, the offset is part of the memory access to the vector
	     element.  */
	  emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
	  new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
	}
    }

  else
    {
      emit_move_insn (base_tmp, addr);
      new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
    }

  /* If the address isn't valid, move the address into the temporary base
     register.  Some reasons it could not be valid include:

     The address offset overflowed the 16 or 34 bit offset size;
     We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
     We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
     Only X_FORM loads can be done, and the address is D_FORM.  */

  enum insn_form iform
    = address_to_insn_form (new_addr, scalar_mode,
			    reg_to_non_prefixed (scalar_reg, scalar_mode));

  if (iform == INSN_FORM_BAD)
    {
      emit_move_insn (base_tmp, new_addr);
      new_addr = base_tmp;
    }

  return change_address (mem, scalar_mode, new_addr);
}
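
/* Illustrative example (assumption, not from the original sources):
   extracting constant element 2 of a V4SImode vector held at the D-form
   address (plus (reg r9) (const_int 16)) folds the element offset into the
   displacement, giving (plus (reg r9) (const_int 24)); a variable element
   instead goes through BASE_TMP as computed by get_vector_offset.  */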
/* Split a variable vec_extract operation into the component instructions.  */

void
rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
			      rtx tmp_altivec)
{
  machine_mode mode = GET_MODE (src);
  machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  int byte_shift = exact_log2 (scalar_size);

  gcc_assert (byte_shift >= 0);

  /* If we are given a memory address, optimize to load just the element.  We
     don't have to adjust the vector element number on little endian
     systems.  */
  if (MEM_P (src))
    {
      emit_move_insn (dest,
		      rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
						 scalar_mode));
    }

  else if (REG_P (src) || SUBREG_P (src))
    {
      int num_elements = GET_MODE_NUNITS (mode);
      int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
      int bit_shift = 7 - exact_log2 (num_elements);
      rtx element2;
      unsigned int dest_regno = reg_or_subregno (dest);
      unsigned int src_regno = reg_or_subregno (src);
      unsigned int element_regno = reg_or_subregno (element);

      gcc_assert (REG_P (tmp_gpr));

      /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
	 a general purpose register.  */
      if (TARGET_P9_VECTOR
	  && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	  && INT_REGNO_P (dest_regno)
	  && ALTIVEC_REGNO_P (src_regno)
	  && INT_REGNO_P (element_regno))
	{
	  rtx dest_si = gen_rtx_REG (SImode, dest_regno);
	  rtx element_si = gen_rtx_REG (SImode, element_regno);

	  if (mode == V16QImode)
	    emit_insn (BYTES_BIG_ENDIAN
		       ? gen_vextublx (dest_si, element_si, src)
		       : gen_vextubrx (dest_si, element_si, src));

	  else if (mode == V8HImode)
	    {
	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
	      emit_insn (BYTES_BIG_ENDIAN
			 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
			 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
	    }

	  else
	    {
	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
	      emit_insn (BYTES_BIG_ENDIAN
			 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
			 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
	    }

	  return;
	}

      gcc_assert (REG_P (tmp_altivec));

      /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
	 an XOR, otherwise we need to subtract.  The shift amount is so VSLO
	 will shift the element into the upper position (adding 3 to convert a
	 byte shift into a bit shift).  */
      if (scalar_size == 8)
	{
	  if (!BYTES_BIG_ENDIAN)
	    {
	      emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
	      element2 = tmp_gpr;
	    }
	  else
	    element2 = element;

	  /* Generate RLDIC directly to shift left 6 bits and retrieve 1
	     bit.  */
	  emit_insn (gen_rtx_SET (tmp_gpr,
				  gen_rtx_AND (DImode,
					       gen_rtx_ASHIFT (DImode,
							       element2,
							       GEN_INT (6)),
					       GEN_INT (64))));
	}
      else
	{
	  if (!BYTES_BIG_ENDIAN)
	    {
	      rtx num_ele_m1 = GEN_INT (num_elements - 1);

	      emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
	      emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
	      element2 = tmp_gpr;
	    }
	  else
	    element2 = element;

	  emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
	}

      /* Get the value into the lower byte of the Altivec register where VSLO
	 expects it.  */
      if (TARGET_P9_VECTOR)
	emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
      else if (can_create_pseudo_p ())
	emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
      else
	{
	  rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	  emit_move_insn (tmp_di, tmp_gpr);
	  emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
	}

      /* Do the VSLO to get the value into the final location.  */
      switch (mode)
	{
	case E_V2DFmode:
	  emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
	  return;

	case E_V2DImode:
	  emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
	  return;

	case E_V4SFmode:
	  {
	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	    rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
					  tmp_altivec));

	    emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
	    return;
	  }

	case E_V4SImode:
	case E_V8HImode:
	case E_V16QImode:
	  {
	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
	    rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
					  tmp_altivec));
	    emit_move_insn (tmp_gpr_di, tmp_altivec_di);
	    emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
				    GEN_INT (64 - bits_in_element)));
	    return;
	  }

	default:
	  gcc_unreachable ();
	}
    }

  else
    gcc_unreachable ();
}
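
/* Illustrative sketch of the non-P9 register path above (assumption, not a
   statement of the exact generated code): for a variable extract from a
   V4SImode value on little endian, the element number is masked to 0..3,
   reversed (3 - n), scaled to a bit count (n * 32, since bit_shift is 5),
   splatted into TMP_ALTIVEC, and VSLO shifts the selected word into the high
   doubleword, from which a final right shift of 64 - 32 bits recovers the
   result.  */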
/* Return alignment of TYPE.  Existing alignment is ALIGN.  HOW
   selects whether the alignment is abi mandated, optional, or
   both abi and optional alignment.  */

unsigned int
rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
{
  if (how != align_opt)
    {
      if (VECTOR_TYPE_P (type) && align < 128)
	align = 128;
    }

  if (how != align_abi)
    {
      if (TREE_CODE (type) == ARRAY_TYPE
	  && TYPE_MODE (TREE_TYPE (type)) == QImode)
	{
	  if (align < BITS_PER_WORD)
	    align = BITS_PER_WORD;
	}
    }

  return align;
}
/* Implement TARGET_SLOW_UNALIGNED_ACCESS.  Altivec vector memory
   instructions simply ignore the low bits; VSX memory instructions
   are aligned to 4 or 8 bytes.  */

static bool
rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
{
  return (STRICT_ALIGNMENT
	  || (!TARGET_EFFICIENT_UNALIGNED_VSX
	      && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
		  || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
		      && (int) align < VECTOR_ALIGN (mode)))));
}
/* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints.  */

unsigned int
rs6000_special_adjust_field_align (tree type, unsigned int computed)
{
  if (computed <= 32 || TYPE_PACKED (type))
    return computed;

  /* Strip initial arrays.  */
  while (TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  /* If RECORD or UNION, recursively find the first field.  */
  while (AGGREGATE_TYPE_P (type))
    {
      tree field = TYPE_FIELDS (type);

      /* Skip all non field decls */
      while (field != NULL
	     && (TREE_CODE (field) != FIELD_DECL
		 || DECL_FIELD_ABI_IGNORED (field)))
	field = DECL_CHAIN (field);

      if (! field)
	break;

      /* A packed field does not contribute any extra alignment.  */
      if (DECL_PACKED (field))
	return computed;

      type = TREE_TYPE (field);

      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);
    }

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
      && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
    computed = MIN (computed, 32);

  return computed;
}
/* AIX increases natural record alignment to doubleword if the innermost first
   field is an FP double while the FP fields remain word aligned.
   Only called if TYPE initially is a RECORD or UNION.  */

unsigned int
rs6000_special_round_type_align (tree type, unsigned int computed,
				 unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type) || align >= 64)
    return align;

  /* If RECORD or UNION, recursively find the first field.  */
  do
    {
      tree field = TYPE_FIELDS (type);

      /* Skip all non field decls */
      while (field != NULL
	     && (TREE_CODE (field) != FIELD_DECL
		 || DECL_FIELD_ABI_IGNORED (field)))
	field = DECL_CHAIN (field);

      if (! field)
	break;

      /* A packed field does not contribute any extra alignment.  */
      if (DECL_PACKED (field))
	return align;

      type = TREE_TYPE (field);

      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);
    } while (AGGREGATE_TYPE_P (type));

  if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
      && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
    align = MAX (align, 64);

  return align;
}
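
/* Illustrative example (assumption, not taken from the original sources):
   given

     struct s { double d; int i; };

   the rule above raises the record alignment to 64 bits because the
   innermost first field has DFmode, while rs6000_special_adjust_field_align
   keeps the double member itself word (32-bit) aligned inside the record.  */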
/* Darwin increases record alignment to the natural alignment of
   the first field.  */

unsigned int
darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
					unsigned int specified)
{
  unsigned int align = MAX (computed, specified);

  if (TYPE_PACKED (type))
    return align;

  /* Find the first field, looking down into aggregates.  */
  do
    {
      tree field = TYPE_FIELDS (type);
      /* Skip all non field decls */
      while (field != NULL
	     && (TREE_CODE (field) != FIELD_DECL
		 || DECL_FIELD_ABI_IGNORED (field)))
	field = DECL_CHAIN (field);

      if (! field)
	break;

      /* A packed field does not contribute any extra alignment.  */
      if (DECL_PACKED (field))
	return align;

      type = TREE_TYPE (field);
      while (TREE_CODE (type) == ARRAY_TYPE)
	type = TREE_TYPE (type);
    } while (AGGREGATE_TYPE_P (type));

  if (type != error_mark_node && ! AGGREGATE_TYPE_P (type)
      && ! TYPE_PACKED (type) && maximum_field_alignment == 0)
    align = MAX (align, TYPE_ALIGN (type));

  return align;
}
/* Return 1 for an operand in small memory on V.4/eabi.  */

int
small_data_operand (rtx op ATTRIBUTE_UNUSED,
		    machine_mode mode ATTRIBUTE_UNUSED)
{
#if TARGET_ELF
  rtx sym_ref;

  if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
    return 0;

  if (DEFAULT_ABI != ABI_V4)
    return 0;

  if (SYMBOL_REF_P (op))
    sym_ref = op;

  else if (GET_CODE (op) != CONST
	   || GET_CODE (XEXP (op, 0)) != PLUS
	   || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
	   || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
    return 0;

  else
    {
      rtx sum = XEXP (op, 0);
      HOST_WIDE_INT summand;

      /* We have to be careful here, because it is the referenced address
	 that must be 32k from _SDA_BASE_, not just the symbol.  */
      summand = INTVAL (XEXP (sum, 1));
      if (summand < 0 || summand > g_switch_value)
	return 0;

      sym_ref = XEXP (sum, 0);
    }

  return SYMBOL_REF_SMALL_P (sym_ref);
#else
  return 0;
#endif
}
/* Return true if either operand is a general purpose register.  */

bool
gpr_or_gpr_p (rtx op0, rtx op1)
{
  return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
	  || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
}
/* Return true if this is a move direct operation between GPR registers and
   floating point/VSX registers.  */

bool
direct_move_p (rtx op0, rtx op1)
{
  if (!REG_P (op0) || !REG_P (op1))
    return false;

  if (!TARGET_DIRECT_MOVE)
    return false;

  int regno0 = REGNO (op0);
  int regno1 = REGNO (op1);
  if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
    return false;

  if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
    return true;

  if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
    return true;

  return false;
}
/* Return true if the ADDR is an acceptable address for a quad memory
   operation of mode MODE (either LQ/STQ for general purpose registers, or
   LXV/STXV for vector registers under ISA 3.0).  GPR_P is true if this
   address is intended for LQ/STQ.  If it is false, the address is intended
   for the ISA 3.0 LXV/STXV instruction.  */

static bool
quad_address_p (rtx addr, machine_mode mode, bool strict)
{
  rtx op0, op1;

  if (GET_MODE_SIZE (mode) < 16)
    return false;

  if (legitimate_indirect_address_p (addr, strict))
    return true;

  if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
    return false;

  /* Is this a valid prefixed address?  If the bottom four bits of the offset
     are non-zero, we could use a prefixed instruction (which does not have the
     DQ-form constraint that the traditional instruction had) instead of
     forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
    return true;

  if (GET_CODE (addr) != PLUS)
    return false;

  op0 = XEXP (addr, 0);
  if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
    return false;

  op1 = XEXP (addr, 1);
  if (!CONST_INT_P (op1))
    return false;

  return quad_address_offset_p (INTVAL (op1));
}
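
/* Illustrative example (assumption): with ISA 3.0 vector support,
   (plus (reg r4) (const_int 32)) is a valid LXV/STXV address because the
   displacement is a 16-byte-aligned 16-bit value, whereas
   (plus (reg r4) (const_int 20)) fails quad_address_offset_p and needs
   either a prefixed instruction or the offset moved to a GPR.  */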
/* Return true if this is a load or store quad operation.  This function does
   not handle the atomic quad memory instructions.  */

bool
quad_load_store_p (rtx op0, rtx op1)
{
  bool ret;

  if (!TARGET_QUAD_MEMORY)
    ret = false;

  else if (REG_P (op0) && MEM_P (op1))
    ret = (quad_int_reg_operand (op0, GET_MODE (op0))
	   && quad_memory_operand (op1, GET_MODE (op1))
	   && !reg_overlap_mentioned_p (op0, op1));

  else if (MEM_P (op0) && REG_P (op1))
    ret = (quad_memory_operand (op0, GET_MODE (op0))
	   && quad_int_reg_operand (op1, GET_MODE (op1)));

  else
    ret = false;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\n========== quad_load_store, return %s\n",
	       ret ? "true" : "false");
      debug_rtx (gen_rtx_SET (op0, op1));
    }

  return ret;
}
/* Given an address, return a constant offset term if one exists.  */

rtx
address_offset (rtx op)
{
  if (GET_CODE (op) == PRE_INC
      || GET_CODE (op) == PRE_DEC)
    op = XEXP (op, 0);
  else if (GET_CODE (op) == PRE_MODIFY
	   || GET_CODE (op) == LO_SUM)
    op = XEXP (op, 1);

  if (GET_CODE (op) == CONST)
    op = XEXP (op, 0);

  if (GET_CODE (op) == PLUS)
    op = XEXP (op, 1);

  if (CONST_INT_P (op))
    return op;

  return NULL_RTX;
}
/* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
   the mode.  If we can't find (or don't know) the alignment of the symbol
   we assume (optimistically) that it's sufficiently aligned [??? maybe we
   should be pessimistic].  Offsets are validated in the same way as for
   reg + offset.  */

static bool
darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
{
  /* We should not get here with this.  */
  gcc_checking_assert (! mode_supports_dq_form (mode));

  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* If we are building PIC code, then any symbol must be wrapped in an
     UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted.  */
  bool machopic_offs_p = false;
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
    {
      x = XVECEXP (x, 0, 0);
      machopic_offs_p = true;
    }

  rtx sym = NULL_RTX;
  unsigned HOST_WIDE_INT offset = 0;

  if (GET_CODE (x) == PLUS)
    {
      sym = XEXP (x, 0);
      if (! SYMBOL_REF_P (sym))
	return false;
      if (!CONST_INT_P (XEXP (x, 1)))
	return false;
      offset = INTVAL (XEXP (x, 1));
    }
  else if (SYMBOL_REF_P (x))
    sym = x;
  else if (CONST_INT_P (x))
    offset = INTVAL (x);
  else if (GET_CODE (x) == LABEL_REF)
    offset = 0; // We assume code labels are Pmode aligned
  else
    return false; // not sure what we have here.

  /* If we don't know the alignment of the thing to which the symbol refers,
     we assume optimistically it is "enough".
     ??? maybe we should be pessimistic instead.  */
  unsigned int align = 32;

  if (sym)
    {
      tree decl = SYMBOL_REF_DECL (sym);
      /* As noted above, PIC code cannot use a bare SYMBOL_REF.  */
      if (TARGET_MACHO && flag_pic && !machopic_offs_p)
	return false;

      if (MACHO_SYMBOL_INDIRECTION_P (sym))
	/* The decl in an indirection symbol is the original one, which might
	   be less aligned than the indirection.  Our indirections are always
	   pointer-aligned.  */
	;
      else if (decl && DECL_ALIGN (decl))
	align = DECL_ALIGN_UNIT (decl);
    }

  unsigned int extra = 0;
  switch (mode)
    {
    case E_DFmode:
    case E_DDmode:
    case E_DImode:
      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!TARGET_POWERPC64)
	extra = 4;
      else if ((offset & 3) || (align & 3))
	return false;
      break;

    case E_TFmode:
    case E_IFmode:
    case E_KFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
      extra = 8;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if ((offset & 3) || (align & 3))
	return false;
      break;

    default:
      break;
    }

  /* We only care if the access(es) would cause a change to the high part.  */
  offset = sext_hwi (offset, 16);
  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
/* Return true if the MEM operand is a memory operand suitable for use
   with a (full width, possibly multiple) gpr load/store.  On
   powerpc64 this means the offset must be divisible by 4.
   Implements 'Y' constraint.

   Accept direct, indexed, offset, lo_sum and tocref.  Since this is
   a constraint function we know the operand has satisfied a suitable
   memory predicate.

   Offsetting a lo_sum should not be allowed, except where we know by
   alignment that a 32k boundary is not crossed.  Note that by
   "offsetting" here we mean a further offset to access parts of the
   MEM.  It's fine to have a lo_sum where the inner address is offset
   from a sym, since the same sym+offset will appear in the high part
   of the address calculation.  */

bool
mem_operand_gpr (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* PR85755: Allow PRE_INC and PRE_DEC addresses.  */
  if (TARGET_UPDATE
      && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
      && mode_supports_pre_incdec_p (mode)
      && legitimate_indirect_address_p (XEXP (addr, 0), false))
    return true;

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
     really OK.  Doing this early avoids teaching all the other machinery
     about them.  */
  if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
    return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);

  /* Only allow offsettable addresses.  See PRs 83969 and 84279.  */
  if (!rs6000_offsettable_memref_p (op, mode, false))
    return false;

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if (TARGET_POWERPC64 && (offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = sext_hwi (offset, 16);

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
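
/* Illustrative example (assumption): with -m64, a DImode access to
   (plus (reg r3) (const_int 6)) fails the 'Y' constraint because LD/STD are
   DS-form and require the displacement to be a multiple of 4, while
   (plus (reg r3) (const_int 8)) is accepted.  */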
/* As above, but for DS-FORM VSX insns.  Unlike mem_operand_gpr,
   enforce an offset divisible by 4 even for 32-bit.  */

bool
mem_operand_ds_form (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* Allow prefixed instructions if supported.  If the bottom two bits of the
     offset are non-zero, we could use a prefixed instruction (which does not
     have the DS-form constraint that the traditional instruction had) instead
     of forcing the unaligned offset to a GPR.  */
  if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
    return true;

  if (!offsettable_address_p (false, mode, addr))
    return false;

  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  if ((offset & 3) != 0)
    return false;

  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = sext_hwi (offset, 16);

  return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p.  */

static bool
reg_offset_addressing_ok_p (machine_mode mode)
{
  switch (mode)
    {
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SFmode:
    case E_V4SImode:
    case E_V2DFmode:
    case E_V2DImode:
    case E_V1TImode:
    case E_TImode:
    case E_TFmode:
    case E_KFmode:
      /* AltiVec/VSX vector modes.  Only reg+reg addressing was valid until the
	 ISA 3.0 vector d-form addressing mode was added.  While TImode is not
	 a vector mode, if we want to use the VSX registers to move it around,
	 we need to restrict ourselves to reg+reg addressing.  Similarly for
	 IEEE 128-bit floating point that is passed in a single vector
	 register.  */
      if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
	return mode_supports_dq_form (mode);
      break;

    case E_OOmode:
    case E_XOmode:
      /* The vector pair/quad types support offset addressing if the
	 underlying vectors support offset addressing.  */
      return TARGET_MMA;

    case E_SDmode:
      /* If we can do direct load/stores of SDmode, restrict it to reg+reg
	 addressing for the LFIWZX and STFIWX instructions.  */
      if (TARGET_NO_SDMODE_STACK)
	return false;
      break;

    default:
      break;
    }

  return true;
}
static bool
virtual_stack_registers_memory_p (rtx op)
{
  int regnum;

  if (REG_P (op))
    regnum = REGNO (op);

  else if (GET_CODE (op) == PLUS
	   && REG_P (XEXP (op, 0))
	   && CONST_INT_P (XEXP (op, 1)))
    regnum = REGNO (XEXP (op, 0));

  else
    return false;

  return (regnum >= FIRST_VIRTUAL_REGISTER
	  && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
}
/* Return true if a MODE sized memory accesses to OP plus OFFSET
   is known to not straddle a 32k boundary.  This function is used
   to determine whether -mcmodel=medium code can use TOC pointer
   relative addressing for OP.  This means the alignment of the TOC
   pointer must also be taken into account, and unfortunately that is
   only 8 bytes.  */

#ifndef POWERPC64_TOC_POINTER_ALIGNMENT
#define POWERPC64_TOC_POINTER_ALIGNMENT 8
#endif

static bool
offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
			     machine_mode mode)
{
  tree decl;
  unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;

  if (!SYMBOL_REF_P (op))
    return false;

  /* ISA 3.0 vector d-form addressing is restricted, don't allow
     SYMBOL_REF.  */
  if (mode_supports_dq_form (mode))
    return false;

  dsize = GET_MODE_SIZE (mode);
  decl = SYMBOL_REF_DECL (op);
  if (!decl)
    {
      if (dsize == 0)
	return false;

      /* -fsection-anchors loses the original SYMBOL_REF_DECL when
	 replacing memory addresses with an anchor plus offset.  We
	 could find the decl by rummaging around in the block->objects
	 VEC for the given offset but that seems like too much work.  */
      dalign = BITS_PER_UNIT;
      if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
	  && SYMBOL_REF_ANCHOR_P (op)
	  && SYMBOL_REF_BLOCK (op) != NULL)
	{
	  struct object_block *block = SYMBOL_REF_BLOCK (op);

	  dalign = block->alignment;
	  offset += SYMBOL_REF_BLOCK_OFFSET (op);
	}
      else if (CONSTANT_POOL_ADDRESS_P (op))
	{
	  /* It would be nice to have get_pool_align()..  */
	  machine_mode cmode = get_pool_mode (op);

	  dalign = GET_MODE_ALIGNMENT (cmode);
	}
    }
  else if (DECL_P (decl))
    {
      dalign = DECL_ALIGN (decl);

      if (dsize == 0)
	{
	  /* Allow BLKmode when the entire object is known to not
	     cross a 32k boundary.  */
	  if (!DECL_SIZE_UNIT (decl))
	    return false;

	  if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
	    return false;

	  dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));

	  dalign /= BITS_PER_UNIT;
	  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
	    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
	  return dalign >= dsize;
	}
    }
  else
    gcc_unreachable ();

  /* Find how many bits of the alignment we know for this access.  */
  dalign /= BITS_PER_UNIT;
  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
  mask = dalign - 1;
  lsb = offset & -offset;
  mask &= lsb - 1;
  dalign = mask + 1;
  return dalign >= dsize;
}
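
/* Worked example (illustrative): a DImode access to a symbol whose decl is
   16-byte aligned at offset 40 is accepted, since the known alignment
   (capped at POWERPC64_TOC_POINTER_ALIGNMENT) is at least the 8-byte access
   size; the same access at offset 42 only guarantees 2-byte alignment and is
   rejected as possibly straddling a 32k boundary.  */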
static bool
constant_pool_expr_p (rtx op)
{
  rtx base, offset;

  split_const (op, &base, &offset);
  return (SYMBOL_REF_P (base)
	  && CONSTANT_POOL_ADDRESS_P (base)
	  && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
}
/* Create a TOC reference for symbol_ref SYMBOL.  If LARGETOC_REG is non-null,
   use that as the register to put the HIGH value into if register allocation
   is already done.  */

rtx
create_TOC_reference (rtx symbol, rtx largetoc_reg)
{
  rtx tocrel, tocreg, hi;

  gcc_assert (TARGET_TOC);

  if (TARGET_DEBUG_ADDR)
    {
      if (SYMBOL_REF_P (symbol))
	fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
		 XSTR (symbol, 0));
      else
	fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
		 GET_RTX_NAME (GET_CODE (symbol)));
      debug_rtx (symbol);
    }

  if (!can_create_pseudo_p ())
    df_set_regs_ever_live (TOC_REGISTER, true);

  tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
  tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
  if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
    return tocrel;

  hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
  if (largetoc_reg != NULL)
    {
      emit_move_insn (largetoc_reg, hi);
      hi = largetoc_reg;
    }
  return gen_rtx_LO_SUM (Pmode, hi, tocrel);
}
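
/* Illustrative note (assumption; the exact relocations depend on the ABI):
   for -mcmodel=small the result is
   (unspec [(symbol_ref) (reg 2)] UNSPEC_TOCREL), which is printed as a
   sym@toc(2) reference, while for medium/large code models after register
   allocation the (lo_sum (high ...) ...) pair becomes an addis/addi-style
   high/low sequence against the TOC pointer in r2.  */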
/* These are only used to pass through from print_operand/print_operand_address
   to rs6000_output_addr_const_extra over the intervening function
   output_addr_const which is not target code.  */
static const_rtx tocrel_base_oac, tocrel_offset_oac;

/* Return true if OP is a toc pointer relative address (the output
   of create_TOC_reference).  If STRICT, do not match non-split
   -mcmodel=large/medium toc pointer relative addresses.  If the pointers
   are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
   TOCREL_OFFSET_RET respectively.  */

bool
toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
		     const_rtx *tocrel_offset_ret)
{
  if (!TARGET_TOC)
    return false;

  if (TARGET_CMODEL != CMODEL_SMALL)
    {
      /* When strict ensure we have everything tidy.  */
      if (strict
	  && !(GET_CODE (op) == LO_SUM
	       && REG_P (XEXP (op, 0))
	       && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
	return false;

      /* When not strict, allow non-split TOC addresses and also allow
	 (lo_sum (high ..)) TOC addresses created during reload.  */
      if (GET_CODE (op) == LO_SUM)
	op = XEXP (op, 1);
    }

  const_rtx tocrel_base = op;
  const_rtx tocrel_offset = const0_rtx;

  if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
    {
      tocrel_base = XEXP (op, 0);
      tocrel_offset = XEXP (op, 1);
    }

  if (tocrel_base_ret)
    *tocrel_base_ret = tocrel_base;
  if (tocrel_offset_ret)
    *tocrel_offset_ret = tocrel_offset;

  return (GET_CODE (tocrel_base) == UNSPEC
	  && XINT (tocrel_base, 1) == UNSPEC_TOCREL
	  && REG_P (XVECEXP (tocrel_base, 0, 1))
	  && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
}
/* Return true if X is a constant pool address, and also for cmodel=medium
   if X is a toc-relative address known to be offsettable within MODE.  */

static bool
legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
				    bool strict)
{
  const_rtx tocrel_base, tocrel_offset;
  return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
	  && (TARGET_CMODEL != CMODEL_MEDIUM
	      || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
	      || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
					      INTVAL (tocrel_offset), mode)));
}
static bool
legitimate_small_data_p (machine_mode mode, rtx x)
{
  return (DEFAULT_ABI == ABI_V4
	  && !flag_pic && !TARGET_TOC
	  && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
	  && small_data_operand (x, mode));
}
bool
rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
				    bool strict, bool worst_case)
{
  unsigned HOST_WIDE_INT offset;
  unsigned int extra;

  if (GET_CODE (x) != PLUS)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  if (mode_supports_dq_form (mode))
    return quad_address_p (x, mode, strict);
  if (!reg_offset_addressing_ok_p (mode))
    return virtual_stack_registers_memory_p (x);
  if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
    return true;
  if (!CONST_INT_P (XEXP (x, 1)))
    return false;

  offset = INTVAL (XEXP (x, 1));
  extra = 0;
  switch (mode)
    {
    case E_DFmode:
    case E_DDmode:
    case E_DImode:
      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 4;
      else if (offset & 3)
	return false;
      break;

    case E_TFmode:
    case E_IFmode:
    case E_KFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
      extra = 8;
      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if (offset & 3)
	return false;
      break;

    default:
      break;
    }

  if (TARGET_PREFIXED)
    return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
  else
    return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
}
bool
legitimate_indexed_address_p (rtx x, int strict)
{
  rtx op0, op1;

  if (GET_CODE (x) != PLUS)
    return false;

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  return (REG_P (op0) && REG_P (op1)
	  && ((INT_REG_OK_FOR_BASE_P (op0, strict)
	       && INT_REG_OK_FOR_INDEX_P (op1, strict))
	      || (INT_REG_OK_FOR_BASE_P (op1, strict)
		  && INT_REG_OK_FOR_INDEX_P (op0, strict))));
}
bool
avoiding_indexed_address_p (machine_mode mode)
{
  unsigned int msize = GET_MODE_SIZE (mode);

  /* Avoid indexed addressing for modes that have non-indexed load/store
     instruction forms.  On power10, vector pairs have an indexed
     form, but vector quads don't.  */
  if (msize > 16)
    return msize != 32;

  return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
}
bool
legitimate_indirect_address_p (rtx x, int strict)
{
  return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
}
bool
macho_lo_sum_memory_operand (rtx x, machine_mode mode)
{
  if (!TARGET_MACHO || !flag_pic
      || mode != SImode || !MEM_P (x))
    return false;
  x = XEXP (x, 0);

  if (GET_CODE (x) != LO_SUM)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
    return false;
  x = XEXP (x, 1);

  return CONSTANT_P (x);
}
static bool
legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
{
  if (GET_CODE (x) != LO_SUM)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  /* quad word addresses are restricted, and we can't use LO_SUM.  */
  if (mode_supports_dq_form (mode))
    return false;
  x = XEXP (x, 1);

  if (TARGET_ELF)
    {
      bool large_toc_ok;

      if (DEFAULT_ABI == ABI_V4 && flag_pic)
	return false;
      /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
	 push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
	 recognizes some LO_SUM addresses as valid although this
	 function says opposite.  In most cases, LRA through different
	 transformations can generate correct code for address reloads.
	 It cannot manage only some LO_SUM cases.  So we need to add
	 code here saying that some addresses are still valid.  */
      large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
		      && small_toc_ref (x, VOIDmode));
      if (TARGET_TOC && ! large_toc_ok)
	return false;
      if (GET_MODE_NUNITS (mode) != 1)
	return false;
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	  && !(/* ??? Assume floating point reg based on mode?  */
	       TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	return false;

      return CONSTANT_P (x) || large_toc_ok;
    }
  else if (TARGET_MACHO)
    {
      if (GET_MODE_NUNITS (mode) != 1)
	return false;
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
	  && !(/* see above  */
	       TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	return false;

      if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic)
	return CONSTANT_P (x);

      /* Macho-O PIC code from here.  */
      if (GET_CODE (x) == CONST)
	x = XEXP (x, 0);

      /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET.  */
      if (SYMBOL_REF_P (x))
	return false;

      /* So this is OK if the wrapped object is const.  */
      if (GET_CODE (x) == UNSPEC
	  && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
	return CONSTANT_P (XVECEXP (x, 0, 0));
      return CONSTANT_P (x);
    }

  return false;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This is used from only one place: `memory_address' in explow.cc.

   OLDX is the address as it was before break_out_memory_refs was
   called.  In some cases it is useful to look at this to decide what
   needs to be done.

   It is always safe for this function to do nothing.  It exists to
   recognize opportunities to optimize the output.

   On RS/6000, first check for the sum of a register with a constant
   integer that is out of range.  If so, generate code to add the
   constant with the low-order 16 bits masked to the register and force
   this result into another register (this can be done with `cau').
   Then generate an address of REG+(CONST&0xffff), allowing for the
   possibility of bit 16 being a one.

   Then check for the sum of a register and something not constant, try to
   load the other things into a register and return the sum.  */

static rtx
rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			   machine_mode mode)
{
  unsigned int extra;

  if (!reg_offset_addressing_ok_p (mode)
      || mode_supports_dq_form (mode))
    {
      if (virtual_stack_registers_memory_p (x))
	return x;

      /* In theory we should not be seeing addresses of the form reg+0,
	 but just in case it is generated, optimize it away.  */
      if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
	return force_reg (Pmode, XEXP (x, 0));

      /* For TImode with load/store quad, restrict addresses to just a single
	 pointer, so it works with both GPRs and VSX registers.  */
      /* Make sure both operands are registers.  */
      else if (GET_CODE (x) == PLUS
	       && (mode != TImode || !TARGET_VSX))
	return gen_rtx_PLUS (Pmode,
			     force_reg (Pmode, XEXP (x, 0)),
			     force_reg (Pmode, XEXP (x, 1)));
      else
	return force_reg (Pmode, x);
    }

  if (SYMBOL_REF_P (x) && !TARGET_MACHO)
    {
      enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
      if (model != 0)
	return rs6000_legitimize_tls_address (x, model);
    }

  extra = 0;
  switch (mode)
    {
    case E_TFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
    case E_IFmode:
    case E_KFmode:
      /* As in legitimate_offset_address_p we do not assume
	 worst-case.  The mode here is just a hint as to the registers
	 used.  A TImode is usually in gprs, but may actually be in
	 fprs.  Leave worst-case scenario for reload to handle via
	 insn constraints.  PTImode is only GPRs.  */
      extra = 8;
      break;
    default:
      break;
    }

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
	  >= 0x10000 - extra))
    {
      HOST_WIDE_INT high_int, low_int;
      rtx sum;

      low_int = sext_hwi (INTVAL (XEXP (x, 1)), 16);
      if (low_int >= 0x8000 - extra)
	low_int = 0;
      high_int = INTVAL (XEXP (x, 1)) - low_int;
      sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
					 gen_int_mode (high_int, Pmode)), 0);
      return plus_constant (Pmode, sum, low_int);
    }
  else if (GET_CODE (x) == PLUS
	   && REG_P (XEXP (x, 0))
	   && !CONST_INT_P (XEXP (x, 1))
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
	   && !avoiding_indexed_address_p (mode))
    {
      return gen_rtx_PLUS (Pmode, XEXP (x, 0),
			   force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
    }
  else if ((TARGET_ELF
	    || !MACHO_DYNAMIC_NO_PIC_P)
	   && TARGET_32BIT
	   && TARGET_NO_TOC_OR_PCREL
	   && !flag_pic
	   && !CONST_INT_P (x)
	   && !CONST_WIDE_INT_P (x)
	   && !CONST_DOUBLE_P (x)
	   && CONSTANT_P (x)
	   && GET_MODE_NUNITS (mode) == 1
	   && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	       || (/* ??? Assume floating point reg based on mode?  */
		   TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
    {
      rtx reg = gen_reg_rtx (Pmode);
      if (TARGET_ELF)
	emit_insn (gen_elf_high (reg, x));
      else
	emit_insn (gen_macho_high (Pmode, reg, x));
      return gen_rtx_LO_SUM (Pmode, reg, x);
    }
  else if (TARGET_TOC
	   && SYMBOL_REF_P (x)
	   && constant_pool_expr_p (x)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
    return create_TOC_reference (x, NULL_RTX);
  else
    return x;
}
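
/* Worked example (illustrative): legitimizing (plus (reg r9)
   (const_int 0x12345)) splits the constant into 0x10000 + 0x2345 (the low
   part is the sign-extended bottom 16 bits), adds the high part into a new
   register, and returns (plus (reg tmp) (const_int 0x2345)), which fits a
   16-bit displacement.  */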
/* Debug version of rs6000_legitimize_address.  */
static rtx
rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
{
  rtx ret;
  rtx_insn *insns;

  start_sequence ();
  ret = rs6000_legitimize_address (x, oldx, mode);
  insns = get_insns ();
  end_sequence ();

  if (ret != x)
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, old code %s, "
	       "new code %s, modified\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
	       GET_RTX_NAME (GET_CODE (ret)));

      fprintf (stderr, "Original address:\n");
      debug_rtx (x);

      fprintf (stderr, "oldx:\n");
      debug_rtx (oldx);

      fprintf (stderr, "New address:\n");
      debug_rtx (ret);

      if (insns)
	{
	  fprintf (stderr, "Insns added:\n");
	  debug_rtx_list (insns, 20);
	}
    }
  else
    {
      fprintf (stderr,
	       "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
	       GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));

      debug_rtx (x);
    }

  if (insns)
    emit_insn (insns);

  return ret;
}
/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void
rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  switch (size)
    {
    case 4:
      fputs ("\t.long\t", file);
      break;
    case 8:
      fputs (DOUBLE_INT_ASM_OP, file);
      break;
    default:
      gcc_unreachable ();
    }
  output_addr_const (file, x);
  fputs ("@dtprel+0x8000", file);
}
/* Return true if X is a symbol that refers to real (rather than emulated)
   TLS.  */

static bool
rs6000_real_tls_symbol_ref_p (rtx x)
{
  return (SYMBOL_REF_P (x)
	  && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize various UNSPEC sequences
   and turn them back into a direct symbol reference.  */

static rtx
rs6000_delegitimize_address (rtx orig_x)
{
  rtx x, y, offset;

  /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion.  It
     encodes loading up the high part of the address of a TOC reference along
     with a load of a GPR using the same base register used for the load.  We
     return the original SYMBOL_REF.

	(set (reg:INT1 <reg>
	     (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))

     UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass.  These
     UNSPECs include the external SYMBOL_REF along with the value being loaded.
     We return the original SYMBOL_REF.

	(parallel [(set (reg:DI <base-reg>)
			(unspec:DI [(symbol_ref <symbol>)
				    (const_int <marker>)]
				   UNSPEC_PCREL_OPT_LD_ADDR))
		   (set (reg:DI <load-reg>)
			(unspec:DI [(const_int 0)]
				   UNSPEC_PCREL_OPT_LD_DATA))])

     UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
     GPR being loaded is the same as the GPR used to hold the external address.

	(set (reg:DI <base-reg>)
	     (unspec:DI [(symbol_ref <symbol>)
			 (const_int <marker>)]
			UNSPEC_PCREL_OPT_LD_SAME_REG))

     UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass.  This
     UNSPEC includes the external SYMBOL_REF along with the value being stored.
     We return the original SYMBOL_REF.

	(parallel [(set (reg:DI <base-reg>)
			(unspec:DI [(symbol_ref <symbol>)
				    (const_int <marker>)]
				   UNSPEC_PCREL_OPT_ST_ADDR))
		   (use (reg <store-reg>))])  */

  if (GET_CODE (orig_x) == UNSPEC)
    switch (XINT (orig_x, 1))
      {
      case UNSPEC_FUSION_GPR:
      case UNSPEC_PCREL_OPT_LD_ADDR:
      case UNSPEC_PCREL_OPT_LD_SAME_REG:
      case UNSPEC_PCREL_OPT_ST_ADDR:
	orig_x = XVECEXP (orig_x, 0, 0);
	break;

      default:
	break;
      }

  orig_x = delegitimize_mem_from_attrs (orig_x);

  x = orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);

  y = x;
  if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
    y = XEXP (y, 1);

  offset = NULL_RTX;
  if (GET_CODE (y) == PLUS
      && GET_MODE (y) == Pmode
      && CONST_INT_P (XEXP (y, 1)))
    {
      offset = XEXP (y, 1);
      y = XEXP (y, 0);
    }

  if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
    {
      y = XVECEXP (y, 0, 0);

      /* Do not associate thread-local symbols with the original
	 constant pool symbol.  */
      if (TARGET_XCOFF
	  && SYMBOL_REF_P (y)
	  && CONSTANT_POOL_ADDRESS_P (y)
	  && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
	return orig_x;

      if (offset != NULL_RTX)
	y = gen_rtx_PLUS (Pmode, y, offset);
      if (!MEM_P (orig_x))
	return y;
      else
	return replace_equiv_address_nv (orig_x, y);
    }

  if (TARGET_MACHO
      && GET_CODE (orig_x) == LO_SUM
      && GET_CODE (XEXP (orig_x, 1)) == CONST)
    {
      y = XEXP (XEXP (orig_x, 1), 0);
      if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
	return XVECEXP (y, 0, 0);
    }

  return orig_x;
}
/* Return true if X shouldn't be emitted into the debug info.
   The linker doesn't like .toc section references from
   .debug_* sections, so reject .toc section symbols.  */

static bool
rs6000_const_not_ok_for_debug_p (rtx x)
{
  if (GET_CODE (x) == UNSPEC)
    return true;
  if (SYMBOL_REF_P (x)
      && CONSTANT_POOL_ADDRESS_P (x))
    {
      rtx c = get_pool_constant (x);
      machine_mode cmode = get_pool_mode (x);
      if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
	return true;
    }

  return false;
}
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */

static bool
rs6000_legitimate_combined_insn (rtx_insn *insn)
{
  int icode = INSN_CODE (insn);

  /* Reject creating doloop insns.  Combine should not be allowed
     to create these for a number of reasons:
     1) In a nested loop, if combine creates one of these in an
     outer loop and the register allocator happens to allocate ctr
     to the outer loop insn, then the inner loop can't use ctr.
     Inner loops ought to be more highly optimized.
     2) Combine often wants to create one of these from what was
     originally a three insn sequence, first combining the three
     insns to two, then to ctrsi/ctrdi.  When ctrsi/ctrdi is not
     allocated ctr, the splitter takes us back to the three insn
     sequence.  It's better to stop combine at the two insn
     sequence.
     3) Faced with not being able to allocate ctr for ctrsi/ctrdi
     insns, the register allocator sometimes uses floating point
     or vector registers for the pseudo.  Since ctrsi/ctrdi is a
     jump insn and output reloads are not implemented for jumps,
     the ctrsi/ctrdi splitters need to handle all possible cases.
     That's a pain, and it gets to be seriously difficult when a
     splitter that runs after reload needs memory to transfer from
     a gpr to fpr.  See PR70098 and PR71763 which are not fixed
     for the difficult case.  It's better to not create problems
     in the first place.  */
  if (icode != CODE_FOR_nothing
      && (icode == CODE_FOR_bdz_si
	  || icode == CODE_FOR_bdz_di
	  || icode == CODE_FOR_bdnz_si
	  || icode == CODE_FOR_bdnz_di
	  || icode == CODE_FOR_bdztf_si
	  || icode == CODE_FOR_bdztf_di
	  || icode == CODE_FOR_bdnztf_si
	  || icode == CODE_FOR_bdnztf_di))
    return false;

  return true;
}
9526 static GTY(()) rtx rs6000_tls_symbol
;
9528 rs6000_tls_get_addr (void)
9530 if (!rs6000_tls_symbol
)
9531 rs6000_tls_symbol
= init_one_libfunc ("__tls_get_addr");
9533 return rs6000_tls_symbol
;
9536 /* Construct the SYMBOL_REF for TLS GOT references. */
9538 static GTY(()) rtx rs6000_got_symbol
;
9540 rs6000_got_sym (void)
9542 if (!rs6000_got_symbol
)
9544 rs6000_got_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
9545 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_LOCAL
;
9546 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_EXTERNAL
;
9549 return rs6000_got_symbol
;
/* AIX Thread-Local Address support.  */

static rtx
rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
{
  rtx sym, mem, tocref, tlsreg, tmpreg, dest;
  const char *name;
  char *tlsname;

  /* Place addr into TOC constant pool.  */
  sym = force_const_mem (GET_MODE (addr), addr);

  /* Output the TOC entry and create the MEM referencing the value.  */
  if (constant_pool_expr_p (XEXP (sym, 0))
      && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
    {
      tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
      mem = gen_const_mem (Pmode, tocref);
      set_mem_alias_set (mem, get_TOC_alias_set ());
    }
  else
    return sym;

  /* Use global-dynamic for local-dynamic.  */
  if (model == TLS_MODEL_GLOBAL_DYNAMIC
      || model == TLS_MODEL_LOCAL_DYNAMIC)
    {
      /* Create new TOC reference for @m symbol.  */
      name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
      tlsname = XALLOCAVEC (char, strlen (name) + 1);
      strcpy (tlsname, "*LCM");
      strcat (tlsname, name + 3);
      rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
      SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
      tocref = create_TOC_reference (modaddr, NULL_RTX);
      rtx modmem = gen_const_mem (Pmode, tocref);
      set_mem_alias_set (modmem, get_TOC_alias_set ());

      rtx modreg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (modreg, modmem));

      tmpreg = gen_reg_rtx (Pmode);
      emit_insn (gen_rtx_SET (tmpreg, mem));

      dest = gen_reg_rtx (Pmode);
      if (TARGET_32BIT)
	emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
      else
	emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
      return dest;
    }
  /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13.  */
  else if (TARGET_32BIT)
    {
      tlsreg = gen_reg_rtx (SImode);
      emit_insn (gen_tls_get_tpointer (tlsreg));
    }
  else
    {
      tlsreg = gen_rtx_REG (DImode, 13);
      xcoff_tls_exec_model_detected = true;
    }

  /* Load the TOC value into temporary register.  */
  tmpreg = gen_reg_rtx (Pmode);
  emit_insn (gen_rtx_SET (tmpreg, mem));
  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_MINUS (Pmode, addr, tlsreg));

  /* Add TOC symbol value to TLS pointer.  */
  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));

  return dest;
}
/* Passes the tls arg value for global dynamic and local dynamic
   emit_library_call_value in rs6000_legitimize_tls_address to
   rs6000_call_aix and rs6000_call_sysv.  This is used to emit the
   marker relocs put on __tls_get_addr calls.  */
static rtx global_tlsarg;

/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address.  */

static rtx
rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
{
  rtx dest, insn;

  if (TARGET_XCOFF)
    return rs6000_legitimize_tls_address_aix (addr, model);

  dest = gen_reg_rtx (Pmode);
  if (model == TLS_MODEL_LOCAL_EXEC
      && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
    {
      rtx tlsreg;

      if (TARGET_64BIT)
	{
	  tlsreg = gen_rtx_REG (Pmode, 13);
	  insn = gen_tls_tprel_64 (dest, tlsreg, addr);
	}
      else
	{
	  tlsreg = gen_rtx_REG (Pmode, 2);
	  insn = gen_tls_tprel_32 (dest, tlsreg, addr);
	}
      emit_insn (insn);
    }
  else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
    {
      rtx tlsreg, tmp;

      tmp = gen_reg_rtx (Pmode);
      if (TARGET_64BIT)
	{
	  tlsreg = gen_rtx_REG (Pmode, 13);
	  insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
	}
      else
	{
	  tlsreg = gen_rtx_REG (Pmode, 2);
	  insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
	}
      emit_insn (insn);
      if (TARGET_64BIT)
	insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
      else
	insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
      emit_insn (insn);
    }
  else
    {
      rtx got, tga, tmp1, tmp2;

      /* We currently use relocations like @got@tlsgd for tls, which
	 means the linker will handle allocation of tls entries, placing
	 them in the .got section.  So use a pointer to the .got section,
	 not one to secondary TOC sections used by 64-bit -mminimal-toc,
	 or to secondary GOT sections used by 32-bit -fPIC.  */
      if (rs6000_pcrel_p ())
	got = const0_rtx;
      else if (TARGET_64BIT)
	got = gen_rtx_REG (Pmode, 2);
      else
	{
	  if (flag_pic == 1)
	    got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
	  else
	    {
	      rtx gsym = rs6000_got_sym ();
	      got = gen_reg_rtx (Pmode);
	      if (flag_pic == 0)
		rs6000_emit_move (got, gsym, Pmode);
	      else
		{
		  rtx mem, lab;

		  tmp1 = gen_reg_rtx (Pmode);
		  tmp2 = gen_reg_rtx (Pmode);
		  mem = gen_const_mem (Pmode, tmp1);
		  lab = gen_label_rtx ();
		  emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
		  emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
		  if (TARGET_LINK_STACK)
		    emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
		  emit_move_insn (tmp2, mem);
		  rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
		  set_unique_reg_note (last, REG_EQUAL, gsym);
		}
	    }
	}

      if (model == TLS_MODEL_GLOBAL_DYNAMIC)
	{
	  rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
				    UNSPEC_TLSGD);
	  tga = rs6000_tls_get_addr ();
	  rtx argreg = gen_rtx_REG (Pmode, 3);
	  emit_insn (gen_rtx_SET (argreg, arg));
	  global_tlsarg = arg;
	  emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
	  global_tlsarg = NULL_RTX;

	  /* Make a note so that the result of this call can be CSEd.  */
	  rtvec vec = gen_rtvec (1, copy_rtx (arg));
	  rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
	}
      else if (model == TLS_MODEL_LOCAL_DYNAMIC)
	{
	  rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
	  tga = rs6000_tls_get_addr ();
	  tmp1 = gen_reg_rtx (Pmode);
	  rtx argreg = gen_rtx_REG (Pmode, 3);
	  emit_insn (gen_rtx_SET (argreg, arg));
	  global_tlsarg = arg;
	  emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
	  global_tlsarg = NULL_RTX;

	  /* Make a note so that the result of this call can be CSEd.  */
	  rtvec vec = gen_rtvec (1, copy_rtx (arg));
	  rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);

	  if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
	    {
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_64 (dest, tmp1, addr);
	      else
		insn = gen_tls_dtprel_32 (dest, tmp1, addr);
	    }
	  else if (rs6000_tls_size == 32)
	    {
	      tmp2 = gen_reg_rtx (Pmode);
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
	      else
		insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
	      emit_insn (insn);
	      if (TARGET_64BIT)
		insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
	      else
		insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
	    }
	  else
	    {
	      tmp2 = gen_reg_rtx (Pmode);
	      if (TARGET_64BIT)
		insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
	      else
		insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
	      emit_insn (insn);
	      insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
	    }
	  emit_insn (insn);
	}
      else
	{
	  /* IE, or 64-bit offset LE.  */
	  tmp2 = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    insn = gen_tls_got_tprel_64 (tmp2, got, addr);
	  else
	    insn = gen_tls_got_tprel_32 (tmp2, got, addr);
	  emit_insn (insn);

	  if (rs6000_pcrel_p ())
	    {
	      if (TARGET_64BIT)
		insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
	      else
		insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
	    }
	  else if (TARGET_64BIT)
	    insn = gen_tls_tls_64 (dest, tmp2, addr);
	  else
	    insn = gen_tls_tls_32 (dest, tmp2, addr);
	  emit_insn (insn);
	}
    }

  return dest;
}
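
/* Illustrative note (assumption; the exact assembly depends on the ABI and
   code model): for 64-bit ELF global-dynamic TLS the sequence built above
   loads r3 with the sym@got@tlsgd slot address relative to the TOC pointer
   and calls __tls_get_addr with the sym@tlsgd marker relocation recorded via
   global_tlsarg, so the linker can later relax the call; DEST receives the
   result from r3.  */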
/* Only create the global variable for the stack protect guard if we are using
   the global flavor of that guard.  */
static tree
rs6000_init_stack_protect_guard (void)
{
  if (rs6000_stack_protector_guard == SSP_GLOBAL)
    return default_stack_protect_guard ();

  return NULL_TREE;
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* If GET_CODE (x) is HIGH, the 'X' represents the high part of a symbol_ref.
     It cannot be put into a constant pool.  e.g.
     (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..)
     (high:DI (symbol_ref:DI ("var")..)).  */
  if (GET_CODE (x) == HIGH)
    return true;

  /* A TLS symbol in the TOC cannot contain a sum.  */
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
      && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
    return true;

  /* Allow AIX TOC TLS symbols in the constant pool,
     but not ELF TLS symbols.  */
  return TARGET_ELF && tls_referenced_p (x);
}
/* Return true iff the given SYMBOL_REF refers to a constant pool entry
   that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
   can be addressed relative to the toc pointer.  */

static bool
use_toc_relative_ref (rtx sym, machine_mode mode)
{
  return ((constant_pool_expr_p (sym)
	   && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
					       get_pool_mode (sym)))
	  || (TARGET_CMODEL == CMODEL_MEDIUM
	      && SYMBOL_REF_LOCAL_P (sym)
	      && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
}
9867 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9868 that is a valid memory address for an instruction.
9869 The MODE argument is the machine mode for the MEM expression
9870 that wants to use this address.
9872 On the RS/6000, there are four valid address: a SYMBOL_REF that
9873 refers to a constant pool entry of an address (or the sum of it
9874 plus a constant), a short (16-bit signed) constant plus a register,
9875 the sum of two registers, or a register indirect, possibly with an
9876 auto-increment. For DFmode, DDmode and DImode with a constant plus
9877 register, we must ensure that both words are addressable or PowerPC64
9878 with offset word aligned.
9880 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9881 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9882 because adjacent memory cells are accessed by adding word-sized offsets
9883 during assembly output. */
9885 rs6000_legitimate_address_p (machine_mode mode
, rtx x
, bool reg_ok_strict
,
9886 code_helper ch
= ERROR_MARK
)
9888 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
9889 bool quad_offset_p
= mode_supports_dq_form (mode
);
9891 if (TARGET_ELF
&& RS6000_SYMBOL_REF_TLS_P (x
))
9894 /* lxvl and stxvl doesn't support any addressing modes with PLUS. */
9895 if (ch
.is_internal_fn ()
9896 && (ch
== IFN_LEN_LOAD
|| ch
== IFN_LEN_STORE
)
9897 && GET_CODE (x
) == PLUS
)
9900 /* Handle unaligned altivec lvx/stvx type addresses. */
9901 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
9902 && GET_CODE (x
) == AND
9903 && CONST_INT_P (XEXP (x
, 1))
9904 && INTVAL (XEXP (x
, 1)) == -16)
9907 return (legitimate_indirect_address_p (x
, reg_ok_strict
)
9908 || legitimate_indexed_address_p (x
, reg_ok_strict
)
9909 || virtual_stack_registers_memory_p (x
));
9912 if (legitimate_indirect_address_p (x
, reg_ok_strict
))
9915 && (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == PRE_DEC
)
9916 && mode_supports_pre_incdec_p (mode
)
9917 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
))
9920 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9921 if (address_is_prefixed (x
, mode
, NON_PREFIXED_DEFAULT
))
9924 /* Handle restricted vector d-form offsets in ISA 3.0. */
9927 if (quad_address_p (x
, mode
, reg_ok_strict
))
9930 else if (virtual_stack_registers_memory_p (x
))
9933 else if (reg_offset_p
)
9935 if (legitimate_small_data_p (mode
, x
))
9937 if (legitimate_constant_pool_address_p (x
, mode
,
9938 reg_ok_strict
|| lra_in_progress
))
9942 /* For TImode, if we have TImode in VSX registers, only allow register
9943 indirect addresses. This will allow the values to go in either GPRs
9944 or VSX registers without reloading. The vector types would tend to
9945 go into VSX registers, so we allow REG+REG, while TImode seems
9946 somewhat split, in that some uses are GPR based, and some VSX based. */
9947 /* FIXME: We could loosen this by changing the following to
9948 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9949 but currently we cannot allow REG+REG addressing for TImode. See
9950 PR72827 for complete details on how this ends up hoodwinking DSE. */
9951 if (mode
== TImode
&& TARGET_VSX
)
9953 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9956 && GET_CODE (x
) == PLUS
9957 && REG_P (XEXP (x
, 0))
9958 && (XEXP (x
, 0) == virtual_stack_vars_rtx
9959 || XEXP (x
, 0) == arg_pointer_rtx
)
9960 && CONST_INT_P (XEXP (x
, 1)))
9962 if (rs6000_legitimate_offset_address_p (mode
, x
, reg_ok_strict
, false))
  if (!FLOAT128_2REG_P (mode)
      && (TARGET_HARD_FLOAT
          || TARGET_POWERPC64
          || (mode != DFmode && mode != DDmode))
      && (TARGET_POWERPC64 || mode != DImode)
      && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
      && mode != PTImode
      && !avoiding_indexed_address_p (mode)
      && legitimate_indexed_address_p (x, reg_ok_strict))
    return 1;
  if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
      && mode_supports_pre_modify_p (mode)
      && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
      && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
                                              reg_ok_strict, false)
          || (!avoiding_indexed_address_p (mode)
              && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
      && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* There is no prefixed version of the load/store with update.  */
      rtx addr = XEXP (x, 1);
      return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
    }
  if (reg_offset_p && !quad_offset_p
      && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
    return 1;

  return 0;
}
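/* Illustrative sketch (not part of GCC): a stand-alone approximation of the
   address classification above, for a hypothetical simplified model where an
   address is a register, a register plus 16-bit displacement, or a register
   plus register.  Guarded by "#if 0" so it does not affect the build; the
   demo_* names are invented for illustration only.  */
#if 0
#include <stdbool.h>
#include <stdint.h>

enum demo_addr_kind { DEMO_REG, DEMO_REG_DISP, DEMO_REG_REG };

struct demo_addr
{
  enum demo_addr_kind kind;
  int64_t disp;   /* Only meaningful for DEMO_REG_DISP.  */
};

/* Mirror of the "short (16-bit signed) constant plus a register" rule:
   the displacement must fit in a signed 16-bit field, and for a multi-word
   access the displacement of the last word (disp + extra) must fit too.  */
static bool
demo_legitimate_address_p (const struct demo_addr *a, int64_t extra)
{
  switch (a->kind)
    {
    case DEMO_REG:
    case DEMO_REG_REG:
      return true;
    case DEMO_REG_DISP:
      return a->disp >= -32768 && a->disp + extra <= 32767;
    }
  return false;
}
#endif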
/* Debug version of rs6000_legitimate_address_p.  */
static bool
rs6000_debug_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict,
                                   code_helper ch)
{
  bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict, ch);
  fprintf (stderr,
           "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
           "strict = %d, reload = %s, code = %s\n",
           ret ? "true" : "false",
           GET_MODE_NAME (mode),
           reg_ok_strict,
           (reload_completed ? "after" : "before"),
           GET_RTX_NAME (GET_CODE (x)));

  return ret;
}
/* Implement TARGET_MODE_DEPENDENT_ADDRESS_P.  */

static bool
rs6000_mode_dependent_address_p (const_rtx addr,
                                 addr_space_t as ATTRIBUTE_UNUSED)
{
  return rs6000_mode_dependent_address_ptr (addr);
}
/* Go to LABEL if ADDR (a legitimate address expression)
   has an effect that depends on the machine mode it is used for.

   On the RS/6000 this is true of all integral offsets (since AltiVec
   and VSX modes don't allow them) or is a pre-increment or decrement.

   ??? Except that due to conceptual problems in offsettable_address_p
   we can't really report the problems of integral offsets.  So leave
   this assuming that the adjustable offset must be valid for the
   sub-words of a TFmode operand, which is what we had before.  */
static bool
rs6000_mode_dependent_address (const_rtx addr)
{
  switch (GET_CODE (addr))
    {
    case PLUS:
      /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
         is considered a legitimate address before reload, so there
         are no offset restrictions in that case.  Note that this
         condition is safe in strict mode because any address involving
         virtual_stack_vars_rtx or arg_pointer_rtx would already have
         been rejected as illegitimate.  */
      if (XEXP (addr, 0) != virtual_stack_vars_rtx
          && XEXP (addr, 0) != arg_pointer_rtx
          && CONST_INT_P (XEXP (addr, 1)))
        {
          HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
          HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
          if (TARGET_PREFIXED)
            return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
          else
            return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
        }
      break;

    case LO_SUM:
      /* Anything in the constant pool is sufficiently aligned that
         all bytes have the same high part address.  */
      return !legitimate_constant_pool_address_p (addr, QImode, false);

      /* Auto-increment cases are now treated generically in recog.cc.  */
    case PRE_MODIFY:
      return TARGET_UPDATE;
      /* AND is only allowed in Altivec loads.  */
/* Debug version of rs6000_mode_dependent_address.  */

static bool
rs6000_debug_mode_dependent_address (const_rtx addr)
{
  bool ret = rs6000_mode_dependent_address (addr);

  fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
           ret ? "true" : "false");

  return ret;
}
10090 /* Implement FIND_BASE_TERM. */
10093 rs6000_find_base_term (rtx op
)
10098 if (GET_CODE (base
) == CONST
)
10099 base
= XEXP (base
, 0);
10100 if (GET_CODE (base
) == PLUS
)
10101 base
= XEXP (base
, 0);
10102 if (GET_CODE (base
) == UNSPEC
)
10103 switch (XINT (base
, 1))
10105 case UNSPEC_TOCREL
:
10106 case UNSPEC_MACHOPIC_OFFSET
:
10107 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10108 for aliasing purposes. */
10109 return XVECEXP (base
, 0, 0);
/* More elaborate version of recog's offsettable_memref_p predicate
   that works around the ??? note of rs6000_mode_dependent_address.
   In particular it accepts

     (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))

   in 32-bit mode, that the recog predicate rejects.  */
10124 rs6000_offsettable_memref_p (rtx op
, machine_mode reg_mode
, bool strict
)
10131 /* First mimic offsettable_memref_p. */
10132 if (offsettable_address_p (strict
, GET_MODE (op
), XEXP (op
, 0)))
  /* offsettable_address_p invokes rs6000_mode_dependent_address, but
     the latter predicate knows nothing about the mode of the memory
     reference and, therefore, assumes that it is the largest supported
     mode (TFmode).  As a consequence, legitimate offsettable memory
     references are rejected.  rs6000_legitimate_offset_address_p contains
     the correct logic for the PLUS case of rs6000_mode_dependent_address,
     at least with a little bit of help here given that we know the
     actual registers used.  */
10143 worst_case
= ((TARGET_POWERPC64
&& GET_MODE_CLASS (reg_mode
) == MODE_INT
)
10144 || GET_MODE_SIZE (reg_mode
) == 4);
10145 return rs6000_legitimate_offset_address_p (GET_MODE (op
), XEXP (op
, 0),
10146 strict
, worst_case
);
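/* Illustrative sketch (not part of GCC): the worst-case offset handling
   above and the PLUS case of rs6000_mode_dependent_address both ask whether
   an integral offset stays encodable for every word of the access.  Assuming
   the usual reading of SIGNED_16BIT_OFFSET_EXTRA_P (VAL, EXTRA) -- VAL fits
   a signed 16-bit field and so does VAL + EXTRA -- a stand-alone version
   might look like this.  Guarded by "#if 0"; the demo_* name is invented.  */
#if 0
#include <stdbool.h>
#include <stdint.h>

static bool
demo_signed_nbit_offset_extra_p (int64_t val, int64_t extra, int bits)
{
  int64_t lo = -((int64_t) 1 << (bits - 1));
  int64_t hi = ((int64_t) 1 << (bits - 1)) - 1;
  return val >= lo && val <= hi && val + extra >= lo && val + extra <= hi;
}

/* demo_signed_nbit_offset_extra_p (32760, 8, 16) is false: the last word of
   an 8-byte access at offset 32760 would need offset 32768, which no longer
   fits a 16-bit D field, so such an address is mode-dependent.  */
#endif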
/* Determine the reassociation width to be used in reassociate_bb.
   This takes into account how many parallel operations we
   can actually do of a given type, and also the latency.
   P8:
     int add/sub 6/cycle
         mul 2/cycle
     vect add/sub/mul 2/cycle
     fp   add/sub/mul 2/cycle
     dfp  1/cycle.  */
10161 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
10164 switch (rs6000_tune
)
10166 case PROCESSOR_POWER8
:
10167 case PROCESSOR_POWER9
:
10168 case PROCESSOR_POWER10
:
10169 if (DECIMAL_FLOAT_MODE_P (mode
))
10171 if (VECTOR_MODE_P (mode
))
10173 if (INTEGRAL_MODE_P (mode
))
10175 if (FLOAT_MODE_P (mode
))
10184 /* Change register usage conditional on target flags. */
10186 rs6000_conditional_register_usage (void)
10190 if (TARGET_DEBUG_TARGET
)
10191 fprintf (stderr
, "rs6000_conditional_register_usage called\n");
10193 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10195 fixed_regs
[13] = call_used_regs
[13] = 1;
10197 /* Conditionally disable FPRs. */
10198 if (TARGET_SOFT_FLOAT
)
10199 for (i
= 32; i
< 64; i
++)
10200 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10202 /* The TOC register is not killed across calls in a way that is
10203 visible to the compiler. */
10204 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
10205 call_used_regs
[2] = 0;
10207 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 2)
10208 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10210 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 1)
10211 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10212 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10214 if (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
10215 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10216 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10218 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
)
10219 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10221 if (!TARGET_ALTIVEC
&& !TARGET_VSX
)
10223 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
10224 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10225 call_used_regs
[VRSAVE_REGNO
] = 1;
10228 if (TARGET_ALTIVEC
|| TARGET_VSX
)
10229 global_regs
[VSCR_REGNO
] = 1;
10231 if (TARGET_ALTIVEC_ABI
)
10233 for (i
= FIRST_ALTIVEC_REGNO
; i
< FIRST_ALTIVEC_REGNO
+ 20; ++i
)
10234 call_used_regs
[i
] = 1;
10236 /* AIX reserves VR20:31 in non-extended ABI mode. */
10237 if (TARGET_XCOFF
&& !rs6000_aix_extabi
)
10238 for (i
= FIRST_ALTIVEC_REGNO
+ 20; i
< FIRST_ALTIVEC_REGNO
+ 32; ++i
)
10239 fixed_regs
[i
] = call_used_regs
[i
] = 1;
/* Output insns to set DEST equal to the constant SOURCE as a series of
   lis, ori and shl instructions and return TRUE.  */
10248 rs6000_emit_set_const (rtx dest
, rtx source
)
10250 machine_mode mode
= GET_MODE (dest
);
10255 gcc_checking_assert (CONST_INT_P (source
));
10256 c
= INTVAL (source
);
10261 emit_insn (gen_rtx_SET (dest
, source
));
10265 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (SImode
);
10267 emit_insn (gen_rtx_SET (temp
, GEN_INT (c
& ~(HOST_WIDE_INT
) 0xffff)));
10268 emit_insn (gen_rtx_SET (dest
,
10269 gen_rtx_IOR (SImode
, temp
,
10270 GEN_INT (c
& 0xffff))));
10274 if (!TARGET_POWERPC64
)
10278 hi
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
== 0, DImode
);
10279 lo
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
!= 0, DImode
);
10280 emit_move_insn (hi
, GEN_INT (c
>> 32));
10281 c
= sext_hwi (c
, 32);
10282 emit_move_insn (lo
, GEN_INT (c
));
10285 rs6000_emit_set_long_const (dest
, c
);
10289 gcc_unreachable ();
10292 insn
= get_last_insn ();
10293 set
= single_set (insn
);
10294 if (! CONSTANT_P (SET_SRC (set
)))
10295 set_unique_reg_note (insn
, REG_EQUAL
, GEN_INT (c
));
/* Check if C can be rotated to a negative value which the 'lis' instruction
   is able to load: 1..1xx0..0.  If so, set *ROT to the number by which C is
   rotated, and return true.  Return false otherwise.  */
10305 can_be_rotated_to_negative_lis (HOST_WIDE_INT c
, int *rot
)
10307 /* case a. 1..1xxx0..01..1: up to 15 x's, at least 16 0's. */
10308 int leading_ones
= clz_hwi (~c
);
10309 int tailing_ones
= ctz_hwi (~c
);
10310 int middle_zeros
= ctz_hwi (c
>> tailing_ones
);
10311 if (middle_zeros
>= 16 && leading_ones
+ tailing_ones
>= 33)
10313 *rot
= HOST_BITS_PER_WIDE_INT
- tailing_ones
;
10317 /* case b. xx0..01..1xx: some of 15 x's (and some of 16 0's) are
10318 rotated over the highest bit. */
10319 int pos_one
= clz_hwi ((c
<< 16) >> 16);
10320 middle_zeros
= ctz_hwi (c
>> (HOST_BITS_PER_WIDE_INT
- pos_one
));
10321 int middle_ones
= clz_hwi (~(c
<< pos_one
));
10322 if (middle_zeros
>= 16 && middle_ones
>= 33)
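/* Illustrative sketch (not part of GCC): 'lis' can materialize any value of
   the form 1...1 xxxxxxxxxxxxxxx 0000000000000000 (at least 33 leading ones,
   15 arbitrary bits, 16 trailing zeros).  The helper above asks whether C can
   be rotated into such a value.  A brute-force stand-alone check, useful for
   convincing oneself that the clz/ctz arithmetic is right, could look like
   this ("#if 0"; demo_* names invented).  */
#if 0
#include <stdbool.h>
#include <stdint.h>

static bool
demo_is_negative_lis_form (uint64_t v)
{
  /* Sign-extended (imm16 << 16) with imm16 negative.  */
  return (v & 0xffff) == 0 && (v >> 31) == 0x1ffffffffULL;
}

static bool
demo_can_be_rotated_to_negative_lis (uint64_t c, int *rot)
{
  for (int r = 0; r < 64; r++)
    {
      uint64_t rotated = (c << r) | (r ? c >> (64 - r) : 0);
      if (demo_is_negative_lis_form (rotated))
        {
          *rot = r;   /* Left-rotating C by r gives the lis-loadable value.  */
          return true;
        }
    }
  return false;
}
#endif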
/* Check if value C can be built by 2 instructions: one is 'li or lis',
   another is rotldi.

   If so, *SHIFT is set to the shift operand of rotldi(rldicl), and *MASK
   is set to the mask operand of rotldi(rldicl), and return true.
   Return false otherwise.  */
10339 can_be_built_by_li_lis_and_rotldi (HOST_WIDE_INT c
, int *shift
,
10340 HOST_WIDE_INT
*mask
)
10342 /* If C or ~C contains at least 49 successive zeros, then C can be rotated
10343 to/from a positive or negative value that 'li' is able to load. */
10345 if (can_be_rotated_to_lowbits (c
, 15, &n
)
10346 || can_be_rotated_to_lowbits (~c
, 15, &n
)
10347 || can_be_rotated_to_negative_lis (c
, &n
))
10349 *mask
= HOST_WIDE_INT_M1
;
10350 *shift
= HOST_BITS_PER_WIDE_INT
- n
;
/* Check if value C can be built by 2 instructions: one is 'li or lis',
   another is rldicl.

   If so, *SHIFT is set to the shift operand of rldicl, and *MASK is set to
   the mask operand of rldicl, and return true.
   Return false otherwise.  */
10365 can_be_built_by_li_lis_and_rldicl (HOST_WIDE_INT c
, int *shift
,
10366 HOST_WIDE_INT
*mask
)
10368 /* Leading zeros may be cleaned by rldicl with a mask. Change leading zeros
10369 to ones and then recheck it. */
10370 int lz
= clz_hwi (c
);
10372 /* If lz == 0, the left shift is undefined. */
10376 HOST_WIDE_INT unmask_c
10377 = c
| (HOST_WIDE_INT_M1U
<< (HOST_BITS_PER_WIDE_INT
- lz
));
10379 if (can_be_rotated_to_lowbits (~unmask_c
, 15, &n
)
10380 || can_be_rotated_to_negative_lis (unmask_c
, &n
))
10382 *mask
= HOST_WIDE_INT_M1U
>> lz
;
10383 *shift
= n
== 0 ? 0 : HOST_BITS_PER_WIDE_INT
- n
;
/* Check if value C can be built by 2 instructions: one is 'li or lis',
   another is rldicr.

   If so, *SHIFT is set to the shift operand of rldicr, and *MASK is set to
   the mask operand of rldicr, and return true.
   Return false otherwise.  */
10398 can_be_built_by_li_lis_and_rldicr (HOST_WIDE_INT c
, int *shift
,
10399 HOST_WIDE_INT
*mask
)
10401 /* Tailing zeros may be cleaned by rldicr with a mask. Change tailing zeros
10402 to ones and then recheck it. */
10403 int tz
= ctz_hwi (c
);
10405 /* If tz == HOST_BITS_PER_WIDE_INT, the left shift is undefined. */
10406 if (tz
>= HOST_BITS_PER_WIDE_INT
)
10409 HOST_WIDE_INT unmask_c
= c
| ((HOST_WIDE_INT_1U
<< tz
) - 1);
10411 if (can_be_rotated_to_lowbits (~unmask_c
, 15, &n
)
10412 || can_be_rotated_to_negative_lis (unmask_c
, &n
))
10414 *mask
= HOST_WIDE_INT_M1U
<< tz
;
10415 *shift
= HOST_BITS_PER_WIDE_INT
- n
;
/* Check if value C can be built by 2 instructions: one is 'li', another is
   rldic.

   If so, *SHIFT is set to the 'shift' operand of rldic; and *MASK is set
   to the mask value about the 'mb' operand of rldic; and return true.
   Return false otherwise.  */
10430 can_be_built_by_li_and_rldic (HOST_WIDE_INT c
, int *shift
, HOST_WIDE_INT
*mask
)
10432 /* There are 49 successive ones in the negative value of 'li'. */
10435 /* 1..1xx1..1: negative value of li --> 0..01..1xx0..0:
10436 right bits are shifted as 0's, and left 1's(and x's) are cleaned. */
10437 int tz
= ctz_hwi (c
);
10438 int lz
= clz_hwi (c
);
10440 /* If lz == HOST_BITS_PER_WIDE_INT, the left shift is undefined. */
10441 if (lz
>= HOST_BITS_PER_WIDE_INT
)
10444 int middle_ones
= clz_hwi (~(c
<< lz
));
10445 if (tz
+ lz
+ middle_ones
>= ones
10446 && (tz
- lz
) < HOST_BITS_PER_WIDE_INT
10447 && tz
< HOST_BITS_PER_WIDE_INT
)
10449 *mask
= ((1LL << (HOST_BITS_PER_WIDE_INT
- tz
- lz
)) - 1LL) << tz
;
10454 /* 1..1xx1..1 --> 1..1xx0..01..1: some 1's(following x's) are cleaned. */
10455 int leading_ones
= clz_hwi (~c
);
10456 int tailing_ones
= ctz_hwi (~c
);
10457 int middle_zeros
= ctz_hwi (c
>> tailing_ones
);
10458 if (leading_ones
+ tailing_ones
+ middle_zeros
>= ones
10459 && middle_zeros
< HOST_BITS_PER_WIDE_INT
)
10461 *mask
= ~(((1ULL << middle_zeros
) - 1ULL) << tailing_ones
);
10462 *shift
= tailing_ones
+ middle_zeros
;
10466 /* xx1..1xx: --> xx0..01..1xx: some 1's(following x's) are cleaned. */
10467 /* Get the position for the first bit of successive 1.
10468 The 24th bit would be in successive 0 or 1. */
10469 HOST_WIDE_INT low_mask
= (HOST_WIDE_INT_1U
<< 24) - HOST_WIDE_INT_1U
;
10470 int pos_first_1
= ((c
& (low_mask
+ 1)) == 0)
10471 ? clz_hwi (c
& low_mask
)
10472 : HOST_BITS_PER_WIDE_INT
- ctz_hwi (~(c
| low_mask
));
10474 /* Make sure the left and right shifts are defined. */
10475 if (!IN_RANGE (pos_first_1
, 1, HOST_BITS_PER_WIDE_INT
-1))
10478 middle_ones
= clz_hwi (~c
<< pos_first_1
);
10479 middle_zeros
= ctz_hwi (c
>> (HOST_BITS_PER_WIDE_INT
- pos_first_1
));
10480 if (pos_first_1
< HOST_BITS_PER_WIDE_INT
10481 && middle_ones
+ middle_zeros
< HOST_BITS_PER_WIDE_INT
10482 && middle_ones
+ middle_zeros
>= ones
)
10484 *mask
= ~(((1ULL << middle_zeros
) - 1LL)
10485 << (HOST_BITS_PER_WIDE_INT
- pos_first_1
));
10486 *shift
= HOST_BITS_PER_WIDE_INT
- pos_first_1
+ middle_zeros
;
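/* Illustrative sketch (not part of GCC): one concrete instance of the
   "li/lis plus one rotate-and-mask" idea checked by the helpers above.
   0x00ffffffffffff00 can be built as "li rD,-256" followed by
   "rldicl rD,rD,0,8".  The stand-alone model below verifies that claim
   ("#if 0"; demo_* names invented).  */
#if 0
#include <assert.h>
#include <stdint.h>

/* rldicl rD,rS,SH,MB: rotate left by SH, then keep bits MB..63
   (IBM bit numbering, bit 0 = most significant).  */
static uint64_t
demo_rldicl (uint64_t rs, unsigned sh, unsigned mb)
{
  uint64_t rot = sh ? (rs << sh) | (rs >> (64 - sh)) : rs;
  uint64_t mask = mb ? (~(uint64_t) 0) >> mb : ~(uint64_t) 0;
  return rot & mask;
}

int
main (void)
{
  /* li rD,-256 gives 0xffffffffffffff00; clearing the top 8 bits with
     rldicl rD,rD,0,8 yields the target constant in two instructions.  */
  uint64_t rd = (uint64_t) (int64_t) -256;
  assert (demo_rldicl (rd, 0, 8) == 0x00ffffffffffff00ULL);
  return 0;
}
#endif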
/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
   Output insns to set DEST equal to the constant C as a series of
   lis, ori and shl instructions.  If NUM_INSNS is not NULL, then
   only increase *NUM_INSNS as the number of insns, and do not emit
   any insns.  */
10500 rs6000_emit_set_long_const (rtx dest
, HOST_WIDE_INT c
, int *num_insns
)
10502 HOST_WIDE_INT ud1
, ud2
, ud3
, ud4
;
10505 ud2
= (c
>> 16) & 0xffff;
10506 ud3
= (c
>> 32) & 0xffff;
10507 ud4
= (c
>> 48) & 0xffff;
10509 /* This lambda is used to emit one insn or just increase the insn count.
10510 When counting the insn number, no need to emit the insn. */
10511 auto count_or_emit_insn
= [&num_insns
] (rtx dest_or_insn
, rtx src
= nullptr) {
10519 emit_move_insn (dest_or_insn
, src
);
10521 emit_insn (dest_or_insn
);
10524 if ((ud4
== 0xffff && ud3
== 0xffff && ud2
== 0xffff && (ud1
& 0x8000))
10525 || (ud4
== 0 && ud3
== 0 && ud2
== 0 && !(ud1
& 0x8000)))
10528 count_or_emit_insn (dest
, GEN_INT (sext_hwi (ud1
, 16)));
10533 = (num_insns
|| !can_create_pseudo_p ()) ? dest
: gen_reg_rtx (DImode
);
10535 if ((ud4
== 0xffff && ud3
== 0xffff && (ud2
& 0x8000))
10536 || (ud4
== 0 && ud3
== 0 && !(ud2
& 0x8000)))
10539 count_or_emit_insn (ud1
!= 0 ? temp
: dest
,
10540 GEN_INT (sext_hwi (ud2
<< 16, 32)));
10542 count_or_emit_insn (dest
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
10546 if (ud4
== 0xffff && ud3
== 0xffff && !(ud2
& 0x8000) && ud1
== 0)
10549 count_or_emit_insn (temp
, GEN_INT (sext_hwi ((ud2
| 0x8000) << 16, 32)));
10550 count_or_emit_insn (dest
,
10551 gen_rtx_XOR (DImode
, temp
, GEN_INT (0x80000000)));
10555 if (ud4
== 0xffff && ud3
== 0xffff && (ud1
& 0x8000))
10558 count_or_emit_insn (temp
, GEN_INT (sext_hwi (ud1
, 16)));
10559 count_or_emit_insn (dest
, gen_rtx_XOR (DImode
, temp
,
10560 GEN_INT ((ud2
^ 0xffff) << 16)));
10565 HOST_WIDE_INT mask
;
10566 if (can_be_built_by_li_lis_and_rotldi (c
, &shift
, &mask
)
10567 || can_be_built_by_li_lis_and_rldicl (c
, &shift
, &mask
)
10568 || can_be_built_by_li_lis_and_rldicr (c
, &shift
, &mask
)
10569 || can_be_built_by_li_and_rldic (c
, &shift
, &mask
))
10571 /* li/lis; rldicX */
10572 unsigned HOST_WIDE_INT imm
= (c
| ~mask
);
10573 imm
= (imm
>> shift
) | (imm
<< (HOST_BITS_PER_WIDE_INT
- shift
));
10575 count_or_emit_insn (temp
, GEN_INT (imm
));
10577 temp
= gen_rtx_ROTATE (DImode
, temp
, GEN_INT (shift
));
10578 if (mask
!= HOST_WIDE_INT_M1
)
10579 temp
= gen_rtx_AND (DImode
, temp
, GEN_INT (mask
));
10580 count_or_emit_insn (dest
, temp
);
10585 if (ud3
== 0 && ud4
== 0)
10587 gcc_assert ((ud2
& 0x8000) && ud1
!= 0);
10588 if (!(ud1
& 0x8000))
10591 count_or_emit_insn (temp
, GEN_INT (ud1
));
10592 count_or_emit_insn (dest
,
10593 gen_rtx_IOR (DImode
, temp
, GEN_INT (ud2
<< 16)));
10597 /* lis; ori; rldicl */
10598 count_or_emit_insn (temp
, GEN_INT (sext_hwi (ud2
<< 16, 32)));
10599 count_or_emit_insn (temp
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
10600 count_or_emit_insn (dest
,
10601 gen_rtx_AND (DImode
, temp
, GEN_INT (0xffffffff)));
10605 if (ud1
== ud3
&& ud2
== ud4
)
10607 /* load low 32bits first, e.g. "lis; ori", then "rldimi". */
10608 HOST_WIDE_INT num
= (ud2
<< 16) | ud1
;
10609 rs6000_emit_set_long_const (temp
, sext_hwi (num
, 32), num_insns
);
10611 rtx rldimi
= gen_rotldi3_insert_3 (dest
, temp
, GEN_INT (32), temp
,
10612 GEN_INT (0xffffffff));
10613 count_or_emit_insn (rldimi
);
10617 if ((ud4
== 0xffff && (ud3
& 0x8000)) || (ud4
== 0 && !(ud3
& 0x8000)))
10619 /* li; [ori;] rldicl [;oir]. */
10620 count_or_emit_insn (temp
, GEN_INT (sext_hwi (ud3
<< 16, 32)));
10622 count_or_emit_insn (temp
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud2
)));
10623 count_or_emit_insn (ud1
!= 0 ? temp
: dest
,
10624 gen_rtx_ASHIFT (DImode
, temp
, GEN_INT (16)));
10626 count_or_emit_insn (dest
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
10630 if (TARGET_PREFIXED
)
10632 if (can_create_pseudo_p ())
10634 /* pli A,L; pli B,H; rldimi A,B,32,0. */
10635 rtx temp1
= num_insns
? nullptr : gen_reg_rtx (DImode
);
10636 count_or_emit_insn (temp
, GEN_INT ((ud4
<< 16) | ud3
));
10637 count_or_emit_insn (temp1
, GEN_INT ((ud2
<< 16) | ud1
));
10638 rtx rldimi
= gen_rotldi3_insert_3 (dest
, temp
, GEN_INT (32), temp1
,
10639 GEN_INT (0xffffffff));
10640 count_or_emit_insn (rldimi
);
10644 /* pli A,H; sldi A,32; paddi A,A,L. */
10645 count_or_emit_insn (dest
, GEN_INT ((ud4
<< 16) | ud3
));
10646 count_or_emit_insn (dest
, gen_rtx_ASHIFT (DImode
, dest
, GEN_INT (32)));
10648 bool can_use_paddi
= dest
? REGNO (dest
) != FIRST_GPR_REGNO
: false;
10649 /* Use paddi for the low 32 bits. */
10650 if (ud2
!= 0 && ud1
!= 0 && can_use_paddi
)
10651 count_or_emit_insn (dest
, gen_rtx_PLUS (DImode
, dest
,
10652 GEN_INT ((ud2
<< 16) | ud1
)));
10653 /* Use oris, ori for low 32 bits. */
10654 if (ud2
!= 0 && (ud1
== 0 || !can_use_paddi
))
10655 count_or_emit_insn (dest
,
10656 gen_rtx_IOR (DImode
, dest
, GEN_INT (ud2
<< 16)));
10657 if (ud1
!= 0 && (ud2
== 0 || !can_use_paddi
))
10658 count_or_emit_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud1
)));
10662 if (can_create_pseudo_p ())
10664 /* lis HIGH,UD4 ; ori HIGH,UD3 ;
10665 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */
10666 rtx high
= num_insns
? nullptr : gen_reg_rtx (DImode
);
10667 rtx low
= num_insns
? nullptr : gen_reg_rtx (DImode
);
10668 HOST_WIDE_INT num
= (ud2
<< 16) | ud1
;
10669 rs6000_emit_set_long_const (low
, sext_hwi (num
, 32), num_insns
);
10670 num
= (ud4
<< 16) | ud3
;
10671 rs6000_emit_set_long_const (high
, sext_hwi (num
, 32), num_insns
);
10673 rtx rldimi
= gen_rotldi3_insert_3 (dest
, high
, GEN_INT (32), low
,
10674 GEN_INT (0xffffffff));
10675 count_or_emit_insn (rldimi
);
10679 /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
10680 oris DEST,UD2 ; ori DEST,UD1. */
10681 count_or_emit_insn (dest
, GEN_INT (sext_hwi (ud4
<< 16, 32)));
10683 count_or_emit_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud3
)));
10685 count_or_emit_insn (dest
, gen_rtx_ASHIFT (DImode
, dest
, GEN_INT (32)));
10687 count_or_emit_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud2
<< 16)));
10689 count_or_emit_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud1
)));
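/* Illustrative sketch (not part of GCC): the constant above is split into
   four 16-bit groups, ud1 being the least significant.  The worst case
   without prefixed instructions is the five-instruction sequence
   lis/ori/sldi 32/oris/ori, which the stand-alone model below reproduces
   ("#if 0"; demo code invented for illustration).  */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t c = 0x123456789abcdef0ULL;
  uint64_t ud1 = c & 0xffff;
  uint64_t ud2 = (c >> 16) & 0xffff;
  uint64_t ud3 = (c >> 32) & 0xffff;
  uint64_t ud4 = (c >> 48) & 0xffff;

  /* lis dest,ud4 ; ori dest,dest,ud3 ; sldi dest,dest,32 ;
     oris dest,dest,ud2 ; ori dest,dest,ud1.  */
  uint64_t dest = (uint64_t) (int64_t) (int32_t) (ud4 << 16); /* lis.  */
  dest |= ud3;                                                /* ori.  */
  dest <<= 32;                                                /* sldi 32.  */
  dest |= ud2 << 16;                                          /* oris.  */
  dest |= ud1;                                                /* ori.  */
  assert (dest == c);
  return 0;
}
#endif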
/* Helper for the following.  Get rid of [r+r] memory refs
   in cases where it won't work (TImode, TFmode, TDmode, PTImode).  */
10698 rs6000_eliminate_indexed_memrefs (rtx operands
[2])
10700 if (MEM_P (operands
[0])
10701 && !REG_P (XEXP (operands
[0], 0))
10702 && ! legitimate_constant_pool_address_p (XEXP (operands
[0], 0),
10703 GET_MODE (operands
[0]), false))
10705 = replace_equiv_address (operands
[0],
10706 copy_addr_to_reg (XEXP (operands
[0], 0)));
10708 if (MEM_P (operands
[1])
10709 && !REG_P (XEXP (operands
[1], 0))
10710 && ! legitimate_constant_pool_address_p (XEXP (operands
[1], 0),
10711 GET_MODE (operands
[1]), false))
10713 = replace_equiv_address (operands
[1],
10714 copy_addr_to_reg (XEXP (operands
[1], 0)));
/* Generate a vector of constants to permute MODE for a little-endian
   storage operation by swapping the two halves of a vector.  */
10720 rs6000_const_vec (machine_mode mode
)
10748 v
= rtvec_alloc (subparts
);
10750 for (i
= 0; i
< subparts
/ 2; ++i
)
10751 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
+ subparts
/ 2);
10752 for (i
= subparts
/ 2; i
< subparts
; ++i
)
10753 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
- subparts
/ 2);
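/* Illustrative sketch (not part of GCC): for a mode with 4 sub-parts the
   vector built above is {2, 3, 0, 1}, i.e. each element maps to the element
   in the other half, which is the half-swap used by the little-endian
   lxvd2x/stxvd2x sequences below.  Stand-alone model ("#if 0"):  */
#if 0
#include <assert.h>

int
main (void)
{
  int subparts = 4, v[4];
  for (int i = 0; i < subparts / 2; i++)
    v[i] = i + subparts / 2;
  for (int i = subparts / 2; i < subparts; i++)
    v[i] = i - subparts / 2;
  assert (v[0] == 2 && v[1] == 3 && v[2] == 0 && v[3] == 1);
  return 0;
}
#endif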
10758 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10759 store operation. */
10761 rs6000_emit_le_vsx_permute (rtx dest
, rtx source
, machine_mode mode
)
10763 gcc_assert (!altivec_indexed_or_indirect_operand (dest
, mode
));
10764 gcc_assert (!altivec_indexed_or_indirect_operand (source
, mode
));
  /* Scalar permutations are easier to express in integer modes rather than
     floating-point modes, so cast them here.  We use V1TImode instead
     of TImode to ensure that the values don't go through GPRs.  */
10769 if (FLOAT128_VECTOR_P (mode
))
10771 dest
= gen_lowpart (V1TImode
, dest
);
10772 source
= gen_lowpart (V1TImode
, source
);
10776 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10778 if (mode
== TImode
|| mode
== V1TImode
)
10779 emit_insn (gen_rtx_SET (dest
, gen_rtx_ROTATE (mode
, source
,
10783 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rs6000_const_vec (mode
));
10784 emit_insn (gen_rtx_SET (dest
, gen_rtx_VEC_SELECT (mode
, source
, par
)));
/* Emit a little-endian load from vector memory location SOURCE to VSX
   register DEST in mode MODE.  The load is done with two permuting
   insns that represent an lxvd2x and xxpermdi.  */
10792 rs6000_emit_le_vsx_load (rtx dest
, rtx source
, machine_mode mode
)
  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
10796 if (mode
== TImode
|| mode
== V1TImode
)
10799 dest
= gen_lowpart (V2DImode
, dest
);
10800 source
= adjust_address (source
, V2DImode
, 0);
10803 rtx tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest
) : dest
;
10804 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
10805 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
/* Emit a little-endian store to vector memory location DEST from VSX
   register SOURCE in mode MODE.  The store is done with two permuting
   insns that represent an xxpermdi and an stxvd2x.  */
10812 rs6000_emit_le_vsx_store (rtx dest
, rtx source
, machine_mode mode
)
10814 /* This should never be called after LRA. */
10815 gcc_assert (can_create_pseudo_p ());
  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
     V1TImode).  */
10819 if (mode
== TImode
|| mode
== V1TImode
)
10822 dest
= adjust_address (dest
, V2DImode
, 0);
10823 source
= gen_lowpart (V2DImode
, source
);
10826 rtx tmp
= gen_reg_rtx_and_attrs (source
);
10827 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
10828 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
/* Emit a sequence representing a little-endian VSX load or store,
   moving data from SOURCE to DEST in mode MODE.  This is done
   separately from rs6000_emit_move to ensure it is called only
   during expand.  LE VSX loads and stores introduced later are
   handled with a split.  The expand-time RTL generation allows
   us to optimize away redundant pairs of register-permutes.  */
10838 rs6000_emit_le_vsx_move (rtx dest
, rtx source
, machine_mode mode
)
10840 gcc_assert (!BYTES_BIG_ENDIAN
10841 && VECTOR_MEM_VSX_P (mode
)
10842 && !TARGET_P9_VECTOR
10843 && !gpr_or_gpr_p (dest
, source
)
10844 && (MEM_P (source
) ^ MEM_P (dest
)));
10846 if (MEM_P (source
))
10848 gcc_assert (REG_P (dest
) || SUBREG_P (dest
));
10849 rs6000_emit_le_vsx_load (dest
, source
, mode
);
10853 if (!REG_P (source
))
10854 source
= force_reg (mode
, source
);
10855 rs6000_emit_le_vsx_store (dest
, source
, mode
);
/* Return whether a SFmode or SImode move can be done without converting one
   mode to another.  This arises when we have:

	(SUBREG:SF (REG:SI ...))
	(SUBREG:SI (REG:SF ...))

   and one of the values is in a floating point/vector register, where SFmode
   scalars are stored in DFmode format.  */
10869 valid_sf_si_move (rtx dest
, rtx src
, machine_mode mode
)
10871 if (TARGET_ALLOW_SF_SUBREG
)
10874 if (mode
!= SFmode
&& GET_MODE_CLASS (mode
) != MODE_INT
)
10877 if (!SUBREG_P (src
) || !sf_subreg_operand (src
, mode
))
  /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
10881 if (SUBREG_P (dest
))
10883 rtx dest_subreg
= SUBREG_REG (dest
);
10884 rtx src_subreg
= SUBREG_REG (src
);
10885 return GET_MODE (dest_subreg
) == GET_MODE (src_subreg
);
/* Helper function to change moves with:

	(SUBREG:SF (REG:SI)) and
	(SUBREG:SI (REG:SF))

   into separate UNSPEC insns.  In the PowerPC architecture, scalar SFmode
   values are stored as DFmode values in the VSX registers.  We need to convert
   the bits before we can use a direct move or operate on the bits in the
   vector register as an integer type.

   Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)).  */
10905 rs6000_emit_move_si_sf_subreg (rtx dest
, rtx source
, machine_mode mode
)
10907 if (TARGET_DIRECT_MOVE_64BIT
&& !reload_completed
10908 && (!SUBREG_P (dest
) || !sf_subreg_operand (dest
, mode
))
10909 && SUBREG_P (source
) && sf_subreg_operand (source
, mode
))
10911 rtx inner_source
= SUBREG_REG (source
);
10912 machine_mode inner_mode
= GET_MODE (inner_source
);
10914 if (mode
== SImode
&& inner_mode
== SFmode
)
10916 emit_insn (gen_movsi_from_sf (dest
, inner_source
));
10920 if (mode
== SFmode
&& inner_mode
== SImode
)
10922 emit_insn (gen_movsf_from_si (dest
, inner_source
));
10930 /* Emit a move from SOURCE to DEST in mode MODE. */
10932 rs6000_emit_move (rtx dest
, rtx source
, machine_mode mode
)
10935 operands
[0] = dest
;
10936 operands
[1] = source
;
10938 if (TARGET_DEBUG_ADDR
)
10941 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10942 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10943 GET_MODE_NAME (mode
),
10946 can_create_pseudo_p ());
10948 fprintf (stderr
, "source:\n");
10949 debug_rtx (source
);
10952 /* Check that we get CONST_WIDE_INT only when we should. */
10953 if (CONST_WIDE_INT_P (operands
[1])
10954 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
10955 gcc_unreachable ();
10957 #ifdef HAVE_AS_GNU_ATTRIBUTE
  /* If we use a long double type, set the flags in .gnu_attribute that say
     what the long double type is.  This is to allow the linker's warning
     message for the wrong long double to be useful, even if the function does
     not do a call (for example, doing a 128-bit add on power9 if the long
     double type is IEEE 128-bit).  Do not set this if __ibm128 or __float128
     are used if they aren't the default long double type.  */
10964 if (rs6000_gnu_attr
&& (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
))
10966 if (TARGET_LONG_DOUBLE_128
&& (mode
== TFmode
|| mode
== TCmode
))
10967 rs6000_passes_float
= rs6000_passes_long_double
= true;
10969 else if (!TARGET_LONG_DOUBLE_128
&& (mode
== DFmode
|| mode
== DCmode
))
10970 rs6000_passes_float
= rs6000_passes_long_double
= true;
10974 /* See if we need to special case SImode/SFmode SUBREG moves. */
10975 if ((mode
== SImode
|| mode
== SFmode
) && SUBREG_P (source
)
10976 && rs6000_emit_move_si_sf_subreg (dest
, source
, mode
))
10979 /* Check if GCC is setting up a block move that will end up using FP
10980 registers as temporaries. We must make sure this is acceptable. */
10981 if (MEM_P (operands
[0])
10982 && MEM_P (operands
[1])
10984 && (rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[0]))
10985 || rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[1])))
10986 && ! (rs6000_slow_unaligned_access (SImode
,
10987 (MEM_ALIGN (operands
[0]) > 32
10988 ? 32 : MEM_ALIGN (operands
[0])))
10989 || rs6000_slow_unaligned_access (SImode
,
10990 (MEM_ALIGN (operands
[1]) > 32
10991 ? 32 : MEM_ALIGN (operands
[1]))))
10992 && ! MEM_VOLATILE_P (operands
[0])
10993 && ! MEM_VOLATILE_P (operands
[1]))
10995 emit_move_insn (adjust_address (operands
[0], SImode
, 0),
10996 adjust_address (operands
[1], SImode
, 0));
10997 emit_move_insn (adjust_address (copy_rtx (operands
[0]), SImode
, 4),
10998 adjust_address (copy_rtx (operands
[1]), SImode
, 4));
11002 if (can_create_pseudo_p () && MEM_P (operands
[0])
11003 && !gpc_reg_operand (operands
[1], mode
))
11004 operands
[1] = force_reg (mode
, operands
[1]);
11006 /* Recognize the case where operand[1] is a reference to thread-local
11007 data and load its address to a register. */
11008 if (tls_referenced_p (operands
[1]))
11010 enum tls_model model
;
11011 rtx tmp
= operands
[1];
11014 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
11016 addend
= XEXP (XEXP (tmp
, 0), 1);
11017 tmp
= XEXP (XEXP (tmp
, 0), 0);
11020 gcc_assert (SYMBOL_REF_P (tmp
));
11021 model
= SYMBOL_REF_TLS_MODEL (tmp
);
11022 gcc_assert (model
!= 0);
11024 tmp
= rs6000_legitimize_tls_address (tmp
, model
);
11027 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
11028 tmp
= force_operand (tmp
, operands
[0]);
  /* 128-bit constant floating-point values on Darwin should really be loaded
     as two parts.  However, this premature splitting is a problem when DFmode
     values can go into Altivec registers.  */
11036 if (TARGET_MACHO
&& CONST_DOUBLE_P (operands
[1]) && FLOAT128_IBM_P (mode
)
11037 && !reg_addr
[DFmode
].scalar_in_vmx_p
)
11039 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
, 0),
11040 simplify_gen_subreg (DFmode
, operands
[1], mode
, 0),
11042 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
,
11043 GET_MODE_SIZE (DFmode
)),
11044 simplify_gen_subreg (DFmode
, operands
[1], mode
,
11045 GET_MODE_SIZE (DFmode
)),
  /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
     p1:SD) if p1 is not of floating point class and p0 is spilled as
     we can have no analogous movsd_store for this.  */
11053 if (lra_in_progress
&& mode
== DDmode
11054 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
11055 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
11056 && SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1]))
11057 && GET_MODE (SUBREG_REG (operands
[1])) == SDmode
)
11060 int regno
= REGNO (SUBREG_REG (operands
[1]));
11062 if (!HARD_REGISTER_NUM_P (regno
))
11064 cl
= reg_preferred_class (regno
);
11065 regno
= reg_renumber
[regno
];
11067 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][1];
11069 if (regno
>= 0 && ! FP_REGNO_P (regno
))
11072 operands
[0] = gen_lowpart_SUBREG (SDmode
, operands
[0]);
11073 operands
[1] = SUBREG_REG (operands
[1]);
11076 if (lra_in_progress
11078 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
11079 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
11080 && (REG_P (operands
[1])
11081 || (SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1])))))
11083 int regno
= reg_or_subregno (operands
[1]);
11086 if (!HARD_REGISTER_NUM_P (regno
))
11088 cl
= reg_preferred_class (regno
);
11089 gcc_assert (cl
!= NO_REGS
);
11090 regno
= reg_renumber
[regno
];
11092 regno
= ira_class_hard_regs
[cl
][0];
11094 if (FP_REGNO_P (regno
))
11096 if (GET_MODE (operands
[0]) != DDmode
)
11097 operands
[0] = gen_rtx_SUBREG (DDmode
, operands
[0], 0);
11098 emit_insn (gen_movsd_store (operands
[0], operands
[1]));
11100 else if (INT_REGNO_P (regno
))
11101 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
  /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
     p:DD)) if p0 is not of floating point class and p1 is spilled as
     we can have no analogous movsd_load for this.  */
11109 if (lra_in_progress
&& mode
== DDmode
11110 && SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))
11111 && GET_MODE (SUBREG_REG (operands
[0])) == SDmode
11112 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
11113 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
11116 int regno
= REGNO (SUBREG_REG (operands
[0]));
11118 if (!HARD_REGISTER_NUM_P (regno
))
11120 cl
= reg_preferred_class (regno
);
11121 regno
= reg_renumber
[regno
];
11123 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][0];
11125 if (regno
>= 0 && ! FP_REGNO_P (regno
))
11128 operands
[0] = SUBREG_REG (operands
[0]);
11129 operands
[1] = gen_lowpart_SUBREG (SDmode
, operands
[1]);
11132 if (lra_in_progress
11134 && (REG_P (operands
[0])
11135 || (SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))))
11136 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
11137 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
11139 int regno
= reg_or_subregno (operands
[0]);
11142 if (!HARD_REGISTER_NUM_P (regno
))
11144 cl
= reg_preferred_class (regno
);
11145 gcc_assert (cl
!= NO_REGS
);
11146 regno
= reg_renumber
[regno
];
11148 regno
= ira_class_hard_regs
[cl
][0];
11150 if (FP_REGNO_P (regno
))
11152 if (GET_MODE (operands
[1]) != DDmode
)
11153 operands
[1] = gen_rtx_SUBREG (DDmode
, operands
[1], 0);
11154 emit_insn (gen_movsd_load (operands
[0], operands
[1]));
11156 else if (INT_REGNO_P (regno
))
11157 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
11163 /* FIXME: In the long term, this switch statement should go away
11164 and be replaced by a sequence of tests based on things like
11170 if (CONSTANT_P (operands
[1])
11171 && !CONST_INT_P (operands
[1]))
11172 operands
[1] = force_const_mem (mode
, operands
[1]);
11179 if (FLOAT128_2REG_P (mode
))
11180 rs6000_eliminate_indexed_memrefs (operands
);
11187 if (CONSTANT_P (operands
[1])
11188 && ! easy_fp_constant (operands
[1], mode
))
11189 operands
[1] = force_const_mem (mode
, operands
[1]);
11199 if (CONSTANT_P (operands
[1])
11200 && !easy_vector_constant (operands
[1], mode
))
11201 operands
[1] = force_const_mem (mode
, operands
[1]);
11206 if (CONST_INT_P (operands
[1]) && INTVAL (operands
[1]) != 0)
11207 error ("%qs is an opaque type, and you cannot set it to other values",
11208 (mode
== OOmode
) ? "__vector_pair" : "__vector_quad");
  /* Use the default pattern for the address of ELF small data.  */
11216 && DEFAULT_ABI
== ABI_V4
11217 && (SYMBOL_REF_P (operands
[1])
11218 || GET_CODE (operands
[1]) == CONST
)
11219 && small_data_operand (operands
[1], mode
))
11221 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11225 /* Use the default pattern for loading up PC-relative addresses. */
11226 if (TARGET_PCREL
&& mode
== Pmode
11227 && pcrel_local_or_external_address (operands
[1], Pmode
))
11229 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11233 if (DEFAULT_ABI
== ABI_V4
11234 && mode
== Pmode
&& mode
== SImode
11235 && flag_pic
== 1 && got_operand (operands
[1], mode
))
11237 emit_insn (gen_movsi_got (operands
[0], operands
[1]));
11241 if ((TARGET_ELF
|| DEFAULT_ABI
== ABI_DARWIN
)
11242 && TARGET_NO_TOC_OR_PCREL
11245 && CONSTANT_P (operands
[1])
11246 && GET_CODE (operands
[1]) != HIGH
11247 && !CONST_INT_P (operands
[1]))
11249 rtx target
= (!can_create_pseudo_p ()
11251 : gen_reg_rtx (mode
));
11253 /* If this is a function address on -mcall-aixdesc,
11254 convert it to the address of the descriptor. */
11255 if (DEFAULT_ABI
== ABI_AIX
11256 && SYMBOL_REF_P (operands
[1])
11257 && XSTR (operands
[1], 0)[0] == '.')
11259 const char *name
= XSTR (operands
[1], 0);
11261 while (*name
== '.')
11263 new_ref
= gen_rtx_SYMBOL_REF (Pmode
, name
);
11264 CONSTANT_POOL_ADDRESS_P (new_ref
)
11265 = CONSTANT_POOL_ADDRESS_P (operands
[1]);
11266 SYMBOL_REF_FLAGS (new_ref
) = SYMBOL_REF_FLAGS (operands
[1]);
11267 SYMBOL_REF_USED (new_ref
) = SYMBOL_REF_USED (operands
[1]);
11268 SYMBOL_REF_DATA (new_ref
) = SYMBOL_REF_DATA (operands
[1]);
11269 operands
[1] = new_ref
;
11272 if (DEFAULT_ABI
== ABI_DARWIN
)
11275 /* This is not PIC code, but could require the subset of
11276 indirections used by mdynamic-no-pic. */
11277 if (MACHO_DYNAMIC_NO_PIC_P
)
11279 /* Take care of any required data indirection. */
11280 operands
[1] = rs6000_machopic_legitimize_pic_address (
11281 operands
[1], mode
, operands
[0]);
11282 if (operands
[0] != operands
[1])
11283 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11287 emit_insn (gen_macho_high (Pmode
, target
, operands
[1]));
11288 emit_insn (gen_macho_low (Pmode
, operands
[0],
11289 target
, operands
[1]));
11293 emit_insn (gen_elf_high (target
, operands
[1]));
11294 emit_insn (gen_elf_low (operands
[0], target
, operands
[1]));
11298 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11299 and we have put it in the TOC, we just need to make a TOC-relative
11300 reference to it. */
11302 && SYMBOL_REF_P (operands
[1])
11303 && use_toc_relative_ref (operands
[1], mode
))
11304 operands
[1] = create_TOC_reference (operands
[1], operands
[0]);
11305 else if (mode
== Pmode
11306 && CONSTANT_P (operands
[1])
11307 && GET_CODE (operands
[1]) != HIGH
11308 && ((REG_P (operands
[0])
11309 && FP_REGNO_P (REGNO (operands
[0])))
11310 || !CONST_INT_P (operands
[1])
11311 || (num_insns_constant (operands
[1], mode
)
11312 > (TARGET_CMODEL
!= CMODEL_SMALL
? 3 : 2)))
11313 && !toc_relative_expr_p (operands
[1], false, NULL
, NULL
)
11314 && (TARGET_CMODEL
== CMODEL_SMALL
11315 || can_create_pseudo_p ()
11316 || (REG_P (operands
[0])
11317 && INT_REG_OK_FOR_BASE_P (operands
[0], true))))
11321 /* Darwin uses a special PIC legitimizer. */
11322 if (DEFAULT_ABI
== ABI_DARWIN
&& MACHOPIC_INDIRECT
)
11325 rs6000_machopic_legitimize_pic_address (operands
[1], mode
,
11327 if (operands
[0] != operands
[1])
11328 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11333 /* If we are to limit the number of things we put in the TOC and
11334 this is a symbol plus a constant we can add in one insn,
11335 just put the symbol in the TOC and add the constant. */
11336 if (GET_CODE (operands
[1]) == CONST
11337 && TARGET_NO_SUM_IN_TOC
11338 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
11339 && add_operand (XEXP (XEXP (operands
[1], 0), 1), mode
)
11340 && (GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == LABEL_REF
11341 || SYMBOL_REF_P (XEXP (XEXP (operands
[1], 0), 0)))
11342 && ! side_effects_p (operands
[0]))
11345 force_const_mem (mode
, XEXP (XEXP (operands
[1], 0), 0));
11346 rtx other
= XEXP (XEXP (operands
[1], 0), 1);
11348 sym
= force_reg (mode
, sym
);
11349 emit_insn (gen_add3_insn (operands
[0], sym
, other
));
11353 operands
[1] = force_const_mem (mode
, operands
[1]);
11356 && SYMBOL_REF_P (XEXP (operands
[1], 0))
11357 && use_toc_relative_ref (XEXP (operands
[1], 0), mode
))
11359 rtx tocref
= create_TOC_reference (XEXP (operands
[1], 0),
11361 operands
[1] = gen_const_mem (mode
, tocref
);
11362 set_mem_alias_set (operands
[1], get_TOC_alias_set ());
11368 if (!VECTOR_MEM_VSX_P (TImode
))
11369 rs6000_eliminate_indexed_memrefs (operands
);
11373 rs6000_eliminate_indexed_memrefs (operands
);
11377 fatal_insn ("bad move", gen_rtx_SET (dest
, source
));
  /* Above, we may have called force_const_mem which may have returned
     an invalid address.  If we can, fix this up; otherwise, reload will
     have to deal with it.  */
11383 if (MEM_P (operands
[1]))
11384 operands
[1] = validize_mem (operands
[1]);
11386 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11390 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
11392 init_float128_ibm (machine_mode mode
)
11394 if (!TARGET_XL_COMPAT
)
11396 set_optab_libfunc (add_optab
, mode
, "__gcc_qadd");
11397 set_optab_libfunc (sub_optab
, mode
, "__gcc_qsub");
11398 set_optab_libfunc (smul_optab
, mode
, "__gcc_qmul");
11399 set_optab_libfunc (sdiv_optab
, mode
, "__gcc_qdiv");
11401 if (!TARGET_HARD_FLOAT
)
11403 set_optab_libfunc (neg_optab
, mode
, "__gcc_qneg");
11404 set_optab_libfunc (eq_optab
, mode
, "__gcc_qeq");
11405 set_optab_libfunc (ne_optab
, mode
, "__gcc_qne");
11406 set_optab_libfunc (gt_optab
, mode
, "__gcc_qgt");
11407 set_optab_libfunc (ge_optab
, mode
, "__gcc_qge");
11408 set_optab_libfunc (lt_optab
, mode
, "__gcc_qlt");
11409 set_optab_libfunc (le_optab
, mode
, "__gcc_qle");
11410 set_optab_libfunc (unord_optab
, mode
, "__gcc_qunord");
11412 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__gcc_stoq");
11413 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__gcc_dtoq");
11414 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__gcc_qtos");
11415 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__gcc_qtod");
11416 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__gcc_qtoi");
11417 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__gcc_qtou");
11418 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__gcc_itoq");
11419 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__gcc_utoq");
11424 set_optab_libfunc (add_optab
, mode
, "_xlqadd");
11425 set_optab_libfunc (sub_optab
, mode
, "_xlqsub");
11426 set_optab_libfunc (smul_optab
, mode
, "_xlqmul");
11427 set_optab_libfunc (sdiv_optab
, mode
, "_xlqdiv");
  /* Add various conversions for IFmode to use the traditional TFmode
     names.  */
11432 if (mode
== IFmode
)
11434 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdtf");
11435 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddtf");
11436 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdtf");
11437 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunctfsd");
11438 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunctfdd");
11439 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendtftd");
11441 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixtfdi");
11442 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunstfdi");
11444 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatditf");
11445 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatunditf");
11447 if (TARGET_POWERPC64
)
11449 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixtfti");
11450 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunstfti");
11451 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattitf");
11452 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntitf");
/* Set up IEEE 128-bit floating point routines.  Use different names if the
   arguments can be passed in a vector register.  The historical PowerPC
   implementation of IEEE 128-bit floating point used _q_<op> for the names, so
   continue to use that if we aren't using vector registers to pass IEEE
   128-bit floating point.  */
11464 init_float128_ieee (machine_mode mode
)
11466 if (FLOAT128_VECTOR_P (mode
))
11468 set_optab_libfunc (add_optab
, mode
, "__addkf3");
11469 set_optab_libfunc (sub_optab
, mode
, "__subkf3");
11470 set_optab_libfunc (neg_optab
, mode
, "__negkf2");
11471 set_optab_libfunc (smul_optab
, mode
, "__mulkf3");
11472 set_optab_libfunc (sdiv_optab
, mode
, "__divkf3");
11473 set_optab_libfunc (sqrt_optab
, mode
, "__sqrtkf2");
11474 set_optab_libfunc (abs_optab
, mode
, "__abskf2");
11475 set_optab_libfunc (powi_optab
, mode
, "__powikf2");
11477 set_optab_libfunc (eq_optab
, mode
, "__eqkf2");
11478 set_optab_libfunc (ne_optab
, mode
, "__nekf2");
11479 set_optab_libfunc (gt_optab
, mode
, "__gtkf2");
11480 set_optab_libfunc (ge_optab
, mode
, "__gekf2");
11481 set_optab_libfunc (lt_optab
, mode
, "__ltkf2");
11482 set_optab_libfunc (le_optab
, mode
, "__lekf2");
11483 set_optab_libfunc (unord_optab
, mode
, "__unordkf2");
11485 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__extendsfkf2");
11486 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__extenddfkf2");
11487 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__trunckfsf2");
11488 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__trunckfdf2");
11490 set_conv_libfunc (sext_optab
, mode
, IFmode
, "__trunctfkf2");
11491 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
11492 set_conv_libfunc (sext_optab
, mode
, TFmode
, "__trunctfkf2");
11494 set_conv_libfunc (trunc_optab
, IFmode
, mode
, "__extendkftf2");
11495 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
11496 set_conv_libfunc (trunc_optab
, TFmode
, mode
, "__extendkftf2");
11498 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdkf");
11499 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddkf");
11500 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdkf");
11501 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunckfsd");
11502 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunckfdd");
11503 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendkftd");
11505 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__fixkfsi");
11506 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__fixunskfsi");
11507 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixkfdi");
11508 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunskfdi");
11510 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__floatsikf");
11511 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__floatunsikf");
11512 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatdikf");
11513 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatundikf");
11515 if (TARGET_POWERPC64
)
11517 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixkfti_sw");
11518 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunskfti_sw");
11519 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattikf_sw");
11520 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntikf_sw");
11526 set_optab_libfunc (add_optab
, mode
, "_q_add");
11527 set_optab_libfunc (sub_optab
, mode
, "_q_sub");
11528 set_optab_libfunc (neg_optab
, mode
, "_q_neg");
11529 set_optab_libfunc (smul_optab
, mode
, "_q_mul");
11530 set_optab_libfunc (sdiv_optab
, mode
, "_q_div");
11531 if (TARGET_PPC_GPOPT
)
11532 set_optab_libfunc (sqrt_optab
, mode
, "_q_sqrt");
11534 set_optab_libfunc (eq_optab
, mode
, "_q_feq");
11535 set_optab_libfunc (ne_optab
, mode
, "_q_fne");
11536 set_optab_libfunc (gt_optab
, mode
, "_q_fgt");
11537 set_optab_libfunc (ge_optab
, mode
, "_q_fge");
11538 set_optab_libfunc (lt_optab
, mode
, "_q_flt");
11539 set_optab_libfunc (le_optab
, mode
, "_q_fle");
11541 set_conv_libfunc (sext_optab
, mode
, SFmode
, "_q_stoq");
11542 set_conv_libfunc (sext_optab
, mode
, DFmode
, "_q_dtoq");
11543 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "_q_qtos");
11544 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "_q_qtod");
11545 set_conv_libfunc (sfix_optab
, SImode
, mode
, "_q_qtoi");
11546 set_conv_libfunc (ufix_optab
, SImode
, mode
, "_q_qtou");
11547 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "_q_itoq");
11548 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "_q_utoq");
11553 rs6000_init_libfuncs (void)
11555 /* __float128 support. */
11556 if (TARGET_FLOAT128_TYPE
)
11558 init_float128_ibm (IFmode
);
11559 init_float128_ieee (KFmode
);
11562 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11563 if (TARGET_LONG_DOUBLE_128
)
11565 if (!TARGET_IEEEQUAD
)
11566 init_float128_ibm (TFmode
);
11568 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11570 init_float128_ieee (TFmode
);
/* Emit a potentially record-form instruction, setting DST from SRC.
   If DOT is 0, that is all; otherwise, set CCREG to the result of the
   signed comparison of DST with zero.  If DOT is 1, the generated RTL
   doesn't care about the DST result; if DOT is 2, it does.  If CCREG
   is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
   a separate COMPARE.  */
11582 rs6000_emit_dot_insn (rtx dst
, rtx src
, int dot
, rtx ccreg
)
11586 emit_move_insn (dst
, src
);
11590 if (cc_reg_not_cr0_operand (ccreg
, CCmode
))
11592 emit_move_insn (dst
, src
);
11593 emit_move_insn (ccreg
, gen_rtx_COMPARE (CCmode
, dst
, const0_rtx
));
11597 rtx ccset
= gen_rtx_SET (ccreg
, gen_rtx_COMPARE (CCmode
, src
, const0_rtx
));
11600 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, dst
);
11601 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, clobber
)));
11605 rtx set
= gen_rtx_SET (dst
, src
);
11606 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, set
)));
/* A validation routine: say whether CODE, a condition code, and MODE
   match.  The other alternatives either don't make sense or should
   never be generated.  */
11616 validate_condition_mode (enum rtx_code code
, machine_mode mode
)
11618 gcc_assert ((GET_RTX_CLASS (code
) == RTX_COMPARE
11619 || GET_RTX_CLASS (code
) == RTX_COMM_COMPARE
)
11620 && GET_MODE_CLASS (mode
) == MODE_CC
);
11622 /* These don't make sense. */
11623 gcc_assert ((code
!= GT
&& code
!= LT
&& code
!= GE
&& code
!= LE
)
11624 || mode
!= CCUNSmode
);
11626 gcc_assert ((code
!= GTU
&& code
!= LTU
&& code
!= GEU
&& code
!= LEU
)
11627 || mode
== CCUNSmode
);
11629 gcc_assert (mode
== CCFPmode
11630 || (code
!= ORDERED
&& code
!= UNORDERED
11631 && code
!= UNEQ
&& code
!= LTGT
11632 && code
!= UNGT
&& code
!= UNLT
11633 && code
!= UNGE
&& code
!= UNLE
));
11635 /* These are invalid; the information is not there. */
11636 gcc_assert (mode
!= CCEQmode
|| code
== EQ
|| code
== NE
);
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
   rldicl, rldicr, or rldic instruction in mode MODE.  If so, if E is
   not zero, store there the bit offset (counted from the right) where
   the single stretch of 1 bits begins; and similarly for B, the bit
   offset where it ends.  */
11647 rs6000_is_valid_mask (rtx mask
, int *b
, int *e
, machine_mode mode
)
11649 unsigned HOST_WIDE_INT val
= INTVAL (mask
);
11650 unsigned HOST_WIDE_INT bit
;
11652 int n
= GET_MODE_PRECISION (mode
);
11654 if (mode
!= DImode
&& mode
!= SImode
)
11657 if (INTVAL (mask
) >= 0)
11660 ne
= exact_log2 (bit
);
11661 nb
= exact_log2 (val
+ bit
);
11663 else if (val
+ 1 == 0)
11672 nb
= exact_log2 (bit
);
11673 ne
= exact_log2 (val
+ bit
);
11678 ne
= exact_log2 (bit
);
11679 if (val
+ bit
== 0)
11687 if (nb
< 0 || ne
< 0 || nb
>= n
|| ne
>= n
)
11699 rs6000_is_valid_rotate_dot_mask (rtx mask
, machine_mode mode
)
11702 if (rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
) && nb
>= ne
&& ne
> 0)
11706 /* *rotldi3_mask_dot requires for -m32 -mpowerpc64 that the mask is
11708 return (UINTVAL (mask
) << (63 - nb
)) <= 0x7fffffff;
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
   or rldicr instruction, to implement an AND with it in mode MODE.  */
11718 rs6000_is_valid_and_mask (rtx mask
, machine_mode mode
)
11722 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11725 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11727 if (mode
== DImode
)
11728 return (ne
== 0 || nb
== 63 || (nb
< 32 && ne
<= nb
));
11730 /* For SImode, rlwinm can do everything. */
11731 if (mode
== SImode
)
11732 return (nb
< 32 && ne
< 32);
/* Return the instruction template for an AND with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */
11741 rs6000_insn_for_and_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11745 if (!rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
))
11746 gcc_unreachable ();
11748 if (mode
== DImode
&& ne
== 0)
11750 operands
[3] = GEN_INT (63 - nb
);
11752 return "rldicl. %0,%1,0,%3";
11753 return "rldicl %0,%1,0,%3";
11756 if (mode
== DImode
&& nb
== 63)
11758 operands
[3] = GEN_INT (63 - ne
);
11760 return "rldicr. %0,%1,0,%3";
11761 return "rldicr %0,%1,0,%3";
11764 if (nb
< 32 && ne
< 32)
11766 operands
[3] = GEN_INT (31 - nb
);
11767 operands
[4] = GEN_INT (31 - ne
);
11769 return "rlwinm. %0,%1,0,%3,%4";
11770 return "rlwinm %0,%1,0,%3,%4";
11773 gcc_unreachable ();
/* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
   rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
   shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE.  */
11781 rs6000_is_valid_shift_mask (rtx mask
, rtx shift
, machine_mode mode
)
11785 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11788 int n
= GET_MODE_PRECISION (mode
);
11791 if (CONST_INT_P (XEXP (shift
, 1)))
11793 sh
= INTVAL (XEXP (shift
, 1));
11794 if (sh
< 0 || sh
>= n
)
11798 rtx_code code
= GET_CODE (shift
);
11800 /* Convert any shift by 0 to a rotate, to simplify below code. */
11804 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11805 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11807 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11813 /* DImode rotates need rld*. */
11814 if (mode
== DImode
&& code
== ROTATE
)
11815 return (nb
== 63 || ne
== 0 || ne
== sh
);
11817 /* SImode rotates need rlw*. */
11818 if (mode
== SImode
&& code
== ROTATE
)
11819 return (nb
< 32 && ne
< 32 && sh
< 32);
11821 /* Wrap-around masks are only okay for rotates. */
11825 /* Variable shifts are only okay for rotates. */
11829 /* Don't allow ASHIFT if the mask is wrong for that. */
11830 if (code
== ASHIFT
&& ne
< sh
)
11833 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11834 if the mask is wrong for that. */
11835 if (nb
< 32 && ne
< 32 && sh
< 32
11836 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11839 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11840 if the mask is wrong for that. */
11841 if (code
== LSHIFTRT
)
11843 if (nb
== 63 || ne
== 0 || ne
== sh
)
11844 return !(code
== LSHIFTRT
&& nb
>= sh
);
/* Return the instruction template for a shift with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */
11853 rs6000_insn_for_shift_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11857 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11858 gcc_unreachable ();
11860 if (mode
== DImode
&& ne
== 0)
11862 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11863 operands
[2] = GEN_INT (64 - INTVAL (operands
[2]));
11864 operands
[3] = GEN_INT (63 - nb
);
11866 return "rld%I2cl. %0,%1,%2,%3";
11867 return "rld%I2cl %0,%1,%2,%3";
11870 if (mode
== DImode
&& nb
== 63)
11872 operands
[3] = GEN_INT (63 - ne
);
11874 return "rld%I2cr. %0,%1,%2,%3";
11875 return "rld%I2cr %0,%1,%2,%3";
11879 && GET_CODE (operands
[4]) != LSHIFTRT
11880 && CONST_INT_P (operands
[2])
11881 && ne
== INTVAL (operands
[2]))
11883 operands
[3] = GEN_INT (63 - nb
);
11885 return "rld%I2c. %0,%1,%2,%3";
11886 return "rld%I2c %0,%1,%2,%3";
11889 if (nb
< 32 && ne
< 32)
11891 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11892 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
11893 operands
[3] = GEN_INT (31 - nb
);
11894 operands
[4] = GEN_INT (31 - ne
);
      /* This insn can also be a 64-bit rotate with mask that really makes
	 it just a shift right (with mask); the %h below are to adjust for
	 that situation (shift count is >= 32 in that case).  */
11899 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11900 return "rlw%I2nm %0,%1,%h2,%3,%4";
11903 gcc_unreachable ();
/* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
   rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
   ASHIFT, or LSHIFTRT) in mode MODE.  */
11911 rs6000_is_valid_insert_mask (rtx mask
, rtx shift
, machine_mode mode
)
11915 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11918 int n
= GET_MODE_PRECISION (mode
);
11920 int sh
= INTVAL (XEXP (shift
, 1));
11921 if (sh
< 0 || sh
>= n
)
11924 rtx_code code
= GET_CODE (shift
);
11926 /* Convert any shift by 0 to a rotate, to simplify below code. */
11930 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11931 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11933 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11939 /* DImode rotates need rldimi. */
11940 if (mode
== DImode
&& code
== ROTATE
)
11943 /* SImode rotates need rlwimi. */
11944 if (mode
== SImode
&& code
== ROTATE
)
11945 return (nb
< 32 && ne
< 32 && sh
< 32);
11947 /* Wrap-around masks are only okay for rotates. */
11951 /* Don't allow ASHIFT if the mask is wrong for that. */
11952 if (code
== ASHIFT
&& ne
< sh
)
11955 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11956 if the mask is wrong for that. */
11957 if (nb
< 32 && ne
< 32 && sh
< 32
11958 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11961 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11962 if the mask is wrong for that. */
11963 if (code
== LSHIFTRT
)
11966 return !(code
== LSHIFTRT
&& nb
>= sh
);

/* Return the instruction template for an insert with mask in mode MODE, with
   operands OPERANDS.  If DOT is true, make it a record-form instruction.  */

const char *
rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  /* Prefer rldimi because rlwimi is cracked.  */
  if (TARGET_POWERPC64
      && (!dot || mode == DImode)
      && GET_CODE (operands[4]) != LSHIFTRT
      && ne == INTVAL (operands[2]))
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
        return "rldimi. %0,%1,%2,%3";
      return "rldimi %0,%1,%2,%3";
    }

  if (nb < 32 && ne < 32)
    {
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
        operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      if (dot)
        return "rlwimi. %0,%1,%2,%3,%4";
      return "rlwimi %0,%1,%2,%3,%4";
    }

  gcc_unreachable ();
}

/* Return whether an AND with C (a CONST_INT) in mode MODE can be done
   using two machine instructions.  */

bool
rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
{
  /* There are two kinds of AND we can handle with two insns:
     1) those we can do with two rl* insn;
     2) ori[s];xori[s].

     We do not handle that last case yet.  */

  /* If there is just one stretch of ones, we can do it.  */
  if (rs6000_is_valid_mask (c, NULL, NULL, mode))
    return true;

  /* Otherwise, fill in the lowest "hole"; if we can do the result with
     one insn, we can do the whole thing with two.  */
  unsigned HOST_WIDE_INT val = INTVAL (c);
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;

  return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
}
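
/* As a worked example of the test above: for val = 0x00ff00ff (ones in bits
   0-7 and 16-23), bit1 = 0x1 is the lowest set bit, bit2 = 0x100 is the
   lowest bit of the lowest hole, val1 = 0x00ff0000 is val with its lowest
   stretch of ones removed, and bit3 = 0x10000 is the lowest bit of the next
   stretch.  val + bit3 - bit2 = 0x00ffffff, i.e. val with the hole filled in,
   which is a single stretch of ones, so the original AND can be done with
   two rl* insns.  */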

/* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
   If EXPAND is true, split rotate-and-mask instructions we generate to
   their constituent parts as well (this is used during expand); if DOT
   is 1, make the last insn a record-form instruction clobbering the
   destination GPR and setting the CC reg (from operands[3]); if 2, set
   that GPR as well as the CC reg.  */

void
rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
{
  gcc_assert (!(expand && dot));

  unsigned HOST_WIDE_INT val = INTVAL (operands[2]);

  /* If it is one stretch of ones, it is DImode; shift left, mask, then
     shift right.  This generates better code than doing the masks without
     shifts, or shifting first right and then left.  */
  int nb, ne;
  if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
    {
      gcc_assert (mode == DImode);

      int shift = 63 - nb;
      if (expand)
        {
          rtx tmp1 = gen_reg_rtx (DImode);
          rtx tmp2 = gen_reg_rtx (DImode);
          emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
          emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
          emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
        }
      else
        {
          rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
          tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
          emit_move_insn (operands[0], tmp);
          tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
          rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
        }
      return;
    }

  /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
     that does the rest.  */
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;

  unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
  unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;

  gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));

  /* Two "no-rotate"-and-mask instructions, for SImode.  */
  if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
    {
      gcc_assert (mode == SImode);

      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
      emit_move_insn (reg, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  gcc_assert (mode == DImode);

  /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
     insns; we have to do the first in SImode, because it wraps.  */
  if (mask2 <= 0xffffffff
      && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
    {
      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
                             GEN_INT (mask1));
      rtx reg_low = gen_lowpart (SImode, reg);
      emit_move_insn (reg_low, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  /* Two rld* insns: rotate, clear the hole in the middle (which now is
     at the top end), rotate back and clear the other hole.  */
  int right = exact_log2 (bit3);
  int left = 64 - right;

  /* Rotate the mask too.  */
  mask1 = (mask1 >> right) | ((bit2 - 1) << left);

  if (expand)
    {
      rtx tmp1 = gen_reg_rtx (DImode);
      rtx tmp2 = gen_reg_rtx (DImode);
      rtx tmp3 = gen_reg_rtx (DImode);
      emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
      emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
      emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
      emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
    }
  else
    {
      rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
      emit_move_insn (operands[0], tmp);
      tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
    }
}
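
/* As a worked example of the rotate case above: for DImode val =
   0x0000ffff0000ffff we get bit2 = 0x10000, bit3 = 0x100000000, so
   mask2 = 0x0000ffffffffffff, right = 32, left = 32, and the rotated mask1
   also becomes 0x0000ffffffffffff.  The emitted sequence is "rotate left 32,
   AND, rotate left 32, AND", which clears the original bits 16-31 while they
   sit in the top half and clears bits 48-63 afterwards, leaving exactly
   operands[1] & val.  */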

/* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
   for lfq and stfq insns iff the registers are hard registers.  */

int
registers_ok_for_quad_peep (rtx reg1, rtx reg2)
{
  /* We might have been passed a SUBREG.  */
  if (!REG_P (reg1) || !REG_P (reg2))
    return 0;

  /* We might have been passed non floating point registers.  */
  if (!FP_REGNO_P (REGNO (reg1))
      || !FP_REGNO_P (REGNO (reg2)))
    return 0;

  return (REGNO (reg1) == REGNO (reg2) - 1);
}

/* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
   addr1 and addr2 must be in consecutive memory locations
   (addr2 == addr1 + 8).  */

int
mems_ok_for_quad_peep (rtx mem1, rtx mem2)
{
  rtx addr1, addr2;
  unsigned int reg1, reg2;
  int offset1, offset2;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
    return 0;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  /* Extract an offset (if used) from the first addr.  */
  if (GET_CODE (addr1) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (!REG_P (XEXP (addr1, 0)))
        return 0;
      else
        {
          reg1 = REGNO (XEXP (addr1, 0));
          /* The offset must be constant!  */
          if (!CONST_INT_P (XEXP (addr1, 1)))
            return 0;
          offset1 = INTVAL (XEXP (addr1, 1));
        }
    }
  else if (!REG_P (addr1))
    return 0;
  else
    {
      reg1 = REGNO (addr1);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset1 = 0;
    }

  /* And now for the second addr.  */
  if (GET_CODE (addr2) == PLUS)
    {
      /* If not a REG, return zero.  */
      if (!REG_P (XEXP (addr2, 0)))
        return 0;
      else
        {
          reg2 = REGNO (XEXP (addr2, 0));
          /* The offset must be constant.  */
          if (!CONST_INT_P (XEXP (addr2, 1)))
            return 0;
          offset2 = INTVAL (XEXP (addr2, 1));
        }
    }
  else if (!REG_P (addr2))
    return 0;
  else
    {
      reg2 = REGNO (addr2);
      /* This was a simple (mem (reg)) expression.  Offset is 0.  */
      offset2 = 0;
    }

  /* Both of these must have the same base register.  */
  if (reg1 != reg2)
    return 0;

  /* The offset for the second addr must be 8 more than the first addr.  */
  if (offset2 != offset1 + 8)
    return 0;

  /* All the tests passed.  addr1 and addr2 are valid for lfq or stfq
     insns.  */
  return 1;
}
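
/* For example, (mem:DF (plus (reg 3) (const_int 16))) followed by
   (mem:DF (plus (reg 3) (const_int 24))) satisfies these checks: both are
   non-volatile, share the same base register, and the second offset is the
   first plus 8.  */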

/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.  For SDmode values we
   need to use DDmode, in all other cases we can use the same mode.  */
static machine_mode
rs6000_secondary_memory_needed_mode (machine_mode mode)
{
  if (lra_in_progress && mode == SDmode)
    return DDmode;
  return mode;
}

/* Classify a register type.  Because the FMRGOW/FMRGEW instructions only work
   on traditional floating point registers, and the VMRGOW/VMRGEW instructions
   only work on the traditional altivec registers, note if an altivec register
   was chosen.  */

static enum rs6000_reg_type
register_to_reg_type (rtx reg, bool *is_altivec)
{
  HOST_WIDE_INT regno;
  enum reg_class rclass;

  if (SUBREG_P (reg))
    reg = SUBREG_REG (reg);

  if (!REG_P (reg))
    return NO_REG_TYPE;

  regno = REGNO (reg);
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!lra_in_progress && !reload_completed)
        return PSEUDO_REG_TYPE;

      regno = true_regnum (reg);
      if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
        return PSEUDO_REG_TYPE;
    }

  gcc_assert (regno >= 0);

  if (is_altivec && ALTIVEC_REGNO_P (regno))
    *is_altivec = true;

  rclass = rs6000_regno_regclass[regno];
  return reg_class_to_reg_type[(int)rclass];
}

/* Helper function to return the cost of adding a TOC entry address.  */

static inline int
rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
{
  int ret;

  if (TARGET_CMODEL != CMODEL_SMALL)
    ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;

  else
    ret = (TARGET_MINIMAL_TOC) ? 6 : 3;

  return ret;
}
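
/* That is, for the medium and large code models the extra cost is 1 when the
   register class has no reg+offset addressing and 2 when it does; for the
   small code model it is 6 with -mminimal-toc and 3 otherwise.  */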

/* Helper function for rs6000_secondary_reload to determine whether the memory
   address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
   needs reloading.  Return negative if the memory is not handled by the
   memory helper functions and a different reload method should be tried, 0 if
   no additional instructions are needed, and positive to give the extra cost
   for the memory.  */

static int
12315 rs6000_secondary_reload_memory (rtx addr
,
12316 enum reg_class rclass
,
12319 int extra_cost
= 0;
12320 rtx reg
, and_arg
, plus_arg0
, plus_arg1
;
12321 addr_mask_type addr_mask
;
12322 const char *type
= NULL
;
12323 const char *fail_msg
= NULL
;
12325 if (GPR_REG_CLASS_P (rclass
))
12326 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
12328 else if (rclass
== FLOAT_REGS
)
12329 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
12331 else if (rclass
== ALTIVEC_REGS
)
12332 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
12334 /* For the combined VSX_REGS, turn off Altivec AND -16. */
12335 else if (rclass
== VSX_REGS
)
12336 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
]
12337 & ~RELOAD_REG_AND_M16
);
12339 /* If the register allocator hasn't made up its mind yet on the register
12340 class to use, settle on defaults to use. */
12341 else if (rclass
== NO_REGS
)
12343 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
]
12344 & ~RELOAD_REG_AND_M16
);
12346 if ((addr_mask
& RELOAD_REG_MULTIPLE
) != 0)
12347 addr_mask
&= ~(RELOAD_REG_INDEXED
12348 | RELOAD_REG_PRE_INCDEC
12349 | RELOAD_REG_PRE_MODIFY
);
12355 /* If the register isn't valid in this register class, just return now. */
12356 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
12358 if (TARGET_DEBUG_ADDR
)
12361 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12362 "not valid in class\n",
12363 GET_MODE_NAME (mode
), reg_class_names
[rclass
]);
12370 switch (GET_CODE (addr
))
    /* Does the register class support auto update forms for this mode?  We
       don't need a scratch register, since the powerpc only supports
       PRE_INC, PRE_DEC, and PRE_MODIFY.  */
12377 reg
= XEXP (addr
, 0);
12378 if (!base_reg_operand (addr
, GET_MODE (reg
)))
12380 fail_msg
= "no base register #1";
12384 else if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
12392 reg
= XEXP (addr
, 0);
12393 plus_arg1
= XEXP (addr
, 1);
12394 if (!base_reg_operand (reg
, GET_MODE (reg
))
12395 || GET_CODE (plus_arg1
) != PLUS
12396 || !rtx_equal_p (reg
, XEXP (plus_arg1
, 0)))
12398 fail_msg
= "bad PRE_MODIFY";
12402 else if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
12409 /* Do we need to simulate AND -16 to clear the bottom address bits used
12410 in VMX load/stores? Only allow the AND for vector sizes. */
12412 and_arg
= XEXP (addr
, 0);
12413 if (GET_MODE_SIZE (mode
) != 16
12414 || !CONST_INT_P (XEXP (addr
, 1))
12415 || INTVAL (XEXP (addr
, 1)) != -16)
12417 fail_msg
= "bad Altivec AND #1";
12421 if (rclass
!= ALTIVEC_REGS
)
12423 if (legitimate_indirect_address_p (and_arg
, false))
12426 else if (legitimate_indexed_address_p (and_arg
, false))
12431 fail_msg
= "bad Altivec AND #2";
12439 /* If this is an indirect address, make sure it is a base register. */
12442 if (!legitimate_indirect_address_p (addr
, false))
12449 /* If this is an indexed address, make sure the register class can handle
12450 indexed addresses for this mode. */
12452 plus_arg0
= XEXP (addr
, 0);
12453 plus_arg1
= XEXP (addr
, 1);
12455 /* (plus (plus (reg) (constant)) (constant)) is generated during
12456 push_reload processing, so handle it now. */
12457 if (GET_CODE (plus_arg0
) == PLUS
&& CONST_INT_P (plus_arg1
))
12459 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12466 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12467 push_reload processing, so handle it now. */
12468 else if (GET_CODE (plus_arg0
) == PLUS
&& REG_P (plus_arg1
))
12470 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12473 type
= "indexed #2";
12477 else if (!base_reg_operand (plus_arg0
, GET_MODE (plus_arg0
)))
12479 fail_msg
= "no base register #2";
12483 else if (int_reg_operand (plus_arg1
, GET_MODE (plus_arg1
)))
12485 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0
12486 || !legitimate_indexed_address_p (addr
, false))
12493 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0
12494 && CONST_INT_P (plus_arg1
))
12496 if (!quad_address_offset_p (INTVAL (plus_arg1
)))
12499 type
= "vector d-form offset";
12503 /* Make sure the register class can handle offset addresses. */
12504 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
12506 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12509 type
= "offset #2";
12515 fail_msg
= "bad PLUS";
12522 /* Quad offsets are restricted and can't handle normal addresses. */
12523 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12526 type
= "vector d-form lo_sum";
12529 else if (!legitimate_lo_sum_address_p (mode
, addr
, false))
12531 fail_msg
= "bad LO_SUM";
12535 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12542 /* Static addresses need to create a TOC entry. */
12546 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12549 type
= "vector d-form lo_sum #2";
12555 extra_cost
= rs6000_secondary_reload_toc_costs (addr_mask
);
12559 /* TOC references look like offsetable memory. */
12561 if (TARGET_CMODEL
== CMODEL_SMALL
|| XINT (addr
, 1) != UNSPEC_TOCREL
)
12563 fail_msg
= "bad UNSPEC";
12567 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12570 type
= "vector d-form lo_sum #3";
12573 else if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12576 type
= "toc reference";
12582 fail_msg
= "bad address";
12587 if (TARGET_DEBUG_ADDR
/* && extra_cost != 0 */)
12589 if (extra_cost
< 0)
12591 "rs6000_secondary_reload_memory error: mode = %s, "
12592 "class = %s, addr_mask = '%s', %s\n",
12593 GET_MODE_NAME (mode
),
12594 reg_class_names
[rclass
],
12595 rs6000_debug_addr_mask (addr_mask
, false),
12596 (fail_msg
!= NULL
) ? fail_msg
: "<bad address>");
12600 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12601 "addr_mask = '%s', extra cost = %d, %s\n",
12602 GET_MODE_NAME (mode
),
12603 reg_class_names
[rclass
],
12604 rs6000_debug_addr_mask (addr_mask
, false),
12606 (type
) ? type
: "<none>");
/* Helper function for rs6000_secondary_reload to return true if a move to a
   different register class is really a simple move.  */
12618 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type
,
12619 enum rs6000_reg_type from_type
,
12622 int size
= GET_MODE_SIZE (mode
);
  /* Add support for various direct moves available.  In this function, we
     only look at cases where we don't need any extra registers, and one or
     more simple move insns are issued.  Originally small integers are not
     allowed in FPR/VSX registers.  Single precision binary floating point is
     not a simple move because we need to convert to the single precision
     memory layout.  The 4-byte SDmode can be moved.  TDmode values are
     disallowed since they need special direct move handling, which we do not
     support yet.  */
12631 if (TARGET_DIRECT_MOVE
12632 && ((to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12633 || (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
12635 if (TARGET_POWERPC64
)
12637 /* ISA 2.07: MTVSRD or MVFVSRD. */
12641 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12642 if (size
== 16 && TARGET_P9_VECTOR
&& mode
!= TDmode
)
12646 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12647 if (TARGET_P8_VECTOR
)
12649 if (mode
== SImode
)
12652 if (TARGET_P9_VECTOR
&& (mode
== HImode
|| mode
== QImode
))
12656 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12657 if (mode
== SDmode
)
12661 /* Move to/from SPR. */
12662 else if ((size
== 4 || (TARGET_POWERPC64
&& size
== 8))
12663 && ((to_type
== GPR_REG_TYPE
&& from_type
== SPR_REG_TYPE
)
12664 || (to_type
== SPR_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
12670 /* Direct move helper function for rs6000_secondary_reload, handle all of the
12671 special direct moves that involve allocating an extra register, return the
12672 insn code of the helper function if there is such a function or
12673 CODE_FOR_nothing if not. */
12676 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type
,
12677 enum rs6000_reg_type from_type
,
12679 secondary_reload_info
*sri
,
12683 enum insn_code icode
= CODE_FOR_nothing
;
12685 int size
= GET_MODE_SIZE (mode
);
12687 if (TARGET_POWERPC64
&& size
== 16)
      /* Handle moving 128-bit values from GPRs to VSX registers on
	 ISA 2.07 (power8, power9) when running in 64-bit mode using
	 XXPERMDI to glue the two 64-bit values back together.  */
12692 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
12694 cost
= 3; /* 2 mtvsrd's, 1 xxpermdi. */
12695 icode
= reg_addr
[mode
].reload_vsx_gpr
;
      /* Handle moving 128-bit values from VSX registers to GPRs on
	 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to
	 the bottom 64-bit value.  */
12701 else if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12703 cost
= 3; /* 2 mfvsrd's, 1 xxpermdi. */
12704 icode
= reg_addr
[mode
].reload_gpr_vsx
;
12708 else if (TARGET_POWERPC64
&& mode
== SFmode
)
12710 if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12712 cost
= 3; /* xscvdpspn, mfvsrd, and. */
12713 icode
= reg_addr
[mode
].reload_gpr_vsx
;
12716 else if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
12718 cost
= 2; /* mtvsrz, xscvspdpn. */
12719 icode
= reg_addr
[mode
].reload_vsx_gpr
;
12723 else if (!TARGET_POWERPC64
&& size
== 8)
12725 /* Handle moving 64-bit values from GPRs to floating point registers on
12726 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12727 32-bit values back together. Altivec register classes must be handled
12728 specially since a different instruction is used, and the secondary
12729 reload support requires a single instruction class in the scratch
12730 register constraint. However, right now TFmode is not allowed in
12731 Altivec registers, so the pattern will never match. */
12732 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
&& !altivec_p
)
12734 cost
= 3; /* 2 mtvsrwz's, 1 fmrgow. */
12735 icode
= reg_addr
[mode
].reload_fpr_gpr
;
12739 if (icode
!= CODE_FOR_nothing
)
12744 sri
->icode
= icode
;
12745 sri
->extra_cost
= cost
;
/* Return whether a move between two register classes can be done either
   directly (simple move) or via a pattern that uses a single extra temporary
   (using ISA 2.07's direct move in this case).  */
12757 rs6000_secondary_reload_move (enum rs6000_reg_type to_type
,
12758 enum rs6000_reg_type from_type
,
12760 secondary_reload_info
*sri
,
12763 /* Fall back to load/store reloads if either type is not a register. */
12764 if (to_type
== NO_REG_TYPE
|| from_type
== NO_REG_TYPE
)
12767 /* If we haven't allocated registers yet, assume the move can be done for the
12768 standard register types. */
12769 if ((to_type
== PSEUDO_REG_TYPE
&& from_type
== PSEUDO_REG_TYPE
)
12770 || (to_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (from_type
))
12771 || (from_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (to_type
)))
12774 /* Moves to the same set of registers is a simple move for non-specialized
12776 if (to_type
== from_type
&& IS_STD_REG_TYPE (to_type
))
12779 /* Check whether a simple move can be done directly. */
12780 if (rs6000_secondary_reload_simple_move (to_type
, from_type
, mode
))
12784 sri
->icode
= CODE_FOR_nothing
;
12785 sri
->extra_cost
= 0;
12790 /* Now check if we can do it in a few steps. */
12791 return rs6000_secondary_reload_direct_move (to_type
, from_type
, mode
, sri
,
/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.

   For VSX and Altivec, we may need a register to convert sp+offset into
   reg+reg addressing.

   For misaligned 64-bit gpr loads and stores we need a register to
   convert an offset address to indirect.  */
12806 rs6000_secondary_reload (bool in_p
,
12808 reg_class_t rclass_i
,
12810 secondary_reload_info
*sri
)
12812 enum reg_class rclass
= (enum reg_class
) rclass_i
;
12813 reg_class_t ret
= ALL_REGS
;
12814 enum insn_code icode
;
12815 bool default_p
= false;
12816 bool done_p
= false;
12818 /* Allow subreg of memory before/during reload. */
12819 bool memory_p
= (MEM_P (x
)
12820 || (!reload_completed
&& SUBREG_P (x
)
12821 && MEM_P (SUBREG_REG (x
))));
12823 sri
->icode
= CODE_FOR_nothing
;
12824 sri
->t_icode
= CODE_FOR_nothing
;
12825 sri
->extra_cost
= 0;
12827 ? reg_addr
[mode
].reload_load
12828 : reg_addr
[mode
].reload_store
);
12830 if (REG_P (x
) || register_operand (x
, mode
))
12832 enum rs6000_reg_type to_type
= reg_class_to_reg_type
[(int)rclass
];
12833 bool altivec_p
= (rclass
== ALTIVEC_REGS
);
12834 enum rs6000_reg_type from_type
= register_to_reg_type (x
, &altivec_p
);
12837 std::swap (to_type
, from_type
);
12839 /* Can we do a direct move of some sort? */
12840 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
, sri
,
12843 icode
= (enum insn_code
)sri
->icode
;
12850 /* Make sure 0.0 is not reloaded or forced into memory. */
12851 if (x
== CONST0_RTX (mode
) && VSX_REG_CLASS_P (rclass
))
  /* If this is a scalar floating point value and we want to load it into the
     traditional Altivec registers, move it through a traditional floating
     point register, unless we have D-form addressing.  Also make sure that
     non-zero constants use an FPR.  */
12862 if (!done_p
&& reg_addr
[mode
].scalar_in_vmx_p
12863 && !mode_supports_vmx_dform (mode
)
12864 && (rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
12865 && (memory_p
|| CONST_DOUBLE_P (x
)))
12872 /* Handle reload of load/stores if we have reload helper functions. */
12873 if (!done_p
&& icode
!= CODE_FOR_nothing
&& memory_p
)
12875 int extra_cost
= rs6000_secondary_reload_memory (XEXP (x
, 0), rclass
,
12878 if (extra_cost
>= 0)
12882 if (extra_cost
> 0)
12884 sri
->extra_cost
= extra_cost
;
12885 sri
->icode
= icode
;
12890 /* Handle unaligned loads and stores of integer registers. */
12891 if (!done_p
&& TARGET_POWERPC64
12892 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12894 && GET_MODE_SIZE (GET_MODE (x
)) >= UNITS_PER_WORD
)
12896 rtx addr
= XEXP (x
, 0);
12897 rtx off
= address_offset (addr
);
12899 if (off
!= NULL_RTX
)
12901 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12902 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12904 /* We need a secondary reload when our legitimate_address_p
12905 says the address is good (as otherwise the entire address
12906 will be reloaded), and the offset is not a multiple of
12907 four or we have an address wrap. Address wrap will only
12908 occur for LO_SUMs since legitimate_offset_address_p
12909 rejects addresses for 16-byte mems that will wrap. */
12910 if (GET_CODE (addr
) == LO_SUM
12911 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12912 && ((offset
& 3) != 0
12913 || ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
))
12914 : (offset
+ 0x8000 < 0x10000 - extra
/* legitimate_address_p */
12915 && (offset
& 3) != 0))
12917 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12919 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_load
12920 : CODE_FOR_reload_di_load
);
12922 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_store
12923 : CODE_FOR_reload_di_store
);
12924 sri
->extra_cost
= 2;
12935 if (!done_p
&& !TARGET_POWERPC64
12936 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12938 && GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
12940 rtx addr
= XEXP (x
, 0);
12941 rtx off
= address_offset (addr
);
12943 if (off
!= NULL_RTX
)
12945 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12946 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12948 /* We need a secondary reload when our legitimate_address_p
12949 says the address is good (as otherwise the entire address
12950 will be reloaded), and we have a wrap.
12952 legitimate_lo_sum_address_p allows LO_SUM addresses to
12953 have any offset so test for wrap in the low 16 bits.
12955 legitimate_offset_address_p checks for the range
12956 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12957 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12958 [0x7ff4,0x7fff] respectively, so test for the
12959 intersection of these ranges, [0x7ffc,0x7fff] and
12960 [0x7ff4,0x7ff7] respectively.
12962 Note that the address we see here may have been
12963 manipulated by legitimize_reload_address. */
12964 if (GET_CODE (addr
) == LO_SUM
12965 ? ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
12966 : offset
- (0x8000 - extra
) < UNITS_PER_WORD
)
12969 sri
->icode
= CODE_FOR_reload_si_load
;
12971 sri
->icode
= CODE_FOR_reload_si_store
;
12972 sri
->extra_cost
= 2;
12987 ret
= default_secondary_reload (in_p
, x
, rclass
, mode
, sri
);
12989 gcc_assert (ret
!= ALL_REGS
);
12991 if (TARGET_DEBUG_ADDR
)
12994 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12996 reg_class_names
[ret
],
12997 in_p
? "true" : "false",
12998 reg_class_names
[rclass
],
12999 GET_MODE_NAME (mode
));
13001 if (reload_completed
)
13002 fputs (", after reload", stderr
);
13005 fputs (", done_p not set", stderr
);
13008 fputs (", default secondary reload", stderr
);
13010 if (sri
->icode
!= CODE_FOR_nothing
)
13011 fprintf (stderr
, ", reload func = %s, extra cost = %d",
13012 insn_data
[sri
->icode
].name
, sri
->extra_cost
);
13014 else if (sri
->extra_cost
> 0)
13015 fprintf (stderr
, ", extra cost = %d", sri
->extra_cost
);
13017 fputs ("\n", stderr
);
13024 /* Better tracing for rs6000_secondary_reload_inner. */
13027 rs6000_secondary_reload_trace (int line
, rtx reg
, rtx mem
, rtx scratch
,
13032 gcc_assert (reg
!= NULL_RTX
&& mem
!= NULL_RTX
&& scratch
!= NULL_RTX
);
13034 fprintf (stderr
, "rs6000_secondary_reload_inner:%d, type = %s\n", line
,
13035 store_p
? "store" : "load");
13038 set
= gen_rtx_SET (mem
, reg
);
13040 set
= gen_rtx_SET (reg
, mem
);
13042 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
13043 debug_rtx (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
13046 static void rs6000_secondary_reload_fail (int, rtx
, rtx
, rtx
, bool)
13047 ATTRIBUTE_NORETURN
;
13050 rs6000_secondary_reload_fail (int line
, rtx reg
, rtx mem
, rtx scratch
,
13053 rs6000_secondary_reload_trace (line
, reg
, mem
, scratch
, store_p
);
13054 gcc_unreachable ();
13057 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
13058 reload helper functions. These were identified in
13059 rs6000_secondary_reload_memory, and if reload decided to use the secondary
13060 reload, it calls the insns:
13061 reload_<RELOAD:mode>_<P:mptrsize>_store
13062 reload_<RELOAD:mode>_<P:mptrsize>_load
13064 which in turn calls this function, to do whatever is necessary to create
13065 valid addresses. */
13068 rs6000_secondary_reload_inner (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
13070 int regno
= true_regnum (reg
);
13071 machine_mode mode
= GET_MODE (reg
);
13072 addr_mask_type addr_mask
;
13075 rtx op_reg
, op0
, op1
;
13080 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
) || !MEM_P (mem
)
13081 || !base_reg_operand (scratch
, GET_MODE (scratch
)))
13082 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13084 if (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
))
13085 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
13087 else if (IN_RANGE (regno
, FIRST_FPR_REGNO
, LAST_FPR_REGNO
))
13088 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
13090 else if (IN_RANGE (regno
, FIRST_ALTIVEC_REGNO
, LAST_ALTIVEC_REGNO
))
13091 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
13094 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13096 /* Make sure the mode is valid in this register class. */
13097 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
13098 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13100 if (TARGET_DEBUG_ADDR
)
13101 rs6000_secondary_reload_trace (__LINE__
, reg
, mem
, scratch
, store_p
);
13103 new_addr
= addr
= XEXP (mem
, 0);
13104 switch (GET_CODE (addr
))
13106 /* Does the register class support auto update forms for this mode? If
13107 not, do the update now. We don't need a scratch register, since the
13108 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
13111 op_reg
= XEXP (addr
, 0);
13112 if (!base_reg_operand (op_reg
, Pmode
))
13113 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13115 if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
13117 int delta
= GET_MODE_SIZE (mode
);
13118 if (GET_CODE (addr
) == PRE_DEC
)
13120 emit_insn (gen_add2_insn (op_reg
, GEN_INT (delta
)));
13126 op0
= XEXP (addr
, 0);
13127 op1
= XEXP (addr
, 1);
13128 if (!base_reg_operand (op0
, Pmode
)
13129 || GET_CODE (op1
) != PLUS
13130 || !rtx_equal_p (op0
, XEXP (op1
, 0)))
13131 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13133 if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
13135 emit_insn (gen_rtx_SET (op0
, op1
));
13140 /* Do we need to simulate AND -16 to clear the bottom address bits used
13141 in VMX load/stores? */
13143 op0
= XEXP (addr
, 0);
13144 op1
= XEXP (addr
, 1);
13145 if ((addr_mask
& RELOAD_REG_AND_M16
) == 0)
13147 if (REG_P (op0
) || SUBREG_P (op0
))
13150 else if (GET_CODE (op1
) == PLUS
)
13152 emit_insn (gen_rtx_SET (scratch
, op1
));
13157 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13159 and_op
= gen_rtx_AND (GET_MODE (scratch
), op_reg
, op1
);
13160 cc_clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (CCmode
));
13161 rv
= gen_rtvec (2, gen_rtx_SET (scratch
, and_op
), cc_clobber
);
13162 emit_insn (gen_rtx_PARALLEL (VOIDmode
, rv
));
13163 new_addr
= scratch
;
13167 /* If this is an indirect address, make sure it is a base register. */
13170 if (!base_reg_operand (addr
, GET_MODE (addr
)))
13172 emit_insn (gen_rtx_SET (scratch
, addr
));
13173 new_addr
= scratch
;
13177 /* If this is an indexed address, make sure the register class can handle
13178 indexed addresses for this mode. */
13180 op0
= XEXP (addr
, 0);
13181 op1
= XEXP (addr
, 1);
13182 if (!base_reg_operand (op0
, Pmode
))
13183 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13185 else if (int_reg_operand (op1
, Pmode
))
13187 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
13189 emit_insn (gen_rtx_SET (scratch
, addr
));
13190 new_addr
= scratch
;
13194 else if (mode_supports_dq_form (mode
) && CONST_INT_P (op1
))
13196 if (((addr_mask
& RELOAD_REG_QUAD_OFFSET
) == 0)
13197 || !quad_address_p (addr
, mode
, false))
13199 emit_insn (gen_rtx_SET (scratch
, addr
));
13200 new_addr
= scratch
;
13204 /* Make sure the register class can handle offset addresses. */
13205 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
13207 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
13209 emit_insn (gen_rtx_SET (scratch
, addr
));
13210 new_addr
= scratch
;
13215 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13220 op0
= XEXP (addr
, 0);
13221 op1
= XEXP (addr
, 1);
13222 if (!base_reg_operand (op0
, Pmode
))
13223 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13225 else if (int_reg_operand (op1
, Pmode
))
13227 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
13229 emit_insn (gen_rtx_SET (scratch
, addr
));
13230 new_addr
= scratch
;
13234 /* Quad offsets are restricted and can't handle normal addresses. */
13235 else if (mode_supports_dq_form (mode
))
13237 emit_insn (gen_rtx_SET (scratch
, addr
));
13238 new_addr
= scratch
;
13241 /* Make sure the register class can handle offset addresses. */
13242 else if (legitimate_lo_sum_address_p (mode
, addr
, false))
13244 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
13246 emit_insn (gen_rtx_SET (scratch
, addr
));
13247 new_addr
= scratch
;
13252 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13259 rs6000_emit_move (scratch
, addr
, Pmode
);
13260 new_addr
= scratch
;
13264 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
13267 /* Adjust the address if it changed. */
13268 if (addr
!= new_addr
)
13270 mem
= replace_equiv_address_nv (mem
, new_addr
);
13271 if (TARGET_DEBUG_ADDR
)
13272 fprintf (stderr
, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
13275 /* Now create the move. */
13277 emit_insn (gen_rtx_SET (mem
, reg
));
13279 emit_insn (gen_rtx_SET (reg
, mem
));
13284 /* Convert reloads involving 64-bit gprs and misaligned offset
13285 addressing, or multiple 32-bit gprs and offsets that are too large,
13286 to use indirect addressing. */
13289 rs6000_secondary_reload_gpr (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
13291 int regno
= true_regnum (reg
);
13292 enum reg_class rclass
;
13294 rtx scratch_or_premodify
= scratch
;
13296 if (TARGET_DEBUG_ADDR
)
13298 fprintf (stderr
, "\nrs6000_secondary_reload_gpr, type = %s\n",
13299 store_p
? "store" : "load");
13300 fprintf (stderr
, "reg:\n");
13302 fprintf (stderr
, "mem:\n");
13304 fprintf (stderr
, "scratch:\n");
13305 debug_rtx (scratch
);
13308 gcc_assert (regno
>= 0 && HARD_REGISTER_NUM_P (regno
));
13309 gcc_assert (MEM_P (mem
));
13310 rclass
= REGNO_REG_CLASS (regno
);
13311 gcc_assert (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
);
13312 addr
= XEXP (mem
, 0);
13314 if (GET_CODE (addr
) == PRE_MODIFY
)
13316 gcc_assert (REG_P (XEXP (addr
, 0))
13317 && GET_CODE (XEXP (addr
, 1)) == PLUS
13318 && XEXP (XEXP (addr
, 1), 0) == XEXP (addr
, 0));
13319 scratch_or_premodify
= XEXP (addr
, 0);
13320 addr
= XEXP (addr
, 1);
13322 gcc_assert (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
);
13324 rs6000_emit_move (scratch_or_premodify
, addr
, Pmode
);
13326 mem
= replace_equiv_address_nv (mem
, scratch_or_premodify
);
13328 /* Now create the move. */
13330 emit_insn (gen_rtx_SET (mem
, reg
));
13332 emit_insn (gen_rtx_SET (reg
, mem
));
/* Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS; but on some machines
   in some cases it is preferable to use a more restrictive class.

   On the RS/6000, we have to return NO_REGS when we want to reload a
   floating-point CONST_DOUBLE to force it to be copied to memory.

   We also don't want to reload integer values into floating-point
   registers if we can at all help it.  In fact, this can
   cause reload to die, if it tries to generate a reload of CTR
   into a FP register and discovers it doesn't have the memory location
   required.

   ??? Would it be a good idea to have reload do the converse, that is
   try to reload floating modes into FP registers if possible?  */
13355 static enum reg_class
13356 rs6000_preferred_reload_class (rtx x
, enum reg_class rclass
)
13358 machine_mode mode
= GET_MODE (x
);
13359 bool is_constant
= CONSTANT_P (x
);
13361 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
13362 reload class for it. */
13363 if ((rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
13364 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
) == 0)
13367 if ((rclass
== FLOAT_REGS
|| rclass
== VSX_REGS
)
13368 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
] & RELOAD_REG_VALID
) == 0)
13371 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13372 the reloading of address expressions using PLUS into floating point
13374 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
) && GET_CODE (x
) != PLUS
)
13378 /* Zero is always allowed in all VSX registers. */
13379 if (x
== CONST0_RTX (mode
))
13382 /* If this is a vector constant that can be formed with a few Altivec
13383 instructions, we want altivec registers. */
13384 if (GET_CODE (x
) == CONST_VECTOR
&& easy_vector_constant (x
, mode
))
13385 return ALTIVEC_REGS
;
13387 /* If this is an integer constant that can easily be loaded into
13388 vector registers, allow it. */
13389 if (CONST_INT_P (x
))
13391 HOST_WIDE_INT value
= INTVAL (x
);
13393 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13394 2.06 can generate it in the Altivec registers with
13398 if (TARGET_P8_VECTOR
)
13400 else if (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
13401 return ALTIVEC_REGS
;
13406 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13407 a sign extend in the Altivec registers. */
13408 if (IN_RANGE (value
, -128, 127) && TARGET_P9_VECTOR
13409 && (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
))
13410 return ALTIVEC_REGS
;
13413 /* Force constant to memory. */
13417 /* D-form addressing can easily reload the value. */
13418 if (mode_supports_vmx_dform (mode
)
13419 || mode_supports_dq_form (mode
))
13422 /* If this is a scalar floating point value and we don't have D-form
13423 addressing, prefer the traditional floating point registers so that we
13424 can use D-form (register+offset) addressing. */
13425 if (rclass
== VSX_REGS
13426 && (mode
== SFmode
|| GET_MODE_SIZE (mode
) == 8))
13429 /* Prefer the Altivec registers if Altivec is handling the vector
13430 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13432 if (VECTOR_UNIT_ALTIVEC_P (mode
) || VECTOR_MEM_ALTIVEC_P (mode
)
13433 || mode
== V1TImode
)
13434 return ALTIVEC_REGS
;
13439 if (is_constant
|| GET_CODE (x
) == PLUS
)
13441 if (reg_class_subset_p (GENERAL_REGS
, rclass
))
13442 return GENERAL_REGS
;
13443 if (reg_class_subset_p (BASE_REGS
, rclass
))
13448 /* For the vector pair and vector quad modes, prefer their natural register
13449 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13450 the GPR registers. */
13451 if (rclass
== GEN_OR_FLOAT_REGS
)
13453 if (mode
== OOmode
)
13456 if (mode
== XOmode
)
13459 if (GET_MODE_CLASS (mode
) == MODE_INT
)
13460 return GENERAL_REGS
;
13466 /* Debug version of rs6000_preferred_reload_class. */
13467 static enum reg_class
13468 rs6000_debug_preferred_reload_class (rtx x
, enum reg_class rclass
)
13470 enum reg_class ret
= rs6000_preferred_reload_class (x
, rclass
);
13473 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13475 reg_class_names
[ret
], reg_class_names
[rclass
],
13476 GET_MODE_NAME (GET_MODE (x
)));
13482 /* If we are copying between FP or AltiVec registers and anything else, we need
13483 a memory location. The exception is when we are targeting ppc64 and the
13484 move to/from fpr to gpr instructions are available. Also, under VSX, you
13485 can copy vector registers from the FP register set to the Altivec register
13486 set and vice versa. */
13489 rs6000_secondary_memory_needed (machine_mode mode
,
13490 reg_class_t from_class
,
13491 reg_class_t to_class
)
13493 enum rs6000_reg_type from_type
, to_type
;
13494 bool altivec_p
= ((from_class
== ALTIVEC_REGS
)
13495 || (to_class
== ALTIVEC_REGS
));
  /* If a simple/direct move is available, we don't need secondary memory.  */
13498 from_type
= reg_class_to_reg_type
[(int)from_class
];
13499 to_type
= reg_class_to_reg_type
[(int)to_class
];
13501 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
,
13502 (secondary_reload_info
*)0, altivec_p
))
13505 /* If we have a floating point or vector register class, we need to use
13506 memory to transfer the data. */
13507 if (IS_FP_VECT_REG_TYPE (from_type
) || IS_FP_VECT_REG_TYPE (to_type
))
13513 /* Debug version of rs6000_secondary_memory_needed. */
13515 rs6000_debug_secondary_memory_needed (machine_mode mode
,
13516 reg_class_t from_class
,
13517 reg_class_t to_class
)
13519 bool ret
= rs6000_secondary_memory_needed (mode
, from_class
, to_class
);
13522 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13523 "to_class = %s, mode = %s\n",
13524 ret
? "true" : "false",
13525 reg_class_names
[from_class
],
13526 reg_class_names
[to_class
],
13527 GET_MODE_NAME (mode
));
13532 /* Return the register class of a scratch register needed to copy IN into
13533 or out of a register in RCLASS in MODE. If it can be done directly,
13534 NO_REGS is returned. */
13536 static enum reg_class
13537 rs6000_secondary_reload_class (enum reg_class rclass
, machine_mode mode
,
13542 if (TARGET_ELF
|| (DEFAULT_ABI
== ABI_DARWIN
13544 && MACHOPIC_INDIRECT
13548 /* We cannot copy a symbolic operand directly into anything
13549 other than BASE_REGS for TARGET_ELF. So indicate that a
13550 register from BASE_REGS is needed as an intermediate
13553 On Darwin, pic addresses require a load from memory, which
13554 needs a base register. */
13555 if (rclass
!= BASE_REGS
13556 && (SYMBOL_REF_P (in
)
13557 || GET_CODE (in
) == HIGH
13558 || GET_CODE (in
) == LABEL_REF
13559 || GET_CODE (in
) == CONST
))
13565 regno
= REGNO (in
);
13566 if (!HARD_REGISTER_NUM_P (regno
))
13568 regno
= true_regnum (in
);
13569 if (!HARD_REGISTER_NUM_P (regno
))
13573 else if (SUBREG_P (in
))
13575 regno
= true_regnum (in
);
13576 if (!HARD_REGISTER_NUM_P (regno
))
13582 /* If we have VSX register moves, prefer moving scalar values between
13583 Altivec registers and GPR by going via an FPR (and then via memory)
13584 instead of reloading the secondary memory address for Altivec moves. */
13586 && GET_MODE_SIZE (mode
) < 16
13587 && !mode_supports_vmx_dform (mode
)
13588 && (((rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
)
13589 && (regno
>= 0 && ALTIVEC_REGNO_P (regno
)))
13590 || ((rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
13591 && (regno
>= 0 && INT_REGNO_P (regno
)))))
13594 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13596 if (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
13597 || (regno
>= 0 && INT_REGNO_P (regno
)))
13600 /* Constants, memory, and VSX registers can go into VSX registers (both the
13601 traditional floating point and the altivec registers). */
13602 if (rclass
== VSX_REGS
13603 && (regno
== -1 || VSX_REGNO_P (regno
)))
13606 /* Constants, memory, and FP registers can go into FP registers. */
13607 if ((regno
== -1 || FP_REGNO_P (regno
))
13608 && (rclass
== FLOAT_REGS
|| rclass
== GEN_OR_FLOAT_REGS
))
13609 return (mode
!= SDmode
|| lra_in_progress
) ? NO_REGS
: GENERAL_REGS
;
13611 /* Memory, and AltiVec registers can go into AltiVec registers. */
13612 if ((regno
== -1 || ALTIVEC_REGNO_P (regno
))
13613 && rclass
== ALTIVEC_REGS
)
13616 /* We can copy among the CR registers. */
13617 if ((rclass
== CR_REGS
|| rclass
== CR0_REGS
)
13618 && regno
>= 0 && CR_REGNO_P (regno
))
13621 /* Otherwise, we need GENERAL_REGS. */
13622 return GENERAL_REGS
;
13625 /* Debug version of rs6000_secondary_reload_class. */
13626 static enum reg_class
13627 rs6000_debug_secondary_reload_class (enum reg_class rclass
,
13628 machine_mode mode
, rtx in
)
13630 enum reg_class ret
= rs6000_secondary_reload_class (rclass
, mode
, in
);
13632 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13633 "mode = %s, input rtx:\n",
13634 reg_class_names
[ret
], reg_class_names
[rclass
],
13635 GET_MODE_NAME (mode
));
13641 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13644 rs6000_can_change_mode_class (machine_mode from
,
13646 reg_class_t rclass
)
13648 unsigned from_size
= GET_MODE_SIZE (from
);
13649 unsigned to_size
= GET_MODE_SIZE (to
);
13651 if (from_size
!= to_size
)
13653 enum reg_class xclass
= (TARGET_VSX
) ? VSX_REGS
: FLOAT_REGS
;
13655 if (reg_classes_intersect_p (xclass
, rclass
))
13657 unsigned to_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, to
);
13658 unsigned from_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, from
);
13659 bool to_float128_vector_p
= FLOAT128_VECTOR_P (to
);
13660 bool from_float128_vector_p
= FLOAT128_VECTOR_P (from
);
13662 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13663 single register under VSX because the scalar part of the register
13664 is in the upper 64-bits, and not the lower 64-bits. Types like
13665 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
13666 IEEE floating point can't overlap, and neither can small
13669 if (to_float128_vector_p
&& from_float128_vector_p
)
13672 else if (to_float128_vector_p
|| from_float128_vector_p
)
13675 /* TDmode in floating-mode registers must always go into a register
13676 pair with the most significant word in the even-numbered register
13677 to match ISA requirements. In little-endian mode, this does not
13678 match subreg numbering, so we cannot allow subregs. */
13679 if (!BYTES_BIG_ENDIAN
&& (to
== TDmode
|| from
== TDmode
))
13682 /* Allow SD<->DD changes, since SDmode values are stored in
13683 the low half of the DDmode, just like target-independent
13684 code expects. We need to allow at least SD->DD since
13685 rs6000_secondary_memory_needed_mode asks for that change
13686 to be made for SD reloads. */
13687 if ((to
== DDmode
&& from
== SDmode
)
13688 || (to
== SDmode
&& from
== DDmode
))
13691 if (from_size
< 8 || to_size
< 8)
13694 if (from_size
== 8 && (8 * to_nregs
) != to_size
)
13697 if (to_size
== 8 && (8 * from_nregs
) != from_size
)
13706 /* Since the VSX register set includes traditional floating point registers
13707 and altivec registers, just check for the size being different instead of
13708 trying to check whether the modes are vector modes. Otherwise it won't
13709 allow say DF and DI to change classes. For types like TFmode and TDmode
13710 that take 2 64-bit registers, rather than a single 128-bit register, don't
13711 allow subregs of those types to other 128 bit types. */
13712 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
))
13714 unsigned num_regs
= (from_size
+ 15) / 16;
13715 if (hard_regno_nregs (FIRST_FPR_REGNO
, to
) > num_regs
13716 || hard_regno_nregs (FIRST_FPR_REGNO
, from
) > num_regs
)
13719 return (from_size
== 8 || from_size
== 16);
13722 if (TARGET_ALTIVEC
&& rclass
== ALTIVEC_REGS
13723 && (ALTIVEC_VECTOR_MODE (from
) + ALTIVEC_VECTOR_MODE (to
)) == 1)
13729 /* Debug version of rs6000_can_change_mode_class. */
13731 rs6000_debug_can_change_mode_class (machine_mode from
,
13733 reg_class_t rclass
)
13735 bool ret
= rs6000_can_change_mode_class (from
, to
, rclass
);
13738 "rs6000_can_change_mode_class, return %s, from = %s, "
13739 "to = %s, rclass = %s\n",
13740 ret
? "true" : "false",
13741 GET_MODE_NAME (from
), GET_MODE_NAME (to
),
13742 reg_class_names
[rclass
]);
13747 /* Return a string to do a move operation of 128 bits of data. */
13750 rs6000_output_move_128bit (rtx operands
[])
13752 rtx dest
= operands
[0];
13753 rtx src
= operands
[1];
13754 machine_mode mode
= GET_MODE (dest
);
13757 bool dest_gpr_p
, dest_fp_p
, dest_vmx_p
, dest_vsx_p
;
13758 bool src_gpr_p
, src_fp_p
, src_vmx_p
, src_vsx_p
;
13762 dest_regno
= REGNO (dest
);
13763 dest_gpr_p
= INT_REGNO_P (dest_regno
);
13764 dest_fp_p
= FP_REGNO_P (dest_regno
);
13765 dest_vmx_p
= ALTIVEC_REGNO_P (dest_regno
);
13766 dest_vsx_p
= dest_fp_p
| dest_vmx_p
;
13771 dest_gpr_p
= dest_fp_p
= dest_vmx_p
= dest_vsx_p
= false;
13776 src_regno
= REGNO (src
);
13777 src_gpr_p
= INT_REGNO_P (src_regno
);
13778 src_fp_p
= FP_REGNO_P (src_regno
);
13779 src_vmx_p
= ALTIVEC_REGNO_P (src_regno
);
13780 src_vsx_p
= src_fp_p
| src_vmx_p
;
13785 src_gpr_p
= src_fp_p
= src_vmx_p
= src_vsx_p
= false;
13788 /* Register moves. */
13789 if (dest_regno
>= 0 && src_regno
>= 0)
13796 if (TARGET_DIRECT_MOVE_128
&& src_vsx_p
)
13797 return (WORDS_BIG_ENDIAN
13798 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13799 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13801 else if (TARGET_VSX
&& TARGET_DIRECT_MOVE
&& src_vsx_p
)
13805 else if (TARGET_VSX
&& dest_vsx_p
)
13808 return "xxlor %x0,%x1,%x1";
13810 else if (TARGET_DIRECT_MOVE_128
&& src_gpr_p
)
13811 return (WORDS_BIG_ENDIAN
13812 ? "mtvsrdd %x0,%1,%L1"
13813 : "mtvsrdd %x0,%L1,%1");
13815 else if (TARGET_DIRECT_MOVE
&& src_gpr_p
)
13819 else if (TARGET_ALTIVEC
&& dest_vmx_p
&& src_vmx_p
)
13820 return "vor %0,%1,%1";
13822 else if (dest_fp_p
&& src_fp_p
)
13827 else if (dest_regno
>= 0 && MEM_P (src
))
13831 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13837 else if (TARGET_ALTIVEC
&& dest_vmx_p
13838 && altivec_indexed_or_indirect_operand (src
, mode
))
13839 return "lvx %0,%y1";
13841 else if (TARGET_VSX
&& dest_vsx_p
)
13843 if (mode_supports_dq_form (mode
)
13844 && quad_address_p (XEXP (src
, 0), mode
, true))
13845 return "lxv %x0,%1";
13847 else if (TARGET_P9_VECTOR
)
13848 return "lxvx %x0,%y1";
13850 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13851 return "lxvw4x %x0,%y1";
13854 return "lxvd2x %x0,%y1";
13857 else if (TARGET_ALTIVEC
&& dest_vmx_p
)
13858 return "lvx %0,%y1";
13860 else if (dest_fp_p
)
13865 else if (src_regno
>= 0 && MEM_P (dest
))
13869 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13870 return "stq %1,%0";
13875 else if (TARGET_ALTIVEC
&& src_vmx_p
13876 && altivec_indexed_or_indirect_operand (dest
, mode
))
13877 return "stvx %1,%y0";
13879 else if (TARGET_VSX
&& src_vsx_p
)
13881 if (mode_supports_dq_form (mode
)
13882 && quad_address_p (XEXP (dest
, 0), mode
, true))
13883 return "stxv %x1,%0";
13885 else if (TARGET_P9_VECTOR
)
13886 return "stxvx %x1,%y0";
13888 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13889 return "stxvw4x %x1,%y0";
13892 return "stxvd2x %x1,%y0";
13895 else if (TARGET_ALTIVEC
&& src_vmx_p
)
13896 return "stvx %1,%y0";
13903 else if (dest_regno
>= 0
13904 && (CONST_INT_P (src
)
13905 || CONST_WIDE_INT_P (src
)
13906 || CONST_DOUBLE_P (src
)
13907 || GET_CODE (src
) == CONST_VECTOR
))
13912 else if ((dest_vmx_p
&& TARGET_ALTIVEC
)
13913 || (dest_vsx_p
&& TARGET_VSX
))
13914 return output_vec_const_move (operands
);
13917 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest
, src
));
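
/* For illustration: a VSX-to-VSX register copy comes out as
   "xxlor %x0,%x1,%x1", an Altivec-to-Altivec copy as "vor %0,%1,%1", and a
   128-bit GPR-to-VSX move with TARGET_DIRECT_MOVE_128 uses "mtvsrdd" with
   the operand order depending on endianness, as selected above.  */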
13920 /* Validate a 128-bit move. */
13922 rs6000_move_128bit_ok_p (rtx operands
[])
13924 machine_mode mode
= GET_MODE (operands
[0]);
13925 return (gpc_reg_operand (operands
[0], mode
)
13926 || gpc_reg_operand (operands
[1], mode
));
13929 /* Return true if a 128-bit move needs to be split. */
13931 rs6000_split_128bit_ok_p (rtx operands
[])
13933 if (!reload_completed
)
13936 if (!gpr_or_gpr_p (operands
[0], operands
[1]))
13939 if (quad_load_store_p (operands
[0], operands
[1]))
13946 /* Given a comparison operation, return the bit number in CCR to test. We
13947 know this is a valid comparison.
13949 SCC_P is 1 if this is for an scc. That means that %D will have been
13950 used instead of %C, so the bits will be in different places.
13952 Return -1 if OP isn't a valid comparison for some reason. */
13955 ccr_bit (rtx op
, int scc_p
)
13957 enum rtx_code code
= GET_CODE (op
);
13958 machine_mode cc_mode
;
13963 if (!COMPARISON_P (op
))
13966 reg
= XEXP (op
, 0);
13968 if (!REG_P (reg
) || !CR_REGNO_P (REGNO (reg
)))
13971 cc_mode
= GET_MODE (reg
);
13972 cc_regnum
= REGNO (reg
);
13973 base_bit
= 4 * (cc_regnum
- CR0_REGNO
);
13975 validate_condition_mode (code
, cc_mode
);
13977 /* When generating a sCOND operation, only positive conditions are
13996 return scc_p
? base_bit
+ 3 : base_bit
+ 2;
13998 return base_bit
+ 2;
13999 case GT
: case GTU
: case UNLE
:
14000 return base_bit
+ 1;
14001 case LT
: case LTU
: case UNGE
:
14003 case ORDERED
: case UNORDERED
:
14004 return base_bit
+ 3;
14007 /* If scc, we will have done a cror to put the bit in the
14008 unordered position. So test that bit. For integer, this is ! LT
14009 unless this is an scc insn. */
14010 return scc_p
? base_bit
+ 3 : base_bit
;
14013 return scc_p
? base_bit
+ 3 : base_bit
+ 1;
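
/* As a worked example of the mapping above: a comparison whose result lives
   in CR6 has base_bit = 4 * (CR6 - CR0) = 24, so EQ tests CR bit 26, GT bit
   25, LT bit 24, and the ordered/unordered conditions bit 27, matching the
   LT/GT/EQ/SO layout of each CR field.  */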
14020 /* Return the GOT register. */
14023 rs6000_got_register (rtx value ATTRIBUTE_UNUSED
)
14025 /* The second flow pass currently (June 1999) can't update
14026 regs_ever_live without disturbing other parts of the compiler, so
14027 update it here to make the prolog/epilogue code happy. */
14028 if (!can_create_pseudo_p ()
14029 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))
14030 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM
, true);
14032 crtl
->uses_pic_offset_table
= 1;
14034 return pic_offset_table_rtx
;
14037 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
14039 /* Write out a function code label. */
14042 rs6000_output_function_entry (FILE *file
, const char *fname
)
14044 if (fname
[0] != '.')
14046 switch (DEFAULT_ABI
)
14049 gcc_unreachable ();
14055 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "L.");
14065 RS6000_OUTPUT_BASENAME (file
, fname
);
14068 /* Print an operand. Recognize special options, documented below. */
14071 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
14072 only introduced by the linker, when applying the sda21
14074 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
14075 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
14077 #define SMALL_DATA_RELOC "sda21"
14078 #define SMALL_DATA_REG 0
14082 print_operand (FILE *file
, rtx x
, int code
)
14085 unsigned HOST_WIDE_INT uval
;
14089 /* %a is output_address. */
14091 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
14095 /* Write the MMA accumulator number associated with VSX register X. */
14096 if (!REG_P (x
) || !FP_REGNO_P (REGNO (x
)) || (REGNO (x
) % 4) != 0)
14097 output_operand_lossage ("invalid %%A value");
14099 fprintf (file
, "%d", (REGNO (x
) - FIRST_FPR_REGNO
) / 4);
14103 /* Like 'J' but get to the GT bit only. */
14104 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14106 output_operand_lossage ("invalid %%D value");
14110 /* Bit 1 is GT bit. */
14111 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 1;
14113 /* Add one for shift count in rlinm for scc. */
14114 fprintf (file
, "%d", i
+ 1);
14118 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
14121 output_operand_lossage ("invalid %%e value");
14126 if ((uval
& 0xffff) == 0 && uval
!= 0)
14131 /* X is a CR register. Print the number of the EQ bit of the CR */
14132 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14133 output_operand_lossage ("invalid %%E value");
14135 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
) + 2);
14139 /* X is a CR register. Print the shift count needed to move it
14140 to the high-order four bits. */
14141 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14142 output_operand_lossage ("invalid %%f value");
14144 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
));
14148 /* Similar, but print the count for the rotate in the opposite
14150 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14151 output_operand_lossage ("invalid %%F value");
14153 fprintf (file
, "%d", 32 - 4 * (REGNO (x
) - CR0_REGNO
));
14157 /* X is a constant integer. If it is negative, print "m",
14158 otherwise print "z". This is to make an aze or ame insn. */
14159 if (!CONST_INT_P (x
))
14160 output_operand_lossage ("invalid %%G value");
14161 else if (INTVAL (x
) >= 0)
14168 /* If constant, output low-order five bits. Otherwise, write
14171 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 31);
14173 print_operand (file
, x
, 0);
14177 /* If constant, output low-order six bits. Otherwise, write
14180 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 63);
14182 print_operand (file
, x
, 0);
14186 /* Print `i' if this is a constant, else nothing. */
14192 /* Write the bit number in CCR for jump. */
14193 i
= ccr_bit (x
, 0);
14195 output_operand_lossage ("invalid %%j code");
14197 fprintf (file
, "%d", i
);
14201 /* Similar, but add one for shift count in rlinm for scc and pass
14202 scc flag to `ccr_bit'. */
14203 i
= ccr_bit (x
, 1);
14205 output_operand_lossage ("invalid %%J code");
14207 /* If we want bit 31, write a shift count of zero, not 32. */
14208 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
14212 /* X must be a constant. Write the 1's complement of the
14215 output_operand_lossage ("invalid %%k value");
14217 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~ INTVAL (x
));
14221 /* X must be a symbolic constant on ELF. Write an
14222 expression suitable for an 'addi' that adds in the low 16
14223 bits of the MEM. */
14224 if (GET_CODE (x
) == CONST
)
14226 if (GET_CODE (XEXP (x
, 0)) != PLUS
14227 || (!SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
14228 && GET_CODE (XEXP (XEXP (x
, 0), 0)) != LABEL_REF
)
14229 || !CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
14230 output_operand_lossage ("invalid %%K value");
14232 print_operand_address (file
, x
);
14233 fputs ("@l", file
);
14236 /* %l is output_asm_label. */
14239 /* Write second word of DImode or DFmode reference. Works on register
14240 or non-indexed memory only. */
14242 fputs (reg_names
[REGNO (x
) + 1], file
);
14243 else if (MEM_P (x
))
14245 machine_mode mode
= GET_MODE (x
);
14246 /* Handle possible auto-increment. Since it is pre-increment and
14247 we have already done it, we can just use an offset of word. */
14248 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14249 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14250 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
14252 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14253 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
14256 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
,
14260 if (small_data_operand (x
, GET_MODE (x
)))
14261 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14262 reg_names
[SMALL_DATA_REG
]);
14266 case 'N': /* Unused */
14267 /* Write the number of elements in the vector times 4. */
14268 if (GET_CODE (x
) != PARALLEL
)
14269 output_operand_lossage ("invalid %%N value");
14271 fprintf (file
, "%d", XVECLEN (x
, 0) * 4);
14274 case 'O': /* Unused */
14275 /* Similar, but subtract 1 first. */
14276 if (GET_CODE (x
) != PARALLEL
)
14277 output_operand_lossage ("invalid %%O value");
14279 fprintf (file
, "%d", (XVECLEN (x
, 0) - 1) * 4);
14283 /* X is a CONST_INT that is a power of two. Output the logarithm. */
14286 || (i
= exact_log2 (INTVAL (x
))) < 0)
14287 output_operand_lossage ("invalid %%p value");
14289 fprintf (file
, "%d", i
);
14293 /* The operand must be an indirect memory reference. The result
14294 is the register name. */
14295 if (!MEM_P (x
) || !REG_P (XEXP (x
, 0))
14296 || REGNO (XEXP (x
, 0)) >= 32)
14297 output_operand_lossage ("invalid %%P value");
14299 fputs (reg_names
[REGNO (XEXP (x
, 0))], file
);
14303 /* This outputs the logical code corresponding to a boolean
14304 expression. The expression may have one or both operands
14305 negated (if one, only the first one). For condition register
14306 logical operations, it will also treat the negated
14307 CR codes as NOTs, but not handle NOTs of them. */
14309 const char *const *t
= 0;
14311 enum rtx_code code
= GET_CODE (x
);
14312 static const char * const tbl
[3][3] = {
14313 { "and", "andc", "nor" },
14314 { "or", "orc", "nand" },
14315 { "xor", "eqv", "xor" } };
14319 else if (code
== IOR
)
14321 else if (code
== XOR
)
14324 output_operand_lossage ("invalid %%q value");
14326 if (GET_CODE (XEXP (x
, 0)) != NOT
)
14330 if (GET_CODE (XEXP (x
, 1)) == NOT
)
14341 if (! TARGET_MFCRF
)
14347 /* X is a CR register. Print the mask for `mtcrf'. */
14348 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14349 output_operand_lossage ("invalid %%R value");
14351 fprintf (file
, "%d", 128 >> (REGNO (x
) - CR0_REGNO
));
14355 /* Low 5 bits of 32 - value */
14357 output_operand_lossage ("invalid %%s value");
14359 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (32 - INTVAL (x
)) & 31);
14363 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
14364 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14366 output_operand_lossage ("invalid %%t value");
14370 /* Bit 3 is OV bit. */
14371 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 3;
14373 /* If we want bit 31, write a shift count of zero, not 32. */
14374 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
14378 /* Print the symbolic name of a branch target register. */
14379 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14380 x
= XVECEXP (x
, 0, 0);
14381 if (!REG_P (x
) || (REGNO (x
) != LR_REGNO
14382 && REGNO (x
) != CTR_REGNO
))
14383 output_operand_lossage ("invalid %%T value");
14384 else if (REGNO (x
) == LR_REGNO
)
14385 fputs ("lr", file
);
14387 fputs ("ctr", file
);
14391 /* High-order or low-order 16 bits of constant, whichever is non-zero,
14392 for use in unsigned operand. */
14395 output_operand_lossage ("invalid %%u value");
14400 if ((uval
& 0xffff) == 0)
14403 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
, uval
& 0xffff);
14407 /* High-order 16 bits of constant for use in signed operand. */
14409 output_operand_lossage ("invalid %%v value");
14411 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
,
14412 (INTVAL (x
) >> 16) & 0xffff);
14416 /* Print `u' if this has an auto-increment or auto-decrement. */
14418 && (GET_CODE (XEXP (x
, 0)) == PRE_INC
14419 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
14420 || GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
))
	putc ('u', file);
      return;

    case 'V':
      /* Print the trap code for this operand.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("eq", file);   /* 4 */
	  break;
	case NE:
	  fputs ("ne", file);   /* 24 */
	  break;
	case LT:
	  fputs ("lt", file);   /* 16 */
	  break;
	case LE:
	  fputs ("le", file);   /* 20 */
	  break;
	case GT:
	  fputs ("gt", file);   /* 8 */
	  break;
	case GE:
	  fputs ("ge", file);   /* 12 */
	  break;
	case LTU:
	  fputs ("llt", file);  /* 2 */
	  break;
	case LEU:
	  fputs ("lle", file);  /* 6 */
	  break;
	case GTU:
	  fputs ("lgt", file);  /* 1 */
	  break;
	case GEU:
	  fputs ("lge", file);  /* 5 */
	  break;
	default:
	  output_operand_lossage ("invalid %%V value");
	}
      break;
14464 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14467 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, sext_hwi (INTVAL (x
), 16));
14469 print_operand (file
, x
, 0);
14473 /* X is a FPR or Altivec register used in a VSX context. */
14474 if (!REG_P (x
) || !VSX_REGNO_P (REGNO (x
)))
14475 output_operand_lossage ("invalid %%x value");
14478 int reg
= REGNO (x
);
14479 int vsx_reg
= (FP_REGNO_P (reg
)
14481 : reg
- FIRST_ALTIVEC_REGNO
+ 32);
14483 #ifdef TARGET_REGNAMES
14484 if (TARGET_REGNAMES
)
14485 fprintf (file
, "%%vs%d", vsx_reg
);
14488 fprintf (file
, "%d", vsx_reg
);
14494 && (legitimate_indexed_address_p (XEXP (x
, 0), 0)
14495 || (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
14496 && legitimate_indexed_address_p (XEXP (XEXP (x
, 0), 1), 0))))
14501 /* Like 'L', for third word of TImode/PTImode */
14503 fputs (reg_names
[REGNO (x
) + 2], file
);
14504 else if (MEM_P (x
))
14506 machine_mode mode
= GET_MODE (x
);
14507 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14508 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14509 output_address (mode
, plus_constant (Pmode
,
14510 XEXP (XEXP (x
, 0), 0), 8));
14511 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14512 output_address (mode
, plus_constant (Pmode
,
14513 XEXP (XEXP (x
, 0), 0), 8));
14515 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 8), 0));
14516 if (small_data_operand (x
, GET_MODE (x
)))
14517 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14518 reg_names
[SMALL_DATA_REG
]);
14523 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14524 x
= XVECEXP (x
, 0, 1);
14525 /* X is a SYMBOL_REF. Write out the name preceded by a
14526 period and without any trailing data in brackets. Used for function
14527 names. If we are configured for System V (or the embedded ABI) on
14528 the PowerPC, do not emit the period, since those systems do not use
14529 TOCs and the like. */
14530 if (!SYMBOL_REF_P (x
))
14532 output_operand_lossage ("invalid %%z value");
14536 /* For macho, check to see if we need a stub. */
14539 const char *name
= XSTR (x
, 0);
14541 if (darwin_symbol_stubs
14542 && MACHOPIC_INDIRECT
14543 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
14544 name
= machopic_indirection_name (x
, /*stub_p=*/true);
14546 assemble_name (file
, name
);
14548 else if (!DOT_SYMBOLS
)
14549 assemble_name (file
, XSTR (x
, 0));
14551 rs6000_output_function_entry (file
, XSTR (x
, 0));
14555 /* Like 'L', for last word of TImode/PTImode. */
14557 fputs (reg_names
[REGNO (x
) + 3], file
);
14558 else if (MEM_P (x
))
14560 machine_mode mode
= GET_MODE (x
);
14561 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14562 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14563 output_address (mode
, plus_constant (Pmode
,
14564 XEXP (XEXP (x
, 0), 0), 12));
14565 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14566 output_address (mode
, plus_constant (Pmode
,
14567 XEXP (XEXP (x
, 0), 0), 12));
14569 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 12), 0));
14570 if (small_data_operand (x
, GET_MODE (x
)))
14571 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14572 reg_names
[SMALL_DATA_REG
]);
14576 /* Print AltiVec memory operand. */
14581 gcc_assert (MEM_P (x
));
14585 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x
))
14586 && GET_CODE (tmp
) == AND
14587 && CONST_INT_P (XEXP (tmp
, 1))
14588 && INTVAL (XEXP (tmp
, 1)) == -16)
14589 tmp
= XEXP (tmp
, 0);
14590 else if (VECTOR_MEM_VSX_P (GET_MODE (x
))
14591 && GET_CODE (tmp
) == PRE_MODIFY
)
14592 tmp
= XEXP (tmp
, 1);
14594 fprintf (file
, "0,%s", reg_names
[REGNO (tmp
)]);
14597 if (GET_CODE (tmp
) != PLUS
14598 || !REG_P (XEXP (tmp
, 0))
14599 || !REG_P (XEXP (tmp
, 1)))
14601 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14605 if (REGNO (XEXP (tmp
, 0)) == 0)
14606 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 1)) ],
14607 reg_names
[ REGNO (XEXP (tmp
, 0)) ]);
14609 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 0)) ],
14610 reg_names
[ REGNO (XEXP (tmp
, 1)) ]);
14617 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
14618 else if (MEM_P (x
))
14620 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14621 know the width from the mode. */
14622 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
)
14623 fprintf (file
, "%d(%s)", GET_MODE_SIZE (GET_MODE (x
)),
14624 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
14625 else if (GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14626 fprintf (file
, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x
)),
14627 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
14628 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14629 output_address (GET_MODE (x
), XEXP (XEXP (x
, 0), 1));
14631 output_address (GET_MODE (x
), XEXP (x
, 0));
14633 else if (toc_relative_expr_p (x
, false,
14634 &tocrel_base_oac
, &tocrel_offset_oac
))
14635 /* This hack along with a corresponding hack in
14636 rs6000_output_addr_const_extra arranges to output addends
14637 where the assembler expects to find them. eg.
14638 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14639 without this hack would be output as "x@toc+4". We
14641 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
14642 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSGD
)
14643 output_addr_const (file
, XVECEXP (x
, 0, 0));
14644 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14645 output_addr_const (file
, XVECEXP (x
, 0, 1));
14647 output_addr_const (file
, x
);
      return;

    case '&':
      if (const char *name = get_some_local_dynamic_name ())
	assemble_name (file, name);
      else
	output_operand_lossage ("'%%&' used without any "
				"local dynamic TLS references");
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
}
/* Print the address of an operand.  */

void
print_operand_address (FILE *file, rtx x)
{
  if (REG_P (x))
    fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
14671 /* Is it a PC-relative address? */
14672 else if (TARGET_PCREL
&& pcrel_local_or_external_address (x
, VOIDmode
))
14674 HOST_WIDE_INT offset
;
14676 if (GET_CODE (x
) == CONST
)
14679 if (GET_CODE (x
) == PLUS
)
14681 offset
= INTVAL (XEXP (x
, 1));
14687 output_addr_const (file
, x
);
14690 fprintf (file
, "%+" PRId64
, offset
);
14692 if (SYMBOL_REF_P (x
) && !SYMBOL_REF_LOCAL_P (x
))
14693 fprintf (file
, "@got");
14695 fprintf (file
, "@pcrel");
14697 else if (SYMBOL_REF_P (x
) || GET_CODE (x
) == CONST
14698 || GET_CODE (x
) == LABEL_REF
)
14700 output_addr_const (file
, x
);
14701 if (small_data_operand (x
, GET_MODE (x
)))
14702 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14703 reg_names
[SMALL_DATA_REG
]);
14705 gcc_assert (!TARGET_TOC
);
14707 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
14708 && REG_P (XEXP (x
, 1)))
14710 if (REGNO (XEXP (x
, 0)) == 0)
14711 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 1)) ],
14712 reg_names
[ REGNO (XEXP (x
, 0)) ]);
14714 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 0)) ],
14715 reg_names
[ REGNO (XEXP (x
, 1)) ]);
14717 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
14718 && CONST_INT_P (XEXP (x
, 1)))
14719 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
14720 INTVAL (XEXP (x
, 1)), reg_names
[ REGNO (XEXP (x
, 0)) ]);
14722 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
14723 && CONSTANT_P (XEXP (x
, 1)))
14725 fprintf (file
, "lo16(");
14726 output_addr_const (file
, XEXP (x
, 1));
14727 fprintf (file
, ")(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
14731 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
14732 && CONSTANT_P (XEXP (x
, 1)))
14734 output_addr_const (file
, XEXP (x
, 1));
14735 fprintf (file
, "@l(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
14738 else if (toc_relative_expr_p (x
, false, &tocrel_base_oac
, &tocrel_offset_oac
))
14740 /* This hack along with a corresponding hack in
14741 rs6000_output_addr_const_extra arranges to output addends
14742 where the assembler expects to find them. eg.
14744 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14745 without this hack would be output as "x@toc+8@l(9)". We
14746 want "x+8@toc@l(9)". */
14747 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
14748 if (GET_CODE (x
) == LO_SUM
)
14749 fprintf (file
, "@l(%s)", reg_names
[REGNO (XEXP (x
, 0))]);
14751 fprintf (file
, "(%s)", reg_names
[REGNO (XVECEXP (tocrel_base_oac
, 0, 1))]);
14754 output_addr_const (file
, x
);
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
rs6000_output_addr_const_extra (FILE *file, rtx x)
{
  if (GET_CODE (x) == UNSPEC)
    switch (XINT (x, 1))
      {
      case UNSPEC_TOCREL:
	gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
			     && REG_P (XVECEXP (x, 0, 1))
			     && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
	output_addr_const (file, XVECEXP (x, 0, 0));
	if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
	  {
	    if (INTVAL (tocrel_offset_oac) >= 0)
	      fprintf (file, "+");
	    output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
	  }
	if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
	  {
	    putc ('-', file);
	    assemble_name (file, toc_label_name);
	    need_toc_init = 1;
	  }
	else if (TARGET_ELF)
	  fputs ("@toc", file);
	return true;

#if TARGET_MACHO
      case UNSPEC_MACHOPIC_OFFSET:
	output_addr_const (file, XVECEXP (x, 0, 0));
	putc ('-', file);
	machopic_output_function_base_name (file);
	return true;
#endif
      }

  return false;
}
/* Target hook for assembling integer objects.  The PowerPC version has
   to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
   is defined.  It also needs to handle DI-mode objects on 64-bit
   targets.  */

static bool
rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
#ifdef RELOCATABLE_NEEDS_FIXUP
  /* Special handling for SI values.  */
  if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
    {
      static int recurse = 0;

      /* For -mrelocatable, we mark all addresses that need to be fixed up in
	 the .fixup section.  Since the TOC section is already relocated, we
	 don't need to mark it here.  We used to skip the text section, but it
	 should never be valid for relocated addresses to be placed in the
	 text section.  */
      if (DEFAULT_ABI == ABI_V4
	  && (TARGET_RELOCATABLE || flag_pic > 1)
	  && in_section != toc_section
	  && !recurse
	  && !CONST_SCALAR_INT_P (x)
	  && CONSTANT_P (x))
	{
	  char buf[256];

	  recurse = 1;
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
	  fixuplabelno++;
	  ASM_OUTPUT_LABEL (asm_out_file, buf);
	  fprintf (asm_out_file, "\t.long\t(");
	  output_addr_const (asm_out_file, x);
	  fprintf (asm_out_file, ")@fixup\n");
	  fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
	  ASM_OUTPUT_ALIGN (asm_out_file, 2);
	  fprintf (asm_out_file, "\t.long\t");
	  assemble_name (asm_out_file, buf);
	  fprintf (asm_out_file, "\n\t.previous\n");
	  recurse = 0;
	  return true;
	}
      /* Remove initial .'s to turn a -mcall-aixdesc function
	 address into the address of the descriptor, not the function
	 itself.  */
      else if (SYMBOL_REF_P (x)
	       && XSTR (x, 0)[0] == '.'
	       && DEFAULT_ABI == ABI_AIX)
	{
	  const char *name = XSTR (x, 0);
	  while (*name == '.')
	    name++;

	  fprintf (asm_out_file, "\t.long\t%s\n", name);
	  return true;
	}
    }
#endif /* RELOCATABLE_NEEDS_FIXUP */
  return default_assemble_integer (x, size, aligned_p);
}
/* Return a template string for assembly to emit when making an
   external call.  FUNOP is the call mem argument operand number.  */

static const char *
rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
{
  /* -Wformat-overflow workaround, without which gcc thinks that %u
     might produce 10 digits.  */
  gcc_assert (funop <= MAX_RECOG_OPERANDS);

  char arg[12];
  arg[0] = 0;
  if (GET_CODE (operands[funop + 1]) == UNSPEC)
    {
      if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
	sprintf (arg, "(%%%u@tlsgd)", funop + 1);
      else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
	sprintf (arg, "(%%&@tlsld)");
    }

  /* The magic 32768 offset here corresponds to the offset of
     r30 in .got2, as given by LCTOC1.  See sysv4.h:toc_section.  */
  char z[11];
  sprintf (z, "%%z%u%s", funop,
	   (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
	    ? "+32768" : ""));

  static char str[32];  /* 1 spare */
  if (rs6000_pcrel_p ())
    sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
	     sibcall ? "" : "\n\tnop");
  else if (DEFAULT_ABI == ABI_V4)
    sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
	     flag_pic ? "@plt" : "");
#if TARGET_MACHO
  /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case.  */
  else if (DEFAULT_ABI == ABI_DARWIN)
    {
      /* The cookie is in operand func+2.  */
      gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
      int cookie = INTVAL (operands[funop + 2]);
      if (cookie & CALL_LONG)
	{
	  tree funname = get_identifier (XSTR (operands[funop], 0));
	  tree labelname = get_prev_label (funname);
	  gcc_checking_assert (labelname && !sibcall);

	  /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
	     instruction will reach 'foo', otherwise link as 'bl L42'".
	     "L42" should be a 'branch island', that will do a far jump to
	     'foo'.  Branch islands are generated in
	     macho_branch_islands().  */
	  sprintf (str, "jbsr %%z%u,%.10s", funop,
		   IDENTIFIER_POINTER (labelname));
	}
      else
	/* Same as AIX or ELFv2, except to keep backwards compat, no nop
	   after the call.  */
	sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
    }
#endif
  else
    gcc_unreachable ();

  return str;
}

const char *
rs6000_call_template (rtx *operands, unsigned int funop)
{
  return rs6000_call_template_1 (operands, funop, false);
}

const char *
rs6000_sibcall_template (rtx *operands, unsigned int funop)
{
  return rs6000_call_template_1 (operands, funop, true);
}
14939 /* As above, for indirect calls. */
14941 static const char *
14942 rs6000_indirect_call_template_1 (rtx
*operands
, unsigned int funop
,
14945 /* -Wformat-overflow workaround, without which gcc thinks that %u
14946 might produce 10 digits. Note that -Wformat-overflow will not
14947 currently warn here for str[], so do not rely on a warning to
14948 ensure str[] is correctly sized. */
14949 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
14951 /* Currently, funop is either 0 or 1. The maximum string is always
14952 a !speculate 64-bit __tls_get_addr call.
14955 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14956 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14958 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14959 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14966 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14967 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14969 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14970 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14977 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14978 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14980 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14981 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14988 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14989 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14991 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14992 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14996 static char str
[160]; /* 8 spare */
14998 const char *ptrload
= TARGET_64BIT
? "d" : "wz";
15000 if (DEFAULT_ABI
== ABI_AIX
)
15003 ptrload
, funop
+ 3);
15005 /* We don't need the extra code to stop indirect call speculation if
15007 bool speculate
= (TARGET_MACHO
15008 || rs6000_speculate_indirect_jumps
15009 || (REG_P (operands
[funop
])
15010 && REGNO (operands
[funop
]) == LR_REGNO
));
15012 if (TARGET_PLTSEQ
&& GET_CODE (operands
[funop
]) == UNSPEC
)
15014 const char *rel64
= TARGET_64BIT
? "64" : "";
15017 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
15019 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
15020 sprintf (tls
, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
15022 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
15023 sprintf (tls
, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
15027 const char *notoc
= rs6000_pcrel_p () ? "_NOTOC" : "";
15028 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
15029 && flag_pic
== 2 ? "+32768" : "");
15033 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
15034 tls
, rel64
, notoc
, funop
, addend
);
15035 s
+= sprintf (s
, "crset 2\n\t");
15038 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
15039 tls
, rel64
, notoc
, funop
, addend
);
15041 else if (!speculate
)
15042 s
+= sprintf (s
, "crset 2\n\t");
15044 if (rs6000_pcrel_p ())
15047 sprintf (s
, "b%%T%ul", funop
);
15049 sprintf (s
, "beq%%T%ul-", funop
);
15051 else if (DEFAULT_ABI
== ABI_AIX
)
15057 funop
, ptrload
, funop
+ 4);
15062 funop
, ptrload
, funop
+ 4);
15064 else if (DEFAULT_ABI
== ABI_ELFv2
)
15070 funop
, ptrload
, funop
+ 3);
15075 funop
, ptrload
, funop
+ 3);
15082 funop
, sibcall
? "" : "l");
15086 funop
, sibcall
? "" : "l", sibcall
? "\n\tb $" : "");
  return str;
}

const char *
rs6000_indirect_call_template (rtx *operands, unsigned int funop)
{
  return rs6000_indirect_call_template_1 (operands, funop, false);
}

const char *
rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
{
  return rs6000_indirect_call_template_1 (operands, funop, true);
}
15104 /* Output indirect call insns. WHICH identifies the type of sequence. */
15106 rs6000_pltseq_template (rtx
*operands
, int which
)
15108 const char *rel64
= TARGET_64BIT
? "64" : "";
15111 if (GET_CODE (operands
[3]) == UNSPEC
)
15113 char off
= which
== RS6000_PLTSEQ_PLT_PCREL34
? '8' : '4';
15114 if (XINT (operands
[3], 1) == UNSPEC_TLSGD
)
15115 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
15117 else if (XINT (operands
[3], 1) == UNSPEC_TLSLD
)
15118 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
15122 gcc_assert (DEFAULT_ABI
== ABI_ELFv2
|| DEFAULT_ABI
== ABI_V4
);
15123 static char str
[96]; /* 10 spare */
15124 char off
= WORDS_BIG_ENDIAN
? '2' : '4';
15125 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
15126 && flag_pic
== 2 ? "+32768" : "");
15129 case RS6000_PLTSEQ_TOCSAVE
:
15132 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
15133 TARGET_64BIT
? "d 2,24(1)" : "w 2,12(1)",
15136 case RS6000_PLTSEQ_PLT16_HA
:
15137 if (DEFAULT_ABI
== ABI_V4
&& !flag_pic
)
15140 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
15144 "addis %%0,%%1,0\n\t"
15145 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
15146 tls
, off
, rel64
, addend
);
15148 case RS6000_PLTSEQ_PLT16_LO
:
15150 "l%s %%0,0(%%1)\n\t"
15151 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
15152 TARGET_64BIT
? "d" : "wz",
15153 tls
, off
, rel64
, TARGET_64BIT
? "_DS" : "", addend
);
15155 case RS6000_PLTSEQ_MTCTR
:
15158 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
15159 tls
, rel64
, addend
);
15161 case RS6000_PLTSEQ_PLT_PCREL34
:
15163 "pl%s %%0,0(0),1\n\t"
15164 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
15165 TARGET_64BIT
? "d" : "wz",
15169 gcc_unreachable ();
#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
/* Emit an assembler directive to set symbol visibility for DECL to
   VISIBILITY_TYPE.  */

static void
rs6000_assemble_visibility (tree decl, int vis)
{
  if (TARGET_XCOFF)
    return;

  /* Functions need to have their entry point symbol visibility set as
     well as their descriptor symbol visibility.  */
  if (DEFAULT_ABI == ABI_AIX
      && DOT_SYMBOLS
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      static const char * const visibility_types[] = {
	NULL, "protected", "hidden", "internal"
      };

      const char *name, *type;

      name = ((* targetm.strip_name_encoding)
	      (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
      type = visibility_types[vis];

      fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
      fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
    }
  else
    default_assemble_visibility (decl, vis);
}
#endif
/* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
   entry.  If RECORD_P is true and the target supports named sections,
   the location of the NOPs will be recorded in a special object section
   called "__patchable_function_entries".  This routine may be called
   twice per function to put NOPs before and after the function
   entry.  */

void
rs6000_print_patchable_function_entry (FILE *file,
				       unsigned HOST_WIDE_INT patch_area_size,
				       bool record_p)
{
  bool global_entry_needed_p = rs6000_global_entry_point_prologue_needed_p ();
  /* For a function which needs global entry point, we will emit the
     patchable area before and after local entry point under the control of
     cfun->machine->global_entry_emitted, see the handling in function
     rs6000_output_function_prologue.  */
  if (!global_entry_needed_p || cfun->machine->global_entry_emitted)
    default_print_patchable_function_entry (file, patch_area_size, record_p);
}
enum rtx_code
rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
{
  /* Reversal of FP compares takes care -- an ordered compare
     becomes an unordered compare and vice versa.  */
  if (mode == CCFPmode
      && (!flag_finite_math_only
	  || code == UNLT || code == UNLE || code == UNGT || code == UNGE
	  || code == UNEQ || code == LTGT))
    return reverse_condition_maybe_unordered (code);
  else
    return reverse_condition (code);
}
/* Check if C (as 64bit integer) can be rotated to a constant which contains
   nonzero bits at the LOWBITS low bits only.

   Return true if C can be rotated to such constant.  If so, *ROT is written
   to the number by which C is rotated.
   Return false otherwise.  */

bool
can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
{
  int clz = HOST_BITS_PER_WIDE_INT - lowbits;

  /* case a. 0..0xxx: already at least clz zeros.  */
  int lz = clz_hwi (c);
  if (lz >= clz)
    {
      *rot = 0;
      return true;
    }

  /* case b. 0..0xxx0..0: at least clz zeros.  */
  int tz = ctz_hwi (c);
  if (lz + tz >= clz)
    {
      *rot = HOST_BITS_PER_WIDE_INT - tz;
      return true;
    }

  /* case c. xx10.....0xx: rotate 'clz - 1' bits first, then check case b.
	       ^bit -> Vbit, then zeros are at head or tail.
	     00...00xxx100, 'clz - 1' >= 'bits of xxxx'.  */
  const int rot_bits = lowbits + 1;
  unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
  tz = ctz_hwi (rc);
  if (clz_hwi (rc) + tz >= clz)
    {
      *rot = HOST_BITS_PER_WIDE_INT - (tz + rot_bits);
      return true;
    }

  return false;
}

/* Check if C (as 64bit integer) can be rotated to a positive 16bits constant
   which contains 48bits leading zeros and 16bits of any value.  */

bool
can_be_rotated_to_positive_16bits (HOST_WIDE_INT c)
{
  int rot = 0;
  bool res = can_be_rotated_to_lowbits (c, 16, &rot);
  return res && rot > 0;
}

/* Check if C (as 64bit integer) can be rotated to a negative 15bits constant
   which contains 49bits leading ones and 15bits of any value.  */

bool
can_be_rotated_to_negative_15bits (HOST_WIDE_INT c)
{
  int rot = 0;
  bool res = can_be_rotated_to_lowbits (~c, 15, &rot);
  return res && rot > 0;
}
15309 /* Generate a compare for CODE. Return a brand-new rtx that
15310 represents the result of the compare. */
15313 rs6000_generate_compare (rtx cmp
, machine_mode mode
)
15315 machine_mode comp_mode
;
15316 rtx compare_result
;
15317 enum rtx_code code
= GET_CODE (cmp
);
15318 rtx op0
= XEXP (cmp
, 0);
15319 rtx op1
= XEXP (cmp
, 1);
15321 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
15322 comp_mode
= CCmode
;
15323 else if (FLOAT_MODE_P (mode
))
15324 comp_mode
= CCFPmode
;
15325 else if (code
== GTU
|| code
== LTU
15326 || code
== GEU
|| code
== LEU
)
15327 comp_mode
= CCUNSmode
;
15328 else if ((code
== EQ
|| code
== NE
)
15329 && unsigned_reg_p (op0
)
15330 && (unsigned_reg_p (op1
)
15331 || (CONST_INT_P (op1
) && INTVAL (op1
) != 0)))
15332 /* These are unsigned values, perhaps there will be a later
15333 ordering compare that can be shared with this one. */
15334 comp_mode
= CCUNSmode
;
15336 comp_mode
= CCmode
;
15338 /* If we have an unsigned compare, make sure we don't have a signed value as
15340 if (comp_mode
== CCUNSmode
&& CONST_INT_P (op1
)
15341 && INTVAL (op1
) < 0)
15343 op0
= copy_rtx_if_shared (op0
);
15344 op1
= force_reg (GET_MODE (op0
), op1
);
15345 cmp
= gen_rtx_fmt_ee (code
, GET_MODE (cmp
), op0
, op1
);
15348 /* First, the compare. */
15349 compare_result
= gen_reg_rtx (comp_mode
);
15351 /* IEEE 128-bit support in VSX registers when we do not have hardware
15353 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
15355 rtx libfunc
= NULL_RTX
;
15356 bool check_nan
= false;
15363 libfunc
= optab_libfunc (eq_optab
, mode
);
15368 libfunc
= optab_libfunc (ge_optab
, mode
);
15373 libfunc
= optab_libfunc (le_optab
, mode
);
15378 libfunc
= optab_libfunc (unord_optab
, mode
);
15379 code
= (code
== UNORDERED
) ? NE
: EQ
;
15385 libfunc
= optab_libfunc (ge_optab
, mode
);
15386 code
= (code
== UNGE
) ? GE
: GT
;
15392 libfunc
= optab_libfunc (le_optab
, mode
);
15393 code
= (code
== UNLE
) ? LE
: LT
;
15399 libfunc
= optab_libfunc (eq_optab
, mode
);
	  code = (code == UNEQ) ? EQ : NE;
15404 gcc_unreachable ();
15407 gcc_assert (libfunc
);
15410 dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
15411 SImode
, op0
, mode
, op1
, mode
);
15413 /* The library signals an exception for signalling NaNs, so we need to
15414 handle isgreater, etc. by first checking isordered. */
15417 rtx ne_rtx
, normal_dest
, unord_dest
;
15418 rtx unord_func
= optab_libfunc (unord_optab
, mode
);
15419 rtx join_label
= gen_label_rtx ();
15420 rtx join_ref
= gen_rtx_LABEL_REF (VOIDmode
, join_label
);
15421 rtx unord_cmp
= gen_reg_rtx (comp_mode
);
15424 /* Test for either value being a NaN. */
15425 gcc_assert (unord_func
);
15426 unord_dest
= emit_library_call_value (unord_func
, NULL_RTX
, LCT_CONST
,
15427 SImode
, op0
, mode
, op1
, mode
);
15429 /* Set value (0) if either value is a NaN, and jump to the join
15431 dest
= gen_reg_rtx (SImode
);
15432 emit_move_insn (dest
, const1_rtx
);
15433 emit_insn (gen_rtx_SET (unord_cmp
,
15434 gen_rtx_COMPARE (comp_mode
, unord_dest
,
15437 ne_rtx
= gen_rtx_NE (comp_mode
, unord_cmp
, const0_rtx
);
15438 emit_jump_insn (gen_rtx_SET (pc_rtx
,
15439 gen_rtx_IF_THEN_ELSE (VOIDmode
, ne_rtx
,
15443 /* Do the normal comparison, knowing that the values are not
15445 normal_dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
15446 SImode
, op0
, mode
, op1
, mode
);
15448 emit_insn (gen_cstoresi4 (dest
,
15449 gen_rtx_fmt_ee (code
, SImode
, normal_dest
,
15451 normal_dest
, const0_rtx
));
15453 /* Join NaN and non-Nan paths. Compare dest against 0. */
15454 emit_label (join_label
);
15458 emit_insn (gen_rtx_SET (compare_result
,
15459 gen_rtx_COMPARE (comp_mode
, dest
, const0_rtx
)));
15464 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
15465 CLOBBERs to match cmptf_internal2 pattern. */
15466 if (comp_mode
== CCFPmode
&& TARGET_XL_COMPAT
15467 && FLOAT128_IBM_P (GET_MODE (op0
))
15468 && TARGET_HARD_FLOAT
)
15469 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
15471 gen_rtx_SET (compare_result
,
15472 gen_rtx_COMPARE (comp_mode
, op0
, op1
)),
15473 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15474 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15475 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15476 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15477 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15478 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15479 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15480 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15481 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (Pmode
)))));
15482 else if (GET_CODE (op1
) == UNSPEC
15483 && XINT (op1
, 1) == UNSPEC_SP_TEST
)
15485 rtx op1b
= XVECEXP (op1
, 0, 0);
15486 comp_mode
= CCEQmode
;
15487 compare_result
= gen_reg_rtx (CCEQmode
);
15489 emit_insn (gen_stack_protect_testdi (compare_result
, op0
, op1b
));
15491 emit_insn (gen_stack_protect_testsi (compare_result
, op0
, op1b
));
15493 else if (mode
== V16QImode
)
15495 gcc_assert (code
== EQ
|| code
== NE
);
15497 rtx result_vector
= gen_reg_rtx (V16QImode
);
15498 rtx cc_bit
= gen_reg_rtx (SImode
);
15499 emit_insn (gen_altivec_vcmpequb_p (result_vector
, op0
, op1
));
15500 emit_insn (gen_cr6_test_for_lt (cc_bit
));
15501 emit_insn (gen_rtx_SET (compare_result
,
15502 gen_rtx_COMPARE (comp_mode
, cc_bit
,
15506 emit_insn (gen_rtx_SET (compare_result
,
15507 gen_rtx_COMPARE (comp_mode
, op0
, op1
)));
15510 validate_condition_mode (code
, GET_MODE (compare_result
));
15512 return gen_rtx_fmt_ee (code
, VOIDmode
, compare_result
, const0_rtx
);
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */

static const char *
rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
			  const_tree type1,
			  const_tree type2)
{
  machine_mode mode1 = TYPE_MODE (type1);
  machine_mode mode2 = TYPE_MODE (type2);

  /* For complex modes, use the inner type.  */
  if (COMPLEX_MODE_P (mode1))
    mode1 = GET_MODE_INNER (mode1);

  if (COMPLEX_MODE_P (mode2))
    mode2 = GET_MODE_INNER (mode2);

  /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
     double to intermix unless -mfloat128-convert.  */
  if (mode1 == mode2)
    return NULL;

  if (!TARGET_FLOAT128_CVT)
    {
      if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
	  || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
	return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
		  "point types");
    }

  return NULL;
}
15554 rs6000_expand_float128_convert (rtx dest
, rtx src
, bool unsigned_p
)
15556 machine_mode dest_mode
= GET_MODE (dest
);
15557 machine_mode src_mode
= GET_MODE (src
);
15558 convert_optab cvt
= unknown_optab
;
15559 bool do_move
= false;
15560 rtx libfunc
= NULL_RTX
;
15562 typedef rtx (*rtx_2func_t
) (rtx
, rtx
);
15563 rtx_2func_t hw_convert
= (rtx_2func_t
)0;
15567 rtx_2func_t from_df
;
15568 rtx_2func_t from_sf
;
15569 rtx_2func_t from_si_sign
;
15570 rtx_2func_t from_si_uns
;
15571 rtx_2func_t from_di_sign
;
15572 rtx_2func_t from_di_uns
;
15575 rtx_2func_t to_si_sign
;
15576 rtx_2func_t to_si_uns
;
15577 rtx_2func_t to_di_sign
;
15578 rtx_2func_t to_di_uns
;
15579 } hw_conversions
[2] = {
15580 /* convertions to/from KFmode */
15582 gen_extenddfkf2_hw
, /* KFmode <- DFmode. */
15583 gen_extendsfkf2_hw
, /* KFmode <- SFmode. */
15584 gen_float_kfsi2_hw
, /* KFmode <- SImode (signed). */
15585 gen_floatuns_kfsi2_hw
, /* KFmode <- SImode (unsigned). */
15586 gen_float_kfdi2_hw
, /* KFmode <- DImode (signed). */
15587 gen_floatuns_kfdi2_hw
, /* KFmode <- DImode (unsigned). */
15588 gen_trunckfdf2_hw
, /* DFmode <- KFmode. */
15589 gen_trunckfsf2_hw
, /* SFmode <- KFmode. */
15590 gen_fix_kfsi2_hw
, /* SImode <- KFmode (signed). */
15591 gen_fixuns_kfsi2_hw
, /* SImode <- KFmode (unsigned). */
15592 gen_fix_kfdi2_hw
, /* DImode <- KFmode (signed). */
15593 gen_fixuns_kfdi2_hw
, /* DImode <- KFmode (unsigned). */
15596 /* convertions to/from TFmode */
15598 gen_extenddftf2_hw
, /* TFmode <- DFmode. */
15599 gen_extendsftf2_hw
, /* TFmode <- SFmode. */
15600 gen_float_tfsi2_hw
, /* TFmode <- SImode (signed). */
15601 gen_floatuns_tfsi2_hw
, /* TFmode <- SImode (unsigned). */
15602 gen_float_tfdi2_hw
, /* TFmode <- DImode (signed). */
15603 gen_floatuns_tfdi2_hw
, /* TFmode <- DImode (unsigned). */
15604 gen_trunctfdf2_hw
, /* DFmode <- TFmode. */
15605 gen_trunctfsf2_hw
, /* SFmode <- TFmode. */
15606 gen_fix_tfsi2_hw
, /* SImode <- TFmode (signed). */
15607 gen_fixuns_tfsi2_hw
, /* SImode <- TFmode (unsigned). */
15608 gen_fix_tfdi2_hw
, /* DImode <- TFmode (signed). */
15609 gen_fixuns_tfdi2_hw
, /* DImode <- TFmode (unsigned). */
15613 if (dest_mode
== src_mode
)
15614 gcc_unreachable ();
15616 /* Eliminate memory operations. */
15618 src
= force_reg (src_mode
, src
);
15622 rtx tmp
= gen_reg_rtx (dest_mode
);
15623 rs6000_expand_float128_convert (tmp
, src
, unsigned_p
);
15624 rs6000_emit_move (dest
, tmp
, dest_mode
);
15628 /* Convert to IEEE 128-bit floating point. */
15629 if (FLOAT128_IEEE_P (dest_mode
))
15631 if (dest_mode
== KFmode
)
15633 else if (dest_mode
== TFmode
)
15636 gcc_unreachable ();
15642 hw_convert
= hw_conversions
[kf_or_tf
].from_df
;
15647 hw_convert
= hw_conversions
[kf_or_tf
].from_sf
;
15653 if (FLOAT128_IBM_P (src_mode
))
15662 cvt
= ufloat_optab
;
15663 hw_convert
= hw_conversions
[kf_or_tf
].from_si_uns
;
15667 cvt
= sfloat_optab
;
15668 hw_convert
= hw_conversions
[kf_or_tf
].from_si_sign
;
15675 cvt
= ufloat_optab
;
15676 hw_convert
= hw_conversions
[kf_or_tf
].from_di_uns
;
15680 cvt
= sfloat_optab
;
15681 hw_convert
= hw_conversions
[kf_or_tf
].from_di_sign
;
15686 gcc_unreachable ();
15690 /* Convert from IEEE 128-bit floating point. */
15691 else if (FLOAT128_IEEE_P (src_mode
))
15693 if (src_mode
== KFmode
)
15695 else if (src_mode
== TFmode
)
15698 gcc_unreachable ();
15704 hw_convert
= hw_conversions
[kf_or_tf
].to_df
;
15709 hw_convert
= hw_conversions
[kf_or_tf
].to_sf
;
15715 if (FLOAT128_IBM_P (dest_mode
))
15725 hw_convert
= hw_conversions
[kf_or_tf
].to_si_uns
;
15730 hw_convert
= hw_conversions
[kf_or_tf
].to_si_sign
;
15738 hw_convert
= hw_conversions
[kf_or_tf
].to_di_uns
;
15743 hw_convert
= hw_conversions
[kf_or_tf
].to_di_sign
;
15748 gcc_unreachable ();
15752 /* Both IBM format. */
15753 else if (FLOAT128_IBM_P (dest_mode
) && FLOAT128_IBM_P (src_mode
))
15757 gcc_unreachable ();
15759 /* Handle conversion between TFmode/KFmode/IFmode. */
15761 emit_insn (gen_rtx_SET (dest
, gen_rtx_FLOAT_EXTEND (dest_mode
, src
)));
15763 /* Handle conversion if we have hardware support. */
15764 else if (TARGET_FLOAT128_HW
&& hw_convert
)
15765 emit_insn ((hw_convert
) (dest
, src
));
15767 /* Call an external function to do the conversion. */
15768 else if (cvt
!= unknown_optab
)
15770 libfunc
= convert_optab_libfunc (cvt
, dest_mode
, src_mode
);
15771 gcc_assert (libfunc
!= NULL_RTX
);
15773 dest2
= emit_library_call_value (libfunc
, dest
, LCT_CONST
, dest_mode
,
15776 gcc_assert (dest2
!= NULL_RTX
);
15777 if (!rtx_equal_p (dest
, dest2
))
15778 emit_move_insn (dest
, dest2
);
15782 gcc_unreachable ();
/* Emit RTL that sets a register to zero if OP1 and OP2 are equal.  SCRATCH
   can be used as that dest register.  Return the dest register.  */

static rtx
rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
{
  if (op2 == const0_rtx)
    return op1;

  if (GET_CODE (scratch) == SCRATCH)
    scratch = gen_reg_rtx (mode);

  if (logical_operand (op2, mode))
    emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
  else
    emit_insn (gen_rtx_SET (scratch,
			    gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));

  return scratch;
}
/* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
   requires this.  The result is mode MODE.  */

static rtx
rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
{
  rtx cond[2];
  int n = 0;

  if (code == LTGT || code == LE || code == UNLT)
    cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
  if (code == LTGT || code == GE || code == UNGT)
    cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
  if (code == LE || code == GE || code == UNEQ)
    cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
  if (code == UNLT || code == UNGT || code == UNEQ)
    cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);

  gcc_assert (n == 2);

  rtx cc = gen_reg_rtx (CCEQmode);
  rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
  emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
  return cc;
}
void
rs6000_emit_sCOND (machine_mode mode, rtx operands[])
{
  rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
  rtx_code cond_code = GET_CODE (condition_rtx);

  if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
      && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
    condition_rtx = rs6000_emit_fp_cror (cond_code, mode, condition_rtx);
  else if (cond_code == NE
	   || cond_code == GE || cond_code == LE
	   || cond_code == GEU || cond_code == LEU
	   || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
    {
      rtx not_result = gen_reg_rtx (CCEQmode);
      rtx not_op, rev_cond_rtx;
      machine_mode cc_mode;

      cc_mode = GET_MODE (XEXP (condition_rtx, 0));

      rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
				     SImode, XEXP (condition_rtx, 0), const0_rtx);
      not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
      emit_insn (gen_rtx_SET (not_result, not_op));
      condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
    }

  machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (XEXP (operands[1], 1));

  if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
    {
      PUT_MODE (condition_rtx, DImode);
      convert_move (operands[0], condition_rtx, 0);
    }
  else
    {
      PUT_MODE (condition_rtx, SImode);
      emit_insn (gen_rtx_SET (operands[0], condition_rtx));
    }
}
/* Emit a branch of kind CODE to location LOC.  */

void
rs6000_emit_cbranch (machine_mode mode, rtx operands[])
{
  rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
  rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
  rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
  emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
}
/* Return the string to output a conditional branch to LABEL, which is
   the operand template of the label, or NULL if the branch is really a
   conditional return.

   OP is the conditional expression.  XEXP (OP, 0) is assumed to be a
   condition code register and its mode specifies what kind of
   comparison we made.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   INSN is the insn.  */

char *
output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  machine_mode mode = GET_MODE (cc_reg);
  int cc_regno = REGNO (cc_reg) - CR0_REGNO;
  int need_longbranch = label != NULL && get_attr_length (insn) == 8;
  int really_reversed = reversed ^ need_longbranch;
  char *s = string;
  const char *ccode;
  const char *pred;
  rtx note;

  validate_condition_mode (code, mode);

  /* Work out which way this really branches.  We could use
     reverse_condition_maybe_unordered here always but this
     makes the resulting assembler clearer.  */
  if (really_reversed)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  switch (code)
    {
      /* Not all of these are actually distinct opcodes, but
	 we distinguish them for clarity of the resulting assembler.  */
    case NE: case LTGT:
      ccode = "ne"; break;
    case EQ: case UNEQ:
      ccode = "eq"; break;
    case GE: case GEU:
      ccode = "ge"; break;
    case GT: case GTU: case UNGT:
      ccode = "gt"; break;
    case LE: case LEU:
      ccode = "le"; break;
    case LT: case LTU: case UNLT:
      ccode = "lt"; break;
    case UNORDERED: ccode = "un"; break;
    case ORDERED: ccode = "nu"; break;
    case UNGE: ccode = "nl"; break;
    case UNLE: ccode = "ng"; break;
    default:
      gcc_unreachable ();
    }

  /* Maybe we have a guess as to how likely the branch is.  */
  pred = "";
  note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
  if (note != NULL_RTX)
    {
      /* PROB is the difference from 50%.  */
      int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
		   .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;

      /* Only hint for highly probable/improbable branches on newer cpus when
	 we have real profile data, as static prediction overrides processor
	 dynamic prediction.  For older cpus we may as well always hint, but
	 assume not taken for branches that are very close to 50% as a
	 mispredicted taken branch is more expensive than a
	 mispredicted not-taken branch.  */
      if (rs6000_always_hint
	  || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
	      && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
	      && br_prob_note_reliable_p (note)))
	{
	  if (abs (prob) > REG_BR_PROB_BASE / 20
	      && ((prob > 0) ^ need_longbranch))
	    pred = "+";
	  else
	    pred = "-";
	}
    }

  if (label == NULL)
    s += sprintf (s, "b%slr%s ", ccode, pred);
  else
    s += sprintf (s, "b%s%s ", ccode, pred);

  /* We need to escape any '%' characters in the reg_names string.
     Assume they'd only be the first character....  */
  if (reg_names[cc_regno + CR0_REGNO][0] == '%')
    *s++ = '%';
  s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);

  if (label != NULL)
    {
      /* If the branch distance was too far, we may have to use an
	 unconditional branch to go the distance.  */
      if (need_longbranch)
	s += sprintf (s, ",$+8\n\tb %s", label);
      else
	s += sprintf (s, ",%s", label);
    }

  return string;
}
16006 /* Return insn for VSX or Altivec comparisons. */
16009 rs6000_emit_vector_compare_inner (enum rtx_code code
, rtx op0
, rtx op1
)
16012 machine_mode mode
= GET_MODE (op0
);
16020 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16031 mask
= gen_reg_rtx (mode
);
16032 emit_insn (gen_rtx_SET (mask
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
16039 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
16040 DMODE is expected destination mode. This is a recursive function. */
16043 rs6000_emit_vector_compare (enum rtx_code rcode
,
16045 machine_mode dmode
)
16048 bool swap_operands
= false;
16049 bool try_again
= false;
16051 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode
));
16052 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
16054 /* See if the comparison works as is. */
16055 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
16063 swap_operands
= true;
16068 swap_operands
= true;
16076 /* Invert condition and try again.
16077 e.g., A != B becomes ~(A==B). */
16079 enum rtx_code rev_code
;
16080 enum insn_code nor_code
;
16083 rev_code
= reverse_condition_maybe_unordered (rcode
);
16084 if (rev_code
== UNKNOWN
)
16087 nor_code
= optab_handler (one_cmpl_optab
, dmode
);
16088 if (nor_code
== CODE_FOR_nothing
)
16091 mask2
= rs6000_emit_vector_compare (rev_code
, op0
, op1
, dmode
);
16095 mask
= gen_reg_rtx (dmode
);
16096 emit_insn (GEN_FCN (nor_code
) (mask
, mask2
));
16104 /* Try GT/GTU/LT/LTU OR EQ */
16107 enum insn_code ior_code
;
16108 enum rtx_code new_code
;
16129 gcc_unreachable ();
16132 ior_code
= optab_handler (ior_optab
, dmode
);
16133 if (ior_code
== CODE_FOR_nothing
)
16136 c_rtx
= rs6000_emit_vector_compare (new_code
, op0
, op1
, dmode
);
16140 eq_rtx
= rs6000_emit_vector_compare (EQ
, op0
, op1
, dmode
);
16144 mask
= gen_reg_rtx (dmode
);
16145 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
16156 std::swap (op0
, op1
);
16158 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
16163 /* You only get two chances. */
16167 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
16168 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
16169 operands for the relation operation COND. */
16172 rs6000_emit_vector_cond_expr (rtx dest
, rtx op_true
, rtx op_false
,
16173 rtx cond
, rtx cc_op0
, rtx cc_op1
)
16175 machine_mode dest_mode
= GET_MODE (dest
);
16176 machine_mode mask_mode
= GET_MODE (cc_op0
);
16177 enum rtx_code rcode
= GET_CODE (cond
);
16179 bool invert_move
= false;
16181 if (VECTOR_UNIT_NONE_P (dest_mode
))
16184 gcc_assert (GET_MODE_SIZE (dest_mode
) == GET_MODE_SIZE (mask_mode
)
16185 && GET_MODE_NUNITS (dest_mode
) == GET_MODE_NUNITS (mask_mode
));
16189 /* Swap operands if we can, and fall back to doing the operation as
16190 specified, and doing a NOR to invert the test. */
16196 /* Invert condition and try again.
16197 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
16198 invert_move
= true;
16199 rcode
= reverse_condition_maybe_unordered (rcode
);
16200 if (rcode
== UNKNOWN
)
16206 if (GET_MODE_CLASS (mask_mode
) == MODE_VECTOR_INT
)
16208 /* Invert condition to avoid compound test. */
16209 invert_move
= true;
16210 rcode
= reverse_condition (rcode
);
16219 /* Invert condition to avoid compound test if necessary. */
16220 if (rcode
== GEU
|| rcode
== LEU
)
16222 invert_move
= true;
16223 rcode
= reverse_condition (rcode
);
16231 /* Get the vector mask for the given relational operations. */
16232 mask
= rs6000_emit_vector_compare (rcode
, cc_op0
, cc_op1
, mask_mode
);
16237 if (mask_mode
!= dest_mode
)
16238 mask
= simplify_gen_subreg (dest_mode
, mask
, mask_mode
, 0);
16241 std::swap (op_true
, op_false
);
16243 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
16244 if (GET_MODE_CLASS (dest_mode
) == MODE_VECTOR_INT
16245 && (GET_CODE (op_true
) == CONST_VECTOR
16246 || GET_CODE (op_false
) == CONST_VECTOR
))
16248 rtx constant_0
= CONST0_RTX (dest_mode
);
16249 rtx constant_m1
= CONSTM1_RTX (dest_mode
);
16251 if (op_true
== constant_m1
&& op_false
== constant_0
)
16253 emit_move_insn (dest
, mask
);
16257 else if (op_true
== constant_0
&& op_false
== constant_m1
)
16259 emit_insn (gen_rtx_SET (dest
, gen_rtx_NOT (dest_mode
, mask
)));
16263 /* If we can't use the vector comparison directly, perhaps we can use
16264 the mask for the true or false fields, instead of loading up a
16266 if (op_true
== constant_m1
)
16269 if (op_false
== constant_0
)
16273 if (!REG_P (op_true
) && !SUBREG_P (op_true
))
16274 op_true
= force_reg (dest_mode
, op_true
);
16276 if (!REG_P (op_false
) && !SUBREG_P (op_false
))
16277 op_false
= force_reg (dest_mode
, op_false
);
16279 rtx tmp
= gen_rtx_IOR (dest_mode
,
16280 gen_rtx_AND (dest_mode
, gen_rtx_NOT (dest_mode
, mask
),
16282 gen_rtx_AND (dest_mode
, mask
, op_true
));
16283 emit_insn (gen_rtx_SET (dest
, tmp
));
/* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
   maximum or minimum with "C" semantics.

   Unless you use -ffast-math, you can't use these instructions to replace
   conditions that implicitly reverse the condition because the comparison
   might generate a NaN or signed zero.

   I.e. the following can be replaced all of the time
	ret = (op1 >  op2) ? op1 : op2	; generate xsmaxcdp
	ret = (op1 >= op2) ? op1 : op2	; generate xsmaxcdp
	ret = (op1 <  op2) ? op1 : op2	; generate xsmincdp
	ret = (op1 <= op2) ? op1 : op2	; generate xsmincdp

   The following can be replaced only if -ffast-math is used:
	ret = (op1 <  op2) ? op2 : op1	; generate xsmaxcdp
	ret = (op1 <= op2) ? op2 : op1	; generate xsmaxcdp
	ret = (op1 >  op2) ? op2 : op1	; generate xsmincdp
	ret = (op1 >= op2) ? op2 : op1	; generate xsmincdp

   Move TRUE_COND to DEST if OP of the operands of the last comparison is
   nonzero/true, FALSE_COND if it is zero/false.

   Return false if we can't generate the appropriate minimum or maximum, and
   true if we did generate the minimum or maximum.  */

static bool
rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode compare_mode = GET_MODE (op0);
  machine_mode result_mode = GET_MODE (dest);
  bool max_p = false;

  if (result_mode != compare_mode)
    return false;

  /* See the comments of this function, it simply expects GE/GT/LE/LT in
     the checks, but for the reversible equivalent UNLT/UNLE/UNGT/UNGE,
     we need to do the reversions first to make the following checks
     support fewer cases, like:

	(a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
	(a UNLE b) ? op1 : op2 => (a >  b) ? op2 : op1;
	(a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
	(a UNGE b) ? op1 : op2 => (a <  b) ? op2 : op1;

     By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
     that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
     have to check for fast-math or the like.  */
  if (code == UNGE || code == UNGT || code == UNLE || code == UNLT)
    {
      code = reverse_condition_maybe_unordered (code);
      std::swap (true_cond, false_cond);
    }

  if (code == GE || code == GT)
    max_p = true;
  else if (code == LE || code == LT)
    max_p = false;
  else
    return false;

  if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
    ;

  /* Only when NaNs and signed-zeros are not in effect, smax could be
     used for `op0 < op1 ? op1 : op0`, and smin could be used for
     `op0 > op1 ? op1 : op0`.  */
  else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
	   && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
    max_p = !max_p;

  else
    return false;

  rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
  return true;
}
16368 /* Possibly emit a floating point conditional move by generating a compare that
16369 sets a mask instruction and a XXSEL select instruction.
16371 Move TRUE_COND to DEST if OP of the operands of the last comparison is
16372 nonzero/true, FALSE_COND if it is zero/false.
16374 Return false if the operation cannot be generated, and true if we could
16375 generate the instruction. */
16378 rs6000_maybe_emit_fp_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16380 enum rtx_code code
= GET_CODE (op
);
16381 rtx op0
= XEXP (op
, 0);
16382 rtx op1
= XEXP (op
, 1);
16383 machine_mode compare_mode
= GET_MODE (op0
);
16384 machine_mode result_mode
= GET_MODE (dest
);
16389 if (!can_create_pseudo_p ())
16392 /* We allow the comparison to be either SFmode/DFmode and the true/false
16393 condition to be either SFmode/DFmode. I.e. we allow:
16398 r = (a == b) ? c : d;
16405 r = (a == b) ? c : d;
16407 but we don't allow intermixing the IEEE 128-bit floating point types with
16408 the 32/64-bit scalar types. */
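/* An informal illustration of the rule above (the variable names are made
   up for the example):

     double r1 = (sf_a == sf_b) ? df_c : df_d;   allowed: SFmode vs DFmode
     double r2 = (kf_a == kf_b) ? df_c : df_d;   rejected: KFmode vs DFmode

   where sf_* are float, df_* are double and kf_* are __float128 values.  */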
16410 if (!(compare_mode
== result_mode
16411 || (compare_mode
== SFmode
&& result_mode
== DFmode
)
16412 || (compare_mode
== DFmode
&& result_mode
== SFmode
)))
16425 code
= swap_condition (code
);
16426 std::swap (op0
, op1
);
16433 /* Generate: [(parallel [(set (dest)
16434 (if_then_else (op (cmp1) (cmp2))
16437 (clobber (scratch))])]. */
16439 compare_rtx
= gen_rtx_fmt_ee (code
, CCFPmode
, op0
, op1
);
16440 cmove_rtx
= gen_rtx_SET (dest
,
16441 gen_rtx_IF_THEN_ELSE (result_mode
,
16446 clobber_rtx
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (V2DImode
));
16447 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
16448 gen_rtvec (2, cmove_rtx
, clobber_rtx
)));
16453 /* Helper function to return true if the target has instructions to do a
16454 compare and set mask instruction that can be used with XXSEL to implement a
conditional move.  Such a target is also assumed to support the
"C" minimum and maximum instructions. */
have_compare_and_set_mask (machine_mode mode)
16465 return TARGET_P9_MINMAX
;
16469 return TARGET_POWER10
&& TARGET_FLOAT128_HW
&& FLOAT128_IEEE_P (mode
);
16478 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
16479 operands of the last comparison is nonzero/true, FALSE_COND if it
16480 is zero/false. Return 0 if the hardware has no such operation. */
rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16485 enum rtx_code code
= GET_CODE (op
);
16486 rtx op0
= XEXP (op
, 0);
16487 rtx op1
= XEXP (op
, 1);
16488 machine_mode compare_mode
= GET_MODE (op0
);
16489 machine_mode result_mode
= GET_MODE (dest
);
16491 bool is_against_zero
;
16493 /* These modes should always match. */
16494 if (GET_MODE (op1
) != compare_mode
16495 /* In the isel case however, we can use a compare immediate, so
16496 op1 may be a small constant. */
16497 && (!TARGET_ISEL
|| !short_cint_operand (op1
, VOIDmode
)))
16499 if (GET_MODE (true_cond
) != result_mode
)
16501 if (GET_MODE (false_cond
) != result_mode
)
16504 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16506 if (have_compare_and_set_mask (compare_mode
)
16507 && have_compare_and_set_mask (result_mode
))
16509 if (rs6000_maybe_emit_maxc_minc (dest
, op
, true_cond
, false_cond
))
16512 if (rs6000_maybe_emit_fp_cmove (dest
, op
, true_cond
, false_cond
))
16516 /* Don't allow using floating point comparisons for integer results for
16518 if (FLOAT_MODE_P (compare_mode
) && !FLOAT_MODE_P (result_mode
))
16521 /* First, work out if the hardware can do this at all, or
16522 if it's too slow.... */
16523 if (!FLOAT_MODE_P (compare_mode
))
16526 return rs6000_emit_int_cmove (dest
, op
, true_cond
, false_cond
);
16530 is_against_zero
= op1
== CONST0_RTX (compare_mode
);
16532 /* A floating-point subtract might overflow, underflow, or produce
16533 an inexact result, thus changing the floating-point flags, so it
16534 can't be generated if we care about that. It's safe if one side
16535 of the construct is zero, since then no subtract will be
16537 if (SCALAR_FLOAT_MODE_P (compare_mode
)
16538 && flag_trapping_math
&& ! is_against_zero
)
16541 /* Eliminate half of the comparisons by switching operands, this
16542 makes the remaining code simpler. */
16543 if (code
== UNLT
|| code
== UNGT
|| code
== UNORDERED
|| code
== NE
16544 || code
== LTGT
|| code
== LT
|| code
== UNLE
)
16546 code
= reverse_condition_maybe_unordered (code
);
16548 true_cond
= false_cond
;
16552 /* UNEQ and LTGT take four instructions for a comparison with zero,
16553 it'll probably be faster to use a branch here too. */
16554 if (code
== UNEQ
&& HONOR_NANS (compare_mode
))
16557 /* We're going to try to implement comparisons by performing
16558 a subtract, then comparing against zero. Unfortunately,
16559 Inf - Inf is NaN which is not zero, and so if we don't
16560 know that the operand is finite and the comparison
16561 would treat EQ different to UNORDERED, we can't do it. */
16562 if (HONOR_INFINITIES (compare_mode
)
16563 && code
!= GT
&& code
!= UNGE
16564 && (!CONST_DOUBLE_P (op1
)
16565 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1
)))
16566 /* Constructs of the form (a OP b ? a : b) are safe. */
16567 && ((! rtx_equal_p (op0
, false_cond
) && ! rtx_equal_p (op1
, false_cond
))
16568 || (! rtx_equal_p (op0
, true_cond
)
16569 && ! rtx_equal_p (op1
, true_cond
))))
16572 /* At this point we know we can use fsel. */
16574 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16575 is no fsel instruction. */
16576 if (compare_mode
!= SFmode
&& compare_mode
!= DFmode
)
16579 /* Reduce the comparison to a comparison against zero. */
16580 if (! is_against_zero
)
16582 temp
= gen_reg_rtx (compare_mode
);
16583 emit_insn (gen_rtx_SET (temp
, gen_rtx_MINUS (compare_mode
, op0
, op1
)));
16585 op1
= CONST0_RTX (compare_mode
);
16588 /* If we don't care about NaNs we can reduce some of the comparisons
16589 down to faster ones. */
16590 if (! HONOR_NANS (compare_mode
))
16596 true_cond
= false_cond
;
16609 /* Now, reduce everything down to a GE. */
16616 temp
= gen_reg_rtx (compare_mode
);
16617 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16622 temp
= gen_reg_rtx (compare_mode
);
16623 emit_insn (gen_rtx_SET (temp
, gen_rtx_ABS (compare_mode
, op0
)));
16628 temp
= gen_reg_rtx (compare_mode
);
16629 emit_insn (gen_rtx_SET (temp
,
16630 gen_rtx_NEG (compare_mode
,
16631 gen_rtx_ABS (compare_mode
, op0
))));
16636 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16637 temp
= gen_reg_rtx (result_mode
);
16638 emit_insn (gen_rtx_SET (temp
,
16639 gen_rtx_IF_THEN_ELSE (result_mode
,
16640 gen_rtx_GE (VOIDmode
,
16642 true_cond
, false_cond
)));
16643 false_cond
= true_cond
;
16646 temp
= gen_reg_rtx (compare_mode
);
16647 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16652 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16653 temp
= gen_reg_rtx (result_mode
);
16654 emit_insn (gen_rtx_SET (temp
,
16655 gen_rtx_IF_THEN_ELSE (result_mode
,
16656 gen_rtx_GE (VOIDmode
,
16658 true_cond
, false_cond
)));
16659 true_cond
= false_cond
;
16662 temp
= gen_reg_rtx (compare_mode
);
16663 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16668 gcc_unreachable ();
16671 emit_insn (gen_rtx_SET (dest
,
16672 gen_rtx_IF_THEN_ELSE (result_mode
,
16673 gen_rtx_GE (VOIDmode
,
16675 true_cond
, false_cond
)));
16679 /* Same as above, but for ints (isel). */
rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16684 rtx condition_rtx
, cr
;
16685 machine_mode mode
= GET_MODE (dest
);
16686 enum rtx_code cond_code
;
16687 rtx (*isel_func
) (rtx
, rtx
, rtx
, rtx
, rtx
);
16690 if (mode
!= SImode
&& (!TARGET_POWERPC64
|| mode
!= DImode
))
16693 /* PR104335: We now need to expect CC-mode "comparisons"
16694 coming from ifcvt. The following code expects proper
16695 comparisons so better abort here. */
16696 if (GET_MODE_CLASS (GET_MODE (XEXP (op
, 0))) == MODE_CC
)
16699 /* We still have to do the compare, because isel doesn't do a
16700 compare, it just looks at the CRx bits set by a previous compare
16702 condition_rtx
= rs6000_generate_compare (op
, mode
);
16703 cond_code
= GET_CODE (condition_rtx
);
16704 cr
= XEXP (condition_rtx
, 0);
16705 signedp
= GET_MODE (cr
) == CCmode
;
16707 isel_func
= (mode
== SImode
16708 ? (signedp
? gen_isel_cc_si
: gen_isel_ccuns_si
)
16709 : (signedp
? gen_isel_cc_di
: gen_isel_ccuns_di
));
16713 case LT
: case GT
: case LTU
: case GTU
: case EQ
:
16714 /* isel handles these directly. */
16718 /* We need to swap the sense of the comparison. */
16720 std::swap (false_cond
, true_cond
);
16721 PUT_CODE (condition_rtx
, reverse_condition (cond_code
));
16726 false_cond
= force_reg (mode
, false_cond
);
16727 if (true_cond
!= const0_rtx
)
16728 true_cond
= force_reg (mode
, true_cond
);
16730 emit_insn (isel_func (dest
, condition_rtx
, true_cond
, false_cond
, cr
));
rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16738 machine_mode mode
= GET_MODE (op0
);
16742 /* VSX/altivec have direct min/max insns. */
16743 if ((code
== SMAX
|| code
== SMIN
)
16744 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode
)
16745 || (mode
== SFmode
&& VECTOR_UNIT_VSX_P (DFmode
))
16746 || (TARGET_POWER10
&& TARGET_FLOAT128_HW
&& FLOAT128_IEEE_P (mode
))))
16748 emit_insn (gen_rtx_SET (dest
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
16752 if (code
== SMAX
|| code
== SMIN
)
16757 if (code
== SMAX
|| code
== UMAX
)
16758 target
= emit_conditional_move (dest
, { c
, op0
, op1
, mode
},
16759 op0
, op1
, mode
, 0);
16761 target
= emit_conditional_move (dest
, { c
, op0
, op1
, mode
},
16762 op1
, op0
, mode
, 0);
16763 gcc_assert (target
);
16764 if (target
!= dest
)
16765 emit_move_insn (dest
, target
);
16768 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16769 COND is true. Mark the jump as unlikely to be taken. */
emit_unlikely_jump (rtx cond, rtx label)
16774 rtx x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cond
, label
, pc_rtx
);
16775 rtx_insn
*insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, x
));
16776 add_reg_br_prob_note (insn
, profile_probability::very_unlikely ());
16779 /* A subroutine of the atomic operation splitters. Emit a load-locked
instruction in MODE.  For QI/HImode, possibly use a pattern that includes
16781 the zero_extend operation. */
emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16786 rtx (*fn
) (rtx
, rtx
) = NULL
;
16791 fn
= gen_load_lockedqi
;
16794 fn
= gen_load_lockedhi
;
16797 if (GET_MODE (mem
) == QImode
)
16798 fn
= gen_load_lockedqi_si
;
16799 else if (GET_MODE (mem
) == HImode
)
16800 fn
= gen_load_lockedhi_si
;
16802 fn
= gen_load_lockedsi
;
16805 fn
= gen_load_lockeddi
;
16808 fn
= gen_load_lockedti
;
16811 gcc_unreachable ();
16813 emit_insn (fn (reg
, mem
));
16816 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16817 instruction in MODE. */
emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16822 rtx (*fn
) (rtx
, rtx
, rtx
) = NULL
;
16827 fn
= gen_store_conditionalqi
;
16830 fn
= gen_store_conditionalhi
;
16833 fn
= gen_store_conditionalsi
;
16836 fn
= gen_store_conditionaldi
;
16839 fn
= gen_store_conditionalti
;
16842 gcc_unreachable ();
16845 /* Emit sync before stwcx. to address PPC405 Erratum. */
16846 if (PPC405_ERRATUM77
)
16847 emit_insn (gen_hwsync ());
16849 emit_insn (fn (res
, mem
, val
));
16852 /* Expand barriers before and after a load_locked/store_cond sequence. */
rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16857 rtx addr
= XEXP (mem
, 0);
16859 if (!legitimate_indirect_address_p (addr
, reload_completed
)
16860 && !legitimate_indexed_address_p (addr
, reload_completed
))
16862 addr
= force_reg (Pmode
, addr
);
16863 mem
= replace_equiv_address_nv (mem
, addr
);
16868 case MEMMODEL_RELAXED
:
16869 case MEMMODEL_CONSUME
:
16870 case MEMMODEL_ACQUIRE
:
16872 case MEMMODEL_RELEASE
:
16873 case MEMMODEL_ACQ_REL
:
16874 emit_insn (gen_lwsync ());
16876 case MEMMODEL_SEQ_CST
:
16877 emit_insn (gen_hwsync ());
16880 gcc_unreachable ();
rs6000_post_atomic_barrier (enum memmodel model)
16890 case MEMMODEL_RELAXED
:
16891 case MEMMODEL_CONSUME
:
16892 case MEMMODEL_RELEASE
:
16894 case MEMMODEL_ACQUIRE
:
16895 case MEMMODEL_ACQ_REL
:
16896 case MEMMODEL_SEQ_CST
:
16897 emit_insn (gen_isync ());
16900 gcc_unreachable ();
16904 /* A subroutine of the various atomic expanders. For sub-word operations,
16905 we must adjust things to operate on SImode. Given the original MEM,
16906 return a new aligned memory. Also build and return the quantities by
16907 which to shift and mask. */
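/* As an informal sketch of the computation done below (not part of the
   original code), for an access at address ADDR:

     aligned = ADDR & -4;                      SImode word containing it
     shift   = (ADDR << 3) & 0x18;             bit offset of the subword
     if (BYTES_BIG_ENDIAN) shift ^= 0x18;      byte 0 is the MSB on BE
     mask    = GET_MODE_MASK (mode) << shift;  insertion mask

   e.g. a QImode access at 0x1003 gives shift 24 on little-endian (0 on
   big-endian) and mask 0xff << shift; HImode uses 0x10 in place of 0x18.  */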
rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16912 rtx addr
, align
, shift
, mask
, mem
;
16913 HOST_WIDE_INT shift_mask
;
16914 machine_mode mode
= GET_MODE (orig_mem
);
16916 /* For smaller modes, we have to implement this via SImode. */
16917 shift_mask
= (mode
== QImode
? 0x18 : 0x10);
16919 addr
= XEXP (orig_mem
, 0);
16920 addr
= force_reg (GET_MODE (addr
), addr
);
16922 /* Aligned memory containing subword. Generate a new memory. We
16923 do not want any of the existing MEM_ATTR data, as we're now
16924 accessing memory outside the original object. */
16925 align
= expand_simple_binop (Pmode
, AND
, addr
, GEN_INT (-4),
16926 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16927 mem
= gen_rtx_MEM (SImode
, align
);
16928 MEM_VOLATILE_P (mem
) = MEM_VOLATILE_P (orig_mem
);
16929 if (MEM_ALIAS_SET (orig_mem
) == ALIAS_SET_MEMORY_BARRIER
)
16930 set_mem_alias_set (mem
, ALIAS_SET_MEMORY_BARRIER
);
16932 /* Shift amount for subword relative to aligned word. */
16933 shift
= gen_reg_rtx (SImode
);
16934 addr
= gen_lowpart (SImode
, addr
);
16935 rtx tmp
= gen_reg_rtx (SImode
);
16936 emit_insn (gen_ashlsi3 (tmp
, addr
, GEN_INT (3)));
16937 emit_insn (gen_andsi3 (shift
, tmp
, GEN_INT (shift_mask
)));
16938 if (BYTES_BIG_ENDIAN
)
16939 shift
= expand_simple_binop (SImode
, XOR
, shift
, GEN_INT (shift_mask
),
16940 shift
, 1, OPTAB_LIB_WIDEN
);
16943 /* Mask for insertion. */
16944 mask
= expand_simple_binop (SImode
, ASHIFT
, GEN_INT (GET_MODE_MASK (mode
)),
16945 shift
, NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16951 /* A subroutine of the various atomic expanders. For sub-word operands,
combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo. */
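/* Informally, the combination computed below is

     result = (oldval & ~mask) | newval;

   where NEWVAL is expected to occupy only the bit positions selected by
   MASK, so the merge changes just the addressed sub-word.  */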
rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16959 x
= gen_reg_rtx (SImode
);
16960 emit_insn (gen_rtx_SET (x
, gen_rtx_AND (SImode
,
16961 gen_rtx_NOT (SImode
, mask
),
16964 x
= expand_simple_binop (SImode
, IOR
, newval
, x
, x
, 1, OPTAB_LIB_WIDEN
);
16969 /* A subroutine of the various atomic expanders. For sub-word operands,
16970 extract WIDE to NARROW via SHIFT. */
rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16975 wide
= expand_simple_binop (SImode
, LSHIFTRT
, wide
, shift
,
16976 wide
, 1, OPTAB_LIB_WIDEN
);
16977 emit_move_insn (narrow
, gen_lowpart (GET_MODE (narrow
), wide
));
16980 /* Expand an atomic compare and swap operation. */
rs6000_expand_atomic_compare_and_swap (rtx operands[])
16985 rtx boolval
, retval
, mem
, oldval
, newval
, cond
;
16986 rtx label1
, label2
, x
, mask
, shift
;
16987 machine_mode mode
, orig_mode
;
16988 enum memmodel mod_s
, mod_f
;
16991 boolval
= operands
[0];
16992 retval
= operands
[1];
16994 oldval
= operands
[3];
16995 newval
= operands
[4];
16996 is_weak
= (INTVAL (operands
[5]) != 0);
16997 mod_s
= memmodel_base (INTVAL (operands
[6]));
16998 mod_f
= memmodel_base (INTVAL (operands
[7]));
16999 orig_mode
= mode
= GET_MODE (mem
);
17001 mask
= shift
= NULL_RTX
;
17002 if (mode
== QImode
|| mode
== HImode
)
17004 /* Before power8, we didn't have access to lbarx/lharx, so generate a
17005 lwarx and shift/mask operations. With power8, we need to do the
17006 comparison in SImode, but the store is still done in QI/HImode. */
17007 oldval
= convert_modes (SImode
, mode
, oldval
, 1);
17009 if (!TARGET_SYNC_HI_QI
)
17011 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
/* Shift and mask OLDVAL into position within the word. */
17014 oldval
= expand_simple_binop (SImode
, ASHIFT
, oldval
, shift
,
17015 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17017 /* Shift and mask NEWVAL into position within the word. */
17018 newval
= convert_modes (SImode
, mode
, newval
, 1);
17019 newval
= expand_simple_binop (SImode
, ASHIFT
, newval
, shift
,
17020 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17023 /* Prepare to adjust the return value. */
17024 retval
= gen_reg_rtx (SImode
);
17027 else if (reg_overlap_mentioned_p (retval
, oldval
))
17028 oldval
= copy_to_reg (oldval
);
17030 if (mode
!= TImode
&& !reg_or_short_operand (oldval
, mode
))
17031 oldval
= copy_to_mode_reg (mode
, oldval
);
17033 if (reg_overlap_mentioned_p (retval
, newval
))
17034 newval
= copy_to_reg (newval
);
17036 mem
= rs6000_pre_atomic_barrier (mem
, mod_s
);
17041 label1
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
17042 emit_label (XEXP (label1
, 0));
17044 label2
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
17046 emit_load_locked (mode
, retval
, mem
);
17050 x
= expand_simple_binop (SImode
, AND
, retval
, mask
,
17051 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17053 cond
= gen_reg_rtx (CCmode
);
17054 /* If we have TImode, synthesize a comparison. */
17055 if (mode
!= TImode
)
17056 x
= gen_rtx_COMPARE (CCmode
, x
, oldval
);
17059 rtx xor1_result
= gen_reg_rtx (DImode
);
17060 rtx xor2_result
= gen_reg_rtx (DImode
);
17061 rtx or_result
= gen_reg_rtx (DImode
);
17062 rtx new_word0
= simplify_gen_subreg (DImode
, x
, TImode
, 0);
17063 rtx new_word1
= simplify_gen_subreg (DImode
, x
, TImode
, 8);
17064 rtx old_word0
= simplify_gen_subreg (DImode
, oldval
, TImode
, 0);
17065 rtx old_word1
= simplify_gen_subreg (DImode
, oldval
, TImode
, 8);
17067 emit_insn (gen_xordi3 (xor1_result
, new_word0
, old_word0
));
17068 emit_insn (gen_xordi3 (xor2_result
, new_word1
, old_word1
));
17069 emit_insn (gen_iordi3 (or_result
, xor1_result
, xor2_result
));
17070 x
= gen_rtx_COMPARE (CCmode
, or_result
, const0_rtx
);
17073 emit_insn (gen_rtx_SET (cond
, x
));
17075 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
17076 emit_unlikely_jump (x
, label2
);
17080 x
= rs6000_mask_atomic_subword (retval
, newval
, mask
);
17082 emit_store_conditional (orig_mode
, cond
, mem
, x
);
17086 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
17087 emit_unlikely_jump (x
, label1
);
17090 if (!is_mm_relaxed (mod_f
))
17091 emit_label (XEXP (label2
, 0));
17093 rs6000_post_atomic_barrier (mod_s
);
17095 if (is_mm_relaxed (mod_f
))
17096 emit_label (XEXP (label2
, 0));
17099 rs6000_finish_atomic_subword (operands
[1], retval
, shift
);
17100 else if (mode
!= GET_MODE (operands
[1]))
17101 convert_move (operands
[1], retval
, 1);
17103 /* In all cases, CR0 contains EQ on success, and NE on failure. */
17104 x
= gen_rtx_EQ (SImode
, cond
, const0_rtx
);
17105 emit_insn (gen_rtx_SET (boolval
, x
));
17108 /* Expand an atomic exchange operation. */
rs6000_expand_atomic_exchange (rtx operands[])
17113 rtx retval
, mem
, val
, cond
;
17115 enum memmodel model
;
17116 rtx label
, x
, mask
, shift
;
17118 retval
= operands
[0];
17121 model
= memmodel_base (INTVAL (operands
[3]));
17122 mode
= GET_MODE (mem
);
17124 mask
= shift
= NULL_RTX
;
17125 if (!TARGET_SYNC_HI_QI
&& (mode
== QImode
|| mode
== HImode
))
17127 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
/* Shift and mask VAL into position within the word. */
17130 val
= convert_modes (SImode
, mode
, val
, 1);
17131 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
17132 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17134 /* Prepare to adjust the return value. */
17135 retval
= gen_reg_rtx (SImode
);
17139 mem
= rs6000_pre_atomic_barrier (mem
, model
);
17141 label
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
17142 emit_label (XEXP (label
, 0));
17144 emit_load_locked (mode
, retval
, mem
);
17148 x
= rs6000_mask_atomic_subword (retval
, val
, mask
);
17150 cond
= gen_reg_rtx (CCmode
);
17151 emit_store_conditional (mode
, cond
, mem
, x
);
17153 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
17154 emit_unlikely_jump (x
, label
);
17156 rs6000_post_atomic_barrier (model
);
17159 rs6000_finish_atomic_subword (operands
[0], retval
, shift
);
17162 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
17163 to perform. MEM is the memory on which to operate. VAL is the second
17164 operand of the binary operator. BEFORE and AFTER are optional locations to
return the value of MEM either before or after the operation.  MODEL_RTX
17166 is a CONST_INT containing the memory model to use. */
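/* Roughly (an informal sketch, not the exact RTL that is emitted), the
   expansion below builds a load-locked/store-conditional loop:

     loop:
       before = load_locked (mem);
       after  = before <code> val;    the NAND case is ~(before & val)
       if (!store_conditional (mem, after))
         goto loop;

   with BEFORE/AFTER copied to the requested locations afterwards and
   memory barriers inserted according to MODEL_RTX.  */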
rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
			 rtx orig_before, rtx orig_after, rtx model_rtx)
17172 enum memmodel model
= memmodel_base (INTVAL (model_rtx
));
17173 machine_mode mode
= GET_MODE (mem
);
17174 machine_mode store_mode
= mode
;
17175 rtx label
, x
, cond
, mask
, shift
;
17176 rtx before
= orig_before
, after
= orig_after
;
17178 mask
= shift
= NULL_RTX
;
17179 /* On power8, we want to use SImode for the operation. On previous systems,
use the operation in a subword and shift/mask to get the proper byte or
halfword. */
17182 if (mode
== QImode
|| mode
== HImode
)
17184 if (TARGET_SYNC_HI_QI
)
17186 val
= convert_modes (SImode
, mode
, val
, 1);
17188 /* Prepare to adjust the return value. */
17189 before
= gen_reg_rtx (SImode
);
17191 after
= gen_reg_rtx (SImode
);
17196 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
/* Shift and mask VAL into position within the word. */
17199 val
= convert_modes (SImode
, mode
, val
, 1);
17200 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
17201 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17207 /* We've already zero-extended VAL. That is sufficient to
17208 make certain that it does not affect other bits. */
17213 /* If we make certain that all of the other bits in VAL are
17214 set, that will be sufficient to not affect other bits. */
17215 x
= gen_rtx_NOT (SImode
, mask
);
17216 x
= gen_rtx_IOR (SImode
, x
, val
);
17217 emit_insn (gen_rtx_SET (val
, x
));
17224 /* These will all affect bits outside the field and need
17225 adjustment via MASK within the loop. */
17229 gcc_unreachable ();
17232 /* Prepare to adjust the return value. */
17233 before
= gen_reg_rtx (SImode
);
17235 after
= gen_reg_rtx (SImode
);
17236 store_mode
= mode
= SImode
;
17240 mem
= rs6000_pre_atomic_barrier (mem
, model
);
17242 label
= gen_label_rtx ();
17243 emit_label (label
);
17244 label
= gen_rtx_LABEL_REF (VOIDmode
, label
);
17246 if (before
== NULL_RTX
)
17247 before
= gen_reg_rtx (mode
);
17249 emit_load_locked (mode
, before
, mem
);
17253 x
= expand_simple_binop (mode
, AND
, before
, val
,
17254 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17255 after
= expand_simple_unop (mode
, NOT
, x
, after
, 1);
17259 after
= expand_simple_binop (mode
, code
, before
, val
,
17260 after
, 1, OPTAB_LIB_WIDEN
);
17266 x
= expand_simple_binop (SImode
, AND
, after
, mask
,
17267 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
17268 x
= rs6000_mask_atomic_subword (before
, x
, mask
);
17270 else if (store_mode
!= mode
)
17271 x
= convert_modes (store_mode
, mode
, x
, 1);
17273 cond
= gen_reg_rtx (CCmode
);
17274 emit_store_conditional (store_mode
, cond
, mem
, x
);
17276 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
17277 emit_unlikely_jump (x
, label
);
17279 rs6000_post_atomic_barrier (model
);
17283 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
then do the calculations in a SImode register. */
17286 rs6000_finish_atomic_subword (orig_before
, before
, shift
);
17288 rs6000_finish_atomic_subword (orig_after
, after
, shift
);
17290 else if (store_mode
!= mode
)
17292 /* QImode/HImode on machines with lbarx/lharx where we do the native
operation and then do the calculations in a SImode register. */
17295 convert_move (orig_before
, before
, 1);
17297 convert_move (orig_after
, after
, 1);
17299 else if (orig_after
&& after
!= orig_after
)
17300 emit_move_insn (orig_after
, after
);
17303 static GTY(()) alias_set_type TOC_alias_set
= -1;
17306 get_TOC_alias_set (void)
17308 if (TOC_alias_set
== -1)
17309 TOC_alias_set
= new_alias_set ();
17310 return TOC_alias_set
;
17313 /* The mode the ABI uses for a word. This is not the same as word_mode
17314 for -m32 -mpowerpc64. This is used to implement various target hooks. */
17316 static scalar_int_mode
17317 rs6000_abi_word_mode (void)
17319 return TARGET_32BIT
? SImode
: DImode
;
17322 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
17324 rs6000_offload_options (void)
17327 return xstrdup ("-foffload-abi=lp64");
17329 return xstrdup ("-foffload-abi=ilp32");
17333 /* A quick summary of the various types of 'constant-pool tables'
17336 Target Flags Name One table per
17337 AIX (none) AIX TOC object file
17338 AIX -mfull-toc AIX TOC object file
17339 AIX -mminimal-toc AIX minimal TOC translation unit
17340 SVR4/EABI (none) SVR4 SDATA object file
17341 SVR4/EABI -fpic SVR4 pic object file
17342 SVR4/EABI -fPIC SVR4 PIC translation unit
17343 SVR4/EABI -mrelocatable EABI TOC function
17344 SVR4/EABI -maix AIX TOC object file
17345 SVR4/EABI -maix -mminimal-toc
17346 AIX minimal TOC translation unit
17348 Name Reg. Set by entries contains:
17349 made by addrs? fp? sum?
17351 AIX TOC 2 crt0 as Y option option
17352 AIX minimal TOC 30 prolog gcc Y Y option
17353 SVR4 SDATA 13 crt0 gcc N Y N
17354 SVR4 pic 30 prolog ld Y not yet N
17355 SVR4 PIC 30 prolog gcc Y option option
17356 EABI TOC 30 prolog gcc Y option option
17360 /* Hash functions for the hash table. */
17363 rs6000_hash_constant (rtx k
)
17365 enum rtx_code code
= GET_CODE (k
);
17366 machine_mode mode
= GET_MODE (k
);
17367 unsigned result
= (code
<< 3) ^ mode
;
17368 const char *format
;
17371 format
= GET_RTX_FORMAT (code
);
17372 flen
= strlen (format
);
17378 return result
* 1231 + (unsigned) INSN_UID (XEXP (k
, 0));
17380 case CONST_WIDE_INT
:
17383 flen
= CONST_WIDE_INT_NUNITS (k
);
17384 for (i
= 0; i
< flen
; i
++)
17385 result
= result
* 613 + CONST_WIDE_INT_ELT (k
, i
);
17390 return real_hash (CONST_DOUBLE_REAL_VALUE (k
)) * result
;
17400 for (; fidx
< flen
; fidx
++)
17401 switch (format
[fidx
])
17406 const char *str
= XSTR (k
, fidx
);
17407 len
= strlen (str
);
17408 result
= result
* 613 + len
;
17409 for (i
= 0; i
< len
; i
++)
17410 result
= result
* 613 + (unsigned) str
[i
];
17415 result
= result
* 1231 + rs6000_hash_constant (XEXP (k
, fidx
));
17419 result
= result
* 613 + (unsigned) XINT (k
, fidx
);
17422 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT
))
17423 result
= result
* 613 + (unsigned) XWINT (k
, fidx
);
17427 for (i
= 0; i
< sizeof (HOST_WIDE_INT
) / sizeof (unsigned); i
++)
17428 result
= result
* 613 + (unsigned) (XWINT (k
, fidx
)
17435 gcc_unreachable ();
17442 toc_hasher::hash (toc_hash_struct
*thc
)
17444 return rs6000_hash_constant (thc
->key
) ^ thc
->key_mode
;
17447 /* Compare H1 and H2 for equivalence. */
17450 toc_hasher::equal (toc_hash_struct
*h1
, toc_hash_struct
*h2
)
17455 if (h1
->key_mode
!= h2
->key_mode
)
17458 return rtx_equal_p (r1
, r2
);
17461 /* These are the names given by the C++ front-end to vtables, and
17462 vtable-like objects. Ideally, this logic should not be here;
17463 instead, there should be some programmatic way of inquiring as
17464 to whether or not an object is a vtable. */
17466 #define VTABLE_NAME_P(NAME) \
17467 (startswith (name, "_vt.") \
17468 || startswith (name, "_ZTV") \
17469 || startswith (name, "_ZTT") \
17470 || startswith (name, "_ZTI") \
17471 || startswith (name, "_ZTC"))
17473 #ifdef NO_DOLLAR_IN_LABEL
17474 /* Return a GGC-allocated character string translating dollar signs in
17475 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
17478 rs6000_xcoff_strip_dollar (const char *name
)
17484 q
= (const char *) strchr (name
, '$');
17486 if (q
== 0 || q
== name
)
17489 len
= strlen (name
);
17490 strip
= XALLOCAVEC (char, len
+ 1);
17491 strcpy (strip
, name
);
17492 p
= strip
+ (q
- name
);
17496 p
= strchr (p
+ 1, '$');
17499 return ggc_alloc_string (strip
, len
);
17504 rs6000_output_symbol_ref (FILE *file
, rtx x
)
17506 const char *name
= XSTR (x
, 0);
17508 /* Currently C++ toc references to vtables can be emitted before it
17509 is decided whether the vtable is public or private. If this is
17510 the case, then the linker will eventually complain that there is
17511 a reference to an unknown section. Thus, for vtables only,
17512 we emit the TOC reference to reference the identifier and not the
17514 if (VTABLE_NAME_P (name
))
17516 RS6000_OUTPUT_BASENAME (file
, name
);
17519 assemble_name (file
, name
);
/* Output a TOC entry.  We derive the entry name from what is being written. */
output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
17529 const char *name
= buf
;
17531 HOST_WIDE_INT offset
= 0;
17533 gcc_assert (!TARGET_NO_TOC_OR_PCREL
);
17535 /* When the linker won't eliminate them, don't output duplicate
17536 TOC entries (this happens on AIX if there is any kind of TOC,
17537 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17539 if (TARGET_TOC
&& GET_CODE (x
) != LABEL_REF
)
17541 struct toc_hash_struct
*h
;
17543 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17544 time because GGC is not initialized at that point. */
17545 if (toc_hash_table
== NULL
)
17546 toc_hash_table
= hash_table
<toc_hasher
>::create_ggc (1021);
17548 h
= ggc_alloc
<toc_hash_struct
> ();
17550 h
->key_mode
= mode
;
17551 h
->labelno
= labelno
;
17553 toc_hash_struct
**found
= toc_hash_table
->find_slot (h
, INSERT
);
17554 if (*found
== NULL
)
17556 else /* This is indeed a duplicate.
17557 Set this label equal to that label. */
17559 fputs ("\t.set ", file
);
17560 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
17561 fprintf (file
, "%d,", labelno
);
17562 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
17563 fprintf (file
, "%d\n", ((*found
)->labelno
));
17566 if (TARGET_XCOFF
&& SYMBOL_REF_P (x
)
17567 && (SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_GLOBAL_DYNAMIC
17568 || SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
))
17570 fputs ("\t.set ", file
);
17571 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
17572 fprintf (file
, "%d,", labelno
);
17573 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
17574 fprintf (file
, "%d\n", ((*found
)->labelno
));
17581 /* If we're going to put a double constant in the TOC, make sure it's
17582 aligned properly when strict alignment is on. */
17583 if ((CONST_DOUBLE_P (x
) || CONST_WIDE_INT_P (x
))
17584 && STRICT_ALIGNMENT
17585 && GET_MODE_BITSIZE (mode
) >= 64
17586 && ! (TARGET_NO_FP_IN_TOC
&& ! TARGET_MINIMAL_TOC
)) {
17587 ASM_OUTPUT_ALIGN (file
, 3);
17590 (*targetm
.asm_out
.internal_label
) (file
, "LC", labelno
);
17592 /* Handle FP constants specially. Note that if we have a minimal
TOC, things we put here aren't actually in the TOC, so we can allow
FP constants. */
17595 if (CONST_DOUBLE_P (x
)
17596 && (GET_MODE (x
) == TFmode
|| GET_MODE (x
) == TDmode
17597 || GET_MODE (x
) == IFmode
|| GET_MODE (x
) == KFmode
))
17601 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17602 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17604 real_to_target (k
, CONST_DOUBLE_REAL_VALUE (x
), GET_MODE (x
));
17608 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17609 fputs (DOUBLE_INT_ASM_OP
, file
);
17611 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17612 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17613 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17614 fprintf (file
, "0x%lx%08lx,0x%lx%08lx\n",
17615 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17616 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff,
17617 k
[WORDS_BIG_ENDIAN
? 2 : 3] & 0xffffffff,
17618 k
[WORDS_BIG_ENDIAN
? 3 : 2] & 0xffffffff);
17623 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17624 fputs ("\t.long ", file
);
17626 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17627 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17628 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17629 fprintf (file
, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17630 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17631 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17635 else if (CONST_DOUBLE_P (x
)
17636 && (GET_MODE (x
) == DFmode
|| GET_MODE (x
) == DDmode
))
17640 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17641 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17643 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17647 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17648 fputs (DOUBLE_INT_ASM_OP
, file
);
17650 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17651 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17652 fprintf (file
, "0x%lx%08lx\n",
17653 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17654 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff);
17659 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17660 fputs ("\t.long ", file
);
17662 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17663 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17664 fprintf (file
, "0x%lx,0x%lx\n",
17665 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17669 else if (CONST_DOUBLE_P (x
)
17670 && (GET_MODE (x
) == SFmode
|| GET_MODE (x
) == SDmode
))
17674 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17675 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17677 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17681 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17682 fputs (DOUBLE_INT_ASM_OP
, file
);
17684 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17685 if (WORDS_BIG_ENDIAN
)
17686 fprintf (file
, "0x%lx00000000\n", l
& 0xffffffff);
17688 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17693 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17694 fputs ("\t.long ", file
);
17696 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17697 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17701 else if (GET_MODE (x
) == VOIDmode
&& CONST_INT_P (x
))
17703 unsigned HOST_WIDE_INT low
;
17704 HOST_WIDE_INT high
;
17706 low
= INTVAL (x
) & 0xffffffff;
17707 high
= (HOST_WIDE_INT
) INTVAL (x
) >> 32;
/* TOC entries are always Pmode-sized, so on big-endian targets
smaller integer constants in the TOC need to be padded.
17711 (This is still a win over putting the constants in
17712 a separate constant pool, because then we'd have
17713 to have both a TOC entry _and_ the actual constant.)
17715 For a 32-bit target, CONST_INT values are loaded and shifted
17716 entirely within `low' and can be stored in one TOC entry. */
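/* As a concrete illustration (informal): on a 64-bit big-endian target a
   32-bit constant such as 0x12345678 is shifted into the most significant
   half of the 8-byte TOC slot, so the emitted entry reads roughly
   ".tc ID_...[TC],0x1234567800000000".  */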
17718 /* It would be easy to make this work, but it doesn't now. */
17719 gcc_assert (!TARGET_64BIT
|| POINTER_SIZE
>= GET_MODE_BITSIZE (mode
));
17721 if (WORDS_BIG_ENDIAN
&& POINTER_SIZE
> GET_MODE_BITSIZE (mode
))
17724 low
<<= POINTER_SIZE
- GET_MODE_BITSIZE (mode
);
17725 high
= (HOST_WIDE_INT
) low
>> 32;
17731 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17732 fputs (DOUBLE_INT_ASM_OP
, file
);
17734 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17735 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17736 fprintf (file
, "0x%lx%08lx\n",
17737 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17742 if (POINTER_SIZE
< GET_MODE_BITSIZE (mode
))
17744 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17745 fputs ("\t.long ", file
);
17747 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17748 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17749 fprintf (file
, "0x%lx,0x%lx\n",
17750 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17754 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17755 fputs ("\t.long ", file
);
17757 fprintf (file
, "\t.tc IS_%lx[TC],", (long) low
& 0xffffffff);
17758 fprintf (file
, "0x%lx\n", (long) low
& 0xffffffff);
17764 if (GET_CODE (x
) == CONST
)
17766 gcc_assert (GET_CODE (XEXP (x
, 0)) == PLUS
17767 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)));
17769 base
= XEXP (XEXP (x
, 0), 0);
17770 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
17773 switch (GET_CODE (base
))
17776 name
= XSTR (base
, 0);
17780 ASM_GENERATE_INTERNAL_LABEL (buf
, "L",
17781 CODE_LABEL_NUMBER (XEXP (base
, 0)));
17785 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (base
));
17789 gcc_unreachable ();
17792 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17793 fputs (TARGET_32BIT
? "\t.long " : DOUBLE_INT_ASM_OP
, file
);
17796 fputs ("\t.tc ", file
);
17797 RS6000_OUTPUT_BASENAME (file
, name
);
17800 fprintf (file
, ".N" HOST_WIDE_INT_PRINT_UNSIGNED
, - offset
);
17802 fprintf (file
, ".P" HOST_WIDE_INT_PRINT_UNSIGNED
, offset
);
17804 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17805 after other TOC symbols, reducing overflow of small TOC access
17806 to [TC] symbols. */
17807 fputs (TARGET_XCOFF
&& TARGET_CMODEL
!= CMODEL_SMALL
17808 ? "[TE]," : "[TC],", file
);
17811 /* Currently C++ toc references to vtables can be emitted before it
17812 is decided whether the vtable is public or private. If this is
17813 the case, then the linker will eventually complain that there is
17814 a TOC reference to an unknown section. Thus, for vtables only,
17815 we emit the TOC reference to reference the symbol and not the
17817 if (VTABLE_NAME_P (name
))
17819 RS6000_OUTPUT_BASENAME (file
, name
);
17821 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
);
17822 else if (offset
> 0)
17823 fprintf (file
, "+" HOST_WIDE_INT_PRINT_DEC
, offset
);
17826 output_addr_const (file
, x
);
17829 if (TARGET_XCOFF
&& SYMBOL_REF_P (base
))
17831 switch (SYMBOL_REF_TLS_MODEL (base
))
17835 case TLS_MODEL_LOCAL_EXEC
:
17836 fputs ("@le", file
);
17838 case TLS_MODEL_INITIAL_EXEC
:
17839 fputs ("@ie", file
);
17841 /* Use global-dynamic for local-dynamic. */
17842 case TLS_MODEL_GLOBAL_DYNAMIC
:
17843 case TLS_MODEL_LOCAL_DYNAMIC
:
17845 (*targetm
.asm_out
.internal_label
) (file
, "LCM", labelno
);
17846 fputs ("\t.tc .", file
);
17847 RS6000_OUTPUT_BASENAME (file
, name
);
17848 fputs ("[TC],", file
);
17849 output_addr_const (file
, x
);
17850 fputs ("@m", file
);
17853 gcc_unreachable ();
17861 /* Output an assembler pseudo-op to write an ASCII string of N characters
17862 starting at P to FILE.
17864 On the RS/6000, we have to do this using the .byte operation and
17865 write out special characters outside the quoted string.
17866 Also, the assembler is broken; very long strings are truncated,
17867 so we must artificially break them up early. */
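/* For instance (illustrative only), emitting the three bytes of "Hi\n"
   would produce roughly

	.byte "Hi"
	.byte 10

   printable characters are collected into quoted strings (with '"'
   doubled), while other byte values are written out in decimal.  */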
output_ascii (FILE *file, const char *p, int n)
17873 int i
, count_string
;
17874 const char *for_string
= "\t.byte \"";
17875 const char *for_decimal
= "\t.byte ";
17876 const char *to_close
= NULL
;
17879 for (i
= 0; i
< n
; i
++)
17882 if (c
>= ' ' && c
< 0177)
17885 fputs (for_string
, file
);
17888 /* Write two quotes to get one. */
17896 for_decimal
= "\"\n\t.byte ";
17900 if (count_string
>= 512)
17902 fputs (to_close
, file
);
17904 for_string
= "\t.byte \"";
17905 for_decimal
= "\t.byte ";
17913 fputs (for_decimal
, file
);
17914 fprintf (file
, "%d", c
);
17916 for_string
= "\n\t.byte \"";
17917 for_decimal
= ", ";
17923 /* Now close the string if we have written one. Then end the line. */
17925 fputs (to_close
, file
);
17928 /* Generate a unique section name for FILENAME for a section type
17929 represented by SECTION_DESC. Output goes into BUF.
17931 SECTION_DESC can be any string, as long as it is different for each
17932 possible section type.
17934 We name the section in the same manner as xlc. The name begins with an
17935 underscore followed by the filename (after stripping any leading directory
17936 names) with the last period replaced by the string SECTION_DESC. If
FILENAME does not contain a period, SECTION_DESC is appended to the end of
FILENAME. */
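/* For example (informal, not taken from the sources): a FILENAME of
   "subdir/foo.c" with SECTION_DESC ".bss_" would give a section name of
   roughly "_foo.bss_", following the xlc convention described above.  */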
rs6000_gen_section_name (char **buf, const char *filename,
			 const char *section_desc)
17944 const char *q
, *after_last_slash
, *last_period
= 0;
17948 after_last_slash
= filename
;
17949 for (q
= filename
; *q
; q
++)
17952 after_last_slash
= q
+ 1;
17953 else if (*q
== '.')
17957 len
= strlen (after_last_slash
) + strlen (section_desc
) + 2;
17958 *buf
= (char *) xmalloc (len
);
17963 for (q
= after_last_slash
; *q
; q
++)
17965 if (q
== last_period
)
17967 strcpy (p
, section_desc
);
17968 p
+= strlen (section_desc
);
17972 else if (ISALNUM (*q
))
17976 if (last_period
== 0)
17977 strcpy (p
, section_desc
);
17982 /* Emit profile function. */
17985 output_profile_hook (int labelno ATTRIBUTE_UNUSED
)
17987 /* Non-standard profiling for kernels, which just saves LR then calls
17988 _mcount without worrying about arg saves. The idea is to change
17989 the function prologue as little as possible as it isn't easy to
17990 account for arg save/restore code added just for _mcount. */
17991 if (TARGET_PROFILE_KERNEL
)
17994 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
17996 #ifndef NO_PROFILE_COUNTERS
17997 # define NO_PROFILE_COUNTERS 0
17999 if (NO_PROFILE_COUNTERS
)
18000 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
18001 LCT_NORMAL
, VOIDmode
);
18005 const char *label_name
;
18008 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
18009 label_name
= ggc_strdup ((*targetm
.strip_name_encoding
) (buf
));
18010 fun
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
18012 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
18013 LCT_NORMAL
, VOIDmode
, fun
, Pmode
);
18016 else if (DEFAULT_ABI
== ABI_DARWIN
)
18018 const char *mcount_name
= RS6000_MCOUNT
;
18019 int caller_addr_regno
= LR_REGNO
;
18021 /* Be conservative and always set this, at least for now. */
18022 crtl
->uses_pic_offset_table
= 1;
18025 /* For PIC code, set up a stub and collect the caller's address
18026 from r0, which is where the prologue puts it. */
18027 if (MACHOPIC_INDIRECT
18028 && crtl
->uses_pic_offset_table
)
18029 caller_addr_regno
= 0;
18031 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, mcount_name
),
18032 LCT_NORMAL
, VOIDmode
,
18033 gen_rtx_REG (Pmode
, caller_addr_regno
), Pmode
);
18037 /* Write function profiler code. */
18040 output_function_profiler (FILE *file
, int labelno
)
18044 switch (DEFAULT_ABI
)
18047 gcc_unreachable ();
18052 warning (0, "no profiling of 64-bit code for this ABI");
18055 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
18056 fprintf (file
, "\tmflr %s\n", reg_names
[0]);
18057 if (NO_PROFILE_COUNTERS
)
18059 asm_fprintf (file
, "\tstw %s,4(%s)\n",
18060 reg_names
[0], reg_names
[1]);
18062 else if (TARGET_SECURE_PLT
&& flag_pic
)
18064 if (TARGET_LINK_STACK
)
18067 get_ppc476_thunk_name (name
);
18068 asm_fprintf (file
, "\tbl %s\n", name
);
18071 asm_fprintf (file
, "\tbcl 20,31,1f\n1:\n");
18072 asm_fprintf (file
, "\tstw %s,4(%s)\n",
18073 reg_names
[0], reg_names
[1]);
18074 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
18075 asm_fprintf (file
, "\taddis %s,%s,",
18076 reg_names
[12], reg_names
[12]);
18077 assemble_name (file
, buf
);
18078 asm_fprintf (file
, "-1b@ha\n\tla %s,", reg_names
[0]);
18079 assemble_name (file
, buf
);
18080 asm_fprintf (file
, "-1b@l(%s)\n", reg_names
[12]);
18082 else if (flag_pic
== 1)
18084 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file
);
18085 asm_fprintf (file
, "\tstw %s,4(%s)\n",
18086 reg_names
[0], reg_names
[1]);
18087 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
18088 asm_fprintf (file
, "\tlwz %s,", reg_names
[0]);
18089 assemble_name (file
, buf
);
18090 asm_fprintf (file
, "@got(%s)\n", reg_names
[12]);
18092 else if (flag_pic
> 1)
18094 asm_fprintf (file
, "\tstw %s,4(%s)\n",
18095 reg_names
[0], reg_names
[1]);
18096 /* Now, we need to get the address of the label. */
18097 if (TARGET_LINK_STACK
)
18100 get_ppc476_thunk_name (name
);
18101 asm_fprintf (file
, "\tbl %s\n\tb 1f\n\t.long ", name
);
18102 assemble_name (file
, buf
);
18103 fputs ("-.\n1:", file
);
18104 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
18105 asm_fprintf (file
, "\taddi %s,%s,4\n",
18106 reg_names
[11], reg_names
[11]);
18110 fputs ("\tbcl 20,31,1f\n\t.long ", file
);
18111 assemble_name (file
, buf
);
18112 fputs ("-.\n1:", file
);
18113 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
18115 asm_fprintf (file
, "\tlwz %s,0(%s)\n",
18116 reg_names
[0], reg_names
[11]);
18117 asm_fprintf (file
, "\tadd %s,%s,%s\n",
18118 reg_names
[0], reg_names
[0], reg_names
[11]);
18122 asm_fprintf (file
, "\tlis %s,", reg_names
[12]);
18123 assemble_name (file
, buf
);
18124 fputs ("@ha\n", file
);
18125 asm_fprintf (file
, "\tstw %s,4(%s)\n",
18126 reg_names
[0], reg_names
[1]);
18127 asm_fprintf (file
, "\tla %s,", reg_names
[0]);
18128 assemble_name (file
, buf
);
18129 asm_fprintf (file
, "@l(%s)\n", reg_names
[12]);
18132 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
18133 fprintf (file
, "\tbl %s%s\n",
18134 RS6000_MCOUNT
, flag_pic
? "@plt" : "");
18140 /* Don't do anything, done in output_profile_hook (). */
18147 /* The following variable value is the last issued insn. */
18149 static rtx_insn
*last_scheduled_insn
;
18151 /* The following variable helps to balance issuing of load and
18152 store instructions */
18154 static int load_store_pendulum
;
18156 /* The following variable helps pair divide insns during scheduling. */
18157 static int divide_cnt
;
18158 /* The following variable helps pair and alternate vector and vector load
18159 insns during scheduling. */
18160 static int vec_pairing
;
18163 /* Power4 load update and store update instructions are cracked into a
18164 load or store and an integer insn which are executed in the same cycle.
18165 Branches have their own dispatch slot which does not count against the
18166 GCC issue rate, but it changes the program flow so there are no other
18167 instructions to issue in this cycle. */
18170 rs6000_variable_issue_1 (rtx_insn
*insn
, int more
)
18172 last_scheduled_insn
= insn
;
18173 if (GET_CODE (PATTERN (insn
)) == USE
18174 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18176 cached_can_issue_more
= more
;
18177 return cached_can_issue_more
;
18180 if (insn_terminates_group_p (insn
, current_group
))
18182 cached_can_issue_more
= 0;
18183 return cached_can_issue_more
;
18186 /* If no reservation, but reach here */
18187 if (recog_memoized (insn
) < 0)
18190 if (rs6000_sched_groups
)
18192 if (is_microcoded_insn (insn
))
18193 cached_can_issue_more
= 0;
18194 else if (is_cracked_insn (insn
))
18195 cached_can_issue_more
= more
> 2 ? more
- 2 : 0;
18197 cached_can_issue_more
= more
- 1;
18199 return cached_can_issue_more
;
18202 if (rs6000_tune
== PROCESSOR_CELL
&& is_nonpipeline_insn (insn
))
18205 cached_can_issue_more
= more
- 1;
18206 return cached_can_issue_more
;
18210 rs6000_variable_issue (FILE *stream
, int verbose
, rtx_insn
*insn
, int more
)
18212 int r
= rs6000_variable_issue_1 (insn
, more
);
18214 fprintf (stream
, "// rs6000_variable_issue (more = %d) = %d\n", more
, r
);
18218 /* Adjust the cost of a scheduling dependency. Return the new cost of
18219 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
18222 rs6000_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
18225 enum attr_type attr_type
;
18227 if (recog_memoized (insn
) < 0 || recog_memoized (dep_insn
) < 0)
18234 /* Data dependency; DEP_INSN writes a register that INSN reads
18235 some cycles later. */
18237 /* Separate a load from a narrower, dependent store. */
18238 if ((rs6000_sched_groups
|| rs6000_tune
== PROCESSOR_POWER9
18239 || rs6000_tune
== PROCESSOR_POWER10
)
18240 && GET_CODE (PATTERN (insn
)) == SET
18241 && GET_CODE (PATTERN (dep_insn
)) == SET
18242 && MEM_P (XEXP (PATTERN (insn
), 1))
18243 && MEM_P (XEXP (PATTERN (dep_insn
), 0))
18244 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn
), 1)))
18245 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn
), 0)))))
18248 attr_type
= get_attr_type (insn
);
18253 /* Tell the first scheduling pass about the latency between
18254 a mtctr and bctr (and mtlr and br/blr). The first
18255 scheduling pass will not know about this latency since
18256 the mtctr instruction, which has the latency associated
18257 to it, will be generated by reload. */
18260 /* Leave some extra cycles between a compare and its
18261 dependent branch, to inhibit expensive mispredicts. */
18262 if ((rs6000_tune
== PROCESSOR_PPC603
18263 || rs6000_tune
== PROCESSOR_PPC604
18264 || rs6000_tune
== PROCESSOR_PPC604e
18265 || rs6000_tune
== PROCESSOR_PPC620
18266 || rs6000_tune
== PROCESSOR_PPC630
18267 || rs6000_tune
== PROCESSOR_PPC750
18268 || rs6000_tune
== PROCESSOR_PPC7400
18269 || rs6000_tune
== PROCESSOR_PPC7450
18270 || rs6000_tune
== PROCESSOR_PPCE5500
18271 || rs6000_tune
== PROCESSOR_PPCE6500
18272 || rs6000_tune
== PROCESSOR_POWER4
18273 || rs6000_tune
== PROCESSOR_POWER5
18274 || rs6000_tune
== PROCESSOR_POWER7
18275 || rs6000_tune
== PROCESSOR_POWER8
18276 || rs6000_tune
== PROCESSOR_POWER9
18277 || rs6000_tune
== PROCESSOR_POWER10
18278 || rs6000_tune
== PROCESSOR_CELL
)
18279 && recog_memoized (dep_insn
)
18280 && (INSN_CODE (dep_insn
) >= 0))
18282 switch (get_attr_type (dep_insn
))
18285 case TYPE_FPCOMPARE
:
18286 case TYPE_CR_LOGICAL
:
18290 if (get_attr_dot (dep_insn
) == DOT_YES
)
18295 if (get_attr_dot (dep_insn
) == DOT_YES
18296 && get_attr_var_shift (dep_insn
) == VAR_SHIFT_NO
)
18307 if ((rs6000_tune
== PROCESSOR_POWER6
)
18308 && recog_memoized (dep_insn
)
18309 && (INSN_CODE (dep_insn
) >= 0))
18312 if (GET_CODE (PATTERN (insn
)) != SET
)
18313 /* If this happens, we have to extend this to schedule
18314 optimally. Return default for now. */
18317 /* Adjust the cost for the case where the value written
18318 by a fixed point operation is used as the address
18319 gen value on a store. */
18320 switch (get_attr_type (dep_insn
))
18325 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18326 return get_attr_sign_extend (dep_insn
)
18327 == SIGN_EXTEND_YES
? 6 : 4;
18332 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18333 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
18343 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18351 if (get_attr_update (dep_insn
) == UPDATE_YES
18352 && ! rs6000_store_data_bypass_p (dep_insn
, insn
))
18358 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18364 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18365 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
18375 if ((rs6000_tune
== PROCESSOR_POWER6
)
18376 && recog_memoized (dep_insn
)
18377 && (INSN_CODE (dep_insn
) >= 0))
18380 /* Adjust the cost for the case where the value written
18381 by a fixed point instruction is used within the address
18382 gen portion of a subsequent load(u)(x) */
18383 switch (get_attr_type (dep_insn
))
18388 if (set_to_load_agen (dep_insn
, insn
))
18389 return get_attr_sign_extend (dep_insn
)
18390 == SIGN_EXTEND_YES
? 6 : 4;
18395 if (set_to_load_agen (dep_insn
, insn
))
18396 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
18406 if (set_to_load_agen (dep_insn
, insn
))
18414 if (get_attr_update (dep_insn
) == UPDATE_YES
18415 && set_to_load_agen (dep_insn
, insn
))
18421 if (set_to_load_agen (dep_insn
, insn
))
18427 if (set_to_load_agen (dep_insn
, insn
))
18428 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
18441 /* Fall out to return default cost. */
18445 case REG_DEP_OUTPUT
:
18446 /* Output dependency; DEP_INSN writes a register that INSN writes some
18448 if ((rs6000_tune
== PROCESSOR_POWER6
)
18449 && recog_memoized (dep_insn
)
18450 && (INSN_CODE (dep_insn
) >= 0))
18452 attr_type
= get_attr_type (insn
);
18457 case TYPE_FPSIMPLE
:
18458 if (get_attr_type (dep_insn
) == TYPE_FP
18459 || get_attr_type (dep_insn
) == TYPE_FPSIMPLE
)
18466 /* Fall through, no cost for output dependency. */
18470 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18475 gcc_unreachable ();
18481 /* Debug version of rs6000_adjust_cost. */
rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
			  int cost, unsigned int dw)
int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
= "unknown depencency"; break;
18496 case REG_DEP_TRUE
: dep
= "data dependency"; break;
18497 case REG_DEP_OUTPUT
: dep
= "output dependency"; break;
18498 case REG_DEP_ANTI
: dep
= "anti depencency"; break;
18502 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18503 "%s, insn:\n", ret
, cost
, dep
);
/* The function returns true if INSN is microcoded.
18512 Return false otherwise. */
is_microcoded_insn (rtx_insn *insn)
18517 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18518 || GET_CODE (PATTERN (insn
)) == USE
18519 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18522 if (rs6000_tune
== PROCESSOR_CELL
)
18523 return get_attr_cell_micro (insn
) == CELL_MICRO_ALWAYS
;
18525 if (rs6000_sched_groups
18526 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
18528 enum attr_type type
= get_attr_type (insn
);
18529 if ((type
== TYPE_LOAD
18530 && get_attr_update (insn
) == UPDATE_YES
18531 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
18532 || ((type
== TYPE_LOAD
|| type
== TYPE_STORE
)
18533 && get_attr_update (insn
) == UPDATE_YES
18534 && get_attr_indexed (insn
) == INDEXED_YES
)
18535 || type
== TYPE_MFCR
)
18542 /* The function returns true if INSN is cracked into 2 instructions
18543 by the processor (and therefore occupies 2 issue slots). */
18546 is_cracked_insn (rtx_insn
*insn
)
18548 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18549 || GET_CODE (PATTERN (insn
)) == USE
18550 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18553 if (rs6000_sched_groups
18554 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
18556 enum attr_type type
= get_attr_type (insn
);
18557 if ((type
== TYPE_LOAD
18558 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
18559 && get_attr_update (insn
) == UPDATE_NO
)
18560 || (type
== TYPE_LOAD
18561 && get_attr_sign_extend (insn
) == SIGN_EXTEND_NO
18562 && get_attr_update (insn
) == UPDATE_YES
18563 && get_attr_indexed (insn
) == INDEXED_NO
)
18564 || (type
== TYPE_STORE
18565 && get_attr_update (insn
) == UPDATE_YES
18566 && get_attr_indexed (insn
) == INDEXED_NO
)
18567 || ((type
== TYPE_FPLOAD
|| type
== TYPE_FPSTORE
)
18568 && get_attr_update (insn
) == UPDATE_YES
)
18569 || (type
== TYPE_CR_LOGICAL
18570 && get_attr_cr_logical_3op (insn
) == CR_LOGICAL_3OP_YES
)
18571 || (type
== TYPE_EXTS
18572 && get_attr_dot (insn
) == DOT_YES
)
18573 || (type
== TYPE_SHIFT
18574 && get_attr_dot (insn
) == DOT_YES
18575 && get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
18576 || (type
== TYPE_MUL
18577 && get_attr_dot (insn
) == DOT_YES
)
18578 || type
== TYPE_DIV
18579 || (type
== TYPE_INSERT
18580 && get_attr_size (insn
) == SIZE_32
))
18587 /* The function returns true if INSN can be issued only from
18588 the branch slot. */
18591 is_branch_slot_insn (rtx_insn
*insn
)
18593 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18594 || GET_CODE (PATTERN (insn
)) == USE
18595 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18598 if (rs6000_sched_groups
)
18600 enum attr_type type
= get_attr_type (insn
);
18601 if (type
== TYPE_BRANCH
|| type
== TYPE_JMPREG
)
/* The function returns true if out_insn sets a value that is
used in the address generation computation of in_insn. */
18612 set_to_load_agen (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
18614 rtx out_set
, in_set
;
18616 /* For performance reasons, only handle the simple case where
18617 both loads are a single_set. */
18618 out_set
= single_set (out_insn
);
18621 in_set
= single_set (in_insn
);
18623 return reg_mentioned_p (SET_DEST (out_set
), SET_SRC (in_set
));
18629 /* Try to determine base/offset/size parts of the given MEM.
18630 Return true if successful, false if all the values couldn't
18633 This function only looks for REG or REG+CONST address forms.
18634 REG+REG address form will return false. */
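/* For example (informal), a MEM such as

     (mem:SI (plus:DI (reg:DI 9) (const_int 16)))

   yields *BASE = (reg:DI 9), *OFFSET = 16 and *SIZE = 4 (the known size
   of the access), while a REG+REG address like
   (plus:DI (reg:DI 9) (reg:DI 10)) makes the function return false.  */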
18637 get_memref_parts (rtx mem
, rtx
*base
, HOST_WIDE_INT
*offset
,
18638 HOST_WIDE_INT
*size
)
18641 if (MEM_SIZE_KNOWN_P (mem
))
18642 *size
= MEM_SIZE (mem
);
18646 addr_rtx
= (XEXP (mem
, 0));
18647 if (GET_CODE (addr_rtx
) == PRE_MODIFY
)
18648 addr_rtx
= XEXP (addr_rtx
, 1);
18651 while (GET_CODE (addr_rtx
) == PLUS
18652 && CONST_INT_P (XEXP (addr_rtx
, 1)))
18654 *offset
+= INTVAL (XEXP (addr_rtx
, 1));
18655 addr_rtx
= XEXP (addr_rtx
, 0);
18657 if (!REG_P (addr_rtx
))
18664 /* If the target storage locations of arguments MEM1 and MEM2 are
18665 adjacent, then return the argument that has the lower address.
18666 Otherwise, return NULL_RTX. */
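/* E.g. (informally), with MEM1 covering 4 bytes at r9+0 and MEM2 covering
   4 bytes at r9+4 the function returns MEM1; if MEM2 were at r9+8 instead,
   the locations are not adjacent and NULL_RTX is returned.  */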
18669 adjacent_mem_locations (rtx mem1
, rtx mem2
)
18672 HOST_WIDE_INT off1
, size1
, off2
, size2
;
18676 && get_memref_parts (mem1
, ®1
, &off1
, &size1
)
18677 && get_memref_parts (mem2
, ®2
, &off2
, &size2
)
18678 && REGNO (reg1
) == REGNO (reg2
))
18680 if (off1
+ size1
== off2
)
18682 else if (off2
+ size2
== off1
)
18689 /* This function returns true if it can be determined that the two MEM
18690 locations overlap by at least 1 byte based on base reg/offset/size. */
18693 mem_locations_overlap (rtx mem1
, rtx mem2
)
18696 HOST_WIDE_INT off1
, size1
, off2
, size2
;
18698 if (get_memref_parts (mem1
, ®1
, &off1
, &size1
)
18699 && get_memref_parts (mem2
, ®2
, &off2
, &size2
))
18700 return ((REGNO (reg1
) == REGNO (reg2
))
18701 && (((off1
<= off2
) && (off1
+ size1
> off2
))
18702 || ((off2
<= off1
) && (off2
+ size2
> off1
))));
18707 /* A C statement (sans semicolon) to update the integer scheduling
18708 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18709 INSN earlier, reduce the priority to execute INSN later. Do not
18710 define this macro if you do not need to adjust the scheduling
18711 priorities of insns. */
18714 rs6000_adjust_priority (rtx_insn
*insn ATTRIBUTE_UNUSED
, int priority
)
18716 rtx load_mem
, str_mem
;
18717 /* On machines (like the 750) which have asymmetric integer units,
18718 where one integer unit can do multiply and divides and the other
18719 can't, reduce the priority of multiply/divide so it is scheduled
18720 before other integer operations. */
18723 if (! INSN_P (insn
))
18726 if (GET_CODE (PATTERN (insn
)) == USE
)
18729 switch (rs6000_tune
) {
18730 case PROCESSOR_PPC750
:
18731 switch (get_attr_type (insn
))
18738 fprintf (stderr
, "priority was %#x (%d) before adjustment\n",
18739 priority
, priority
);
18740 if (priority
>= 0 && priority
< 0x01000000)
18747 if (insn_must_be_first_in_group (insn
)
18748 && reload_completed
18749 && current_sched_info
->sched_max_insns_priority
18750 && rs6000_sched_restricted_insns_priority
)
18753 /* Prioritize insns that can be dispatched only in the first
18755 if (rs6000_sched_restricted_insns_priority
== 1)
18756 /* Attach highest priority to insn. This means that in
18757 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18758 precede 'priority' (critical path) considerations. */
18759 return current_sched_info
->sched_max_insns_priority
;
18760 else if (rs6000_sched_restricted_insns_priority
== 2)
18761 /* Increase priority of insn by a minimal amount. This means that in
18762 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18763 considerations precede dispatch-slot restriction considerations. */
18764 return (priority
+ 1);
18767 if (rs6000_tune
== PROCESSOR_POWER6
18768 && ((load_store_pendulum
== -2 && is_load_insn (insn
, &load_mem
))
18769 || (load_store_pendulum
== 2 && is_store_insn (insn
, &str_mem
))))
18770 /* Attach highest priority to insn if the scheduler has just issued two
18771 stores and this instruction is a load, or two loads and this instruction
18772 is a store. Power6 wants loads and stores scheduled alternately
18774 return current_sched_info
->sched_max_insns_priority
;
/* Return true if the instruction is nonpipelined on the Cell.  */
static bool
is_nonpipeline_insn (rtx_insn *insn)
{
  enum attr_type type;
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  type = get_attr_type (insn);
  if (type == TYPE_MUL
      || type == TYPE_DIV
      || type == TYPE_SDIV
      || type == TYPE_DDIV
      || type == TYPE_SSQRT
      || type == TYPE_DSQRT
      || type == TYPE_MFCR
      || type == TYPE_MFCRF
      || type == TYPE_MFJMPR)
    return true;
  return false;
}
18806 /* Return how many instructions the machine can issue per cycle. */
18809 rs6000_issue_rate (void)
18811 /* Unless scheduling for register pressure, use issue rate of 1 for
18812 first scheduling pass to decrease degradation. */
18813 if (!reload_completed
&& !flag_sched_pressure
)
18816 switch (rs6000_tune
) {
18817 case PROCESSOR_RS64A
:
18818 case PROCESSOR_PPC601
: /* ? */
18819 case PROCESSOR_PPC7450
:
18821 case PROCESSOR_PPC440
:
18822 case PROCESSOR_PPC603
:
18823 case PROCESSOR_PPC750
:
18824 case PROCESSOR_PPC7400
:
18825 case PROCESSOR_PPC8540
:
18826 case PROCESSOR_PPC8548
:
18827 case PROCESSOR_CELL
:
18828 case PROCESSOR_PPCE300C2
:
18829 case PROCESSOR_PPCE300C3
:
18830 case PROCESSOR_PPCE500MC
:
18831 case PROCESSOR_PPCE500MC64
:
18832 case PROCESSOR_PPCE5500
:
18833 case PROCESSOR_PPCE6500
:
18834 case PROCESSOR_TITAN
:
18836 case PROCESSOR_PPC476
:
18837 case PROCESSOR_PPC604
:
18838 case PROCESSOR_PPC604e
:
18839 case PROCESSOR_PPC620
:
18840 case PROCESSOR_PPC630
:
18842 case PROCESSOR_POWER4
:
18843 case PROCESSOR_POWER5
:
18844 case PROCESSOR_POWER6
:
18845 case PROCESSOR_POWER7
:
18847 case PROCESSOR_POWER8
:
18849 case PROCESSOR_POWER9
:
18851 case PROCESSOR_POWER10
:
18858 /* Return how many instructions to look ahead for better insn
18862 rs6000_use_sched_lookahead (void)
18864 switch (rs6000_tune
)
18866 case PROCESSOR_PPC8540
:
18867 case PROCESSOR_PPC8548
:
18870 case PROCESSOR_CELL
:
18871 return (reload_completed
? 8 : 0);
18878 /* We are choosing insn from the ready queue. Return zero if INSN can be
18881 rs6000_use_sched_lookahead_guard (rtx_insn
*insn
, int ready_index
)
18883 if (ready_index
== 0)
18886 if (rs6000_tune
!= PROCESSOR_CELL
)
18889 gcc_assert (insn
!= NULL_RTX
&& INSN_P (insn
));
18891 if (!reload_completed
18892 || is_nonpipeline_insn (insn
)
18893 || is_microcoded_insn (insn
))
18899 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18900 and return true. */
18903 find_mem_ref (rtx pat
, rtx
*mem_ref
)
18908 /* stack_tie does not produce any real memory traffic. */
18909 if (tie_operand (pat
, VOIDmode
))
18918 /* Recursively process the pattern. */
18919 fmt
= GET_RTX_FORMAT (GET_CODE (pat
));
18921 for (i
= GET_RTX_LENGTH (GET_CODE (pat
)) - 1; i
>= 0; i
--)
18925 if (find_mem_ref (XEXP (pat
, i
), mem_ref
))
18928 else if (fmt
[i
] == 'E')
18929 for (j
= XVECLEN (pat
, i
) - 1; j
>= 0; j
--)
18931 if (find_mem_ref (XVECEXP (pat
, i
, j
), mem_ref
))
/* Determine if PAT is a PATTERN of a load insn.  */

static bool
is_load_insn1 (rtx pat, rtx *load_mem)
{
  if (!pat || pat == NULL_RTX)
    return false;

  if (GET_CODE (pat) == SET)
    {
      if (REG_P (SET_DEST (pat)))
	return find_mem_ref (SET_SRC (pat), load_mem);
      else
	return false;
    }

  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      for (i = 0; i < XVECLEN (pat, 0); i++)
	if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
	  return true;
    }

  return false;
}

/* Determine if INSN loads from memory.  */

static bool
is_load_insn (rtx insn, rtx *load_mem)
{
  if (!insn || !INSN_P (insn))
    return false;

  if (CALL_P (insn))
    return false;

  return is_load_insn1 (PATTERN (insn), load_mem);
}
/* Determine if PAT is a PATTERN of a store insn.  */

static bool
is_store_insn1 (rtx pat, rtx *str_mem)
{
  if (!pat || pat == NULL_RTX)
    return false;

  if (GET_CODE (pat) == SET)
    {
      if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
	return find_mem_ref (SET_DEST (pat), str_mem);
      else
	return false;
    }

  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      for (i = 0; i < XVECLEN (pat, 0); i++)
	if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
	  return true;
    }

  return false;
}

/* Determine if INSN stores to memory.  */

static bool
is_store_insn (rtx insn, rtx *str_mem)
{
  if (!insn || !INSN_P (insn))
    return false;

  return is_store_insn1 (PATTERN (insn), str_mem);
}
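/* Expository note (added for illustration): the SET cases above mean that a
   pattern such as (set (mem:SI (reg:SI 3)) (reg:SI 4)) is recognized as a
   store, with *str_mem pointed at the destination MEM by find_mem_ref,
   whereas a load like (set (reg:SI 4) (mem:SI (reg:SI 3))) is rejected here
   because its SET_SRC is a MEM rather than a REG or SUBREG.  */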
19020 /* Return whether TYPE is a Power9 pairable vector instruction type. */
19023 is_power9_pairable_vec_type (enum attr_type type
)
19027 case TYPE_VECSIMPLE
:
19028 case TYPE_VECCOMPLEX
:
19032 case TYPE_VECFLOAT
:
19034 case TYPE_VECDOUBLE
:
/* Returns whether the dependence between INSN and NEXT is considered
   costly by the given target.  */

static bool
rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
{
  rtx insn;
  rtx next;
  rtx load_mem, str_mem;

  /* If the flag is not enabled - no dependence is considered costly;
     allow all dependent insns in the same group.
     This is the most aggressive option.  */
  if (rs6000_sched_costly_dep == no_dep_costly)
    return false;

  /* If the flag is set to 1 - a dependence is always considered costly;
     do not allow dependent instructions in the same group.
     This is the most conservative option.  */
  if (rs6000_sched_costly_dep == all_deps_costly)
    return true;

  insn = DEP_PRO (dep);
  next = DEP_CON (dep);

  if (rs6000_sched_costly_dep == store_to_load_dep_costly
      && is_load_insn (next, &load_mem)
      && is_store_insn (insn, &str_mem))
    /* Prevent load after store in the same group.  */
    return true;

  if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
      && is_load_insn (next, &load_mem)
      && is_store_insn (insn, &str_mem)
      && DEP_TYPE (dep) == REG_DEP_TRUE
      && mem_locations_overlap (str_mem, load_mem))
    /* Prevent load after store in the same group if it is a true
       dependence.  */
    return true;

  /* The flag is set to X; dependences with latency >= X are considered costly,
     and will not be scheduled in the same group.  */
  if (rs6000_sched_costly_dep <= max_dep_latency
      && ((cost - distance) >= (int) rs6000_sched_costly_dep))
    return true;

  return false;
}
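/* Worked example (expository, assuming the usual option spellings): with
   -msched-costly-dep=store_to_load every dependent store/load pair is kept
   in separate dispatch groups, while with a numeric setting such as
   -msched-costly-dep=3 a dependence is split across groups only when its
   remaining latency is large enough, e.g. cost == 4 with distance == 0 is
   costly (4 - 0 >= 3) but cost == 2 is not.  */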
/* Return the next insn after INSN that is found before TAIL is reached,
   skipping any "non-active" insns - insns that will not actually occupy
   an issue slot.  Return NULL_RTX if such an insn is not found.  */

static rtx_insn *
get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
{
  if (insn == NULL_RTX || insn == tail)
    return NULL;

  while (1)
    {
      insn = NEXT_INSN (insn);
      if (insn == NULL_RTX || insn == tail)
	return NULL;

      if (CALL_P (insn)
	  || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
	  || (NONJUMP_INSN_P (insn)
	      && GET_CODE (PATTERN (insn)) != USE
	      && GET_CODE (PATTERN (insn)) != CLOBBER
	      && INSN_CODE (insn) != CODE_FOR_stack_tie))
	break;
    }
  return insn;
}
/* Move instruction at POS to the end of the READY list.  */

static void
move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
{
  rtx_insn *tmp;
  int i;

  tmp = ready[pos];
  for (i = pos; i < lastpos; i++)
    ready[i] = ready[i + 1];
  ready[lastpos] = tmp;
}
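/* Worked example (expository only): the scheduler keeps the ready list with
   the most preferred insn at the highest index, so "end of ready" means
   "issued next".  With ready == {A, B, C, D}, pos == 1 and lastpos == 3,
   the rotation above yields {A, C, D, B}, i.e. B becomes the next insn to
   be chosen.  */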
/* Do Power6 specific sched_reorder2 reordering of ready list.  */

static int
power6_sched_reorder2 (rtx_insn **ready, int lastpos)
{
  /* For Power6, we need to handle some special cases to try and keep the
     store queue from overflowing and triggering expensive flushes.

     This code monitors how load and store instructions are being issued
     and skews the ready list one way or the other to increase the likelihood
     that a desired instruction is issued at the proper time.

     A couple of things are done.  First, we maintain a "load_store_pendulum"
     to track the current state of load/store issue.

       - If the pendulum is at zero, then no loads or stores have been
	 issued in the current cycle so we do nothing.

       - If the pendulum is 1, then a single load has been issued in this
	 cycle and we attempt to locate another load in the ready list to
	 issue with it.

       - If the pendulum is -2, then two stores have already been
	 issued in this cycle, so we increase the priority of the first load
	 in the ready list to increase its likelihood of being chosen first
	 in the next cycle.

       - If the pendulum is -1, then a single store has been issued in this
	 cycle and we attempt to locate another store in the ready list to
	 issue with it, preferring a store to an adjacent memory location to
	 facilitate store pairing in the store queue.

       - If the pendulum is 2, then two loads have already been
	 issued in this cycle, so we increase the priority of the first store
	 in the ready list to increase its likelihood of being chosen first
	 in the next cycle.

       - If the pendulum < -2 or > 2, then do nothing.

     Note: This code covers the most common scenarios.  There exist non
	   load/store instructions which make use of the LSU and which
	   would need to be accounted for to strictly model the behavior
	   of the machine.  Those instructions are currently unaccounted
	   for to help minimize compile time overhead of this code.  */
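/* Illustrative trace (expository only): starting from 0, issuing a store
   moves the pendulum to -1 and the code below scans the ready list for a
   second, preferably adjacent, store to pair with it; if that second store
   issues, the pendulum reaches -2 and the first load on the ready list gets
   a priority bump, making it more likely that the next cycle starts with a
   load and the pendulum swings back toward 0.  */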
19178 rtx load_mem
, str_mem
;
19180 if (is_store_insn (last_scheduled_insn
, &str_mem
))
19181 /* Issuing a store, swing the load_store_pendulum to the left */
19182 load_store_pendulum
--;
19183 else if (is_load_insn (last_scheduled_insn
, &load_mem
))
19184 /* Issuing a load, swing the load_store_pendulum to the right */
19185 load_store_pendulum
++;
19187 return cached_can_issue_more
;
19189 /* If the pendulum is balanced, or there is only one instruction on
19190 the ready list, then all is well, so return. */
19191 if ((load_store_pendulum
== 0) || (lastpos
<= 0))
19192 return cached_can_issue_more
;
19194 if (load_store_pendulum
== 1)
19196 /* A load has been issued in this cycle. Scan the ready list
19197 for another load to issue with it */
19202 if (is_load_insn (ready
[pos
], &load_mem
))
19204 /* Found a load. Move it to the head of the ready list,
19205 and adjust it's priority so that it is more likely to
19207 move_to_end_of_ready (ready
, pos
, lastpos
);
19209 if (!sel_sched_p ()
19210 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
19211 INSN_PRIORITY (ready
[lastpos
])++;
19217 else if (load_store_pendulum
== -2)
19219 /* Two stores have been issued in this cycle. Increase the
19220 priority of the first load in the ready list to favor it for
19221 issuing in the next cycle. */
19226 if (is_load_insn (ready
[pos
], &load_mem
)
19228 && INSN_PRIORITY_KNOWN (ready
[pos
]))
19230 INSN_PRIORITY (ready
[pos
])++;
19232 /* Adjust the pendulum to account for the fact that a load
19233 was found and increased in priority. This is to prevent
19234 increasing the priority of multiple loads */
19235 load_store_pendulum
--;
19242 else if (load_store_pendulum
== -1)
19244 /* A store has been issued in this cycle. Scan the ready list for
19245 another store to issue with it, preferring a store to an adjacent
19247 int first_store_pos
= -1;
19253 if (is_store_insn (ready
[pos
], &str_mem
))
19256 /* Maintain the index of the first store found on the
19258 if (first_store_pos
== -1)
19259 first_store_pos
= pos
;
19261 if (is_store_insn (last_scheduled_insn
, &str_mem2
)
19262 && adjacent_mem_locations (str_mem
, str_mem2
))
19264 /* Found an adjacent store. Move it to the head of the
19265 ready list, and adjust it's priority so that it is
19266 more likely to stay there */
19267 move_to_end_of_ready (ready
, pos
, lastpos
);
19269 if (!sel_sched_p ()
19270 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
19271 INSN_PRIORITY (ready
[lastpos
])++;
19273 first_store_pos
= -1;
19281 if (first_store_pos
>= 0)
19283 /* An adjacent store wasn't found, but a non-adjacent store was,
19284 so move the non-adjacent store to the front of the ready
19285 list, and adjust its priority so that it is more likely to
19287 move_to_end_of_ready (ready
, first_store_pos
, lastpos
);
19288 if (!sel_sched_p ()
19289 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
19290 INSN_PRIORITY (ready
[lastpos
])++;
19293 else if (load_store_pendulum
== 2)
19295 /* Two loads have been issued in this cycle. Increase the priority
19296 of the first store in the ready list to favor it for issuing in
19302 if (is_store_insn (ready
[pos
], &str_mem
)
19304 && INSN_PRIORITY_KNOWN (ready
[pos
]))
19306 INSN_PRIORITY (ready
[pos
])++;
19308 /* Adjust the pendulum to account for the fact that a store
19309 was found and increased in priority. This is to prevent
19310 increasing the priority of multiple stores */
19311 load_store_pendulum
++;
19319 return cached_can_issue_more
;
19322 /* Do Power9 specific sched_reorder2 reordering of ready list. */
19325 power9_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
19328 enum attr_type type
, type2
;
19330 type
= get_attr_type (last_scheduled_insn
);
19332 /* Try to issue fixed point divides back-to-back in pairs so they will be
19333 routed to separate execution units and execute in parallel. */
19334 if (type
== TYPE_DIV
&& divide_cnt
== 0)
19336 /* First divide has been scheduled. */
19339 /* Scan the ready list looking for another divide, if found move it
19340 to the end of the list so it is chosen next. */
19344 if (recog_memoized (ready
[pos
]) >= 0
19345 && get_attr_type (ready
[pos
]) == TYPE_DIV
)
19347 move_to_end_of_ready (ready
, pos
, lastpos
);
19355 /* Last insn was the 2nd divide or not a divide, reset the counter. */
19358 /* The best dispatch throughput for vector and vector load insns can be
19359 achieved by interleaving a vector and vector load such that they'll
19360 dispatch to the same superslice. If this pairing cannot be achieved
19361 then it is best to pair vector insns together and vector load insns
19364 To aid in this pairing, vec_pairing maintains the current state with
19365 the following values:
19367 0 : Initial state, no vecload/vector pairing has been started.
19369 1 : A vecload or vector insn has been issued and a candidate for
19370 pairing has been found and moved to the end of the ready
19372 if (type
== TYPE_VECLOAD
)
19374 /* Issued a vecload. */
19375 if (vec_pairing
== 0)
19377 int vecload_pos
= -1;
19378 /* We issued a single vecload, look for a vector insn to pair it
19379 with. If one isn't found, try to pair another vecload. */
19383 if (recog_memoized (ready
[pos
]) >= 0)
19385 type2
= get_attr_type (ready
[pos
]);
19386 if (is_power9_pairable_vec_type (type2
))
19388 /* Found a vector insn to pair with, move it to the
19389 end of the ready list so it is scheduled next. */
19390 move_to_end_of_ready (ready
, pos
, lastpos
);
19392 return cached_can_issue_more
;
19394 else if (type2
== TYPE_VECLOAD
&& vecload_pos
== -1)
19395 /* Remember position of first vecload seen. */
19400 if (vecload_pos
>= 0)
19402 /* Didn't find a vector to pair with but did find a vecload,
19403 move it to the end of the ready list. */
19404 move_to_end_of_ready (ready
, vecload_pos
, lastpos
);
19406 return cached_can_issue_more
;
19410 else if (is_power9_pairable_vec_type (type
))
19412 /* Issued a vector operation. */
19413 if (vec_pairing
== 0)
19416 /* We issued a single vector insn, look for a vecload to pair it
19417 with. If one isn't found, try to pair another vector. */
19421 if (recog_memoized (ready
[pos
]) >= 0)
19423 type2
= get_attr_type (ready
[pos
]);
19424 if (type2
== TYPE_VECLOAD
)
19426 /* Found a vecload insn to pair with, move it to the
19427 end of the ready list so it is scheduled next. */
19428 move_to_end_of_ready (ready
, pos
, lastpos
);
19430 return cached_can_issue_more
;
19432 else if (is_power9_pairable_vec_type (type2
)
19434 /* Remember position of first vector insn seen. */
19441 /* Didn't find a vecload to pair with but did find a vector
19442 insn, move it to the end of the ready list. */
19443 move_to_end_of_ready (ready
, vec_pos
, lastpos
);
19445 return cached_can_issue_more
;
19450 /* We've either finished a vec/vecload pair, couldn't find an insn to
19451 continue the current pair, or the last insn had nothing to do with
19452 with pairing. In any case, reset the state. */
19456 return cached_can_issue_more
;
/* Determine if INSN is a store to memory that can be fused with a similar
   adjacent store.  */

static bool
is_fusable_store (rtx_insn *insn, rtx *str_mem)
{
  /* Insn must be a non-prefixed base+disp form store.  */
  if (is_store_insn (insn, str_mem)
      && get_attr_prefixed (insn) == PREFIXED_NO
      && get_attr_update (insn) == UPDATE_NO
      && get_attr_indexed (insn) == INDEXED_NO)
    {
      /* Further restrictions by mode and size.  */
      if (!MEM_SIZE_KNOWN_P (*str_mem))
	return false;

      machine_mode mode = GET_MODE (*str_mem);
      HOST_WIDE_INT size = MEM_SIZE (*str_mem);

      if (INTEGRAL_MODE_P (mode))
	/* Must be word or dword size.  */
	return (size == 4 || size == 8);
      else if (FLOAT_MODE_P (mode))
	/* Must be dword size.  */
	return (size == 8);
    }

  return false;
}
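/* Expository example: a pair of 4-byte integer stores through the same base
   register at displacements 0 and 4 (plain base+displacement forms) passes
   the checks above, while update, indexed or prefixed store forms are
   rejected outright, as are accesses whose size is not 4 or 8 bytes (or not
   8 bytes for floating-point modes).  */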
/* Do Power10 specific reordering of the ready list.  */

static int
power10_sched_reorder (rtx_insn **ready, int lastpos)
{
  rtx mem1;

  /* Do store fusion during sched2 only.  */
  if (!reload_completed)
    return cached_can_issue_more;

  /* If the prior insn finished off a store fusion pair then simply
     reset the counter and return, nothing more to do.  */
  if (load_store_pendulum != 0)
    {
      load_store_pendulum = 0;
      return cached_can_issue_more;
    }

  /* Try to pair certain store insns to adjacent memory locations
     so that the hardware will fuse them to a single operation.  */
  if (TARGET_P10_FUSION && is_fusable_store (last_scheduled_insn, &mem1))
    {
      /* A fusable store was just scheduled.  Scan the ready list for another
	 store that it can fuse with.  */
      for (int pos = lastpos; pos >= 0; pos--)
	{
	  rtx mem2;

	  /* GPR stores can be ascending or descending offsets, FPR/VSR stores
	     must be ascending only.  */
	  if (is_fusable_store (ready[pos], &mem2)
	      && ((INTEGRAL_MODE_P (GET_MODE (mem1))
		   && adjacent_mem_locations (mem1, mem2))
		  || (FLOAT_MODE_P (GET_MODE (mem1))
		      && (adjacent_mem_locations (mem1, mem2) == mem1))))
	    {
	      /* Found a fusable store.  Move it to the end of the ready list
		 so it is scheduled next.  */
	      move_to_end_of_ready (ready, pos, lastpos);
	      load_store_pendulum = -1;
	      break;
	    }
	}
    }

  return cached_can_issue_more;
}
/* We are about to begin issuing insns for this clock cycle.  */

static int
rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
		      rtx_insn **ready ATTRIBUTE_UNUSED,
		      int *pn_ready ATTRIBUTE_UNUSED,
		      int clock_var ATTRIBUTE_UNUSED)
{
  int n_ready = *pn_ready;

  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder :\n");

  /* Reorder the ready list, if the second to last ready insn
     is a nonpipelined insn.  */
  if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
    {
      if (is_nonpipeline_insn (ready[n_ready - 1])
	  && (recog_memoized (ready[n_ready - 2]) > 0))
	/* Simply swap first two insns.  */
	std::swap (ready[n_ready - 1], ready[n_ready - 2]);
    }

  if (rs6000_tune == PROCESSOR_POWER6)
    load_store_pendulum = 0;

  /* Do Power10 dependent reordering.  */
  if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
    power10_sched_reorder (ready, n_ready - 1);

  return rs6000_issue_rate ();
}
/* Like rs6000_sched_reorder, but called after issuing each insn.  */

static int
rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
		       int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
{
  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder2 :\n");

  /* Do Power6 dependent reordering if necessary.  */
  if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
    return power6_sched_reorder2 (ready, *pn_ready - 1);

  /* Do Power9 dependent reordering if necessary.  */
  if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
      && recog_memoized (last_scheduled_insn) >= 0)
    return power9_sched_reorder2 (ready, *pn_ready - 1);

  /* Do Power10 dependent reordering.  */
  if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
    return power10_sched_reorder (ready, *pn_ready - 1);

  return cached_can_issue_more;
}
19599 /* Return whether the presence of INSN causes a dispatch group termination
19600 of group WHICH_GROUP.
19602 If WHICH_GROUP == current_group, this function will return true if INSN
19603 causes the termination of the current group (i.e, the dispatch group to
19604 which INSN belongs). This means that INSN will be the last insn in the
19605 group it belongs to.
19607 If WHICH_GROUP == previous_group, this function will return true if INSN
19608 causes the termination of the previous group (i.e, the dispatch group that
19609 precedes the group to which INSN belongs). This means that INSN will be
19610 the first insn in the group it belongs to). */
19613 insn_terminates_group_p (rtx_insn
*insn
, enum group_termination which_group
)
19620 first
= insn_must_be_first_in_group (insn
);
19621 last
= insn_must_be_last_in_group (insn
);
19626 if (which_group
== current_group
)
19628 else if (which_group
== previous_group
)
19636 insn_must_be_first_in_group (rtx_insn
*insn
)
19638 enum attr_type type
;
19642 || DEBUG_INSN_P (insn
)
19643 || GET_CODE (PATTERN (insn
)) == USE
19644 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
19647 switch (rs6000_tune
)
19649 case PROCESSOR_POWER5
:
19650 if (is_cracked_insn (insn
))
19653 case PROCESSOR_POWER4
:
19654 if (is_microcoded_insn (insn
))
19657 if (!rs6000_sched_groups
)
19660 type
= get_attr_type (insn
);
19667 case TYPE_CR_LOGICAL
:
19680 case PROCESSOR_POWER6
:
19681 type
= get_attr_type (insn
);
19690 case TYPE_FPCOMPARE
:
19701 if (get_attr_dot (insn
) == DOT_NO
19702 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19707 if (get_attr_size (insn
) == SIZE_32
)
19715 if (get_attr_update (insn
) == UPDATE_YES
)
19723 case PROCESSOR_POWER7
:
19724 type
= get_attr_type (insn
);
19728 case TYPE_CR_LOGICAL
:
19742 if (get_attr_dot (insn
) == DOT_YES
)
19747 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19748 || get_attr_update (insn
) == UPDATE_YES
)
19755 if (get_attr_update (insn
) == UPDATE_YES
)
19763 case PROCESSOR_POWER8
:
19764 type
= get_attr_type (insn
);
19768 case TYPE_CR_LOGICAL
:
19776 case TYPE_VECSTORE
:
19783 if (get_attr_dot (insn
) == DOT_YES
)
19788 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19789 || get_attr_update (insn
) == UPDATE_YES
)
19794 if (get_attr_update (insn
) == UPDATE_YES
19795 && get_attr_indexed (insn
) == INDEXED_YES
)
19811 insn_must_be_last_in_group (rtx_insn
*insn
)
19813 enum attr_type type
;
19817 || DEBUG_INSN_P (insn
)
19818 || GET_CODE (PATTERN (insn
)) == USE
19819 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
19822 switch (rs6000_tune
) {
19823 case PROCESSOR_POWER4
:
19824 case PROCESSOR_POWER5
:
19825 if (is_microcoded_insn (insn
))
19828 if (is_branch_slot_insn (insn
))
19832 case PROCESSOR_POWER6
:
19833 type
= get_attr_type (insn
);
19841 case TYPE_FPCOMPARE
:
19852 if (get_attr_dot (insn
) == DOT_NO
19853 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19858 if (get_attr_size (insn
) == SIZE_32
)
19866 case PROCESSOR_POWER7
:
19867 type
= get_attr_type (insn
);
19877 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19878 && get_attr_update (insn
) == UPDATE_YES
)
19883 if (get_attr_update (insn
) == UPDATE_YES
19884 && get_attr_indexed (insn
) == INDEXED_YES
)
19892 case PROCESSOR_POWER8
:
19893 type
= get_attr_type (insn
);
19905 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19906 && get_attr_update (insn
) == UPDATE_YES
)
19911 if (get_attr_update (insn
) == UPDATE_YES
19912 && get_attr_indexed (insn
) == INDEXED_YES
)
19927 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19928 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19931 is_costly_group (rtx
*group_insns
, rtx next_insn
)
19934 int issue_rate
= rs6000_issue_rate ();
19936 for (i
= 0; i
< issue_rate
; i
++)
19938 sd_iterator_def sd_it
;
19940 rtx insn
= group_insns
[i
];
19945 FOR_EACH_DEP (insn
, SD_LIST_RES_FORW
, sd_it
, dep
)
19947 rtx next
= DEP_CON (dep
);
19949 if (next
== next_insn
19950 && rs6000_is_costly_dependence (dep
, dep_cost (dep
), 0))
19958 /* Utility of the function redefine_groups.
19959 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19960 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19961 to keep it "far" (in a separate group) from GROUP_INSNS, following
19962 one of the following schemes, depending on the value of the flag
19963 -minsert_sched_nops = X:
19964 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19965 in order to force NEXT_INSN into a separate group.
19966 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19967 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19968 insertion (has a group just ended, how many vacant issue slots remain in the
19969 last group, and how many dispatch groups were encountered so far). */
19972 force_new_group (int sched_verbose
, FILE *dump
, rtx
*group_insns
,
19973 rtx_insn
*next_insn
, bool *group_end
, int can_issue_more
,
19978 int issue_rate
= rs6000_issue_rate ();
19979 bool end
= *group_end
;
19982 if (next_insn
== NULL_RTX
|| DEBUG_INSN_P (next_insn
))
19983 return can_issue_more
;
19985 if (rs6000_sched_insert_nops
> sched_finish_regroup_exact
)
19986 return can_issue_more
;
19988 force
= is_costly_group (group_insns
, next_insn
);
19990 return can_issue_more
;
19992 if (sched_verbose
> 6)
19993 fprintf (dump
,"force: group count = %d, can_issue_more = %d\n",
19994 *group_count
,can_issue_more
);
19996 if (rs6000_sched_insert_nops
== sched_finish_regroup_exact
)
19999 can_issue_more
= 0;
20001 /* Since only a branch can be issued in the last issue_slot, it is
20002 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
20003 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
20004 in this case the last nop will start a new group and the branch
20005 will be forced to the new group. */
20006 if (can_issue_more
&& !is_branch_slot_insn (next_insn
))
20009 /* Do we have a special group ending nop? */
20010 if (rs6000_tune
== PROCESSOR_POWER6
|| rs6000_tune
== PROCESSOR_POWER7
20011 || rs6000_tune
== PROCESSOR_POWER8
)
20013 nop
= gen_group_ending_nop ();
20014 emit_insn_before (nop
, next_insn
);
20015 can_issue_more
= 0;
20018 while (can_issue_more
> 0)
20021 emit_insn_before (nop
, next_insn
);
20029 if (rs6000_sched_insert_nops
< sched_finish_regroup_exact
)
20031 int n_nops
= rs6000_sched_insert_nops
;
20033 /* Nops can't be issued from the branch slot, so the effective
20034 issue_rate for nops is 'issue_rate - 1'. */
20035 if (can_issue_more
== 0)
20036 can_issue_more
= issue_rate
;
20038 if (can_issue_more
== 0)
20040 can_issue_more
= issue_rate
- 1;
20043 for (i
= 0; i
< issue_rate
; i
++)
20045 group_insns
[i
] = 0;
20052 emit_insn_before (nop
, next_insn
);
20053 if (can_issue_more
== issue_rate
- 1) /* new group begins */
20056 if (can_issue_more
== 0)
20058 can_issue_more
= issue_rate
- 1;
20061 for (i
= 0; i
< issue_rate
; i
++)
20063 group_insns
[i
] = 0;
20069 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
20072 /* Is next_insn going to start a new group? */
20075 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
20076 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
20077 || (can_issue_more
< issue_rate
&&
20078 insn_terminates_group_p (next_insn
, previous_group
)));
20079 if (*group_end
&& end
)
20082 if (sched_verbose
> 6)
20083 fprintf (dump
, "done force: group count = %d, can_issue_more = %d\n",
20084 *group_count
, can_issue_more
);
20085 return can_issue_more
;
20088 return can_issue_more
;
20091 /* This function tries to synch the dispatch groups that the compiler "sees"
20092 with the dispatch groups that the processor dispatcher is expected to
20093 form in practice. It tries to achieve this synchronization by forcing the
20094 estimated processor grouping on the compiler (as opposed to the function
20095 'pad_goups' which tries to force the scheduler's grouping on the processor).
20097 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
20098 examines the (estimated) dispatch groups that will be formed by the processor
20099 dispatcher. It marks these group boundaries to reflect the estimated
20100 processor grouping, overriding the grouping that the scheduler had marked.
20101 Depending on the value of the flag '-minsert-sched-nops' this function can
20102 force certain insns into separate groups or force a certain distance between
20103 them by inserting nops, for example, if there exists a "costly dependence"
20106 The function estimates the group boundaries that the processor will form as
20107 follows: It keeps track of how many vacant issue slots are available after
20108 each insn. A subsequent insn will start a new group if one of the following
20110 - no more vacant issue slots remain in the current dispatch group.
20111 - only the last issue slot, which is the branch slot, is vacant, but the next
20112 insn is not a branch.
20113 - only the last 2 or less issue slots, including the branch slot, are vacant,
20114 which means that a cracked insn (which occupies two issue slots) can't be
20115 issued in this group.
20116 - less than 'issue_rate' slots are vacant, and the next insn always needs to
20117 start a new group. */
20120 redefine_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
20123 rtx_insn
*insn
, *next_insn
;
20125 int can_issue_more
;
20128 int group_count
= 0;
20132 issue_rate
= rs6000_issue_rate ();
20133 group_insns
= XALLOCAVEC (rtx
, issue_rate
);
20134 for (i
= 0; i
< issue_rate
; i
++)
20136 group_insns
[i
] = 0;
20138 can_issue_more
= issue_rate
;
20140 insn
= get_next_active_insn (prev_head_insn
, tail
);
20143 while (insn
!= NULL_RTX
)
20145 slot
= (issue_rate
- can_issue_more
);
20146 group_insns
[slot
] = insn
;
20148 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
20149 if (insn_terminates_group_p (insn
, current_group
))
20150 can_issue_more
= 0;
20152 next_insn
= get_next_active_insn (insn
, tail
);
20153 if (next_insn
== NULL_RTX
)
20154 return group_count
+ 1;
20156 /* Is next_insn going to start a new group? */
20158 = (can_issue_more
== 0
20159 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
20160 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
20161 || (can_issue_more
< issue_rate
&&
20162 insn_terminates_group_p (next_insn
, previous_group
)));
20164 can_issue_more
= force_new_group (sched_verbose
, dump
, group_insns
,
20165 next_insn
, &group_end
, can_issue_more
,
20171 can_issue_more
= 0;
20172 for (i
= 0; i
< issue_rate
; i
++)
20174 group_insns
[i
] = 0;
20178 if (GET_MODE (next_insn
) == TImode
&& can_issue_more
)
20179 PUT_MODE (next_insn
, VOIDmode
);
20180 else if (!can_issue_more
&& GET_MODE (next_insn
) != TImode
)
20181 PUT_MODE (next_insn
, TImode
);
20184 if (can_issue_more
== 0)
20185 can_issue_more
= issue_rate
;
20188 return group_count
;
20191 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
20192 dispatch group boundaries that the scheduler had marked. Pad with nops
20193 any dispatch groups which have vacant issue slots, in order to force the
20194 scheduler's grouping on the processor dispatcher. The function
20195 returns the number of dispatch groups found. */
20198 pad_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
20201 rtx_insn
*insn
, *next_insn
;
20204 int can_issue_more
;
20206 int group_count
= 0;
20208 /* Initialize issue_rate. */
20209 issue_rate
= rs6000_issue_rate ();
20210 can_issue_more
= issue_rate
;
20212 insn
= get_next_active_insn (prev_head_insn
, tail
);
20213 next_insn
= get_next_active_insn (insn
, tail
);
20215 while (insn
!= NULL_RTX
)
20218 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
20220 group_end
= (next_insn
== NULL_RTX
|| GET_MODE (next_insn
) == TImode
);
20222 if (next_insn
== NULL_RTX
)
20227 /* If the scheduler had marked group termination at this location
20228 (between insn and next_insn), and neither insn nor next_insn will
20229 force group termination, pad the group with nops to force group
20232 && (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
20233 && !insn_terminates_group_p (insn
, current_group
)
20234 && !insn_terminates_group_p (next_insn
, previous_group
))
20236 if (!is_branch_slot_insn (next_insn
))
20239 while (can_issue_more
)
20242 emit_insn_before (nop
, next_insn
);
20247 can_issue_more
= issue_rate
;
20252 next_insn
= get_next_active_insn (insn
, tail
);
20255 return group_count
;
20258 /* We're beginning a new block. Initialize data structures as necessary. */
20261 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
20262 int sched_verbose ATTRIBUTE_UNUSED
,
20263 int max_ready ATTRIBUTE_UNUSED
)
20265 last_scheduled_insn
= NULL
;
20266 load_store_pendulum
= 0;
20271 /* The following function is called at the end of scheduling BB.
20272 After reload, it inserts nops at insn group bundling. */
20275 rs6000_sched_finish (FILE *dump
, int sched_verbose
)
20280 fprintf (dump
, "=== Finishing schedule.\n");
20282 if (reload_completed
&& rs6000_sched_groups
)
20284 /* Do not run sched_finish hook when selective scheduling enabled. */
20285 if (sel_sched_p ())
20288 if (rs6000_sched_insert_nops
== sched_finish_none
)
20291 if (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
20292 n_groups
= pad_groups (dump
, sched_verbose
,
20293 current_sched_info
->prev_head
,
20294 current_sched_info
->next_tail
);
20296 n_groups
= redefine_groups (dump
, sched_verbose
,
20297 current_sched_info
->prev_head
,
20298 current_sched_info
->next_tail
);
20300 if (sched_verbose
>= 6)
20302 fprintf (dump
, "ngroups = %d\n", n_groups
);
20303 print_rtl (dump
, current_sched_info
->prev_head
);
20304 fprintf (dump
, "Done finish_sched\n");
/* Saved scheduling state, used to save and restore the rs6000-specific
   globals when selective scheduling switches contexts.  */
struct rs6000_sched_context
{
  short cached_can_issue_more;
  rtx_insn *last_scheduled_insn;
  int load_store_pendulum;
  int divide_cnt;
  int vec_pairing;
};

typedef struct rs6000_sched_context rs6000_sched_context_def;
typedef rs6000_sched_context_def *rs6000_sched_context_t;

/* Allocate store for new scheduling context.  */
static void *
rs6000_alloc_sched_context (void)
{
  return xmalloc (sizeof (rs6000_sched_context_def));
}

/* If CLEAN_P is true then initializes _SC with clean data,
   and from the global context otherwise.  */
static void
rs6000_init_sched_context (void *_sc, bool clean_p)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  if (clean_p)
    {
      sc->cached_can_issue_more = 0;
      sc->last_scheduled_insn = NULL;
      sc->load_store_pendulum = 0;
      sc->divide_cnt = 0;
      sc->vec_pairing = 0;
    }
  else
    {
      sc->cached_can_issue_more = cached_can_issue_more;
      sc->last_scheduled_insn = last_scheduled_insn;
      sc->load_store_pendulum = load_store_pendulum;
      sc->divide_cnt = divide_cnt;
      sc->vec_pairing = vec_pairing;
    }
}

/* Sets the global scheduling context to the one pointed to by _SC.  */
static void
rs6000_set_sched_context (void *_sc)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  cached_can_issue_more = sc->cached_can_issue_more;
  last_scheduled_insn = sc->last_scheduled_insn;
  load_store_pendulum = sc->load_store_pendulum;
  divide_cnt = sc->divide_cnt;
  vec_pairing = sc->vec_pairing;
}

/* Free _SC.  */
static void
rs6000_free_sched_context (void *_sc)
{
  gcc_assert (_sc != NULL);

  free (_sc);
}
20378 rs6000_sched_can_speculate_insn (rtx_insn
*insn
)
20380 switch (get_attr_type (insn
))
20395 /* Length in units of the trampoline for entering a nested function. */
20398 rs6000_trampoline_size (void)
20402 switch (DEFAULT_ABI
)
20405 gcc_unreachable ();
20408 ret
= (TARGET_32BIT
) ? 12 : 24;
20412 gcc_assert (!TARGET_32BIT
);
20418 ret
= (TARGET_32BIT
) ? 40 : 48;
20425 /* Emit RTL insns to initialize the variable parts of a trampoline.
20426 FNADDR is an RTX for the address of the function's pure code.
20427 CXT is an RTX for the static chain value for the function. */
20430 rs6000_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
20432 int regsize
= (TARGET_32BIT
) ? 4 : 8;
20433 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
20434 rtx ctx_reg
= force_reg (Pmode
, cxt
);
20435 rtx addr
= force_reg (Pmode
, XEXP (m_tramp
, 0));
20437 switch (DEFAULT_ABI
)
20440 gcc_unreachable ();
20442 /* Under AIX, just build the 3 word function descriptor */
20445 rtx fnmem
, fn_reg
, toc_reg
;
20447 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS
)
20448 error ("you cannot take the address of a nested function if you use "
20449 "the %qs option", "-mno-pointers-to-nested-functions");
20451 fnmem
= gen_const_mem (Pmode
, force_reg (Pmode
, fnaddr
));
20452 fn_reg
= gen_reg_rtx (Pmode
);
20453 toc_reg
= gen_reg_rtx (Pmode
);
20455 /* Macro to shorten the code expansions below. */
20456 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20458 m_tramp
= replace_equiv_address (m_tramp
, addr
);
20460 emit_move_insn (fn_reg
, MEM_PLUS (fnmem
, 0));
20461 emit_move_insn (toc_reg
, MEM_PLUS (fnmem
, regsize
));
20462 emit_move_insn (MEM_PLUS (m_tramp
, 0), fn_reg
);
20463 emit_move_insn (MEM_PLUS (m_tramp
, regsize
), toc_reg
);
20464 emit_move_insn (MEM_PLUS (m_tramp
, 2*regsize
), ctx_reg
);
20470 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
20474 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__trampoline_setup"),
20475 LCT_NORMAL
, VOIDmode
,
20477 GEN_INT (rs6000_trampoline_size ()), SImode
,
20485 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20486 identifier as an argument, so the front end shouldn't look it up. */
20489 rs6000_attribute_takes_identifier_p (const_tree attr_id
)
20491 return is_attribute_p ("altivec", attr_id
);
20494 /* Handle the "altivec" attribute. The attribute may have
20495 arguments as follows:
20497 __attribute__((altivec(vector__)))
20498 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20499 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20501 and may appear more than once (e.g., 'vector bool char') in a
20502 given declaration. */
20505 rs6000_handle_altivec_attribute (tree
*node
,
20506 tree name ATTRIBUTE_UNUSED
,
20508 int flags ATTRIBUTE_UNUSED
,
20509 bool *no_add_attrs
)
20511 tree type
= *node
, result
= NULL_TREE
;
20515 = ((args
&& TREE_CODE (args
) == TREE_LIST
&& TREE_VALUE (args
)
20516 && TREE_CODE (TREE_VALUE (args
)) == IDENTIFIER_NODE
)
20517 ? *IDENTIFIER_POINTER (TREE_VALUE (args
))
20520 while (POINTER_TYPE_P (type
)
20521 || TREE_CODE (type
) == FUNCTION_TYPE
20522 || TREE_CODE (type
) == METHOD_TYPE
20523 || TREE_CODE (type
) == ARRAY_TYPE
)
20524 type
= TREE_TYPE (type
);
20526 mode
= TYPE_MODE (type
);
20528 /* Check for invalid AltiVec type qualifiers. */
20529 if (type
== long_double_type_node
)
20530 error ("use of %<long double%> in AltiVec types is invalid");
20531 else if (type
== boolean_type_node
)
20532 error ("use of boolean types in AltiVec types is invalid");
20533 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
20534 error ("use of %<complex%> in AltiVec types is invalid");
20535 else if (DECIMAL_FLOAT_MODE_P (mode
))
20536 error ("use of decimal floating-point types in AltiVec types is invalid");
20537 else if (!TARGET_VSX
)
20539 if (type
== long_unsigned_type_node
|| type
== long_integer_type_node
)
20542 error ("use of %<long%> in AltiVec types is invalid for "
20543 "64-bit code without %qs", "-mvsx");
20544 else if (rs6000_warn_altivec_long
)
20545 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20548 else if (type
== long_long_unsigned_type_node
20549 || type
== long_long_integer_type_node
)
20550 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20552 else if (type
== double_type_node
)
20553 error ("use of %<double%> in AltiVec types is invalid without %qs",
20557 switch (altivec_type
)
20560 unsigned_p
= TYPE_UNSIGNED (type
);
20564 result
= (unsigned_p
? unsigned_V1TI_type_node
: V1TI_type_node
);
20567 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
20570 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
20573 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
20576 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
20578 case E_SFmode
: result
= V4SF_type_node
; break;
20579 case E_DFmode
: result
= V2DF_type_node
; break;
20580 /* If the user says 'vector int bool', we may be handed the 'bool'
20581 attribute _before_ the 'vector' attribute, and so select the
20582 proper type in the 'b' case below. */
20583 case E_V4SImode
: case E_V8HImode
: case E_V16QImode
: case E_V4SFmode
:
20584 case E_V2DImode
: case E_V2DFmode
:
20592 case E_TImode
: case E_V1TImode
: result
= bool_V1TI_type_node
; break;
20593 case E_DImode
: case E_V2DImode
: result
= bool_V2DI_type_node
; break;
20594 case E_SImode
: case E_V4SImode
: result
= bool_V4SI_type_node
; break;
20595 case E_HImode
: case E_V8HImode
: result
= bool_V8HI_type_node
; break;
20596 case E_QImode
: case E_V16QImode
: result
= bool_V16QI_type_node
;
20603 case E_V8HImode
: result
= pixel_V8HI_type_node
;
20609 /* Propagate qualifiers attached to the element type
20610 onto the vector type. */
20611 if (result
&& result
!= type
&& TYPE_QUALS (type
))
20612 result
= build_qualified_type (result
, TYPE_QUALS (type
));
20614 *no_add_attrs
= true; /* No need to hang on to the attribute. */
20617 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
20622 /* AltiVec defines five built-in scalar types that serve as vector
20623 elements; we must teach the compiler how to mangle them. The 128-bit
20624 floating point mangling is target-specific as well. MMA defines
20625 two built-in types to be used as opaque vector types. */
20627 static const char *
20628 rs6000_mangle_type (const_tree type
)
20630 type
= TYPE_MAIN_VARIANT (type
);
20632 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
20633 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
20634 && TREE_CODE (type
) != OPAQUE_TYPE
)
20637 if (type
== bool_char_type_node
) return "U6__boolc";
20638 if (type
== bool_short_type_node
) return "U6__bools";
20639 if (type
== pixel_type_node
) return "u7__pixel";
20640 if (type
== bool_int_type_node
) return "U6__booli";
20641 if (type
== bool_long_long_type_node
) return "U6__boolx";
20643 if (type
== float128_type_node
|| type
== float64x_type_node
)
20646 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IBM_P (TYPE_MODE (type
)))
20648 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IEEE_P (TYPE_MODE (type
)))
20649 return "u9__ieee128";
20651 if (type
== vector_pair_type_node
)
20652 return "u13__vector_pair";
20653 if (type
== vector_quad_type_node
)
20654 return "u13__vector_quad";
20656 /* For all other types, use the default mangling. */
20660 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20661 struct attribute_spec.handler. */
20664 rs6000_handle_longcall_attribute (tree
*node
, tree name
,
20665 tree args ATTRIBUTE_UNUSED
,
20666 int flags ATTRIBUTE_UNUSED
,
20667 bool *no_add_attrs
)
20669 if (TREE_CODE (*node
) != FUNCTION_TYPE
20670 && TREE_CODE (*node
) != FIELD_DECL
20671 && TREE_CODE (*node
) != TYPE_DECL
)
20673 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
20675 *no_add_attrs
= true;
20681 /* Set longcall attributes on all functions declared when
20682 rs6000_default_long_calls is true. */
20684 rs6000_set_default_type_attributes (tree type
)
20686 if (rs6000_default_long_calls
20687 && FUNC_OR_METHOD_TYPE_P (type
))
20688 TYPE_ATTRIBUTES (type
) = tree_cons (get_identifier ("longcall"),
20690 TYPE_ATTRIBUTES (type
));
20693 darwin_set_default_type_attributes (type
);
20697 /* Return a reference suitable for calling a function with the
20698 longcall attribute. */
20701 rs6000_longcall_ref (rtx call_ref
, rtx arg
)
20703 /* System V adds '.' to the internal name, so skip them. */
20704 const char *call_name
= XSTR (call_ref
, 0);
20705 if (*call_name
== '.')
20707 while (*call_name
== '.')
20710 tree node
= get_identifier (call_name
);
20711 call_ref
= gen_rtx_SYMBOL_REF (VOIDmode
, IDENTIFIER_POINTER (node
));
20716 rtx base
= const0_rtx
;
20718 if (rs6000_pcrel_p ())
20720 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20721 rtx u
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20722 gen_rtvec (3, base
, call_ref
, arg
),
20723 UNSPECV_PLT_PCREL
);
20724 emit_insn (gen_rtx_SET (reg
, u
));
20728 if (DEFAULT_ABI
== ABI_ELFv2
)
20729 base
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
20733 base
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
20736 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20737 may be used by a function global entry point. For SysV4, r11
20738 is used by __glink_PLTresolve lazy resolver entry. */
20739 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20740 rtx hi
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (3, base
, call_ref
, arg
),
20742 rtx lo
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20743 gen_rtvec (3, reg
, call_ref
, arg
),
20745 emit_insn (gen_rtx_SET (reg
, hi
));
20746 emit_insn (gen_rtx_SET (reg
, lo
));
20750 return force_reg (Pmode
, call_ref
);
20753 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20754 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20757 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20758 struct attribute_spec.handler. */
20760 rs6000_handle_struct_attribute (tree
*node
, tree name
,
20761 tree args ATTRIBUTE_UNUSED
,
20762 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
20765 if (DECL_P (*node
))
20767 if (TREE_CODE (*node
) == TYPE_DECL
)
20768 type
= &TREE_TYPE (*node
);
20773 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
20774 || TREE_CODE (*type
) == UNION_TYPE
)))
20776 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
20777 *no_add_attrs
= true;
20780 else if ((is_attribute_p ("ms_struct", name
)
20781 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
20782 || ((is_attribute_p ("gcc_struct", name
)
20783 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
20785 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
20787 *no_add_attrs
= true;
20794 rs6000_ms_bitfield_layout_p (const_tree record_type
)
20796 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
20797 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
20798 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
20801 #ifdef USING_ELFOS_H
20803 /* A get_unnamed_section callback, used for switching to toc_section. */
20806 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
20808 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20809 && TARGET_MINIMAL_TOC
)
20811 if (!toc_initialized
)
20813 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20814 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20815 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LCTOC", 0);
20816 fprintf (asm_out_file
, "\t.tc ");
20817 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1[TC],");
20818 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20819 fprintf (asm_out_file
, "\n");
20821 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20822 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20823 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20824 fprintf (asm_out_file
, " = .+32768\n");
20825 toc_initialized
= 1;
20828 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20830 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20832 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20833 if (!toc_initialized
)
20835 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20836 toc_initialized
= 1;
20841 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20842 if (!toc_initialized
)
20844 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20845 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20846 fprintf (asm_out_file
, " = .+32768\n");
20847 toc_initialized
= 1;
20852 /* Implement TARGET_ASM_INIT_SECTIONS. */
20855 rs6000_elf_asm_init_sections (void)
20858 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op
, NULL
);
20861 = get_unnamed_section (SECTION_WRITE
, output_section_asm_op
,
20862 SDATA2_SECTION_ASM_OP
);
20865 /* Implement TARGET_SELECT_RTX_SECTION. */
20868 rs6000_elf_select_rtx_section (machine_mode mode
, rtx x
,
20869 unsigned HOST_WIDE_INT align
)
20871 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
20872 return toc_section
;
20874 return default_elf_select_rtx_section (mode
, x
, align
);
20877 /* For a SYMBOL_REF, set generic flags and then perform some
20878 target-specific processing.
20880 When the AIX ABI is requested on a non-AIX system, replace the
20881 function name with the real name (with a leading .) rather than the
20882 function descriptor name. This saves a lot of overriding code to
20883 read the prefixes. */
20885 static void rs6000_elf_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
20887 rs6000_elf_encode_section_info (tree decl
, rtx rtl
, int first
)
20889 default_encode_section_info (decl
, rtl
, first
);
20892 && TREE_CODE (decl
) == FUNCTION_DECL
20894 && DEFAULT_ABI
== ABI_AIX
)
20896 rtx sym_ref
= XEXP (rtl
, 0);
20897 size_t len
= strlen (XSTR (sym_ref
, 0));
20898 char *str
= XALLOCAVEC (char, len
+ 2);
20900 memcpy (str
+ 1, XSTR (sym_ref
, 0), len
+ 1);
20901 XSTR (sym_ref
, 0) = ggc_alloc_string (str
, len
+ 1);
20906 compare_section_name (const char *section
, const char *templ
)
20910 len
= strlen (templ
);
20911 return (strncmp (section
, templ
, len
) == 0
20912 && (section
[len
] == 0 || section
[len
] == '.'));
20916 rs6000_elf_in_small_data_p (const_tree decl
)
20918 if (rs6000_sdata
== SDATA_NONE
)
20921 /* We want to merge strings, so we never consider them small data. */
20922 if (TREE_CODE (decl
) == STRING_CST
)
20925 /* Functions are never in the small data area. */
20926 if (TREE_CODE (decl
) == FUNCTION_DECL
)
20929 if (VAR_P (decl
) && DECL_SECTION_NAME (decl
))
20931 const char *section
= DECL_SECTION_NAME (decl
);
20932 if (compare_section_name (section
, ".sdata")
20933 || compare_section_name (section
, ".sdata2")
20934 || compare_section_name (section
, ".gnu.linkonce.s")
20935 || compare_section_name (section
, ".sbss")
20936 || compare_section_name (section
, ".sbss2")
20937 || compare_section_name (section
, ".gnu.linkonce.sb")
20938 || strcmp (section
, ".PPC.EMB.sdata0") == 0
20939 || strcmp (section
, ".PPC.EMB.sbss0") == 0)
20944 /* If we are told not to put readonly data in sdata, then don't. */
20945 if (TREE_READONLY (decl
) && rs6000_sdata
!= SDATA_EABI
20946 && !rs6000_readonly_in_sdata
)
20949 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
20952 && size
<= g_switch_value
20953 /* If it's not public, and we're not going to reference it there,
20954 there's no need to put it in the small data section. */
20955 && (rs6000_sdata
!= SDATA_DATA
|| TREE_PUBLIC (decl
)))
20962 #endif /* USING_ELFOS_H */
20964 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20967 rs6000_use_blocks_for_constant_p (machine_mode mode
, const_rtx x
)
20969 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
);
20972 /* Do not place thread-local symbols refs in the object blocks. */
20975 rs6000_use_blocks_for_decl_p (const_tree decl
)
20977 return !DECL_THREAD_LOCAL_P (decl
);
20980 /* Return a REG that occurs in ADDR with coefficient 1.
20981 ADDR can be effectively incremented by incrementing REG.
20983 r0 is special and we must not select it as an address
20984 register by this routine since our caller will try to
20985 increment the returned register via an "la" instruction. */
20988 find_addr_reg (rtx addr
)
20990 while (GET_CODE (addr
) == PLUS
)
20992 if (REG_P (XEXP (addr
, 0))
20993 && REGNO (XEXP (addr
, 0)) != 0)
20994 addr
= XEXP (addr
, 0);
20995 else if (REG_P (XEXP (addr
, 1))
20996 && REGNO (XEXP (addr
, 1)) != 0)
20997 addr
= XEXP (addr
, 1);
20998 else if (CONSTANT_P (XEXP (addr
, 0)))
20999 addr
= XEXP (addr
, 1);
21000 else if (CONSTANT_P (XEXP (addr
, 1)))
21001 addr
= XEXP (addr
, 0);
21003 gcc_unreachable ();
21005 gcc_assert (REG_P (addr
) && REGNO (addr
) != 0);
21010 rs6000_fatal_bad_address (rtx op
)
21012 fatal_insn ("bad address", op
);
21017 vec
<branch_island
, va_gc
> *branch_islands
;
21019 /* Remember to generate a branch island for far calls to the given
21023 add_compiler_branch_island (tree label_name
, tree function_name
,
21026 branch_island bi
= {function_name
, label_name
, line_number
};
21027 vec_safe_push (branch_islands
, bi
);
21030 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
21031 already there or not. */
21034 no_previous_def (tree function_name
)
21039 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
21040 if (function_name
== bi
->function_name
)
21045 /* GET_PREV_LABEL gets the label name from the previous definition of
21049 get_prev_label (tree function_name
)
21054 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
21055 if (function_name
== bi
->function_name
)
21056 return bi
->label_name
;
/* Generate external symbol indirection stubs (PIC and non-PIC).  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *symbol_name, *lazy_ptr_name;
  char *local_label_0;
  static unsigned label = 0;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  lazy_ptr_name = XALLOCAVEC (char, length + 32);
  GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);

  if (MACHOPIC_PURE)
    {
      switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
      fprintf (file, "\t.align 5\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      label++;
      local_label_0 = XALLOCAVEC (char, 16);
      sprintf (local_label_0, "L%u$spb", label);

      fprintf (file, "\tmflr r0\n");
      fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
      fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
      fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
               lazy_ptr_name, local_label_0);
      fprintf (file, "\tmtlr r0\n");
      fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
               (TARGET_64BIT ? "ldu" : "lwzu"),
               lazy_ptr_name, local_label_0);
      fprintf (file, "\tmtctr r12\n");
      fprintf (file, "\tbctr\n");
    }
  else /* mdynamic-no-pic or mkernel.  */
    {
      switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
      fprintf (file, "\t.align 4\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
      fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
               (TARGET_64BIT ? "ldu" : "lwzu"),
               lazy_ptr_name);
      fprintf (file, "\tmtctr r12\n");
      fprintf (file, "\tbctr\n");
    }

  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "%sdyld_stub_binding_helper\n",
           (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
}
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go into a reg.  This is REG if non
   zero, otherwise we allocate register(s) as necessary.  */

#define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
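
/* For example, SMALL_INT biases X into unsigned range to test for a signed
   16-bit immediate: X == -1 biases to 0x7fff and is accepted, X == 0x7fff
   biases to 0xffff and is accepted, while X == 0x8000 biases to exactly
   0x10000 and is rejected.  */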
rtx
rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
                                        rtx reg)
{
  rtx base, offset;

  if (reg == NULL && !reload_completed)
    reg = gen_reg_rtx (Pmode);

  if (GET_CODE (orig) == CONST)
    {
      rtx reg_temp;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      /* Use a different reg for the intermediate value, as
         it will be marked UNCHANGING.  */
      reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
      base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
                                                     Pmode, reg_temp);
      offset =
        rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
                                                Pmode, reg_temp);

      if (CONST_INT_P (offset))
        {
          if (SMALL_INT (offset))
            return plus_constant (Pmode, base, INTVAL (offset));
          else if (!reload_completed)
            offset = force_reg (Pmode, offset);
          else
            {
              rtx mem = force_const_mem (Pmode, orig);
              return machopic_legitimize_pic_address (mem, Pmode, reg);
            }
        }
      return gen_rtx_PLUS (Pmode, base, offset);
    }

  /* Fall back on generic machopic code.  */
  return machopic_legitimize_pic_address (orig, mode, reg);
}
/* Output a .machine directive for the Darwin assembler, and call
   the generic start_file routine.  */

static void
rs6000_darwin_file_start (void)
{
  static const struct
  {
    const char *arg;
    const char *name;
    HOST_WIDE_INT if_set;
  } mapping[] = {
    { "ppc64", "ppc64", MASK_64BIT },
    { "970", "ppc970", OPTION_MASK_PPC_GPOPT | OPTION_MASK_MFCRF
      | MASK_POWERPC64 },
    { "power4", "ppc970", 0 },
    { "G5", "ppc970", 0 },
    { "7450", "ppc7450", 0 },
    { "7400", "ppc7400", OPTION_MASK_ALTIVEC },
    { "G4", "ppc7400", 0 },
    { "750", "ppc750", 0 },
    { "740", "ppc750", 0 },
    { "G3", "ppc750", 0 },
    { "604e", "ppc604e", 0 },
    { "604", "ppc604", 0 },
    { "603e", "ppc603", 0 },
    { "603", "ppc603", 0 },
    { "601", "ppc601", 0 },
    { NULL, "ppc", 0 } };
  const char *cpu_id = "";
  size_t i;

  rs6000_file_start ();
  darwin_file_start ();

  /* Determine the argument to -mcpu=.  Default to G3 if not specified.  */

  if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
    cpu_id = rs6000_default_cpu;

  if (OPTION_SET_P (rs6000_cpu_index))
    cpu_id = processor_target_table[rs6000_cpu_index].name;

  /* Look through the mapping array.  Pick the first name that either
     matches the argument, has a bit set in IF_SET that is also set
     in the target flags, or has a NULL name.  */
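
  /* For instance, "-mcpu=G5" and "-mcpu=970" both select the "ppc970"
     entry above, while a CPU with no matching row falls through to the
     terminating { NULL, "ppc", 0 } entry.  */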
  i = 0;
  while (mapping[i].arg != NULL
         && strcmp (mapping[i].arg, cpu_id) != 0
         && (mapping[i].if_set & rs6000_isa_flags) == 0)
    i++;

  fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
}

#endif /* TARGET_MACHO */
static int
rs6000_elf_reloc_rw_mask (void)
{
  if (flag_pic)
    return 3;
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    return 2;
  else
    return 0;
}
/* Record an element in the table of global constructors.  SYMBOL is
   a SYMBOL_REF of the function to be called; PRIORITY is a number
   between 0 and MAX_INIT_PRIORITY.

   This differs from default_named_section_asm_out_constructor in
   that we have special handling for -mrelocatable.  */

static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;

static void
rs6000_elf_asm_out_constructor (rtx symbol, int priority)
{
  const char *section = ".ctors";
  char buf[18];

  if (priority != DEFAULT_INIT_PRIORITY)
    {
      sprintf (buf, ".ctors.%.5u",
               /* Invert the numbering so the linker puts us in the proper
                  order; constructors are run from right to left, and the
                  linker sorts in increasing order.  */
               MAX_INIT_PRIORITY - priority);
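      /* Illustration: with MAX_INIT_PRIORITY of 65535, priority 1 lands in
         ".ctors.65534" and priority 65534 in ".ctors.00001"; after the
         linker's increasing sort and the runtime's right-to-left walk, the
         lower-numbered (earlier) priorities still run first.  */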
      section = buf;
    }

  switch_to_section (get_section (section, SECTION_WRITE, NULL));
  assemble_align (POINTER_SIZE);

  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1))
    {
      fputs ("\t.long (", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputs (")@fixup\n", asm_out_file);
    }
  else
    assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
}
static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;

static void
rs6000_elf_asm_out_destructor (rtx symbol, int priority)
{
  const char *section = ".dtors";
  char buf[18];

  if (priority != DEFAULT_INIT_PRIORITY)
    {
      sprintf (buf, ".dtors.%.5u",
               /* Invert the numbering so the linker puts us in the proper
                  order; constructors are run from right to left, and the
                  linker sorts in increasing order.  */
               MAX_INIT_PRIORITY - priority);
      section = buf;
    }

  switch_to_section (get_section (section, SECTION_WRITE, NULL));
  assemble_align (POINTER_SIZE);

  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1))
    {
      fputs ("\t.long (", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputs (")@fixup\n", asm_out_file);
    }
  else
    assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
}
21321 rs6000_elf_declare_function_name (FILE *file
, const char *name
, tree decl
)
21323 if (TARGET_64BIT
&& DEFAULT_ABI
!= ABI_ELFv2
)
21325 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file
);
21326 ASM_OUTPUT_LABEL (file
, name
);
21327 fputs (DOUBLE_INT_ASM_OP
, file
);
21328 rs6000_output_function_entry (file
, name
);
21329 fputs (",.TOC.@tocbase,0\n\t.previous\n", file
);
21332 fputs ("\t.size\t", file
);
21333 assemble_name (file
, name
);
21334 fputs (",24\n\t.type\t.", file
);
21335 assemble_name (file
, name
);
21336 fputs (",@function\n", file
);
21337 if (TREE_PUBLIC (decl
) && ! DECL_WEAK (decl
))
21339 fputs ("\t.globl\t.", file
);
21340 assemble_name (file
, name
);
21345 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
21346 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
21347 rs6000_output_function_entry (file
, name
);
21348 fputs (":\n", file
);
21353 if (DEFAULT_ABI
== ABI_V4
21354 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
21355 && !TARGET_SECURE_PLT
21356 && (!constant_pool_empty_p () || crtl
->profile
)
21357 && (uses_toc
= uses_TOC ()))
21362 switch_to_other_text_partition ();
21363 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
21365 fprintf (file
, "\t.long ");
21366 assemble_name (file
, toc_label_name
);
21369 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
21370 assemble_name (file
, buf
);
21373 switch_to_other_text_partition ();
21376 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
21377 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
21379 if (TARGET_CMODEL
== CMODEL_LARGE
21380 && rs6000_global_entry_point_prologue_needed_p ())
21384 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
21386 fprintf (file
, "\t.quad .TOC.-");
21387 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
21388 assemble_name (file
, buf
);
21392 if (DEFAULT_ABI
== ABI_AIX
)
21394 const char *desc_name
, *orig_name
;
21396 orig_name
= (*targetm
.strip_name_encoding
) (name
);
21397 desc_name
= orig_name
;
21398 while (*desc_name
== '.')
21401 if (TREE_PUBLIC (decl
))
21402 fprintf (file
, "\t.globl %s\n", desc_name
);
21404 fprintf (file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
21405 fprintf (file
, "%s:\n", desc_name
);
21406 fprintf (file
, "\t.long %s\n", orig_name
);
21407 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file
);
21408 fputs ("\t.long 0\n", file
);
21409 fprintf (file
, "\t.previous\n");
21411 ASM_OUTPUT_LABEL (file
, name
);
static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;

static void
rs6000_elf_file_end (void)
{
#ifdef HAVE_AS_GNU_ATTRIBUTE
  /* ??? The value emitted depends on options active at file end.
     Assume anyone using #pragma or attributes that might change
     options knows what they are doing.  */
  if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
      && rs6000_passes_float)
    {
      int fp;

      if (TARGET_HARD_FLOAT)
        fp = 1;
      else
        fp = 2;
      if (rs6000_passes_long_double)
        {
          if (!TARGET_LONG_DOUBLE_128)
            fp |= 2 * 4;
          else if (TARGET_IEEEQUAD)
            fp |= 3 * 4;
          else
            fp |= 1 * 4;
        }
      fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
    }
  if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
    {
      if (rs6000_passes_vector)
        fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
                 (TARGET_ALTIVEC_ABI ? 2 : 1));
      if (rs6000_returns_struct)
        fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
                 aix_struct_return ? 2 : 1);
    }
#endif
#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
  if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
    file_end_indicate_exec_stack ();
#endif

  if (flag_split_stack)
    file_end_indicate_split_stack ();

  if (cpu_builtin_p)
    {
      /* We have expanded a CPU builtin, so we need to emit a reference to
         the special symbol that LIBC uses to declare it supports the
         AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature.  */
      switch_to_section (data_section);
      fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
      fprintf (asm_out_file, "\t%s %s\n",
               TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
    }
}
21475 #ifndef HAVE_XCOFF_DWARF_EXTRAS
21476 #define HAVE_XCOFF_DWARF_EXTRAS 0
/* Names of bss and data sections.  These should be unique names for each
   compilation unit.  */

char *xcoff_bss_section_name;
char *xcoff_private_data_section_name;
char *xcoff_private_rodata_section_name;
char *xcoff_tls_data_section_name;
char *xcoff_read_only_section_name;
21489 static enum unwind_info_type
21490 rs6000_xcoff_debug_unwind_info (void)
21496 rs6000_xcoff_asm_output_anchor (rtx symbol
)
21500 sprintf (buffer
, "$ + " HOST_WIDE_INT_PRINT_DEC
,
21501 SYMBOL_REF_BLOCK_OFFSET (symbol
));
21502 fprintf (asm_out_file
, "%s", SET_ASM_OP
);
21503 RS6000_OUTPUT_BASENAME (asm_out_file
, XSTR (symbol
, 0));
21504 fprintf (asm_out_file
, ",");
21505 RS6000_OUTPUT_BASENAME (asm_out_file
, buffer
);
21506 fprintf (asm_out_file
, "\n");
21510 rs6000_xcoff_asm_globalize_label (FILE *stream
, const char *name
)
21512 fputs (GLOBAL_ASM_OP
, stream
);
21513 RS6000_OUTPUT_BASENAME (stream
, name
);
21514 putc ('\n', stream
);
21517 /* A get_unnamed_decl callback, used for read-only sections. PTR
21518 points to the section string variable. */
21521 rs6000_xcoff_output_readonly_section_asm_op (const char *directive
)
21523 fprintf (asm_out_file
, "\t.csect %s[RO],%s\n",
21525 ? xcoff_private_rodata_section_name
21526 : xcoff_read_only_section_name
,
21527 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21530 /* Likewise for read-write sections. */
21533 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21535 fprintf (asm_out_file
, "\t.csect %s[RW],%s\n",
21536 xcoff_private_data_section_name
,
21537 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21541 rs6000_xcoff_output_tls_section_asm_op (const char *directive
)
21543 fprintf (asm_out_file
, "\t.csect %s[TL],%s\n",
21545 ? xcoff_private_data_section_name
21546 : xcoff_tls_data_section_name
,
21547 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21550 /* A get_unnamed_section callback, used for switching to toc_section. */
21553 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
21555 if (TARGET_MINIMAL_TOC
)
21557 /* toc_section is always selected at least once from
21558 rs6000_xcoff_file_start, so this is guaranteed to
21559 always be defined once and only once in each file. */
21560 if (!toc_initialized
)
21562 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file
);
21563 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file
);
21564 toc_initialized
= 1;
21566 fprintf (asm_out_file
, "\t.csect toc_table[RW]%s\n",
21567 (TARGET_32BIT
? "" : ",3"));
21570 fputs ("\t.toc\n", asm_out_file
);
21573 /* Implement TARGET_ASM_INIT_SECTIONS. */
21576 rs6000_xcoff_asm_init_sections (void)
21578 read_only_data_section
21579 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
21582 private_data_section
21583 = get_unnamed_section (SECTION_WRITE
,
21584 rs6000_xcoff_output_readwrite_section_asm_op
,
21587 read_only_private_data_section
21588 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
21592 = get_unnamed_section (SECTION_TLS
,
21593 rs6000_xcoff_output_tls_section_asm_op
,
21596 tls_private_data_section
21597 = get_unnamed_section (SECTION_TLS
,
21598 rs6000_xcoff_output_tls_section_asm_op
,
21602 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op
, NULL
);
21604 readonly_data_section
= read_only_data_section
;
21608 rs6000_xcoff_reloc_rw_mask (void)
21614 rs6000_xcoff_asm_named_section (const char *name
, unsigned int flags
,
21615 tree decl ATTRIBUTE_UNUSED
)
21618 static const char * const suffix
[7]
21619 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21621 if (flags
& SECTION_EXCLUDE
)
21623 else if (flags
& SECTION_DEBUG
)
21625 fprintf (asm_out_file
, "\t.dwsect %s\n", name
);
21628 else if (flags
& SECTION_CODE
)
21630 else if (flags
& SECTION_TLS
)
21632 if (flags
& SECTION_BSS
)
21637 else if (flags
& SECTION_WRITE
)
21639 if (flags
& SECTION_BSS
)
21647 fprintf (asm_out_file
, "\t.csect %s%s[%s],%u\n",
21648 (flags
& SECTION_CODE
) ? "." : "",
21649 name
, suffix
[smclass
], flags
& SECTION_ENTSIZE
);
21652 #define IN_NAMED_SECTION(DECL) \
21653 ((TREE_CODE (DECL) == FUNCTION_DECL || VAR_P (DECL)) \
21654 && DECL_SECTION_NAME (DECL) != NULL)
21657 rs6000_xcoff_select_section (tree decl
, int reloc
,
21658 unsigned HOST_WIDE_INT align
)
21660 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21662 if (align
> BIGGEST_ALIGNMENT
&& VAR_OR_FUNCTION_DECL_P (decl
))
21664 resolve_unique_section (decl
, reloc
, true);
21665 if (IN_NAMED_SECTION (decl
))
21666 return get_named_section (decl
, NULL
, reloc
);
21669 if (decl_readonly_section (decl
, reloc
))
21671 if (TREE_PUBLIC (decl
))
21672 return read_only_data_section
;
21674 return read_only_private_data_section
;
21679 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
21681 if (bss_initializer_p (decl
))
21682 return tls_comm_section
;
21683 else if (TREE_PUBLIC (decl
))
21684 return tls_data_section
;
21686 return tls_private_data_section
;
21690 if (TREE_PUBLIC (decl
))
21691 return data_section
;
21693 return private_data_section
;
21698 rs6000_xcoff_unique_section (tree decl
, int reloc ATTRIBUTE_UNUSED
)
21702 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
21703 name
= (*targetm
.strip_name_encoding
) (name
);
21704 set_decl_section_name (decl
, name
);
21707 /* Select section for constant in constant pool.
21709 On RS/6000, all constants are in the private read-only data area.
21710 However, if this is being placed in the TOC it must be output as a
21714 rs6000_xcoff_select_rtx_section (machine_mode mode
, rtx x
,
21715 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
21717 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
21718 return toc_section
;
21720 return read_only_private_data_section
;
/* Remove any trailing [DS] or the like from the symbol name.  */

static const char *
rs6000_xcoff_strip_name_encoding (const char *name)
{
  size_t len;

  len = strlen (name);
  if (name[len - 1] == ']')
    return ggc_alloc_string (name, len - 4);
  else
    return name;
}
/* Section attributes.  AIX is always PIC.  */

static unsigned int
rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int align;
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
    flags |= SECTION_BSS;

  /* Align to at least UNIT size.  */
  if (!decl || !DECL_P (decl))
    align = MIN_UNITS_PER_WORD;
  /* Align code CSECT to at least 32 bytes.  */
  else if ((flags & SECTION_CODE) != 0)
    align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
  else
    /* Increase alignment of large objects if not already stricter.  */
    align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
                 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
                 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);

  return flags | (exact_log2 (align) & SECTION_ENTSIZE);
}
21764 /* Output at beginning of assembler file.
21766 Initialize the section names for the RS/6000 at this point.
21768 Specify filename, including full path, to assembler.
21770 We want to go into the TOC section so at least one .toc will be emitted.
21771 Also, in order to output proper .bs/.es pairs, we need at least one static
21772 [RW] section emitted.
21774 Finally, declare mcount when profiling to make the assembler happy. */
21777 rs6000_xcoff_file_start (void)
21779 rs6000_gen_section_name (&xcoff_bss_section_name
,
21780 main_input_filename
, ".bss_");
21781 rs6000_gen_section_name (&xcoff_private_data_section_name
,
21782 main_input_filename
, ".rw_");
21783 rs6000_gen_section_name (&xcoff_private_rodata_section_name
,
21784 main_input_filename
, ".rop_");
21785 rs6000_gen_section_name (&xcoff_read_only_section_name
,
21786 main_input_filename
, ".ro_");
21787 rs6000_gen_section_name (&xcoff_tls_data_section_name
,
21788 main_input_filename
, ".tls_");
21790 fputs ("\t.file\t", asm_out_file
);
21791 output_quoted_string (asm_out_file
, main_input_filename
);
21792 fputc ('\n', asm_out_file
);
21793 if (write_symbols
!= NO_DEBUG
)
21794 switch_to_section (private_data_section
);
21795 switch_to_section (toc_section
);
21796 switch_to_section (text_section
);
21798 fprintf (asm_out_file
, "\t.extern %s\n", RS6000_MCOUNT
);
21799 rs6000_file_start ();
21802 /* Output at end of assembler file.
21803 On the RS/6000, referencing data should automatically pull in text. */
21806 rs6000_xcoff_file_end (void)
21808 switch_to_section (text_section
);
21809 if (xcoff_tls_exec_model_detected
)
21811 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21812 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file
);
21814 fputs ("_section_.text:\n", asm_out_file
);
21815 switch_to_section (data_section
);
21816 fputs (TARGET_32BIT
21817 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21822 struct declare_alias_data
21825 bool function_descriptor
;
21828 /* Declare alias N. A helper function for for_node_and_aliases. */
21831 rs6000_declare_alias (struct symtab_node
*n
, void *d
)
21833 struct declare_alias_data
*data
= (struct declare_alias_data
*)d
;
21834 /* Main symbol is output specially, because varasm machinery does part of
21835 the job for us - we do not need to declare .globl/lglobs and such. */
21836 if (!n
->alias
|| n
->weakref
)
21839 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n
->decl
)))
21842 /* Prevent assemble_alias from trying to use .set pseudo operation
21843 that does not behave as expected by the middle-end. */
21844 TREE_ASM_WRITTEN (n
->decl
) = true;
21846 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n
->decl
));
21847 char *buffer
= (char *) alloca (strlen (name
) + 2);
21849 int dollar_inside
= 0;
21851 strcpy (buffer
, name
);
21852 p
= strchr (buffer
, '$');
21856 p
= strchr (p
+ 1, '$');
21858 if (TREE_PUBLIC (n
->decl
))
21860 if (!RS6000_WEAK
|| !DECL_WEAK (n
->decl
))
21862 if (dollar_inside
) {
21863 if (data
->function_descriptor
)
21864 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21865 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21867 if (data
->function_descriptor
)
21869 fputs ("\t.globl .", data
->file
);
21870 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21871 putc ('\n', data
->file
);
21873 fputs ("\t.globl ", data
->file
);
21874 assemble_name (data
->file
, buffer
);
21875 putc ('\n', data
->file
);
21877 #ifdef ASM_WEAKEN_DECL
21878 else if (DECL_WEAK (n
->decl
) && !data
->function_descriptor
)
21879 ASM_WEAKEN_DECL (data
->file
, n
->decl
, name
, NULL
);
21886 if (data
->function_descriptor
)
21887 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21888 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21890 if (data
->function_descriptor
)
21892 fputs ("\t.lglobl .", data
->file
);
21893 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21894 putc ('\n', data
->file
);
21896 fputs ("\t.lglobl ", data
->file
);
21897 assemble_name (data
->file
, buffer
);
21898 putc ('\n', data
->file
);
21900 if (data
->function_descriptor
)
21901 putc ('.', data
->file
);
21902 ASM_OUTPUT_LABEL (data
->file
, buffer
);
21907 #ifdef HAVE_GAS_HIDDEN
21908 /* Helper function to calculate visibility of a DECL
21909 and return the value as a const string. */
21911 static const char *
21912 rs6000_xcoff_visibility (tree decl
)
21914 static const char * const visibility_types
[] = {
21915 "", ",protected", ",hidden", ",internal"
21918 enum symbol_visibility vis
= DECL_VISIBILITY (decl
);
21919 return visibility_types
[vis
];
21924 /* This macro produces the initial definition of a function name.
21925 On the RS/6000, we need to place an extra '.' in the function name and
21926 output the function descriptor.
21927 Dollar signs are converted to underscores.
21929 The csect for the function will have already been created when
21930 text_section was selected. We do have to go back to that csect, however.
21932 The third and fourth parameters to the .function pseudo-op (16 and 044)
21933 are placeholders which no longer have any use.
21935 Because AIX assembler's .set command has unexpected semantics, we output
21936 all aliases as alternative labels in front of the definition. */
21939 rs6000_xcoff_declare_function_name (FILE *file
, const char *name
, tree decl
)
21941 char *buffer
= (char *) alloca (strlen (name
) + 1);
21943 int dollar_inside
= 0;
21944 struct declare_alias_data data
= {file
, false};
21946 strcpy (buffer
, name
);
21947 p
= strchr (buffer
, '$');
21951 p
= strchr (p
+ 1, '$');
21953 if (TREE_PUBLIC (decl
))
21955 if (!RS6000_WEAK
|| !DECL_WEAK (decl
))
21957 if (dollar_inside
) {
21958 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21959 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21961 fputs ("\t.globl .", file
);
21962 RS6000_OUTPUT_BASENAME (file
, buffer
);
21963 #ifdef HAVE_GAS_HIDDEN
21964 fputs (rs6000_xcoff_visibility (decl
), file
);
21971 if (dollar_inside
) {
21972 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21973 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21975 fputs ("\t.lglobl .", file
);
21976 RS6000_OUTPUT_BASENAME (file
, buffer
);
21980 fputs ("\t.csect ", file
);
21981 assemble_name (file
, buffer
);
21982 fputs (TARGET_32BIT
? "\n" : ",3\n", file
);
21984 ASM_OUTPUT_LABEL (file
, buffer
);
21986 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21988 fputs (TARGET_32BIT
? "\t.long ." : "\t.llong .", file
);
21989 RS6000_OUTPUT_BASENAME (file
, buffer
);
21990 fputs (", TOC[tc0], 0\n", file
);
21993 switch_to_section (function_section (decl
));
21995 ASM_OUTPUT_LABEL (file
, buffer
);
21997 data
.function_descriptor
= true;
21998 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
22000 if (!DECL_IGNORED_P (decl
))
22002 if (dwarf_debuginfo_p ())
22004 name
= (*targetm
.strip_name_encoding
) (name
);
22005 fprintf (file
, "\t.function .%s,.%s,2,0\n", name
, name
);
22012 /* Output assembly language to globalize a symbol from a DECL,
22013 possibly with visibility. */
22016 rs6000_xcoff_asm_globalize_decl_name (FILE *stream
, tree decl
)
22018 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
22019 fputs (GLOBAL_ASM_OP
, stream
);
22020 assemble_name (stream
, name
);
22021 #ifdef HAVE_GAS_HIDDEN
22022 fputs (rs6000_xcoff_visibility (decl
), stream
);
22024 putc ('\n', stream
);
22027 /* Output assembly language to define a symbol as COMMON from a DECL,
22028 possibly with visibility. */
22031 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream
,
22032 tree decl ATTRIBUTE_UNUSED
,
22034 unsigned HOST_WIDE_INT size
,
22035 unsigned int align
)
22037 unsigned int align2
= 2;
22040 align
= DATA_ABI_ALIGNMENT (TREE_TYPE (decl
), DECL_ALIGN (decl
));
22043 align2
= floor_log2 (align
/ BITS_PER_UNIT
);
22047 if (! DECL_COMMON (decl
))
22049 /* Forget section. */
22052 /* Globalize TLS BSS. */
22053 if (TREE_PUBLIC (decl
) && DECL_THREAD_LOCAL_P (decl
))
22055 fputs (GLOBAL_ASM_OP
, stream
);
22056 assemble_name (stream
, name
);
22057 fputc ('\n', stream
);
22060 /* Switch to section and skip space. */
22061 fputs ("\t.csect ", stream
);
22062 assemble_name (stream
, name
);
22063 fprintf (stream
, ",%u\n", align2
);
22064 ASM_DECLARE_OBJECT_NAME (stream
, name
, decl
);
22065 ASM_OUTPUT_SKIP (stream
, size
? size
: 1);
22069 if (TREE_PUBLIC (decl
))
22072 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED
",%u" ,
22073 name
, size
, align2
);
22075 #ifdef HAVE_GAS_HIDDEN
22077 fputs (rs6000_xcoff_visibility (decl
), stream
);
22079 putc ('\n', stream
);
22083 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED
",%s,%u\n",
22084 (*targetm
.strip_name_encoding
) (name
), size
, name
, align2
);
/* This macro produces the initial definition of an object (variable) name.
   Because AIX assembler's .set command has unexpected semantics, we output
   all aliases as alternative labels in front of the definition.  */

void
rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
{
  struct declare_alias_data data = {file, false};
  ASM_OUTPUT_LABEL (file, name);
  symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
                                                               &data, true);
}

/* Override the default 'SYMBOL-.' syntax with AIX-compatible 'SYMBOL-$'.  */

void
rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs ("-$", file);
}

/* Output a symbol offset relative to the dbase for the current object.
   We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
   signed offsets.

   __gcc_unwind_dbase is embedded in all executables/libraries through
   libgcc/config/rs6000/crtdbase.S.  */

void
rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs ("-__gcc_unwind_dbase", file);
}
22127 rs6000_xcoff_encode_section_info (tree decl
, rtx rtl
, int first
)
22131 const char *symname
;
22133 default_encode_section_info (decl
, rtl
, first
);
22135 /* Careful not to prod global register variables. */
22138 symbol
= XEXP (rtl
, 0);
22139 if (!SYMBOL_REF_P (symbol
))
22142 flags
= SYMBOL_REF_FLAGS (symbol
);
22144 if (VAR_P (decl
) && DECL_THREAD_LOCAL_P (decl
))
22145 flags
&= ~SYMBOL_FLAG_HAS_BLOCK_INFO
;
22147 SYMBOL_REF_FLAGS (symbol
) = flags
;
22149 symname
= XSTR (symbol
, 0);
22151 /* Append CSECT mapping class, unless the symbol already is qualified.
22152 Aliases are implemented as labels, so the symbol name should not add
22153 a mapping class. */
22156 && VAR_OR_FUNCTION_DECL_P (decl
)
22157 && (symtab_node::get (decl
) == NULL
22158 || symtab_node::get (decl
)->alias
== 0)
22159 && symname
[strlen (symname
) - 1] != ']')
22161 const char *smclass
= NULL
;
22163 if (TREE_CODE (decl
) == FUNCTION_DECL
)
22165 else if (DECL_THREAD_LOCAL_P (decl
))
22167 if (bss_initializer_p (decl
))
22169 else if (flag_data_sections
)
22172 else if (DECL_EXTERNAL (decl
))
22174 else if (bss_initializer_p (decl
))
22176 else if (flag_data_sections
)
22178 /* This must exactly match the logic of select section. */
22179 if (decl_readonly_section (decl
, compute_reloc_for_var (decl
)))
22185 if (smclass
!= NULL
)
22187 char *newname
= XALLOCAVEC (char, strlen (symname
) + 5);
22189 strcpy (newname
, symname
);
22190 strcat (newname
, smclass
);
22191 XSTR (symbol
, 0) = ggc_strdup (newname
);
22195 #endif /* HAVE_AS_TLS */
22196 #endif /* TARGET_XCOFF */
22199 rs6000_asm_weaken_decl (FILE *stream
, tree decl
,
22200 const char *name
, const char *val
)
22202 fputs ("\t.weak\t", stream
);
22203 assemble_name (stream
, name
);
22204 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
22205 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
22207 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22209 fputs (rs6000_xcoff_visibility (decl
), stream
);
22211 fputs ("\n\t.weak\t.", stream
);
22212 RS6000_OUTPUT_BASENAME (stream
, name
);
22214 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22216 fputs (rs6000_xcoff_visibility (decl
), stream
);
22218 fputc ('\n', stream
);
22222 #ifdef ASM_OUTPUT_DEF
22223 ASM_OUTPUT_DEF (stream
, name
, val
);
22225 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
22226 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
22228 fputs ("\t.set\t.", stream
);
22229 RS6000_OUTPUT_BASENAME (stream
, name
);
22230 fputs (",.", stream
);
22231 RS6000_OUTPUT_BASENAME (stream
, val
);
22232 fputc ('\n', stream
);
/* Return true if INSN should not be copied.  */

static bool
rs6000_cannot_copy_insn_p (rtx_insn *insn)
{
  return recog_memoized (insn) >= 0
         && get_attr_cannot_copy (insn);
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */
22252 rs6000_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
22253 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
22255 int code
= GET_CODE (x
);
22259 /* On the RS/6000, if it is valid in the insn, it is free. */
22261 if (((outer_code
== SET
22262 || outer_code
== PLUS
22263 || outer_code
== MINUS
)
22264 && (satisfies_constraint_I (x
)
22265 || satisfies_constraint_L (x
)))
22266 || (outer_code
== AND
22267 && (satisfies_constraint_K (x
)
22269 ? satisfies_constraint_L (x
)
22270 : satisfies_constraint_J (x
))))
22271 || ((outer_code
== IOR
|| outer_code
== XOR
)
22272 && (satisfies_constraint_K (x
)
22274 ? satisfies_constraint_L (x
)
22275 : satisfies_constraint_J (x
))))
22276 || outer_code
== ASHIFT
22277 || outer_code
== ASHIFTRT
22278 || outer_code
== LSHIFTRT
22279 || outer_code
== ROTATE
22280 || outer_code
== ROTATERT
22281 || outer_code
== ZERO_EXTRACT
22282 || (outer_code
== MULT
22283 && satisfies_constraint_I (x
))
22284 || ((outer_code
== DIV
|| outer_code
== UDIV
22285 || outer_code
== MOD
|| outer_code
== UMOD
)
22286 && exact_log2 (INTVAL (x
)) >= 0)
22287 || (outer_code
== COMPARE
22288 && (satisfies_constraint_I (x
)
22289 || satisfies_constraint_K (x
)))
22290 || ((outer_code
== EQ
|| outer_code
== NE
)
22291 && (satisfies_constraint_I (x
)
22292 || satisfies_constraint_K (x
)
22294 ? satisfies_constraint_L (x
)
22295 : satisfies_constraint_J (x
))))
22296 || (outer_code
== GTU
22297 && satisfies_constraint_I (x
))
22298 || (outer_code
== LTU
22299 && satisfies_constraint_P (x
)))
22304 else if ((outer_code
== PLUS
22305 && reg_or_add_cint_operand (x
, mode
))
22306 || (outer_code
== MINUS
22307 && reg_or_sub_cint_operand (x
, mode
))
22308 || ((outer_code
== SET
22309 || outer_code
== IOR
22310 || outer_code
== XOR
)
22312 & ~ (unsigned HOST_WIDE_INT
) 0xffffffff) == 0))
22314 *total
= COSTS_N_INSNS (1);
22320 case CONST_WIDE_INT
:
22324 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22328 /* When optimizing for size, MEM should be slightly more expensive
22329 than generating address, e.g., (plus (reg) (const)).
22330 L1 cache latency is about two instructions. */
22331 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22332 if (rs6000_slow_unaligned_access (mode
, MEM_ALIGN (x
)))
22333 *total
+= COSTS_N_INSNS (100);
22342 if (FLOAT_MODE_P (mode
))
22343 *total
= rs6000_cost
->fp
;
22345 *total
= COSTS_N_INSNS (1);
22349 if (CONST_INT_P (XEXP (x
, 1))
22350 && satisfies_constraint_I (XEXP (x
, 1)))
22352 if (INTVAL (XEXP (x
, 1)) >= -256
22353 && INTVAL (XEXP (x
, 1)) <= 255)
22354 *total
= rs6000_cost
->mulsi_const9
;
22356 *total
= rs6000_cost
->mulsi_const
;
22358 else if (mode
== SFmode
)
22359 *total
= rs6000_cost
->fp
;
22360 else if (FLOAT_MODE_P (mode
))
22361 *total
= rs6000_cost
->dmul
;
22362 else if (mode
== DImode
)
22363 *total
= rs6000_cost
->muldi
;
22365 *total
= rs6000_cost
->mulsi
;
22369 if (mode
== SFmode
)
22370 *total
= rs6000_cost
->fp
;
22372 *total
= rs6000_cost
->dmul
;
22377 if (FLOAT_MODE_P (mode
))
22379 *total
= mode
== DFmode
? rs6000_cost
->ddiv
22380 : rs6000_cost
->sdiv
;
22387 if (CONST_INT_P (XEXP (x
, 1))
22388 && exact_log2 (INTVAL (XEXP (x
, 1))) >= 0)
22390 if (code
== DIV
|| code
== MOD
)
22392 *total
= COSTS_N_INSNS (2);
22395 *total
= COSTS_N_INSNS (1);
22399 if (GET_MODE (XEXP (x
, 1)) == DImode
)
22400 *total
= rs6000_cost
->divdi
;
22402 *total
= rs6000_cost
->divsi
;
22404 /* Add in shift and subtract for MOD unless we have a mod instruction. */
22405 if ((!TARGET_MODULO
22406 || (RS6000_DISABLE_SCALAR_MODULO
&& SCALAR_INT_MODE_P (mode
)))
22407 && (code
== MOD
|| code
== UMOD
))
22408 *total
+= COSTS_N_INSNS (2);
22412 *total
= COSTS_N_INSNS (TARGET_CTZ
? 1 : 4);
22416 *total
= COSTS_N_INSNS (4);
22420 *total
= COSTS_N_INSNS (TARGET_POPCNTD
? 1 : 6);
22424 *total
= COSTS_N_INSNS (TARGET_CMPB
? 2 : 6);
22428 if (outer_code
== AND
|| outer_code
== IOR
|| outer_code
== XOR
)
22431 *total
= COSTS_N_INSNS (1);
22435 if (CONST_INT_P (XEXP (x
, 1)))
22437 rtx left
= XEXP (x
, 0);
22438 rtx_code left_code
= GET_CODE (left
);
22440 /* rotate-and-mask: 1 insn. */
22441 if ((left_code
== ROTATE
22442 || left_code
== ASHIFT
22443 || left_code
== LSHIFTRT
)
22444 && rs6000_is_valid_shift_mask (XEXP (x
, 1), left
, mode
))
22446 *total
= rtx_cost (XEXP (left
, 0), mode
, left_code
, 0, speed
);
22447 if (!CONST_INT_P (XEXP (left
, 1)))
22448 *total
+= rtx_cost (XEXP (left
, 1), SImode
, left_code
, 1, speed
);
22449 *total
+= COSTS_N_INSNS (1);
22453 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22454 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
22455 if (rs6000_is_valid_and_mask (XEXP (x
, 1), mode
)
22456 || (val
& 0xffff) == val
22457 || (val
& 0xffff0000) == val
22458 || ((val
& 0xffff) == 0 && mode
== SImode
))
22460 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
22461 *total
+= COSTS_N_INSNS (1);
22466 if (rs6000_is_valid_2insn_and (XEXP (x
, 1), mode
))
22468 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
22469 *total
+= COSTS_N_INSNS (2);
22474 *total
= COSTS_N_INSNS (1);
22479 *total
= COSTS_N_INSNS (1);
22485 *total
= COSTS_N_INSNS (1);
22489 /* The EXTSWSLI instruction is a combined instruction. Don't count both
22490 the sign extend and shift separately within the insn. */
22491 if (TARGET_EXTSWSLI
&& mode
== DImode
22492 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
22493 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
)
22504 /* Handle mul_highpart. */
22505 if (outer_code
== TRUNCATE
22506 && GET_CODE (XEXP (x
, 0)) == MULT
)
22508 if (mode
== DImode
)
22509 *total
= rs6000_cost
->muldi
;
22511 *total
= rs6000_cost
->mulsi
;
22514 else if (outer_code
== AND
)
22517 *total
= COSTS_N_INSNS (1);
22522 if (MEM_P (XEXP (x
, 0)))
22525 *total
= COSTS_N_INSNS (1);
22531 if (!FLOAT_MODE_P (mode
))
22533 *total
= COSTS_N_INSNS (1);
22539 case UNSIGNED_FLOAT
:
22542 case FLOAT_TRUNCATE
:
22543 *total
= rs6000_cost
->fp
;
22547 if (mode
== DFmode
)
22548 *total
= rs6000_cost
->sfdf_convert
;
22550 *total
= rs6000_cost
->fp
;
22557 *total
= COSTS_N_INSNS (1);
22560 else if (FLOAT_MODE_P (mode
) && TARGET_PPC_GFXOPT
&& TARGET_HARD_FLOAT
)
22562 *total
= rs6000_cost
->fp
;
22571 /* Carry bit requires mode == Pmode.
22572 NEG or PLUS already counted so only add one. */
22574 && (outer_code
== NEG
|| outer_code
== PLUS
))
22576 *total
= COSTS_N_INSNS (1);
22584 if (outer_code
== SET
)
22586 if (XEXP (x
, 1) == const0_rtx
)
22588 *total
= COSTS_N_INSNS (2);
22593 *total
= COSTS_N_INSNS (3);
22598 if (outer_code
== COMPARE
)
22606 if (XINT (x
, 1) == UNSPECV_MMA_XXSETACCZ
)
/* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.  */

static bool
rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
                        int opno, int *total, bool speed)
{
  bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);

  fprintf (stderr,
           "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
           "opno = %d, total = %d, speed = %s, x:\n",
           ret ? "complete" : "scan inner",
           GET_MODE_NAME (mode),
           GET_RTX_NAME (outer_code),
           opno, *total,
           speed ? "true" : "false");

  debug_rtx (x);

  return ret;
}
22644 rs6000_insn_cost (rtx_insn
*insn
, bool speed
)
22646 if (recog_memoized (insn
) < 0)
22649 /* If we are optimizing for size, just use the length. */
22651 return get_attr_length (insn
);
22653 /* Use the cost if provided. */
22654 int cost
= get_attr_cost (insn
);
22658 /* If the insn tells us how many insns there are, use that. Otherwise use
22659 the length/4. Adjust the insn length to remove the extra size that
22660 prefixed instructions take. */
22661 int n
= get_attr_num_insns (insn
);
22664 int length
= get_attr_length (insn
);
22665 if (get_attr_prefixed (insn
) == PREFIXED_YES
)
22668 ADJUST_INSN_LENGTH (insn
, adjust
);
22675 enum attr_type type
= get_attr_type (insn
);
22682 cost
= COSTS_N_INSNS (n
+ 1);
22686 switch (get_attr_size (insn
))
22689 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const9
;
22692 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const
;
22695 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi
;
22698 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->muldi
;
22701 gcc_unreachable ();
22705 switch (get_attr_size (insn
))
22708 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divsi
;
22711 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divdi
;
22714 gcc_unreachable ();
22719 cost
= n
* rs6000_cost
->fp
;
22722 cost
= n
* rs6000_cost
->dmul
;
22725 cost
= n
* rs6000_cost
->sdiv
;
22728 cost
= n
* rs6000_cost
->ddiv
;
22735 cost
= COSTS_N_INSNS (n
+ 2);
22739 cost
= COSTS_N_INSNS (n
);
/* Debug form of ADDRESS_COST that is selected if -mdebug=cost.  */

static int
rs6000_debug_address_cost (rtx x, machine_mode mode,
                           addr_space_t as, bool speed)
{
  int ret = TARGET_ADDRESS_COST (x, mode, as, speed);

  fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
           ret, speed ? "true" : "false");
  debug_rtx (x);

  return ret;
}
22761 /* A C expression returning the cost of moving data from a register of class
22762 CLASS1 to one of CLASS2. */
22765 rs6000_register_move_cost (machine_mode mode
,
22766 reg_class_t from
, reg_class_t to
)
22769 reg_class_t rclass
;
22771 if (TARGET_DEBUG_COST
)
22774 /* If we have VSX, we can easily move between FPR or Altivec registers,
22775 otherwise we can only easily move within classes.
22776 Do this first so we give best-case answers for union classes
22777 containing both gprs and vsx regs. */
22778 HARD_REG_SET to_vsx
, from_vsx
;
22779 to_vsx
= reg_class_contents
[to
] & reg_class_contents
[VSX_REGS
];
22780 from_vsx
= reg_class_contents
[from
] & reg_class_contents
[VSX_REGS
];
22781 if (!hard_reg_set_empty_p (to_vsx
)
22782 && !hard_reg_set_empty_p (from_vsx
)
22784 || hard_reg_set_intersect_p (to_vsx
, from_vsx
)))
22786 int reg
= FIRST_FPR_REGNO
;
22788 || (TEST_HARD_REG_BIT (to_vsx
, FIRST_ALTIVEC_REGNO
)
22789 && TEST_HARD_REG_BIT (from_vsx
, FIRST_ALTIVEC_REGNO
)))
22790 reg
= FIRST_ALTIVEC_REGNO
;
22791 ret
= 2 * hard_regno_nregs (reg
, mode
);
22794 /* Moves from/to GENERAL_REGS. */
22795 else if ((rclass
= from
, reg_classes_intersect_p (to
, GENERAL_REGS
))
22796 || (rclass
= to
, reg_classes_intersect_p (from
, GENERAL_REGS
)))
22798 if (rclass
== FLOAT_REGS
|| rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
22800 if (TARGET_DIRECT_MOVE
)
22802 /* Keep the cost for direct moves above that for within
22803 a register class even if the actual processor cost is
22804 comparable. We do this because a direct move insn
22805 can't be a nop, whereas with ideal register
22806 allocation a move within the same class might turn
22807 out to be a nop. */
22808 if (rs6000_tune
== PROCESSOR_POWER9
22809 || rs6000_tune
== PROCESSOR_POWER10
)
22810 ret
= 3 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22812 ret
= 4 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22813 /* SFmode requires a conversion when moving between gprs
22815 if (mode
== SFmode
)
22819 ret
= (rs6000_memory_move_cost (mode
, rclass
, false)
22820 + rs6000_memory_move_cost (mode
, GENERAL_REGS
, false));
22823 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22825 else if (rclass
== CR_REGS
)
22828 /* For those processors that have slow LR/CTR moves, make them more
22829 expensive than memory in order to bias spills to memory .*/
22830 else if ((rs6000_tune
== PROCESSOR_POWER6
22831 || rs6000_tune
== PROCESSOR_POWER7
22832 || rs6000_tune
== PROCESSOR_POWER8
22833 || rs6000_tune
== PROCESSOR_POWER9
)
22834 && reg_class_subset_p (rclass
, SPECIAL_REGS
))
22835 ret
= 6 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22838 /* A move will cost one instruction per GPR moved. */
22839 ret
= 2 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22842 /* Everything else has to go through GENERAL_REGS. */
22844 ret
= (rs6000_register_move_cost (mode
, GENERAL_REGS
, to
)
22845 + rs6000_register_move_cost (mode
, from
, GENERAL_REGS
));
22847 if (TARGET_DEBUG_COST
)
22849 if (dbg_cost_ctrl
== 1)
22851 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22852 ret
, GET_MODE_NAME (mode
), reg_class_names
[from
],
22853 reg_class_names
[to
]);
22860 /* A C expressions returning the cost of moving data of MODE from a register to
22864 rs6000_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
22865 bool in ATTRIBUTE_UNUSED
)
22869 if (TARGET_DEBUG_COST
)
22872 if (reg_classes_intersect_p (rclass
, GENERAL_REGS
))
22873 ret
= 4 * hard_regno_nregs (0, mode
);
22874 else if ((reg_classes_intersect_p (rclass
, FLOAT_REGS
)
22875 || reg_classes_intersect_p (rclass
, VSX_REGS
)))
22876 ret
= 4 * hard_regno_nregs (32, mode
);
22877 else if (reg_classes_intersect_p (rclass
, ALTIVEC_REGS
))
22878 ret
= 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO
, mode
);
22880 ret
= 4 + rs6000_register_move_cost (mode
, rclass
, GENERAL_REGS
);
22882 if (TARGET_DEBUG_COST
)
22884 if (dbg_cost_ctrl
== 1)
22886 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22887 ret
, GET_MODE_NAME (mode
), reg_class_names
[rclass
], in
);
22894 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22896 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22897 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22898 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22899 move cost between GENERAL_REGS and VSX_REGS low.
22901 It might seem reasonable to use a union class. After all, if usage
22902 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22903 rather than memory. However, in cases where register pressure of
22904 both is high, like the cactus_adm spec test, allowing
22905 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22906 the first scheduling pass. This is partly due to an allocno of
22907 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22908 class, which gives too high a pressure for GENERAL_REGS and too low
22909 for VSX_REGS. So, force a choice of the subclass here.
22911 The best class is also the union if GENERAL_REGS and VSX_REGS have
22912 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22913 allocno class, since trying to narrow down the class by regno mode
22914 is prone to error. For example, SImode is allowed in VSX regs and
22915 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22916 it would be wrong to choose an allocno of GENERAL_REGS based on
22920 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED
,
22921 reg_class_t allocno_class
,
22922 reg_class_t best_class
)
22924 switch (allocno_class
)
22926 case GEN_OR_VSX_REGS
:
22927 /* best_class must be a subset of allocno_class. */
22928 gcc_checking_assert (best_class
== GEN_OR_VSX_REGS
22929 || best_class
== GEN_OR_FLOAT_REGS
22930 || best_class
== VSX_REGS
22931 || best_class
== ALTIVEC_REGS
22932 || best_class
== FLOAT_REGS
22933 || best_class
== GENERAL_REGS
22934 || best_class
== BASE_REGS
);
22935 /* Use best_class but choose wider classes when copying from the
22936 wider class to best_class is cheap. This mimics IRA choice
22937 of allocno class. */
22938 if (best_class
== BASE_REGS
)
22939 return GENERAL_REGS
;
22940 if (TARGET_VSX
&& best_class
== FLOAT_REGS
)
22945 if (best_class
== ALTIVEC_REGS
)
22946 return ALTIVEC_REGS
;
22952 return allocno_class
;
/* Load up a constant.  If the mode is a vector mode, splat the value across
   all of the vector elements.  */

static rtx
rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
{
  rtx reg;

  if (mode == SFmode || mode == DFmode)
    {
      rtx d = const_double_from_real_value (dconst, mode);
      reg = force_reg (mode, d);
    }
  else if (mode == V4SFmode)
    {
      rtx d = const_double_from_real_value (dconst, SFmode);
      rtvec v = gen_rtvec (4, d, d, d, d);
      reg = gen_reg_rtx (mode);
      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
    }
  else if (mode == V2DFmode)
    {
      rtx d = const_double_from_real_value (dconst, DFmode);
      rtvec v = gen_rtvec (2, d, d);
      reg = gen_reg_rtx (mode);
      rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
    }
  else
    gcc_unreachable ();

  return reg;
}
/* Generate an FMA instruction.  */

static void
rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (target);
  rtx dst;

  dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
  gcc_assert (dst != NULL);

  if (dst != target)
    emit_move_insn (target, dst);
}

/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a).  */

static void
rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (dst);
  rtx r;

  /* This is a tad more complicated, since the fnma_optab is for
     a different expression: fma(-m1, m2, a), which is the same
     thing except in the case of signed zeros.

     Fortunately we know that if FMA is supported that FNMSUB is
     also supported in the ISA.  Just expand it directly.  */

  gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);

  r = gen_rtx_NEG (mode, a);
  r = gen_rtx_FMA (mode, m1, m2, r);
  r = gen_rtx_NEG (mode, r);
  emit_insn (gen_rtx_SET (dst, r));
}
/* Newton-Raphson approximation of floating point divide DST = N/D.  If NOTE_P,
   add a reg_note saying that this was a division.  Support both scalar and
   vector divide.  Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
{
  machine_mode mode = GET_MODE (dst);
  rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
  int i;

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
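
  /* To make the counts concrete: a 14-bit estimate needs one pass to exceed
     SFmode's 23 bits (14 * 2 = 28), while a 5-bit estimate needs three
     (5 * 2^3 = 40); the two extra passes added for DFmode push either case
     past its 52-bit requirement.  */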
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes += 2;

  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  one = rs6000_load_constant_and_splat (mode, dconst1);

  /* x0 = 1./d estimate */
  x0 = gen_reg_rtx (mode);
  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
                                              UNSPEC_FRES)));

  /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i).  */
  if (passes > 1) {

    /* e0 = 1. - d * x0 */
    e0 = gen_reg_rtx (mode);
    rs6000_emit_nmsub (e0, d, x0, one);

    /* x1 = x0 + e0 * x0 */
    x1 = gen_reg_rtx (mode);
    rs6000_emit_madd (x1, e0, x0, x0);

    for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
         ++i, xprev = xnext, eprev = enext) {

      /* enext = eprev * eprev */
      enext = gen_reg_rtx (mode);
      emit_insn (gen_mul (enext, eprev, eprev));

      /* xnext = xprev + enext * xprev */
      xnext = gen_reg_rtx (mode);
      rs6000_emit_madd (xnext, enext, xprev, xprev);
    }

  } else
    xprev = x0;

  /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i).  */

  /* u = n * xprev */
  u = gen_reg_rtx (mode);
  emit_insn (gen_mul (u, n, xprev));

  /* v = n - (d * u) */
  v = gen_reg_rtx (mode);
  rs6000_emit_nmsub (v, d, u, n);

  /* dst = (v * xprev) + u */
  rs6000_emit_madd (dst, v, xprev, u);

  if (note_p)
    add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
}
/* Goldschmidt's Algorithm for single/double-precision floating point
   sqrt and rsqrt.  Assumes no trapping math and finite arguments.  */
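
/* Sketch of the refinement performed below: E approximates 1/sqrt(SRC);
   the code keeps g ~ sqrt(SRC) and h ~ 1/(2*sqrt(SRC)), and each pass
   computes t = 1/2 - g*h, then refines g <- g + g*t and h <- h + h*t,
   roughly doubling the number of correct bits.  The result is g for sqrt
   and 2*h for rsqrt.  */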
23105 rs6000_emit_swsqrt (rtx dst
, rtx src
, bool recip
)
23107 machine_mode mode
= GET_MODE (src
);
23108 rtx e
= gen_reg_rtx (mode
);
23109 rtx g
= gen_reg_rtx (mode
);
23110 rtx h
= gen_reg_rtx (mode
);
23112 /* Low precision estimates guarantee 5 bits of accuracy. High
23113 precision estimates guarantee 14 bits of accuracy. SFmode
23114 requires 23 bits of accuracy. DFmode requires 52 bits of
23115 accuracy. Each pass at least doubles the accuracy, leading
23116 to the following. */
23117 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
23118 if (mode
== DFmode
|| mode
== V2DFmode
)
23123 enum insn_code code
= optab_handler (smul_optab
, mode
);
23124 insn_gen_fn gen_mul
= GEN_FCN (code
);
23126 gcc_assert (code
!= CODE_FOR_nothing
);
23128 mhalf
= rs6000_load_constant_and_splat (mode
, dconsthalf
);
23130 /* e = rsqrt estimate */
23131 emit_insn (gen_rtx_SET (e
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, src
),
23134 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
23137 rtx zero
= force_reg (mode
, CONST0_RTX (mode
));
23139 if (mode
== SFmode
)
23141 rtx target
= emit_conditional_move (e
, { GT
, src
, zero
, mode
},
23144 emit_move_insn (e
, target
);
23148 rtx cond
= gen_rtx_GT (VOIDmode
, e
, zero
);
23149 rs6000_emit_vector_cond_expr (e
, e
, zero
, cond
, src
, zero
);
23153 /* g = sqrt estimate. */
23154 emit_insn (gen_mul (g
, e
, src
));
23155 /* h = 1/(2*sqrt) estimate. */
23156 emit_insn (gen_mul (h
, e
, mhalf
));
23162 rtx t
= gen_reg_rtx (mode
);
23163 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
23164 /* Apply correction directly to 1/rsqrt estimate. */
23165 rs6000_emit_madd (dst
, e
, t
, e
);
23169 for (i
= 0; i
< passes
; i
++)
23171 rtx t1
= gen_reg_rtx (mode
);
23172 rtx g1
= gen_reg_rtx (mode
);
23173 rtx h1
= gen_reg_rtx (mode
);
23175 rs6000_emit_nmsub (t1
, g
, h
, mhalf
);
23176 rs6000_emit_madd (g1
, g
, t1
, g
);
23177 rs6000_emit_madd (h1
, h
, t1
, h
);
23182 /* Multiply by 2 for 1/rsqrt. */
23183 emit_insn (gen_add3_insn (dst
, h
, h
));
23188 rtx t
= gen_reg_rtx (mode
);
23189 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
23190 rs6000_emit_madd (dst
, g
, t
, g
);
/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
   (Power7) targets.  DST is the target, and SRC is the argument operand.  */
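
/* For the popcntb fallback below: popcntb leaves each byte of TMP1 holding
   the population count of the corresponding byte of SRC; multiplying by
   0x01010101 (or its 64-bit replication) accumulates those byte counts into
   the most significant byte, which the final right shift by 24 (or 56)
   extracts as the total.  */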
void
rs6000_emit_popcount (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp1, tmp2;

  /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can.  */
  if (TARGET_POPCNTD)
    {
      if (mode == SImode)
        emit_insn (gen_popcntdsi2 (dst, src));
      else
        emit_insn (gen_popcntddi2 (dst, src));
      return;
    }

  tmp1 = gen_reg_rtx (mode);

  if (mode == SImode)
    {
      emit_insn (gen_popcntbsi2 (tmp1, src));
      tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
                          NULL_RTX, 0);
      tmp2 = force_reg (SImode, tmp2);
      emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
    }
  else
    {
      emit_insn (gen_popcntbdi2 (tmp1, src));
      tmp2 = expand_mult (DImode, tmp1,
                          GEN_INT ((HOST_WIDE_INT)
                                   0x01010101 << 32 | 0x01010101),
                          NULL_RTX, 0);
      tmp2 = force_reg (DImode, tmp2);
      emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
    }
}
/* Emit parity intrinsic on TARGET_POPCNTB targets.  DST is the
   target, and SRC is the argument operand.  */
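
/* In the expansion below, when the multiply is judged too expensive the
   per-byte counts from popcntb are instead folded with shift/xor pairs
   (the upper half xor'ed into the lower half, repeatedly) and the low bit
   is taken, since parity is simply the population count modulo 2.  */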
23242 rs6000_emit_parity (rtx dst
, rtx src
)
23244 machine_mode mode
= GET_MODE (dst
);
23247 tmp
= gen_reg_rtx (mode
);
23249 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
23252 if (mode
== SImode
)
23254 emit_insn (gen_popcntbsi2 (tmp
, src
));
23255 emit_insn (gen_paritysi2_cmpb (dst
, tmp
));
23259 emit_insn (gen_popcntbdi2 (tmp
, src
));
23260 emit_insn (gen_paritydi2_cmpb (dst
, tmp
));
23265 if (mode
== SImode
)
23267 /* Is mult+shift >= shift+xor+shift+xor? */
23268 if (rs6000_cost
->mulsi_const
>= COSTS_N_INSNS (3))
23270 rtx tmp1
, tmp2
, tmp3
, tmp4
;
23272 tmp1
= gen_reg_rtx (SImode
);
23273 emit_insn (gen_popcntbsi2 (tmp1
, src
));
23275 tmp2
= gen_reg_rtx (SImode
);
23276 emit_insn (gen_lshrsi3 (tmp2
, tmp1
, GEN_INT (16)));
23277 tmp3
= gen_reg_rtx (SImode
);
23278 emit_insn (gen_xorsi3 (tmp3
, tmp1
, tmp2
));
23280 tmp4
= gen_reg_rtx (SImode
);
23281 emit_insn (gen_lshrsi3 (tmp4
, tmp3
, GEN_INT (8)));
23282 emit_insn (gen_xorsi3 (tmp
, tmp3
, tmp4
));
23285 rs6000_emit_popcount (tmp
, src
);
23286 emit_insn (gen_andsi3 (dst
, tmp
, const1_rtx
));
23290 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
23291 if (rs6000_cost
->muldi
>= COSTS_N_INSNS (5))
23293 rtx tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
;
23295 tmp1
= gen_reg_rtx (DImode
);
23296 emit_insn (gen_popcntbdi2 (tmp1
, src
));
23298 tmp2
= gen_reg_rtx (DImode
);
23299 emit_insn (gen_lshrdi3 (tmp2
, tmp1
, GEN_INT (32)));
23300 tmp3
= gen_reg_rtx (DImode
);
23301 emit_insn (gen_xordi3 (tmp3
, tmp1
, tmp2
));
23303 tmp4
= gen_reg_rtx (DImode
);
23304 emit_insn (gen_lshrdi3 (tmp4
, tmp3
, GEN_INT (16)));
23305 tmp5
= gen_reg_rtx (DImode
);
23306 emit_insn (gen_xordi3 (tmp5
, tmp3
, tmp4
));
23308 tmp6
= gen_reg_rtx (DImode
);
23309 emit_insn (gen_lshrdi3 (tmp6
, tmp5
, GEN_INT (8)));
23310 emit_insn (gen_xordi3 (tmp
, tmp5
, tmp6
));
23313 rs6000_emit_popcount (tmp
, src
);
23314 emit_insn (gen_anddi3 (dst
, tmp
, const1_rtx
));
23318 /* Expand an Altivec constant permutation for little endian mode.
23319 OP0 and OP1 are the input vectors and TARGET is the output vector.
23320 SEL specifies the constant permutation vector.
23322 There are two issues: First, the two input operands must be
23323 swapped so that together they form a double-wide array in LE
23324 order. Second, the vperm instruction has surprising behavior
23325 in LE mode: it interprets the elements of the source vectors
23326 in BE mode ("left to right") and interprets the elements of
23327 the destination vector in LE mode ("right to left"). To
23328 correct for this, we must subtract each element of the permute
23329 control vector from 31.
23331 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
23332 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
23333 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
23334 serve as the permute control vector. Then, in BE mode,
23338 places the desired result in vr9. However, in LE mode the
23339 vector contents will be
23341 vr10 = 00000003 00000002 00000001 00000000
23342 vr11 = 00000007 00000006 00000005 00000004
23344 The result of the vperm using the same permute control vector is
23346 vr9 = 05000000 07000000 01000000 03000000
23348 That is, the leftmost 4 bytes of vr10 are interpreted as the
23349 source for the rightmost 4 bytes of vr9, and so on.
23351 If we change the permute control vector to
23353 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
23361 vr9 = 00000006 00000004 00000002 00000000. */
static void
altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
				  const vec_perm_indices &sel)
{
  unsigned int i;
  rtx perm[16];
  rtx constv, unspec;

  /* Unpack and adjust the constant selector.  */
  for (i = 0; i < 16; ++i)
    {
      unsigned int elt = 31 - (sel[i] & 31);
      perm[i] = GEN_INT (elt);
    }

  /* Expand to a permute, swapping the inputs and using the
     adjusted selector.  */
  if (!REG_P (op0))
    op0 = force_reg (V16QImode, op0);
  if (!REG_P (op1))
    op1 = force_reg (V16QImode, op1);

  constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
  constv = force_reg (V16QImode, constv);
  unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
			   UNSPEC_VPERM);
  if (!REG_P (target))
    {
      rtx tmp = gen_reg_rtx (V16QImode);
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}
/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
   permute control vector.  But here it's not a constant, so we must
   generate a vector NAND or NOR to do the adjustment.  */

void
altivec_expand_vec_perm_le (rtx operands[4])
{
  rtx notx, iorx, unspec;
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx sel = operands[3];
  rtx tmp = target;
  rtx norreg = gen_reg_rtx (V16QImode);
  machine_mode mode = GET_MODE (target);

  /* Get everything in regs so the pattern matches.  */
  if (!REG_P (op0))
    op0 = force_reg (mode, op0);
  if (!REG_P (op1))
    op1 = force_reg (mode, op1);
  if (!REG_P (sel))
    sel = force_reg (V16QImode, sel);
  if (!REG_P (target))
    tmp = gen_reg_rtx (mode);

  if (TARGET_P9_VECTOR)
    unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
			     UNSPEC_VPERMR);
  else
    {
      /* Invert the selector with a VNAND if available, else a VNOR.
	 The VNAND is preferred for future fusion opportunities.  */
      notx = gen_rtx_NOT (V16QImode, sel);
      iorx = (TARGET_P8_VECTOR
	      ? gen_rtx_IOR (V16QImode, notx, notx)
	      : gen_rtx_AND (V16QImode, notx, notx));
      emit_insn (gen_rtx_SET (norreg, iorx));

      /* Permute with operands reversed and adjusted selector.  */
      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
			       UNSPEC_VPERM);
    }

  /* Copy into target, possibly by way of a register.  */
  if (!REG_P (target))
    {
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}
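
/* Illustrative note (not from the sources): for the non-constant selector the
   VNAND/VNOR above performs the same adjustment as the constant case, since
   vperm only looks at the low five bits of each byte and, for 0 <= x < 32,
   the identity (~x & 31) == 31 - x holds; e.g. x = 5 gives ~5 & 31 == 26
   == 31 - 5.  */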
/* Expand an Altivec constant permutation.  Return true if we match
   an efficient implementation; false to fall back to VPERM.

   OP0 and OP1 are the input vectors and TARGET is the output vector.
   SEL specifies the constant permutation vector.  */

static bool
altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
			       const vec_perm_indices &sel)
{
  struct altivec_perm_insn {
    HOST_WIDE_INT mask;
    enum insn_code impl;
    unsigned char perm[16];
  };
  static const struct altivec_perm_insn patterns[] = {
    {OPTION_MASK_ALTIVEC,
     CODE_FOR_altivec_vpkuhum_direct,
     {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
    {OPTION_MASK_ALTIVEC,
     CODE_FOR_altivec_vpkuwum_direct,
     {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
    {OPTION_MASK_ALTIVEC,
     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
		      : CODE_FOR_altivec_vmrglb_direct,
     {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
    {OPTION_MASK_ALTIVEC,
     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
		      : CODE_FOR_altivec_vmrglh_direct,
     {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
    {OPTION_MASK_ALTIVEC,
     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
		      : CODE_FOR_altivec_vmrglw_direct_v4si,
     {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
    {OPTION_MASK_ALTIVEC,
     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
		      : CODE_FOR_altivec_vmrghb_direct,
     {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
    {OPTION_MASK_ALTIVEC,
     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
		      : CODE_FOR_altivec_vmrghh_direct,
     {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
    {OPTION_MASK_ALTIVEC,
     BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
		      : CODE_FOR_altivec_vmrghw_direct_v4si,
     {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
    {OPTION_MASK_P8_VECTOR,
     BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
		      : CODE_FOR_p8_vmrgow_v4sf_direct,
     {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
    {OPTION_MASK_P8_VECTOR,
     BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
		      : CODE_FOR_p8_vmrgew_v4sf_direct,
     {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
    {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
     {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
    {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
     {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
    {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
     {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
    {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
     {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};

  unsigned int i, j, elt, which;
  unsigned char perm[16];
  rtx x;
  bool one_vec;

  /* Unpack the constant selector.  */
  for (i = which = 0; i < 16; ++i)
    {
      elt = sel[i] & 31;
      which |= (elt < 16 ? 1 : 2);
      perm[i] = elt;
    }

  /* Simplify the constant selector based on operands.  */
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      one_vec = false;
      if (!rtx_equal_p (op0, op1))
	break;
      /* FALLTHRU */

    case 2:
      for (i = 0; i < 16; ++i)
	perm[i] &= 15;
      /* FALLTHRU */

    case 1:
      one_vec = true;
      break;
    }

  /* Look for splat patterns.  */
  if (one_vec)
    {
      elt = perm[0];

      for (i = 0; i < 16; ++i)
	if (perm[i] != elt)
	  break;
      if (i == 16)
	{
	  if (!BYTES_BIG_ENDIAN)
	    elt = 15 - elt;
	  emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
	  return true;
	}

      if (elt % 2 == 0)
	{
	  for (i = 0; i < 16; i += 2)
	    if (perm[i] != elt || perm[i + 1] != elt + 1)
	      break;
	  if (i == 16)
	    {
	      int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
	      x = gen_reg_rtx (V8HImode);
	      emit_insn (gen_altivec_vsplth_direct (x,
						    gen_lowpart (V8HImode, op0),
						    GEN_INT (field)));
	      emit_move_insn (target, gen_lowpart (V16QImode, x));
	      return true;
	    }
	}

      if (elt % 4 == 0)
	{
	  for (i = 0; i < 16; i += 4)
	    if (perm[i] != elt
		|| perm[i + 1] != elt + 1
		|| perm[i + 2] != elt + 2
		|| perm[i + 3] != elt + 3)
	      break;
	  if (i == 16)
	    {
	      int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
	      x = gen_reg_rtx (V4SImode);
	      emit_insn (gen_altivec_vspltw_direct (x,
						    gen_lowpart (V4SImode, op0),
						    GEN_INT (field)));
	      emit_move_insn (target, gen_lowpart (V16QImode, x));
	      return true;
	    }
	}
    }

  /* Look for merge and pack patterns.  */
  for (j = 0; j < ARRAY_SIZE (patterns); ++j)
    {
      bool swapped;

      if ((patterns[j].mask & rs6000_isa_flags) == 0)
	continue;

      elt = patterns[j].perm[0];
      if (perm[0] == elt)
	swapped = false;
      else if (perm[0] == elt + 16)
	swapped = true;
      else
	continue;
      for (i = 1; i < 16; ++i)
	{
	  elt = patterns[j].perm[i];
	  if (swapped)
	    elt = (elt >= 16 ? elt - 16 : elt + 16);
	  else if (one_vec && elt >= 16)
	    elt -= 16;
	  if (perm[i] != elt)
	    break;
	}
      if (i == 16)
	{
	  enum insn_code icode = patterns[j].impl;
	  machine_mode omode = insn_data[icode].operand[0].mode;
	  machine_mode imode = insn_data[icode].operand[1].mode;

	  rtx perm_idx = GEN_INT (0);
	  if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
	    {
	      int perm_val = 0;
	      if (one_vec)
		{
		  if (perm[0] == 8)
		    perm_val |= 2;
		  if (perm[8] == 8)
		    perm_val |= 1;
		}
	      else
		{
		  if (perm[0] != 0)
		    perm_val |= 2;
		  if (perm[8] != 16)
		    perm_val |= 1;
		}
	      perm_idx = GEN_INT (perm_val);
	    }

	  /* For little-endian, don't use vpkuwum and vpkuhum if the
	     underlying vector type is not V4SI and V8HI, respectively.
	     For example, using vpkuwum with a V8HI picks up the even
	     halfwords (BE numbering) when the even halfwords (LE
	     numbering) are what we need.  */
	  if (!BYTES_BIG_ENDIAN
	      && icode == CODE_FOR_altivec_vpkuwum_direct
	      && ((REG_P (op0)
		   && GET_MODE (op0) != V4SImode)
		  || (SUBREG_P (op0)
		      && GET_MODE (XEXP (op0, 0)) != V4SImode)))
	    continue;
	  if (!BYTES_BIG_ENDIAN
	      && icode == CODE_FOR_altivec_vpkuhum_direct
	      && ((REG_P (op0)
		   && GET_MODE (op0) != V8HImode)
		  || (SUBREG_P (op0)
		      && GET_MODE (XEXP (op0, 0)) != V8HImode)))
	    continue;

	  /* For little-endian, the two input operands must be swapped
	     (or swapped back) to ensure proper right-to-left numbering
	     in case of a splat.  */
	  if (swapped == BYTES_BIG_ENDIAN
	      && icode != CODE_FOR_vsx_xxpermdi_v16qi)
	    std::swap (op0, op1);
	  if (imode != V16QImode)
	    {
	      op0 = gen_lowpart (imode, op0);
	      op1 = gen_lowpart (imode, op1);
	    }
	  if (omode == V16QImode)
	    x = target;
	  else
	    x = gen_reg_rtx (omode);
	  if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
	    emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
	  else
	    emit_insn (GEN_FCN (icode) (x, op0, op1));
	  if (omode != V16QImode)
	    emit_move_insn (target, gen_lowpart (V16QImode, x));
	  return true;
	}
    }

  if (!BYTES_BIG_ENDIAN)
    {
      altivec_expand_vec_perm_const_le (target, op0, op1, sel);
      return true;
    }

  return false;
}
/* Expand a VSX Permute Doubleword constant permutation.
   Return true if we match an efficient implementation.  */

static bool
rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
				unsigned char perm0, unsigned char perm1)
{
  rtx x;

  /* If both selectors come from the same operand, fold to single op.  */
  if ((perm0 & 2) == (perm1 & 2))
    {
      if (perm0 & 2)
	op0 = op1;
      else
	op1 = op0;
    }
  /* If both operands are equal, fold to simpler permutation.  */
  else if (rtx_equal_p (op0, op1))
    {
      perm0 = perm0 & 1;
      perm1 = (perm1 & 1) + 2;
    }
  /* If the first selector comes from the second operand, swap.  */
  else if (perm0 & 2)
    {
      if (perm1 & 2)
	return false;
      perm0 -= 2;
      perm1 += 2;
      std::swap (op0, op1);
    }
  /* If the second selector does not come from the second operand, fail.  */
  else if ((perm1 & 2) == 0)
    return false;

  /* Success!  */
  if (target != NULL)
    {
      machine_mode vmode, dmode;
      rtvec v;

      vmode = GET_MODE (target);
      gcc_assert (GET_MODE_NUNITS (vmode) == 2);
      dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
      x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
      v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
      x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
      emit_insn (gen_rtx_SET (target, x));
    }
  return true;
}
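
/* Illustrative example (not from the sources): PERM0 and PERM1 index the
   doublewords of the concatenation {op0[0], op0[1], op1[0], op1[1]}.  With
   perm0 = 2 and perm1 = 3 both halves come from OP1, so the first case above
   folds the permute into a single-operand form; with perm0 = 2 and perm1 = 0
   the operands are swapped first so that the selectors again name the first
   and second operand in order.  */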
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */

static bool
rs6000_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
				 rtx target, rtx op0, rtx op1,
				 const vec_perm_indices &sel)
{
  if (vmode != op_mode)
    return false;

  bool testing_p = !target;

  /* AltiVec (and thus VSX) can handle arbitrary permutations.  */
  if (TARGET_ALTIVEC && testing_p)
    return true;

  if (op0)
    {
      rtx nop0 = force_reg (vmode, op0);
      if (op0 == op1)
	op1 = nop0;
      op0 = nop0;
    }
  if (op1)
    op1 = force_reg (vmode, op1);

  /* Check for ps_merge* or xxpermdi insns.  */
  if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
    {
      if (testing_p)
	{
	  op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
	  op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
	}
      if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
	return true;
    }

  if (TARGET_ALTIVEC)
    {
      /* Force the target-independent code to lower to V16QImode.  */
      if (vmode != V16QImode)
	return false;
      if (altivec_expand_vec_perm_const (target, op0, op1, sel))
	return true;
    }

  return false;
}
/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
   OP0 and OP1 are the input vectors and TARGET is the output vector.
   PERM specifies the constant permutation vector.  */

static void
rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
			   machine_mode vmode, const vec_perm_builder &perm)
{
  rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
  if (x != target)
    emit_move_insn (target, x);
}
/* Expand an extract even operation.  */

void
rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
{
  machine_mode vmode = GET_MODE (target);
  unsigned i, nelt = GET_MODE_NUNITS (vmode);
  vec_perm_builder perm (nelt, nelt, 1);

  for (i = 0; i < nelt; i++)
    perm.quick_push (i * 2);

  rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
}
/* Expand a vector interleave operation.  */

void
rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
{
  machine_mode vmode = GET_MODE (target);
  unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
  vec_perm_builder perm (nelt, nelt, 1);

  high = (highp ? 0 : nelt / 2);
  for (i = 0; i < nelt / 2; i++)
    {
      perm.quick_push (i + high);
      perm.quick_push (i + nelt + high);
    }

  rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
}
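
/* Illustrative example (not from the sources): for a V4SI target the loop
   above builds the selector {0, 4, 1, 5} when HIGHP is true and {2, 6, 3, 7}
   when it is false, i.e. the high or low halves of the two inputs are
   interleaved element by element.  */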
/* Scale a V2DF vector SRC by two to the SCALE and place in TGT.  */
void
rs6000_scale_v2df (rtx tgt, rtx src, int scale)
{
  HOST_WIDE_INT hwi_scale (scale);
  REAL_VALUE_TYPE r_pow;
  rtvec v = rtvec_alloc (2);
  rtx elt;
  rtx scale_vec = gen_reg_rtx (V2DFmode);
  (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
  elt = const_double_from_real_value (r_pow, DFmode);
  RTVEC_ELT (v, 0) = elt;
  RTVEC_ELT (v, 1) = elt;
  rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
  emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
}
/* Return an RTX representing where to find the function value of a
   function returning MODE.  */
static rtx
rs6000_complex_function_value (machine_mode mode)
{
  unsigned int regno;
  rtx r1, r2;
  machine_mode inner = GET_MODE_INNER (mode);
  unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);

  if (TARGET_FLOAT128_TYPE
      && (mode == KCmode
	  || (mode == TCmode && TARGET_IEEEQUAD)))
    regno = ALTIVEC_ARG_RETURN;

  else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    regno = FP_ARG_RETURN;

  else
    {
      regno = GP_ARG_RETURN;

      /* 32-bit is OK since it'll go in r3/r4.  */
      if (TARGET_32BIT && inner_bytes >= 4)
	return gen_rtx_REG (mode, regno);
    }

  if (inner_bytes >= 8)
    return gen_rtx_REG (mode, regno);

  r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
			  const0_rtx);
  r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
			  GEN_INT (inner_bytes));
  return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
}
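
/* Illustrative example (not from the sources): an SCmode value (complex
   float, 4-byte parts) returned in floating-point registers takes the
   PARALLEL path above, describing the real part in FP_ARG_RETURN at byte
   offset 0 and the imaginary part in FP_ARG_RETURN + 1 at byte offset 4.  */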
/* Return an rtx describing a return value of MODE as a PARALLEL
   in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
   stride REG_STRIDE.  */

static rtx
rs6000_parallel_return (machine_mode mode,
			int n_elts, machine_mode elt_mode,
			unsigned int regno, unsigned int reg_stride)
{
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));

  int i;
  for (i = 0; i < n_elts; i++)
    {
      rtx r = gen_rtx_REG (elt_mode, regno);
      rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
      XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
      regno += reg_stride;
    }

  return par;
}
/* Target hook for TARGET_FUNCTION_VALUE.

   An integer value is in r3 and a floating-point value is in fp1,
   unless -msoft-float.  */

static rtx
rs6000_function_value (const_tree valtype,
		       const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
		       bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  unsigned int regno;
  machine_mode elt_mode;
  int n_elts;

  /* Special handling for structs in darwin64.  */
  if (TARGET_MACHO
      && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
    {
      CUMULATIVE_ARGS valcum;
      rtx valret;

      valcum.words = 0;
      valcum.fregno = FP_ARG_MIN_REG;
      valcum.vregno = ALTIVEC_ARG_MIN_REG;
      /* Do a trial code generation as if this were going to be passed as
	 an argument; if any part goes in memory, we return NULL.  */
      valret = rs6000_darwin64_record_arg (&valcum, valtype, true,
					   /* retval= */ true);
      if (valret)
	return valret;
      /* Otherwise fall through to standard ABI rules.  */
    }

  mode = TYPE_MODE (valtype);

  /* The ELFv2 ABI returns homogeneous VFP aggregates in registers.  */
  if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
    {
      int first_reg, n_regs;

      if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
	{
	  /* _Decimal128 must use even/odd register pairs.  */
	  first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
	  n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
	}
      else
	{
	  first_reg = ALTIVEC_ARG_RETURN;
	  n_regs = 1;
	}

      return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
    }

  /* Some return value types need be split in -mpowerpc64, 32bit ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64)
    switch (mode)
      {
      default:
	break;
      case E_DImode:
      case E_SCmode:
      case E_DCmode:
      case E_TCmode:
	int count = GET_MODE_SIZE (mode) / 4;
	return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
      }

  if ((INTEGRAL_TYPE_P (valtype)
       && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
      || POINTER_TYPE_P (valtype))
    mode = TARGET_32BIT ? SImode : DImode;

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
	   && !FLOAT128_VECTOR_P (mode))
    regno = FP_ARG_RETURN;
  else if (TREE_CODE (valtype) == COMPLEX_TYPE
	   && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if ((VECTOR_TYPE_P (valtype) || VECTOR_ALIGNMENT_P (mode))
	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
	   && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
    regno = ALTIVEC_ARG_RETURN;
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}
/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */
static rtx
rs6000_libcall_value (machine_mode mode)
{
  unsigned int regno;

  /* Long long return value need be split in -mpowerpc64, 32bit ABI.  */
  if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
    return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);

  if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
    /* _Decimal128 must use an even/odd register pair.  */
    regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
  else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
    regno = FP_ARG_RETURN;
  /* VSX is a superset of Altivec and adds V2DImode/V2DFmode.  Since the same
     return register is used in both cases, and we won't see V2DImode/V2DFmode
     for pure altivec, combine the two cases.  */
  else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
	   && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
    regno = ALTIVEC_ARG_RETURN;
  else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
    return rs6000_complex_function_value (mode);
  else
    regno = GP_ARG_RETURN;

  return gen_rtx_REG (mode, regno);
}
/* Compute register pressure classes.  We implement the target hook to avoid
   IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
   lead to incorrect estimates of the number of available registers and
   therefore increased register pressure/spill.  */
static int
rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
{
  int n;

  n = 0;
  pressure_classes[n++] = GENERAL_REGS;
  if (TARGET_ALTIVEC)
    pressure_classes[n++] = ALTIVEC_REGS;
  if (TARGET_VSX)
    pressure_classes[n++] = VSX_REGS;
  else
    {
      if (TARGET_HARD_FLOAT)
	pressure_classes[n++] = FLOAT_REGS;
    }
  pressure_classes[n++] = CR_REGS;
  pressure_classes[n++] = SPECIAL_REGS;

  return n;
}
/* Given FROM and TO register numbers, say whether this elimination is allowed.
   Frame pointer elimination is automatically handled.

   For the RS/6000, if frame pointer elimination is being done, we would like
   to convert ap into fp, not sp.

   We need r30 if -mminimal-toc was specified, and there are constant pool
   references.  */

static bool
rs6000_can_eliminate (const int from, const int to)
{
  return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  ? ! frame_pointer_needed
	  : from == RS6000_PIC_OFFSET_TABLE_REGNUM
	    ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
	      || constant_pool_empty_p ()
	    : true);
}
/* Define the offset between two registers, FROM to be eliminated and its
   replacement TO, at the start of a routine.  */
HOST_WIDE_INT
rs6000_initial_elimination_offset (int from, int to)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  HOST_WIDE_INT offset;

  if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? 0 : -info->total_size;
  else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    {
      offset = info->push_p ? 0 : -info->total_size;
      if (FRAME_GROWS_DOWNWARD)
	offset += info->fixed_size + info->vars_size + info->parm_size;
    }
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = FRAME_GROWS_DOWNWARD
	     ? info->fixed_size + info->vars_size + info->parm_size
	     : 0;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    offset = info->total_size;
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    offset = info->push_p ? info->total_size : 0;
  else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
    offset = 0;
  else
    gcc_unreachable ();

  return offset;
}
/* Fill in sizes of registers used by unwinder.  */
static void
rs6000_init_dwarf_reg_sizes_extra (tree address)
{
  if (TARGET_MACHO && ! TARGET_ALTIVEC)
    {
      int i;
      machine_mode mode = TYPE_MODE (char_type_node);
      rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
      rtx mem = gen_rtx_MEM (BLKmode, addr);
      rtx value = gen_int_mode (16, mode);

      /* On Darwin, libgcc may be built to run on both G3 and G4/5.
	 The unwinder still needs to know the size of Altivec registers.  */

      for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
	{
	  int column = DWARF_REG_TO_UNWIND_COLUMN
		(DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
	  HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);

	  emit_move_insn (adjust_address (mem, mode, offset), value);
	}
    }
}
/* Map internal gcc register numbers to debug format register numbers.
   FORMAT specifies the type of debug register number to use:
     0 -- debug information, except for frame-related sections
     1 -- DWARF .debug_frame section
     2 -- DWARF .eh_frame section  */

unsigned int
rs6000_debugger_regno (unsigned int regno, unsigned int format)
{
  /* On some platforms, we use the standard DWARF register
     numbering for .debug_info and .debug_frame.  */
  if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
    {
#ifdef RS6000_USE_DWARF_NUMBERING
      if (regno <= 31)
	return regno;
      if (FP_REGNO_P (regno))
	return regno - FIRST_FPR_REGNO + 32;
      if (ALTIVEC_REGNO_P (regno))
	return regno - FIRST_ALTIVEC_REGNO + 1124;
      if (regno == LR_REGNO)
	return 108;
      if (regno == CTR_REGNO)
	return 109;
      if (regno == CA_REGNO)
	return 101;  /* XER */
      /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
	 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
	 The actual code emitted saves the whole of CR, so we map CR2_REGNO
	 to the DWARF reg for CR.  */
      if (format == 1 && regno == CR2_REGNO)
	return 64;
      if (CR_REGNO_P (regno))
	return regno - CR0_REGNO + 86;
      if (regno == VRSAVE_REGNO)
	return 356;
      if (regno == VSCR_REGNO)
	return 67;

      /* These do not make much sense.  */
      if (regno == FRAME_POINTER_REGNUM)
	return 111;
      if (regno == ARG_POINTER_REGNUM)
	return 67;

      gcc_unreachable ();
#endif
    }

  /* We use the GCC 7 (and before) internal number for non-DWARF debug
     information, and also for .eh_frame.  */
  /* Translate the regnos to their numbers in GCC 7 (and before).  */
  if (regno <= 31)
    return regno;
  if (FP_REGNO_P (regno))
    return regno - FIRST_FPR_REGNO + 32;
  if (ALTIVEC_REGNO_P (regno))
    return regno - FIRST_ALTIVEC_REGNO + 77;
  if (regno == LR_REGNO)
    return 65;
  if (regno == CTR_REGNO)
    return 66;
  if (regno == CA_REGNO)
    return 76;  /* XER */
  if (CR_REGNO_P (regno))
    return regno - CR0_REGNO + 68;
  if (regno == VRSAVE_REGNO)
    return 109;
  if (regno == VSCR_REGNO)
    return 110;

  if (regno == FRAME_POINTER_REGNUM)
    return 111;
  if (regno == ARG_POINTER_REGNUM)
    return 67;

  gcc_unreachable ();
}
/* target hook eh_return_filter_mode */
static scalar_int_mode
rs6000_eh_return_filter_mode (void)
{
  return TARGET_32BIT ? SImode : word_mode;
}
/* Target hook for translate_mode_attribute.  */
static machine_mode
rs6000_translate_mode_attribute (machine_mode mode)
{
  if ((FLOAT128_IEEE_P (mode)
       && ieee128_float_type_node == long_double_type_node)
      || (FLOAT128_IBM_P (mode)
	  && ibm128_float_type_node == long_double_type_node))
    return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
  return mode;
}
/* Target hook for scalar_mode_supported_p.  */
static bool
rs6000_scalar_mode_supported_p (scalar_mode mode)
{
  /* -m32 does not support TImode.  This is the default, from
     default_scalar_mode_supported_p.  For -m32 -mpowerpc64 we want the
     same ABI as for -m32.  But default_scalar_mode_supported_p allows
     integer modes of precision 2 * BITS_PER_WORD, which matches TImode
     for -mpowerpc64.  */
  if (TARGET_32BIT && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Target hook for libgcc_floating_mode_supported_p.  */

static bool
rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
{
  switch (mode)
    {
    case E_SFmode:
    case E_DFmode:
    case E_TFmode:
      return true;

      /* We only return true for KFmode if IEEE 128-bit types are supported,
	 and if long double does not use the IEEE 128-bit format.  If long
	 double uses the IEEE 128-bit format, it will use TFmode and not
	 KFmode.  Because the code will not use KFmode in that case, there
	 will be aborts because it can't find KFmode in the Floatn types.  */
    case E_KFmode:
      return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;

    default:
      return false;
    }
}
/* Target hook for vector_mode_supported_p.  */
static bool
rs6000_vector_mode_supported_p (machine_mode mode)
{
  /* There is no vector form for IEEE 128-bit.  If we return true for IEEE
     128-bit, the compiler might try to widen IEEE 128-bit to IBM
     double-double.  */
  if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
    return true;
  else
    return false;
}
/* Target hook for floatn_mode.  */
static opt_scalar_float_mode
rs6000_floatn_mode (int n, bool extended)
{
  if (extended)
    {
      switch (n)
	{
	case 32:
	  return DFmode;

	case 64:
	  if (TARGET_FLOAT128_TYPE)
	    return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
	  else
	    return opt_scalar_float_mode ();

	case 128:
	  return opt_scalar_float_mode ();

	default:
	  /* Those are the only valid _FloatNx types.  */
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (n)
	{
	case 32:
	  return SFmode;

	case 64:
	  return DFmode;

	case 128:
	  if (TARGET_FLOAT128_TYPE)
	    return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
	  else
	    return opt_scalar_float_mode ();

	default:
	  return opt_scalar_float_mode ();
	}
    }
}
/* Target hook for c_mode_for_suffix.  */
static machine_mode
rs6000_c_mode_for_suffix (char suffix)
{
  if (TARGET_FLOAT128_TYPE)
    {
      if (suffix == 'q' || suffix == 'Q')
	return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;

      /* At the moment, we are not defining a suffix for IBM extended double.
	 If/when the default for -mabi=ieeelongdouble is changed, and we want
	 to support __ibm128 constants in legacy library code, we may need to
	 re-evaluate this decision.  Currently, c-lex.cc only supports 'w' and
	 'q' as machine dependent suffixes.  The x86_64 port uses 'w' for
	 __float80 constants.  */
    }

  return VOIDmode;
}
/* Target hook for invalid_arg_for_unprototyped_fn. */
static const char *
invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl,
				 const_tree val)
{
  return (!rs6000_darwin64_abi
	  && typelist == 0
	  && VECTOR_TYPE_P (TREE_TYPE (val))
	  && (funcdecl == NULL_TREE
	      || (TREE_CODE (funcdecl) == FUNCTION_DECL
		  && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD
		  && !fndecl_built_in_p (funcdecl, BUILT_IN_CLASSIFY_TYPE))))
	 ? N_("AltiVec argument passed to unprototyped function")
	 : NULL;
}
/* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
   setup by using __stack_chk_fail_local hidden function instead of
   calling __stack_chk_fail directly.  Otherwise it is better to call
   __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
rs6000_stack_protect_fail (void)
{
  return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
	 ? default_hidden_stack_protect_fail ()
	 : default_external_stack_protect_fail ();
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
rs6000_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
}
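
/* Illustrative example (not from the sources): with this offset, 64-bit
   AddressSanitizer instrumentation computes the shadow byte for an address
   as shadow = (addr >> 3) + (1ULL << 41), i.e. shadow base 0x20000000000.  */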
/* Mask options that we want to support inside of attribute((target)) and
   #pragma GCC target operations.  Note, we do not include things like
   64/32-bit, endianness, hard/soft floating point, etc. that would have
   different calling sequences.  */

struct rs6000_opt_mask {
  const char *name;		/* option name */
  HOST_WIDE_INT mask;		/* mask to set */
  bool invert;			/* invert sense of mask */
  bool valid_target;		/* option is a target option */
};

static struct rs6000_opt_mask const rs6000_opt_masks[] =
{
  { "altivec",			OPTION_MASK_ALTIVEC,		false, true },
  { "block-ops-unaligned-vsx",	OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX, false, true },
  { "block-ops-vector-pair",	OPTION_MASK_BLOCK_OPS_VECTOR_PAIR, false, true },
  { "cmpb",			OPTION_MASK_CMPB,		false, true },
  { "crypto",			OPTION_MASK_CRYPTO,		false, true },
  { "direct-move",		OPTION_MASK_DIRECT_MOVE,	false, true },
  { "dlmzb",			OPTION_MASK_DLMZB,		false, true },
  { "efficient-unaligned-vsx",	OPTION_MASK_EFFICIENT_UNALIGNED_VSX, false, true },
  { "float128",			OPTION_MASK_FLOAT128_KEYWORD,	false, true },
  { "float128-hardware",	OPTION_MASK_FLOAT128_HW,	false, true },
  { "fprnd",			OPTION_MASK_FPRND,		false, true },
  { "power10",			OPTION_MASK_POWER10,		false, true },
  { "hard-dfp",			OPTION_MASK_DFP,		false, true },
  { "htm",			OPTION_MASK_HTM,		false, true },
  { "isel",			OPTION_MASK_ISEL,		false, true },
  { "mfcrf",			OPTION_MASK_MFCRF,		false, true },
  { "mfpgpr",			0,				false, true },
  { "mma",			OPTION_MASK_MMA,		false, true },
  { "modulo",			OPTION_MASK_MODULO,		false, true },
  { "mulhw",			OPTION_MASK_MULHW,		false, true },
  { "multiple",			OPTION_MASK_MULTIPLE,		false, true },
  { "pcrel",			OPTION_MASK_PCREL,		false, true },
  { "pcrel-opt",		OPTION_MASK_PCREL_OPT,		false, true },
  { "popcntb",			OPTION_MASK_POPCNTB,		false, true },
  { "popcntd",			OPTION_MASK_POPCNTD,		false, true },
  { "power8-fusion",		OPTION_MASK_P8_FUSION,		false, true },
  { "power8-fusion-sign",	OPTION_MASK_P8_FUSION_SIGN,	false, true },
  { "power8-vector",		OPTION_MASK_P8_VECTOR,		false, true },
  { "power9-minmax",		OPTION_MASK_P9_MINMAX,		false, true },
  { "power9-misc",		OPTION_MASK_P9_MISC,		false, true },
  { "power9-vector",		OPTION_MASK_P9_VECTOR,		false, true },
  { "power10-fusion",		OPTION_MASK_P10_FUSION,		false, true },
  { "powerpc-gfxopt",		OPTION_MASK_PPC_GFXOPT,		false, true },
  { "powerpc-gpopt",		OPTION_MASK_PPC_GPOPT,		false, true },
  { "prefixed",			OPTION_MASK_PREFIXED,		false, true },
  { "quad-memory",		OPTION_MASK_QUAD_MEMORY,	false, true },
  { "quad-memory-atomic",	OPTION_MASK_QUAD_MEMORY_ATOMIC,	false, true },
  { "recip-precision",		OPTION_MASK_RECIP_PRECISION,	false, true },
  { "save-toc-indirect",	OPTION_MASK_SAVE_TOC_INDIRECT,	false, true },
  { "string",			0,				false, true },
  { "update",			OPTION_MASK_NO_UPDATE,		true , true },
  { "vsx",			OPTION_MASK_VSX,		false, true },
#ifdef OPTION_MASK_64BIT
#if TARGET_AIX_OS
  { "aix64",			OPTION_MASK_64BIT,		false, false },
  { "aix32",			OPTION_MASK_64BIT,		true,  false },
#else
  { "64",			OPTION_MASK_64BIT,		false, false },
  { "32",			OPTION_MASK_64BIT,		true,  false },
#endif
#endif
#ifdef OPTION_MASK_EABI
  { "eabi",			OPTION_MASK_EABI,		false, false },
#endif
#ifdef OPTION_MASK_LITTLE_ENDIAN
  { "little",			OPTION_MASK_LITTLE_ENDIAN,	false, false },
  { "big",			OPTION_MASK_LITTLE_ENDIAN,	true,  false },
#endif
#ifdef OPTION_MASK_RELOCATABLE
  { "relocatable",		OPTION_MASK_RELOCATABLE,	false, false },
#endif
#ifdef OPTION_MASK_STRICT_ALIGN
  { "strict-align",		OPTION_MASK_STRICT_ALIGN,	false, false },
#endif
  { "soft-float",		OPTION_MASK_SOFT_FLOAT,		false, false },
  { "string",			0,				false, false },
};
/* Option variables that we want to support inside attribute((target)) and
   #pragma GCC target operations.  */

struct rs6000_opt_var {
  const char *name;		/* option name */
  size_t global_offset;		/* offset of the option in global_options.  */
  size_t target_offset;		/* offset of the option in target options.  */
};

static struct rs6000_opt_var const rs6000_opt_vars[] =
{
  { "friz",
    offsetof (struct gcc_options, x_TARGET_FRIZ),
    offsetof (struct cl_target_option, x_TARGET_FRIZ), },
  { "avoid-indexed-addresses",
    offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
    offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
  { "longcall",
    offsetof (struct gcc_options, x_rs6000_default_long_calls),
    offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
  { "optimize-swaps",
    offsetof (struct gcc_options, x_rs6000_optimize_swaps),
    offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
  { "allow-movmisalign",
    offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
    offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
  { "sched-groups",
    offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
    offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
  { "always-hint",
    offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
    offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
  { "align-branch-targets",
    offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
    offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
  { "sched-prolog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  { "sched-epilog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  { "speculate-indirect-jumps",
    offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
    offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
};
/* Inner function to handle attribute((target("..."))) and #pragma GCC target
   parsing.  Return true if there were no errors.  */

static bool
rs6000_inner_target_options (tree args, bool attr_p)
{
  bool ret = true;

  if (args == NULL_TREE)
    ;

  else if (TREE_CODE (args) == STRING_CST)
    {
      char *p = ASTRDUP (TREE_STRING_POINTER (args));
      char *q;

      while ((q = strtok (p, ",")) != NULL)
	{
	  bool error_p = false;
	  bool not_valid_p = false;
	  const char *cpu_opt = NULL;

	  p = NULL;
	  if (startswith (q, "cpu="))
	    {
	      int cpu_index = rs6000_cpu_name_lookup (q+4);
	      if (cpu_index >= 0)
		rs6000_cpu_index = cpu_index;
	      else
		{
		  error_p = true;
		  cpu_opt = q+4;
		}
	    }
	  else if (startswith (q, "tune="))
	    {
	      int tune_index = rs6000_cpu_name_lookup (q+5);
	      if (tune_index >= 0)
		rs6000_tune_index = tune_index;
	      else
		{
		  error_p = true;
		  cpu_opt = q+5;
		}
	    }
	  else
	    {
	      size_t i;
	      bool invert = false;
	      char *r = q;

	      error_p = true;
	      if (startswith (r, "no-"))
		{
		  invert = true;
		  r += 3;
		}

	      for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
		if (strcmp (r, rs6000_opt_masks[i].name) == 0)
		  {
		    HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;

		    if (!rs6000_opt_masks[i].valid_target)
		      not_valid_p = true;
		    else
		      {
			error_p = false;
			rs6000_isa_flags_explicit |= mask;

			/* VSX needs altivec, so -mvsx automagically sets
			   altivec and disables -mavoid-indexed-addresses.  */
			if (!invert)
			  {
			    if (mask == OPTION_MASK_VSX)
			      {
				mask |= OPTION_MASK_ALTIVEC;
				TARGET_AVOID_XFORM = 0;
			      }
			  }

			if (rs6000_opt_masks[i].invert)
			  invert = !invert;

			if (invert)
			  rs6000_isa_flags &= ~mask;
			else
			  rs6000_isa_flags |= mask;
		      }
		    break;
		  }

	      if (error_p && !not_valid_p)
		{
		  for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
		    if (strcmp (r, rs6000_opt_vars[i].name) == 0)
		      {
			size_t j = rs6000_opt_vars[i].global_offset;
			*((int *) ((char *)&global_options + j)) = !invert;
			error_p = false;
			not_valid_p = false;
			break;
		      }
		}
	    }

	  if (error_p)
	    {
	      const char *eprefix, *esuffix;

	      ret = false;
	      if (attr_p)
		{
		  eprefix = "__attribute__((__target__(";
		  esuffix = ")))";
		}
	      else
		{
		  eprefix = "#pragma GCC target ";
		  esuffix = "";
		}

	      if (cpu_opt)
		error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
		       q, esuffix);
	      else if (not_valid_p)
		error ("%s%qs%s is not allowed", eprefix, q, esuffix);
	      else
		error ("%s%qs%s is invalid", eprefix, q, esuffix);
	    }
	}
    }

  else if (TREE_CODE (args) == TREE_LIST)
    {
      do
	{
	  tree value = TREE_VALUE (args);
	  if (value)
	    {
	      bool ret2 = rs6000_inner_target_options (value, attr_p);
	      if (!ret2)
		ret = false;
	    }
	  args = TREE_CHAIN (args);
	}
      while (args != NULL_TREE);
    }

  else
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  return ret;
}
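
/* Illustrative usage (not from the sources): the strings parsed above come
   from constructs such as

     __attribute__((target ("cpu=power9,no-vsx"))) void f (void);
     #pragma GCC target ("htm,tune=power10")

   where each comma-separated entry is either cpu=/tune= or one of the
   rs6000_opt_masks / rs6000_opt_vars names, optionally prefixed with "no-".  */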
/* Print out the target options as a list for -mdebug=target.  */

static void
rs6000_debug_target_options (tree args, const char *prefix)
{
  if (args == NULL_TREE)
    fprintf (stderr, "%s<NULL>", prefix);

  else if (TREE_CODE (args) == STRING_CST)
    {
      char *p = ASTRDUP (TREE_STRING_POINTER (args));
      char *q;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  fprintf (stderr, "%s\"%s\"", prefix, q);
	}
    }

  else if (TREE_CODE (args) == TREE_LIST)
    {
      do
	{
	  tree value = TREE_VALUE (args);
	  if (value)
	    rs6000_debug_target_options (value, prefix);
	  args = TREE_CHAIN (args);
	}
      while (args != NULL_TREE);
    }

  else
    gcc_unreachable ();
}
/* Hook to validate attribute((target("..."))).  */

static bool
rs6000_valid_attribute_p (tree fndecl,
			  tree ARG_UNUSED (name),
			  tree args,
			  int flags)
{
  struct cl_target_option cur_target;
  bool ret;
  tree old_optimize;
  tree new_target, new_optimize;
  tree func_optimize;

  gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));

  if (TARGET_DEBUG_TARGET)
    {
      tree tname = DECL_NAME (fndecl);
      fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
      if (tname)
	fprintf (stderr, "function: %.*s\n",
		 (int) IDENTIFIER_LENGTH (tname),
		 IDENTIFIER_POINTER (tname));
      else
	fprintf (stderr, "function: unknown\n");

      fprintf (stderr, "args:");
      rs6000_debug_target_options (args, " ");
      fprintf (stderr, "\n");

      if (flags)
	fprintf (stderr, "flags: 0x%x\n", flags);

      fprintf (stderr, "--------------------\n");
    }

  /* attribute((target("default"))) does nothing, beyond
     affecting multi-versioning.  */
  if (TREE_VALUE (args)
      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
      && TREE_CHAIN (args) == NULL_TREE
      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
    return true;

  old_optimize = build_optimization_node (&global_options,
					  &global_options_set);
  func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options, &global_options_set,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options, &global_options_set);
  rs6000_cpu_index = rs6000_tune_index = -1;
  ret = rs6000_inner_target_options (args, true);

  /* Set up any additional state.  */
  if (ret)
    {
      ret = rs6000_option_override_internal (false);
      new_target = build_target_option_node (&global_options,
					     &global_options_set);
    }
  else
    new_target = NULL;

  new_optimize = build_optimization_node (&global_options,
					  &global_options_set);

  if (!new_target)
    ret = false;

  else if (fndecl)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &global_options_set, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options, &global_options_set,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}
/* Hook to validate the current #pragma GCC target and set the state, and
   update the macros based on what was changed.  If ARGS is NULL, then
   POP_TARGET is used to reset the options.  */

bool
rs6000_pragma_target_parse (tree args, tree pop_target)
{
  tree prev_tree = build_target_option_node (&global_options,
					     &global_options_set);
  tree cur_tree;
  struct cl_target_option *prev_opt, *cur_opt;
  HOST_WIDE_INT prev_flags, cur_flags, diff_flags;

  if (TARGET_DEBUG_TARGET)
    {
      fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
      fprintf (stderr, "args:");
      rs6000_debug_target_options (args, " ");
      fprintf (stderr, "\n");

      if (pop_target)
	{
	  fprintf (stderr, "pop_target:\n");
	  debug_tree (pop_target);
	}
      else
	fprintf (stderr, "pop_target: <NULL>\n");

      fprintf (stderr, "--------------------\n");
    }

  if (! args)
    {
      cur_tree = ((pop_target)
		  ? pop_target
		  : target_option_default_node);
      cl_target_option_restore (&global_options, &global_options_set,
				TREE_TARGET_OPTION (cur_tree));
    }
  else
    {
      rs6000_cpu_index = rs6000_tune_index = -1;
      if (!rs6000_inner_target_options (args, false)
	  || !rs6000_option_override_internal (false)
	  || (cur_tree = build_target_option_node (&global_options,
						   &global_options_set))
	     == NULL_TREE)
	{
	  if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
	    fprintf (stderr, "invalid pragma\n");

	  return false;
	}
    }

  target_option_current_node = cur_tree;
  rs6000_activate_target_options (target_option_current_node);

  /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
     change the macros that are defined.  */
  if (rs6000_target_modify_macros_ptr)
    {
      prev_opt = TREE_TARGET_OPTION (prev_tree);
      prev_flags = prev_opt->x_rs6000_isa_flags;

      cur_opt = TREE_TARGET_OPTION (cur_tree);
      cur_flags = cur_opt->x_rs6000_isa_flags;

      diff_flags = (prev_flags ^ cur_flags);

      if (diff_flags != 0)
	{
	  /* Delete old macros.  */
	  rs6000_target_modify_macros_ptr (false,
					   prev_flags & diff_flags);

	  /* Define new macros.  */
	  rs6000_target_modify_macros_ptr (true,
					   cur_flags & diff_flags);
	}
    }

  return true;
}
/* Remember the last target of rs6000_set_current_function.  */
static GTY(()) tree rs6000_previous_fndecl;

/* Restore target's globals from NEW_TREE and invalidate the
   rs6000_previous_fndecl cache.  */

void
rs6000_activate_target_options (tree new_tree)
{
  cl_target_option_restore (&global_options, &global_options_set,
			    TREE_TARGET_OPTION (new_tree));
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
  rs6000_previous_fndecl = NULL_TREE;
}
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
rs6000_set_current_function (tree fndecl)
{
  if (TARGET_DEBUG_TARGET)
    {
      fprintf (stderr, "\n==================== rs6000_set_current_function");

      if (fndecl)
	fprintf (stderr, ", fndecl %s (%p)",
		 (DECL_NAME (fndecl)
		  ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
		  : "<unknown>"), (void *)fndecl);

      if (rs6000_previous_fndecl)
	fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);

      fprintf (stderr, "\n");
    }

  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl == rs6000_previous_fndecl)
    return;

  tree old_tree;
  if (rs6000_previous_fndecl == NULL_TREE)
    old_tree = target_option_current_node;
  else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
    old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
  else
    old_tree = target_option_default_node;

  tree new_tree;
  if (fndecl == NULL_TREE)
    {
      if (old_tree != target_option_current_node)
	new_tree = target_option_current_node;
      else
	new_tree = NULL_TREE;
    }
  else
    {
      new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
      if (new_tree == NULL_TREE)
	new_tree = target_option_default_node;
    }

  if (TARGET_DEBUG_TARGET)
    {
      if (new_tree)
	{
	  fprintf (stderr, "\nnew fndecl target specific options:\n");
	  debug_tree (new_tree);
	}

      if (old_tree)
	{
	  fprintf (stderr, "\nold fndecl target specific options:\n");
	  debug_tree (old_tree);
	}

      if (old_tree != NULL_TREE || new_tree != NULL_TREE)
	fprintf (stderr, "--------------------\n");
    }

  if (new_tree && old_tree != new_tree)
    rs6000_activate_target_options (new_tree);

  rs6000_previous_fndecl = fndecl;
}
/* Save the current options */

static void
rs6000_function_specific_save (struct cl_target_option *ptr,
			       struct gcc_options *opts,
			       struct gcc_options * /* opts_set */)
{
  ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
  ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
}

/* Restore the current options */

static void
rs6000_function_specific_restore (struct gcc_options *opts,
				  struct gcc_options * /* opts_set */,
				  struct cl_target_option *ptr)
{
  opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
  opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
  (void) rs6000_option_override_internal (false);
}
/* Print the current options */

static void
rs6000_function_specific_print (FILE *file, int indent,
				struct cl_target_option *ptr)
{
  rs6000_print_isa_options (file, indent, "Isa options set",
			    ptr->x_rs6000_isa_flags);

  rs6000_print_isa_options (file, indent, "Isa options explicit",
			    ptr->x_rs6000_isa_flags_explicit);
}
/* Helper function to print the current isa or misc options on a line.  */

static void
rs6000_print_options_internal (FILE *file,
			       int indent,
			       const char *string,
			       HOST_WIDE_INT flags,
			       const char *prefix,
			       const struct rs6000_opt_mask *opts,
			       size_t num_elements)
{
  size_t i;
  size_t start_column = 0;
  size_t cur_column;
  size_t max_column = 120;
  size_t prefix_len = strlen (prefix);
  size_t comma_len = 0;
  const char *comma = "";

  if (indent)
    start_column += fprintf (file, "%*s", indent, "");

  if (!flags)
    {
      fprintf (stderr, DEBUG_FMT_S, string, "<none>");
      return;
    }

  start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);

  /* Print the various mask options.  */
  cur_column = start_column;
  for (i = 0; i < num_elements; i++)
    {
      bool invert = opts[i].invert;
      const char *name = opts[i].name;
      const char *no_str = "";
      HOST_WIDE_INT mask = opts[i].mask;
      size_t len = comma_len + prefix_len + strlen (name);

      if (!invert)
	{
	  if ((flags & mask) == 0)
	    {
	      no_str = "no-";
	      len += strlen ("no-");
	    }

	  flags &= ~mask;
	}
      else
	{
	  if ((flags & mask) != 0)
	    {
	      no_str = "no-";
	      len += strlen ("no-");
	    }

	  flags &= ~mask;
	}

      cur_column += len;
      if (cur_column > max_column)
	{
	  fprintf (stderr, ", \\\n%*s", (int)start_column, "");
	  cur_column = start_column + len;
	  comma = "";
	}

      fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
      comma = ", ";
      comma_len = strlen (", ");
    }

  fputs ("\n", file);
}
/* Helper function to print the current isa options on a line.  */

static void
rs6000_print_isa_options (FILE *file, int indent, const char *string,
			  HOST_WIDE_INT flags)
{
  rs6000_print_options_internal (file, indent, string, flags, "-m",
				 &rs6000_opt_masks[0],
				 ARRAY_SIZE (rs6000_opt_masks));
}
/* If the user used -mno-vsx, we need to turn off all of the implicit ISA
   2.06, 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
   -mupper-regs-df, etc.).

   If the user used -mno-power8-vector, we need to turn off all of the implicit
   ISA 2.07 and 3.0 options that relate to the vector unit.

   If the user used -mno-power9-vector, we need to turn off all of the implicit
   ISA 3.0 options that relate to the vector unit.

   This function does not handle explicit options such as the user specifying
   -mdirect-move.  These are handled in rs6000_option_override_internal, and
   the appropriate error is given if needed.

   We return a mask of all of the implicit options that should not be enabled
   by default.  */

static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void)
{
  HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
  size_t i, j;

  static const struct {
    const HOST_WIDE_INT no_flag;	/* flag explicitly turned off.  */
    const HOST_WIDE_INT dep_flags;	/* flags that depend on this option.  */
    const char *const name;		/* name of the switch.  */
  } flags[] = {
    { OPTION_MASK_P9_VECTOR,	OTHER_P9_VECTOR_MASKS,	"power9-vector" },
    { OPTION_MASK_P8_VECTOR,	OTHER_P8_VECTOR_MASKS,	"power8-vector" },
    { OPTION_MASK_VSX,		OTHER_VSX_VECTOR_MASKS,	"vsx" },
    { OPTION_MASK_ALTIVEC,	OTHER_ALTIVEC_MASKS,	"altivec" },
  };

  for (i = 0; i < ARRAY_SIZE (flags); i++)
    {
      HOST_WIDE_INT no_flag = flags[i].no_flag;

      if ((rs6000_isa_flags & no_flag) == 0
	  && (rs6000_isa_flags_explicit & no_flag) != 0)
	{
	  HOST_WIDE_INT dep_flags = flags[i].dep_flags;
	  HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
				     & rs6000_isa_flags
				     & dep_flags);

	  if (set_flags)
	    {
	      for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
		if ((set_flags & rs6000_opt_masks[j].mask) != 0)
		  {
		    set_flags &= ~rs6000_opt_masks[j].mask;
		    error ("%<-mno-%s%> turns off %<-m%s%>",
			   flags[i].name,
			   rs6000_opt_masks[j].name);
		  }

	      gcc_assert (!set_flags);
	    }

	  rs6000_isa_flags &= ~dep_flags;
	  ignore_masks |= no_flag | dep_flags;
	}
    }

  return ignore_masks;
}
/* Helper function for printing the function name when debugging.  */

static const char *
get_decl_name (tree fn)
{
  tree name;

  if (!fn)
    return "<null>";

  name = DECL_NAME (fn);
  if (!name)
    return "<no-name>";

  return IDENTIFIER_POINTER (name);
}
/* Return the clone id of the target we are compiling code for in a target
   clone.  The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
   the priority list for the target clones (ordered from lowest to
   highest).  */

static int
rs6000_clone_priority (tree fndecl)
{
  tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
  HOST_WIDE_INT isa_masks;
  int ret = CLONE_DEFAULT;
  tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
  const char *attrs_str = NULL;

  attrs = TREE_VALUE (TREE_VALUE (attrs));
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Return priority zero for default function.  Return the ISA needed for the
     function if it is not the default.  */
  if (strcmp (attrs_str, "default") != 0)
    {
      if (fn_opts == NULL_TREE)
	fn_opts = target_option_default_node;

      if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
	isa_masks = rs6000_isa_flags;
      else
	isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;

      for (ret = CLONE_MAX - 1; ret != 0; ret--)
	if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
	  break;
    }

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
	     get_decl_name (fndecl), ret);

  return ret;
}
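
/* Illustrative usage (not from the sources): the priority computed above
   orders the versions produced by a declaration such as

     __attribute__((target_clones ("cpu=power10", "cpu=power9", "default")))
     int f (void);

   so that the highest-priority clone supported at run time is dispatched.  */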
/* This compares the priority of target features in function DECL1 and DECL2.
   It returns positive value if DECL1 is higher priority, negative value if
   DECL2 is higher priority and 0 if they are the same.  Note, priorities are
   ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0).  */

static int
rs6000_compare_version_priority (tree decl1, tree decl2)
{
  int priority1 = rs6000_clone_priority (decl1);
  int priority2 = rs6000_clone_priority (decl2);
  int ret = priority1 - priority2;

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
	     get_decl_name (decl1), get_decl_name (decl2), ret);

  return ret;
}
/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */

static tree
rs6000_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;
  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
	     get_decl_name (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;

  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      const tree decl2 = default_version_info->this_node->decl;
      if (is_function_default_version (decl2))
	break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
	default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
  error_at (DECL_SOURCE_LOCATION (default_node->decl),
	    "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
	    "exports hardware capability bits");
#else
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);
      TREE_NOTHROW (dispatch_decl) = TREE_NOTHROW (fn);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
	= dispatcher_node->insert_new_function_version ();
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
	{
	  it_v->dispatcher_resolver = dispatch_decl;
	  it_v = it_v->next;
	}
    }
  else
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
		"multiversioning needs %<ifunc%> which is not supported "
		"on this target");
    }
#endif

  return dispatch_decl;
}
/* Make the resolver function decl to dispatch the versions of a multi-
   versioned function, DEFAULT_DECL.  Create an empty basic block in the
   resolver and store the pointer in EMPTY_BB.  Return the decl of the
   resolver function.  */

static tree
make_resolver_func (const tree default_decl,
		    const tree dispatch_decl,
		    basic_block *empty_bb)
{
  /* Make the resolver function static.  The resolver function returns
     void *.  */
  tree decl_name = clone_function_name (default_decl, "resolver");
  const char *resolver_name = IDENTIFIER_POINTER (decl_name);
  tree type = build_function_type_list (ptr_type_node, NULL_TREE);
  tree decl = build_fn_decl (resolver_name, type);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (dispatch_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
	 versioned function will put out a resolver.  Ensure it
	 is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  else
    TREE_PUBLIC (dispatch_decl) = 0;

  /* Build result decl and add to function_decl.  */
  tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE,
		       ptr_type_node);
  DECL_CONTEXT (t) = decl;
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false,
					   profile_count::uninitialized ());

  cgraph_node::add_new_function (decl, true);
  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));

  pop_cfun ();

  /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (dispatch_decl)
    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));

  cgraph_node::create_same_body_alias (dispatch_decl, decl);

  return decl;
}
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
   return a pointer to VERSION_DECL if we are running on a machine that
   supports the index CLONE_ISA hardware architecture bits.  This function will
   be called during version dispatch to decide which function version to
   execute.  It returns the basic block at the end, to which more conditions
   can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     int clone_isa, basic_block new_bb)
{
  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gimple_seq gseq = bb_seq (new_bb);

  tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			      build_fold_addr_expr (version_decl));
  tree result_var = create_tmp_var (ptr_type_node);
  gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
  gimple *return_stmt = gimple_build_return (result_var);

  if (clone_isa == CLONE_DEFAULT)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  tree bool_zero = build_int_cst (bool_int_type_node, 0);
  tree cond_var = create_tmp_var (bool_int_type_node);
  tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
  const char *arg_str = rs6000_clone_map[clone_isa].name;
  tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
  gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
  gimple_call_set_lhs (call_cond_stmt, cond_var);

  gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (call_cond_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, call_cond_stmt);

  gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
					    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  basic_block bb1 = new_bb;
  edge e12 = split_block (bb1, if_else_stmt);
  basic_block bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  edge e23 = split_block (bb2, return_stmt);
  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  basic_block bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();
  return bb3;
}
/* This function generates the dispatch function for multi-versioned functions.
   DISPATCH_DECL is the function which will contain the dispatch logic.
   FNDECLS are the function choices for dispatch, and is a tree chain.
   EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
   code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  int ix;
  tree ele;
  vec<tree> *fndecls;
  tree clones[CLONE_MAX];

  if (TARGET_DEBUG_TARGET)
    fputs ("dispatch_function_versions, top\n", stderr);

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /* fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  gcc_assert (fndecls->length () >= 2);

  /* The first version in the vector is the default decl.  */
  memset ((void *) clones, '\0', sizeof (clones));
  clones[CLONE_DEFAULT] = (*fndecls)[0];

  /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
     on the PowerPC (on the x86_64, it is not a NOP).  The builtin function
     __builtin_cpu_support ensures that the TOC fields are setup by requiring a
     recent glibc.  If we ever need to call __builtin_cpu_init, we would need
     to insert the code here to do the call.  */

  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      int priority = rs6000_clone_priority (ele);
      if (!clones[priority])
	clones[priority] = ele;
    }

  for (ix = CLONE_MAX - 1; ix >= 0; ix--)
    if (clones[ix])
      {
	if (TARGET_DEBUG_TARGET)
	  fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
		   ix, get_decl_name (clones[ix]));

	*empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
					 *empty_bb);
      }

  return 0;
}
/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

static tree
rs6000_generate_version_dispatcher_body (void *node_p)
{
  tree resolver;
  basic_block empty_bb;
  struct cgraph_node *node = (cgraph_node *) node_p;
  struct cgraph_function_version_info *ninfo = node->function_version ();

  if (ninfo->dispatcher_resolver)
    return ninfo->dispatcher_resolver;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  /* The first version in the chain corresponds to the default version.  */
  ninfo->dispatcher_resolver = resolver
    = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n",
	     get_decl_name (resolver));

  push_cfun (DECL_STRUCT_FUNCTION (resolver));
  auto_vec<tree, 2> fn_ver_vec;

  for (struct cgraph_function_version_info *vinfo = ninfo->next;
       vinfo;
       vinfo = vinfo->next)
    {
      struct cgraph_node *version = vinfo->this_node;
      /* Check for virtual functions here again, as by this time it should
	 have been determined if this function needs a vtable index or
	 not.  This happens for methods in derived classes that override
	 virtual methods in base classes but are not explicitly marked as
	 virtual.  */
      if (DECL_VINDEX (version->decl))
	sorry ("Virtual function multiversioning not supported");

      fn_ver_vec.safe_push (version->decl);
    }

  dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
  cgraph_edge::rebuild_edges ();
  pop_cfun ();
  return resolver;
}
/* Hook to decide if we need to scan function gimple statements to
   collect target specific information for inlining, and update the
   corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
   to predict which ISA feature is used at this time.  Return true
   if we need to scan, otherwise return false.  */

static bool
rs6000_need_ipa_fn_target_info (const_tree decl,
				unsigned int &info ATTRIBUTE_UNUSED)
{
  tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
  if (!target)
    target = target_option_default_node;
  struct cl_target_option *opts = TREE_TARGET_OPTION (target);

  /* See PR102059, we only handle HTM for now, so will only do
     the consequent scannings when HTM feature enabled.  */
  if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
    return true;

  return false;
}

/* Hook to update target specific information INFO for inlining by
   checking the given STMT.  Return false if we don't need to scan
   any more, otherwise return true.  */

static bool
rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
{
#ifndef HAVE_AS_POWER10_HTM
  /* Assume inline asm can use any instruction features.  */
  if (gimple_code (stmt) == GIMPLE_ASM)
    {
      const char *asm_str = gimple_asm_string (as_a<const gasm *> (stmt));
      /* Ignore empty inline asm string.  */
      if (strlen (asm_str) > 0)
	/* We should set any bits we are concerned with here; for now
	   OPTION_MASK_HTM is the only bit we care about.  */
	info |= RS6000_FN_TARGET_INFO_HTM;
      return false;
    }
#endif

  if (gimple_code (stmt) == GIMPLE_CALL)
    {
      tree fndecl = gimple_call_fndecl (stmt);
      if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
	{
	  enum rs6000_gen_builtins fcode
	    = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
	  /* HTM bifs definitely exploit HTM insns.  */
	  if (bif_is_htm (rs6000_builtin_info[fcode]))
	    {
	      info |= RS6000_FN_TARGET_INFO_HTM;
	      return false;
	    }
	}
    }

  return true;
}
/* Hook to determine if one function can safely inline another.  */

static bool
rs6000_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If the caller/callee has option attributes, then use them.
     Otherwise, use the command line options.  */
  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);

  HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
  HOST_WIDE_INT caller_isa = caller_opts->x_rs6000_isa_flags;
  HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;

  cgraph_node *callee_node = cgraph_node::get (callee);
  if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
    {
      unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
      if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
	{
	  callee_isa &= ~OPTION_MASK_HTM;
	  explicit_isa &= ~OPTION_MASK_HTM;
	}
    }

  /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
     purposes.  */
  callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
  explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);

  /* The callee's options must be a subset of the caller's options, i.e.
     a vsx function may inline an altivec function, but a no-vsx function
     must not inline a vsx function.  However, for those options that the
     callee has explicitly enabled or disabled, then we must enforce that
     the callee's and caller's options match exactly; see PR70010.  */
  if (((caller_isa & callee_isa) == callee_isa)
      && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
    ret = true;

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
	     get_decl_name (caller), get_decl_name (callee),
	     (ret ? "can" : "cannot"));

  return ret;
}
/* Allocate a stack temp and fixup the address so it meets the particular
   memory requirements (either offsettable or REG+REG addressing).  */

rtx
rs6000_allocate_stack_temp (machine_mode mode,
			    bool offsettable_p,
			    bool reg_reg_p)
{
  rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  rtx addr = XEXP (stack, 0);
  int strict_p = reload_completed;

  if (!legitimate_indirect_address_p (addr, strict_p))
    {
      if (offsettable_p
	  && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
	stack = replace_equiv_address (stack, copy_addr_to_reg (addr));

      else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
	stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
    }

  return stack;
}

/* Given a memory reference, if it is not a reg or reg+reg addressing,
   convert to such a form to deal with memory reference instructions
   like STFIWX and LDBRX that only take reg+reg addressing.  */

rtx
rs6000_force_indexed_or_indirect_mem (rtx x)
{
  machine_mode mode = GET_MODE (x);

  gcc_assert (MEM_P (x));
  if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
    {
      rtx addr = XEXP (x, 0);
      if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  rtx reg = XEXP (addr, 0);
	  HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
	  rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
	  gcc_assert (REG_P (reg));
	  emit_insn (gen_add3_insn (reg, reg, size_rtx));
	  addr = reg;
	}
      else if (GET_CODE (addr) == PRE_MODIFY)
	{
	  rtx reg = XEXP (addr, 0);
	  rtx expr = XEXP (addr, 1);
	  gcc_assert (REG_P (reg));
	  gcc_assert (GET_CODE (expr) == PLUS);
	  emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
	  addr = reg;
	}

      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0);
	  rtx op1 = XEXP (addr, 1);
	  op0 = force_reg (Pmode, op0);
	  op1 = force_reg (Pmode, op1);
	  x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
	}
      else
	x = replace_equiv_address (x, force_reg (Pmode, addr));
    }

  return x;
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the RS/6000, all integer constants are acceptable; most won't be valid
   for particular insns, though.  Only easy FP constants are acceptable.  */

static bool
rs6000_legitimate_constant_p (machine_mode mode, rtx x)
{
  if (TARGET_ELF && tls_referenced_p (x))
    return false;

  if (CONST_DOUBLE_P (x))
    return easy_fp_constant (x, mode);

  if (GET_CODE (x) == CONST_VECTOR)
    return easy_vector_constant (x, mode);

  return true;
}

/* Implement TARGET_PRECOMPUTE_TLS_P.

   On AIX, TLS symbols are in the TOC, which is maintained in the
   constant pool.  AIX TOC TLS symbols need to be pre-computed, but
   must be considered legitimate constants.  */

static bool
rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return tls_referenced_p (x);
}

/* Return TRUE iff the sequence ending in LAST sets the static chain.  */

static bool
chain_already_loaded (rtx_insn *last)
{
  for (; last != NULL; last = PREV_INSN (last))
    {
      if (NONJUMP_INSN_P (last))
	{
	  rtx patt = PATTERN (last);

	  if (GET_CODE (patt) == SET)
	    {
	      rtx lhs = XEXP (patt, 0);

	      if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
		return true;
	    }
	}
    }
  return false;
}
25935 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25938 rs6000_call_aix (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25940 rtx func
= func_desc
;
25941 rtx toc_reg
= gen_rtx_REG (Pmode
, TOC_REGNUM
);
25942 rtx toc_load
= NULL_RTX
;
25943 rtx toc_restore
= NULL_RTX
;
25945 rtx abi_reg
= NULL_RTX
;
25949 bool is_pltseq_longcall
;
25952 tlsarg
= global_tlsarg
;
25954 /* Handle longcall attributes. */
25955 is_pltseq_longcall
= false;
25956 if ((INTVAL (cookie
) & CALL_LONG
) != 0
25957 && GET_CODE (func_desc
) == SYMBOL_REF
)
25959 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
25961 is_pltseq_longcall
= true;
25964 /* Handle indirect calls. */
25965 if (!SYMBOL_REF_P (func
)
25966 || (DEFAULT_ABI
== ABI_AIX
&& !SYMBOL_REF_FUNCTION_P (func
)))
25968 if (!rs6000_pcrel_p ())
25970 /* Save the TOC into its reserved slot before the call,
25971 and prepare to restore it after the call. */
25972 rtx stack_toc_offset
= GEN_INT (RS6000_TOC_SAVE_SLOT
);
25973 rtx stack_toc_unspec
= gen_rtx_UNSPEC (Pmode
,
25974 gen_rtvec (1, stack_toc_offset
),
25976 toc_restore
= gen_rtx_SET (toc_reg
, stack_toc_unspec
);
25978 /* Can we optimize saving the TOC in the prologue or
25979 do we need to do it at every call? */
25980 if (TARGET_SAVE_TOC_INDIRECT
&& !cfun
->calls_alloca
)
25981 cfun
->machine
->save_toc_in_prologue
= true;
25984 rtx stack_ptr
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
25985 rtx stack_toc_mem
= gen_frame_mem (Pmode
,
25986 gen_rtx_PLUS (Pmode
, stack_ptr
,
25987 stack_toc_offset
));
25988 MEM_VOLATILE_P (stack_toc_mem
) = 1;
25989 if (is_pltseq_longcall
)
25991 rtvec v
= gen_rtvec (3, toc_reg
, func_desc
, tlsarg
);
25992 rtx mark_toc_reg
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25993 emit_insn (gen_rtx_SET (stack_toc_mem
, mark_toc_reg
));
25996 emit_move_insn (stack_toc_mem
, toc_reg
);
26000 if (DEFAULT_ABI
== ABI_ELFv2
)
26002 /* A function pointer in the ELFv2 ABI is just a plain address, but
26003 the ABI requires it to be loaded into r12 before the call. */
26004 func_addr
= gen_rtx_REG (Pmode
, 12);
26005 emit_move_insn (func_addr
, func
);
26006 abi_reg
= func_addr
;
26007 /* Indirect calls via CTR are strongly preferred over indirect
26008 calls via LR, so move the address there. Needed to mark
26009 this insn for linker plt sequence editing too. */
26010 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26011 if (is_pltseq_longcall
)
26013 rtvec v
= gen_rtvec (3, abi_reg
, func_desc
, tlsarg
);
26014 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26015 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
26016 v
= gen_rtvec (2, func_addr
, func_desc
);
26017 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26020 emit_move_insn (func_addr
, abi_reg
);
26024 /* A function pointer under AIX is a pointer to a data area whose
26025 first word contains the actual address of the function, whose
26026 second word contains a pointer to its TOC, and whose third word
26027 contains a value to place in the static chain register (r11).
26028 Note that if we load the static chain, our "trampoline" need
26029 not have any executable code. */
26031 /* Load up address of the actual function. */
26032 func
= force_reg (Pmode
, func
);
26033 func_addr
= gen_reg_rtx (Pmode
);
26034 emit_move_insn (func_addr
, gen_rtx_MEM (Pmode
, func
));
26036 /* Indirect calls via CTR are strongly preferred over indirect
26037 calls via LR, so move the address there. */
26038 rtx ctr_reg
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26039 emit_move_insn (ctr_reg
, func_addr
);
26040 func_addr
= ctr_reg
;
26042 /* Prepare to load the TOC of the called function. Note that the
26043 TOC load must happen immediately before the actual call so
26044 that unwinding the TOC registers works correctly. See the
26045 comment in frob_update_context. */
26046 rtx func_toc_offset
= GEN_INT (GET_MODE_SIZE (Pmode
));
26047 rtx func_toc_mem
= gen_rtx_MEM (Pmode
,
26048 gen_rtx_PLUS (Pmode
, func
,
26050 toc_load
= gen_rtx_USE (VOIDmode
, func_toc_mem
);
26052 /* If we have a static chain, load it up. But, if the call was
26053 originally direct, the 3rd word has not been written since no
26054 trampoline has been built, so we ought not to load it, lest we
26055 override a static chain value. */
26056 if (!(GET_CODE (func_desc
) == SYMBOL_REF
26057 && SYMBOL_REF_FUNCTION_P (func_desc
))
26058 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
26059 && !chain_already_loaded (get_current_sequence ()->next
->last
))
26061 rtx sc_reg
= gen_rtx_REG (Pmode
, STATIC_CHAIN_REGNUM
);
26062 rtx func_sc_offset
= GEN_INT (2 * GET_MODE_SIZE (Pmode
));
26063 rtx func_sc_mem
= gen_rtx_MEM (Pmode
,
26064 gen_rtx_PLUS (Pmode
, func
,
26066 emit_move_insn (sc_reg
, func_sc_mem
);
26073 /* No TOC register needed for calls from PC-relative callers. */
26074 if (!rs6000_pcrel_p ())
26075 /* Direct calls use the TOC: for local calls, the callee will
26076 assume the TOC register is set; for non-local calls, the
26077 PLT stub needs the TOC register. */
26082 /* Create the call. */
26083 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26084 if (value
!= NULL_RTX
)
26085 call
[0] = gen_rtx_SET (value
, call
[0]);
26086 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
26090 call
[n_call
++] = toc_load
;
26092 call
[n_call
++] = toc_restore
;
26094 call
[n_call
++] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
26096 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n_call
, call
));
26097 insn
= emit_call_insn (insn
);
26099 /* Mention all registers defined by the ABI to hold information
26100 as uses in CALL_INSN_FUNCTION_USAGE. */
26102 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
/* Expand code to perform a sibling call under the AIX or ELFv2 ABI.  */

void
rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
{
  rtx call[2];
  rtx insn;
  rtx r12 = NULL_RTX;
  rtx func_addr = func_desc;

  if (global_tlsarg)
    tlsarg = global_tlsarg;

  /* Handle longcall attributes.  */
  if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc))
    {
      /* PCREL can do a sibling call to a longcall function
	 because we don't need to restore the TOC register.  */
      gcc_assert (rs6000_pcrel_p ());
      func_desc = rs6000_longcall_ref (func_desc, tlsarg);
    }
  else
    gcc_assert (INTVAL (cookie) == 0);

  /* For ELFv2, r12 and CTR need to hold the function address
     for an indirect call.  */
  if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
    {
      r12 = gen_rtx_REG (Pmode, 12);
      emit_move_insn (r12, func_desc);
      func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
      emit_move_insn (func_addr, r12);
    }

  /* Create the call.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);

  call[1] = simple_return_rtx;

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
  insn = emit_call_insn (insn);

  /* Note use of the TOC register.  */
  if (!rs6000_pcrel_p ())
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
	     gen_rtx_REG (Pmode, TOC_REGNUM));

  /* Note use of r12.  */
  if (r12)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
}
26159 /* Expand code to perform a call under the SYSV4 ABI. */
26162 rs6000_call_sysv (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
26164 rtx func
= func_desc
;
26168 rtx abi_reg
= NULL_RTX
;
26172 tlsarg
= global_tlsarg
;
26174 /* Handle longcall attributes. */
26175 if ((INTVAL (cookie
) & CALL_LONG
) != 0
26176 && GET_CODE (func_desc
) == SYMBOL_REF
)
26178 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
26179 /* If the longcall was implemented as an inline PLT call using
26180 PLT unspecs then func will be REG:r11. If not, func will be
26181 a pseudo reg. The inline PLT call sequence supports lazy
26182 linking (and longcalls to functions in dlopen'd libraries).
26183 The other style of longcalls don't. The lazy linking entry
26184 to the dynamic symbol resolver requires r11 be the function
26185 address (as it is for linker generated PLT stubs). Ensure
26186 r11 stays valid to the bctrl by marking r11 used by the call. */
26191 /* Handle indirect calls. */
26192 if (GET_CODE (func
) != SYMBOL_REF
)
26194 func
= force_reg (Pmode
, func
);
26196 /* Indirect calls via CTR are strongly preferred over indirect
26197 calls via LR, so move the address there. That can't be left
26198 to reload because we want to mark every instruction in an
26199 inline PLT call sequence with a reloc, enabling the linker to
26200 edit the sequence back to a direct call when that makes sense. */
26201 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26204 rtvec v
= gen_rtvec (3, func
, func_desc
, tlsarg
);
26205 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26206 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
26207 v
= gen_rtvec (2, func_addr
, func_desc
);
26208 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26211 emit_move_insn (func_addr
, func
);
26216 /* Create the call. */
26217 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26218 if (value
!= NULL_RTX
)
26219 call
[0] = gen_rtx_SET (value
, call
[0]);
26221 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
26223 if (TARGET_SECURE_PLT
26225 && GET_CODE (func_addr
) == SYMBOL_REF
26226 && !SYMBOL_REF_LOCAL_P (func_addr
))
26227 call
[n
++] = gen_rtx_USE (VOIDmode
, pic_offset_table_rtx
);
26229 call
[n
++] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
26231 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n
, call
));
26232 insn
= emit_call_insn (insn
);
26234 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
26237 /* Expand code to perform a sibling call under the SysV4 ABI. */
26240 rs6000_sibcall_sysv (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
26242 rtx func
= func_desc
;
26246 rtx abi_reg
= NULL_RTX
;
26249 tlsarg
= global_tlsarg
;
26251 /* Handle longcall attributes. */
26252 if ((INTVAL (cookie
) & CALL_LONG
) != 0
26253 && GET_CODE (func_desc
) == SYMBOL_REF
)
26255 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
26256 /* If the longcall was implemented as an inline PLT call using
26257 PLT unspecs then func will be REG:r11. If not, func will be
26258 a pseudo reg. The inline PLT call sequence supports lazy
26259 linking (and longcalls to functions in dlopen'd libraries).
26260 The other style of longcalls don't. The lazy linking entry
26261 to the dynamic symbol resolver requires r11 be the function
26262 address (as it is for linker generated PLT stubs). Ensure
26263 r11 stays valid to the bctr by marking r11 used by the call. */
26268 /* Handle indirect calls. */
26269 if (GET_CODE (func
) != SYMBOL_REF
)
26271 func
= force_reg (Pmode
, func
);
26273 /* Indirect sibcalls must go via CTR. That can't be left to
26274 reload because we want to mark every instruction in an inline
26275 PLT call sequence with a reloc, enabling the linker to edit
26276 the sequence back to a direct call when that makes sense. */
26277 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26280 rtvec v
= gen_rtvec (3, func
, func_desc
, tlsarg
);
26281 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26282 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
26283 v
= gen_rtvec (2, func_addr
, func_desc
);
26284 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
26287 emit_move_insn (func_addr
, func
);
26292 /* Create the call. */
26293 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26294 if (value
!= NULL_RTX
)
26295 call
[0] = gen_rtx_SET (value
, call
[0]);
26297 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
26298 call
[2] = simple_return_rtx
;
26300 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
26301 insn
= emit_call_insn (insn
);
26303 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
26308 /* Expand code to perform a call under the Darwin ABI.
26309 Modulo handling of mlongcall, this is much the same as sysv.
26310 if/when the longcall optimisation is removed, we could drop this
26311 code and use the sysv case (taking care to avoid the tls stuff).
26313 We can use this for sibcalls too, if needed. */
26316 rs6000_call_darwin_1 (rtx value
, rtx func_desc
, rtx tlsarg
,
26317 rtx cookie
, bool sibcall
)
26319 rtx func
= func_desc
;
26323 int cookie_val
= INTVAL (cookie
);
26324 bool make_island
= false;
26326 /* Handle longcall attributes, there are two cases for Darwin:
26327 1) Newer linkers are capable of synthesising any branch islands needed.
26328 2) We need a helper branch island synthesised by the compiler.
26329 The second case has mostly been retired and we don't use it for m64.
26330 In fact, it's is an optimisation, we could just indirect as sysv does..
26331 ... however, backwards compatibility for now.
26332 If we're going to use this, then we need to keep the CALL_LONG bit set,
26333 so that we can pick up the special insn form later. */
26334 if ((cookie_val
& CALL_LONG
) != 0
26335 && GET_CODE (func_desc
) == SYMBOL_REF
)
26337 /* FIXME: the longcall opt should not hang off this flag, it is most
26338 likely incorrect for kernel-mode code-generation. */
26339 if (darwin_symbol_stubs
&& TARGET_32BIT
)
26340 make_island
= true; /* Do nothing yet, retain the CALL_LONG flag. */
26343 /* The linker is capable of doing this, but the user explicitly
26344 asked for -mlongcall, so we'll do the 'normal' version. */
26345 func
= rs6000_longcall_ref (func_desc
, NULL_RTX
);
26346 cookie_val
&= ~CALL_LONG
; /* Handled, zap it. */
26350 /* Handle indirect calls. */
26351 if (GET_CODE (func
) != SYMBOL_REF
)
26353 func
= force_reg (Pmode
, func
);
26355 /* Indirect calls via CTR are strongly preferred over indirect
26356 calls via LR, and are required for indirect sibcalls, so move
26357 the address there. */
26358 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26359 emit_move_insn (func_addr
, func
);
26364 /* Create the call. */
26365 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26366 if (value
!= NULL_RTX
)
26367 call
[0] = gen_rtx_SET (value
, call
[0]);
26369 call
[1] = gen_rtx_USE (VOIDmode
, GEN_INT (cookie_val
));
26372 call
[2] = simple_return_rtx
;
26374 call
[2] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
26376 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
26377 insn
= emit_call_insn (insn
);
26378 /* Now we have the debug info in the insn, we can set up the branch island
26379 if we're using one. */
26382 tree funname
= get_identifier (XSTR (func_desc
, 0));
26384 if (no_previous_def (funname
))
26386 rtx label_rtx
= gen_label_rtx ();
26387 char *label_buf
, temp_buf
[256];
26388 ASM_GENERATE_INTERNAL_LABEL (temp_buf
, "L",
26389 CODE_LABEL_NUMBER (label_rtx
));
26390 label_buf
= temp_buf
[0] == '*' ? temp_buf
+ 1 : temp_buf
;
26391 tree labelname
= get_identifier (label_buf
);
26392 add_compiler_branch_island (labelname
, funname
,
26393 insn_line ((const rtx_insn
*)insn
));
26400 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
26401 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
26404 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, false);
26412 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
26413 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
26416 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, true);
/* Return whether we should generate PC-relative code for FNDECL.  */
bool
rs6000_fndecl_pcrel_p (const_tree fndecl)
{
  if (DEFAULT_ABI != ABI_ELFv2)
    return false;

  struct cl_target_option *opts = target_opts_for_fn (fndecl);

  return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	  && TARGET_CMODEL == CMODEL_MEDIUM);
}

/* Return whether we should generate PC-relative code for *FN.  */
bool
rs6000_function_pcrel_p (struct function *fn)
{
  if (DEFAULT_ABI != ABI_ELFv2)
    return false;

  /* Optimize usual case.  */
  if (fn == cfun)
    return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	    && TARGET_CMODEL == CMODEL_MEDIUM);

  return rs6000_fndecl_pcrel_p (fn->decl);
}

/* Return whether we should generate PC-relative code for the current
   function.  */
bool
rs6000_pcrel_p ()
{
  return (DEFAULT_ABI == ABI_ELFv2
	  && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
	  && TARGET_CMODEL == CMODEL_MEDIUM);
}
26461 /* Given an address (ADDR), a mode (MODE), and what the format of the
26462 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26463 for the address. */
26466 address_to_insn_form (rtx addr
,
26468 enum non_prefixed_form non_prefixed_format
)
26470 /* Single register is easy. */
26471 if (REG_P (addr
) || SUBREG_P (addr
))
26472 return INSN_FORM_BASE_REG
;
26474 /* If the non prefixed instruction format doesn't support offset addressing,
26475 make sure only indexed addressing is allowed.
26477 We special case SDmode so that the register allocator does not try to move
26478 SDmode through GPR registers, but instead uses the 32-bit integer load and
26479 store instructions for the floating point registers. */
26480 if (non_prefixed_format
== NON_PREFIXED_X
|| (mode
== SDmode
&& TARGET_DFP
))
26482 if (GET_CODE (addr
) != PLUS
)
26483 return INSN_FORM_BAD
;
26485 rtx op0
= XEXP (addr
, 0);
26486 rtx op1
= XEXP (addr
, 1);
26487 if (!REG_P (op0
) && !SUBREG_P (op0
))
26488 return INSN_FORM_BAD
;
26490 if (!REG_P (op1
) && !SUBREG_P (op1
))
26491 return INSN_FORM_BAD
;
26493 return INSN_FORM_X
;
26496 /* Deal with update forms. */
26497 if (GET_RTX_CLASS (GET_CODE (addr
)) == RTX_AUTOINC
)
26498 return INSN_FORM_UPDATE
;
26500 /* Handle PC-relative symbols and labels. Check for both local and
26501 external symbols. Assume labels are always local. TLS symbols
26502 are not PC-relative for rs6000. */
26505 if (LABEL_REF_P (addr
))
26506 return INSN_FORM_PCREL_LOCAL
;
26508 if (SYMBOL_REF_P (addr
) && !SYMBOL_REF_TLS_MODEL (addr
))
26510 if (!SYMBOL_REF_LOCAL_P (addr
))
26511 return INSN_FORM_PCREL_EXTERNAL
;
26513 return INSN_FORM_PCREL_LOCAL
;
26517 if (GET_CODE (addr
) == CONST
)
26518 addr
= XEXP (addr
, 0);
26520 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26521 if (GET_CODE (addr
) == LO_SUM
)
26522 return INSN_FORM_LO_SUM
;
26524 /* Everything below must be an offset address of some form. */
26525 if (GET_CODE (addr
) != PLUS
)
26526 return INSN_FORM_BAD
;
26528 rtx op0
= XEXP (addr
, 0);
26529 rtx op1
= XEXP (addr
, 1);
26531 /* Check for indexed addresses. */
26532 if (REG_P (op1
) || SUBREG_P (op1
))
26534 if (REG_P (op0
) || SUBREG_P (op0
))
26535 return INSN_FORM_X
;
26537 return INSN_FORM_BAD
;
26540 if (!CONST_INT_P (op1
))
26541 return INSN_FORM_BAD
;
26543 HOST_WIDE_INT offset
= INTVAL (op1
);
26544 if (!SIGNED_INTEGER_34BIT_P (offset
))
26545 return INSN_FORM_BAD
;
26547 /* Check for local and external PC-relative addresses. Labels are always
26548 local. TLS symbols are not PC-relative for rs6000. */
26551 if (LABEL_REF_P (op0
))
26552 return INSN_FORM_PCREL_LOCAL
;
26554 if (SYMBOL_REF_P (op0
) && !SYMBOL_REF_TLS_MODEL (op0
))
26556 if (!SYMBOL_REF_LOCAL_P (op0
))
26557 return INSN_FORM_PCREL_EXTERNAL
;
26559 return INSN_FORM_PCREL_LOCAL
;
26563 /* If it isn't PC-relative, the address must use a base register. */
26564 if (!REG_P (op0
) && !SUBREG_P (op0
))
26565 return INSN_FORM_BAD
;
26567 /* Large offsets must be prefixed. */
26568 if (!SIGNED_INTEGER_16BIT_P (offset
))
26570 if (TARGET_PREFIXED
)
26571 return INSN_FORM_PREFIXED_NUMERIC
;
26573 return INSN_FORM_BAD
;
26576 /* We have a 16-bit offset, see what default instruction format to use. */
26577 if (non_prefixed_format
== NON_PREFIXED_DEFAULT
)
26579 unsigned size
= GET_MODE_SIZE (mode
);
26581 /* On 64-bit systems, assume 64-bit integers need to use DS form
26582 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26583 (for LXV and STXV). TImode is problematical in that its normal usage
26584 is expected to be GPRs where it wants a DS instruction format, but if
26585 it goes into the vector registers, it wants a DQ instruction
26587 if (TARGET_POWERPC64
&& size
>= 8 && GET_MODE_CLASS (mode
) == MODE_INT
)
26588 non_prefixed_format
= NON_PREFIXED_DS
;
26590 else if (TARGET_VSX
&& size
>= 16
26591 && (VECTOR_MODE_P (mode
) || VECTOR_ALIGNMENT_P (mode
)))
26592 non_prefixed_format
= NON_PREFIXED_DQ
;
26595 non_prefixed_format
= NON_PREFIXED_D
;
26598 /* Classify the D/DS/DQ-form addresses. */
26599 switch (non_prefixed_format
)
26601 /* Instruction format D, all 16 bits are valid. */
26602 case NON_PREFIXED_D
:
26603 return INSN_FORM_D
;
26605 /* Instruction format DS, bottom 2 bits must be 0. */
26606 case NON_PREFIXED_DS
:
26607 if ((offset
& 3) == 0)
26608 return INSN_FORM_DS
;
26610 else if (TARGET_PREFIXED
)
26611 return INSN_FORM_PREFIXED_NUMERIC
;
26614 return INSN_FORM_BAD
;
26616 /* Instruction format DQ, bottom 4 bits must be 0. */
26617 case NON_PREFIXED_DQ
:
26618 if ((offset
& 15) == 0)
26619 return INSN_FORM_DQ
;
26621 else if (TARGET_PREFIXED
)
26622 return INSN_FORM_PREFIXED_NUMERIC
;
26625 return INSN_FORM_BAD
;
26631 return INSN_FORM_BAD
;
/* Given address rtx ADDR for a load of MODE, is this legitimate for a
   non-prefixed D-form or X-form instruction?  NON_PREFIXED_FORMAT is
   given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
   a D-form or DS-form instruction.  X-form and base_reg are always
   allowed.  */

bool
address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
			   enum non_prefixed_form non_prefixed_format)
{
  enum insn_form result_form;

  result_form = address_to_insn_form (addr, mode, non_prefixed_format);

  switch (non_prefixed_format)
    {
    case NON_PREFIXED_D:
      switch (result_form)
	{
	case INSN_FORM_X:
	case INSN_FORM_D:
	case INSN_FORM_DS:
	case INSN_FORM_BASE_REG:
	  return true;
	default:
	  return false;
	}
      break;

    case NON_PREFIXED_DS:
      switch (result_form)
	{
	case INSN_FORM_X:
	case INSN_FORM_DS:
	case INSN_FORM_BASE_REG:
	  return true;
	default:
	  return false;
	}
      break;

    default:
      break;
    }

  return false;
}

/* Return true if a REG with a given MODE is loaded from or stored into a MEM
   location that uses a non-prefixed D/DS/DQ-form address.  This is used to
   validate the load or store with the PCREL_OPT optimization to make sure it
   is an instruction that can be optimized.

   We need to specify the MODE separately from the REG to allow for loads that
   include zero/sign/float extension.  */

bool
pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
{
  /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
     PCREL_OPT optimization.  */
  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
  if (non_prefixed == NON_PREFIXED_X)
    return false;

  /* Check if this is a non-prefixed D/DS/DQ-form instruction.  */
  rtx addr = XEXP (mem, 0);
  enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
  return (iform == INSN_FORM_BASE_REG
	  || iform == INSN_FORM_D
	  || iform == INSN_FORM_DS
	  || iform == INSN_FORM_DQ);
}
/* Helper function to see if we're potentially looking at lfs/stfs.
   - PARALLEL containing a SET and a CLOBBER
   - stfs:
     - SET is from UNSPEC_SI_FROM_SF to MEM:SI
     - CLOBBER is a V4SF
   - lfs:
     - SET is from UNSPEC_SF_FROM_SI to REG:SF
     - CLOBBER is a DI.  */

static bool
is_lfs_stfs_insn (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  if (GET_CODE (pattern) != PARALLEL)
    return false;

  /* This should be a parallel with exactly one set and one clobber.  */
  if (XVECLEN (pattern, 0) != 2)
    return false;

  rtx set = XVECEXP (pattern, 0, 0);
  if (GET_CODE (set) != SET)
    return false;

  rtx clobber = XVECEXP (pattern, 0, 1);
  if (GET_CODE (clobber) != CLOBBER)
    return false;

  /* All we care is that the destination of the SET is a mem:SI,
     the source should be an UNSPEC_SI_FROM_SF, and the clobber
     should be a scratch:V4SF.  */

  rtx dest = SET_DEST (set);
  rtx src = SET_SRC (set);
  rtx scratch = SET_DEST (clobber);

  if (GET_CODE (src) != UNSPEC)
    return false;

  /* stfs case.  */
  if (XINT (src, 1) == UNSPEC_SI_FROM_SF
      && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
      && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
    return true;

  /* lfs case.  */
  if (XINT (src, 1) == UNSPEC_SF_FROM_SI
      && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
      && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
    return true;

  return false;
}
/* Helper function to take a REG and a MODE and turn it into the non-prefixed
   instruction format (D/DS/DQ) used for offset memory.  */

enum non_prefixed_form
reg_to_non_prefixed (rtx reg, machine_mode mode)
{
  /* If it isn't a register, use the defaults.  */
  if (!REG_P (reg) && !SUBREG_P (reg))
    return NON_PREFIXED_DEFAULT;

  unsigned int r = reg_or_subregno (reg);

  /* If we have a pseudo, use the default instruction format.  */
  if (!HARD_REGISTER_NUM_P (r))
    return NON_PREFIXED_DEFAULT;

  unsigned size = GET_MODE_SIZE (mode);

  /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
     128-bit floating point, and 128-bit integers.  Before power9, only indexed
     addressing was available for vectors.  */
  if (FP_REGNO_P (r))
    {
      if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
	return NON_PREFIXED_D;

      else if (size < 8)
	return NON_PREFIXED_X;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode)
		   || VECTOR_ALIGNMENT_P (mode)
		   || mode == TImode || mode == CTImode))
	return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;

      else
	return NON_PREFIXED_DEFAULT;
    }

  /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
     128-bit floating point, and 128-bit integers.  Before power9, only indexed
     addressing was available.  */
  else if (ALTIVEC_REGNO_P (r))
    {
      if (!TARGET_P9_VECTOR)
	return NON_PREFIXED_X;

      if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
	return NON_PREFIXED_DS;

      else if (size < 8)
	return NON_PREFIXED_X;

      else if (TARGET_VSX && size >= 16
	       && (VECTOR_MODE_P (mode)
		   || VECTOR_ALIGNMENT_P (mode)
		   || mode == TImode || mode == CTImode))
	return NON_PREFIXED_DQ;

      else
	return NON_PREFIXED_DEFAULT;
    }

  /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
     otherwise.  Assume that any other register, such as LR, CRs, etc. will go
     through the GPR registers for memory operations.  */
  else if (TARGET_POWERPC64 && size >= 8)
    return NON_PREFIXED_DS;

  return NON_PREFIXED_D;
}
/* Whether a load instruction is a prefixed instruction.  This is called from
   the prefixed attribute processing.  */

bool
prefixed_load_p (rtx_insn *insn)
{
  /* Validate the insn to make sure it is a normal load insn.  */
  extract_insn_cached (insn);
  if (recog_data.n_operands < 2)
    return false;

  rtx reg = recog_data.operand[0];
  rtx mem = recog_data.operand[1];

  if (!REG_P (reg) && !SUBREG_P (reg))
    return false;

  if (!MEM_P (mem))
    return false;

  /* Prefixed load instructions do not support update or indexed forms.  */
  if (get_attr_indexed (insn) == INDEXED_YES
      || get_attr_update (insn) == UPDATE_YES)
    return false;

  /* LWA uses the DS format instead of the D format that LWZ uses.  */
  enum non_prefixed_form non_prefixed;
  machine_mode reg_mode = GET_MODE (reg);
  machine_mode mem_mode = GET_MODE (mem);

  if (mem_mode == SImode && reg_mode == DImode
      && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
    non_prefixed = NON_PREFIXED_DS;

  else
    non_prefixed = reg_to_non_prefixed (reg, mem_mode);

  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
    return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
  else
    return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
}

/* Whether a store instruction is a prefixed instruction.  This is called from
   the prefixed attribute processing.  */

bool
prefixed_store_p (rtx_insn *insn)
{
  /* Validate the insn to make sure it is a normal store insn.  */
  extract_insn_cached (insn);
  if (recog_data.n_operands < 2)
    return false;

  rtx mem = recog_data.operand[0];
  rtx reg = recog_data.operand[1];

  if (!REG_P (reg) && !SUBREG_P (reg))
    return false;

  if (!MEM_P (mem))
    return false;

  /* Prefixed store instructions do not support update or indexed forms.  */
  if (get_attr_indexed (insn) == INDEXED_YES
      || get_attr_update (insn) == UPDATE_YES)
    return false;

  machine_mode mem_mode = GET_MODE (mem);
  rtx addr = XEXP (mem, 0);
  enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);

  /* Need to make sure we aren't looking at a stfs which doesn't look
     like the other things reg_to_non_prefixed/address_is_prefixed
     handle.  */
  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
    return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
  else
    return address_is_prefixed (addr, mem_mode, non_prefixed);
}
/* Whether a load immediate or add instruction is a prefixed instruction.  This
   is called from the prefixed attribute processing.  */

bool
prefixed_paddi_p (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (!set)
    return false;

  rtx dest = SET_DEST (set);
  rtx src = SET_SRC (set);

  if (!REG_P (dest) && !SUBREG_P (dest))
    return false;

  /* Is this a load immediate that can't be done with a simple ADDI or
     ADDIS?  */
  if (CONST_INT_P (src))
    return (satisfies_constraint_eI (src)
	    && !satisfies_constraint_I (src)
	    && !satisfies_constraint_L (src));

  /* Is this a PADDI instruction that can't be done with a simple ADDI or
     ADDIS?  */
  if (GET_CODE (src) == PLUS)
    {
      rtx op1 = XEXP (src, 1);

      return (CONST_INT_P (op1)
	      && satisfies_constraint_eI (op1)
	      && !satisfies_constraint_I (op1)
	      && !satisfies_constraint_L (op1));
    }

  /* If not, is it a load of a PC-relative address?  */
  if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
    return false;

  if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
    return false;

  enum insn_form iform = address_to_insn_form (src, Pmode,
					       NON_PREFIXED_DEFAULT);

  return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
}
/* Whether the next instruction needs a 'p' prefix issued before the
   instruction is printed out.  */
static bool prepend_p_to_next_insn;

/* Define FINAL_PRESCAN_INSN if some processing needs to be done before
   outputting the assembler code.  On the PowerPC, we remember if the current
   insn is a prefixed insn where we need to emit a 'p' before the insn.

   In addition, if the insn is part of a PC-relative reference to an external
   label optimization, this is recorded also.  */
void
rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
{
  prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
			    == MAYBE_PREFIXED_YES
			    && get_attr_prefixed (insn) == PREFIXED_YES);
  return;
}

/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
   We use it to emit a 'p' for prefixed insns that is set in
   FINAL_PRESCAN_INSN.  */
void
rs6000_asm_output_opcode (FILE *stream)
{
  if (prepend_p_to_next_insn)
    {
      fprintf (stream, "p");

      /* Reset the flag in the case where there are separate insn lines in the
	 sequence, so the 'p' is only emitted for the first line.  This shows up
	 when we are doing the PCREL_OPT optimization, in that the label created
	 with %r<n> would have a leading 'p' printed.  */
      prepend_p_to_next_insn = false;
    }

  return;
}

/* Emit the relocation to tie the next instruction to a previous instruction
   that loads up an external address.  This is used to do the PCREL_OPT
   optimization.  Note, the label is generated after the PLD of the got
   pc-relative address to allow for the assembler to insert NOPs before the PLD
   instruction.  The operand is a constant integer that is the label
   number.  */

void
output_pcrel_opt_reloc (rtx label_num)
{
  rtx operands[1] = { label_num };
  output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
		   operands);
}
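/* For instance, with label number 42 the directive emitted above would be

	.reloc .Lpcrel42-8,R_PPC64_PCREL_OPT,.-(.Lpcrel42-8)

   which ties the PLD that defined .Lpcrel42 to the dependent load or store
   that follows this directive.  */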
/* Adjust the length of an INSN.  LENGTH is the currently-computed length and
   should be adjusted to reflect any required changes.  This macro is used when
   there is some systematic length adjustment required that would be difficult
   to express in the length attribute.

   In the PowerPC, we use this to adjust the length of an instruction if one or
   more prefixed instructions are generated, using the attribute
   num_prefixed_insns.  A prefixed instruction is 8 bytes instead of 4, but the
   hardware requires that a prefixed instruction does not cross a 64-byte
   boundary.  This means the compiler has to assume the length of the first
   prefixed instruction is 12 bytes instead of 8 bytes.  Since the length is
   already set for the non-prefixed instruction, we just need to update for the
   difference.  */

int
rs6000_adjust_insn_length (rtx_insn *insn, int length)
{
  if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
    {
      rtx pattern = PATTERN (insn);
      if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
	  && get_attr_prefixed (insn) == PREFIXED_YES)
	{
	  int num_prefixed = get_attr_max_prefixed_insns (insn);
	  length += 4 * (num_prefixed + 1);
	}
    }

  return length;
}
27048 # define USE_HIDDEN_LINKONCE 1
27050 # define USE_HIDDEN_LINKONCE 0
27053 /* Fills in the label name that should be used for a 476 link stack thunk. */
27056 get_ppc476_thunk_name (char name
[32])
27058 gcc_assert (TARGET_LINK_STACK
);
27060 if (USE_HIDDEN_LINKONCE
)
27061 sprintf (name
, "__ppc476.get_thunk");
27063 ASM_GENERATE_INTERNAL_LABEL (name
, "LPPC476_", 0);
27066 /* This function emits the simple thunk routine that is used to preserve
27067 the link stack on the 476 cpu. */
27069 static void rs6000_code_end (void) ATTRIBUTE_UNUSED
;
27071 rs6000_code_end (void)
27076 if (!TARGET_LINK_STACK
)
27079 get_ppc476_thunk_name (name
);
27081 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
, get_identifier (name
),
27082 build_function_type_list (void_type_node
, NULL_TREE
));
27083 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
27084 NULL_TREE
, void_type_node
);
27085 TREE_PUBLIC (decl
) = 1;
27086 TREE_STATIC (decl
) = 1;
27089 if (USE_HIDDEN_LINKONCE
&& !TARGET_XCOFF
)
27091 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
27092 targetm
.asm_out
.unique_section (decl
, 0);
27093 switch_to_section (get_named_section (decl
, NULL
, 0));
27094 DECL_WEAK (decl
) = 1;
27095 ASM_WEAKEN_DECL (asm_out_file
, decl
, name
, 0);
27096 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
27097 targetm
.asm_out
.assemble_visibility (decl
, VISIBILITY_HIDDEN
);
27098 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
27103 switch_to_section (text_section
);
27104 ASM_OUTPUT_LABEL (asm_out_file
, name
);
27107 DECL_INITIAL (decl
) = make_node (BLOCK
);
27108 current_function_decl
= decl
;
27109 allocate_struct_function (decl
, false);
27110 init_function_start (decl
);
27111 first_function_block_is_cold
= false;
27112 /* Make sure unwind info is emitted for the thunk if needed. */
27113 final_start_function (emit_barrier (), asm_out_file
, 1);
27115 fputs ("\tblr\n", asm_out_file
);
27117 final_end_function ();
27118 init_insn_lengths ();
27119 free_after_compilation (cfun
);
27121 current_function_decl
= NULL
;
/* Add r30 to hard reg set if the prologue sets it up and it is not
   pic_offset_table_rtx.  */

static void
rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
{
  if (!TARGET_SINGLE_PIC_BASE
      && TARGET_TOC
      && TARGET_MINIMAL_TOC
      && !constant_pool_empty_p ())
    add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
  if (cfun->machine->split_stack_argp_used)
    add_to_hard_reg_set (&set->set, Pmode, 12);

  /* Make sure the hard reg set doesn't include r2, which was possibly added
     via PIC_OFFSET_TABLE_REGNUM.  */
  if (TARGET_TOC)
    remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
}
/* Helper function for rs6000_split_logical to emit a logical instruction after
   splitting the operation to single GPR registers.

   DEST is the destination register.
   OP1 and OP2 are the input source registers.
   CODE is the base operation (AND, IOR, XOR, NOT).
   MODE is the machine mode.
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */

static void
rs6000_split_logical_inner (rtx dest,
			    rtx op1,
			    rtx op2,
			    enum rtx_code code,
			    machine_mode mode,
			    bool complement_final_p,
			    bool complement_op1_p,
			    bool complement_op2_p)
{
  rtx bool_rtx;

  /* Optimize AND of 0/0xffffffff and IOR/XOR of 0.  */
  if (op2 && CONST_INT_P (op2)
      && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
      && !complement_final_p && !complement_op1_p && !complement_op2_p)
    {
      HOST_WIDE_INT mask = GET_MODE_MASK (mode);
      HOST_WIDE_INT value = INTVAL (op2) & mask;

      /* Optimize AND of 0 to just set 0.  Optimize AND of -1 to be a move.  */
      if (code == AND)
	{
	  if (value == 0)
	    {
	      emit_insn (gen_rtx_SET (dest, const0_rtx));
	      return;
	    }

	  else if (value == mask)
	    {
	      if (!rtx_equal_p (dest, op1))
		emit_insn (gen_rtx_SET (dest, op1));
	      return;
	    }
	}

      /* Optimize IOR/XOR of 0 to be a simple move.  Split large operations
	 into separate ORI/ORIS or XORI/XORIS instructions.  */
      else if (code == IOR || code == XOR)
	{
	  if (value == 0)
	    {
	      if (!rtx_equal_p (dest, op1))
		emit_insn (gen_rtx_SET (dest, op1));
	      return;
	    }
	}
    }

  if (code == AND && mode == SImode
      && !complement_final_p && !complement_op1_p && !complement_op2_p)
    {
      emit_insn (gen_andsi3 (dest, op1, op2));
      return;
    }

  if (complement_op1_p)
    op1 = gen_rtx_NOT (mode, op1);

  if (complement_op2_p)
    op2 = gen_rtx_NOT (mode, op2);

  /* For canonical RTL, if only one arm is inverted it is the first.  */
  if (!complement_op1_p && complement_op2_p)
    std::swap (op1, op2);

  bool_rtx = ((code == NOT)
	      ? gen_rtx_NOT (mode, op1)
	      : gen_rtx_fmt_ee (code, mode, op1, op2));

  if (complement_final_p)
    bool_rtx = gen_rtx_NOT (mode, bool_rtx);

  emit_insn (gen_rtx_SET (dest, bool_rtx));
}
27233 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
27234 operations are split immediately during RTL generation to allow for more
27235 optimizations of the AND/IOR/XOR.
27237 OPERANDS is an array containing the destination and two input operands.
27238 CODE is the base operation (AND, IOR, XOR, NOT).
27239 MODE is the machine mode.
27240 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27241 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27242 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
27243 CLOBBER_REG is either NULL or a scratch register of type CC to allow
27244 formation of the AND instructions. */
27247 rs6000_split_logical_di (rtx operands
[3],
27248 enum rtx_code code
,
27249 bool complement_final_p
,
27250 bool complement_op1_p
,
27251 bool complement_op2_p
)
27253 const HOST_WIDE_INT lower_32bits
= HOST_WIDE_INT_C(0xffffffff);
27254 const HOST_WIDE_INT upper_32bits
= ~ lower_32bits
;
27255 const HOST_WIDE_INT sign_bit
= HOST_WIDE_INT_C(0x80000000);
27256 enum hi_lo
{ hi
= 0, lo
= 1 };
27257 rtx op0_hi_lo
[2], op1_hi_lo
[2], op2_hi_lo
[2];
27260 op0_hi_lo
[hi
] = gen_highpart (SImode
, operands
[0]);
27261 op1_hi_lo
[hi
] = gen_highpart (SImode
, operands
[1]);
27262 op0_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[0]);
27263 op1_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[1]);
27266 op2_hi_lo
[hi
] = op2_hi_lo
[lo
] = NULL_RTX
;
27269 if (!CONST_INT_P (operands
[2]))
27271 op2_hi_lo
[hi
] = gen_highpart_mode (SImode
, DImode
, operands
[2]);
27272 op2_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[2]);
27276 HOST_WIDE_INT value
= INTVAL (operands
[2]);
27277 HOST_WIDE_INT value_hi_lo
[2];
27279 gcc_assert (!complement_final_p
);
27280 gcc_assert (!complement_op1_p
);
27281 gcc_assert (!complement_op2_p
);
27283 value_hi_lo
[hi
] = value
>> 32;
27284 value_hi_lo
[lo
] = value
& lower_32bits
;
27286 for (i
= 0; i
< 2; i
++)
27288 HOST_WIDE_INT sub_value
= value_hi_lo
[i
];
27290 if (sub_value
& sign_bit
)
27291 sub_value
|= upper_32bits
;
27293 op2_hi_lo
[i
] = GEN_INT (sub_value
);
27295 /* If this is an AND instruction, check to see if we need to load
27296 the value in a register. */
27297 if (code
== AND
&& sub_value
!= -1 && sub_value
!= 0
27298 && !and_operand (op2_hi_lo
[i
], SImode
))
27299 op2_hi_lo
[i
] = force_reg (SImode
, op2_hi_lo
[i
]);
27304 for (i
= 0; i
< 2; i
++)
27306 /* Split large IOR/XOR operations. */
27307 if ((code
== IOR
|| code
== XOR
)
27308 && CONST_INT_P (op2_hi_lo
[i
])
27309 && !complement_final_p
27310 && !complement_op1_p
27311 && !complement_op2_p
27312 && !logical_const_operand (op2_hi_lo
[i
], SImode
))
27314 HOST_WIDE_INT value
= INTVAL (op2_hi_lo
[i
]);
27315 HOST_WIDE_INT hi_16bits
= value
& HOST_WIDE_INT_C(0xffff0000);
27316 HOST_WIDE_INT lo_16bits
= value
& HOST_WIDE_INT_C(0x0000ffff);
27317 rtx tmp
= gen_reg_rtx (SImode
);
27319 /* Make sure the constant is sign extended. */
27320 if ((hi_16bits
& sign_bit
) != 0)
27321 hi_16bits
|= upper_32bits
;
27323 rs6000_split_logical_inner (tmp
, op1_hi_lo
[i
], GEN_INT (hi_16bits
),
27324 code
, SImode
, false, false, false);
27326 rs6000_split_logical_inner (op0_hi_lo
[i
], tmp
, GEN_INT (lo_16bits
),
27327 code
, SImode
, false, false, false);
27330 rs6000_split_logical_inner (op0_hi_lo
[i
], op1_hi_lo
[i
], op2_hi_lo
[i
],
27331 code
, SImode
, complement_final_p
,
27332 complement_op1_p
, complement_op2_p
);
/* Split the insns that make up boolean operations operating on multiple GPR
   registers.  The boolean MD patterns ensure that the inputs either are
   exactly the same as the output registers, or there is no overlap.

   OPERANDS is an array containing the destination and two input operands.
   CODE is the base operation (AND, IOR, XOR, NOT).
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */

void
rs6000_split_logical (rtx operands[3],
                      enum rtx_code code,
                      bool complement_final_p,
                      bool complement_op1_p,
                      bool complement_op2_p)
{
  machine_mode mode = GET_MODE (operands[0]);
  machine_mode sub_mode;
  rtx op0, op1, op2;
  int sub_size, regno0, regno1, nregs, i;

  /* If this is DImode, use the specialized version that can run before
     register allocation.  */
  if (mode == DImode && !TARGET_POWERPC64)
    {
      rs6000_split_logical_di (operands, code, complement_final_p,
                               complement_op1_p, complement_op2_p);
      return;
    }

  op0 = operands[0];
  op1 = operands[1];
  op2 = (code == NOT) ? NULL_RTX : operands[2];
  sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
  sub_size = GET_MODE_SIZE (sub_mode);
  regno0 = REGNO (op0);
  regno1 = REGNO (op1);

  gcc_assert (reload_completed);
  gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
  gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));

  nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
  gcc_assert (nregs > 1);

  if (op2 && REG_P (op2))
    gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));

  for (i = 0; i < nregs; i++)
    {
      int offset = i * sub_size;
      rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
      rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
      rtx sub_op2 = ((code == NOT)
                     ? NULL_RTX
                     : simplify_subreg (sub_mode, op2, mode, offset));

      rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
                                  complement_final_p, complement_op1_p,
                                  complement_op2_p);
    }

  return;
}
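/* Usage note (illustrative): the multi-word boolean splitters in the machine
   description are expected to call rs6000_split_logical after reload.  For
   example, a 128-bit logical operation held in a pair of 64-bit GPRs is
   rewritten as two DImode operations, one per constituent register, which is
   valid because the patterns guarantee that the operands either overlap
   completely or not at all.  */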
27404 /* Emit instructions to move SRC to DST. Called by splitters for
27405 multi-register moves. It will emit at most one instruction for
27406 each register that is accessed; that is, it won't emit li/lis pairs
27407 (or equivalent for 64-bit code). One of SRC or DST must be a hard
27411 rs6000_split_multireg_move (rtx dst
, rtx src
)
27413 /* The register number of the first register being moved. */
27415 /* The mode that is to be moved. */
27417 /* The mode that the move is being done in, and its size. */
27418 machine_mode reg_mode
;
27420 /* The number of registers that will be moved. */
27423 reg
= REG_P (dst
) ? REGNO (dst
) : REGNO (src
);
27424 mode
= GET_MODE (dst
);
27425 nregs
= hard_regno_nregs (reg
, mode
);
27427 /* If we have a vector quad register for MMA, and this is a load or store,
27428 see if we can use vector paired load/stores. */
27429 if (mode
== XOmode
&& TARGET_MMA
27430 && (MEM_P (dst
) || MEM_P (src
)))
27435 /* If we have a vector pair/quad mode, split it into two/four separate
27437 else if (mode
== OOmode
|| mode
== XOmode
)
27438 reg_mode
= V1TImode
;
27439 else if (FP_REGNO_P (reg
))
27440 reg_mode
= DECIMAL_FLOAT_MODE_P (mode
) ? DDmode
:
27441 (TARGET_HARD_FLOAT
? DFmode
: SFmode
);
27442 else if (ALTIVEC_REGNO_P (reg
))
27443 reg_mode
= V16QImode
;
27445 reg_mode
= word_mode
;
27446 reg_mode_size
= GET_MODE_SIZE (reg_mode
);
27448 gcc_assert (reg_mode_size
* nregs
== GET_MODE_SIZE (mode
));
27450 /* TDmode residing in FP registers is special, since the ISA requires that
27451 the lower-numbered word of a register pair is always the most significant
27452 word, even in little-endian mode. This does not match the usual subreg
27453 semantics, so we cannot use simplify_gen_subreg in those cases. Access
27454 the appropriate constituent registers "by hand" in little-endian mode.
27456 Note we do not need to check for destructive overlap here since TDmode
27457 can only reside in even/odd register pairs. */
27458 if (FP_REGNO_P (reg
) && DECIMAL_FLOAT_MODE_P (mode
) && !BYTES_BIG_ENDIAN
)
27463 for (i
= 0; i
< nregs
; i
++)
27465 if (REG_P (src
) && FP_REGNO_P (REGNO (src
)))
27466 p_src
= gen_rtx_REG (reg_mode
, REGNO (src
) + nregs
- 1 - i
);
27468 p_src
= simplify_gen_subreg (reg_mode
, src
, mode
,
27469 i
* reg_mode_size
);
27471 if (REG_P (dst
) && FP_REGNO_P (REGNO (dst
)))
27472 p_dst
= gen_rtx_REG (reg_mode
, REGNO (dst
) + nregs
- 1 - i
);
27474 p_dst
= simplify_gen_subreg (reg_mode
, dst
, mode
,
27475 i
* reg_mode_size
);
27477 emit_insn (gen_rtx_SET (p_dst
, p_src
));
27483 /* The __vector_pair and __vector_quad modes are multi-register
27484 modes, so if we have to load or store the registers, we have to be
27485 careful to properly swap them if we're in little endian mode
27486 below. This means the last register gets the first memory
27487 location. We also need to be careful of using the right register
27488 numbers if we are splitting XO to OO. */
27489 if (mode
== OOmode
|| mode
== XOmode
)
27491 nregs
= hard_regno_nregs (reg
, mode
);
27492 int reg_mode_nregs
= hard_regno_nregs (reg
, reg_mode
);
27495 unsigned offset
= 0;
27496 unsigned size
= GET_MODE_SIZE (reg_mode
);
27498 /* If we are reading an accumulator register, we have to
27499 deprime it before we can access it. */
27501 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27502 emit_insn (gen_mma_xxmfacc (src
, src
));
27504 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
27507 = WORDS_BIG_ENDIAN
? i
: (nregs
- reg_mode_nregs
- i
);
27508 rtx dst2
= adjust_address (dst
, reg_mode
, offset
);
27509 rtx src2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
27511 emit_insn (gen_rtx_SET (dst2
, src2
));
27519 unsigned offset
= 0;
27520 unsigned size
= GET_MODE_SIZE (reg_mode
);
27522 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
27525 = WORDS_BIG_ENDIAN
? i
: (nregs
- reg_mode_nregs
- i
);
27526 rtx dst2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
27527 rtx src2
= adjust_address (src
, reg_mode
, offset
);
27529 emit_insn (gen_rtx_SET (dst2
, src2
));
27532 /* If we are writing an accumulator register, we have to
27533 prime it after we've written it. */
27535 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27536 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27541 if (GET_CODE (src
) == UNSPEC
27542 || GET_CODE (src
) == UNSPEC_VOLATILE
)
27544 gcc_assert (XINT (src
, 1) == UNSPEC_VSX_ASSEMBLE
27545 || XINT (src
, 1) == UNSPECV_MMA_ASSEMBLE
);
27546 gcc_assert (REG_P (dst
));
27547 if (GET_MODE (src
) == XOmode
)
27548 gcc_assert (FP_REGNO_P (REGNO (dst
)));
27549 if (GET_MODE (src
) == OOmode
)
27550 gcc_assert (VSX_REGNO_P (REGNO (dst
)));
27552 int nvecs
= XVECLEN (src
, 0);
27553 for (int i
= 0; i
< nvecs
; i
++)
27556 int regno
= reg
+ i
;
27558 if (WORDS_BIG_ENDIAN
)
27560 op
= XVECEXP (src
, 0, i
);
27562 /* If we are loading an even VSX register and the memory location
27563 is adjacent to the next register's memory location (if any),
27564 then we can load them both with one LXVP instruction. */
27565 if ((regno
& 1) == 0)
27567 rtx op2
= XVECEXP (src
, 0, i
+ 1);
27568 if (adjacent_mem_locations (op
, op2
) == op
)
27570 op
= adjust_address (op
, OOmode
, 0);
27571 /* Skip the next register, since we're going to
27572 load it together with this register. */
27579 op
= XVECEXP (src
, 0, nvecs
- i
- 1);
27581 /* If we are loading an even VSX register and the memory location
27582 is adjacent to the next register's memory location (if any),
27583 then we can load them both with one LXVP instruction. */
27584 if ((regno
& 1) == 0)
27586 rtx op2
= XVECEXP (src
, 0, nvecs
- i
- 2);
27587 if (adjacent_mem_locations (op2
, op
) == op2
)
27589 op
= adjust_address (op2
, OOmode
, 0);
27590 /* Skip the next register, since we're going to
27591 load it together with this register. */
27597 rtx dst_i
= gen_rtx_REG (GET_MODE (op
), regno
);
27598 emit_insn (gen_rtx_SET (dst_i
, op
));
27601 /* We are writing an accumulator register, so we have to
27602 prime it after we've written it. */
27603 if (GET_MODE (src
) == XOmode
)
27604 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27609 /* Register -> register moves can use common code. */
27612 if (REG_P (src
) && REG_P (dst
) && (REGNO (src
) < REGNO (dst
)))
27614 /* If we are reading an accumulator register, we have to
27615 deprime it before we can access it. */
27617 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27618 emit_insn (gen_mma_xxmfacc (src
, src
));
27620 /* Move register range backwards, if we might have destructive
27623 /* XO/OO are opaque so cannot use subregs. */
27624 if (mode
== OOmode
|| mode
== XOmode
)
27626 for (i
= nregs
- 1; i
>= 0; i
--)
27628 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + i
);
27629 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + i
);
27630 emit_insn (gen_rtx_SET (dst_i
, src_i
));
27635 for (i
= nregs
- 1; i
>= 0; i
--)
27636 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
27637 i
* reg_mode_size
),
27638 simplify_gen_subreg (reg_mode
, src
, mode
,
27639 i
* reg_mode_size
)));
27642 /* If we are writing an accumulator register, we have to
27643 prime it after we've written it. */
27645 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27646 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27652 bool used_update
= false;
27653 rtx restore_basereg
= NULL_RTX
;
27655 if (MEM_P (src
) && INT_REGNO_P (reg
))
27659 if (GET_CODE (XEXP (src
, 0)) == PRE_INC
27660 || GET_CODE (XEXP (src
, 0)) == PRE_DEC
)
27663 breg
= XEXP (XEXP (src
, 0), 0);
27664 delta_rtx
= (GET_CODE (XEXP (src
, 0)) == PRE_INC
27665 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src
)))
27666 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src
))));
27667 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
27668 src
= replace_equiv_address (src
, breg
);
27670 else if (! rs6000_offsettable_memref_p (src
, reg_mode
, true))
27672 if (GET_CODE (XEXP (src
, 0)) == PRE_MODIFY
)
27674 rtx basereg
= XEXP (XEXP (src
, 0), 0);
27677 rtx ndst
= simplify_gen_subreg (reg_mode
, dst
, mode
, 0);
27678 emit_insn (gen_rtx_SET (ndst
,
27679 gen_rtx_MEM (reg_mode
,
27681 used_update
= true;
27684 emit_insn (gen_rtx_SET (basereg
,
27685 XEXP (XEXP (src
, 0), 1)));
27686 src
= replace_equiv_address (src
, basereg
);
27690 rtx basereg
= gen_rtx_REG (Pmode
, reg
);
27691 emit_insn (gen_rtx_SET (basereg
, XEXP (src
, 0)));
27692 src
= replace_equiv_address (src
, basereg
);
27696 breg
= XEXP (src
, 0);
27697 if (GET_CODE (breg
) == PLUS
|| GET_CODE (breg
) == LO_SUM
)
27698 breg
= XEXP (breg
, 0);
27700 /* If the base register we are using to address memory is
27701 also a destination reg, then change that register last. */
27703 && REGNO (breg
) >= REGNO (dst
)
27704 && REGNO (breg
) < REGNO (dst
) + nregs
)
27705 j
= REGNO (breg
) - REGNO (dst
);
27707 else if (MEM_P (dst
) && INT_REGNO_P (reg
))
27711 if (GET_CODE (XEXP (dst
, 0)) == PRE_INC
27712 || GET_CODE (XEXP (dst
, 0)) == PRE_DEC
)
27715 breg
= XEXP (XEXP (dst
, 0), 0);
27716 delta_rtx
= (GET_CODE (XEXP (dst
, 0)) == PRE_INC
27717 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst
)))
27718 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst
))));
27720 /* We have to update the breg before doing the store.
27721 Use store with update, if available. */
27725 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
27726 emit_insn (TARGET_32BIT
27727 ? (TARGET_POWERPC64
27728 ? gen_movdi_si_update (breg
, breg
, delta_rtx
, nsrc
)
27729 : gen_movsi_si_update (breg
, breg
, delta_rtx
, nsrc
))
27730 : gen_movdi_di_update (breg
, breg
, delta_rtx
, nsrc
));
27731 used_update
= true;
27734 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
27735 dst
= replace_equiv_address (dst
, breg
);
27737 else if (!rs6000_offsettable_memref_p (dst
, reg_mode
, true)
27738 && GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
27740 if (GET_CODE (XEXP (dst
, 0)) == PRE_MODIFY
)
27742 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
27745 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
27746 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode
,
27749 used_update
= true;
27752 emit_insn (gen_rtx_SET (basereg
,
27753 XEXP (XEXP (dst
, 0), 1)));
27754 dst
= replace_equiv_address (dst
, basereg
);
27758 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
27759 rtx offsetreg
= XEXP (XEXP (dst
, 0), 1);
27760 gcc_assert (GET_CODE (XEXP (dst
, 0)) == PLUS
27762 && REG_P (offsetreg
)
27763 && REGNO (basereg
) != REGNO (offsetreg
));
27764 if (REGNO (basereg
) == 0)
27766 rtx tmp
= offsetreg
;
27767 offsetreg
= basereg
;
27770 emit_insn (gen_add3_insn (basereg
, basereg
, offsetreg
));
27771 restore_basereg
= gen_sub3_insn (basereg
, basereg
, offsetreg
);
27772 dst
= replace_equiv_address (dst
, basereg
);
27775 else if (GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
27776 gcc_assert (rs6000_offsettable_memref_p (dst
, reg_mode
, true));
27779 /* If we are reading an accumulator register, we have to
27780 deprime it before we can access it. */
27781 if (TARGET_MMA
&& REG_P (src
)
27782 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27783 emit_insn (gen_mma_xxmfacc (src
, src
));
27785 for (i
= 0; i
< nregs
; i
++)
27787 /* Calculate index to next subword. */
27792 /* If compiler already emitted move of first word by
27793 store with update, no need to do anything. */
27794 if (j
== 0 && used_update
)
27797 /* XO/OO are opaque so cannot use subregs. */
27798 if (mode
== OOmode
|| mode
== XOmode
)
27800 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + j
);
27801 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + j
);
27802 emit_insn (gen_rtx_SET (dst_i
, src_i
));
27805 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
27806 j
* reg_mode_size
),
27807 simplify_gen_subreg (reg_mode
, src
, mode
,
27808 j
* reg_mode_size
)));
27811 /* If we are writing an accumulator register, we have to
27812 prime it after we've written it. */
27813 if (TARGET_MMA
&& REG_P (dst
)
27814 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27815 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27817 if (restore_basereg
!= NULL_RTX
)
27818 emit_insn (restore_basereg
);
/* Return true if the peephole2 can combine a load involving a combination of
   an addis instruction and a load with an offset that can be fused together
   on a power8.  */

bool
fusion_gpr_load_p (rtx addis_reg,	/* register set via addis.  */
                   rtx addis_value,	/* addis value.  */
                   rtx target,		/* target register that is loaded.  */
                   rtx mem)		/* bottom part of the memory addr.  */
{
  rtx addr;
  rtx base_reg;

  /* Validate arguments.  */
  if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
    return false;

  if (!base_reg_operand (target, GET_MODE (target)))
    return false;

  if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
    return false;

  /* Allow sign/zero extension.  */
  if (GET_CODE (mem) == ZERO_EXTEND
      || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
    mem = XEXP (mem, 0);

  if (!MEM_P (mem))
    return false;

  if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
    return false;

  addr = XEXP (mem, 0);			/* either PLUS or LO_SUM.  */
  if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
    return false;

  /* Validate that the register used to load the high value is either the
     register being loaded, or we can safely replace its use.

     This function is only called from the peephole2 pass and we assume that
     there are 2 instructions in the peephole (addis and load), so we want to
     check if the target register was not used in the memory address and the
     register to hold the addis result is dead after the peephole.  */
  if (REGNO (addis_reg) != REGNO (target))
    {
      if (reg_mentioned_p (target, mem))
        return false;

      if (!peep2_reg_dead_p (2, addis_reg))
        return false;

      /* If the target register being loaded is the stack pointer, we must
         avoid loading any other value into it, even temporarily.  */
      if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
        return false;
    }

  base_reg = XEXP (addr, 0);
  return REGNO (addis_reg) == REGNO (base_reg);
}
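/* Illustrative sketch (mnemonics vary with the exact mode and relocation
   style): the peephole2 that uses fusion_gpr_load_p matches a two-insn
   sequence of roughly the form

	addis  rT,rX,sym@toc@ha
	lwz    rT,sym@toc@l(rT)

   where the register written by the addis is either the loaded register
   itself or is dead after the load, so the pair can be emitted back-to-back
   and fused by the power8 hardware.  */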
/* During the peephole2 pass, adjust and expand the insns for a load fusion
   sequence.  We adjust the addis register to use the target register.  If the
   load sign extends, we adjust the code to do the zero extending load, and an
   explicit sign extension later since the fusion only covers zero extending
   loads.

   The operands are:
	operands[0]	register set with addis (to be replaced with target)
	operands[1]	value set via addis
	operands[2]	target register being loaded
	operands[3]	D-form memory reference using operands[0].  */

void
expand_fusion_gpr_load (rtx *operands)
{
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx orig_mem = operands[3];
  rtx new_addr, new_mem, orig_addr, offset;
  enum rtx_code plus_or_lo_sum;
  machine_mode target_mode = GET_MODE (target);
  machine_mode extend_mode = target_mode;
  machine_mode ptr_mode = Pmode;
  enum rtx_code extend = UNKNOWN;

  if (GET_CODE (orig_mem) == ZERO_EXTEND
      || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
    {
      extend = GET_CODE (orig_mem);
      orig_mem = XEXP (orig_mem, 0);
      target_mode = GET_MODE (orig_mem);
    }

  gcc_assert (MEM_P (orig_mem));

  orig_addr = XEXP (orig_mem, 0);
  plus_or_lo_sum = GET_CODE (orig_addr);
  gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);

  offset = XEXP (orig_addr, 1);
  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
  new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);

  if (extend != UNKNOWN)
    new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);

  new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
                            UNSPEC_FUSION_GPR);
  emit_insn (gen_rtx_SET (target, new_mem));

  if (extend == SIGN_EXTEND)
    {
      int sub_off = ((BYTES_BIG_ENDIAN)
                     ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
                     : 0);
      rtx sign_reg
        = simplify_subreg (target_mode, target, extend_mode, sub_off);

      emit_insn (gen_rtx_SET (target,
                              gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
    }

  return;
}
27950 /* Emit the addis instruction that will be part of a fused instruction
27954 emit_fusion_addis (rtx target
, rtx addis_value
)
27957 const char *addis_str
= NULL
;
27959 /* Emit the addis instruction. */
27960 fuse_ops
[0] = target
;
27961 if (satisfies_constraint_L (addis_value
))
27963 fuse_ops
[1] = addis_value
;
27964 addis_str
= "lis %0,%v1";
27967 else if (GET_CODE (addis_value
) == PLUS
)
27969 rtx op0
= XEXP (addis_value
, 0);
27970 rtx op1
= XEXP (addis_value
, 1);
27972 if (REG_P (op0
) && CONST_INT_P (op1
)
27973 && satisfies_constraint_L (op1
))
27977 addis_str
= "addis %0,%1,%v2";
27981 else if (GET_CODE (addis_value
) == HIGH
)
27983 rtx value
= XEXP (addis_value
, 0);
27984 if (GET_CODE (value
) == UNSPEC
&& XINT (value
, 1) == UNSPEC_TOCREL
)
27986 fuse_ops
[1] = XVECEXP (value
, 0, 0); /* symbol ref. */
27987 fuse_ops
[2] = XVECEXP (value
, 0, 1); /* TOC register. */
27989 addis_str
= "addis %0,%2,%1@toc@ha";
27991 else if (TARGET_XCOFF
)
27992 addis_str
= "addis %0,%1@u(%2)";
27995 gcc_unreachable ();
27998 else if (GET_CODE (value
) == PLUS
)
28000 rtx op0
= XEXP (value
, 0);
28001 rtx op1
= XEXP (value
, 1);
28003 if (GET_CODE (op0
) == UNSPEC
28004 && XINT (op0
, 1) == UNSPEC_TOCREL
28005 && CONST_INT_P (op1
))
28007 fuse_ops
[1] = XVECEXP (op0
, 0, 0); /* symbol ref. */
28008 fuse_ops
[2] = XVECEXP (op0
, 0, 1); /* TOC register. */
28011 addis_str
= "addis %0,%2,%1+%3@toc@ha";
28013 else if (TARGET_XCOFF
)
28014 addis_str
= "addis %0,%1+%3@u(%2)";
28017 gcc_unreachable ();
28021 else if (satisfies_constraint_L (value
))
28023 fuse_ops
[1] = value
;
28024 addis_str
= "lis %0,%v1";
28027 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (value
))
28029 fuse_ops
[1] = value
;
28030 addis_str
= "lis %0,%1@ha";
28035 fatal_insn ("Could not generate addis value for fusion", addis_value
);
28037 output_asm_insn (addis_str
, fuse_ops
);
28040 /* Emit a D-form load or store instruction that is the second instruction
28041 of a fusion sequence. */
28044 emit_fusion_load (rtx load_reg
, rtx addis_reg
, rtx offset
, const char *insn_str
)
28047 char insn_template
[80];
28049 fuse_ops
[0] = load_reg
;
28050 fuse_ops
[1] = addis_reg
;
28052 if (CONST_INT_P (offset
) && satisfies_constraint_I (offset
))
28054 sprintf (insn_template
, "%s %%0,%%2(%%1)", insn_str
);
28055 fuse_ops
[2] = offset
;
28056 output_asm_insn (insn_template
, fuse_ops
);
28059 else if (GET_CODE (offset
) == UNSPEC
28060 && XINT (offset
, 1) == UNSPEC_TOCREL
)
28063 sprintf (insn_template
, "%s %%0,%%2@toc@l(%%1)", insn_str
);
28065 else if (TARGET_XCOFF
)
28066 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
28069 gcc_unreachable ();
28071 fuse_ops
[2] = XVECEXP (offset
, 0, 0);
28072 output_asm_insn (insn_template
, fuse_ops
);
28075 else if (GET_CODE (offset
) == PLUS
28076 && GET_CODE (XEXP (offset
, 0)) == UNSPEC
28077 && XINT (XEXP (offset
, 0), 1) == UNSPEC_TOCREL
28078 && CONST_INT_P (XEXP (offset
, 1)))
28080 rtx tocrel_unspec
= XEXP (offset
, 0);
28082 sprintf (insn_template
, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str
);
28084 else if (TARGET_XCOFF
)
28085 sprintf (insn_template
, "%s %%0,%%2+%%3@l(%%1)", insn_str
);
28088 gcc_unreachable ();
28090 fuse_ops
[2] = XVECEXP (tocrel_unspec
, 0, 0);
28091 fuse_ops
[3] = XEXP (offset
, 1);
28092 output_asm_insn (insn_template
, fuse_ops
);
28095 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (offset
))
28097 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
28099 fuse_ops
[2] = offset
;
28100 output_asm_insn (insn_template
, fuse_ops
);
28104 fatal_insn ("Unable to generate load/store offset for fusion", offset
);
28109 /* Given an address, convert it into the addis and load offset parts. Addresses
28110 created during the peephole2 process look like:
28111 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
28112 (unspec [(...)] UNSPEC_TOCREL)) */
28115 fusion_split_address (rtx addr
, rtx
*p_hi
, rtx
*p_lo
)
28119 if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
28121 hi
= XEXP (addr
, 0);
28122 lo
= XEXP (addr
, 1);
28125 gcc_unreachable ();
28131 /* Return a string to fuse an addis instruction with a gpr load to the same
28132 register that we loaded up the addis instruction. The address that is used
28133 is the logical address that was formed during peephole2:
28134 (lo_sum (high) (low-part))
28136 The code is complicated, so we call output_asm_insn directly, and just
28140 emit_fusion_gpr_load (rtx target
, rtx mem
)
28145 const char *load_str
= NULL
;
28148 if (GET_CODE (mem
) == ZERO_EXTEND
)
28149 mem
= XEXP (mem
, 0);
28151 gcc_assert (REG_P (target
) && MEM_P (mem
));
28153 addr
= XEXP (mem
, 0);
28154 fusion_split_address (addr
, &addis_value
, &load_offset
);
28156 /* Now emit the load instruction to the same register. */
28157 mode
= GET_MODE (mem
);
28175 gcc_assert (TARGET_POWERPC64
);
28180 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target
, mem
));
28183 /* Emit the addis instruction. */
28184 emit_fusion_addis (target
, addis_value
);
28186 /* Emit the D-form load instruction. */
28187 emit_fusion_load (target
, target
, load_offset
, load_str
);
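/* Illustrative sketch (exact mnemonics depend on the mode and relocation
   style): emit_fusion_addis and emit_fusion_load above typically print a
   pair such as

	addis  %0,%2,%1@toc@ha
	lwz    %0,%1@toc@l(%0)

   i.e. the addis and the dependent D-form load both target the same
   register, which is the shape the power8 fusion facility accelerates.  */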
28192 /* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
28193 ignores it then. */
28194 static GTY(()) tree atomic_hold_decl
;
28195 static GTY(()) tree atomic_clear_decl
;
28196 static GTY(()) tree atomic_update_decl
;
28198 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
28200 rs6000_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
28202 if (!TARGET_HARD_FLOAT
)
28204 #ifdef RS6000_GLIBC_ATOMIC_FENV
28205 if (atomic_hold_decl
== NULL_TREE
)
28208 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
28209 get_identifier ("__atomic_feholdexcept"),
28210 build_function_type_list (void_type_node
,
28211 double_ptr_type_node
,
28213 TREE_PUBLIC (atomic_hold_decl
) = 1;
28214 DECL_EXTERNAL (atomic_hold_decl
) = 1;
28217 if (atomic_clear_decl
== NULL_TREE
)
28220 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
28221 get_identifier ("__atomic_feclearexcept"),
28222 build_function_type_list (void_type_node
,
28224 TREE_PUBLIC (atomic_clear_decl
) = 1;
28225 DECL_EXTERNAL (atomic_clear_decl
) = 1;
28228 tree const_double
= build_qualified_type (double_type_node
,
28230 tree const_double_ptr
= build_pointer_type (const_double
);
28231 if (atomic_update_decl
== NULL_TREE
)
28234 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
28235 get_identifier ("__atomic_feupdateenv"),
28236 build_function_type_list (void_type_node
,
28239 TREE_PUBLIC (atomic_update_decl
) = 1;
28240 DECL_EXTERNAL (atomic_update_decl
) = 1;
28243 tree fenv_var
= create_tmp_var_raw (double_type_node
);
28244 TREE_ADDRESSABLE (fenv_var
) = 1;
28245 tree fenv_addr
= build1 (ADDR_EXPR
, double_ptr_type_node
,
28246 build4 (TARGET_EXPR
, double_type_node
, fenv_var
,
28247 void_node
, NULL_TREE
, NULL_TREE
));
28249 *hold
= build_call_expr (atomic_hold_decl
, 1, fenv_addr
);
28250 *clear
= build_call_expr (atomic_clear_decl
, 0);
28251 *update
= build_call_expr (atomic_update_decl
, 1,
28252 fold_convert (const_double_ptr
, fenv_addr
));
28257 tree mffs
= rs6000_builtin_decls
[RS6000_BIF_MFFS
];
28258 tree mtfsf
= rs6000_builtin_decls
[RS6000_BIF_MTFSF
];
28259 tree call_mffs
= build_call_expr (mffs
, 0);
28261 /* Generates the equivalent of feholdexcept (&fenv_var)
28263 *fenv_var = __builtin_mffs ();
28265 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
28266 __builtin_mtfsf (0xff, fenv_hold); */
28268 /* Mask to clear everything except for the rounding modes and non-IEEE
28269 arithmetic flag. */
28270 const unsigned HOST_WIDE_INT hold_exception_mask
28271 = HOST_WIDE_INT_C (0xffffffff00000007);
28273 tree fenv_var
= create_tmp_var_raw (double_type_node
);
28275 tree hold_mffs
= build4 (TARGET_EXPR
, double_type_node
, fenv_var
, call_mffs
,
28276 NULL_TREE
, NULL_TREE
);
28278 tree fenv_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_var
);
28279 tree fenv_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
28280 build_int_cst (uint64_type_node
,
28281 hold_exception_mask
));
28283 tree fenv_hold_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
28286 tree hold_mtfsf
= build_call_expr (mtfsf
, 2,
28287 build_int_cst (unsigned_type_node
, 0xff),
28290 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, hold_mffs
, hold_mtfsf
);
28292 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
28294 double fenv_clear = __builtin_mffs ();
28295 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
28296 __builtin_mtfsf (0xff, fenv_clear); */
28298 /* Mask to clear everything except for the rounding modes and non-IEEE
28299 arithmetic flag. */
28300 const unsigned HOST_WIDE_INT clear_exception_mask
28301 = HOST_WIDE_INT_C (0xffffffff00000000);
28303 tree fenv_clear
= create_tmp_var_raw (double_type_node
);
28305 tree clear_mffs
= build4 (TARGET_EXPR
, double_type_node
, fenv_clear
,
28306 call_mffs
, NULL_TREE
, NULL_TREE
);
28308 tree fenv_clean_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_clear
);
28309 tree fenv_clear_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
,
28311 build_int_cst (uint64_type_node
,
28312 clear_exception_mask
));
28314 tree fenv_clear_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
28315 fenv_clear_llu_and
);
28317 tree clear_mtfsf
= build_call_expr (mtfsf
, 2,
28318 build_int_cst (unsigned_type_node
, 0xff),
28321 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, clear_mffs
, clear_mtfsf
);
28323 /* Generates the equivalent of feupdateenv (&fenv_var)
28325 double old_fenv = __builtin_mffs ();
28326 double fenv_update;
28327 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
28328 (*(uint64_t*)fenv_var & 0x1ff80fff);
28329 __builtin_mtfsf (0xff, fenv_update); */
28331 const unsigned HOST_WIDE_INT update_exception_mask
28332 = HOST_WIDE_INT_C (0xffffffff1fffff00);
28333 const unsigned HOST_WIDE_INT new_exception_mask
28334 = HOST_WIDE_INT_C (0x1ff80fff);
28336 tree old_fenv
= create_tmp_var_raw (double_type_node
);
28337 tree update_mffs
= build4 (TARGET_EXPR
, double_type_node
, old_fenv
,
28338 call_mffs
, NULL_TREE
, NULL_TREE
);
28340 tree old_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, old_fenv
);
28341 tree old_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, old_llu
,
28342 build_int_cst (uint64_type_node
,
28343 update_exception_mask
));
28345 tree new_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
28346 build_int_cst (uint64_type_node
,
28347 new_exception_mask
));
28349 tree new_llu_mask
= build2 (BIT_IOR_EXPR
, uint64_type_node
,
28350 old_llu_and
, new_llu_and
);
28352 tree fenv_update_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
28355 tree update_mtfsf
= build_call_expr (mtfsf
, 2,
28356 build_int_cst (unsigned_type_node
, 0xff),
28357 fenv_update_mtfsf
);
28359 *update
= build2 (COMPOUND_EXPR
, void_type_node
, update_mffs
, update_mtfsf
);
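/* Reading guide for the three masks above (a sketch; see the PowerPC FPSCR
   layout for the authoritative bit assignments): the mffs result keeps the
   FPSCR image in its low 32 bits.  The hold mask 0xffffffff00000007 keeps
   only the rounding-mode and non-IEEE bits, so the environment installed by
   *hold runs with exception enables and status flags cleared.  The clear
   mask 0xffffffff00000000 zeroes the whole FPSCR image, which is how *clear
   discards the accumulated exception flags.  The *update step recombines the
   current FPSCR (masked with 0xffffffff1fffff00) with the bits saved by
   *hold (masked with 0x1ff80fff) before writing the result back via mtfsf.  */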
28363 rs6000_generate_float2_double_code (rtx dst
, rtx src1
, rtx src2
)
28365 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28367 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
28368 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
28370 /* The destination of the vmrgew instruction layout is:
28371 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
28372 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28373 vmrgew instruction will be correct. */
28374 if (BYTES_BIG_ENDIAN
)
28376 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0
, src1
, src2
,
28378 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1
, src1
, src2
,
28383 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
28384 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
28387 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
28388 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
28390 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2
, rtx_tmp0
));
28391 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3
, rtx_tmp1
));
28393 if (BYTES_BIG_ENDIAN
)
28394 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
28396 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
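/* Rough description of the sequence above: the two xxpermdi insns pair up
   corresponding double-precision elements of SRC1 and SRC2 (one temporary
   gets element 0 of each input, the other gets element 1), xvcvdpsp then
   narrows each V2DF temporary to single precision, and the final vmrgew
   interleaves the 32-bit results so DST holds the four converted values in
   the expected order on both endiannesses.  */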
28400 rs6000_generate_float2_code (bool signed_convert
, rtx dst
, rtx src1
, rtx src2
)
28402 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28404 rtx_tmp0
= gen_reg_rtx (V2DImode
);
28405 rtx_tmp1
= gen_reg_rtx (V2DImode
);
28407 /* The destination of the vmrgew instruction layout is:
28408 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
28409 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28410 vmrgew instruction will be correct. */
28411 if (BYTES_BIG_ENDIAN
)
28413 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
28414 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
28418 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
28419 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
28422 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
28423 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
28425 if (signed_convert
)
28427 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2
, rtx_tmp0
));
28428 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3
, rtx_tmp1
));
28432 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2
, rtx_tmp0
));
28433 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3
, rtx_tmp1
));
28436 if (BYTES_BIG_ENDIAN
)
28437 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
28439 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
28443 rs6000_generate_vsigned2_code (bool signed_convert
, rtx dst
, rtx src1
,
28446 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28448 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
28449 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
28451 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
28452 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
28454 rtx_tmp2
= gen_reg_rtx (V4SImode
);
28455 rtx_tmp3
= gen_reg_rtx (V4SImode
);
28457 if (signed_convert
)
28459 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2
, rtx_tmp0
));
28460 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3
, rtx_tmp1
));
28464 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2
, rtx_tmp0
));
28465 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3
, rtx_tmp1
));
28468 emit_insn (gen_p8_vmrgew_v4si (dst
, rtx_tmp2
, rtx_tmp3
));
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
                          optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
              && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST
      && (STRICT_ALIGNMENT || !optimize_size))
    return MAX (align, BITS_PER_WORD);
  return align;
}

/* Implement TARGET_STARTING_FRAME_OFFSET.  */

static HOST_WIDE_INT
rs6000_starting_frame_offset (void)
{
  if (FRAME_GROWS_DOWNWARD)
    return 0;
  return RS6000_STARTING_FRAME_OFFSET;
}
/* Internal function to return the built-in function id for the complex
   multiply operation for a given mode.  */

static inline built_in_function
complex_multiply_builtin_code (machine_mode mode)
{
  gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
  int func = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
  return (built_in_function) func;
}

/* Internal function to return the built-in function id for the complex divide
   operation for a given mode.  */

static inline built_in_function
complex_divide_builtin_code (machine_mode mode)
{
  gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
  int func = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
  return (built_in_function) func;
}
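/* Illustrative note: these helpers rely on the complex float modes being
   numbered contiguously, so the multiply built-in for the first complex
   float mode is BUILT_IN_COMPLEX_MUL_MIN and each following mode adds one to
   that id.  rs6000_mangle_decl_assembler_name below uses this to compare
   DECL_FUNCTION_CODE against complex_multiply_builtin_code (KCmode) and
   friends when mapping the calls to __mulkc3/__multc3 style libcalls.  */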
28531 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
28532 function names from <foo>l to <foo>f128 if the default long double type is
28533 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
28534 include file switches the names on systems that support long double as IEEE
28535 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28536 In the future, glibc will export names like __ieee128_sinf128 and we can
28537 switch to using those instead of using sinf128, which pollutes the user's
28540 This will switch the names for Fortran math functions as well (which doesn't
28541 use math.h). However, Fortran needs other changes to the compiler and
28542 library before you can switch the real*16 type at compile time.
28544 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28545 only do this transformation if the __float128 type is enabled. This
28546 prevents us from doing the transformation on older 32-bit ports that might
28547 have enabled using IEEE 128-bit floating point as the default long double
28550 We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
28551 function names used for complex multiply and divide to the appropriate
28555 rs6000_mangle_decl_assembler_name (tree decl
, tree id
)
28557 /* Handle complex multiply/divide. For IEEE 128-bit, use __mulkc3 or
28558 __divkc3 and for IBM 128-bit use __multc3 and __divtc3. */
28559 if (TARGET_FLOAT128_TYPE
28560 && TREE_CODE (decl
) == FUNCTION_DECL
28561 && DECL_IS_UNDECLARED_BUILTIN (decl
)
28562 && DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
28564 built_in_function id
= DECL_FUNCTION_CODE (decl
);
28565 const char *newname
= NULL
;
28567 if (id
== complex_multiply_builtin_code (KCmode
))
28568 newname
= "__mulkc3";
28570 else if (id
== complex_multiply_builtin_code (ICmode
))
28571 newname
= "__multc3";
28573 else if (id
== complex_multiply_builtin_code (TCmode
))
28574 newname
= (TARGET_IEEEQUAD
) ? "__mulkc3" : "__multc3";
28576 else if (id
== complex_divide_builtin_code (KCmode
))
28577 newname
= "__divkc3";
28579 else if (id
== complex_divide_builtin_code (ICmode
))
28580 newname
= "__divtc3";
28582 else if (id
== complex_divide_builtin_code (TCmode
))
28583 newname
= (TARGET_IEEEQUAD
) ? "__divkc3" : "__divtc3";
28587 if (TARGET_DEBUG_BUILTIN
)
28588 fprintf (stderr
, "Map complex mul/div => %s\n", newname
);
28590 return get_identifier (newname
);
28594 /* Map long double built-in functions if long double is IEEE 128-bit. */
28595 if (TARGET_FLOAT128_TYPE
&& TARGET_IEEEQUAD
&& TARGET_LONG_DOUBLE_128
28596 && TREE_CODE (decl
) == FUNCTION_DECL
28597 && DECL_IS_UNDECLARED_BUILTIN (decl
)
28598 && DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
28600 size_t len
= IDENTIFIER_LENGTH (id
);
28601 const char *name
= IDENTIFIER_POINTER (id
);
28602 char *newname
= NULL
;
28604 /* See if it is one of the built-in functions with an unusual name. */
28605 switch (DECL_FUNCTION_CODE (decl
))
28607 case BUILT_IN_DREML
:
28608 newname
= xstrdup ("__remainderieee128");
28611 case BUILT_IN_GAMMAL
:
28612 newname
= xstrdup ("__lgammaieee128");
28615 case BUILT_IN_GAMMAL_R
:
28616 case BUILT_IN_LGAMMAL_R
:
28617 newname
= xstrdup ("__lgammaieee128_r");
28620 case BUILT_IN_NEXTTOWARD
:
28621 newname
= xstrdup ("__nexttoward_to_ieee128");
28624 case BUILT_IN_NEXTTOWARDF
:
28625 newname
= xstrdup ("__nexttowardf_to_ieee128");
28628 case BUILT_IN_NEXTTOWARDL
:
28629 newname
= xstrdup ("__nexttowardieee128");
28632 case BUILT_IN_POW10L
:
28633 newname
= xstrdup ("__exp10ieee128");
28636 case BUILT_IN_SCALBL
:
28637 newname
= xstrdup ("__scalbieee128");
28640 case BUILT_IN_SIGNIFICANDL
:
28641 newname
= xstrdup ("__significandieee128");
28644 case BUILT_IN_SINCOSL
:
28645 newname
= xstrdup ("__sincosieee128");
28652 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28655 size_t printf_len
= strlen ("printf");
28656 size_t scanf_len
= strlen ("scanf");
28657 size_t printf_chk_len
= strlen ("printf_chk");
28659 if (len
>= printf_len
28660 && strcmp (name
+ len
- printf_len
, "printf") == 0)
28661 newname
= xasprintf ("__%sieee128", name
);
28663 else if (len
>= scanf_len
28664 && strcmp (name
+ len
- scanf_len
, "scanf") == 0)
28665 newname
= xasprintf ("__isoc99_%sieee128", name
);
28667 else if (len
>= printf_chk_len
28668 && strcmp (name
+ len
- printf_chk_len
, "printf_chk") == 0)
28669 newname
= xasprintf ("%sieee128", name
);
28671 else if (name
[len
- 1] == 'l')
28673 bool uses_ieee128_p
= false;
28674 tree type
= TREE_TYPE (decl
);
28675 machine_mode ret_mode
= TYPE_MODE (type
);
28677 /* See if the function returns a IEEE 128-bit floating point type or
28679 if (ret_mode
== TFmode
|| ret_mode
== TCmode
)
28680 uses_ieee128_p
= true;
28683 function_args_iterator args_iter
;
28686 /* See if the function passes a IEEE 128-bit floating point type
28687 or complex type. */
28688 FOREACH_FUNCTION_ARGS (type
, arg
, args_iter
)
28690 machine_mode arg_mode
= TYPE_MODE (arg
);
28691 if (arg_mode
== TFmode
|| arg_mode
== TCmode
)
28693 uses_ieee128_p
= true;
28699 /* If we passed or returned an IEEE 128-bit floating point type,
28700 change the name. Use __<name>ieee128, instead of <name>l. */
28701 if (uses_ieee128_p
)
28702 newname
= xasprintf ("__%.*sieee128", (int)(len
- 1), name
);
28708 if (TARGET_DEBUG_BUILTIN
)
28709 fprintf (stderr
, "Map %s => %s\n", name
, newname
);
28711 id
= get_identifier (newname
);
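/* Illustrative examples of the mapping above when long double is IEEE
   128-bit: sinl is referenced as __sinieee128, printf as __printfieee128,
   scanf as __isoc99_scanfieee128, and the irregular cases handled by the
   switch get glibc-specific spellings such as __lgammaieee128_r for
   lgammal_r.  */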
/* Predict whether the given loop in gimple will be transformed in the RTL
   doloop_optimize pass.  */

bool
rs6000_predict_doloop_p (struct loop *loop)
{
  gcc_assert (loop);

  /* On rs6000, targetm.can_use_doloop_p is actually
     can_use_doloop_if_innermost.  Just ensure the loop is innermost.  */
  if (loop->inner != NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Predict doloop failure due to"
                            " loop nesting.\n");
      return false;
    }

  return true;
}

/* Implement TARGET_PREFERRED_DOLOOP_MODE.  */

static machine_mode
rs6000_preferred_doloop_mode (machine_mode)
{
  return word_mode;
}

/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P.  */

static bool
rs6000_cannot_substitute_mem_equiv_p (rtx mem)
{
  gcc_assert (MEM_P (mem));

  /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
     type addresses, so don't allow MEMs with those address types to be
     substituted as an equivalent expression.  See PR93974 for details.  */
  if (GET_CODE (XEXP (mem, 0)) == AND)
    return true;

  return false;
}
/* Implement TARGET_INVALID_CONVERSION.  */

static const char *
rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Make sure we're working with the canonical types.  */
  if (TYPE_CANONICAL (fromtype) != NULL_TREE)
    fromtype = TYPE_CANONICAL (fromtype);
  if (TYPE_CANONICAL (totype) != NULL_TREE)
    totype = TYPE_CANONICAL (totype);

  machine_mode frommode = TYPE_MODE (fromtype);
  machine_mode tomode = TYPE_MODE (totype);

  if (frommode != tomode)
    {
      /* Do not allow conversions to/from XOmode and OOmode types.  */
      if (frommode == XOmode)
        return N_("invalid conversion from type %<__vector_quad%>");
      if (tomode == XOmode)
        return N_("invalid conversion to type %<__vector_quad%>");
      if (frommode == OOmode)
        return N_("invalid conversion from type %<__vector_pair%>");
      if (tomode == OOmode)
        return N_("invalid conversion to type %<__vector_pair%>");
    }

  /* Conversion allowed.  */
  return NULL;
}
/* Convert a SFmode constant to the integer bit pattern.  */

long
rs6000_const_f32_to_i32 (rtx operand)
{
  long value;
  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);

  gcc_assert (GET_MODE (operand) == SFmode);
  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
  return value;
}

void
rs6000_emit_xxspltidp_v2df (rtx dst, long value)
{
  if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
    inform (input_location,
            "the result for the xxspltidp instruction "
            "is undefined for subnormal input values");
  emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
}

/* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC.  */

static bool
rs6000_gen_pic_addr_diff_vec (void)
{
  return rs6000_relative_jumptables;
}

void
rs6000_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
  char buf[100];

  fprintf (file, "%s", directive);
  ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
  assemble_name (file, buf);
  fprintf (file, "\n");
}
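/* Illustrative note: for jump table entry 42 this prints the directive
   followed by the internal label, e.g. a "\t.long\t" directive and the label
   produced by ASM_GENERATE_INTERNAL_LABEL (roughly ".L42" on ELF targets and
   "L..42" on XCOFF); on 64-bit targets DOUBLE_INT_ASM_OP selects the
   assembler's 8-byte directive instead.  */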
28839 /* Copy an integer constant to the vector constant structure. */
28842 constant_int_to_128bit_vector (rtx op
,
28845 vec_const_128bit_type
*info
)
28847 unsigned HOST_WIDE_INT uvalue
= UINTVAL (op
);
28848 unsigned bitsize
= GET_MODE_BITSIZE (mode
);
28850 for (int shift
= bitsize
- 8; shift
>= 0; shift
-= 8)
28851 info
->bytes
[byte_num
++] = (uvalue
>> shift
) & 0xff;
28854 /* Copy a floating point constant to the vector constant structure. */
28857 constant_fp_to_128bit_vector (rtx op
,
28860 vec_const_128bit_type
*info
)
28862 unsigned bitsize
= GET_MODE_BITSIZE (mode
);
28863 unsigned num_words
= bitsize
/ 32;
28864 const REAL_VALUE_TYPE
*rtype
= CONST_DOUBLE_REAL_VALUE (op
);
28865 long real_words
[VECTOR_128BIT_WORDS
];
28867 /* Make sure we don't overflow the real_words array and that it is
28868 filled completely. */
28869 gcc_assert (num_words
<= VECTOR_128BIT_WORDS
&& (bitsize
% 32) == 0);
28871 real_to_target (real_words
, rtype
, mode
);
28873 /* Iterate over each 32-bit word in the floating point constant. The
28874 real_to_target function puts out words in target endian fashion. We need
28875 to arrange the order so that the bytes are written in big endian order. */
28876 for (unsigned num
= 0; num
< num_words
; num
++)
28878 unsigned endian_num
= (BYTES_BIG_ENDIAN
28880 : num_words
- 1 - num
);
28882 unsigned uvalue
= real_words
[endian_num
];
28883 for (int shift
= 32 - 8; shift
>= 0; shift
-= 8)
28884 info
->bytes
[byte_num
++] = (uvalue
>> shift
) & 0xff;
28887 /* Mark that this constant involves floating point. */
28888 info
->fp_constant_p
= true;
28891 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28894 Break out the constant out to bytes, half words, words, and double words.
28895 Return true if we have successfully converted the constant.
28897 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28898 constants. Integer and floating point scalar constants are splatted to fill
28902 vec_const_128bit_to_bytes (rtx op
,
28904 vec_const_128bit_type
*info
)
28906 /* Initialize the constant structure. */
28907 memset ((void *)info
, 0, sizeof (vec_const_128bit_type
));
28909 /* Assume CONST_INTs are DImode. */
28910 if (mode
== VOIDmode
)
28911 mode
= CONST_INT_P (op
) ? DImode
: GET_MODE (op
);
28913 if (mode
== VOIDmode
)
28916 unsigned size
= GET_MODE_SIZE (mode
);
28917 bool splat_p
= false;
28919 if (size
> VECTOR_128BIT_BYTES
)
28922 /* Set up the bits. */
28923 switch (GET_CODE (op
))
28925 /* Integer constants, default to double word. */
28928 constant_int_to_128bit_vector (op
, mode
, 0, info
);
28933 /* Floating point constants. */
28936 /* Fail if the floating point constant is the wrong mode. */
28937 if (GET_MODE (op
) != mode
)
28940 /* SFmode stored as scalars are stored in DFmode format. */
28941 if (mode
== SFmode
)
28944 size
= GET_MODE_SIZE (DFmode
);
28947 constant_fp_to_128bit_vector (op
, mode
, 0, info
);
28952 /* Vector constants, iterate over each element. On little endian
28953 systems, we have to reverse the element numbers. */
28956 /* Fail if the vector constant is the wrong mode or size. */
28957 if (GET_MODE (op
) != mode
28958 || GET_MODE_SIZE (mode
) != VECTOR_128BIT_BYTES
)
28961 machine_mode ele_mode
= GET_MODE_INNER (mode
);
28962 size_t ele_size
= GET_MODE_SIZE (ele_mode
);
28963 size_t nunits
= GET_MODE_NUNITS (mode
);
28965 for (size_t num
= 0; num
< nunits
; num
++)
28967 rtx ele
= CONST_VECTOR_ELT (op
, num
);
28968 size_t byte_num
= (BYTES_BIG_ENDIAN
28970 : nunits
- 1 - num
) * ele_size
;
28972 if (CONST_INT_P (ele
))
28973 constant_int_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
28974 else if (CONST_DOUBLE_P (ele
))
28975 constant_fp_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
28983 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
28984 Since we are duplicating the element, we don't have to worry about
28986 case VEC_DUPLICATE
:
28988 /* Fail if the vector duplicate is the wrong mode or size. */
28989 if (GET_MODE (op
) != mode
28990 || GET_MODE_SIZE (mode
) != VECTOR_128BIT_BYTES
)
28993 machine_mode ele_mode
= GET_MODE_INNER (mode
);
28994 size_t ele_size
= GET_MODE_SIZE (ele_mode
);
28995 rtx ele
= XEXP (op
, 0);
28996 size_t nunits
= GET_MODE_NUNITS (mode
);
28998 if (!CONST_INT_P (ele
) && !CONST_DOUBLE_P (ele
))
29001 for (size_t num
= 0; num
< nunits
; num
++)
29003 size_t byte_num
= num
* ele_size
;
29005 if (CONST_INT_P (ele
))
29006 constant_int_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
29008 constant_fp_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
29014 /* Any thing else, just return failure. */
29019 /* Splat the constant to fill 128 bits if desired. */
29020 if (splat_p
&& size
< VECTOR_128BIT_BYTES
)
29022 if ((VECTOR_128BIT_BYTES
% size
) != 0)
29025 for (size_t offset
= size
;
29026 offset
< VECTOR_128BIT_BYTES
;
29028 memcpy ((void *) &info
->bytes
[offset
],
29029 (void *) &info
->bytes
[0],
29033 /* Remember original size. */
29034 info
->original_size
= size
;
29036 /* Determine if the bytes are all the same. */
29037 unsigned char first_byte
= info
->bytes
[0];
29038 info
->all_bytes_same
= true;
29039 for (size_t i
= 1; i
< VECTOR_128BIT_BYTES
; i
++)
29040 if (first_byte
!= info
->bytes
[i
])
29042 info
->all_bytes_same
= false;
29046 /* Pack half words together & determine if all of the half words are the
29048 for (size_t i
= 0; i
< VECTOR_128BIT_HALF_WORDS
; i
++)
29049 info
->half_words
[i
] = ((info
->bytes
[i
* 2] << 8)
29050 | info
->bytes
[(i
* 2) + 1]);
29052 unsigned short first_hword
= info
->half_words
[0];
29053 info
->all_half_words_same
= true;
29054 for (size_t i
= 1; i
< VECTOR_128BIT_HALF_WORDS
; i
++)
29055 if (first_hword
!= info
->half_words
[i
])
29057 info
->all_half_words_same
= false;
29061 /* Pack words together & determine if all of the words are the same. */
29062 for (size_t i
= 0; i
< VECTOR_128BIT_WORDS
; i
++)
29063 info
->words
[i
] = ((info
->bytes
[i
* 4] << 24)
29064 | (info
->bytes
[(i
* 4) + 1] << 16)
29065 | (info
->bytes
[(i
* 4) + 2] << 8)
29066 | info
->bytes
[(i
* 4) + 3]);
29068 info
->all_words_same
29069 = (info
->words
[0] == info
->words
[1]
29070 && info
->words
[0] == info
->words
[2]
29071 && info
->words
[0] == info
->words
[3]);
29073 /* Pack double words together & determine if all of the double words are the
29075 for (size_t i
= 0; i
< VECTOR_128BIT_DOUBLE_WORDS
; i
++)
29077 unsigned HOST_WIDE_INT d_word
= 0;
29078 for (size_t j
= 0; j
< 8; j
++)
29079 d_word
= (d_word
<< 8) | info
->bytes
[(i
* 8) + j
];
29081 info
->double_words
[i
] = d_word
;
29084 info
->all_double_words_same
29085 = (info
->double_words
[0] == info
->double_words
[1]);
/* Determine if an IEEE 128-bit constant can be loaded with LXVKQ.  Return zero
   if the LXVKQ instruction cannot be used.  Otherwise return the immediate
   value to be used with the LXVKQ instruction.  */

static int
constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
{
  /* Is the instruction supported with power10 code generation, IEEE 128-bit
     floating point hardware and VSX registers are available.  */
  if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
      || !TARGET_VSX)
    return 0;

  /* All of the constants that are generated by LXVKQ have the bottom 3 words
     that are 0.  */
  if (vsx_const->words[1] != 0
      || vsx_const->words[2] != 0
      || vsx_const->words[3] != 0)
    return 0;

  /* See if we have a match for the first word.  */
  switch (vsx_const->words[0])
    {
    case 0x3FFF0000U: return 1;		/* IEEE 128-bit +1.0.  */
    case 0x40000000U: return 2;		/* IEEE 128-bit +2.0.  */
    case 0x40008000U: return 3;		/* IEEE 128-bit +3.0.  */
    case 0x40010000U: return 4;		/* IEEE 128-bit +4.0.  */
    case 0x40014000U: return 5;		/* IEEE 128-bit +5.0.  */
    case 0x40018000U: return 6;		/* IEEE 128-bit +6.0.  */
    case 0x4001C000U: return 7;		/* IEEE 128-bit +7.0.  */
    case 0x7FFF0000U: return 8;		/* IEEE 128-bit +Infinity.  */
    case 0x7FFF8000U: return 9;		/* IEEE 128-bit quiet NaN.  */
    case 0x80000000U: return 16;	/* IEEE 128-bit -0.0.  */
    case 0xBFFF0000U: return 17;	/* IEEE 128-bit -1.0.  */
    case 0xC0000000U: return 18;	/* IEEE 128-bit -2.0.  */
    case 0xC0008000U: return 19;	/* IEEE 128-bit -3.0.  */
    case 0xC0010000U: return 20;	/* IEEE 128-bit -4.0.  */
    case 0xC0014000U: return 21;	/* IEEE 128-bit -5.0.  */
    case 0xC0018000U: return 22;	/* IEEE 128-bit -6.0.  */
    case 0xC001C000U: return 23;	/* IEEE 128-bit -7.0.  */
    case 0xFFFF0000U: return 24;	/* IEEE 128-bit -Infinity.  */

      /* anything else cannot be loaded.  */
    default:
      break;
    }

  return 0;
}
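/* Worked example for the table above: the IEEE 128-bit constant +1.0 has the
   top word 0x3FFF0000 and the remaining words zero, so it can be materialized
   with the LXVKQ immediate 1; likewise -Infinity (top word 0xFFFF0000) uses
   immediate 24.  Any value not listed here makes this function return 0 and
   the constant is loaded by the normal constant-pool or splat paths.  */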
/* Determine if a vector constant can be loaded with XXSPLTIW.  Return zero if
   the XXSPLTIW instruction cannot be used.  Otherwise return the immediate
   value to be used with the XXSPLTIW instruction.  */

static int
constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  if (!vsx_const->all_words_same)
    return 0;

  /* If we can use XXSPLTIB, don't generate XXSPLTIW.  */
  if (vsx_const->all_bytes_same)
    return 0;

  /* See if we can use VSPLTISH or VSPLTISW.  */
  if (vsx_const->all_half_words_same)
    {
      short sign_h_word = vsx_const->half_words[0];
      if (EASY_VECTOR_15 (sign_h_word))
        return 0;
    }

  int sign_word = vsx_const->words[0];
  if (EASY_VECTOR_15 (sign_word))
    return 0;

  return vsx_const->words[0];
}
29172 /* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if
29173 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate
29174 value to be used with the XXSPLTIDP instruction. */
29177 constant_generates_xxspltidp (vec_const_128bit_type
*vsx_const
)
29179 if (!TARGET_SPLAT_FLOAT_CONSTANT
|| !TARGET_PREFIXED
|| !TARGET_VSX
)
29182 /* Reject if the two 64-bit segments are not the same. */
29183 if (!vsx_const
->all_double_words_same
)
29186 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
29187 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */
29188 if (vsx_const
->all_bytes_same
29189 || vsx_const
->all_half_words_same
29190 || vsx_const
->all_words_same
)
29193 unsigned HOST_WIDE_INT value
= vsx_const
->double_words
[0];
29195 /* Avoid values that look like DFmode NaN's, except for the normal NaN bit
29196 pattern and the signalling NaN bit pattern. Recognize infinity and
29197 negative infinity. */
29199 /* Bit representation of DFmode normal quiet NaN. */
29200 #define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000)
29202 /* Bit representation of DFmode normal signaling NaN. */
29203 #define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000)
29205 /* Bit representation of DFmode positive infinity. */
29206 #define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000)
29208 /* Bit representation of DFmode negative infinity. */
29209 #define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000)
29211 if (value
!= RS6000_CONST_DF_NAN
29212 && value
!= RS6000_CONST_DF_NANS
29213 && value
!= RS6000_CONST_DF_INF
29214 && value
!= RS6000_CONST_DF_NEG_INF
)
29216 /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
29217 the exponent, and 52 bits for the mantissa (not counting the hidden
29218 bit used for normal numbers). NaN values have the exponent set to all
29219 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */
29221 int df_exponent
= (value
>> 52) & 0x7ff;
29222 unsigned HOST_WIDE_INT
29223 df_mantissa
= value
& ((HOST_WIDE_INT_1U
<< 52) - HOST_WIDE_INT_1U
);
29225 if (df_exponent
== 0x7ff && df_mantissa
!= 0) /* other NaNs. */
29228 /* Avoid values that are DFmode subnormal values. Subnormal numbers have
29229 the exponent all 0 bits, and the mantissa non-zero. If the value is
29230 subnormal, then the hidden bit in the mantissa is not set. */
29231 if (df_exponent
== 0 && df_mantissa
!= 0) /* subnormal. */
29235 /* Change the representation to DFmode constant. */
29236 long df_words
[2] = { vsx_const
->words
[0], vsx_const
->words
[1] };
29238 /* real_from_target takes the target words in target order. */
29239 if (!BYTES_BIG_ENDIAN
)
29240 std::swap (df_words
[0], df_words
[1]);
29242 REAL_VALUE_TYPE rv_type
;
29243 real_from_target (&rv_type
, df_words
, DFmode
);
29245 const REAL_VALUE_TYPE
*rv
= &rv_type
;
29247 /* Validate that the number can be stored as a SFmode value. */
29248 if (!exact_real_truncate (SFmode
, rv
))
29251 /* Validate that the number is not a SFmode subnormal value (exponent is 0,
29252 mantissa field is non-zero) which is undefined for the XXSPLTIDP
29255 real_to_target (&sf_value
, rv
, SFmode
);
29257 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
29258 and 23 bits for the mantissa. Subnormal numbers have the exponent all
29259 0 bits, and the mantissa non-zero. */
29260 long sf_exponent
= (sf_value
>> 23) & 0xFF;
29261 long sf_mantissa
= sf_value
& 0x7FFFFF;
29263 if (sf_exponent
== 0 && sf_mantissa
!= 0)
29266 /* Return the immediate to be used. */
29270 /* Now we have only two opaque types, they are __vector_quad and
29271 __vector_pair built-in types. They are target specific and
29272 only available when MMA is supported. With MMA supported, it
29273 simply returns true, otherwise it checks if the given gimple
29274 STMT is an assignment, asm or call stmt and uses either of
29275 these two opaque types unexpectedly, if yes, it would raise
29276 an error message and returns true, otherwise it returns false. */
29279 rs6000_opaque_type_invalid_use_p (gimple
*stmt
)
29284 /* If the given TYPE is one MMA opaque type, emit the corresponding
29285 error messages and return true, otherwise return false. */
29286 auto check_and_error_invalid_use
= [](tree type
)
29288 tree mv
= TYPE_MAIN_VARIANT (type
);
29289 if (mv
== vector_quad_type_node
)
29291 error ("type %<__vector_quad%> requires the %qs option", "-mmma");
29294 else if (mv
== vector_pair_type_node
)
29296 error ("type %<__vector_pair%> requires the %qs option", "-mmma");
29304 /* The usage of MMA opaque types is very limited for now,
29305 to check with gassign, gasm and gcall is enough so far. */
29306 if (gassign
*ga
= dyn_cast
<gassign
*> (stmt
))
29308 tree lhs
= gimple_assign_lhs (ga
);
29309 tree type
= TREE_TYPE (lhs
);
29310 if (check_and_error_invalid_use (type
))
29313 else if (gasm
*gs
= dyn_cast
<gasm
*> (stmt
))
29315 unsigned ninputs
= gimple_asm_ninputs (gs
);
29316 for (unsigned i
= 0; i
< ninputs
; i
++)
29318 tree op
= gimple_asm_input_op (gs
, i
);
29319 tree val
= TREE_VALUE (op
);
29320 tree type
= TREE_TYPE (val
);
29321 if (check_and_error_invalid_use (type
))
29324 unsigned noutputs
= gimple_asm_noutputs (gs
);
29325 for (unsigned i
= 0; i
< noutputs
; i
++)
29327 tree op
= gimple_asm_output_op (gs
, i
);
29328 tree val
= TREE_VALUE (op
);
29329 tree type
= TREE_TYPE (val
);
29330 if (check_and_error_invalid_use (type
))
29334 else if (gcall
*gc
= dyn_cast
<gcall
*> (stmt
))
29336 unsigned nargs
= gimple_call_num_args (gc
);
29337 for (unsigned i
= 0; i
< nargs
; i
++)
29339 tree arg
= gimple_call_arg (gc
, i
);
29340 tree type
= TREE_TYPE (arg
);
29341 if (check_and_error_invalid_use (type
))
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"