1 // SPDX-License-Identifier: GPL-3.0-or-later
2 /* Subroutines used for code generation on IBM RS/6000.
3 Copyright (C) 1991-2023 Free Software Foundation, Inc.
4 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #define IN_TARGET_CODE 1
26 #include "coretypes.h"
36 #include "stringpool.h"
43 #include "diagnostic-core.h"
44 #include "insn-attr.h"
47 #include "fold-const.h"
49 #include "stor-layout.h"
51 #include "print-tree.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
59 #include "sched-int.h"
61 #include "gimple-iterator.h"
62 #include "gimple-fold.h"
63 #include "gimple-walk.h"
65 #include "tree-vectorizer.h"
66 #include "tree-ssa-propagate.h"
68 #include "tm-constrs.h"
69 #include "target-globals.h"
71 #include "tree-vector-builder.h"
73 #include "tree-pass.h"
74 #include "symbol-summary.h"
76 #include "ipa-fnsummary.h"
78 #include "case-cfn-macros.h"
80 #include "rs6000-internal.h"
83 /* This file should be included last. */
84 #include "target-def.h"
86 extern tree
rs6000_builtin_mask_for_load (void);
87 extern tree
rs6000_builtin_md_vectorized_function (tree
, tree
, tree
);
88 extern tree
rs6000_builtin_reciprocal (tree
);
90 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
91 systems will also set long double to be IEEE 128-bit. AIX and Darwin
92 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
93 those systems will not pick up this default. This needs to be after all
of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
defined.  */
96 #ifndef TARGET_IEEEQUAD_DEFAULT
97 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
98 #define TARGET_IEEEQUAD_DEFAULT 1
100 #define TARGET_IEEEQUAD_DEFAULT 0
104 /* Don't enable PC-relative addressing if the target does not support it. */
105 #ifndef PCREL_SUPPORTED_BY_OS
106 #define PCREL_SUPPORTED_BY_OS 0
110 /* Counter for labels which are to be placed in .fixup. */
111 int fixuplabelno
= 0;
114 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
117 /* Specify the machine mode that pointers have. After generation of rtl, the
118 compiler makes no further distinction between pointers and any other objects
119 of this machine mode. */
120 scalar_int_mode rs6000_pmode
;
122 /* Track use of r13 in 64bit AIX TLS. */
123 static bool xcoff_tls_exec_model_detected
= false;
125 /* Width in bits of a pointer. */
126 unsigned rs6000_pointer_size
;
128 #ifdef HAVE_AS_GNU_ATTRIBUTE
129 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
130 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
132 /* Flag whether floating point values have been passed/returned.
133 Note that this doesn't say whether fprs are used, since the
134 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
135 should be set for soft-float values passed in gprs and ieee128
136 values passed in vsx registers. */
137 bool rs6000_passes_float
= false;
138 bool rs6000_passes_long_double
= false;
139 /* Flag whether vector values have been passed/returned. */
140 bool rs6000_passes_vector
= false;
141 /* Flag whether small (<= 8 byte) structures have been returned. */
142 bool rs6000_returns_struct
= false;
145 /* Value is TRUE if register/mode pair is acceptable. */
146 static bool rs6000_hard_regno_mode_ok_p
147 [NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
149 /* Maximum number of registers needed for a given register class and mode. */
150 unsigned char rs6000_class_max_nregs
[NUM_MACHINE_MODES
][LIM_REG_CLASSES
];
152 /* How many registers are needed for a given register and mode. */
153 unsigned char rs6000_hard_regno_nregs
[NUM_MACHINE_MODES
][FIRST_PSEUDO_REGISTER
];
155 /* Map register number to register class. */
156 enum reg_class rs6000_regno_regclass
[FIRST_PSEUDO_REGISTER
];
158 static int dbg_cost_ctrl
;
160 /* Flag to say the TOC is initialized */
161 int toc_initialized
, need_toc_init
;
162 char toc_label_name
[10];
164 /* Cached value of rs6000_variable_issue. This is cached in
165 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
166 static short cached_can_issue_more
;
168 static GTY(()) section
*read_only_data_section
;
169 static GTY(()) section
*private_data_section
;
170 static GTY(()) section
*tls_data_section
;
171 static GTY(()) section
*tls_private_data_section
;
172 static GTY(()) section
*read_only_private_data_section
;
173 static GTY(()) section
*sdata2_section
;
175 section
*toc_section
= 0;
177 /* Describe the vector unit used for modes. */
178 enum rs6000_vector rs6000_vector_unit
[NUM_MACHINE_MODES
];
179 enum rs6000_vector rs6000_vector_mem
[NUM_MACHINE_MODES
];
/* Register classes for various constraints that are based on the target
   switches.  */
183 enum reg_class rs6000_constraints
[RS6000_CONSTRAINT_MAX
];
185 /* Describe the alignment of a vector. */
186 int rs6000_vector_align
[NUM_MACHINE_MODES
];
188 /* What modes to automatically generate reciprocal divide estimate (fre) and
189 reciprocal sqrt (frsqrte) for. */
190 unsigned char rs6000_recip_bits
[MAX_MACHINE_MODE
];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
194 enum rs6000_recip_mask
{
195 RECIP_SF_DIV
= 0x001, /* Use divide estimate */
196 RECIP_DF_DIV
= 0x002,
197 RECIP_V4SF_DIV
= 0x004,
198 RECIP_V2DF_DIV
= 0x008,
200 RECIP_SF_RSQRT
= 0x010, /* Use reciprocal sqrt estimate. */
201 RECIP_DF_RSQRT
= 0x020,
202 RECIP_V4SF_RSQRT
= 0x040,
203 RECIP_V2DF_RSQRT
= 0x080,
205 /* Various combination of flags for -mrecip=xxx. */
207 RECIP_ALL
= (RECIP_SF_DIV
| RECIP_DF_DIV
| RECIP_V4SF_DIV
208 | RECIP_V2DF_DIV
| RECIP_SF_RSQRT
| RECIP_DF_RSQRT
209 | RECIP_V4SF_RSQRT
| RECIP_V2DF_RSQRT
),
211 RECIP_HIGH_PRECISION
= RECIP_ALL
,
213 /* On low precision machines like the power5, don't enable double precision
214 reciprocal square root estimate, since it isn't accurate enough. */
215 RECIP_LOW_PRECISION
= (RECIP_ALL
& ~(RECIP_DF_RSQRT
| RECIP_V2DF_RSQRT
))
218 /* -mrecip options. */
221 const char *string
; /* option name */
222 unsigned int mask
; /* mask bits to set */
223 } recip_options
[] = {
224 { "all", RECIP_ALL
},
225 { "none", RECIP_NONE
},
226 { "div", (RECIP_SF_DIV
| RECIP_DF_DIV
| RECIP_V4SF_DIV
228 { "divf", (RECIP_SF_DIV
| RECIP_V4SF_DIV
) },
229 { "divd", (RECIP_DF_DIV
| RECIP_V2DF_DIV
) },
230 { "rsqrt", (RECIP_SF_RSQRT
| RECIP_DF_RSQRT
| RECIP_V4SF_RSQRT
231 | RECIP_V2DF_RSQRT
) },
232 { "rsqrtf", (RECIP_SF_RSQRT
| RECIP_V4SF_RSQRT
) },
233 { "rsqrtd", (RECIP_DF_RSQRT
| RECIP_V2DF_RSQRT
) },
236 /* On PowerPC, we have a limited number of target clones that we care about
237 which means we can use an array to hold the options, rather than having more
238 elaborate data structures to identify each possible variation. Order the
239 clones from the default to the highest ISA. */
241 CLONE_DEFAULT
= 0, /* default clone. */
242 CLONE_ISA_2_05
, /* ISA 2.05 (power6). */
243 CLONE_ISA_2_06
, /* ISA 2.06 (power7). */
244 CLONE_ISA_2_07
, /* ISA 2.07 (power8). */
245 CLONE_ISA_3_00
, /* ISA 3.0 (power9). */
246 CLONE_ISA_3_1
, /* ISA 3.1 (power10). */
250 /* Map compiler ISA bits into HWCAP names. */
252 HOST_WIDE_INT isa_mask
; /* rs6000_isa mask */
253 const char *name
; /* name to use in __builtin_cpu_supports. */
256 static const struct clone_map rs6000_clone_map
[CLONE_MAX
] = {
257 { 0, "" }, /* Default options. */
258 { OPTION_MASK_CMPB
, "arch_2_05" }, /* ISA 2.05 (power6). */
259 { OPTION_MASK_POPCNTD
, "arch_2_06" }, /* ISA 2.06 (power7). */
260 { OPTION_MASK_P8_VECTOR
, "arch_2_07" }, /* ISA 2.07 (power8). */
261 { OPTION_MASK_P9_VECTOR
, "arch_3_00" }, /* ISA 3.0 (power9). */
262 { OPTION_MASK_POWER10
, "arch_3_1" }, /* ISA 3.1 (power10). */
266 /* Newer LIBCs explicitly export this symbol to declare that they provide
267 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
268 reference to this symbol whenever we expand a CPU builtin, so that
269 we never link against an old LIBC. */
270 const char *tcb_verification_symbol
= "__parse_hwcap_and_convert_at_platform";
272 /* True if we have expanded a CPU builtin. */
273 bool cpu_builtin_p
= false;
275 /* Pointer to function (in rs6000-c.cc) that can define or undefine target
276 macros that have changed. Languages that don't support the preprocessor
277 don't link in rs6000-c.cc, so we can't call it directly. */
278 void (*rs6000_target_modify_macros_ptr
) (bool, HOST_WIDE_INT
);
/* Simplify register classes into simpler classifications.  We assume
281 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
282 check for standard register classes (gpr/floating/altivec/vsx) and
283 floating/vector classes (float/altivec/vsx). */
285 enum rs6000_reg_type
{
296 /* Map register class to register type. */
297 static enum rs6000_reg_type reg_class_to_reg_type
[N_REG_CLASSES
];
299 /* First/last register type for the 'normal' register types (i.e. general
300 purpose, floating point, altivec, and VSX registers). */
301 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
303 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
306 /* Register classes we care about in secondary reload or go if legitimate
307 address. We only need to worry about GPR, FPR, and Altivec registers here,
308 along an ANY field that is the OR of the 3 register classes. */
310 enum rs6000_reload_reg_type
{
311 RELOAD_REG_GPR
, /* General purpose registers. */
312 RELOAD_REG_FPR
, /* Traditional floating point regs. */
313 RELOAD_REG_VMX
, /* Altivec (VMX) registers. */
314 RELOAD_REG_ANY
, /* OR of GPR, FPR, Altivec masks. */
318 /* For setting up register classes, loop through the 3 register classes mapping
into real registers, and skip the ANY class, which is just an OR of the
3 classes.  */
321 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
322 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
324 /* Map reload register type to a register in the register class. */
325 struct reload_reg_map_type
{
326 const char *name
; /* Register class name. */
327 int reg
; /* Register in the register class. */
330 static const struct reload_reg_map_type reload_reg_map
[N_RELOAD_REG
] = {
331 { "Gpr", FIRST_GPR_REGNO
}, /* RELOAD_REG_GPR. */
332 { "Fpr", FIRST_FPR_REGNO
}, /* RELOAD_REG_FPR. */
333 { "VMX", FIRST_ALTIVEC_REGNO
}, /* RELOAD_REG_VMX. */
334 { "Any", -1 }, /* RELOAD_REG_ANY. */
337 /* Mask bits for each register class, indexed per mode. Historically the
338 compiler has been more restrictive which types can do PRE_MODIFY instead of
PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
340 typedef unsigned char addr_mask_type
;
342 #define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */
343 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
344 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
345 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
346 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
347 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
348 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
349 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
351 /* Register type masks based on the type, of valid addressing modes. */
352 struct rs6000_reg_addr
{
353 enum insn_code reload_load
; /* INSN to reload for loading. */
354 enum insn_code reload_store
; /* INSN to reload for storing. */
355 enum insn_code reload_fpr_gpr
; /* INSN to move from FPR to GPR. */
356 enum insn_code reload_gpr_vsx
; /* INSN to move from GPR to VSX. */
357 enum insn_code reload_vsx_gpr
; /* INSN to move from VSX to GPR. */
358 addr_mask_type addr_mask
[(int)N_RELOAD_REG
]; /* Valid address masks. */
359 bool scalar_in_vmx_p
; /* Scalar value can go in VMX. */
362 static struct rs6000_reg_addr reg_addr
[NUM_MACHINE_MODES
];
364 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
366 mode_supports_pre_incdec_p (machine_mode mode
)
368 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_INCDEC
)
372 /* Helper function to say whether a mode supports PRE_MODIFY. */
374 mode_supports_pre_modify_p (machine_mode mode
)
376 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_PRE_MODIFY
)
380 /* Return true if we have D-form addressing in altivec registers. */
382 mode_supports_vmx_dform (machine_mode mode
)
384 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_OFFSET
) != 0);
387 /* Return true if we have D-form addressing in VSX registers. This addressing
388 is more limited than normal d-form addressing in that the offset must be
389 aligned on a 16-byte boundary. */
391 mode_supports_dq_form (machine_mode mode
)
393 return ((reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
] & RELOAD_REG_QUAD_OFFSET
)
397 /* Given that there exists at least one variable that is set (produced)
398 by OUT_INSN and read (consumed) by IN_INSN, return true iff
399 IN_INSN represents one or more memory store operations and none of
400 the variables set by OUT_INSN is used by IN_INSN as the address of a
401 store operation. If either IN_INSN or OUT_INSN does not represent
402 a "single" RTL SET expression (as loosely defined by the
403 implementation of the single_set function) or a PARALLEL with only
404 SETs, CLOBBERs, and USEs inside, this function returns false.
406 This rs6000-specific version of store_data_bypass_p checks for
407 certain conditions that result in assertion failures (and internal
408 compiler errors) in the generic store_data_bypass_p function and
409 returns false rather than calling store_data_bypass_p if one of the
410 problematic conditions is detected. */
413 rs6000_store_data_bypass_p (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
420 in_set
= single_set (in_insn
);
423 if (MEM_P (SET_DEST (in_set
)))
425 out_set
= single_set (out_insn
);
428 out_pat
= PATTERN (out_insn
);
429 if (GET_CODE (out_pat
) == PARALLEL
)
431 for (i
= 0; i
< XVECLEN (out_pat
, 0); i
++)
433 out_exp
= XVECEXP (out_pat
, 0, i
);
434 if ((GET_CODE (out_exp
) == CLOBBER
)
435 || (GET_CODE (out_exp
) == USE
))
437 else if (GET_CODE (out_exp
) != SET
)
446 in_pat
= PATTERN (in_insn
);
447 if (GET_CODE (in_pat
) != PARALLEL
)
450 for (i
= 0; i
< XVECLEN (in_pat
, 0); i
++)
452 in_exp
= XVECEXP (in_pat
, 0, i
);
453 if ((GET_CODE (in_exp
) == CLOBBER
) || (GET_CODE (in_exp
) == USE
))
455 else if (GET_CODE (in_exp
) != SET
)
458 if (MEM_P (SET_DEST (in_exp
)))
460 out_set
= single_set (out_insn
);
463 out_pat
= PATTERN (out_insn
);
464 if (GET_CODE (out_pat
) != PARALLEL
)
466 for (j
= 0; j
< XVECLEN (out_pat
, 0); j
++)
468 out_exp
= XVECEXP (out_pat
, 0, j
);
469 if ((GET_CODE (out_exp
) == CLOBBER
)
470 || (GET_CODE (out_exp
) == USE
))
472 else if (GET_CODE (out_exp
) != SET
)
479 return store_data_bypass_p (out_insn
, in_insn
);
483 /* Processor costs (relative to an add) */
485 const struct processor_costs
*rs6000_cost
;
487 /* Instruction size costs on 32bit processors. */
489 struct processor_costs size32_cost
= {
490 COSTS_N_INSNS (1), /* mulsi */
491 COSTS_N_INSNS (1), /* mulsi_const */
492 COSTS_N_INSNS (1), /* mulsi_const9 */
493 COSTS_N_INSNS (1), /* muldi */
494 COSTS_N_INSNS (1), /* divsi */
495 COSTS_N_INSNS (1), /* divdi */
496 COSTS_N_INSNS (1), /* fp */
497 COSTS_N_INSNS (1), /* dmul */
498 COSTS_N_INSNS (1), /* sdiv */
499 COSTS_N_INSNS (1), /* ddiv */
500 32, /* cache line size */
504 0, /* SF->DF convert */
507 /* Instruction size costs on 64bit processors. */
509 struct processor_costs size64_cost
= {
510 COSTS_N_INSNS (1), /* mulsi */
511 COSTS_N_INSNS (1), /* mulsi_const */
512 COSTS_N_INSNS (1), /* mulsi_const9 */
513 COSTS_N_INSNS (1), /* muldi */
514 COSTS_N_INSNS (1), /* divsi */
515 COSTS_N_INSNS (1), /* divdi */
516 COSTS_N_INSNS (1), /* fp */
517 COSTS_N_INSNS (1), /* dmul */
518 COSTS_N_INSNS (1), /* sdiv */
519 COSTS_N_INSNS (1), /* ddiv */
520 128, /* cache line size */
524 0, /* SF->DF convert */
527 /* Instruction costs on RS64A processors. */
529 struct processor_costs rs64a_cost
= {
530 COSTS_N_INSNS (20), /* mulsi */
531 COSTS_N_INSNS (12), /* mulsi_const */
532 COSTS_N_INSNS (8), /* mulsi_const9 */
533 COSTS_N_INSNS (34), /* muldi */
534 COSTS_N_INSNS (65), /* divsi */
535 COSTS_N_INSNS (67), /* divdi */
536 COSTS_N_INSNS (4), /* fp */
537 COSTS_N_INSNS (4), /* dmul */
538 COSTS_N_INSNS (31), /* sdiv */
539 COSTS_N_INSNS (31), /* ddiv */
540 128, /* cache line size */
544 0, /* SF->DF convert */
547 /* Instruction costs on MPCCORE processors. */
549 struct processor_costs mpccore_cost
= {
550 COSTS_N_INSNS (2), /* mulsi */
551 COSTS_N_INSNS (2), /* mulsi_const */
552 COSTS_N_INSNS (2), /* mulsi_const9 */
553 COSTS_N_INSNS (2), /* muldi */
554 COSTS_N_INSNS (6), /* divsi */
555 COSTS_N_INSNS (6), /* divdi */
556 COSTS_N_INSNS (4), /* fp */
557 COSTS_N_INSNS (5), /* dmul */
558 COSTS_N_INSNS (10), /* sdiv */
559 COSTS_N_INSNS (17), /* ddiv */
560 32, /* cache line size */
564 0, /* SF->DF convert */
567 /* Instruction costs on PPC403 processors. */
569 struct processor_costs ppc403_cost
= {
570 COSTS_N_INSNS (4), /* mulsi */
571 COSTS_N_INSNS (4), /* mulsi_const */
572 COSTS_N_INSNS (4), /* mulsi_const9 */
573 COSTS_N_INSNS (4), /* muldi */
574 COSTS_N_INSNS (33), /* divsi */
575 COSTS_N_INSNS (33), /* divdi */
576 COSTS_N_INSNS (11), /* fp */
577 COSTS_N_INSNS (11), /* dmul */
578 COSTS_N_INSNS (11), /* sdiv */
579 COSTS_N_INSNS (11), /* ddiv */
580 32, /* cache line size */
584 0, /* SF->DF convert */
587 /* Instruction costs on PPC405 processors. */
589 struct processor_costs ppc405_cost
= {
590 COSTS_N_INSNS (5), /* mulsi */
591 COSTS_N_INSNS (4), /* mulsi_const */
592 COSTS_N_INSNS (3), /* mulsi_const9 */
593 COSTS_N_INSNS (5), /* muldi */
594 COSTS_N_INSNS (35), /* divsi */
595 COSTS_N_INSNS (35), /* divdi */
596 COSTS_N_INSNS (11), /* fp */
597 COSTS_N_INSNS (11), /* dmul */
598 COSTS_N_INSNS (11), /* sdiv */
599 COSTS_N_INSNS (11), /* ddiv */
600 32, /* cache line size */
604 0, /* SF->DF convert */
607 /* Instruction costs on PPC440 processors. */
609 struct processor_costs ppc440_cost
= {
610 COSTS_N_INSNS (3), /* mulsi */
611 COSTS_N_INSNS (2), /* mulsi_const */
612 COSTS_N_INSNS (2), /* mulsi_const9 */
613 COSTS_N_INSNS (3), /* muldi */
614 COSTS_N_INSNS (34), /* divsi */
615 COSTS_N_INSNS (34), /* divdi */
616 COSTS_N_INSNS (5), /* fp */
617 COSTS_N_INSNS (5), /* dmul */
618 COSTS_N_INSNS (19), /* sdiv */
619 COSTS_N_INSNS (33), /* ddiv */
620 32, /* cache line size */
624 0, /* SF->DF convert */
627 /* Instruction costs on PPC476 processors. */
629 struct processor_costs ppc476_cost
= {
630 COSTS_N_INSNS (4), /* mulsi */
631 COSTS_N_INSNS (4), /* mulsi_const */
632 COSTS_N_INSNS (4), /* mulsi_const9 */
633 COSTS_N_INSNS (4), /* muldi */
634 COSTS_N_INSNS (11), /* divsi */
635 COSTS_N_INSNS (11), /* divdi */
636 COSTS_N_INSNS (6), /* fp */
637 COSTS_N_INSNS (6), /* dmul */
638 COSTS_N_INSNS (19), /* sdiv */
639 COSTS_N_INSNS (33), /* ddiv */
640 32, /* l1 cache line size */
644 0, /* SF->DF convert */
647 /* Instruction costs on PPC601 processors. */
649 struct processor_costs ppc601_cost
= {
650 COSTS_N_INSNS (5), /* mulsi */
651 COSTS_N_INSNS (5), /* mulsi_const */
652 COSTS_N_INSNS (5), /* mulsi_const9 */
653 COSTS_N_INSNS (5), /* muldi */
654 COSTS_N_INSNS (36), /* divsi */
655 COSTS_N_INSNS (36), /* divdi */
656 COSTS_N_INSNS (4), /* fp */
657 COSTS_N_INSNS (5), /* dmul */
658 COSTS_N_INSNS (17), /* sdiv */
659 COSTS_N_INSNS (31), /* ddiv */
660 32, /* cache line size */
664 0, /* SF->DF convert */
667 /* Instruction costs on PPC603 processors. */
669 struct processor_costs ppc603_cost
= {
670 COSTS_N_INSNS (5), /* mulsi */
671 COSTS_N_INSNS (3), /* mulsi_const */
672 COSTS_N_INSNS (2), /* mulsi_const9 */
673 COSTS_N_INSNS (5), /* muldi */
674 COSTS_N_INSNS (37), /* divsi */
675 COSTS_N_INSNS (37), /* divdi */
676 COSTS_N_INSNS (3), /* fp */
677 COSTS_N_INSNS (4), /* dmul */
678 COSTS_N_INSNS (18), /* sdiv */
679 COSTS_N_INSNS (33), /* ddiv */
680 32, /* cache line size */
684 0, /* SF->DF convert */
687 /* Instruction costs on PPC604 processors. */
689 struct processor_costs ppc604_cost
= {
690 COSTS_N_INSNS (4), /* mulsi */
691 COSTS_N_INSNS (4), /* mulsi_const */
692 COSTS_N_INSNS (4), /* mulsi_const9 */
693 COSTS_N_INSNS (4), /* muldi */
694 COSTS_N_INSNS (20), /* divsi */
695 COSTS_N_INSNS (20), /* divdi */
696 COSTS_N_INSNS (3), /* fp */
697 COSTS_N_INSNS (3), /* dmul */
698 COSTS_N_INSNS (18), /* sdiv */
699 COSTS_N_INSNS (32), /* ddiv */
700 32, /* cache line size */
704 0, /* SF->DF convert */
707 /* Instruction costs on PPC604e processors. */
709 struct processor_costs ppc604e_cost
= {
710 COSTS_N_INSNS (2), /* mulsi */
711 COSTS_N_INSNS (2), /* mulsi_const */
712 COSTS_N_INSNS (2), /* mulsi_const9 */
713 COSTS_N_INSNS (2), /* muldi */
714 COSTS_N_INSNS (20), /* divsi */
715 COSTS_N_INSNS (20), /* divdi */
716 COSTS_N_INSNS (3), /* fp */
717 COSTS_N_INSNS (3), /* dmul */
718 COSTS_N_INSNS (18), /* sdiv */
719 COSTS_N_INSNS (32), /* ddiv */
720 32, /* cache line size */
724 0, /* SF->DF convert */
727 /* Instruction costs on PPC620 processors. */
729 struct processor_costs ppc620_cost
= {
730 COSTS_N_INSNS (5), /* mulsi */
731 COSTS_N_INSNS (4), /* mulsi_const */
732 COSTS_N_INSNS (3), /* mulsi_const9 */
733 COSTS_N_INSNS (7), /* muldi */
734 COSTS_N_INSNS (21), /* divsi */
735 COSTS_N_INSNS (37), /* divdi */
736 COSTS_N_INSNS (3), /* fp */
737 COSTS_N_INSNS (3), /* dmul */
738 COSTS_N_INSNS (18), /* sdiv */
739 COSTS_N_INSNS (32), /* ddiv */
740 128, /* cache line size */
744 0, /* SF->DF convert */
747 /* Instruction costs on PPC630 processors. */
749 struct processor_costs ppc630_cost
= {
750 COSTS_N_INSNS (5), /* mulsi */
751 COSTS_N_INSNS (4), /* mulsi_const */
752 COSTS_N_INSNS (3), /* mulsi_const9 */
753 COSTS_N_INSNS (7), /* muldi */
754 COSTS_N_INSNS (21), /* divsi */
755 COSTS_N_INSNS (37), /* divdi */
756 COSTS_N_INSNS (3), /* fp */
757 COSTS_N_INSNS (3), /* dmul */
758 COSTS_N_INSNS (17), /* sdiv */
759 COSTS_N_INSNS (21), /* ddiv */
760 128, /* cache line size */
764 0, /* SF->DF convert */
767 /* Instruction costs on Cell processor. */
768 /* COSTS_N_INSNS (1) ~ one add. */
770 struct processor_costs ppccell_cost
= {
771 COSTS_N_INSNS (9/2)+2, /* mulsi */
772 COSTS_N_INSNS (6/2), /* mulsi_const */
773 COSTS_N_INSNS (6/2), /* mulsi_const9 */
774 COSTS_N_INSNS (15/2)+2, /* muldi */
775 COSTS_N_INSNS (38/2), /* divsi */
776 COSTS_N_INSNS (70/2), /* divdi */
777 COSTS_N_INSNS (10/2), /* fp */
778 COSTS_N_INSNS (10/2), /* dmul */
779 COSTS_N_INSNS (74/2), /* sdiv */
780 COSTS_N_INSNS (74/2), /* ddiv */
781 128, /* cache line size */
785 0, /* SF->DF convert */
788 /* Instruction costs on PPC750 and PPC7400 processors. */
790 struct processor_costs ppc750_cost
= {
791 COSTS_N_INSNS (5), /* mulsi */
792 COSTS_N_INSNS (3), /* mulsi_const */
793 COSTS_N_INSNS (2), /* mulsi_const9 */
794 COSTS_N_INSNS (5), /* muldi */
795 COSTS_N_INSNS (17), /* divsi */
796 COSTS_N_INSNS (17), /* divdi */
797 COSTS_N_INSNS (3), /* fp */
798 COSTS_N_INSNS (3), /* dmul */
799 COSTS_N_INSNS (17), /* sdiv */
800 COSTS_N_INSNS (31), /* ddiv */
801 32, /* cache line size */
805 0, /* SF->DF convert */
808 /* Instruction costs on PPC7450 processors. */
810 struct processor_costs ppc7450_cost
= {
811 COSTS_N_INSNS (4), /* mulsi */
812 COSTS_N_INSNS (3), /* mulsi_const */
813 COSTS_N_INSNS (3), /* mulsi_const9 */
814 COSTS_N_INSNS (4), /* muldi */
815 COSTS_N_INSNS (23), /* divsi */
816 COSTS_N_INSNS (23), /* divdi */
817 COSTS_N_INSNS (5), /* fp */
818 COSTS_N_INSNS (5), /* dmul */
819 COSTS_N_INSNS (21), /* sdiv */
820 COSTS_N_INSNS (35), /* ddiv */
821 32, /* cache line size */
825 0, /* SF->DF convert */
828 /* Instruction costs on PPC8540 processors. */
830 struct processor_costs ppc8540_cost
= {
831 COSTS_N_INSNS (4), /* mulsi */
832 COSTS_N_INSNS (4), /* mulsi_const */
833 COSTS_N_INSNS (4), /* mulsi_const9 */
834 COSTS_N_INSNS (4), /* muldi */
835 COSTS_N_INSNS (19), /* divsi */
836 COSTS_N_INSNS (19), /* divdi */
837 COSTS_N_INSNS (4), /* fp */
838 COSTS_N_INSNS (4), /* dmul */
839 COSTS_N_INSNS (29), /* sdiv */
840 COSTS_N_INSNS (29), /* ddiv */
841 32, /* cache line size */
844 1, /* prefetch streams /*/
845 0, /* SF->DF convert */
848 /* Instruction costs on E300C2 and E300C3 cores. */
850 struct processor_costs ppce300c2c3_cost
= {
851 COSTS_N_INSNS (4), /* mulsi */
852 COSTS_N_INSNS (4), /* mulsi_const */
853 COSTS_N_INSNS (4), /* mulsi_const9 */
854 COSTS_N_INSNS (4), /* muldi */
855 COSTS_N_INSNS (19), /* divsi */
856 COSTS_N_INSNS (19), /* divdi */
857 COSTS_N_INSNS (3), /* fp */
858 COSTS_N_INSNS (4), /* dmul */
859 COSTS_N_INSNS (18), /* sdiv */
860 COSTS_N_INSNS (33), /* ddiv */
864 1, /* prefetch streams /*/
865 0, /* SF->DF convert */
868 /* Instruction costs on PPCE500MC processors. */
870 struct processor_costs ppce500mc_cost
= {
871 COSTS_N_INSNS (4), /* mulsi */
872 COSTS_N_INSNS (4), /* mulsi_const */
873 COSTS_N_INSNS (4), /* mulsi_const9 */
874 COSTS_N_INSNS (4), /* muldi */
875 COSTS_N_INSNS (14), /* divsi */
876 COSTS_N_INSNS (14), /* divdi */
877 COSTS_N_INSNS (8), /* fp */
878 COSTS_N_INSNS (10), /* dmul */
879 COSTS_N_INSNS (36), /* sdiv */
880 COSTS_N_INSNS (66), /* ddiv */
881 64, /* cache line size */
884 1, /* prefetch streams /*/
885 0, /* SF->DF convert */
888 /* Instruction costs on PPCE500MC64 processors. */
890 struct processor_costs ppce500mc64_cost
= {
891 COSTS_N_INSNS (4), /* mulsi */
892 COSTS_N_INSNS (4), /* mulsi_const */
893 COSTS_N_INSNS (4), /* mulsi_const9 */
894 COSTS_N_INSNS (4), /* muldi */
895 COSTS_N_INSNS (14), /* divsi */
896 COSTS_N_INSNS (14), /* divdi */
897 COSTS_N_INSNS (4), /* fp */
898 COSTS_N_INSNS (10), /* dmul */
899 COSTS_N_INSNS (36), /* sdiv */
900 COSTS_N_INSNS (66), /* ddiv */
901 64, /* cache line size */
904 1, /* prefetch streams /*/
905 0, /* SF->DF convert */
908 /* Instruction costs on PPCE5500 processors. */
910 struct processor_costs ppce5500_cost
= {
911 COSTS_N_INSNS (5), /* mulsi */
912 COSTS_N_INSNS (5), /* mulsi_const */
913 COSTS_N_INSNS (4), /* mulsi_const9 */
914 COSTS_N_INSNS (5), /* muldi */
915 COSTS_N_INSNS (14), /* divsi */
916 COSTS_N_INSNS (14), /* divdi */
917 COSTS_N_INSNS (7), /* fp */
918 COSTS_N_INSNS (10), /* dmul */
919 COSTS_N_INSNS (36), /* sdiv */
920 COSTS_N_INSNS (66), /* ddiv */
921 64, /* cache line size */
924 1, /* prefetch streams /*/
925 0, /* SF->DF convert */
928 /* Instruction costs on PPCE6500 processors. */
930 struct processor_costs ppce6500_cost
= {
931 COSTS_N_INSNS (5), /* mulsi */
932 COSTS_N_INSNS (5), /* mulsi_const */
933 COSTS_N_INSNS (4), /* mulsi_const9 */
934 COSTS_N_INSNS (5), /* muldi */
935 COSTS_N_INSNS (14), /* divsi */
936 COSTS_N_INSNS (14), /* divdi */
937 COSTS_N_INSNS (7), /* fp */
938 COSTS_N_INSNS (10), /* dmul */
939 COSTS_N_INSNS (36), /* sdiv */
940 COSTS_N_INSNS (66), /* ddiv */
941 64, /* cache line size */
944 1, /* prefetch streams /*/
945 0, /* SF->DF convert */
948 /* Instruction costs on AppliedMicro Titan processors. */
950 struct processor_costs titan_cost
= {
951 COSTS_N_INSNS (5), /* mulsi */
952 COSTS_N_INSNS (5), /* mulsi_const */
953 COSTS_N_INSNS (5), /* mulsi_const9 */
954 COSTS_N_INSNS (5), /* muldi */
955 COSTS_N_INSNS (18), /* divsi */
956 COSTS_N_INSNS (18), /* divdi */
957 COSTS_N_INSNS (10), /* fp */
958 COSTS_N_INSNS (10), /* dmul */
959 COSTS_N_INSNS (46), /* sdiv */
960 COSTS_N_INSNS (72), /* ddiv */
961 32, /* cache line size */
964 1, /* prefetch streams /*/
965 0, /* SF->DF convert */
968 /* Instruction costs on POWER4 and POWER5 processors. */
970 struct processor_costs power4_cost
= {
971 COSTS_N_INSNS (3), /* mulsi */
972 COSTS_N_INSNS (2), /* mulsi_const */
973 COSTS_N_INSNS (2), /* mulsi_const9 */
974 COSTS_N_INSNS (4), /* muldi */
975 COSTS_N_INSNS (18), /* divsi */
976 COSTS_N_INSNS (34), /* divdi */
977 COSTS_N_INSNS (3), /* fp */
978 COSTS_N_INSNS (3), /* dmul */
979 COSTS_N_INSNS (17), /* sdiv */
980 COSTS_N_INSNS (17), /* ddiv */
981 128, /* cache line size */
984 8, /* prefetch streams /*/
985 0, /* SF->DF convert */
988 /* Instruction costs on POWER6 processors. */
990 struct processor_costs power6_cost
= {
991 COSTS_N_INSNS (8), /* mulsi */
992 COSTS_N_INSNS (8), /* mulsi_const */
993 COSTS_N_INSNS (8), /* mulsi_const9 */
994 COSTS_N_INSNS (8), /* muldi */
995 COSTS_N_INSNS (22), /* divsi */
996 COSTS_N_INSNS (28), /* divdi */
997 COSTS_N_INSNS (3), /* fp */
998 COSTS_N_INSNS (3), /* dmul */
999 COSTS_N_INSNS (13), /* sdiv */
1000 COSTS_N_INSNS (16), /* ddiv */
1001 128, /* cache line size */
1003 2048, /* l2 cache */
1004 16, /* prefetch streams */
1005 0, /* SF->DF convert */
1008 /* Instruction costs on POWER7 processors. */
1010 struct processor_costs power7_cost
= {
1011 COSTS_N_INSNS (2), /* mulsi */
1012 COSTS_N_INSNS (2), /* mulsi_const */
1013 COSTS_N_INSNS (2), /* mulsi_const9 */
1014 COSTS_N_INSNS (2), /* muldi */
1015 COSTS_N_INSNS (18), /* divsi */
1016 COSTS_N_INSNS (34), /* divdi */
1017 COSTS_N_INSNS (3), /* fp */
1018 COSTS_N_INSNS (3), /* dmul */
1019 COSTS_N_INSNS (13), /* sdiv */
1020 COSTS_N_INSNS (16), /* ddiv */
1021 128, /* cache line size */
1024 12, /* prefetch streams */
1025 COSTS_N_INSNS (3), /* SF->DF convert */
1028 /* Instruction costs on POWER8 processors. */
1030 struct processor_costs power8_cost
= {
1031 COSTS_N_INSNS (3), /* mulsi */
1032 COSTS_N_INSNS (3), /* mulsi_const */
1033 COSTS_N_INSNS (3), /* mulsi_const9 */
1034 COSTS_N_INSNS (3), /* muldi */
1035 COSTS_N_INSNS (19), /* divsi */
1036 COSTS_N_INSNS (35), /* divdi */
1037 COSTS_N_INSNS (3), /* fp */
1038 COSTS_N_INSNS (3), /* dmul */
1039 COSTS_N_INSNS (14), /* sdiv */
1040 COSTS_N_INSNS (17), /* ddiv */
1041 128, /* cache line size */
1044 12, /* prefetch streams */
1045 COSTS_N_INSNS (3), /* SF->DF convert */
1048 /* Instruction costs on POWER9 processors. */
1050 struct processor_costs power9_cost
= {
1051 COSTS_N_INSNS (3), /* mulsi */
1052 COSTS_N_INSNS (3), /* mulsi_const */
1053 COSTS_N_INSNS (3), /* mulsi_const9 */
1054 COSTS_N_INSNS (3), /* muldi */
1055 COSTS_N_INSNS (8), /* divsi */
1056 COSTS_N_INSNS (12), /* divdi */
1057 COSTS_N_INSNS (3), /* fp */
1058 COSTS_N_INSNS (3), /* dmul */
1059 COSTS_N_INSNS (13), /* sdiv */
1060 COSTS_N_INSNS (18), /* ddiv */
1061 128, /* cache line size */
1064 8, /* prefetch streams */
1065 COSTS_N_INSNS (3), /* SF->DF convert */
1068 /* Instruction costs on POWER10 processors. */
1070 struct processor_costs power10_cost
= {
1071 COSTS_N_INSNS (2), /* mulsi */
1072 COSTS_N_INSNS (2), /* mulsi_const */
1073 COSTS_N_INSNS (2), /* mulsi_const9 */
1074 COSTS_N_INSNS (2), /* muldi */
1075 COSTS_N_INSNS (6), /* divsi */
1076 COSTS_N_INSNS (6), /* divdi */
1077 COSTS_N_INSNS (2), /* fp */
1078 COSTS_N_INSNS (2), /* dmul */
1079 COSTS_N_INSNS (11), /* sdiv */
1080 COSTS_N_INSNS (13), /* ddiv */
1081 128, /* cache line size */
1084 16, /* prefetch streams */
1085 COSTS_N_INSNS (2), /* SF->DF convert */
1088 /* Instruction costs on POWER A2 processors. */
1090 struct processor_costs ppca2_cost
= {
1091 COSTS_N_INSNS (16), /* mulsi */
1092 COSTS_N_INSNS (16), /* mulsi_const */
1093 COSTS_N_INSNS (16), /* mulsi_const9 */
1094 COSTS_N_INSNS (16), /* muldi */
1095 COSTS_N_INSNS (22), /* divsi */
1096 COSTS_N_INSNS (28), /* divdi */
1097 COSTS_N_INSNS (3), /* fp */
1098 COSTS_N_INSNS (3), /* dmul */
1099 COSTS_N_INSNS (59), /* sdiv */
1100 COSTS_N_INSNS (72), /* ddiv */
1103 2048, /* l2 cache */
1104 16, /* prefetch streams */
1105 0, /* SF->DF convert */
1108 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1109 static tree (*rs6000_veclib_handler
) (combined_fn
, tree
, tree
);
1112 static bool rs6000_debug_legitimate_address_p (machine_mode
, rtx
, bool);
1113 static tree
rs6000_handle_longcall_attribute (tree
*, tree
, tree
, int, bool *);
1114 static tree
rs6000_handle_altivec_attribute (tree
*, tree
, tree
, int, bool *);
1115 static tree
rs6000_handle_struct_attribute (tree
*, tree
, tree
, int, bool *);
1116 static tree
rs6000_builtin_vectorized_libmass (combined_fn
, tree
, tree
);
1117 static void rs6000_emit_set_long_const (rtx
, HOST_WIDE_INT
);
1118 static int rs6000_memory_move_cost (machine_mode
, reg_class_t
, bool);
1119 static bool rs6000_debug_rtx_costs (rtx
, machine_mode
, int, int, int *, bool);
1120 static int rs6000_debug_address_cost (rtx
, machine_mode
, addr_space_t
,
1122 static int rs6000_debug_adjust_cost (rtx_insn
*, int, rtx_insn
*, int,
1124 static bool is_microcoded_insn (rtx_insn
*);
1125 static bool is_nonpipeline_insn (rtx_insn
*);
1126 static bool is_cracked_insn (rtx_insn
*);
1127 static bool is_load_insn (rtx
, rtx
*);
1128 static bool is_store_insn (rtx
, rtx
*);
1129 static bool set_to_load_agen (rtx_insn
*,rtx_insn
*);
1130 static bool insn_terminates_group_p (rtx_insn
*, enum group_termination
);
1131 static bool insn_must_be_first_in_group (rtx_insn
*);
1132 static bool insn_must_be_last_in_group (rtx_insn
*);
1133 bool easy_vector_constant (rtx
, machine_mode
);
1134 static rtx
rs6000_debug_legitimize_address (rtx
, rtx
, machine_mode
);
1135 static rtx
rs6000_legitimize_tls_address (rtx
, enum tls_model
);
1137 static tree
get_prev_label (tree
);
1139 static bool rs6000_mode_dependent_address (const_rtx
);
1140 static bool rs6000_debug_mode_dependent_address (const_rtx
);
1141 static bool rs6000_offsettable_memref_p (rtx
, machine_mode
, bool);
1142 static enum reg_class
rs6000_secondary_reload_class (enum reg_class
,
1144 static enum reg_class
rs6000_debug_secondary_reload_class (enum reg_class
,
1147 static enum reg_class
rs6000_preferred_reload_class (rtx
, enum reg_class
);
1148 static enum reg_class
rs6000_debug_preferred_reload_class (rtx
,
1150 static bool rs6000_debug_secondary_memory_needed (machine_mode
,
1153 static bool rs6000_debug_can_change_mode_class (machine_mode
,
1157 static bool (*rs6000_mode_dependent_address_ptr
) (const_rtx
)
1158 = rs6000_mode_dependent_address
;
1160 enum reg_class (*rs6000_secondary_reload_class_ptr
) (enum reg_class
,
1162 = rs6000_secondary_reload_class
;
1164 enum reg_class (*rs6000_preferred_reload_class_ptr
) (rtx
, enum reg_class
)
1165 = rs6000_preferred_reload_class
;
1167 const int INSN_NOT_AVAILABLE
= -1;
1169 static void rs6000_print_isa_options (FILE *, int, const char *,
1171 static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void);
1173 static enum rs6000_reg_type
register_to_reg_type (rtx
, bool *);
1174 static bool rs6000_secondary_reload_move (enum rs6000_reg_type
,
1175 enum rs6000_reg_type
,
1177 secondary_reload_info
*,
1179 rtl_opt_pass
*make_pass_analyze_swaps (gcc::context
*);
1181 /* Hash table stuff for keeping track of TOC entries. */
1183 struct GTY((for_user
)) toc_hash_struct
1185 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1186 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1188 machine_mode key_mode
;
1192 struct toc_hasher
: ggc_ptr_hash
<toc_hash_struct
>
1194 static hashval_t
hash (toc_hash_struct
*);
1195 static bool equal (toc_hash_struct
*, toc_hash_struct
*);
1198 static GTY (()) hash_table
<toc_hasher
> *toc_hash_table
;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#ifdef TARGET_REGNAMES
/* Alternate register names used when -mregnames is in effect.  */
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif
1255 /* Table of valid machine attributes. */
1257 static const struct attribute_spec rs6000_attribute_table
[] =
1259 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1260 affects_type_identity, handler, exclude } */
1261 { "altivec", 1, 1, false, true, false, false,
1262 rs6000_handle_altivec_attribute
, NULL
},
1263 { "longcall", 0, 0, false, true, true, false,
1264 rs6000_handle_longcall_attribute
, NULL
},
1265 { "shortcall", 0, 0, false, true, true, false,
1266 rs6000_handle_longcall_attribute
, NULL
},
1267 { "ms_struct", 0, 0, false, false, false, false,
1268 rs6000_handle_struct_attribute
, NULL
},
1269 { "gcc_struct", 0, 0, false, false, false, false,
1270 rs6000_handle_struct_attribute
, NULL
},
1271 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1272 SUBTARGET_ATTRIBUTE_TABLE
,
1274 { NULL
, 0, 0, false, false, false, false, NULL
, NULL
}
1277 #ifndef TARGET_PROFILE_KERNEL
1278 #define TARGET_PROFILE_KERNEL 0
1281 /* Initialize the GCC target structure. */
1282 #undef TARGET_ATTRIBUTE_TABLE
1283 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1284 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1285 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1286 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1287 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1289 #undef TARGET_ASM_ALIGNED_DI_OP
1290 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1292 /* Default unaligned ops are only provided for ELF. Find the ops needed
1293 for non-ELF systems. */
1294 #ifndef OBJECT_FORMAT_ELF
1296 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1298 #undef TARGET_ASM_UNALIGNED_HI_OP
1299 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1300 #undef TARGET_ASM_UNALIGNED_SI_OP
1301 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1302 #undef TARGET_ASM_UNALIGNED_DI_OP
1303 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1306 #undef TARGET_ASM_UNALIGNED_HI_OP
1307 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1308 #undef TARGET_ASM_UNALIGNED_SI_OP
1309 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1310 #undef TARGET_ASM_UNALIGNED_DI_OP
1311 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1312 #undef TARGET_ASM_ALIGNED_DI_OP
1313 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1317 /* This hook deals with fixups for relocatable code and DI-mode objects
1319 #undef TARGET_ASM_INTEGER
1320 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1322 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1323 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1324 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1327 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
1328 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
1329 rs6000_print_patchable_function_entry
1331 #undef TARGET_SET_UP_BY_PROLOGUE
1332 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1334 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1335 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1336 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1337 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1338 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1339 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1340 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1341 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1342 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1343 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1344 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1345 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1347 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1348 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1350 #undef TARGET_INTERNAL_ARG_POINTER
1351 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1353 #undef TARGET_HAVE_TLS
1354 #define TARGET_HAVE_TLS HAVE_AS_TLS
1356 #undef TARGET_CANNOT_FORCE_CONST_MEM
1357 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1359 #undef TARGET_DELEGITIMIZE_ADDRESS
1360 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1362 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1363 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1365 #undef TARGET_LEGITIMATE_COMBINED_INSN
1366 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1368 #undef TARGET_ASM_FUNCTION_PROLOGUE
1369 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1370 #undef TARGET_ASM_FUNCTION_EPILOGUE
1371 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1373 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1374 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1376 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1377 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1379 #undef TARGET_LEGITIMIZE_ADDRESS
1380 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1382 #undef TARGET_SCHED_VARIABLE_ISSUE
1383 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1385 #undef TARGET_SCHED_ISSUE_RATE
1386 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1387 #undef TARGET_SCHED_ADJUST_COST
1388 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1389 #undef TARGET_SCHED_ADJUST_PRIORITY
1390 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1391 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1392 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1393 #undef TARGET_SCHED_INIT
1394 #define TARGET_SCHED_INIT rs6000_sched_init
1395 #undef TARGET_SCHED_FINISH
1396 #define TARGET_SCHED_FINISH rs6000_sched_finish
1397 #undef TARGET_SCHED_REORDER
1398 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1399 #undef TARGET_SCHED_REORDER2
1400 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1402 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1403 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1405 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1406 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1408 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1409 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1410 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1411 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1412 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1413 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1414 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1415 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1417 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1418 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1420 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1421 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1422 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1423 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1424 rs6000_builtin_support_vector_misalignment
1425 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1426 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1427 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1428 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1429 rs6000_builtin_vectorization_cost
1430 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1431 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1432 rs6000_preferred_simd_mode
1433 #undef TARGET_VECTORIZE_CREATE_COSTS
1434 #define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs
1436 #undef TARGET_LOOP_UNROLL_ADJUST
1437 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1439 #undef TARGET_INIT_BUILTINS
1440 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1441 #undef TARGET_BUILTIN_DECL
1442 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1444 #undef TARGET_FOLD_BUILTIN
1445 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1446 #undef TARGET_GIMPLE_FOLD_BUILTIN
1447 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1449 #undef TARGET_EXPAND_BUILTIN
1450 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1452 #undef TARGET_MANGLE_TYPE
1453 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1455 #undef TARGET_INIT_LIBFUNCS
1456 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1459 #undef TARGET_BINDS_LOCAL_P
1460 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1463 #undef TARGET_MS_BITFIELD_LAYOUT_P
1464 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1466 #undef TARGET_ASM_OUTPUT_MI_THUNK
1467 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1469 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1470 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1472 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1473 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1475 #undef TARGET_REGISTER_MOVE_COST
1476 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1477 #undef TARGET_MEMORY_MOVE_COST
1478 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1479 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1480 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1481 rs6000_ira_change_pseudo_allocno_class
1482 #undef TARGET_CANNOT_COPY_INSN_P
1483 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1484 #undef TARGET_RTX_COSTS
1485 #define TARGET_RTX_COSTS rs6000_rtx_costs
1486 #undef TARGET_ADDRESS_COST
1487 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1488 #undef TARGET_INSN_COST
1489 #define TARGET_INSN_COST rs6000_insn_cost
1491 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1492 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1494 #undef TARGET_PROMOTE_FUNCTION_MODE
1495 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1497 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1498 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1500 #undef TARGET_RETURN_IN_MEMORY
1501 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1503 #undef TARGET_RETURN_IN_MSB
1504 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1506 #undef TARGET_SETUP_INCOMING_VARARGS
1507 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1509 /* Always strict argument naming on rs6000. */
1510 #undef TARGET_STRICT_ARGUMENT_NAMING
1511 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1512 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1513 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1514 #undef TARGET_SPLIT_COMPLEX_ARG
1515 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1516 #undef TARGET_MUST_PASS_IN_STACK
1517 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1518 #undef TARGET_PASS_BY_REFERENCE
1519 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1520 #undef TARGET_ARG_PARTIAL_BYTES
1521 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1522 #undef TARGET_FUNCTION_ARG_ADVANCE
1523 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1524 #undef TARGET_FUNCTION_ARG
1525 #define TARGET_FUNCTION_ARG rs6000_function_arg
1526 #undef TARGET_FUNCTION_ARG_PADDING
1527 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1528 #undef TARGET_FUNCTION_ARG_BOUNDARY
1529 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1531 #undef TARGET_BUILD_BUILTIN_VA_LIST
1532 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1534 #undef TARGET_EXPAND_BUILTIN_VA_START
1535 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1537 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1538 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1540 #undef TARGET_EH_RETURN_FILTER_MODE
1541 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1543 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1544 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1546 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1547 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1549 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
1550 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
1551 rs6000_libgcc_floating_mode_supported_p
1553 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1554 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1556 #undef TARGET_FLOATN_MODE
1557 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1559 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1560 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1562 #undef TARGET_MD_ASM_ADJUST
1563 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1565 #undef TARGET_OPTION_OVERRIDE
1566 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1568 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1569 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1570 rs6000_builtin_vectorized_function
1572 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1573 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1574 rs6000_builtin_md_vectorized_function
1576 #undef TARGET_STACK_PROTECT_GUARD
1577 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1580 #undef TARGET_STACK_PROTECT_FAIL
1581 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1585 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1586 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1589 /* Use a 32-bit anchor range. This leads to sequences like:
1591 addis tmp,anchor,high
1594 where tmp itself acts as an anchor, and can be shared between
1595 accesses to the same 64k page. */
1596 #undef TARGET_MIN_ANCHOR_OFFSET
1597 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1598 #undef TARGET_MAX_ANCHOR_OFFSET
1599 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1600 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1601 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1602 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1603 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1605 #undef TARGET_BUILTIN_RECIPROCAL
1606 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1608 #undef TARGET_SECONDARY_RELOAD
1609 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1610 #undef TARGET_SECONDARY_MEMORY_NEEDED
1611 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1612 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1613 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1615 #undef TARGET_LEGITIMATE_ADDRESS_P
1616 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1618 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1619 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1621 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1622 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1624 #undef TARGET_CAN_ELIMINATE
1625 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1628 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1630 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1631 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1633 #undef TARGET_TRAMPOLINE_INIT
1634 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1636 #undef TARGET_FUNCTION_VALUE
1637 #define TARGET_FUNCTION_VALUE rs6000_function_value
1639 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1640 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1642 #undef TARGET_OPTION_SAVE
1643 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1645 #undef TARGET_OPTION_RESTORE
1646 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1648 #undef TARGET_OPTION_PRINT
1649 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1651 #undef TARGET_CAN_INLINE_P
1652 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1654 #undef TARGET_SET_CURRENT_FUNCTION
1655 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1657 #undef TARGET_LEGITIMATE_CONSTANT_P
1658 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1660 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1661 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1663 #undef TARGET_CAN_USE_DOLOOP_P
1664 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1666 #undef TARGET_PREDICT_DOLOOP_P
1667 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1669 #undef TARGET_HAVE_COUNT_REG_DECR_P
1670 #define TARGET_HAVE_COUNT_REG_DECR_P true
1672 /* 1000000000 is infinite cost in IVOPTs. */
1673 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1674 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1676 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1677 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1679 #undef TARGET_PREFERRED_DOLOOP_MODE
1680 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1682 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1683 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1685 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1686 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1687 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1688 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1689 #undef TARGET_UNWIND_WORD_MODE
1690 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1692 #undef TARGET_OFFLOAD_OPTIONS
1693 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1695 #undef TARGET_C_MODE_FOR_SUFFIX
1696 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1698 #undef TARGET_INVALID_BINARY_OP
1699 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1701 #undef TARGET_OPTAB_SUPPORTED_P
1702 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1704 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1705 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1707 #undef TARGET_COMPARE_VERSION_PRIORITY
1708 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1710 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1711 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1712 rs6000_generate_version_dispatcher_body
1714 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1715 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1716 rs6000_get_function_versions_dispatcher
1718 #undef TARGET_OPTION_FUNCTION_VERSIONS
1719 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1721 #undef TARGET_HARD_REGNO_NREGS
1722 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1723 #undef TARGET_HARD_REGNO_MODE_OK
1724 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1726 #undef TARGET_MODES_TIEABLE_P
1727 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1729 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1730 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1731 rs6000_hard_regno_call_part_clobbered
1733 #undef TARGET_SLOW_UNALIGNED_ACCESS
1734 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1736 #undef TARGET_CAN_CHANGE_MODE_CLASS
1737 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1739 #undef TARGET_CONSTANT_ALIGNMENT
1740 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1742 #undef TARGET_STARTING_FRAME_OFFSET
1743 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1745 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1746 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1748 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1749 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1751 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1752 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1753 rs6000_cannot_substitute_mem_equiv_p
1755 #undef TARGET_INVALID_CONVERSION
1756 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1758 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1759 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1761 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1762 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1765 /* Processor table. */
1768 const char *const name
; /* Canonical processor name. */
1769 const enum processor_type processor
; /* Processor type enum value. */
1770 const HOST_WIDE_INT target_enable
; /* Target flags to enable. */
1773 static struct rs6000_ptt
const processor_target_table
[] =
1775 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1776 #include "rs6000-cpus.def"
1780 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1784 rs6000_cpu_name_lookup (const char *name
)
1790 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
1791 if (! strcmp (name
, processor_target_table
[i
].name
))
1799 /* Return number of consecutive hard regs needed starting at reg REGNO
1800 to hold something of mode MODE.
1801 This is ordinarily the length in words of a value of mode MODE
1802 but can be less for certain modes in special long registers.
1804 POWER and PowerPC GPRs hold 32 bits worth;
1805 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */
1808 rs6000_hard_regno_nregs_internal (int regno
, machine_mode mode
)
1810 unsigned HOST_WIDE_INT reg_size
;
1812 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1813 128-bit floating point that can go in vector registers, which has VSX
1814 memory addressing. */
1815 if (FP_REGNO_P (regno
))
1816 reg_size
= (VECTOR_MEM_VSX_P (mode
) || VECTOR_ALIGNMENT_P (mode
)
1817 ? UNITS_PER_VSX_WORD
1818 : UNITS_PER_FP_WORD
);
1820 else if (ALTIVEC_REGNO_P (regno
))
1821 reg_size
= UNITS_PER_ALTIVEC_WORD
;
1824 reg_size
= UNITS_PER_WORD
;
1826 return (GET_MODE_SIZE (mode
) + reg_size
- 1) / reg_size
;
1829 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1832 rs6000_hard_regno_mode_ok_uncached (int regno
, machine_mode mode
)
1834 int last_regno
= regno
+ rs6000_hard_regno_nregs
[mode
][regno
] - 1;
1836 if (COMPLEX_MODE_P (mode
))
1837 mode
= GET_MODE_INNER (mode
);
1839 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1842 return (TARGET_MMA
&& VSX_REGNO_P (regno
) && (regno
& 1) == 0);
1844 /* MMA accumulator modes need FPR registers divisible by 4. */
1846 return (TARGET_MMA
&& FP_REGNO_P (regno
) && (regno
& 3) == 0);
1848 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1849 register combinations, and use PTImode where we need to deal with quad
1850 word memory operations. Don't allow quad words in the argument or frame
1851 pointer registers, just registers 0..31. */
1852 if (mode
== PTImode
)
1853 return (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
1854 && IN_RANGE (last_regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
)
1855 && ((regno
& 1) == 0));
1857 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1858 implementations. Don't allow an item to be split between a FP register
1859 and an Altivec register. Allow TImode in all VSX registers if the user
1861 if (TARGET_VSX
&& VSX_REGNO_P (regno
)
1862 && (VECTOR_MEM_VSX_P (mode
)
1863 || VECTOR_ALIGNMENT_P (mode
)
1864 || reg_addr
[mode
].scalar_in_vmx_p
1866 || (TARGET_VADDUQM
&& mode
== V1TImode
)))
1868 if (FP_REGNO_P (regno
))
1869 return FP_REGNO_P (last_regno
);
1871 if (ALTIVEC_REGNO_P (regno
))
1873 if (GET_MODE_SIZE (mode
) < 16 && !reg_addr
[mode
].scalar_in_vmx_p
)
1876 return ALTIVEC_REGNO_P (last_regno
);
1880 /* The GPRs can hold any mode, but values bigger than one register
1881 cannot go past R31. */
1882 if (INT_REGNO_P (regno
))
1883 return INT_REGNO_P (last_regno
);
1885 /* The float registers (except for VSX vector modes) can only hold floating
1886 modes and DImode. */
1887 if (FP_REGNO_P (regno
))
1889 if (VECTOR_ALIGNMENT_P (mode
))
1892 if (SCALAR_FLOAT_MODE_P (mode
)
1893 && (mode
!= TDmode
|| (regno
% 2) == 0)
1894 && FP_REGNO_P (last_regno
))
1897 if (GET_MODE_CLASS (mode
) == MODE_INT
)
1899 if(GET_MODE_SIZE (mode
) == UNITS_PER_FP_WORD
)
1902 if (TARGET_P8_VECTOR
&& (mode
== SImode
))
1905 if (TARGET_P9_VECTOR
&& (mode
== QImode
|| mode
== HImode
))
1912 /* The CR register can only hold CC modes. */
1913 if (CR_REGNO_P (regno
))
1914 return GET_MODE_CLASS (mode
) == MODE_CC
;
1916 if (CA_REGNO_P (regno
))
1917 return mode
== Pmode
|| mode
== SImode
;
1919 /* AltiVec only in AldyVec registers. */
1920 if (ALTIVEC_REGNO_P (regno
))
1921 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
1922 || mode
== V1TImode
);
1924 /* We cannot put non-VSX TImode or PTImode anywhere except general register
1925 and it must be able to fit within the register set. */
1927 return GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
;
1930 /* Implement TARGET_HARD_REGNO_NREGS. */
1933 rs6000_hard_regno_nregs_hook (unsigned int regno
, machine_mode mode
)
1935 return rs6000_hard_regno_nregs
[mode
][regno
];
1938 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1941 rs6000_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
1943 return rs6000_hard_regno_mode_ok_p
[mode
][regno
];
1946 /* Implement TARGET_MODES_TIEABLE_P.
1948 PTImode cannot tie with other modes because PTImode is restricted to even
1949 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1952 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1953 registers) or XOmode (vector quad, restricted to FPR registers divisible
1954 by 4) to tie with other modes.
1956 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1957 128-bit floating point on VSX systems ties with other vectors. */
1960 rs6000_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
1962 if (mode1
== PTImode
|| mode1
== OOmode
|| mode1
== XOmode
1963 || mode2
== PTImode
|| mode2
== OOmode
|| mode2
== XOmode
)
1964 return mode1
== mode2
;
1966 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1
))
1967 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2
);
1968 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2
))
1971 if (SCALAR_FLOAT_MODE_P (mode1
))
1972 return SCALAR_FLOAT_MODE_P (mode2
);
1973 if (SCALAR_FLOAT_MODE_P (mode2
))
1976 if (GET_MODE_CLASS (mode1
) == MODE_CC
)
1977 return GET_MODE_CLASS (mode2
) == MODE_CC
;
1978 if (GET_MODE_CLASS (mode2
) == MODE_CC
)
1984 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1987 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno
,
1992 && GET_MODE_SIZE (mode
) > 4
1993 && INT_REGNO_P (regno
))
1997 && FP_REGNO_P (regno
)
1998 && GET_MODE_SIZE (mode
) > 8
1999 && !FLOAT128_2REG_P (mode
))
2005 /* Print interesting facts about registers. */
2007 rs6000_debug_reg_print (int first_regno
, int last_regno
, const char *reg_name
)
2011 for (r
= first_regno
; r
<= last_regno
; ++r
)
2013 const char *comma
= "";
2016 if (first_regno
== last_regno
)
2017 fprintf (stderr
, "%s:\t", reg_name
);
2019 fprintf (stderr
, "%s%d:\t", reg_name
, r
- first_regno
);
2022 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2023 if (rs6000_hard_regno_mode_ok_p
[m
][r
] && rs6000_hard_regno_nregs
[m
][r
])
2027 fprintf (stderr
, ",\n\t");
2032 if (rs6000_hard_regno_nregs
[m
][r
] > 1)
2033 len
+= fprintf (stderr
, "%s%s/%d", comma
, GET_MODE_NAME (m
),
2034 rs6000_hard_regno_nregs
[m
][r
]);
2036 len
+= fprintf (stderr
, "%s%s", comma
, GET_MODE_NAME (m
));
2041 if (call_used_or_fixed_reg_p (r
))
2045 fprintf (stderr
, ",\n\t");
2050 len
+= fprintf (stderr
, "%s%s", comma
, "call-used");
2058 fprintf (stderr
, ",\n\t");
2063 len
+= fprintf (stderr
, "%s%s", comma
, "fixed");
2069 fprintf (stderr
, ",\n\t");
2073 len
+= fprintf (stderr
, "%sreg-class = %s", comma
,
2074 reg_class_names
[(int)rs6000_regno_regclass
[r
]]);
2079 fprintf (stderr
, ",\n\t");
2083 fprintf (stderr
, "%sregno = %d\n", comma
, r
);
2088 rs6000_debug_vector_unit (enum rs6000_vector v
)
2094 case VECTOR_NONE
: ret
= "none"; break;
2095 case VECTOR_ALTIVEC
: ret
= "altivec"; break;
2096 case VECTOR_VSX
: ret
= "vsx"; break;
2097 case VECTOR_P8_VECTOR
: ret
= "p8_vector"; break;
2098 default: ret
= "unknown"; break;
2104 /* Inner function printing just the address mask for a particular reload
2106 DEBUG_FUNCTION
char *
2107 rs6000_debug_addr_mask (addr_mask_type mask
, bool keep_spaces
)
2112 if ((mask
& RELOAD_REG_VALID
) != 0)
2114 else if (keep_spaces
)
2117 if ((mask
& RELOAD_REG_MULTIPLE
) != 0)
2119 else if (keep_spaces
)
2122 if ((mask
& RELOAD_REG_INDEXED
) != 0)
2124 else if (keep_spaces
)
2127 if ((mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
2129 else if ((mask
& RELOAD_REG_OFFSET
) != 0)
2131 else if (keep_spaces
)
2134 if ((mask
& RELOAD_REG_PRE_INCDEC
) != 0)
2136 else if (keep_spaces
)
2139 if ((mask
& RELOAD_REG_PRE_MODIFY
) != 0)
2141 else if (keep_spaces
)
2144 if ((mask
& RELOAD_REG_AND_M16
) != 0)
2146 else if (keep_spaces
)
2154 /* Print the address masks in a human readble fashion. */
2156 rs6000_debug_print_mode (ssize_t m
)
2161 fprintf (stderr
, "Mode: %-5s", GET_MODE_NAME (m
));
2162 for (rc
= 0; rc
< N_RELOAD_REG
; rc
++)
2163 fprintf (stderr
, " %s: %s", reload_reg_map
[rc
].name
,
2164 rs6000_debug_addr_mask (reg_addr
[m
].addr_mask
[rc
], true));
2166 if ((reg_addr
[m
].reload_store
!= CODE_FOR_nothing
)
2167 || (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
))
2169 fprintf (stderr
, "%*s Reload=%c%c", spaces
, "",
2170 (reg_addr
[m
].reload_store
!= CODE_FOR_nothing
) ? 's' : '*',
2171 (reg_addr
[m
].reload_load
!= CODE_FOR_nothing
) ? 'l' : '*');
2175 spaces
+= strlen (" Reload=sl");
2177 if (reg_addr
[m
].scalar_in_vmx_p
)
2179 fprintf (stderr
, "%*s Upper=y", spaces
, "");
2183 spaces
+= strlen (" Upper=y");
2185 if (rs6000_vector_unit
[m
] != VECTOR_NONE
2186 || rs6000_vector_mem
[m
] != VECTOR_NONE
)
2188 fprintf (stderr
, "%*s vector: arith=%-10s mem=%s",
2190 rs6000_debug_vector_unit (rs6000_vector_unit
[m
]),
2191 rs6000_debug_vector_unit (rs6000_vector_mem
[m
]));
2194 fputs ("\n", stderr
);
2197 #define DEBUG_FMT_ID "%-32s= "
2198 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2199 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2200 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2202 /* Print various interesting information with -mdebug=reg. */
2204 rs6000_debug_reg_global (void)
2206 static const char *const tf
[2] = { "false", "true" };
2207 const char *nl
= (const char *)0;
2210 char costly_num
[20];
2212 char flags_buffer
[40];
2213 const char *costly_str
;
2214 const char *nop_str
;
2215 const char *trace_str
;
2216 const char *abi_str
;
2217 const char *cmodel_str
;
2218 struct cl_target_option cl_opts
;
2220 /* Modes we want tieable information on. */
2221 static const machine_mode print_tieable_modes
[] = {
2260 /* Virtual regs we are interested in. */
2261 const static struct {
2262 int regno
; /* register number. */
2263 const char *name
; /* register name. */
2264 } virtual_regs
[] = {
2265 { STACK_POINTER_REGNUM
, "stack pointer:" },
2266 { TOC_REGNUM
, "toc: " },
2267 { STATIC_CHAIN_REGNUM
, "static chain: " },
2268 { RS6000_PIC_OFFSET_TABLE_REGNUM
, "pic offset: " },
2269 { HARD_FRAME_POINTER_REGNUM
, "hard frame: " },
2270 { ARG_POINTER_REGNUM
, "arg pointer: " },
2271 { FRAME_POINTER_REGNUM
, "frame pointer:" },
2272 { FIRST_PSEUDO_REGISTER
, "first pseudo: " },
2273 { FIRST_VIRTUAL_REGISTER
, "first virtual:" },
2274 { VIRTUAL_INCOMING_ARGS_REGNUM
, "incoming_args:" },
2275 { VIRTUAL_STACK_VARS_REGNUM
, "stack_vars: " },
2276 { VIRTUAL_STACK_DYNAMIC_REGNUM
, "stack_dynamic:" },
2277 { VIRTUAL_OUTGOING_ARGS_REGNUM
, "outgoing_args:" },
2278 { VIRTUAL_CFA_REGNUM
, "cfa (frame): " },
2279 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM
, "stack boundry:" },
2280 { LAST_VIRTUAL_REGISTER
, "last virtual: " },
2283 fputs ("\nHard register information:\n", stderr
);
2284 rs6000_debug_reg_print (FIRST_GPR_REGNO
, LAST_GPR_REGNO
, "gr");
2285 rs6000_debug_reg_print (FIRST_FPR_REGNO
, LAST_FPR_REGNO
, "fp");
2286 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO
,
2289 rs6000_debug_reg_print (LR_REGNO
, LR_REGNO
, "lr");
2290 rs6000_debug_reg_print (CTR_REGNO
, CTR_REGNO
, "ctr");
2291 rs6000_debug_reg_print (CR0_REGNO
, CR7_REGNO
, "cr");
2292 rs6000_debug_reg_print (CA_REGNO
, CA_REGNO
, "ca");
2293 rs6000_debug_reg_print (VRSAVE_REGNO
, VRSAVE_REGNO
, "vrsave");
2294 rs6000_debug_reg_print (VSCR_REGNO
, VSCR_REGNO
, "vscr");
2296 fputs ("\nVirtual/stack/frame registers:\n", stderr
);
2297 for (v
= 0; v
< ARRAY_SIZE (virtual_regs
); v
++)
2298 fprintf (stderr
, "%s regno = %3d\n", virtual_regs
[v
].name
, virtual_regs
[v
].regno
);
2302 "d reg_class = %s\n"
2303 "v reg_class = %s\n"
2304 "wa reg_class = %s\n"
2305 "we reg_class = %s\n"
2306 "wr reg_class = %s\n"
2307 "wx reg_class = %s\n"
2308 "wA reg_class = %s\n"
2310 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_d
]],
2311 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_v
]],
2312 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wa
]],
2313 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_we
]],
2314 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wr
]],
2315 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wx
]],
2316 reg_class_names
[rs6000_constraints
[RS6000_CONSTRAINT_wA
]]);
2319 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2320 rs6000_debug_print_mode (m
);
2322 fputs ("\n", stderr
);
2324 for (m1
= 0; m1
< ARRAY_SIZE (print_tieable_modes
); m1
++)
2326 machine_mode mode1
= print_tieable_modes
[m1
];
2327 bool first_time
= true;
2329 nl
= (const char *)0;
2330 for (m2
= 0; m2
< ARRAY_SIZE (print_tieable_modes
); m2
++)
2332 machine_mode mode2
= print_tieable_modes
[m2
];
2333 if (mode1
!= mode2
&& rs6000_modes_tieable_p (mode1
, mode2
))
2337 fprintf (stderr
, "Tieable modes %s:", GET_MODE_NAME (mode1
));
2342 fprintf (stderr
, " %s", GET_MODE_NAME (mode2
));
2347 fputs ("\n", stderr
);
2353 if (rs6000_recip_control
)
2355 fprintf (stderr
, "\nReciprocal mask = 0x%x\n", rs6000_recip_control
);
2357 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2358 if (rs6000_recip_bits
[m
])
2361 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2363 (RS6000_RECIP_AUTO_RE_P (m
)
2365 : (RS6000_RECIP_HAVE_RE_P (m
) ? "have" : "none")),
2366 (RS6000_RECIP_AUTO_RSQRTE_P (m
)
2368 : (RS6000_RECIP_HAVE_RSQRTE_P (m
) ? "have" : "none")));
2371 fputs ("\n", stderr
);
2374 if (rs6000_cpu_index
>= 0)
2376 const char *name
= processor_target_table
[rs6000_cpu_index
].name
;
2378 = processor_target_table
[rs6000_cpu_index
].target_enable
;
2380 sprintf (flags_buffer
, "-mcpu=%s flags", name
);
2381 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2384 fprintf (stderr
, DEBUG_FMT_S
, "cpu", "<none>");
2386 if (rs6000_tune_index
>= 0)
2388 const char *name
= processor_target_table
[rs6000_tune_index
].name
;
2390 = processor_target_table
[rs6000_tune_index
].target_enable
;
2392 sprintf (flags_buffer
, "-mtune=%s flags", name
);
2393 rs6000_print_isa_options (stderr
, 0, flags_buffer
, flags
);
2396 fprintf (stderr
, DEBUG_FMT_S
, "tune", "<none>");
2398 cl_target_option_save (&cl_opts
, &global_options
, &global_options_set
);
2399 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags",
2402 rs6000_print_isa_options (stderr
, 0, "rs6000_isa_flags_explicit",
2403 rs6000_isa_flags_explicit
);
2405 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
2407 fprintf (stderr
, DEBUG_FMT_S
, "--with-cpu default",
2408 OPTION_TARGET_CPU_DEFAULT
? OPTION_TARGET_CPU_DEFAULT
: "<none>");
2410 switch (rs6000_sched_costly_dep
)
2412 case max_dep_latency
:
2413 costly_str
= "max_dep_latency";
2417 costly_str
= "no_dep_costly";
2420 case all_deps_costly
:
2421 costly_str
= "all_deps_costly";
2424 case true_store_to_load_dep_costly
:
2425 costly_str
= "true_store_to_load_dep_costly";
2428 case store_to_load_dep_costly
:
2429 costly_str
= "store_to_load_dep_costly";
2433 costly_str
= costly_num
;
2434 sprintf (costly_num
, "%d", (int)rs6000_sched_costly_dep
);
2438 fprintf (stderr
, DEBUG_FMT_S
, "sched_costly_dep", costly_str
);
2440 switch (rs6000_sched_insert_nops
)
2442 case sched_finish_regroup_exact
:
2443 nop_str
= "sched_finish_regroup_exact";
2446 case sched_finish_pad_groups
:
2447 nop_str
= "sched_finish_pad_groups";
2450 case sched_finish_none
:
2451 nop_str
= "sched_finish_none";
2456 sprintf (nop_num
, "%d", (int)rs6000_sched_insert_nops
);
2460 fprintf (stderr
, DEBUG_FMT_S
, "sched_insert_nops", nop_str
);
2462 switch (rs6000_sdata
)
2469 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "data");
2473 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "sysv");
2477 fprintf (stderr
, DEBUG_FMT_S
, "sdata", "eabi");
2482 switch (rs6000_traceback
)
2484 case traceback_default
: trace_str
= "default"; break;
2485 case traceback_none
: trace_str
= "none"; break;
2486 case traceback_part
: trace_str
= "part"; break;
2487 case traceback_full
: trace_str
= "full"; break;
2488 default: trace_str
= "unknown"; break;
2491 fprintf (stderr
, DEBUG_FMT_S
, "traceback", trace_str
);
2493 switch (rs6000_current_cmodel
)
2495 case CMODEL_SMALL
: cmodel_str
= "small"; break;
2496 case CMODEL_MEDIUM
: cmodel_str
= "medium"; break;
2497 case CMODEL_LARGE
: cmodel_str
= "large"; break;
2498 default: cmodel_str
= "unknown"; break;
2501 fprintf (stderr
, DEBUG_FMT_S
, "cmodel", cmodel_str
);
2503 switch (rs6000_current_abi
)
2505 case ABI_NONE
: abi_str
= "none"; break;
2506 case ABI_AIX
: abi_str
= "aix"; break;
2507 case ABI_ELFv2
: abi_str
= "ELFv2"; break;
2508 case ABI_V4
: abi_str
= "V4"; break;
2509 case ABI_DARWIN
: abi_str
= "darwin"; break;
2510 default: abi_str
= "unknown"; break;
2513 fprintf (stderr
, DEBUG_FMT_S
, "abi", abi_str
);
2515 if (rs6000_altivec_abi
)
2516 fprintf (stderr
, DEBUG_FMT_S
, "altivec_abi", "true");
2518 if (rs6000_aix_extabi
)
2519 fprintf (stderr
, DEBUG_FMT_S
, "AIX vec-extabi", "true");
2521 if (rs6000_darwin64_abi
)
2522 fprintf (stderr
, DEBUG_FMT_S
, "darwin64_abi", "true");
2524 fprintf (stderr
, DEBUG_FMT_S
, "soft_float",
2525 (TARGET_SOFT_FLOAT
? "true" : "false"));
2527 if (TARGET_LINK_STACK
)
2528 fprintf (stderr
, DEBUG_FMT_S
, "link_stack", "true");
2530 if (TARGET_P8_FUSION
)
2534 strcpy (options
, "power8");
2535 if (TARGET_P8_FUSION_SIGN
)
2536 strcat (options
, ", sign");
2538 fprintf (stderr
, DEBUG_FMT_S
, "fusion", options
);
2541 fprintf (stderr
, DEBUG_FMT_S
, "plt-format",
2542 TARGET_SECURE_PLT
? "secure" : "bss");
2543 fprintf (stderr
, DEBUG_FMT_S
, "struct-return",
2544 aix_struct_return
? "aix" : "sysv");
2545 fprintf (stderr
, DEBUG_FMT_S
, "always_hint", tf
[!!rs6000_always_hint
]);
2546 fprintf (stderr
, DEBUG_FMT_S
, "sched_groups", tf
[!!rs6000_sched_groups
]);
2547 fprintf (stderr
, DEBUG_FMT_S
, "align_branch",
2548 tf
[!!rs6000_align_branch_targets
]);
2549 fprintf (stderr
, DEBUG_FMT_D
, "tls_size", rs6000_tls_size
);
2550 fprintf (stderr
, DEBUG_FMT_D
, "long_double_size",
2551 rs6000_long_double_type_size
);
2552 if (rs6000_long_double_type_size
> 64)
2554 fprintf (stderr
, DEBUG_FMT_S
, "long double type",
2555 TARGET_IEEEQUAD
? "IEEE" : "IBM");
2556 fprintf (stderr
, DEBUG_FMT_S
, "default long double type",
2557 TARGET_IEEEQUAD_DEFAULT
? "IEEE" : "IBM");
2559 fprintf (stderr
, DEBUG_FMT_D
, "sched_restricted_insns_priority",
2560 (int)rs6000_sched_restricted_insns_priority
);
2561 fprintf (stderr
, DEBUG_FMT_D
, "Number of standard builtins",
2564 fprintf (stderr
, DEBUG_FMT_D
, "Enable float128 on VSX",
2565 (int)TARGET_FLOAT128_ENABLE_TYPE
);
2568 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit scalar element",
2569 (int)VECTOR_ELEMENT_SCALAR_64BIT
);
2571 if (TARGET_DIRECT_MOVE_128
)
2572 fprintf (stderr
, DEBUG_FMT_D
, "VSX easy 64-bit mfvsrld element",
2573 (int)VECTOR_ELEMENT_MFVSRLD_64BIT
);
2577 /* Update the addr mask bits in reg_addr to help secondary reload and go if
2578 legitimate address support to figure out the appropriate addressing to
2582 rs6000_setup_reg_addr_masks (void)
2584 ssize_t rc
, reg
, m
, nregs
;
2585 addr_mask_type any_addr_mask
, addr_mask
;
2587 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
2589 machine_mode m2
= (machine_mode
) m
;
2590 bool complex_p
= false;
2591 bool small_int_p
= (m2
== QImode
|| m2
== HImode
|| m2
== SImode
);
2594 if (COMPLEX_MODE_P (m2
))
2597 m2
= GET_MODE_INNER (m2
);
2600 msize
= GET_MODE_SIZE (m2
);
2602 /* SDmode is special in that we want to access it only via REG+REG
2603 addressing on power7 and above, since we want to use the LFIWZX and
2604 STFIWZX instructions to load it. */
2605 bool indexed_only_p
= (m
== SDmode
&& TARGET_NO_SDMODE_STACK
);
2608 for (rc
= FIRST_RELOAD_REG_CLASS
; rc
<= LAST_RELOAD_REG_CLASS
; rc
++)
2611 reg
= reload_reg_map
[rc
].reg
;
2613 /* Can mode values go in the GPR/FPR/Altivec registers? */
2614 if (reg
>= 0 && rs6000_hard_regno_mode_ok_p
[m
][reg
])
2616 bool small_int_vsx_p
= (small_int_p
2617 && (rc
== RELOAD_REG_FPR
2618 || rc
== RELOAD_REG_VMX
));
2620 nregs
= rs6000_hard_regno_nregs
[m
][reg
];
2621 addr_mask
|= RELOAD_REG_VALID
;
2623 /* Indicate if the mode takes more than 1 physical register. If
2624 it takes a single register, indicate it can do REG+REG
2625 addressing. Small integers in VSX registers can only do
2626 REG+REG addressing. */
2627 if (small_int_vsx_p
)
2628 addr_mask
|= RELOAD_REG_INDEXED
;
2629 else if (nregs
> 1 || m
== BLKmode
|| complex_p
)
2630 addr_mask
|= RELOAD_REG_MULTIPLE
;
2632 addr_mask
|= RELOAD_REG_INDEXED
;
2634 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2635 addressing. If we allow scalars into Altivec registers,
2636 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2638 For VSX systems, we don't allow update addressing for
2639 DFmode/SFmode if those registers can go in both the
2640 traditional floating point registers and Altivec registers.
2641 The load/store instructions for the Altivec registers do not
2642 have update forms. If we allowed update addressing, it seems
2643 to break IV-OPT code using floating point if the index type is
2644 int instead of long (PR target/81550 and target/84042). */
2647 && (rc
== RELOAD_REG_GPR
|| rc
== RELOAD_REG_FPR
)
2649 && !VECTOR_MODE_P (m2
)
2650 && !VECTOR_ALIGNMENT_P (m2
)
2652 && (m
!= E_DFmode
|| !TARGET_VSX
)
2653 && (m
!= E_SFmode
|| !TARGET_P8_VECTOR
)
2654 && !small_int_vsx_p
)
2656 addr_mask
|= RELOAD_REG_PRE_INCDEC
;
2658 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2659 we don't allow PRE_MODIFY for some multi-register
2664 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2668 if (TARGET_POWERPC64
)
2669 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2674 if (TARGET_HARD_FLOAT
)
2675 addr_mask
|= RELOAD_REG_PRE_MODIFY
;
2681 /* GPR and FPR registers can do REG+OFFSET addressing, except
2682 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2683 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2684 if ((addr_mask
!= 0) && !indexed_only_p
2686 && (rc
== RELOAD_REG_GPR
2687 || ((msize
== 8 || m2
== SFmode
)
2688 && (rc
== RELOAD_REG_FPR
2689 || (rc
== RELOAD_REG_VMX
&& TARGET_P9_VECTOR
)))))
2690 addr_mask
|= RELOAD_REG_OFFSET
;
2692 /* VSX registers can do REG+OFFSET addresssing if ISA 3.0
2693 instructions are enabled. The offset for 128-bit VSX registers is
2694 only 12-bits. While GPRs can handle the full offset range, VSX
2695 registers can only handle the restricted range. */
2696 else if ((addr_mask
!= 0) && !indexed_only_p
2697 && msize
== 16 && TARGET_P9_VECTOR
2698 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2
)
2699 || (m2
== TImode
&& TARGET_VSX
)))
2701 addr_mask
|= RELOAD_REG_OFFSET
;
2702 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2703 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2706 /* Vector pairs can do both indexed and offset loads if the
2707 instructions are enabled, otherwise they can only do offset loads
2708 since it will be broken into two vector moves. Vector quads can
2709 only do offset loads. */
2710 else if ((addr_mask
!= 0) && TARGET_MMA
2711 && (m2
== OOmode
|| m2
== XOmode
))
2713 addr_mask
|= RELOAD_REG_OFFSET
;
2714 if (rc
== RELOAD_REG_FPR
|| rc
== RELOAD_REG_VMX
)
2716 addr_mask
|= RELOAD_REG_QUAD_OFFSET
;
2718 addr_mask
|= RELOAD_REG_INDEXED
;
2722 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2723 addressing on 128-bit types. */
2724 if (rc
== RELOAD_REG_VMX
&& msize
== 16
2725 && (addr_mask
& RELOAD_REG_VALID
) != 0)
2726 addr_mask
|= RELOAD_REG_AND_M16
;
2728 reg_addr
[m
].addr_mask
[rc
] = addr_mask
;
2729 any_addr_mask
|= addr_mask
;
2732 reg_addr
[m
].addr_mask
[RELOAD_REG_ANY
] = any_addr_mask
;
2737 /* Initialize the various global tables that are based on register size. */
2739 rs6000_init_hard_regno_mode_ok (bool global_init_p
)
2745 /* Precalculate REGNO_REG_CLASS. */
2746 rs6000_regno_regclass
[0] = GENERAL_REGS
;
2747 for (r
= 1; r
< 32; ++r
)
2748 rs6000_regno_regclass
[r
] = BASE_REGS
;
2750 for (r
= 32; r
< 64; ++r
)
2751 rs6000_regno_regclass
[r
] = FLOAT_REGS
;
2753 for (r
= 64; HARD_REGISTER_NUM_P (r
); ++r
)
2754 rs6000_regno_regclass
[r
] = NO_REGS
;
2756 for (r
= FIRST_ALTIVEC_REGNO
; r
<= LAST_ALTIVEC_REGNO
; ++r
)
2757 rs6000_regno_regclass
[r
] = ALTIVEC_REGS
;
2759 rs6000_regno_regclass
[CR0_REGNO
] = CR0_REGS
;
2760 for (r
= CR1_REGNO
; r
<= CR7_REGNO
; ++r
)
2761 rs6000_regno_regclass
[r
] = CR_REGS
;
2763 rs6000_regno_regclass
[LR_REGNO
] = LINK_REGS
;
2764 rs6000_regno_regclass
[CTR_REGNO
] = CTR_REGS
;
2765 rs6000_regno_regclass
[CA_REGNO
] = NO_REGS
;
2766 rs6000_regno_regclass
[VRSAVE_REGNO
] = VRSAVE_REGS
;
2767 rs6000_regno_regclass
[VSCR_REGNO
] = VRSAVE_REGS
;
2768 rs6000_regno_regclass
[ARG_POINTER_REGNUM
] = BASE_REGS
;
2769 rs6000_regno_regclass
[FRAME_POINTER_REGNUM
] = BASE_REGS
;
2771 /* Precalculate register class to simpler reload register class. We don't
2772 need all of the register classes that are combinations of different
2773 classes, just the simple ones that have constraint letters. */
2774 for (c
= 0; c
< N_REG_CLASSES
; c
++)
2775 reg_class_to_reg_type
[c
] = NO_REG_TYPE
;
2777 reg_class_to_reg_type
[(int)GENERAL_REGS
] = GPR_REG_TYPE
;
2778 reg_class_to_reg_type
[(int)BASE_REGS
] = GPR_REG_TYPE
;
2779 reg_class_to_reg_type
[(int)VSX_REGS
] = VSX_REG_TYPE
;
2780 reg_class_to_reg_type
[(int)VRSAVE_REGS
] = SPR_REG_TYPE
;
2781 reg_class_to_reg_type
[(int)VSCR_REGS
] = SPR_REG_TYPE
;
2782 reg_class_to_reg_type
[(int)LINK_REGS
] = SPR_REG_TYPE
;
2783 reg_class_to_reg_type
[(int)CTR_REGS
] = SPR_REG_TYPE
;
2784 reg_class_to_reg_type
[(int)LINK_OR_CTR_REGS
] = SPR_REG_TYPE
;
2785 reg_class_to_reg_type
[(int)CR_REGS
] = CR_REG_TYPE
;
2786 reg_class_to_reg_type
[(int)CR0_REGS
] = CR_REG_TYPE
;
2790 reg_class_to_reg_type
[(int)FLOAT_REGS
] = VSX_REG_TYPE
;
2791 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = VSX_REG_TYPE
;
2795 reg_class_to_reg_type
[(int)FLOAT_REGS
] = FPR_REG_TYPE
;
2796 reg_class_to_reg_type
[(int)ALTIVEC_REGS
] = ALTIVEC_REG_TYPE
;
2799 /* Precalculate the valid memory formats as well as the vector information,
2800 this must be set up before the rs6000_hard_regno_nregs_internal calls
2802 gcc_assert ((int)VECTOR_NONE
== 0);
2803 memset ((void *) &rs6000_vector_unit
[0], '\0', sizeof (rs6000_vector_unit
));
2804 memset ((void *) &rs6000_vector_mem
[0], '\0', sizeof (rs6000_vector_mem
));
2806 gcc_assert ((int)CODE_FOR_nothing
== 0);
2807 memset ((void *) ®_addr
[0], '\0', sizeof (reg_addr
));
2809 gcc_assert ((int)NO_REGS
== 0);
2810 memset ((void *) &rs6000_constraints
[0], '\0', sizeof (rs6000_constraints
));
2812 /* The VSX hardware allows native alignment for vectors, but control whether the compiler
2813 believes it can use native alignment or still uses 128-bit alignment. */
2814 if (TARGET_VSX
&& !TARGET_VSX_ALIGN_128
)
2825 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2826 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2827 if (TARGET_FLOAT128_TYPE
)
2829 rs6000_vector_mem
[KFmode
] = VECTOR_VSX
;
2830 rs6000_vector_align
[KFmode
] = 128;
2832 if (FLOAT128_IEEE_P (TFmode
))
2834 rs6000_vector_mem
[TFmode
] = VECTOR_VSX
;
2835 rs6000_vector_align
[TFmode
] = 128;
2839 /* V2DF mode, VSX only. */
2842 rs6000_vector_unit
[V2DFmode
] = VECTOR_VSX
;
2843 rs6000_vector_mem
[V2DFmode
] = VECTOR_VSX
;
2844 rs6000_vector_align
[V2DFmode
] = align64
;
2847 /* V4SF mode, either VSX or Altivec. */
2850 rs6000_vector_unit
[V4SFmode
] = VECTOR_VSX
;
2851 rs6000_vector_mem
[V4SFmode
] = VECTOR_VSX
;
2852 rs6000_vector_align
[V4SFmode
] = align32
;
2854 else if (TARGET_ALTIVEC
)
2856 rs6000_vector_unit
[V4SFmode
] = VECTOR_ALTIVEC
;
2857 rs6000_vector_mem
[V4SFmode
] = VECTOR_ALTIVEC
;
2858 rs6000_vector_align
[V4SFmode
] = align32
;
2861 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2865 rs6000_vector_unit
[V4SImode
] = VECTOR_ALTIVEC
;
2866 rs6000_vector_unit
[V8HImode
] = VECTOR_ALTIVEC
;
2867 rs6000_vector_unit
[V16QImode
] = VECTOR_ALTIVEC
;
2868 rs6000_vector_align
[V4SImode
] = align32
;
2869 rs6000_vector_align
[V8HImode
] = align32
;
2870 rs6000_vector_align
[V16QImode
] = align32
;
2874 rs6000_vector_mem
[V4SImode
] = VECTOR_VSX
;
2875 rs6000_vector_mem
[V8HImode
] = VECTOR_VSX
;
2876 rs6000_vector_mem
[V16QImode
] = VECTOR_VSX
;
2880 rs6000_vector_mem
[V4SImode
] = VECTOR_ALTIVEC
;
2881 rs6000_vector_mem
[V8HImode
] = VECTOR_ALTIVEC
;
2882 rs6000_vector_mem
[V16QImode
] = VECTOR_ALTIVEC
;
2886 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2887 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2890 rs6000_vector_mem
[V2DImode
] = VECTOR_VSX
;
2891 rs6000_vector_unit
[V2DImode
]
2892 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2893 rs6000_vector_align
[V2DImode
] = align64
;
2895 rs6000_vector_mem
[V1TImode
] = VECTOR_VSX
;
2896 rs6000_vector_unit
[V1TImode
]
2897 = (TARGET_P8_VECTOR
) ? VECTOR_P8_VECTOR
: VECTOR_NONE
;
2898 rs6000_vector_align
[V1TImode
] = 128;
2901 /* DFmode, see if we want to use the VSX unit. Memory is handled
2902 differently, so don't set rs6000_vector_mem. */
2905 rs6000_vector_unit
[DFmode
] = VECTOR_VSX
;
2906 rs6000_vector_align
[DFmode
] = 64;
2909 /* SFmode, see if we want to use the VSX unit. */
2910 if (TARGET_P8_VECTOR
)
2912 rs6000_vector_unit
[SFmode
] = VECTOR_VSX
;
2913 rs6000_vector_align
[SFmode
] = 32;
2916 /* Allow TImode in VSX register and set the VSX memory macros. */
2919 rs6000_vector_mem
[TImode
] = VECTOR_VSX
;
2920 rs6000_vector_align
[TImode
] = align64
;
2923 /* Add support for vector pairs and vector quad registers. */
2926 rs6000_vector_unit
[OOmode
] = VECTOR_NONE
;
2927 rs6000_vector_mem
[OOmode
] = VECTOR_VSX
;
2928 rs6000_vector_align
[OOmode
] = 256;
2930 rs6000_vector_unit
[XOmode
] = VECTOR_NONE
;
2931 rs6000_vector_mem
[XOmode
] = VECTOR_VSX
;
2932 rs6000_vector_align
[XOmode
] = 512;
2935 /* Register class constraints for the constraints that depend on compile
2936 switches. When the VSX code was added, different constraints were added
2937 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2938 of the VSX registers are used. The register classes for scalar floating
2939 point types is set, based on whether we allow that type into the upper
2940 (Altivec) registers. GCC has register classes to target the Altivec
2941 registers for load/store operations, to select using a VSX memory
2942 operation instead of the traditional floating point operation. The
2945 d - Register class to use with traditional DFmode instructions.
2946 v - Altivec register.
2947 wa - Any VSX register.
2948 wc - Reserved to represent individual CR bits (used in LLVM).
2949 wn - always NO_REGS.
2950 wr - GPR if 64-bit mode is permitted.
2951 wx - Float register if we can do 32-bit int stores. */
2953 if (TARGET_HARD_FLOAT
)
2954 rs6000_constraints
[RS6000_CONSTRAINT_d
] = FLOAT_REGS
;
2956 rs6000_constraints
[RS6000_CONSTRAINT_v
] = ALTIVEC_REGS
;
2958 rs6000_constraints
[RS6000_CONSTRAINT_wa
] = VSX_REGS
;
2960 if (TARGET_POWERPC64
)
2962 rs6000_constraints
[RS6000_CONSTRAINT_wr
] = GENERAL_REGS
;
2963 rs6000_constraints
[RS6000_CONSTRAINT_wA
] = BASE_REGS
;
2967 rs6000_constraints
[RS6000_CONSTRAINT_wx
] = FLOAT_REGS
; /* DImode */
2969 /* Support for new direct moves (ISA 3.0 + 64bit). */
2970 if (TARGET_DIRECT_MOVE_128
)
2971 rs6000_constraints
[RS6000_CONSTRAINT_we
] = VSX_REGS
;
2973 /* Set up the reload helper and direct move functions. */
2974 if (TARGET_VSX
|| TARGET_ALTIVEC
)
2978 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_di_store
;
2979 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_di_load
;
2980 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_di_store
;
2981 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_di_load
;
2982 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_di_store
;
2983 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_di_load
;
2984 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_di_store
;
2985 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_di_load
;
2986 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_di_store
;
2987 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_di_load
;
2988 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_di_store
;
2989 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_di_load
;
2990 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_di_store
;
2991 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_di_load
;
2992 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_di_store
;
2993 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_di_load
;
2994 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_di_store
;
2995 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_di_load
;
2996 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_di_store
;
2997 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_di_load
;
2999 if (FLOAT128_VECTOR_P (KFmode
))
3001 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_di_store
;
3002 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_di_load
;
3005 if (FLOAT128_VECTOR_P (TFmode
))
3007 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_di_store
;
3008 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_di_load
;
3011 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3013 if (TARGET_NO_SDMODE_STACK
)
3015 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_di_store
;
3016 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_di_load
;
3021 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_di_store
;
3022 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_di_load
;
3025 if (TARGET_DIRECT_MOVE
&& !TARGET_DIRECT_MOVE_128
)
3027 reg_addr
[TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxti
;
3028 reg_addr
[V1TImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv1ti
;
3029 reg_addr
[V2DFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2df
;
3030 reg_addr
[V2DImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv2di
;
3031 reg_addr
[V4SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4sf
;
3032 reg_addr
[V4SImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv4si
;
3033 reg_addr
[V8HImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv8hi
;
3034 reg_addr
[V16QImode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxv16qi
;
3035 reg_addr
[SFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxsf
;
3037 reg_addr
[TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprti
;
3038 reg_addr
[V1TImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv1ti
;
3039 reg_addr
[V2DFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2df
;
3040 reg_addr
[V2DImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv2di
;
3041 reg_addr
[V4SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4sf
;
3042 reg_addr
[V4SImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv4si
;
3043 reg_addr
[V8HImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv8hi
;
3044 reg_addr
[V16QImode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprv16qi
;
3045 reg_addr
[SFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprsf
;
3047 if (FLOAT128_VECTOR_P (KFmode
))
3049 reg_addr
[KFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxkf
;
3050 reg_addr
[KFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprkf
;
3053 if (FLOAT128_VECTOR_P (TFmode
))
3055 reg_addr
[TFmode
].reload_gpr_vsx
= CODE_FOR_reload_gpr_from_vsxtf
;
3056 reg_addr
[TFmode
].reload_vsx_gpr
= CODE_FOR_reload_vsx_from_gprtf
;
3061 reg_addr
[OOmode
].reload_store
= CODE_FOR_reload_oo_di_store
;
3062 reg_addr
[OOmode
].reload_load
= CODE_FOR_reload_oo_di_load
;
3063 reg_addr
[XOmode
].reload_store
= CODE_FOR_reload_xo_di_store
;
3064 reg_addr
[XOmode
].reload_load
= CODE_FOR_reload_xo_di_load
;
3070 reg_addr
[V16QImode
].reload_store
= CODE_FOR_reload_v16qi_si_store
;
3071 reg_addr
[V16QImode
].reload_load
= CODE_FOR_reload_v16qi_si_load
;
3072 reg_addr
[V8HImode
].reload_store
= CODE_FOR_reload_v8hi_si_store
;
3073 reg_addr
[V8HImode
].reload_load
= CODE_FOR_reload_v8hi_si_load
;
3074 reg_addr
[V4SImode
].reload_store
= CODE_FOR_reload_v4si_si_store
;
3075 reg_addr
[V4SImode
].reload_load
= CODE_FOR_reload_v4si_si_load
;
3076 reg_addr
[V2DImode
].reload_store
= CODE_FOR_reload_v2di_si_store
;
3077 reg_addr
[V2DImode
].reload_load
= CODE_FOR_reload_v2di_si_load
;
3078 reg_addr
[V1TImode
].reload_store
= CODE_FOR_reload_v1ti_si_store
;
3079 reg_addr
[V1TImode
].reload_load
= CODE_FOR_reload_v1ti_si_load
;
3080 reg_addr
[V4SFmode
].reload_store
= CODE_FOR_reload_v4sf_si_store
;
3081 reg_addr
[V4SFmode
].reload_load
= CODE_FOR_reload_v4sf_si_load
;
3082 reg_addr
[V2DFmode
].reload_store
= CODE_FOR_reload_v2df_si_store
;
3083 reg_addr
[V2DFmode
].reload_load
= CODE_FOR_reload_v2df_si_load
;
3084 reg_addr
[DFmode
].reload_store
= CODE_FOR_reload_df_si_store
;
3085 reg_addr
[DFmode
].reload_load
= CODE_FOR_reload_df_si_load
;
3086 reg_addr
[DDmode
].reload_store
= CODE_FOR_reload_dd_si_store
;
3087 reg_addr
[DDmode
].reload_load
= CODE_FOR_reload_dd_si_load
;
3088 reg_addr
[SFmode
].reload_store
= CODE_FOR_reload_sf_si_store
;
3089 reg_addr
[SFmode
].reload_load
= CODE_FOR_reload_sf_si_load
;
3091 if (FLOAT128_VECTOR_P (KFmode
))
3093 reg_addr
[KFmode
].reload_store
= CODE_FOR_reload_kf_si_store
;
3094 reg_addr
[KFmode
].reload_load
= CODE_FOR_reload_kf_si_load
;
3097 if (FLOAT128_IEEE_P (TFmode
))
3099 reg_addr
[TFmode
].reload_store
= CODE_FOR_reload_tf_si_store
;
3100 reg_addr
[TFmode
].reload_load
= CODE_FOR_reload_tf_si_load
;
3103 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3105 if (TARGET_NO_SDMODE_STACK
)
3107 reg_addr
[SDmode
].reload_store
= CODE_FOR_reload_sd_si_store
;
3108 reg_addr
[SDmode
].reload_load
= CODE_FOR_reload_sd_si_load
;
3113 reg_addr
[TImode
].reload_store
= CODE_FOR_reload_ti_si_store
;
3114 reg_addr
[TImode
].reload_load
= CODE_FOR_reload_ti_si_load
;
3117 if (TARGET_DIRECT_MOVE
)
3119 reg_addr
[DImode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdi
;
3120 reg_addr
[DDmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdd
;
3121 reg_addr
[DFmode
].reload_fpr_gpr
= CODE_FOR_reload_fpr_from_gprdf
;
3125 reg_addr
[DFmode
].scalar_in_vmx_p
= true;
3126 reg_addr
[DImode
].scalar_in_vmx_p
= true;
3128 if (TARGET_P8_VECTOR
)
3130 reg_addr
[SFmode
].scalar_in_vmx_p
= true;
3131 reg_addr
[SImode
].scalar_in_vmx_p
= true;
3133 if (TARGET_P9_VECTOR
)
3135 reg_addr
[HImode
].scalar_in_vmx_p
= true;
3136 reg_addr
[QImode
].scalar_in_vmx_p
= true;
3141 /* Precalculate HARD_REGNO_NREGS. */
3142 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3143 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3144 rs6000_hard_regno_nregs
[m
][r
]
3145 = rs6000_hard_regno_nregs_internal (r
, (machine_mode
) m
);
3147 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3148 for (r
= 0; HARD_REGISTER_NUM_P (r
); ++r
)
3149 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3150 rs6000_hard_regno_mode_ok_p
[m
][r
]
3151 = rs6000_hard_regno_mode_ok_uncached (r
, (machine_mode
) m
);
3153 /* Precalculate CLASS_MAX_NREGS sizes. */
3154 for (c
= 0; c
< LIM_REG_CLASSES
; ++c
)
3158 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
))
3159 reg_size
= UNITS_PER_VSX_WORD
;
3161 else if (c
== ALTIVEC_REGS
)
3162 reg_size
= UNITS_PER_ALTIVEC_WORD
;
3164 else if (c
== FLOAT_REGS
)
3165 reg_size
= UNITS_PER_FP_WORD
;
3168 reg_size
= UNITS_PER_WORD
;
3170 for (m
= 0; m
< NUM_MACHINE_MODES
; ++m
)
3172 machine_mode m2
= (machine_mode
)m
;
3173 int reg_size2
= reg_size
;
3175 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3177 if (TARGET_VSX
&& VSX_REG_CLASS_P (c
) && FLOAT128_2REG_P (m
))
3178 reg_size2
= UNITS_PER_FP_WORD
;
3180 rs6000_class_max_nregs
[m
][c
]
3181 = (GET_MODE_SIZE (m2
) + reg_size2
- 1) / reg_size2
;
3185 /* Calculate which modes to automatically generate code to use a the
3186 reciprocal divide and square root instructions. In the future, possibly
3187 automatically generate the instructions even if the user did not specify
3188 -mrecip. The older machines double precision reciprocal sqrt estimate is
3189 not accurate enough. */
3190 memset (rs6000_recip_bits
, 0, sizeof (rs6000_recip_bits
));
3192 rs6000_recip_bits
[SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3194 rs6000_recip_bits
[DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3195 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3196 rs6000_recip_bits
[V4SFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3197 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3198 rs6000_recip_bits
[V2DFmode
] = RS6000_RECIP_MASK_HAVE_RE
;
3200 if (TARGET_FRSQRTES
)
3201 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3203 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3204 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode
))
3205 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3206 if (VECTOR_UNIT_VSX_P (V2DFmode
))
3207 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_HAVE_RSQRTE
;
3209 if (rs6000_recip_control
)
3211 if (!flag_finite_math_only
)
3212 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3214 if (flag_trapping_math
)
3215 warning (0, "%qs requires %qs or %qs", "-mrecip",
3216 "-fno-trapping-math", "-ffast-math");
3217 if (!flag_reciprocal_math
)
3218 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3220 if (flag_finite_math_only
&& !flag_trapping_math
&& flag_reciprocal_math
)
3222 if (RS6000_RECIP_HAVE_RE_P (SFmode
)
3223 && (rs6000_recip_control
& RECIP_SF_DIV
) != 0)
3224 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3226 if (RS6000_RECIP_HAVE_RE_P (DFmode
)
3227 && (rs6000_recip_control
& RECIP_DF_DIV
) != 0)
3228 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3230 if (RS6000_RECIP_HAVE_RE_P (V4SFmode
)
3231 && (rs6000_recip_control
& RECIP_V4SF_DIV
) != 0)
3232 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3234 if (RS6000_RECIP_HAVE_RE_P (V2DFmode
)
3235 && (rs6000_recip_control
& RECIP_V2DF_DIV
) != 0)
3236 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RE
;
3238 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode
)
3239 && (rs6000_recip_control
& RECIP_SF_RSQRT
) != 0)
3240 rs6000_recip_bits
[SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3242 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode
)
3243 && (rs6000_recip_control
& RECIP_DF_RSQRT
) != 0)
3244 rs6000_recip_bits
[DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3246 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode
)
3247 && (rs6000_recip_control
& RECIP_V4SF_RSQRT
) != 0)
3248 rs6000_recip_bits
[V4SFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3250 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode
)
3251 && (rs6000_recip_control
& RECIP_V2DF_RSQRT
) != 0)
3252 rs6000_recip_bits
[V2DFmode
] |= RS6000_RECIP_MASK_AUTO_RSQRTE
;
3256 /* Update the addr mask bits in reg_addr to help secondary reload and go if
3257 legitimate address support to figure out the appropriate addressing to
3259 rs6000_setup_reg_addr_masks ();
3261 if (global_init_p
|| TARGET_DEBUG_TARGET
)
3263 if (TARGET_DEBUG_REG
)
3264 rs6000_debug_reg_global ();
3266 if (TARGET_DEBUG_COST
|| TARGET_DEBUG_REG
)
3268 "SImode variable mult cost = %d\n"
3269 "SImode constant mult cost = %d\n"
3270 "SImode short constant mult cost = %d\n"
3271 "DImode multipliciation cost = %d\n"
3272 "SImode division cost = %d\n"
3273 "DImode division cost = %d\n"
3274 "Simple fp operation cost = %d\n"
3275 "DFmode multiplication cost = %d\n"
3276 "SFmode division cost = %d\n"
3277 "DFmode division cost = %d\n"
3278 "cache line size = %d\n"
3279 "l1 cache size = %d\n"
3280 "l2 cache size = %d\n"
3281 "simultaneous prefetches = %d\n"
3284 rs6000_cost
->mulsi_const
,
3285 rs6000_cost
->mulsi_const9
,
3293 rs6000_cost
->cache_line_size
,
3294 rs6000_cost
->l1_cache_size
,
3295 rs6000_cost
->l2_cache_size
,
3296 rs6000_cost
->simultaneous_prefetches
);
3301 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3304 darwin_rs6000_override_options (void)
3306 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3308 rs6000_altivec_abi
= 1;
3309 TARGET_ALTIVEC_VRSAVE
= 1;
3310 rs6000_current_abi
= ABI_DARWIN
;
3312 if (DEFAULT_ABI
== ABI_DARWIN
3314 darwin_one_byte_bool
= 1;
3316 if (TARGET_64BIT
&& ! TARGET_POWERPC64
)
3318 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3319 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3322 /* The linkers [ld64] that support 64Bit do not need the JBSR longcall
3323 optimisation, and will not work with the most generic case (where the
3324 symbol is undefined external, but there is no symbl stub). */
3326 rs6000_default_long_calls
= 0;
3328 /* ld_classic is (so far) still used for kernel (static) code, and supports
3329 the JBSR longcall / branch islands. */
3332 rs6000_default_long_calls
= 1;
3334 /* Allow a kext author to do -mkernel -mhard-float. */
3335 if (! (rs6000_isa_flags_explicit
& OPTION_MASK_SOFT_FLOAT
))
3336 rs6000_isa_flags
|= OPTION_MASK_SOFT_FLOAT
;
3339 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3341 if (!flag_mkernel
&& !flag_apple_kext
3343 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
))
3344 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
3346 /* Unless the user (not the configurer) has explicitly overridden
3347 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3348 G4 unless targeting the kernel. */
3351 && strverscmp (darwin_macosx_version_min
, "10.5") >= 0
3352 && ! (rs6000_isa_flags_explicit
& OPTION_MASK_ALTIVEC
)
3353 && ! OPTION_SET_P (rs6000_cpu_index
))
3355 rs6000_isa_flags
|= OPTION_MASK_ALTIVEC
;
/* If not otherwise specified by a target, make 'long double' equivalent to
   'double'.  The #endif terminating this guard was lost in the mangled
   extraction and is restored here; without it the conditional is
   unterminated.  */

#ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
#endif
3367 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3368 to clobber the XER[CA] bit because clobbering that bit without telling
3369 the compiler worked just fine with versions of GCC before GCC 5, and
3370 breaking a lot of older code in ways that are hard to track down is
3371 not such a great idea. */
3374 rs6000_md_asm_adjust (vec
<rtx
> & /*outputs*/, vec
<rtx
> & /*inputs*/,
3375 vec
<machine_mode
> & /*input_modes*/,
3376 vec
<const char *> & /*constraints*/, vec
<rtx
> &clobbers
,
3377 HARD_REG_SET
&clobbered_regs
, location_t
/*loc*/)
3379 clobbers
.safe_push (gen_rtx_REG (SImode
, CA_REGNO
));
3380 SET_HARD_REG_BIT (clobbered_regs
, CA_REGNO
);
3384 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3385 but is called when the optimize level is changed via an attribute or
3386 pragma or when it is reset at the end of the code affected by the
3387 attribute or pragma. It is not called at the beginning of compilation
3388 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3389 actions then, you should have TARGET_OPTION_OVERRIDE call
3390 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3393 rs6000_override_options_after_change (void)
3395 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3396 turns -frename-registers on. */
3397 if ((OPTION_SET_P (flag_unroll_loops
) && flag_unroll_loops
)
3398 || (OPTION_SET_P (flag_unroll_all_loops
)
3399 && flag_unroll_all_loops
))
3401 if (!OPTION_SET_P (unroll_only_small_loops
))
3402 unroll_only_small_loops
= 0;
3403 if (!OPTION_SET_P (flag_rename_registers
))
3404 flag_rename_registers
= 1;
3405 if (!OPTION_SET_P (flag_cunroll_grow_size
))
3406 flag_cunroll_grow_size
= 1;
3408 else if (!OPTION_SET_P (flag_cunroll_grow_size
))
3409 flag_cunroll_grow_size
= flag_peel_loops
|| optimize
>= 3;
3411 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3412 if (rs6000_rop_protect
)
3413 flag_shrink_wrap
= 0;
#ifdef TARGET_USES_LINUX64_OPT
/* On 64-bit Linux, validate and adjust the ABI/code-model options.
   Diagnoses option combinations that are invalid in 64-bit mode (via the
   INVALID_64BIT message template) and in 32-bit mode (INVALID_32BIT), and
   selects the default -mcmodel.

   NOTE(review): this function was extraction-mangled; dropped structural
   lines (braces, "if (dot_symbols)", the "-mpltseq" warning argument,
   "profile_kernel = 0;", the closing #endif) were restored to match
   upstream GCC rs6000.cc -- verify against version control.  */
static void
rs6000_linux64_override_options ()
{
  if (!OPTION_SET_P (rs6000_alignment_flags))
    rs6000_alignment_flags = MASK_ALIGN_NATURAL;
  if (rs6000_isa_flags & OPTION_MASK_64BIT)
    {
      /* 64-bit code requires the AIX-style (or ELFv2) calling convention.  */
      if (DEFAULT_ABI != ABI_AIX)
	{
	  rs6000_current_abi = ABI_AIX;
	  error (INVALID_64BIT, "call");
	}
      dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
      if (ELFv2_ABI_CHECK)
	{
	  rs6000_current_abi = ABI_ELFv2;
	  if (dot_symbols)
	    error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
	}
      if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
	  error (INVALID_64BIT, "relocatable");
	}
      if (rs6000_isa_flags & OPTION_MASK_EABI)
	{
	  rs6000_isa_flags &= ~OPTION_MASK_EABI;
	  error (INVALID_64BIT, "eabi");
	}
      if (TARGET_PROTOTYPE)
	{
	  target_prototype = 0;
	  error (INVALID_64BIT, "prototype");
	}
      if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
	{
	  rs6000_isa_flags |= OPTION_MASK_POWERPC64;
	  error ("%<-m64%> requires a PowerPC64 cpu");
	}
      /* Default to medium code model unless the user chose one.  */
      if (!OPTION_SET_P (rs6000_current_cmodel))
	SET_CMODEL (CMODEL_MEDIUM);
      if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
	{
	  if (OPTION_SET_P (rs6000_current_cmodel)
	      && rs6000_current_cmodel != CMODEL_SMALL)
	    error ("%<-mcmodel%> incompatible with other toc options");
	  if (TARGET_MINIMAL_TOC)
	    SET_CMODEL (CMODEL_SMALL);
	}
      else if (TARGET_PCREL
	       || (PCREL_SUPPORTED_BY_OS
		   && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
	/* Ignore -mno-minimal-toc.  */
	;
      else
	SET_CMODEL (CMODEL_SMALL);
      if (rs6000_current_cmodel != CMODEL_SMALL)
	{
	  if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
	    TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
	  if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
	    TARGET_NO_SUM_IN_TOC = 0;
	}
      /* PLT sequences are only usable with the ELFv2 ABI.  */
      if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
	{
	  if (OPTION_SET_P (rs6000_pltseq))
	    warning (0, "%qs unsupported for this ABI",
		     "-mpltseq");
	  rs6000_pltseq = false;
	}
    }
  else if (TARGET_64BIT)
    error (INVALID_32BIT, "32");
  else
    {
      if (TARGET_PROFILE_KERNEL)
	{
	  profile_kernel = 0;
	  error (INVALID_32BIT, "profile-kernel");
	}
      if (OPTION_SET_P (rs6000_current_cmodel))
	{
	  SET_CMODEL (CMODEL_SMALL);
	  error (INVALID_32BIT, "cmodel");
	}
    }
}
#endif
/* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
   This support is only in little endian GLIBC 2.32 or newer.

   NOTE(review): the return-type line, braces, the "#ifdef OPTION_GLIBC"
   opener, and the return statements were missing from the mangled source
   and restored to match upstream GCC rs6000.cc -- verify against version
   control.  */
static bool
glibc_supports_ieee_128bit (void)
{
#ifdef OPTION_GLIBC
  /* major*1000 + minor >= 2032 encodes "glibc 2.32 or newer".  */
  if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
      && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
    return true;
#endif /* OPTION_GLIBC.  */

  return false;
}
3520 /* Override command line options.
3522 Combine build-specific configuration information with options
3523 specified on the command line to set various state variables which
3524 influence code generation, optimization, and expansion of built-in
3525 functions. Assure that command-line configuration preferences are
3526 compatible with each other and with the build configuration; issue
3527 warnings while adjusting configuration or error messages while
3528 rejecting configuration.
3530 Upon entry to this function:
3532 This function is called once at the beginning of
3533 compilation, and then again at the start and end of compiling
3534 each section of code that has a different configuration, as
3535 indicated, for example, by adding the
3537 __attribute__((__target__("cpu=power9")))
3539 qualifier to a function definition or, for example, by bracketing
3542 #pragma GCC target("altivec")
3546 #pragma GCC reset_options
3548 directives. Parameter global_init_p is true for the initial
3549 invocation, which initializes global variables, and false for all
3550 subsequent invocations.
3553 Various global state information is assumed to be valid. This
3554 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3555 default CPU specified at build configure time, TARGET_DEFAULT,
3556 representing the default set of option flags for the default
3557 target, and OPTION_SET_P (rs6000_isa_flags), representing
3558 which options were requested on the command line.
3560 Upon return from this function:
3562 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3563 was set by name on the command line. Additionally, if certain
3564 attributes are automatically enabled or disabled by this function
3565 in order to assure compatibility between options and
3566 configuration, the flags associated with those attributes are
3567 also set. By setting these "explicit bits", we avoid the risk
3568 that other code might accidentally overwrite these particular
3569 attributes with "default values".
3571 The various bits of rs6000_isa_flags are set to indicate the
3572 target options that have been selected for the most current
3573 compilation efforts. This has the effect of also turning on the
3574 associated TARGET_XXX values since these are macros which are
3575 generally defined to test the corresponding bit of the
3576 rs6000_isa_flags variable.
3578 Various other global variables and fields of global structures
3579 (over 50 in all) are initialized to reflect the desired options
3580 for the most current compilation efforts. */
3583 rs6000_option_override_internal (bool global_init_p
)
3587 HOST_WIDE_INT set_masks
;
3588 HOST_WIDE_INT ignore_masks
;
3591 struct cl_target_option
*main_target_opt
3592 = ((global_init_p
|| target_option_default_node
== NULL
)
3593 ? NULL
: TREE_TARGET_OPTION (target_option_default_node
));
3595 /* Print defaults. */
3596 if ((TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
) && global_init_p
)
3597 rs6000_print_isa_options (stderr
, 0, "TARGET_DEFAULT", TARGET_DEFAULT
);
3599 /* Remember the explicit arguments. */
3601 rs6000_isa_flags_explicit
= OPTION_SET_P (rs6000_isa_flags
);
3603 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3604 library functions, so warn about it. The flag may be useful for
3605 performance studies from time to time though, so don't disable it
3607 if (OPTION_SET_P (rs6000_alignment_flags
)
3608 && rs6000_alignment_flags
== MASK_ALIGN_POWER
3609 && DEFAULT_ABI
== ABI_DARWIN
3611 warning (0, "%qs is not supported for 64-bit Darwin;"
3612 " it is incompatible with the installed C and C++ libraries",
3615 /* Numerous experiment shows that IRA based loop pressure
3616 calculation works better for RTL loop invariant motion on targets
3617 with enough (>= 32) registers. It is an expensive optimization.
3618 So it is on only for peak performance. */
3619 if (optimize
>= 3 && global_init_p
3620 && !OPTION_SET_P (flag_ira_loop_pressure
))
3621 flag_ira_loop_pressure
= 1;
3623 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3624 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3625 options were already specified. */
3626 if (flag_sanitize
& SANITIZE_USER_ADDRESS
3627 && !OPTION_SET_P (flag_asynchronous_unwind_tables
))
3628 flag_asynchronous_unwind_tables
= 1;
3630 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3631 loop unroller is active. It is only checked during unrolling, so
3632 we can just set it on by default. */
3633 if (!OPTION_SET_P (flag_variable_expansion_in_unroller
))
3634 flag_variable_expansion_in_unroller
= 1;
3636 /* Set the pointer size. */
3639 rs6000_pmode
= DImode
;
3640 rs6000_pointer_size
= 64;
3644 rs6000_pmode
= SImode
;
3645 rs6000_pointer_size
= 32;
3648 /* Some OSs don't support saving Altivec registers. On those OSs, we don't
3649 touch the OPTION_MASK_ALTIVEC settings; if the user wants it, the user
3650 must explicitly specify it and we won't interfere with the user's
3653 set_masks
= POWERPC_MASKS
;
3654 #ifdef OS_MISSING_ALTIVEC
3655 if (OS_MISSING_ALTIVEC
)
3656 set_masks
&= ~(OPTION_MASK_ALTIVEC
| OPTION_MASK_VSX
3657 | OTHER_VSX_VECTOR_MASKS
);
3660 /* Don't override by the processor default if given explicitly. */
3661 set_masks
&= ~rs6000_isa_flags_explicit
;
3663 /* Without option powerpc64 specified explicitly, we need to ensure
3664 powerpc64 always enabled for 64 bit here, otherwise some following
3665 checks can use unexpected TARGET_POWERPC64 value. */
3666 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_POWERPC64
)
3669 rs6000_isa_flags
|= OPTION_MASK_POWERPC64
;
3670 /* Need to stop powerpc64 from being unset in later processing,
3671 so clear it in set_masks. But as PR108240 shows, to keep it
3672 consistent with before, we want to make this only if 64 bit
3673 is enabled explicitly. This is a hack, revisit this later. */
3674 if (rs6000_isa_flags_explicit
& OPTION_MASK_64BIT
)
3675 set_masks
&= ~OPTION_MASK_POWERPC64
;
3678 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3679 the cpu in a target attribute or pragma, but did not specify a tuning
3680 option, use the cpu for the tuning option rather than the option specified
3681 with -mtune on the command line. Process a '--with-cpu' configuration
3682 request as an implicit --cpu. */
3683 if (rs6000_cpu_index
>= 0)
3684 cpu_index
= rs6000_cpu_index
;
3685 else if (main_target_opt
!= NULL
&& main_target_opt
->x_rs6000_cpu_index
>= 0)
3686 cpu_index
= main_target_opt
->x_rs6000_cpu_index
;
3687 else if (OPTION_TARGET_CPU_DEFAULT
)
3688 cpu_index
= rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT
);
3690 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3691 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3692 with those from the cpu, except for options that were explicitly set. If
3693 we don't have a cpu, do not override the target bits set in
3697 rs6000_cpu_index
= cpu_index
;
3698 rs6000_isa_flags
&= ~set_masks
;
3699 rs6000_isa_flags
|= (processor_target_table
[cpu_index
].target_enable
3704 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3705 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3706 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3707 to using rs6000_isa_flags, we need to do the initialization here.
3709 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3710 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3711 HOST_WIDE_INT flags
;
3713 flags
= TARGET_DEFAULT
;
3716 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3717 const char *default_cpu
= (!TARGET_POWERPC64
3722 int default_cpu_index
= rs6000_cpu_name_lookup (default_cpu
);
3723 flags
= processor_target_table
[default_cpu_index
].target_enable
;
3725 rs6000_isa_flags
|= (flags
& ~rs6000_isa_flags_explicit
);
3728 /* Don't expect powerpc64 enabled on those OSes with OS_MISSING_POWERPC64,
3729 since they do not save and restore the high half of the GPRs correctly
3730 in all cases. If the user explicitly specifies it, we won't interfere
3731 with the user's specification. */
3732 #ifdef OS_MISSING_POWERPC64
3733 if (OS_MISSING_POWERPC64
3736 && !(rs6000_isa_flags_explicit
& OPTION_MASK_POWERPC64
))
3737 rs6000_isa_flags
&= ~OPTION_MASK_POWERPC64
;
3740 if (rs6000_tune_index
>= 0)
3741 tune_index
= rs6000_tune_index
;
3742 else if (cpu_index
>= 0)
3743 rs6000_tune_index
= tune_index
= cpu_index
;
3747 enum processor_type tune_proc
3748 = (TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
);
3751 for (i
= 0; i
< ARRAY_SIZE (processor_target_table
); i
++)
3752 if (processor_target_table
[i
].processor
== tune_proc
)
3760 rs6000_cpu
= processor_target_table
[cpu_index
].processor
;
3762 rs6000_cpu
= TARGET_POWERPC64
? PROCESSOR_DEFAULT64
: PROCESSOR_DEFAULT
;
3764 gcc_assert (tune_index
>= 0);
3765 rs6000_tune
= processor_target_table
[tune_index
].processor
;
3767 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
3768 || rs6000_cpu
== PROCESSOR_PPCE500MC
|| rs6000_cpu
== PROCESSOR_PPCE500MC64
3769 || rs6000_cpu
== PROCESSOR_PPCE5500
)
3772 error ("AltiVec not supported in this target");
3775 /* If we are optimizing big endian systems for space, use the load/store
3776 multiple instructions. */
3777 if (BYTES_BIG_ENDIAN
&& optimize_size
)
3778 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
;
3780 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3781 because the hardware doesn't support the instructions used in little
3782 endian mode, and causes an alignment trap. The 750 does not cause an
3783 alignment trap (except when the target is unaligned). */
3785 if (!BYTES_BIG_ENDIAN
&& rs6000_cpu
!= PROCESSOR_PPC750
&& TARGET_MULTIPLE
)
3787 rs6000_isa_flags
&= ~OPTION_MASK_MULTIPLE
;
3788 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MULTIPLE
) != 0)
3789 warning (0, "%qs is not supported on little endian systems",
3793 /* If little-endian, default to -mstrict-align on older processors.
3794 Testing for direct_move matches power8 and later. */
3795 if (!BYTES_BIG_ENDIAN
3796 && !(processor_target_table
[tune_index
].target_enable
3797 & OPTION_MASK_DIRECT_MOVE
))
3798 rs6000_isa_flags
|= ~rs6000_isa_flags_explicit
& OPTION_MASK_STRICT_ALIGN
;
3800 /* Add some warnings for VSX. */
3803 const char *msg
= NULL
;
3804 if (!TARGET_HARD_FLOAT
)
3806 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3807 msg
= N_("%<-mvsx%> requires hardware floating point");
3810 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
3811 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3814 else if (TARGET_AVOID_XFORM
> 0)
3815 msg
= N_("%<-mvsx%> needs indexed addressing");
3816 else if (!TARGET_ALTIVEC
&& (rs6000_isa_flags_explicit
3817 & OPTION_MASK_ALTIVEC
))
3819 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3820 msg
= N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3822 msg
= N_("%<-mno-altivec%> disables vsx");
3828 rs6000_isa_flags
&= ~ OPTION_MASK_VSX
;
3829 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3833 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3834 the -mcpu setting to enable options that conflict. */
3835 if ((!TARGET_HARD_FLOAT
|| !TARGET_ALTIVEC
|| !TARGET_VSX
)
3836 && (rs6000_isa_flags_explicit
& (OPTION_MASK_SOFT_FLOAT
3837 | OPTION_MASK_ALTIVEC
3838 | OPTION_MASK_VSX
)) != 0)
3839 rs6000_isa_flags
&= ~((OPTION_MASK_P8_VECTOR
| OPTION_MASK_CRYPTO
3840 | OPTION_MASK_DIRECT_MOVE
)
3841 & ~rs6000_isa_flags_explicit
);
3843 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
3844 rs6000_print_isa_options (stderr
, 0, "before defaults", rs6000_isa_flags
);
3846 #ifdef XCOFF_DEBUGGING_INFO
3847 /* For AIX default to 64-bit DWARF. */
3848 if (!OPTION_SET_P (dwarf_offset_size
))
3849 dwarf_offset_size
= POINTER_SIZE_UNITS
;
3852 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3853 off all of the options that depend on those flags. */
3854 ignore_masks
= rs6000_disable_incompatible_switches ();
3856 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3857 unless the user explicitly used the -mno-<option> to disable the code. */
3858 if (TARGET_P9_VECTOR
|| TARGET_MODULO
|| TARGET_P9_MISC
)
3859 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3860 else if (TARGET_P9_MINMAX
)
3864 if (cpu_index
== PROCESSOR_POWER9
)
3866 /* legacy behavior: allow -mcpu=power9 with certain
3867 capabilities explicitly disabled. */
3868 rs6000_isa_flags
|= (ISA_3_0_MASKS_SERVER
& ~ignore_masks
);
3871 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3872 "for <xxx> less than power9", "-mcpu");
3874 else if ((ISA_3_0_MASKS_SERVER
& rs6000_isa_flags_explicit
)
3875 != (ISA_3_0_MASKS_SERVER
& rs6000_isa_flags
3876 & rs6000_isa_flags_explicit
))
3877 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3878 were explicitly cleared. */
3879 error ("%qs incompatible with explicitly disabled options",
3882 rs6000_isa_flags
|= ISA_3_0_MASKS_SERVER
;
3884 else if (TARGET_P8_VECTOR
|| TARGET_DIRECT_MOVE
|| TARGET_CRYPTO
)
3885 rs6000_isa_flags
|= (ISA_2_7_MASKS_SERVER
& ~ignore_masks
);
3886 else if (TARGET_VSX
)
3887 rs6000_isa_flags
|= (ISA_2_6_MASKS_SERVER
& ~ignore_masks
);
3888 else if (TARGET_POPCNTD
)
3889 rs6000_isa_flags
|= (ISA_2_6_MASKS_EMBEDDED
& ~ignore_masks
);
3890 else if (TARGET_DFP
)
3891 rs6000_isa_flags
|= (ISA_2_5_MASKS_SERVER
& ~ignore_masks
);
3892 else if (TARGET_CMPB
)
3893 rs6000_isa_flags
|= (ISA_2_5_MASKS_EMBEDDED
& ~ignore_masks
);
3894 else if (TARGET_FPRND
)
3895 rs6000_isa_flags
|= (ISA_2_4_MASKS
& ~ignore_masks
);
3896 else if (TARGET_POPCNTB
)
3897 rs6000_isa_flags
|= (ISA_2_2_MASKS
& ~ignore_masks
);
3898 else if (TARGET_ALTIVEC
)
3899 rs6000_isa_flags
|= (OPTION_MASK_PPC_GFXOPT
& ~ignore_masks
);
3901 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3902 target attribute or pragma which automatically enables both options,
3903 unless the altivec ABI was set. This is set by default for 64-bit, but
3904 not for 32-bit. Don't move this before the above code using ignore_masks,
3905 since it can reset the cleared VSX/ALTIVEC flag again. */
3906 if (main_target_opt
&& !main_target_opt
->x_rs6000_altivec_abi
)
3907 rs6000_isa_flags
&= ~((OPTION_MASK_VSX
| OPTION_MASK_ALTIVEC
)
3908 & ~rs6000_isa_flags_explicit
);
3910 if (TARGET_CRYPTO
&& !TARGET_ALTIVEC
)
3912 if (rs6000_isa_flags_explicit
& OPTION_MASK_CRYPTO
)
3913 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3914 rs6000_isa_flags
&= ~OPTION_MASK_CRYPTO
;
3917 if (!TARGET_FPRND
&& TARGET_VSX
)
3919 if (rs6000_isa_flags_explicit
& OPTION_MASK_FPRND
)
3920 /* TARGET_VSX = 1 implies Power 7 and newer */
3921 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3922 rs6000_isa_flags
&= ~OPTION_MASK_FPRND
;
3925 if (TARGET_DIRECT_MOVE
&& !TARGET_VSX
)
3927 if (rs6000_isa_flags_explicit
& OPTION_MASK_DIRECT_MOVE
)
3928 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3929 rs6000_isa_flags
&= ~OPTION_MASK_DIRECT_MOVE
;
3932 if (TARGET_P8_VECTOR
&& !TARGET_ALTIVEC
)
3934 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
3935 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3936 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
3939 if (TARGET_P8_VECTOR
&& !TARGET_VSX
)
3941 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
3942 && (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
))
3943 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3944 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
) == 0)
3946 rs6000_isa_flags
&= ~OPTION_MASK_P8_VECTOR
;
3947 if (rs6000_isa_flags_explicit
& OPTION_MASK_VSX
)
3948 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
3952 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3954 rs6000_isa_flags
|= OPTION_MASK_VSX
;
3955 rs6000_isa_flags_explicit
|= OPTION_MASK_VSX
;
3959 if (TARGET_DFP
&& !TARGET_HARD_FLOAT
)
3961 if (rs6000_isa_flags_explicit
& OPTION_MASK_DFP
)
3962 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3963 rs6000_isa_flags
&= ~OPTION_MASK_DFP
;
3966 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode,
3967 silently turn off quad memory mode. */
3968 if ((TARGET_QUAD_MEMORY
|| TARGET_QUAD_MEMORY_ATOMIC
) && !TARGET_POWERPC64
)
3970 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
3971 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3973 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) != 0)
3974 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3976 rs6000_isa_flags
&= ~(OPTION_MASK_QUAD_MEMORY
3977 | OPTION_MASK_QUAD_MEMORY_ATOMIC
);
3980 /* Non-atomic quad memory load/store are disabled for little endian, since
3981 the words are reversed, but atomic operations can still be done by
3982 swapping the words. */
3983 if (TARGET_QUAD_MEMORY
&& !WORDS_BIG_ENDIAN
)
3985 if ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY
) != 0)
3986 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3989 rs6000_isa_flags
&= ~OPTION_MASK_QUAD_MEMORY
;
3992 /* Assume if the user asked for normal quad memory instructions, they want
3993 the atomic versions as well, unless they explicity told us not to use quad
3994 word atomic instructions. */
3995 if (TARGET_QUAD_MEMORY
3996 && !TARGET_QUAD_MEMORY_ATOMIC
3997 && ((rs6000_isa_flags_explicit
& OPTION_MASK_QUAD_MEMORY_ATOMIC
) == 0))
3998 rs6000_isa_flags
|= OPTION_MASK_QUAD_MEMORY_ATOMIC
;
4000 /* If we can shrink-wrap the TOC register save separately, then use
4001 -msave-toc-indirect unless explicitly disabled. */
4002 if ((rs6000_isa_flags_explicit
& OPTION_MASK_SAVE_TOC_INDIRECT
) == 0
4003 && flag_shrink_wrap_separate
4004 && optimize_function_for_speed_p (cfun
))
4005 rs6000_isa_flags
|= OPTION_MASK_SAVE_TOC_INDIRECT
;
4007 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4008 generating power8 instructions. Power9 does not optimize power8 fusion
4010 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
))
4012 if (processor_target_table
[tune_index
].processor
== PROCESSOR_POWER8
)
4013 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4015 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4018 /* Setting additional fusion flags turns on base fusion. */
4019 if (!TARGET_P8_FUSION
&& TARGET_P8_FUSION_SIGN
)
4021 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION
)
4023 if (TARGET_P8_FUSION_SIGN
)
4024 error ("%qs requires %qs", "-mpower8-fusion-sign",
4027 rs6000_isa_flags
&= ~OPTION_MASK_P8_FUSION
;
4030 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION
;
4033 /* Power8 does not fuse sign extended loads with the addis. If we are
4034 optimizing at high levels for speed, convert a sign extended load into a
4035 zero extending load, and an explicit sign extension. */
4036 if (TARGET_P8_FUSION
4037 && !(rs6000_isa_flags_explicit
& OPTION_MASK_P8_FUSION_SIGN
)
4038 && optimize_function_for_speed_p (cfun
)
4040 rs6000_isa_flags
|= OPTION_MASK_P8_FUSION_SIGN
;
4042 /* ISA 3.0 vector instructions include ISA 2.07. */
4043 if (TARGET_P9_VECTOR
&& !TARGET_P8_VECTOR
)
4045 /* We prefer to not mention undocumented options in
4046 error messages. However, if users have managed to select
4047 power9-vector without selecting power8-vector, they
4048 already know about undocumented flags. */
4049 if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) &&
4050 (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
))
4051 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4052 else if ((rs6000_isa_flags_explicit
& OPTION_MASK_P9_VECTOR
) == 0)
4054 rs6000_isa_flags
&= ~OPTION_MASK_P9_VECTOR
;
4055 if (rs6000_isa_flags_explicit
& OPTION_MASK_P8_VECTOR
)
4056 rs6000_isa_flags_explicit
|= OPTION_MASK_P9_VECTOR
;
4060 /* OPTION_MASK_P9_VECTOR is explicit and
4061 OPTION_MASK_P8_VECTOR is not explicit. */
4062 rs6000_isa_flags
|= OPTION_MASK_P8_VECTOR
;
4063 rs6000_isa_flags_explicit
|= OPTION_MASK_P8_VECTOR
;
4067 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
4068 support. If we only have ISA 2.06 support, and the user did not specify
4069 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4070 but we don't enable the full vectorization support */
4071 if (TARGET_ALLOW_MOVMISALIGN
== -1 && TARGET_P8_VECTOR
&& TARGET_DIRECT_MOVE
)
4072 TARGET_ALLOW_MOVMISALIGN
= 1;
4074 else if (TARGET_ALLOW_MOVMISALIGN
&& !TARGET_VSX
)
4076 if (TARGET_ALLOW_MOVMISALIGN
> 0
4077 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN
))
4078 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4080 TARGET_ALLOW_MOVMISALIGN
= 0;
4083 /* Determine when unaligned vector accesses are permitted, and when
4084 they are preferred over masked Altivec loads. Note that if
4085 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4086 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4088 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4092 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4093 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4095 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4098 else if (!TARGET_ALLOW_MOVMISALIGN
)
4100 if (rs6000_isa_flags_explicit
& OPTION_MASK_EFFICIENT_UNALIGNED_VSX
)
4101 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4102 "-mallow-movmisalign");
4104 rs6000_isa_flags
&= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX
;
4108 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
))
4110 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4111 rs6000_isa_flags
|= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
;
4113 rs6000_isa_flags
&= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
;
4116 /* Use long double size to select the appropriate long double. We use
4117 TYPE_PRECISION to differentiate the 3 different long double types. We map
4118 128 into the precision used for TFmode. */
4119 int default_long_double_size
= (RS6000_DEFAULT_LONG_DOUBLE_SIZE
== 64
4121 : FLOAT_PRECISION_TFmode
);
4123 /* Set long double size before the IEEE 128-bit tests. */
4124 if (!OPTION_SET_P (rs6000_long_double_type_size
))
4126 if (main_target_opt
!= NULL
4127 && (main_target_opt
->x_rs6000_long_double_type_size
4128 != default_long_double_size
))
4129 error ("target attribute or pragma changes %<long double%> size");
4131 rs6000_long_double_type_size
= default_long_double_size
;
4133 else if (rs6000_long_double_type_size
== FLOAT_PRECISION_TFmode
)
4134 ; /* The option value can be seen when cl_target_option_restore is called. */
4135 else if (rs6000_long_double_type_size
== 128)
4136 rs6000_long_double_type_size
= FLOAT_PRECISION_TFmode
;
4138 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4139 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4140 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4141 those systems will not pick up this default. Warn if the user changes the
4142 default unless -Wno-psabi. */
4143 if (!OPTION_SET_P (rs6000_ieeequad
))
4144 rs6000_ieeequad
= TARGET_IEEEQUAD_DEFAULT
;
4146 else if (TARGET_LONG_DOUBLE_128
)
4148 if (global_options
.x_rs6000_ieeequad
4149 && (!TARGET_POPCNTD
|| !TARGET_VSX
))
4150 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4152 if (rs6000_ieeequad
!= TARGET_IEEEQUAD_DEFAULT
)
4154 /* Determine if the user can change the default long double type at
4155 compilation time. You need GLIBC 2.32 or newer to be able to
4156 change the long double type. Only issue one warning. */
4157 static bool warned_change_long_double
;
4159 if (!warned_change_long_double
&& !glibc_supports_ieee_128bit ())
4161 warned_change_long_double
= true;
4162 if (TARGET_IEEEQUAD
)
4163 warning (OPT_Wpsabi
, "Using IEEE extended precision "
4166 warning (OPT_Wpsabi
, "Using IBM extended precision "
4172 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4173 sytems. In GCC 7, we would enable the IEEE 128-bit floating point
4174 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4175 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4176 the keyword as well as the type. */
4177 TARGET_FLOAT128_TYPE
= TARGET_FLOAT128_ENABLE_TYPE
&& TARGET_VSX
;
4179 /* IEEE 128-bit floating point requires VSX support. */
4180 if (TARGET_FLOAT128_KEYWORD
)
4184 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) != 0)
4185 error ("%qs requires VSX support", "-mfloat128");
4187 TARGET_FLOAT128_TYPE
= 0;
4188 rs6000_isa_flags
&= ~(OPTION_MASK_FLOAT128_KEYWORD
4189 | OPTION_MASK_FLOAT128_HW
);
4191 else if (!TARGET_FLOAT128_TYPE
)
4193 TARGET_FLOAT128_TYPE
= 1;
4194 warning (0, "The %<-mfloat128%> option may not be fully supported");
4198 /* Enable the __float128 keyword under Linux by default. */
4199 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_KEYWORD
4200 && (rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_KEYWORD
) == 0)
4201 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_KEYWORD
;
4203 /* If we have are supporting the float128 type and full ISA 3.0 support,
4204 enable -mfloat128-hardware by default. However, don't enable the
4205 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4206 because sometimes the compiler wants to put things in an integer
4207 container, and if we don't have __int128 support, it is impossible. */
4208 if (TARGET_FLOAT128_TYPE
&& !TARGET_FLOAT128_HW
&& TARGET_64BIT
4209 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) == ISA_3_0_MASKS_IEEE
4210 && !(rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
))
4211 rs6000_isa_flags
|= OPTION_MASK_FLOAT128_HW
;
4213 if (TARGET_FLOAT128_HW
4214 && (rs6000_isa_flags
& ISA_3_0_MASKS_IEEE
) != ISA_3_0_MASKS_IEEE
)
4216 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4217 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4219 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4222 if (TARGET_FLOAT128_HW
&& !TARGET_64BIT
)
4224 if ((rs6000_isa_flags_explicit
& OPTION_MASK_FLOAT128_HW
) != 0)
4225 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4227 rs6000_isa_flags
&= ~OPTION_MASK_FLOAT128_HW
;
4230 /* Enable -mprefixed by default on power10 systems. */
4231 if (TARGET_POWER10
&& (rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED
) == 0)
4232 rs6000_isa_flags
|= OPTION_MASK_PREFIXED
;
4234 /* -mprefixed requires -mcpu=power10 (or later). */
4235 else if (TARGET_PREFIXED
&& !TARGET_POWER10
)
4237 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PREFIXED
) != 0)
4238 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4240 rs6000_isa_flags
&= ~OPTION_MASK_PREFIXED
;
4243 /* -mpcrel requires prefixed load/store addressing. */
4244 if (TARGET_PCREL
&& !TARGET_PREFIXED
)
4246 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4247 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4249 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4252 /* Print the options after updating the defaults. */
4253 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4254 rs6000_print_isa_options (stderr
, 0, "after defaults", rs6000_isa_flags
);
4256 /* E500mc does "better" if we inline more aggressively. Respect the
4257 user's opinion, though. */
4258 if (rs6000_block_move_inline_limit
== 0
4259 && (rs6000_tune
== PROCESSOR_PPCE500MC
4260 || rs6000_tune
== PROCESSOR_PPCE500MC64
4261 || rs6000_tune
== PROCESSOR_PPCE5500
4262 || rs6000_tune
== PROCESSOR_PPCE6500
))
4263 rs6000_block_move_inline_limit
= 128;
4265 /* store_one_arg depends on expand_block_move to handle at least the
4266 size of reg_parm_stack_space. */
4267 if (rs6000_block_move_inline_limit
< (TARGET_POWERPC64
? 64 : 32))
4268 rs6000_block_move_inline_limit
= (TARGET_POWERPC64
? 64 : 32);
4272 /* If the appropriate debug option is enabled, replace the target hooks
4273 with debug versions that call the real version and then prints
4274 debugging information. */
4275 if (TARGET_DEBUG_COST
)
4277 targetm
.rtx_costs
= rs6000_debug_rtx_costs
;
4278 targetm
.address_cost
= rs6000_debug_address_cost
;
4279 targetm
.sched
.adjust_cost
= rs6000_debug_adjust_cost
;
4282 if (TARGET_DEBUG_ADDR
)
4284 targetm
.legitimate_address_p
= rs6000_debug_legitimate_address_p
;
4285 targetm
.legitimize_address
= rs6000_debug_legitimize_address
;
4286 rs6000_secondary_reload_class_ptr
4287 = rs6000_debug_secondary_reload_class
;
4288 targetm
.secondary_memory_needed
4289 = rs6000_debug_secondary_memory_needed
;
4290 targetm
.can_change_mode_class
4291 = rs6000_debug_can_change_mode_class
;
4292 rs6000_preferred_reload_class_ptr
4293 = rs6000_debug_preferred_reload_class
;
4294 rs6000_mode_dependent_address_ptr
4295 = rs6000_debug_mode_dependent_address
;
4298 if (rs6000_veclibabi_name
)
4300 if (strcmp (rs6000_veclibabi_name
, "mass") == 0)
4301 rs6000_veclib_handler
= rs6000_builtin_vectorized_libmass
;
4304 error ("unknown vectorization library ABI type in "
4305 "%<-mveclibabi=%s%>", rs6000_veclibabi_name
);
4311 /* Enable Altivec ABI for AIX -maltivec. */
4313 && (TARGET_ALTIVEC
|| TARGET_VSX
)
4314 && !OPTION_SET_P (rs6000_altivec_abi
))
4316 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_altivec_abi
)
4317 error ("target attribute or pragma changes AltiVec ABI");
4319 rs6000_altivec_abi
= 1;
4322 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4323 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4324 be explicitly overridden in either case. */
4327 if (!OPTION_SET_P (rs6000_altivec_abi
)
4328 && (TARGET_64BIT
|| TARGET_ALTIVEC
|| TARGET_VSX
))
4330 if (main_target_opt
!= NULL
&&
4331 !main_target_opt
->x_rs6000_altivec_abi
)
4332 error ("target attribute or pragma changes AltiVec ABI");
4334 rs6000_altivec_abi
= 1;
4338 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4339 So far, the only darwin64 targets are also MACH-O. */
4341 && DEFAULT_ABI
== ABI_DARWIN
4344 if (main_target_opt
!= NULL
&& !main_target_opt
->x_rs6000_darwin64_abi
)
4345 error ("target attribute or pragma changes darwin64 ABI");
4348 rs6000_darwin64_abi
= 1;
4349 /* Default to natural alignment, for better performance. */
4350 rs6000_alignment_flags
= MASK_ALIGN_NATURAL
;
4354 /* Place FP constants in the constant pool instead of TOC
4355 if section anchors enabled. */
4356 if (flag_section_anchors
4357 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC
))
4358 TARGET_NO_FP_IN_TOC
= 1;
4360 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4361 rs6000_print_isa_options (stderr
, 0, "before subtarget", rs6000_isa_flags
);
4363 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4364 SUBTARGET_OVERRIDE_OPTIONS
;
4366 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4367 SUBSUBTARGET_OVERRIDE_OPTIONS
;
4369 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4370 SUB3TARGET_OVERRIDE_OPTIONS
;
4373 /* If the ABI has support for PC-relative relocations, enable it by default.
4374 This test depends on the sub-target tests above setting the code model to
4375 medium for ELF v2 systems. */
4376 if (PCREL_SUPPORTED_BY_OS
4377 && (rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) == 0)
4378 rs6000_isa_flags
|= OPTION_MASK_PCREL
;
4380 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4381 after the subtarget override options are done. */
4382 else if (TARGET_PCREL
&& TARGET_CMODEL
!= CMODEL_MEDIUM
)
4384 if ((rs6000_isa_flags_explicit
& OPTION_MASK_PCREL
) != 0)
4385 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4387 rs6000_isa_flags
&= ~OPTION_MASK_PCREL
;
4390 /* Enable -mmma by default on power10 systems. */
4391 if (TARGET_POWER10
&& (rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) == 0)
4392 rs6000_isa_flags
|= OPTION_MASK_MMA
;
4394 /* Turn off vector pair/mma options on non-power10 systems. */
4395 else if (!TARGET_POWER10
&& TARGET_MMA
)
4397 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) != 0)
4398 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4400 rs6000_isa_flags
&= ~OPTION_MASK_MMA
;
4403 /* Enable power10 fusion if we are tuning for power10, even if we aren't
4404 generating power10 instructions. */
4405 if (!(rs6000_isa_flags_explicit
& OPTION_MASK_P10_FUSION
))
4407 if (rs6000_tune
== PROCESSOR_POWER10
)
4408 rs6000_isa_flags
|= OPTION_MASK_P10_FUSION
;
4410 rs6000_isa_flags
&= ~OPTION_MASK_P10_FUSION
;
4413 /* MMA requires SIMD support as ISA 3.1 claims and our implementation
4414 such as "*movoo" uses vector pair access which use VSX registers.
4415 So make MMA require VSX support here. */
4416 if (TARGET_MMA
&& !TARGET_VSX
)
4418 if ((rs6000_isa_flags_explicit
& OPTION_MASK_MMA
) != 0)
4419 error ("%qs requires %qs", "-mmma", "-mvsx");
4420 rs6000_isa_flags
&= ~OPTION_MASK_MMA
;
4423 if (!TARGET_PCREL
&& TARGET_PCREL_OPT
)
4424 rs6000_isa_flags
&= ~OPTION_MASK_PCREL_OPT
;
4426 if (TARGET_DEBUG_REG
|| TARGET_DEBUG_TARGET
)
4427 rs6000_print_isa_options (stderr
, 0, "after subtarget", rs6000_isa_flags
);
4429 rs6000_always_hint
= (rs6000_tune
!= PROCESSOR_POWER4
4430 && rs6000_tune
!= PROCESSOR_POWER5
4431 && rs6000_tune
!= PROCESSOR_POWER6
4432 && rs6000_tune
!= PROCESSOR_POWER7
4433 && rs6000_tune
!= PROCESSOR_POWER8
4434 && rs6000_tune
!= PROCESSOR_POWER9
4435 && rs6000_tune
!= PROCESSOR_POWER10
4436 && rs6000_tune
!= PROCESSOR_PPCA2
4437 && rs6000_tune
!= PROCESSOR_CELL
4438 && rs6000_tune
!= PROCESSOR_PPC476
);
4439 rs6000_sched_groups
= (rs6000_tune
== PROCESSOR_POWER4
4440 || rs6000_tune
== PROCESSOR_POWER5
4441 || rs6000_tune
== PROCESSOR_POWER7
4442 || rs6000_tune
== PROCESSOR_POWER8
);
4443 rs6000_align_branch_targets
= (rs6000_tune
== PROCESSOR_POWER4
4444 || rs6000_tune
== PROCESSOR_POWER5
4445 || rs6000_tune
== PROCESSOR_POWER6
4446 || rs6000_tune
== PROCESSOR_POWER7
4447 || rs6000_tune
== PROCESSOR_POWER8
4448 || rs6000_tune
== PROCESSOR_POWER9
4449 || rs6000_tune
== PROCESSOR_POWER10
4450 || rs6000_tune
== PROCESSOR_PPCE500MC
4451 || rs6000_tune
== PROCESSOR_PPCE500MC64
4452 || rs6000_tune
== PROCESSOR_PPCE5500
4453 || rs6000_tune
== PROCESSOR_PPCE6500
);
4455 /* Allow debug switches to override the above settings. These are set to -1
4456 in rs6000.opt to indicate the user hasn't directly set the switch. */
4457 if (TARGET_ALWAYS_HINT
>= 0)
4458 rs6000_always_hint
= TARGET_ALWAYS_HINT
;
4460 if (TARGET_SCHED_GROUPS
>= 0)
4461 rs6000_sched_groups
= TARGET_SCHED_GROUPS
;
4463 if (TARGET_ALIGN_BRANCH_TARGETS
>= 0)
4464 rs6000_align_branch_targets
= TARGET_ALIGN_BRANCH_TARGETS
;
4466 rs6000_sched_restricted_insns_priority
4467 = (rs6000_sched_groups
? 1 : 0);
4469 /* Handle -msched-costly-dep option. */
4470 rs6000_sched_costly_dep
4471 = (rs6000_sched_groups
? true_store_to_load_dep_costly
: no_dep_costly
);
4473 if (rs6000_sched_costly_dep_str
)
4475 if (! strcmp (rs6000_sched_costly_dep_str
, "no"))
4476 rs6000_sched_costly_dep
= no_dep_costly
;
4477 else if (! strcmp (rs6000_sched_costly_dep_str
, "all"))
4478 rs6000_sched_costly_dep
= all_deps_costly
;
4479 else if (! strcmp (rs6000_sched_costly_dep_str
, "true_store_to_load"))
4480 rs6000_sched_costly_dep
= true_store_to_load_dep_costly
;
4481 else if (! strcmp (rs6000_sched_costly_dep_str
, "store_to_load"))
4482 rs6000_sched_costly_dep
= store_to_load_dep_costly
;
4484 rs6000_sched_costly_dep
= ((enum rs6000_dependence_cost
)
4485 atoi (rs6000_sched_costly_dep_str
));
4488 /* Handle -minsert-sched-nops option. */
4489 rs6000_sched_insert_nops
4490 = (rs6000_sched_groups
? sched_finish_regroup_exact
: sched_finish_none
);
4492 if (rs6000_sched_insert_nops_str
)
4494 if (! strcmp (rs6000_sched_insert_nops_str
, "no"))
4495 rs6000_sched_insert_nops
= sched_finish_none
;
4496 else if (! strcmp (rs6000_sched_insert_nops_str
, "pad"))
4497 rs6000_sched_insert_nops
= sched_finish_pad_groups
;
4498 else if (! strcmp (rs6000_sched_insert_nops_str
, "regroup_exact"))
4499 rs6000_sched_insert_nops
= sched_finish_regroup_exact
;
4501 rs6000_sched_insert_nops
= ((enum rs6000_nop_insertion
)
4502 atoi (rs6000_sched_insert_nops_str
));
4505 /* Handle stack protector */
4506 if (!OPTION_SET_P (rs6000_stack_protector_guard
))
4507 #ifdef TARGET_THREAD_SSP_OFFSET
4508 rs6000_stack_protector_guard
= SSP_TLS
;
4510 rs6000_stack_protector_guard
= SSP_GLOBAL
;
4513 #ifdef TARGET_THREAD_SSP_OFFSET
4514 rs6000_stack_protector_guard_offset
= TARGET_THREAD_SSP_OFFSET
;
4515 rs6000_stack_protector_guard_reg
= TARGET_64BIT
? 13 : 2;
4518 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str
))
4521 const char *str
= rs6000_stack_protector_guard_offset_str
;
4524 long offset
= strtol (str
, &endp
, 0);
4525 if (!*str
|| *endp
|| errno
)
4526 error ("%qs is not a valid number in %qs", str
,
4527 "-mstack-protector-guard-offset=");
4529 if (!IN_RANGE (offset
, -0x8000, 0x7fff)
4530 || (TARGET_64BIT
&& (offset
& 3)))
4531 error ("%qs is not a valid offset in %qs", str
,
4532 "-mstack-protector-guard-offset=");
4534 rs6000_stack_protector_guard_offset
= offset
;
4537 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str
))
4539 const char *str
= rs6000_stack_protector_guard_reg_str
;
4540 int reg
= decode_reg_name (str
);
4542 if (!IN_RANGE (reg
, 1, 31))
4543 error ("%qs is not a valid base register in %qs", str
,
4544 "-mstack-protector-guard-reg=");
4546 rs6000_stack_protector_guard_reg
= reg
;
4549 if (rs6000_stack_protector_guard
== SSP_TLS
4550 && !IN_RANGE (rs6000_stack_protector_guard_reg
, 1, 31))
4551 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4555 #ifdef TARGET_REGNAMES
4556 /* If the user desires alternate register names, copy in the
4557 alternate names now. */
4558 if (TARGET_REGNAMES
)
4559 memcpy (rs6000_reg_names
, alt_reg_names
, sizeof (rs6000_reg_names
));
4562 /* Set aix_struct_return last, after the ABI is determined.
4563 If -maix-struct-return or -msvr4-struct-return was explicitly
4564 used, don't override with the ABI default. */
4565 if (!OPTION_SET_P (aix_struct_return
))
4566 aix_struct_return
= (DEFAULT_ABI
!= ABI_V4
|| DRAFT_V4_STRUCT_RET
);
4569 /* IBM XL compiler defaults to unsigned bitfields. */
4570 if (TARGET_XL_COMPAT
)
4571 flag_signed_bitfields
= 0;
4574 if (TARGET_LONG_DOUBLE_128
&& !TARGET_IEEEQUAD
)
4575 REAL_MODE_FORMAT (TFmode
) = &ibm_extended_format
;
4577 ASM_GENERATE_INTERNAL_LABEL (toc_label_name
, "LCTOC", 1);
4579 /* We can only guarantee the availability of DI pseudo-ops when
4580 assembling for 64-bit targets. */
4583 targetm
.asm_out
.aligned_op
.di
= NULL
;
4584 targetm
.asm_out
.unaligned_op
.di
= NULL
;
4588 /* Set branch target alignment, if not optimizing for size. */
4591 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be
4592 aligned 8byte to avoid misprediction by the branch predictor. */
4593 if (rs6000_tune
== PROCESSOR_TITAN
4594 || rs6000_tune
== PROCESSOR_CELL
)
4596 if (flag_align_functions
&& !str_align_functions
)
4597 str_align_functions
= "8";
4598 if (flag_align_jumps
&& !str_align_jumps
)
4599 str_align_jumps
= "8";
4600 if (flag_align_loops
&& !str_align_loops
)
4601 str_align_loops
= "8";
4603 if (rs6000_align_branch_targets
)
4605 if (flag_align_functions
&& !str_align_functions
)
4606 str_align_functions
= "16";
4607 if (flag_align_jumps
&& !str_align_jumps
)
4608 str_align_jumps
= "16";
4609 if (flag_align_loops
&& !str_align_loops
)
4611 can_override_loop_align
= 1;
4612 str_align_loops
= "16";
4617 /* Arrange to save and restore machine status around nested functions. */
4618 init_machine_status
= rs6000_init_machine_status
;
4620 /* We should always be splitting complex arguments, but we can't break
4621 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4622 if (DEFAULT_ABI
== ABI_V4
|| DEFAULT_ABI
== ABI_DARWIN
)
4623 targetm
.calls
.split_complex_arg
= NULL
;
4625 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4626 if (DEFAULT_ABI
== ABI_AIX
)
4627 targetm
.calls
.custom_function_descriptors
= 0;
4630 /* Initialize rs6000_cost with the appropriate target costs. */
4632 rs6000_cost
= TARGET_POWERPC64
? &size64_cost
: &size32_cost
;
4634 switch (rs6000_tune
)
4636 case PROCESSOR_RS64A
:
4637 rs6000_cost
= &rs64a_cost
;
4640 case PROCESSOR_MPCCORE
:
4641 rs6000_cost
= &mpccore_cost
;
4644 case PROCESSOR_PPC403
:
4645 rs6000_cost
= &ppc403_cost
;
4648 case PROCESSOR_PPC405
:
4649 rs6000_cost
= &ppc405_cost
;
4652 case PROCESSOR_PPC440
:
4653 rs6000_cost
= &ppc440_cost
;
4656 case PROCESSOR_PPC476
:
4657 rs6000_cost
= &ppc476_cost
;
4660 case PROCESSOR_PPC601
:
4661 rs6000_cost
= &ppc601_cost
;
4664 case PROCESSOR_PPC603
:
4665 rs6000_cost
= &ppc603_cost
;
4668 case PROCESSOR_PPC604
:
4669 rs6000_cost
= &ppc604_cost
;
4672 case PROCESSOR_PPC604e
:
4673 rs6000_cost
= &ppc604e_cost
;
4676 case PROCESSOR_PPC620
:
4677 rs6000_cost
= &ppc620_cost
;
4680 case PROCESSOR_PPC630
:
4681 rs6000_cost
= &ppc630_cost
;
4684 case PROCESSOR_CELL
:
4685 rs6000_cost
= &ppccell_cost
;
4688 case PROCESSOR_PPC750
:
4689 case PROCESSOR_PPC7400
:
4690 rs6000_cost
= &ppc750_cost
;
4693 case PROCESSOR_PPC7450
:
4694 rs6000_cost
= &ppc7450_cost
;
4697 case PROCESSOR_PPC8540
:
4698 case PROCESSOR_PPC8548
:
4699 rs6000_cost
= &ppc8540_cost
;
4702 case PROCESSOR_PPCE300C2
:
4703 case PROCESSOR_PPCE300C3
:
4704 rs6000_cost
= &ppce300c2c3_cost
;
4707 case PROCESSOR_PPCE500MC
:
4708 rs6000_cost
= &ppce500mc_cost
;
4711 case PROCESSOR_PPCE500MC64
:
4712 rs6000_cost
= &ppce500mc64_cost
;
4715 case PROCESSOR_PPCE5500
:
4716 rs6000_cost
= &ppce5500_cost
;
4719 case PROCESSOR_PPCE6500
:
4720 rs6000_cost
= &ppce6500_cost
;
4723 case PROCESSOR_TITAN
:
4724 rs6000_cost
= &titan_cost
;
4727 case PROCESSOR_POWER4
:
4728 case PROCESSOR_POWER5
:
4729 rs6000_cost
= &power4_cost
;
4732 case PROCESSOR_POWER6
:
4733 rs6000_cost
= &power6_cost
;
4736 case PROCESSOR_POWER7
:
4737 rs6000_cost
= &power7_cost
;
4740 case PROCESSOR_POWER8
:
4741 rs6000_cost
= &power8_cost
;
4744 case PROCESSOR_POWER9
:
4745 rs6000_cost
= &power9_cost
;
4748 case PROCESSOR_POWER10
:
4749 rs6000_cost
= &power10_cost
;
4752 case PROCESSOR_PPCA2
:
4753 rs6000_cost
= &ppca2_cost
;
4762 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4763 param_simultaneous_prefetches
,
4764 rs6000_cost
->simultaneous_prefetches
);
4765 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4766 param_l1_cache_size
,
4767 rs6000_cost
->l1_cache_size
);
4768 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4769 param_l1_cache_line_size
,
4770 rs6000_cost
->cache_line_size
);
4771 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4772 param_l2_cache_size
,
4773 rs6000_cost
->l2_cache_size
);
4775 /* Increase loop peeling limits based on performance analysis. */
4776 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4777 param_max_peeled_insns
, 400);
4778 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4779 param_max_completely_peeled_insns
, 400);
4781 /* The lxvl/stxvl instructions don't perform well before Power10. */
4783 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4784 param_vect_partial_vector_usage
, 1);
4786 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4787 param_vect_partial_vector_usage
, 0);
4789 /* Use the 'model' -fsched-pressure algorithm by default. */
4790 SET_OPTION_IF_UNSET (&global_options
, &global_options_set
,
4791 param_sched_pressure_algorithm
,
4792 SCHED_PRESSURE_MODEL
);
4794 /* If using typedef char *va_list, signal that
4795 __builtin_va_start (&ap, 0) can be optimized to
4796 ap = __builtin_next_arg (0). */
4797 if (DEFAULT_ABI
!= ABI_V4
)
4798 targetm
.expand_builtin_va_start
= NULL
;
4801 rs6000_override_options_after_change ();
4803 /* If not explicitly specified via option, decide whether to generate indexed
4804 load/store instructions. A value of -1 indicates that the
4805 initial value of this variable has not been overwritten. During
4806 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4807 if (TARGET_AVOID_XFORM
== -1)
4808 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4809 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4810 need indexed accesses and the type used is the scalar type of the element
4811 being loaded or stored. */
4812 TARGET_AVOID_XFORM
= (rs6000_tune
== PROCESSOR_POWER6
&& TARGET_CMPB
4813 && !TARGET_ALTIVEC
);
4815 /* Set the -mrecip options. */
4816 if (rs6000_recip_name
)
4818 char *p
= ASTRDUP (rs6000_recip_name
);
4820 unsigned int mask
, i
;
4823 while ((q
= strtok (p
, ",")) != NULL
)
4834 if (!strcmp (q
, "default"))
4835 mask
= ((TARGET_RECIP_PRECISION
)
4836 ? RECIP_HIGH_PRECISION
: RECIP_LOW_PRECISION
);
4839 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4840 if (!strcmp (q
, recip_options
[i
].string
))
4842 mask
= recip_options
[i
].mask
;
4846 if (i
== ARRAY_SIZE (recip_options
))
4848 error ("unknown option for %<%s=%s%>", "-mrecip", q
);
4856 rs6000_recip_control
&= ~mask
;
4858 rs6000_recip_control
|= mask
;
4862 /* Initialize all of the registers. */
4863 rs6000_init_hard_regno_mode_ok (global_init_p
);
4865 /* Save the initial options in case the user does function specific options */
4867 target_option_default_node
= target_option_current_node
4868 = build_target_option_node (&global_options
, &global_options_set
);
4870 /* If not explicitly specified via option, decide whether to generate the
4871 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4872 if (TARGET_LINK_STACK
== -1)
4873 SET_TARGET_LINK_STACK (rs6000_tune
== PROCESSOR_PPC476
&& flag_pic
);
4875 /* Deprecate use of -mno-speculate-indirect-jumps. */
4876 if (!rs6000_speculate_indirect_jumps
)
4877 warning (0, "%qs is deprecated and not recommended in any circumstances",
4878 "-mno-speculate-indirect-jumps");
/* Implement TARGET_OPTION_OVERRIDE.  On the RS/6000 this is used to
   define the target cpu type.  */

static void
rs6000_option_override (void)
{
  /* Thin wrapper over the real worker.  The TRUE argument marks this as the
     one-time global initialization (as opposed to a re-run triggered by a
     target attribute/pragma); the boolean success result is deliberately
     discarded here.  */
  (void) rs6000_option_override_internal (true);
}
4893 /* Implement LOOP_ALIGN. */
4895 rs6000_loop_align (rtx label
)
4900 /* Don't override loop alignment if -falign-loops was specified. */
4901 if (!can_override_loop_align
)
4904 bb
= BLOCK_FOR_INSN (label
);
4905 ninsns
= num_loop_insns(bb
->loop_father
);
4907 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4908 if (ninsns
> 4 && ninsns
<= 8
4909 && (rs6000_tune
== PROCESSOR_POWER4
4910 || rs6000_tune
== PROCESSOR_POWER5
4911 || rs6000_tune
== PROCESSOR_POWER6
4912 || rs6000_tune
== PROCESSOR_POWER7
4913 || rs6000_tune
== PROCESSOR_POWER8
))
4914 return align_flags (5);
4919 /* Return true iff, data reference of TYPE can reach vector alignment (16)
4920 after applying N number of iterations. This routine does not determine
4921 how may iterations are required to reach desired alignment. */
4924 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
4931 if (rs6000_alignment_flags
== MASK_ALIGN_NATURAL
)
4934 if (rs6000_alignment_flags
== MASK_ALIGN_POWER
)
4944 /* Assuming that all other types are naturally aligned. CHECKME! */
4949 /* Return true if the vector misalignment factor is supported by the
4952 rs6000_builtin_support_vector_misalignment (machine_mode mode
,
4959 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
4962 /* Return if movmisalign pattern is not supported for this mode. */
4963 if (optab_handler (movmisalign_optab
, mode
) == CODE_FOR_nothing
)
4966 if (misalignment
== -1)
4968 /* Misalignment factor is unknown at compile time but we know
4969 it's word aligned. */
4970 if (rs6000_vector_alignment_reachable (type
, is_packed
))
4972 int element_size
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
4974 if (element_size
== 64 || element_size
== 32)
4981 /* VSX supports word-aligned vector. */
4982 if (misalignment
% 4 == 0)
4988 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4990 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
4991 tree vectype
, int misalign
)
4996 switch (type_of_cost
)
5004 case cond_branch_not_taken
:
5008 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5012 /* Power7 has only one permute unit, make it a bit expensive. */
5013 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5018 case vec_promote_demote
:
5019 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5020 if (TARGET_VSX
&& rs6000_tune
== PROCESSOR_POWER7
)
5025 case cond_branch_taken
:
5028 case unaligned_load
:
5029 case vector_gather_load
:
5030 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5031 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5034 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5036 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5037 /* See PR102767, consider V1TI to keep consistency. */
5038 if (elements
== 2 || elements
== 1)
5039 /* Double word aligned. */
5047 /* Double word aligned. */
5051 /* Unknown misalignment. */
5064 /* Misaligned loads are not supported. */
5067 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5070 case unaligned_store
:
5071 case vector_scatter_store
:
5072 if (TARGET_EFFICIENT_UNALIGNED_VSX
)
5075 if (TARGET_VSX
&& TARGET_ALLOW_MOVMISALIGN
)
5077 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5078 /* See PR102767, consider V1TI to keep consistency. */
5079 if (elements
== 2 || elements
== 1)
5080 /* Double word aligned. */
5088 /* Double word aligned. */
5092 /* Unknown misalignment. */
5105 /* Misaligned stores are not supported. */
5111 /* This is a rough approximation assuming non-constant elements
5112 constructed into a vector via element insertion. FIXME:
5113 vec_construct is not granular enough for uniformly good
5114 decisions. If the initialization is a splat, this is
5115 cheaper than we estimate. Improve this someday. */
5116 elem_type
= TREE_TYPE (vectype
);
5117 /* 32-bit vectors loaded into registers are stored as double
5118 precision, so we need 2 permutes, 2 converts, and 1 merge
5119 to construct a vector of short floats from them. */
5120 if (SCALAR_FLOAT_TYPE_P (elem_type
)
5121 && TYPE_PRECISION (elem_type
) == 32)
5123 /* On POWER9, integer vector types are built up in GPRs and then
5124 use a direct move (2 cycles). For POWER8 this is even worse,
5125 as we need two direct moves and a merge, and the direct moves
5127 else if (INTEGRAL_TYPE_P (elem_type
))
5129 if (TARGET_P9_VECTOR
)
5130 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 2;
5132 return TYPE_VECTOR_SUBPARTS (vectype
) - 1 + 5;
5135 /* V2DFmode doesn't need a direct move. */
5143 /* Implement targetm.vectorize.preferred_simd_mode. */
5146 rs6000_preferred_simd_mode (scalar_mode mode
)
5148 opt_machine_mode vmode
= mode_for_vector (mode
, 16 / GET_MODE_SIZE (mode
));
5150 if (vmode
.exists () && !VECTOR_MEM_NONE_P (vmode
.require ()))
5151 return vmode
.require ();
5156 class rs6000_cost_data
: public vector_costs
5159 using vector_costs::vector_costs
;
5161 unsigned int add_stmt_cost (int count
, vect_cost_for_stmt kind
,
5162 stmt_vec_info stmt_info
, slp_tree
, tree vectype
,
5164 vect_cost_model_location where
) override
;
5165 void finish_cost (const vector_costs
*) override
;
5168 void update_target_cost_per_stmt (vect_cost_for_stmt
, stmt_vec_info
,
5169 vect_cost_model_location
, unsigned int);
5170 void density_test (loop_vec_info
);
5171 void adjust_vect_cost_per_loop (loop_vec_info
);
5172 unsigned int determine_suggested_unroll_factor (loop_vec_info
);
5174 /* Total number of vectorized stmts (loop only). */
5175 unsigned m_nstmts
= 0;
5176 /* Total number of loads (loop only). */
5177 unsigned m_nloads
= 0;
5178 /* Total number of stores (loop only). */
5179 unsigned m_nstores
= 0;
5180 /* Reduction factor for suggesting unroll factor (loop only). */
5181 unsigned m_reduc_factor
= 0;
5182 /* Possible extra penalized cost on vector construction (loop only). */
5183 unsigned m_extra_ctor_cost
= 0;
5184 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5185 instruction is needed by the vectorization. */
5186 bool m_vect_nonmem
= false;
5187 /* If this loop gets vectorized with emulated gather load. */
5188 bool m_gather_load
= false;
5191 /* Test for likely overcommitment of vector hardware resources. If a
5192 loop iteration is relatively large, and too large a percentage of
5193 instructions in the loop are vectorized, the cost model may not
5194 adequately reflect delays from unavailable vector resources.
5195 Penalize the loop body cost for this case. */
5198 rs6000_cost_data::density_test (loop_vec_info loop_vinfo
)
5200 /* This density test only cares about the cost of vector version of the
5201 loop, so immediately return if we are passed costing for the scalar
5202 version (namely computing single scalar iteration cost). */
5203 if (m_costing_for_scalar
)
5206 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5207 basic_block
*bbs
= get_loop_body (loop
);
5208 int nbbs
= loop
->num_nodes
;
5209 int vec_cost
= m_costs
[vect_body
], not_vec_cost
= 0;
5211 for (int i
= 0; i
< nbbs
; i
++)
5213 basic_block bb
= bbs
[i
];
5214 gimple_stmt_iterator gsi
;
5216 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5218 gimple
*stmt
= gsi_stmt (gsi
);
5219 if (is_gimple_debug (stmt
))
5222 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
5224 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5225 && !STMT_VINFO_IN_PATTERN_P (stmt_info
))
5231 int density_pct
= (vec_cost
* 100) / (vec_cost
+ not_vec_cost
);
5233 if (density_pct
> rs6000_density_pct_threshold
5234 && vec_cost
+ not_vec_cost
> rs6000_density_size_threshold
)
5236 m_costs
[vect_body
] = vec_cost
* (100 + rs6000_density_penalty
) / 100;
5237 if (dump_enabled_p ())
5238 dump_printf_loc (MSG_NOTE
, vect_location
,
5239 "density %d%%, cost %d exceeds threshold, penalizing "
5240 "loop body cost by %u%%\n", density_pct
,
5241 vec_cost
+ not_vec_cost
, rs6000_density_penalty
);
5244 /* Check whether we need to penalize the body cost to account
5245 for excess strided or elementwise loads. */
5246 if (m_extra_ctor_cost
> 0)
5248 gcc_assert (m_nloads
<= m_nstmts
);
5249 unsigned int load_pct
= (m_nloads
* 100) / m_nstmts
;
5251 /* It's likely to be bounded by latency and execution resources
5252 from many scalar loads which are strided or elementwise loads
5253 into a vector if both conditions below are found:
5254 1. there are many loads, it's easy to result in a long wait
5256 2. load has a big proportion of all vectorized statements,
5257 it's not easy to schedule other statements to spread among
5259 One typical case is the innermost loop of the hotspot of SPEC2017
5260 503.bwaves_r without loop interchange. */
5261 if (m_nloads
> (unsigned int) rs6000_density_load_num_threshold
5262 && load_pct
> (unsigned int) rs6000_density_load_pct_threshold
)
5264 m_costs
[vect_body
] += m_extra_ctor_cost
;
5265 if (dump_enabled_p ())
5266 dump_printf_loc (MSG_NOTE
, vect_location
,
5267 "Found %u loads and "
5268 "load pct. %u%% exceed "
5270 "penalizing loop body "
5271 "cost by extra cost %u "
5279 /* Implement targetm.vectorize.create_costs. */
5281 static vector_costs
*
5282 rs6000_vectorize_create_costs (vec_info
*vinfo
, bool costing_for_scalar
)
5284 return new rs6000_cost_data (vinfo
, costing_for_scalar
);
5287 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5288 For some statement, we would like to further fine-grain tweak the cost on
5289 top of rs6000_builtin_vectorization_cost handling which doesn't have any
5290 information on statement operation codes etc. One typical case here is
5291 COND_EXPR, it takes the same cost to simple FXU instruction when evaluating
5292 for scalar cost, but it should be priced more whatever transformed to either
5293 compare + branch or compare + isel instructions. */
5296 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind
,
5297 struct _stmt_vec_info
*stmt_info
)
5299 if (kind
== scalar_stmt
&& stmt_info
&& stmt_info
->stmt
5300 && gimple_code (stmt_info
->stmt
) == GIMPLE_ASSIGN
)
5302 tree_code subcode
= gimple_assign_rhs_code (stmt_info
->stmt
);
5303 if (subcode
== COND_EXPR
)
5310 /* Helper function for add_stmt_cost. Check each statement cost
5311 entry, gather information and update the target_cost fields
5314 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind
,
5315 stmt_vec_info stmt_info
,
5316 vect_cost_model_location where
,
5317 unsigned int orig_count
)
5320 /* Check whether we're doing something other than just a copy loop.
5321 Not all such loops may be profitably vectorized; see
5322 rs6000_finish_cost. */
5323 if (kind
== vec_to_scalar
5325 || kind
== vec_promote_demote
5326 || kind
== vec_construct
5327 || kind
== scalar_to_vec
5328 || (where
== vect_body
&& kind
== vector_stmt
))
5329 m_vect_nonmem
= true;
5331 /* Gather some information when we are costing the vectorized instruction
5332 for the statements located in a loop body. */
5333 if (!m_costing_for_scalar
5334 && is_a
<loop_vec_info
> (m_vinfo
)
5335 && where
== vect_body
)
5337 m_nstmts
+= orig_count
;
5339 if (kind
== scalar_load
5340 || kind
== vector_load
5341 || kind
== unaligned_load
5342 || kind
== vector_gather_load
)
5344 m_nloads
+= orig_count
;
5345 if (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
5346 m_gather_load
= true;
5348 else if (kind
== scalar_store
5349 || kind
== vector_store
5350 || kind
== unaligned_store
5351 || kind
== vector_scatter_store
)
5352 m_nstores
+= orig_count
;
5353 else if ((kind
== scalar_stmt
5354 || kind
== vector_stmt
5355 || kind
== vec_to_scalar
)
5357 && vect_is_reduction (stmt_info
))
5359 /* Loop body contains normal int or fp operations and epilogue
5360 contains vector reduction. For simplicity, we assume int
5361 operation takes one cycle and fp operation takes one more. */
5362 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
5363 bool is_float
= FLOAT_TYPE_P (TREE_TYPE (lhs
));
5364 unsigned int basic_cost
= is_float
? 2 : 1;
5365 m_reduc_factor
= MAX (basic_cost
* orig_count
, m_reduc_factor
);
5368 /* Power processors do not currently have instructions for strided
5369 and elementwise loads, and instead we must generate multiple
5370 scalar loads. This leads to undercounting of the cost. We
5371 account for this by scaling the construction cost by the number
5372 of elements involved, and saving this as extra cost that we may
5373 or may not need to apply. When finalizing the cost of the loop,
5374 the extra penalty is applied when the load density heuristics
5376 if (kind
== vec_construct
&& stmt_info
5377 && STMT_VINFO_TYPE (stmt_info
) == load_vec_info_type
5378 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_ELEMENTWISE
5379 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_STRIDED_SLP
))
5381 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5382 unsigned int nunits
= vect_nunits_for_cost (vectype
);
5383 /* As PR103702 shows, it's possible that vectorizer wants to do
5384 costings for only one unit here, it's no need to do any
5385 penalization for it, so simply early return here. */
5388 /* i386 port adopts nunits * stmt_cost as the penalized cost
5389 for this kind of penalization, we used to follow it but
5390 found it could result in an unreliable body cost especially
5391 for V16QI/V8HI modes. To make it better, we choose this
5392 new heuristic: for each scalar load, we use 2 as penalized
5393 cost for the case with 2 nunits and use 1 for the other
5394 cases. It's without much supporting theory, mainly
5395 concluded from the broad performance evaluations on Power8,
5396 Power9 and Power10. One possibly related point is that:
5397 vector construction for more units would use more insns,
5398 it has more chances to schedule them better (even run in
5399 parallelly when enough available units at that time), so
5400 it seems reasonable not to penalize that much for them. */
5401 unsigned int adjusted_cost
= (nunits
== 2) ? 2 : 1;
5402 unsigned int extra_cost
= nunits
* adjusted_cost
;
5403 m_extra_ctor_cost
+= extra_cost
;
5409 rs6000_cost_data::add_stmt_cost (int count
, vect_cost_for_stmt kind
,
5410 stmt_vec_info stmt_info
, slp_tree
,
5411 tree vectype
, int misalign
,
5412 vect_cost_model_location where
)
5414 unsigned retval
= 0;
5416 if (flag_vect_cost_model
)
5418 int stmt_cost
= rs6000_builtin_vectorization_cost (kind
, vectype
,
5420 stmt_cost
+= rs6000_adjust_vect_cost_per_stmt (kind
, stmt_info
);
5421 /* Statements in an inner loop relative to the loop being
5422 vectorized are weighted more heavily. The value here is
5423 arbitrary and could potentially be improved with analysis. */
5424 unsigned int orig_count
= count
;
5425 retval
= adjust_cost_for_freq (stmt_info
, where
, count
* stmt_cost
);
5426 m_costs
[where
] += retval
;
5428 update_target_cost_per_stmt (kind
, stmt_info
, where
, orig_count
);
5434 /* For some target specific vectorization cost which can't be handled per stmt,
5435 we check the requisite conditions and adjust the vectorization cost
5436 accordingly if satisfied. One typical example is to model shift cost for
5437 vector with length by counting number of required lengths under condition
5438 LOOP_VINFO_FULLY_WITH_LENGTH_P. */
5441 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo
)
5443 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
))
5445 rgroup_controls
*rgc
;
5446 unsigned int num_vectors_m1
;
5447 unsigned int shift_cnt
= 0;
5448 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo
), num_vectors_m1
, rgc
)
5450 /* Each length needs one shift to fill into bits 0-7. */
5451 shift_cnt
+= num_vectors_m1
+ 1;
5453 add_stmt_cost (shift_cnt
, scalar_stmt
, NULL
, NULL
,
5454 NULL_TREE
, 0, vect_body
);
5458 /* Determine suggested unroll factor by considering some below factors:
5460 - unroll option/pragma which can disable unrolling for this loop;
5461 - simple hardware resource model for non memory vector insns;
5462 - aggressive heuristics when iteration count is unknown:
5463 - reduction case to break cross iteration dependency;
5464 - emulated gather load;
5465 - estimated iteration count when iteration count is unknown;
5470 rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo
)
5472 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5474 /* Don't unroll if it's specified explicitly not to be unrolled. */
5475 if (loop
->unroll
== 1
5476 || (OPTION_SET_P (flag_unroll_loops
) && !flag_unroll_loops
)
5477 || (OPTION_SET_P (flag_unroll_all_loops
) && !flag_unroll_all_loops
))
5480 unsigned int nstmts_nonldst
= m_nstmts
- m_nloads
- m_nstores
;
5481 /* Don't unroll if no vector instructions excepting for memory access. */
5482 if (nstmts_nonldst
== 0)
5485 /* Consider breaking cross iteration dependency for reduction. */
5486 unsigned int reduc_factor
= m_reduc_factor
> 1 ? m_reduc_factor
: 1;
5488 /* Use this simple hardware resource model that how many non ld/st
5489 vector instructions can be issued per cycle. */
5490 unsigned int issue_width
= rs6000_vect_unroll_issue
;
5491 unsigned int uf
= CEIL (reduc_factor
* issue_width
, nstmts_nonldst
);
5492 uf
= MIN ((unsigned int) rs6000_vect_unroll_limit
, uf
);
5493 /* Make sure it is power of 2. */
5494 uf
= 1 << ceil_log2 (uf
);
5496 /* If the iteration count is known, the costing would be exact enough,
5497 don't worry it could be worse. */
5498 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo
))
5501 /* Inspired by SPEC2017 parest_r, we want to aggressively unroll the
5502 loop if either condition is satisfied:
5503 - reduction factor exceeds the threshold;
5504 - emulated gather load adopted. */
5505 if (reduc_factor
> (unsigned int) rs6000_vect_unroll_reduc_threshold
5509 /* Check if we can conclude it's good to unroll from the estimated
5511 HOST_WIDE_INT est_niter
= get_estimated_loop_iterations_int (loop
);
5512 unsigned int vf
= vect_vf_for_cost (loop_vinfo
);
5513 unsigned int unrolled_vf
= vf
* uf
;
5514 if (est_niter
== -1 || est_niter
< unrolled_vf
)
5515 /* When the estimated iteration of this loop is unknown, it's possible
5516 that we are able to vectorize this loop with the original VF but fail
5517 to vectorize it with the unrolled VF any more if the actual iteration
5518 count is in between. */
5522 unsigned int epil_niter_unr
= est_niter
% unrolled_vf
;
5523 unsigned int epil_niter
= est_niter
% vf
;
5524 /* Even if we have partial vector support, it can be still inefficent
5525 to calculate the length when the iteration count is unknown, so
5526 only expect it's good to unroll when the epilogue iteration count
5527 is not bigger than VF (only one time length calculation). */
5528 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
5529 && epil_niter_unr
<= vf
)
5531 /* Without partial vector support, conservatively unroll this when
5532 the epilogue iteration count is less than the original one
5533 (epilogue execution time wouldn't be longer than before). */
5534 else if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
5535 && epil_niter_unr
<= epil_niter
)
5543 rs6000_cost_data::finish_cost (const vector_costs
*scalar_costs
)
5545 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (m_vinfo
))
5547 adjust_vect_cost_per_loop (loop_vinfo
);
5548 density_test (loop_vinfo
);
5550 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5551 that require versioning for any reason. The vectorization is at
5552 best a wash inside the loop, and the versioning checks make
5553 profitability highly unlikely and potentially quite harmful. */
5555 && LOOP_VINFO_VECT_FACTOR (loop_vinfo
) == 2
5556 && LOOP_REQUIRES_VERSIONING (loop_vinfo
))
5557 m_costs
[vect_body
] += 10000;
5559 m_suggested_unroll_factor
5560 = determine_suggested_unroll_factor (loop_vinfo
);
5563 vector_costs::finish_cost (scalar_costs
);
5566 /* Implement targetm.loop_unroll_adjust. */
5569 rs6000_loop_unroll_adjust (unsigned nunroll
, struct loop
*loop
)
5571 if (unroll_only_small_loops
)
5573 /* TODO: These are hardcoded values right now. We probably should use
5575 if (loop
->ninsns
<= 6)
5576 return MIN (4, nunroll
);
5577 if (loop
->ninsns
<= 10)
5578 return MIN (2, nunroll
);
5586 /* Returns a function decl for a vectorized version of the builtin function
5587 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5588 if it is not available.
5590 Implement targetm.vectorize.builtin_vectorized_function. */
5593 rs6000_builtin_vectorized_function (unsigned int fn
, tree type_out
,
5596 machine_mode in_mode
, out_mode
;
5599 if (TARGET_DEBUG_BUILTIN
)
5600 fprintf (stderr
, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5601 combined_fn_name (combined_fn (fn
)),
5602 GET_MODE_NAME (TYPE_MODE (type_out
)),
5603 GET_MODE_NAME (TYPE_MODE (type_in
)));
5605 /* TODO: Should this be gcc_assert? */
5606 if (TREE_CODE (type_out
) != VECTOR_TYPE
5607 || TREE_CODE (type_in
) != VECTOR_TYPE
)
5610 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
5611 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
5612 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
5613 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
5618 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5619 && out_mode
== DFmode
&& out_n
== 2
5620 && in_mode
== DFmode
&& in_n
== 2)
5621 return rs6000_builtin_decls
[RS6000_BIF_CPSGNDP
];
5622 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5623 && out_mode
== SFmode
&& out_n
== 4
5624 && in_mode
== SFmode
&& in_n
== 4)
5625 return rs6000_builtin_decls
[RS6000_BIF_CPSGNSP
];
5626 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5627 && out_mode
== SFmode
&& out_n
== 4
5628 && in_mode
== SFmode
&& in_n
== 4)
5629 return rs6000_builtin_decls
[RS6000_BIF_COPYSIGN_V4SF
];
5632 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5633 && out_mode
== DFmode
&& out_n
== 2
5634 && in_mode
== DFmode
&& in_n
== 2)
5635 return rs6000_builtin_decls
[RS6000_BIF_XVRDPIP
];
5636 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5637 && out_mode
== SFmode
&& out_n
== 4
5638 && in_mode
== SFmode
&& in_n
== 4)
5639 return rs6000_builtin_decls
[RS6000_BIF_XVRSPIP
];
5640 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5641 && out_mode
== SFmode
&& out_n
== 4
5642 && in_mode
== SFmode
&& in_n
== 4)
5643 return rs6000_builtin_decls
[RS6000_BIF_VRFIP
];
5646 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5647 && out_mode
== DFmode
&& out_n
== 2
5648 && in_mode
== DFmode
&& in_n
== 2)
5649 return rs6000_builtin_decls
[RS6000_BIF_XVRDPIM
];
5650 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5651 && out_mode
== SFmode
&& out_n
== 4
5652 && in_mode
== SFmode
&& in_n
== 4)
5653 return rs6000_builtin_decls
[RS6000_BIF_XVRSPIM
];
5654 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5655 && out_mode
== SFmode
&& out_n
== 4
5656 && in_mode
== SFmode
&& in_n
== 4)
5657 return rs6000_builtin_decls
[RS6000_BIF_VRFIM
];
5660 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5661 && out_mode
== DFmode
&& out_n
== 2
5662 && in_mode
== DFmode
&& in_n
== 2)
5663 return rs6000_builtin_decls
[RS6000_BIF_XVMADDDP
];
5664 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5665 && out_mode
== SFmode
&& out_n
== 4
5666 && in_mode
== SFmode
&& in_n
== 4)
5667 return rs6000_builtin_decls
[RS6000_BIF_XVMADDSP
];
5668 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5669 && out_mode
== SFmode
&& out_n
== 4
5670 && in_mode
== SFmode
&& in_n
== 4)
5671 return rs6000_builtin_decls
[RS6000_BIF_VMADDFP
];
5674 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5675 && out_mode
== DFmode
&& out_n
== 2
5676 && in_mode
== DFmode
&& in_n
== 2)
5677 return rs6000_builtin_decls
[RS6000_BIF_XVRDPIZ
];
5678 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5679 && out_mode
== SFmode
&& out_n
== 4
5680 && in_mode
== SFmode
&& in_n
== 4)
5681 return rs6000_builtin_decls
[RS6000_BIF_XVRSPIZ
];
5682 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode
)
5683 && out_mode
== SFmode
&& out_n
== 4
5684 && in_mode
== SFmode
&& in_n
== 4)
5685 return rs6000_builtin_decls
[RS6000_BIF_VRFIZ
];
5688 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5689 && flag_unsafe_math_optimizations
5690 && out_mode
== DFmode
&& out_n
== 2
5691 && in_mode
== DFmode
&& in_n
== 2)
5692 return rs6000_builtin_decls
[RS6000_BIF_XVRDPI
];
5693 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5694 && flag_unsafe_math_optimizations
5695 && out_mode
== SFmode
&& out_n
== 4
5696 && in_mode
== SFmode
&& in_n
== 4)
5697 return rs6000_builtin_decls
[RS6000_BIF_XVRSPI
];
5700 if (VECTOR_UNIT_VSX_P (V2DFmode
)
5701 && !flag_trapping_math
5702 && out_mode
== DFmode
&& out_n
== 2
5703 && in_mode
== DFmode
&& in_n
== 2)
5704 return rs6000_builtin_decls
[RS6000_BIF_XVRDPIC
];
5705 if (VECTOR_UNIT_VSX_P (V4SFmode
)
5706 && !flag_trapping_math
5707 && out_mode
== SFmode
&& out_n
== 4
5708 && in_mode
== SFmode
&& in_n
== 4)
5709 return rs6000_builtin_decls
[RS6000_BIF_XVRSPIC
];
5715 /* Generate calls to libmass if appropriate. */
5716 if (rs6000_veclib_handler
)
5717 return rs6000_veclib_handler (combined_fn (fn
), type_out
, type_in
);
5722 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5723 library with vectorized intrinsics. */
5726 rs6000_builtin_vectorized_libmass (combined_fn fn
, tree type_out
,
5730 const char *suffix
= NULL
;
5731 tree fntype
, new_fndecl
, bdecl
= NULL_TREE
;
5734 machine_mode el_mode
, in_mode
;
5737 /* Libmass is suitable for unsafe math only as it does not correctly support
5738 parts of IEEE with the required precision such as denormals. Only support
5739 it if we have VSX to use the simd d2 or f4 functions.
5740 XXX: Add variable length support. */
5741 if (!flag_unsafe_math_optimizations
|| !TARGET_VSX
)
5744 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
5745 n
= TYPE_VECTOR_SUBPARTS (type_out
);
5746 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
5747 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
5748 if (el_mode
!= in_mode
5784 if (el_mode
== DFmode
&& n
== 2)
5786 bdecl
= mathfn_built_in (double_type_node
, fn
);
5787 suffix
= "d2"; /* pow -> powd2 */
5789 else if (el_mode
== SFmode
&& n
== 4)
5791 bdecl
= mathfn_built_in (float_type_node
, fn
);
5792 suffix
= "4"; /* powf -> powf4 */
5804 gcc_assert (suffix
!= NULL
);
5805 bname
= IDENTIFIER_POINTER (DECL_NAME (bdecl
));
5809 strcpy (name
, bname
+ strlen ("__builtin_"));
5810 strcat (name
, suffix
);
5813 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
5814 else if (n_args
== 2)
5815 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
5819 /* Build a function declaration for the vectorized function. */
5820 new_fndecl
= build_decl (BUILTINS_LOCATION
,
5821 FUNCTION_DECL
, get_identifier (name
), fntype
);
5822 TREE_PUBLIC (new_fndecl
) = 1;
5823 DECL_EXTERNAL (new_fndecl
) = 1;
5824 DECL_IS_NOVOPS (new_fndecl
) = 1;
5825 TREE_READONLY (new_fndecl
) = 1;
5831 /* Default CPU string for rs6000*_file_start functions. */
5832 static const char *rs6000_default_cpu
;
5834 #ifdef USING_ELFOS_H
5835 const char *rs6000_machine
;
5838 rs6000_machine_from_flags (void)
5841 if (rs6000_cpu
== PROCESSOR_PPCE300C2
|| rs6000_cpu
== PROCESSOR_PPCE300C3
)
5843 if (rs6000_cpu
== PROCESSOR_PPC8540
|| rs6000_cpu
== PROCESSOR_PPC8548
)
5845 if (rs6000_cpu
== PROCESSOR_PPCE500MC
)
5847 if (rs6000_cpu
== PROCESSOR_PPCE500MC64
)
5849 if (rs6000_cpu
== PROCESSOR_PPCE5500
)
5851 if (rs6000_cpu
== PROCESSOR_PPCE6500
)
5855 if (rs6000_cpu
== PROCESSOR_PPC403
)
5857 if (rs6000_cpu
== PROCESSOR_PPC405
)
5859 if (rs6000_cpu
== PROCESSOR_PPC440
)
5861 if (rs6000_cpu
== PROCESSOR_PPC476
)
5865 if (rs6000_cpu
== PROCESSOR_PPCA2
)
5869 if (rs6000_cpu
== PROCESSOR_CELL
)
5873 if (rs6000_cpu
== PROCESSOR_TITAN
)
5876 /* 500 series and 800 series */
5877 if (rs6000_cpu
== PROCESSOR_MPCCORE
)
5881 /* This (and ppc64 below) are disabled here (for now at least) because
5882 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
5883 are #define'd as some of these. Untangling that is a job for later. */
5885 /* 600 series and 700 series, "classic" */
5886 if (rs6000_cpu
== PROCESSOR_PPC601
|| rs6000_cpu
== PROCESSOR_PPC603
5887 || rs6000_cpu
== PROCESSOR_PPC604
|| rs6000_cpu
== PROCESSOR_PPC604e
5888 || rs6000_cpu
== PROCESSOR_PPC750
)
5892 /* Classic with AltiVec, "G4" */
5893 if (rs6000_cpu
== PROCESSOR_PPC7400
|| rs6000_cpu
== PROCESSOR_PPC7450
)
5897 /* The older 64-bit CPUs */
5898 if (rs6000_cpu
== PROCESSOR_PPC620
|| rs6000_cpu
== PROCESSOR_PPC630
5899 || rs6000_cpu
== PROCESSOR_RS64A
)
5903 HOST_WIDE_INT flags
= rs6000_isa_flags
;
5905 /* Disable the flags that should never influence the .machine selection. */
5906 flags
&= ~(OPTION_MASK_PPC_GFXOPT
| OPTION_MASK_PPC_GPOPT
| OPTION_MASK_ISEL
);
5908 if ((flags
& (ISA_3_1_MASKS_SERVER
& ~ISA_3_0_MASKS_SERVER
)) != 0)
5910 if ((flags
& (ISA_3_0_MASKS_SERVER
& ~ISA_2_7_MASKS_SERVER
)) != 0)
5912 if ((flags
& (ISA_2_7_MASKS_SERVER
& ~ISA_2_6_MASKS_SERVER
)) != 0)
5914 if ((flags
& (ISA_2_6_MASKS_SERVER
& ~ISA_2_5_MASKS_SERVER
)) != 0)
5916 if ((flags
& (ISA_2_5_MASKS_SERVER
& ~ISA_2_4_MASKS
)) != 0)
5918 if ((flags
& (ISA_2_4_MASKS
& ~ISA_2_1_MASKS
)) != 0)
5920 if ((flags
& ISA_2_1_MASKS
) != 0)
5922 if ((flags
& OPTION_MASK_POWERPC64
) != 0)
5928 emit_asm_machine (void)
5930 fprintf (asm_out_file
, "\t.machine %s\n", rs6000_machine
);
5934 /* Do anything needed at the start of the asm file. */
5937 rs6000_file_start (void)
5940 const char *start
= buffer
;
5941 FILE *file
= asm_out_file
;
5943 rs6000_default_cpu
= TARGET_CPU_DEFAULT
;
5945 default_file_start ();
5947 if (flag_verbose_asm
)
5949 sprintf (buffer
, "\n%s rs6000/powerpc options:", ASM_COMMENT_START
);
5951 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
5953 fprintf (file
, "%s --with-cpu=%s", start
, rs6000_default_cpu
);
5957 if (OPTION_SET_P (rs6000_cpu_index
))
5959 fprintf (file
, "%s -mcpu=%s", start
,
5960 processor_target_table
[rs6000_cpu_index
].name
);
5964 if (OPTION_SET_P (rs6000_tune_index
))
5966 fprintf (file
, "%s -mtune=%s", start
,
5967 processor_target_table
[rs6000_tune_index
].name
);
5971 if (PPC405_ERRATUM77
)
5973 fprintf (file
, "%s PPC405CR_ERRATUM77", start
);
5977 #ifdef USING_ELFOS_H
5978 switch (rs6000_sdata
)
5980 case SDATA_NONE
: fprintf (file
, "%s -msdata=none", start
); start
= ""; break;
5981 case SDATA_DATA
: fprintf (file
, "%s -msdata=data", start
); start
= ""; break;
5982 case SDATA_SYSV
: fprintf (file
, "%s -msdata=sysv", start
); start
= ""; break;
5983 case SDATA_EABI
: fprintf (file
, "%s -msdata=eabi", start
); start
= ""; break;
5986 if (rs6000_sdata
&& g_switch_value
)
5988 fprintf (file
, "%s -G %d", start
,
5998 #ifdef USING_ELFOS_H
5999 rs6000_machine
= rs6000_machine_from_flags ();
6000 emit_asm_machine ();
6003 if (DEFAULT_ABI
== ABI_ELFv2
)
6004 fprintf (file
, "\t.abiversion 2\n");
6008 /* Return nonzero if this function is known to have a null epilogue. */
6011 direct_return (void)
6013 if (reload_completed
)
6015 rs6000_stack_t
*info
= rs6000_stack_info ();
6017 if (info
->first_gp_reg_save
== 32
6018 && info
->first_fp_reg_save
== 64
6019 && info
->first_altivec_reg_save
== LAST_ALTIVEC_REGNO
+ 1
6020 && ! info
->lr_save_p
6021 && ! info
->cr_save_p
6022 && info
->vrsave_size
== 0
6030 /* Helper for num_insns_constant. Calculate number of instructions to
6031 load VALUE to a single gpr using combinations of addi, addis, ori,
6032 oris, sldi and rldimi instructions. */
6035 num_insns_constant_gpr (HOST_WIDE_INT value
)
6037 /* signed constant loadable with addi */
6038 if (SIGNED_INTEGER_16BIT_P (value
))
6041 /* constant loadable with addis */
6042 else if ((value
& 0xffff) == 0
6043 && (value
>> 31 == -1 || value
>> 31 == 0))
6046 /* PADDI can support up to 34 bit signed integers. */
6047 else if (TARGET_PREFIXED
&& SIGNED_INTEGER_34BIT_P (value
))
6050 else if (TARGET_POWERPC64
)
6052 HOST_WIDE_INT low
= sext_hwi (value
, 32);
6053 HOST_WIDE_INT high
= value
>> 31;
6055 if (high
== 0 || high
== -1)
6060 if (low
== 0 || low
== high
)
6061 return num_insns_constant_gpr (high
) + 1;
6063 return num_insns_constant_gpr (low
) + 1;
6065 return (num_insns_constant_gpr (high
)
6066 + num_insns_constant_gpr (low
) + 1);
6073 /* Helper for num_insns_constant. Allow constants formed by the
6074 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
6075 and handle modes that require multiple gprs. */
6078 num_insns_constant_multi (HOST_WIDE_INT value
, machine_mode mode
)
6080 int nregs
= (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6084 HOST_WIDE_INT low
= sext_hwi (value
, BITS_PER_WORD
);
6085 int insns
= num_insns_constant_gpr (low
);
6087 /* We won't get more than 2 from num_insns_constant_gpr
6088 except when TARGET_POWERPC64 and mode is DImode or
6089 wider, so the register mode must be DImode. */
6090 && rs6000_is_valid_and_mask (GEN_INT (low
), DImode
))
6093 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6094 it all at once would be UB. */
6095 value
>>= (BITS_PER_WORD
- 1);
6101 /* Return the number of instructions it takes to form a constant in as
6102 many gprs are needed for MODE. */
6105 num_insns_constant (rtx op
, machine_mode mode
)
6109 switch (GET_CODE (op
))
6115 case CONST_WIDE_INT
:
6118 for (int i
= 0; i
< CONST_WIDE_INT_NUNITS (op
); i
++)
6119 insns
+= num_insns_constant_multi (CONST_WIDE_INT_ELT (op
, i
),
6126 const struct real_value
*rv
= CONST_DOUBLE_REAL_VALUE (op
);
6128 if (mode
== SFmode
|| mode
== SDmode
)
6133 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv
, l
);
6135 REAL_VALUE_TO_TARGET_SINGLE (*rv
, l
);
6136 /* See the first define_split in rs6000.md handling a
6137 const_double_operand. */
6141 else if (mode
== DFmode
|| mode
== DDmode
)
6146 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv
, l
);
6148 REAL_VALUE_TO_TARGET_DOUBLE (*rv
, l
);
6150 /* See the second (32-bit) and third (64-bit) define_split
6151 in rs6000.md handling a const_double_operand. */
6152 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 1] << 32;
6153 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffffUL
;
6156 else if (mode
== TFmode
|| mode
== TDmode
6157 || mode
== KFmode
|| mode
== IFmode
)
6163 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv
, l
);
6165 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv
, l
);
6167 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 0 : 3] << 32;
6168 val
|= l
[WORDS_BIG_ENDIAN
? 1 : 2] & 0xffffffffUL
;
6169 insns
= num_insns_constant_multi (val
, DImode
);
6170 val
= (unsigned HOST_WIDE_INT
) l
[WORDS_BIG_ENDIAN
? 2 : 1] << 32;
6171 val
|= l
[WORDS_BIG_ENDIAN
? 3 : 0] & 0xffffffffUL
;
6172 insns
+= num_insns_constant_multi (val
, DImode
);
6184 return num_insns_constant_multi (val
, mode
);
6187 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6188 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6189 corresponding element of the vector, but for V4SFmode, the
6190 corresponding "float" is interpreted as an SImode integer. */
6193 const_vector_elt_as_int (rtx op
, unsigned int elt
)
6197 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6198 gcc_assert (GET_MODE (op
) != V2DImode
6199 && GET_MODE (op
) != V2DFmode
);
6201 tmp
= CONST_VECTOR_ELT (op
, elt
);
6202 if (GET_MODE (op
) == V4SFmode
)
6203 tmp
= gen_lowpart (SImode
, tmp
);
6204 return INTVAL (tmp
);
6207 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6208 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6209 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6210 all items are set to the same value and contain COPIES replicas of the
6211 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6212 operand and the others are set to the value of the operand's msb. */
6215 vspltis_constant (rtx op
, unsigned step
, unsigned copies
)
6217 machine_mode mode
= GET_MODE (op
);
6218 machine_mode inner
= GET_MODE_INNER (mode
);
6226 HOST_WIDE_INT splat_val
;
6227 HOST_WIDE_INT msb_val
;
6229 if (mode
== V2DImode
|| mode
== V2DFmode
|| mode
== V1TImode
)
6232 nunits
= GET_MODE_NUNITS (mode
);
6233 bitsize
= GET_MODE_BITSIZE (inner
);
6234 mask
= GET_MODE_MASK (inner
);
6236 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6238 msb_val
= val
>= 0 ? 0 : -1;
6240 if (val
== 0 && step
> 1)
6242 /* Special case for loading most significant bit with step > 1.
6243 In that case, match 0s in all but step-1s elements, where match
6245 for (i
= 1; i
< nunits
; ++i
)
6247 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6248 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6249 if ((i
& (step
- 1)) == step
- 1)
6251 if (!EASY_VECTOR_MSB (elt_val
, inner
))
6261 /* Construct the value to be splatted, if possible. If not, return 0. */
6262 for (i
= 2; i
<= copies
; i
*= 2)
6264 HOST_WIDE_INT small_val
;
6266 small_val
= splat_val
>> bitsize
;
6268 if (splat_val
!= ((HOST_WIDE_INT
)
6269 ((unsigned HOST_WIDE_INT
) small_val
<< bitsize
)
6270 | (small_val
& mask
)))
6272 splat_val
= small_val
;
6273 inner
= smallest_int_mode_for_size (bitsize
);
6276 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6277 if (EASY_VECTOR_15 (splat_val
))
6280 /* Also check if we can splat, and then add the result to itself. Do so if
6281 the value is positive, of if the splat instruction is using OP's mode;
6282 for splat_val < 0, the splat and the add should use the same mode. */
6283 else if (EASY_VECTOR_15_ADD_SELF (splat_val
)
6284 && (splat_val
>= 0 || (step
== 1 && copies
== 1)))
6287 /* Also check if are loading up the most significant bit which can be done by
6288 loading up -1 and shifting the value left by -1. Only do this for
6289 step 1 here, for larger steps it is done earlier. */
6290 else if (EASY_VECTOR_MSB (splat_val
, inner
) && step
== 1)
6296 /* Check if VAL is present in every STEP-th element, and the
6297 other elements are filled with its most significant bit. */
6298 for (i
= 1; i
< nunits
; ++i
)
6300 HOST_WIDE_INT desired_val
;
6301 unsigned elt
= BYTES_BIG_ENDIAN
? nunits
- 1 - i
: i
;
6302 if ((i
& (step
- 1)) == 0)
6305 desired_val
= msb_val
;
6307 if (desired_val
!= const_vector_elt_as_int (op
, elt
))
6314 /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI
6315 instruction, filling in the bottom elements with 0 or -1.
6317 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6318 for the number of zeroes to shift in, or negative for the number of 0xff
6321 OP is a CONST_VECTOR. */
6324 vspltis_shifted (rtx op
)
6326 machine_mode mode
= GET_MODE (op
);
6327 machine_mode inner
= GET_MODE_INNER (mode
);
6335 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
)
6338 /* We need to create pseudo registers to do the shift, so don't recognize
6339 shift vector constants after reload. Don't match it even before RA
6340 after split1 is done, because there won't be further splitting pass
6341 before RA to do the splitting. */
6342 if (!can_create_pseudo_p ()
6343 || (cfun
->curr_properties
& PROP_rtl_split_insns
))
6346 nunits
= GET_MODE_NUNITS (mode
);
6347 mask
= GET_MODE_MASK (inner
);
6349 val
= const_vector_elt_as_int (op
, BYTES_BIG_ENDIAN
? 0 : nunits
- 1);
6351 /* Check if the value can really be the operand of a vspltis[bhw]. */
6352 if (EASY_VECTOR_15 (val
))
6355 /* Also check if we are loading up the most significant bit which can be done
6356 by loading up -1 and shifting the value left by -1. */
6357 else if (EASY_VECTOR_MSB (val
, inner
))
6363 /* Check if VAL is present in every STEP-th element until we find elements
6364 that are 0 or all 1 bits. */
6365 for (i
= 1; i
< nunits
; ++i
)
6367 unsigned elt
= BYTES_BIG_ENDIAN
? i
: nunits
- 1 - i
;
6368 HOST_WIDE_INT elt_val
= const_vector_elt_as_int (op
, elt
);
6370 /* If the value isn't the splat value, check for the remaining elements
6376 for (j
= i
+1; j
< nunits
; ++j
)
6378 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6379 if (const_vector_elt_as_int (op
, elt2
) != 0)
6383 return (nunits
- i
) * GET_MODE_SIZE (inner
);
6386 else if ((elt_val
& mask
) == mask
)
6388 for (j
= i
+1; j
< nunits
; ++j
)
6390 unsigned elt2
= BYTES_BIG_ENDIAN
? j
: nunits
- 1 - j
;
6391 if ((const_vector_elt_as_int (op
, elt2
) & mask
) != mask
)
6395 return -((nunits
- i
) * GET_MODE_SIZE (inner
));
6403 /* If all elements are equal, we don't need to do VSLDOI. */
6408 /* Return non-zero (element mode byte size) if OP is of the given MODE
6409 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6412 easy_altivec_constant (rtx op
, machine_mode mode
)
6414 unsigned step
, copies
;
6416 if (mode
== VOIDmode
)
6417 mode
= GET_MODE (op
);
6418 else if (mode
!= GET_MODE (op
))
6421 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6423 if (mode
== V2DFmode
)
6424 return zero_constant (op
, mode
) ? 8 : 0;
6426 else if (mode
== V2DImode
)
6428 if (!CONST_INT_P (CONST_VECTOR_ELT (op
, 0))
6429 || !CONST_INT_P (CONST_VECTOR_ELT (op
, 1)))
6432 if (zero_constant (op
, mode
))
6435 if (INTVAL (CONST_VECTOR_ELT (op
, 0)) == -1
6436 && INTVAL (CONST_VECTOR_ELT (op
, 1)) == -1)
6442 /* V1TImode is a special container for TImode. Ignore for now. */
6443 else if (mode
== V1TImode
)
6446 /* Start with a vspltisw. */
6447 step
= GET_MODE_NUNITS (mode
) / 4;
6450 if (vspltis_constant (op
, step
, copies
))
6453 /* Then try with a vspltish. */
6459 if (vspltis_constant (op
, step
, copies
))
6462 /* And finally a vspltisb. */
6468 if (vspltis_constant (op
, step
, copies
))
6471 if (vspltis_shifted (op
) != 0)
6472 return GET_MODE_SIZE (GET_MODE_INNER (mode
));
6477 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6478 result is OP. Abort if it is not possible. */
6481 gen_easy_altivec_constant (rtx op
)
6483 machine_mode mode
= GET_MODE (op
);
6484 int nunits
= GET_MODE_NUNITS (mode
);
6485 rtx val
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? nunits
- 1 : 0);
6486 unsigned step
= nunits
/ 4;
6487 unsigned copies
= 1;
6489 /* Start with a vspltisw. */
6490 if (vspltis_constant (op
, step
, copies
))
6491 return gen_rtx_VEC_DUPLICATE (V4SImode
, gen_lowpart (SImode
, val
));
6493 /* Then try with a vspltish. */
6499 if (vspltis_constant (op
, step
, copies
))
6500 return gen_rtx_VEC_DUPLICATE (V8HImode
, gen_lowpart (HImode
, val
));
6502 /* And finally a vspltisb. */
6508 if (vspltis_constant (op
, step
, copies
))
6509 return gen_rtx_VEC_DUPLICATE (V16QImode
, gen_lowpart (QImode
, val
));
6514 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6515 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6517 Return the number of instructions needed (1 or 2) into the address pointed
6520 Return the constant that is being split via CONSTANT_PTR. */
6523 xxspltib_constant_p (rtx op
,
6528 size_t nunits
= GET_MODE_NUNITS (mode
);
6530 HOST_WIDE_INT value
;
6533 /* Set the returned values to out of bound values. */
6534 *num_insns_ptr
= -1;
6535 *constant_ptr
= 256;
6537 if (!TARGET_P9_VECTOR
)
6540 if (mode
== VOIDmode
)
6541 mode
= GET_MODE (op
);
6543 else if (mode
!= GET_MODE (op
) && GET_MODE (op
) != VOIDmode
)
6546 /* Handle (vec_duplicate <constant>). */
6547 if (GET_CODE (op
) == VEC_DUPLICATE
)
6549 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6550 && mode
!= V2DImode
)
6553 element
= XEXP (op
, 0);
6554 if (!CONST_INT_P (element
))
6557 value
= INTVAL (element
);
6558 if (!IN_RANGE (value
, -128, 127))
6562 /* Handle (const_vector [...]). */
6563 else if (GET_CODE (op
) == CONST_VECTOR
)
6565 if (mode
!= V16QImode
&& mode
!= V8HImode
&& mode
!= V4SImode
6566 && mode
!= V2DImode
)
6569 element
= CONST_VECTOR_ELT (op
, 0);
6570 if (!CONST_INT_P (element
))
6573 value
= INTVAL (element
);
6574 if (!IN_RANGE (value
, -128, 127))
6577 for (i
= 1; i
< nunits
; i
++)
6579 element
= CONST_VECTOR_ELT (op
, i
);
6580 if (!CONST_INT_P (element
))
6583 if (value
!= INTVAL (element
))
6588 /* Handle integer constants being loaded into the upper part of the VSX
6589 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6590 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */
6591 else if (CONST_INT_P (op
))
6593 if (!SCALAR_INT_MODE_P (mode
))
6596 value
= INTVAL (op
);
6597 if (!IN_RANGE (value
, -128, 127))
6600 if (!IN_RANGE (value
, -1, 0))
6602 if (!(reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
))
6605 if (EASY_VECTOR_15 (value
))
6613 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6614 sign extend. Special case 0/-1 to allow getting any VSX register instead
6615 of an Altivec register. */
6616 if ((mode
== V4SImode
|| mode
== V8HImode
) && !IN_RANGE (value
, -1, 0)
6617 && EASY_VECTOR_15 (value
))
6620 /* Return # of instructions and the constant byte for XXSPLTIB. */
6621 if (mode
== V16QImode
)
6624 else if (IN_RANGE (value
, -1, 0))
6627 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6628 single XXSPLTIW or XXSPLTIDP instruction. */
6629 else if (vsx_prefixed_constant (op
, mode
))
6632 /* Return XXSPLITB followed by a sign extend operation to convert the
6633 constant to V8HImode or V4SImode. */
6637 *constant_ptr
= (int) value
;
6642 output_vec_const_move (rtx
*operands
)
6650 mode
= GET_MODE (dest
);
6654 bool dest_vmx_p
= ALTIVEC_REGNO_P (REGNO (dest
));
6655 int xxspltib_value
= 256;
6658 if (zero_constant (vec
, mode
))
6660 if (TARGET_P9_VECTOR
)
6661 return "xxspltib %x0,0";
6663 else if (dest_vmx_p
)
6664 return "vspltisw %0,0";
6667 return "xxlxor %x0,%x0,%x0";
6670 if (all_ones_constant (vec
, mode
))
6672 if (TARGET_P9_VECTOR
)
6673 return "xxspltib %x0,255";
6675 else if (dest_vmx_p
)
6676 return "vspltisw %0,-1";
6678 else if (TARGET_P8_VECTOR
)
6679 return "xxlorc %x0,%x0,%x0";
6685 vec_const_128bit_type vsx_const
;
6686 if (TARGET_POWER10
&& vec_const_128bit_to_bytes (vec
, mode
, &vsx_const
))
6688 unsigned imm
= constant_generates_lxvkq (&vsx_const
);
6691 operands
[2] = GEN_INT (imm
);
6692 return "lxvkq %x0,%2";
6695 imm
= constant_generates_xxspltiw (&vsx_const
);
6698 operands
[2] = GEN_INT (imm
);
6699 return "xxspltiw %x0,%2";
6702 imm
= constant_generates_xxspltidp (&vsx_const
);
6705 operands
[2] = GEN_INT (imm
);
6706 return "xxspltidp %x0,%2";
6710 if (TARGET_P9_VECTOR
6711 && xxspltib_constant_p (vec
, mode
, &num_insns
, &xxspltib_value
))
6715 operands
[2] = GEN_INT (xxspltib_value
& 0xff);
6716 return "xxspltib %x0,%2";
6727 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest
)));
6728 if (zero_constant (vec
, mode
))
6729 return "vspltisw %0,0";
6731 if (all_ones_constant (vec
, mode
))
6732 return "vspltisw %0,-1";
6734 /* Do we need to construct a value using VSLDOI? */
6735 shift
= vspltis_shifted (vec
);
6739 splat_vec
= gen_easy_altivec_constant (vec
);
6740 gcc_assert (GET_CODE (splat_vec
) == VEC_DUPLICATE
);
6741 operands
[1] = XEXP (splat_vec
, 0);
6742 if (!EASY_VECTOR_15 (INTVAL (operands
[1])))
6745 switch (GET_MODE (splat_vec
))
6748 return "vspltisw %0,%1";
6751 return "vspltish %0,%1";
6754 return "vspltisb %0,%1";
6764 /* Initialize vector TARGET to VALS. */
6767 rs6000_expand_vector_init (rtx target
, rtx vals
)
6769 machine_mode mode
= GET_MODE (target
);
6770 machine_mode inner_mode
= GET_MODE_INNER (mode
);
6771 unsigned int n_elts
= GET_MODE_NUNITS (mode
);
6772 int n_var
= 0, one_var
= -1;
6773 bool all_same
= true, all_const_zero
= true;
6777 for (i
= 0; i
< n_elts
; ++i
)
6779 x
= XVECEXP (vals
, 0, i
);
6780 if (!(CONST_SCALAR_INT_P (x
) || CONST_DOUBLE_P (x
) || CONST_FIXED_P (x
)))
6781 ++n_var
, one_var
= i
;
6782 else if (x
!= CONST0_RTX (inner_mode
))
6783 all_const_zero
= false;
6785 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
6791 rtx const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
6792 bool int_vector_p
= (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
);
6793 if ((int_vector_p
|| TARGET_VSX
) && all_const_zero
)
6795 /* Zero register. */
6796 emit_move_insn (target
, CONST0_RTX (mode
));
6799 else if (int_vector_p
&& easy_vector_constant (const_vec
, mode
))
6801 /* Splat immediate. */
6802 emit_insn (gen_rtx_SET (target
, const_vec
));
6807 /* Load from constant pool. */
6808 emit_move_insn (target
, const_vec
);
6813 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6814 if (VECTOR_MEM_VSX_P (mode
) && (mode
== V2DFmode
|| mode
== V2DImode
))
6818 size_t num_elements
= all_same
? 1 : 2;
6819 for (i
= 0; i
< num_elements
; i
++)
6821 op
[i
] = XVECEXP (vals
, 0, i
);
6822 /* Just in case there is a SUBREG with a smaller mode, do a
6824 if (GET_MODE (op
[i
]) != inner_mode
)
6826 rtx tmp
= gen_reg_rtx (inner_mode
);
6827 convert_move (tmp
, op
[i
], 0);
6830 /* Allow load with splat double word. */
6831 else if (MEM_P (op
[i
]))
6834 op
[i
] = force_reg (inner_mode
, op
[i
]);
6836 else if (!REG_P (op
[i
]))
6837 op
[i
] = force_reg (inner_mode
, op
[i
]);
6842 if (mode
== V2DFmode
)
6843 emit_insn (gen_vsx_splat_v2df (target
, op
[0]));
6845 emit_insn (gen_vsx_splat_v2di (target
, op
[0]));
6849 if (mode
== V2DFmode
)
6850 emit_insn (gen_vsx_concat_v2df (target
, op
[0], op
[1]));
6852 emit_insn (gen_vsx_concat_v2di (target
, op
[0], op
[1]));
6857 /* Special case initializing vector int if we are on 64-bit systems with
6858 direct move or we have the ISA 3.0 instructions. */
6859 if (mode
== V4SImode
&& VECTOR_MEM_VSX_P (V4SImode
)
6860 && TARGET_DIRECT_MOVE_64BIT
)
6864 rtx element0
= XVECEXP (vals
, 0, 0);
6865 if (MEM_P (element0
))
6866 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6868 element0
= force_reg (SImode
, element0
);
6870 if (TARGET_P9_VECTOR
)
6871 emit_insn (gen_vsx_splat_v4si (target
, element0
));
6874 rtx tmp
= gen_reg_rtx (DImode
);
6875 emit_insn (gen_zero_extendsidi2 (tmp
, element0
));
6876 emit_insn (gen_vsx_splat_v4si_di (target
, tmp
));
6885 for (i
= 0; i
< 4; i
++)
6886 elements
[i
] = force_reg (SImode
, XVECEXP (vals
, 0, i
));
6888 emit_insn (gen_vsx_init_v4si (target
, elements
[0], elements
[1],
6889 elements
[2], elements
[3]));
6894 /* With single precision floating point on VSX, know that internally single
6895 precision is actually represented as a double, and either make 2 V2DF
6896 vectors, and convert these vectors to single precision, or do one
6897 conversion, and splat the result to the other elements. */
6898 if (mode
== V4SFmode
&& VECTOR_MEM_VSX_P (V4SFmode
))
6902 rtx element0
= XVECEXP (vals
, 0, 0);
6904 if (TARGET_P9_VECTOR
)
6906 if (MEM_P (element0
))
6907 element0
= rs6000_force_indexed_or_indirect_mem (element0
);
6909 emit_insn (gen_vsx_splat_v4sf (target
, element0
));
6914 rtx freg
= gen_reg_rtx (V4SFmode
);
6915 rtx sreg
= force_reg (SFmode
, element0
);
6916 rtx cvt
= (TARGET_XSCVDPSPN
6917 ? gen_vsx_xscvdpspn_scalar (freg
, sreg
)
6918 : gen_vsx_xscvdpsp_scalar (freg
, sreg
));
6921 emit_insn (gen_vsx_xxspltw_v4sf_direct (target
, freg
,
6927 if (TARGET_P8_VECTOR
&& TARGET_POWERPC64
)
6933 for (i
= 0; i
< 4; i
++)
6935 tmp_si
[i
] = gen_reg_rtx (SImode
);
6936 tmp_di
[i
] = gen_reg_rtx (DImode
);
6937 mrg_di
[i
] = gen_reg_rtx (DImode
);
6938 tmp_sf
[i
] = force_reg (SFmode
, XVECEXP (vals
, 0, i
));
6939 emit_insn (gen_movsi_from_sf (tmp_si
[i
], tmp_sf
[i
]));
6940 emit_insn (gen_zero_extendsidi2 (tmp_di
[i
], tmp_si
[i
]));
6943 if (!BYTES_BIG_ENDIAN
)
6945 std::swap (tmp_di
[0], tmp_di
[1]);
6946 std::swap (tmp_di
[2], tmp_di
[3]);
6949 emit_insn (gen_ashldi3 (mrg_di
[0], tmp_di
[0], GEN_INT (32)));
6950 emit_insn (gen_iordi3 (mrg_di
[1], mrg_di
[0], tmp_di
[1]));
6951 emit_insn (gen_ashldi3 (mrg_di
[2], tmp_di
[2], GEN_INT (32)));
6952 emit_insn (gen_iordi3 (mrg_di
[3], mrg_di
[2], tmp_di
[3]));
6954 rtx tmp_v2di
= gen_reg_rtx (V2DImode
);
6955 emit_insn (gen_vsx_concat_v2di (tmp_v2di
, mrg_di
[1], mrg_di
[3]));
6956 emit_move_insn (target
, gen_lowpart (V4SFmode
, tmp_v2di
));
6960 rtx dbl_even
= gen_reg_rtx (V2DFmode
);
6961 rtx dbl_odd
= gen_reg_rtx (V2DFmode
);
6962 rtx flt_even
= gen_reg_rtx (V4SFmode
);
6963 rtx flt_odd
= gen_reg_rtx (V4SFmode
);
6964 rtx op0
= force_reg (SFmode
, XVECEXP (vals
, 0, 0));
6965 rtx op1
= force_reg (SFmode
, XVECEXP (vals
, 0, 1));
6966 rtx op2
= force_reg (SFmode
, XVECEXP (vals
, 0, 2));
6967 rtx op3
= force_reg (SFmode
, XVECEXP (vals
, 0, 3));
6969 emit_insn (gen_vsx_concat_v2sf (dbl_even
, op0
, op1
));
6970 emit_insn (gen_vsx_concat_v2sf (dbl_odd
, op2
, op3
));
6971 emit_insn (gen_vsx_xvcvdpsp (flt_even
, dbl_even
));
6972 emit_insn (gen_vsx_xvcvdpsp (flt_odd
, dbl_odd
));
6973 rs6000_expand_extract_even (target
, flt_even
, flt_odd
);
6979 /* Special case initializing vector short/char that are splats if we are on
6980 64-bit systems with direct move. */
6981 if (all_same
&& TARGET_DIRECT_MOVE_64BIT
6982 && (mode
== V16QImode
|| mode
== V8HImode
))
6984 rtx op0
= XVECEXP (vals
, 0, 0);
6985 rtx di_tmp
= gen_reg_rtx (DImode
);
6988 op0
= force_reg (GET_MODE_INNER (mode
), op0
);
6990 if (mode
== V16QImode
)
6992 emit_insn (gen_zero_extendqidi2 (di_tmp
, op0
));
6993 emit_insn (gen_vsx_vspltb_di (target
, di_tmp
));
6997 if (mode
== V8HImode
)
6999 emit_insn (gen_zero_extendhidi2 (di_tmp
, op0
));
7000 emit_insn (gen_vsx_vsplth_di (target
, di_tmp
));
7005 /* Store value to stack temp. Load vector element. Splat. However, splat
7006 of 64-bit items is not supported on Altivec. */
7007 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
7009 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7010 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0),
7011 XVECEXP (vals
, 0, 0));
7012 x
= gen_rtx_UNSPEC (VOIDmode
,
7013 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7014 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7016 gen_rtx_SET (target
, mem
),
7018 x
= gen_rtx_VEC_SELECT (inner_mode
, target
,
7019 gen_rtx_PARALLEL (VOIDmode
,
7020 gen_rtvec (1, const0_rtx
)));
7021 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_DUPLICATE (mode
, x
)));
7025 /* One field is non-constant. Load constant then overwrite
7029 rtx copy
= copy_rtx (vals
);
7031 /* Load constant part of vector, substitute neighboring value for
7033 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
7034 rs6000_expand_vector_init (target
, copy
);
7036 /* Insert variable. */
7037 rs6000_expand_vector_set (target
, XVECEXP (vals
, 0, one_var
),
7042 if (TARGET_DIRECT_MOVE
&& (mode
== V16QImode
|| mode
== V8HImode
))
7045 /* Force the values into word_mode registers. */
7046 for (i
= 0; i
< n_elts
; i
++)
7048 rtx tmp
= force_reg (inner_mode
, XVECEXP (vals
, 0, i
));
7049 machine_mode tmode
= TARGET_POWERPC64
? DImode
: SImode
;
7050 op
[i
] = simplify_gen_subreg (tmode
, tmp
, inner_mode
, 0);
7053 /* Take unsigned char big endianness on 64bit as example for below
7054 construction, the input values are: A, B, C, D, ..., O, P. */
7056 if (TARGET_DIRECT_MOVE_128
)
7058 /* Move to VSX register with vec_concat, each has 2 values.
7059 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
7060 vr1[1] = { xxxxxxxC, xxxxxxxD };
7062 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
7064 for (i
= 0; i
< n_elts
/ 2; i
++)
7066 vr1
[i
] = gen_reg_rtx (V2DImode
);
7067 emit_insn (gen_vsx_concat_v2di (vr1
[i
], op
[i
* 2],
7071 /* Pack vectors with 2 values into vectors with 4 values.
7072 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
7073 vr2[1] = { xxxExxxF, xxxGxxxH };
7074 vr2[1] = { xxxIxxxJ, xxxKxxxL };
7075 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
7077 for (i
= 0; i
< n_elts
/ 4; i
++)
7079 vr2
[i
] = gen_reg_rtx (V4SImode
);
7080 emit_insn (gen_altivec_vpkudum (vr2
[i
], vr1
[i
* 2],
7084 /* Pack vectors with 4 values into vectors with 8 values.
7085 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7086 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7088 for (i
= 0; i
< n_elts
/ 8; i
++)
7090 vr3
[i
] = gen_reg_rtx (V8HImode
);
7091 emit_insn (gen_altivec_vpkuwum (vr3
[i
], vr2
[i
* 2],
7095 /* If it's V8HImode, it's done and return it. */
7096 if (mode
== V8HImode
)
7098 emit_insn (gen_rtx_SET (target
, vr3
[0]));
7102 /* Pack vectors with 8 values into 16 values. */
7103 rtx res
= gen_reg_rtx (V16QImode
);
7104 emit_insn (gen_altivec_vpkuhum (res
, vr3
[0], vr3
[1]));
7105 emit_insn (gen_rtx_SET (target
, res
));
7109 rtx (*merge_v16qi
) (rtx
, rtx
, rtx
) = NULL
;
7110 rtx (*merge_v8hi
) (rtx
, rtx
, rtx
) = NULL
;
7111 rtx (*merge_v4si
) (rtx
, rtx
, rtx
) = NULL
;
7114 /* Set up some common gen routines and values. */
7115 if (BYTES_BIG_ENDIAN
)
7117 if (mode
== V16QImode
)
7119 merge_v16qi
= gen_altivec_vmrghb
;
7120 merge_v8hi
= gen_altivec_vmrglh
;
7123 merge_v8hi
= gen_altivec_vmrghh
;
7125 merge_v4si
= gen_altivec_vmrglw
;
7126 perm_idx
= GEN_INT (3);
7130 if (mode
== V16QImode
)
7132 merge_v16qi
= gen_altivec_vmrglb
;
7133 merge_v8hi
= gen_altivec_vmrghh
;
7136 merge_v8hi
= gen_altivec_vmrglh
;
7138 merge_v4si
= gen_altivec_vmrghw
;
7139 perm_idx
= GEN_INT (0);
7142 /* Move to VSX register with direct move.
7143 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7144 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7146 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7148 for (i
= 0; i
< n_elts
; i
++)
7150 vr_qi
[i
] = gen_reg_rtx (V16QImode
);
7151 if (TARGET_POWERPC64
)
7152 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi
[i
], op
[i
]));
7154 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi
[i
], op
[i
]));
7157 /* Merge/move to vector short.
7158 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7159 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7161 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7163 for (i
= 0; i
< 8; i
++)
7166 if (mode
== V16QImode
)
7168 tmp
= gen_reg_rtx (V16QImode
);
7169 emit_insn (merge_v16qi (tmp
, vr_qi
[2 * i
], vr_qi
[2 * i
+ 1]));
7171 vr_hi
[i
] = gen_reg_rtx (V8HImode
);
7172 emit_move_insn (vr_hi
[i
], gen_lowpart (V8HImode
, tmp
));
7175 /* Merge vector short to vector int.
7176 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7177 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7179 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7181 for (i
= 0; i
< 4; i
++)
7183 rtx tmp
= gen_reg_rtx (V8HImode
);
7184 emit_insn (merge_v8hi (tmp
, vr_hi
[2 * i
], vr_hi
[2 * i
+ 1]));
7185 vr_si
[i
] = gen_reg_rtx (V4SImode
);
7186 emit_move_insn (vr_si
[i
], gen_lowpart (V4SImode
, tmp
));
7189 /* Merge vector int to vector long.
7190 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7191 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7193 for (i
= 0; i
< 2; i
++)
7195 rtx tmp
= gen_reg_rtx (V4SImode
);
7196 emit_insn (merge_v4si (tmp
, vr_si
[2 * i
], vr_si
[2 * i
+ 1]));
7197 vr_di
[i
] = gen_reg_rtx (V2DImode
);
7198 emit_move_insn (vr_di
[i
], gen_lowpart (V2DImode
, tmp
));
7201 rtx res
= gen_reg_rtx (V2DImode
);
7202 emit_insn (gen_vsx_xxpermdi_v2di (res
, vr_di
[0], vr_di
[1], perm_idx
));
7203 emit_insn (gen_rtx_SET (target
, gen_lowpart (mode
, res
)));
7209 /* Construct the vector in memory one field at a time
7210 and load the whole vector. */
7211 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7212 for (i
= 0; i
< n_elts
; i
++)
7213 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
7214 i
* GET_MODE_SIZE (inner_mode
)),
7215 XVECEXP (vals
, 0, i
));
7216 emit_move_insn (target
, mem
);
7219 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7220 is variable and also counts by vector element size for p9 and above. */
7223 rs6000_expand_vector_set_var_p9 (rtx target
, rtx val
, rtx idx
)
7225 machine_mode mode
= GET_MODE (target
);
7227 gcc_assert (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (idx
));
7229 machine_mode inner_mode
= GET_MODE (val
);
7231 int width
= GET_MODE_SIZE (inner_mode
);
7233 gcc_assert (width
>= 1 && width
<= 8);
7235 int shift
= exact_log2 (width
);
7237 machine_mode idx_mode
= GET_MODE (idx
);
7239 machine_mode shift_mode
;
7240 /* Gen function pointers for shifting left and generation of permutation
7242 rtx (*gen_ashl
) (rtx
, rtx
, rtx
);
7243 rtx (*gen_pcvr1
) (rtx
, rtx
);
7244 rtx (*gen_pcvr2
) (rtx
, rtx
);
7246 if (TARGET_POWERPC64
)
7248 shift_mode
= DImode
;
7249 gen_ashl
= gen_ashldi3
;
7250 gen_pcvr1
= BYTES_BIG_ENDIAN
? gen_altivec_lvsl_reg_di
7251 : gen_altivec_lvsr_reg_di
;
7252 gen_pcvr2
= BYTES_BIG_ENDIAN
? gen_altivec_lvsr_reg_di
7253 : gen_altivec_lvsl_reg_di
;
7257 shift_mode
= SImode
;
7258 gen_ashl
= gen_ashlsi3
;
7259 gen_pcvr1
= BYTES_BIG_ENDIAN
? gen_altivec_lvsl_reg_si
7260 : gen_altivec_lvsr_reg_si
;
7261 gen_pcvr2
= BYTES_BIG_ENDIAN
? gen_altivec_lvsr_reg_si
7262 : gen_altivec_lvsl_reg_si
;
7264 /* Generate the IDX for permute shift, width is the vector element size.
7265 idx = idx * width. */
7266 rtx tmp
= gen_reg_rtx (shift_mode
);
7267 idx
= convert_modes (shift_mode
, idx_mode
, idx
, 1);
7269 emit_insn (gen_ashl (tmp
, idx
, GEN_INT (shift
)));
7271 /* Generate one permutation control vector used for rotating the element
7272 at to-insert position to element zero in target vector. lvsl is
7273 used for big endianness while lvsr is used for little endianness:
7274 lvs[lr] v1,0,idx. */
7275 rtx pcvr1
= gen_reg_rtx (V16QImode
);
7276 emit_insn (gen_pcvr1 (pcvr1
, tmp
));
7278 rtx sub_target
= simplify_gen_subreg (V16QImode
, target
, mode
, 0);
7279 rtx perm1
= gen_altivec_vperm_v8hiv16qi (sub_target
, sub_target
, sub_target
,
7283 /* Insert val into element 0 of target vector. */
7284 rs6000_expand_vector_set (target
, val
, const0_rtx
);
7286 /* Rotate back with a reversed permutation control vector generated from:
7287 lvs[rl] v2,0,idx. */
7288 rtx pcvr2
= gen_reg_rtx (V16QImode
);
7289 emit_insn (gen_pcvr2 (pcvr2
, tmp
));
7291 rtx perm2
= gen_altivec_vperm_v8hiv16qi (sub_target
, sub_target
, sub_target
,
7296 /* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX
7297 is variable and also counts by vector element size for p7 & p8. */
7300 rs6000_expand_vector_set_var_p7 (rtx target
, rtx val
, rtx idx
)
7302 machine_mode mode
= GET_MODE (target
);
7304 gcc_assert (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (idx
));
7306 machine_mode inner_mode
= GET_MODE (val
);
7307 HOST_WIDE_INT mode_mask
= GET_MODE_MASK (inner_mode
);
7309 int width
= GET_MODE_SIZE (inner_mode
);
7310 gcc_assert (width
>= 1 && width
<= 4);
7312 int shift
= exact_log2 (width
);
7314 machine_mode idx_mode
= GET_MODE (idx
);
7316 machine_mode shift_mode
;
7317 rtx (*gen_ashl
)(rtx
, rtx
, rtx
);
7318 rtx (*gen_add
)(rtx
, rtx
, rtx
);
7319 rtx (*gen_sub
)(rtx
, rtx
, rtx
);
7320 rtx (*gen_lvsl
)(rtx
, rtx
);
7322 if (TARGET_POWERPC64
)
7324 shift_mode
= DImode
;
7325 gen_ashl
= gen_ashldi3
;
7326 gen_add
= gen_adddi3
;
7327 gen_sub
= gen_subdi3
;
7328 gen_lvsl
= gen_altivec_lvsl_reg_di
;
7332 shift_mode
= SImode
;
7333 gen_ashl
= gen_ashlsi3
;
7334 gen_add
= gen_addsi3
;
7335 gen_sub
= gen_subsi3
;
7336 gen_lvsl
= gen_altivec_lvsl_reg_si
;
7339 /* idx = idx * width. */
7340 rtx tmp
= gen_reg_rtx (shift_mode
);
7341 idx
= convert_modes (shift_mode
, idx_mode
, idx
, 1);
7343 emit_insn (gen_ashl (tmp
, idx
, GEN_INT (shift
)));
7345 /* For LE: idx = idx + 8. */
7346 if (!BYTES_BIG_ENDIAN
)
7347 emit_insn (gen_add (tmp
, tmp
, GEN_INT (8)));
7349 emit_insn (gen_sub (tmp
, GEN_INT (24 - width
), tmp
));
7352 DImode: 0xffffffffffffffff0000000000000000
7353 SImode: 0x00000000ffffffff0000000000000000
7354 HImode: 0x000000000000ffff0000000000000000.
7355 QImode: 0x00000000000000ff0000000000000000. */
7356 rtx mask
= gen_reg_rtx (V16QImode
);
7357 rtx mask_v2di
= gen_reg_rtx (V2DImode
);
7358 rtvec v
= rtvec_alloc (2);
7359 if (!BYTES_BIG_ENDIAN
)
7361 RTVEC_ELT (v
, 0) = gen_rtx_CONST_INT (DImode
, 0);
7362 RTVEC_ELT (v
, 1) = gen_rtx_CONST_INT (DImode
, mode_mask
);
7366 RTVEC_ELT (v
, 0) = gen_rtx_CONST_INT (DImode
, mode_mask
);
7367 RTVEC_ELT (v
, 1) = gen_rtx_CONST_INT (DImode
, 0);
7369 emit_insn (gen_vec_initv2didi (mask_v2di
, gen_rtx_PARALLEL (V2DImode
, v
)));
7370 rtx sub_mask
= simplify_gen_subreg (V16QImode
, mask_v2di
, V2DImode
, 0);
7371 emit_insn (gen_rtx_SET (mask
, sub_mask
));
7373 /* mtvsrd[wz] f0,tmp_val. */
7374 rtx tmp_val
= gen_reg_rtx (SImode
);
7375 if (inner_mode
== E_SFmode
)
7376 if (TARGET_DIRECT_MOVE_64BIT
)
7377 emit_insn (gen_movsi_from_sf (tmp_val
, val
));
7380 rtx stack
= rs6000_allocate_stack_temp (SFmode
, false, true);
7381 emit_insn (gen_movsf_hardfloat (stack
, val
));
7382 rtx stack2
= copy_rtx (stack
);
7383 PUT_MODE (stack2
, SImode
);
7384 emit_move_insn (tmp_val
, stack2
);
7387 tmp_val
= force_reg (SImode
, val
);
7389 rtx val_v16qi
= gen_reg_rtx (V16QImode
);
7390 rtx val_v2di
= gen_reg_rtx (V2DImode
);
7391 rtvec vec_val
= rtvec_alloc (2);
7392 if (!BYTES_BIG_ENDIAN
)
7394 RTVEC_ELT (vec_val
, 0) = gen_rtx_CONST_INT (DImode
, 0);
7395 RTVEC_ELT (vec_val
, 1) = tmp_val
;
7399 RTVEC_ELT (vec_val
, 0) = tmp_val
;
7400 RTVEC_ELT (vec_val
, 1) = gen_rtx_CONST_INT (DImode
, 0);
7403 gen_vec_initv2didi (val_v2di
, gen_rtx_PARALLEL (V2DImode
, vec_val
)));
7404 rtx sub_val
= simplify_gen_subreg (V16QImode
, val_v2di
, V2DImode
, 0);
7405 emit_insn (gen_rtx_SET (val_v16qi
, sub_val
));
7407 /* lvsl 13,0,idx. */
7408 rtx pcv
= gen_reg_rtx (V16QImode
);
7409 emit_insn (gen_lvsl (pcv
, tmp
));
7411 /* vperm 1,1,1,13. */
7412 /* vperm 0,0,0,13. */
7413 rtx val_perm
= gen_reg_rtx (V16QImode
);
7414 rtx mask_perm
= gen_reg_rtx (V16QImode
);
7415 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm
, val_v16qi
, val_v16qi
, pcv
));
7416 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm
, mask
, mask
, pcv
));
7418 rtx target_v16qi
= simplify_gen_subreg (V16QImode
, target
, mode
, 0);
7420 /* xxsel 34,34,32,33. */
7422 gen_vector_select_v16qi (target_v16qi
, target_v16qi
, val_perm
, mask_perm
));
7425 /* Set field ELT_RTX of TARGET to VAL. */
7428 rs6000_expand_vector_set (rtx target
, rtx val
, rtx elt_rtx
)
7430 machine_mode mode
= GET_MODE (target
);
7431 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7432 rtx reg
= gen_reg_rtx (mode
);
7434 int width
= GET_MODE_SIZE (inner_mode
);
7437 val
= force_reg (GET_MODE (val
), val
);
7439 if (VECTOR_MEM_VSX_P (mode
))
7441 if (!CONST_INT_P (elt_rtx
))
7443 /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
7444 when elt_rtx is variable. */
7445 if ((TARGET_P9_VECTOR
&& TARGET_POWERPC64
) || width
== 8)
7447 rs6000_expand_vector_set_var_p9 (target
, val
, elt_rtx
);
7450 else if (TARGET_VSX
)
7452 rs6000_expand_vector_set_var_p7 (target
, val
, elt_rtx
);
7456 gcc_assert (CONST_INT_P (elt_rtx
));
7459 rtx insn
= NULL_RTX
;
7461 if (mode
== V2DFmode
)
7462 insn
= gen_vsx_set_v2df (target
, target
, val
, elt_rtx
);
7464 else if (mode
== V2DImode
)
7465 insn
= gen_vsx_set_v2di (target
, target
, val
, elt_rtx
);
7467 else if (TARGET_P9_VECTOR
&& TARGET_POWERPC64
)
7469 if (mode
== V4SImode
)
7470 insn
= gen_vsx_set_v4si_p9 (target
, target
, val
, elt_rtx
);
7471 else if (mode
== V8HImode
)
7472 insn
= gen_vsx_set_v8hi_p9 (target
, target
, val
, elt_rtx
);
7473 else if (mode
== V16QImode
)
7474 insn
= gen_vsx_set_v16qi_p9 (target
, target
, val
, elt_rtx
);
7475 else if (mode
== V4SFmode
)
7476 insn
= gen_vsx_set_v4sf_p9 (target
, target
, val
, elt_rtx
);
7486 /* Simplify setting single element vectors like V1TImode. */
7487 if (GET_MODE_SIZE (mode
) == GET_MODE_SIZE (inner_mode
)
7488 && INTVAL (elt_rtx
) == 0)
7490 emit_move_insn (target
, gen_lowpart (mode
, val
));
7494 /* Load single variable value. */
7495 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (inner_mode
));
7496 emit_move_insn (adjust_address_nv (mem
, inner_mode
, 0), val
);
7497 x
= gen_rtx_UNSPEC (VOIDmode
,
7498 gen_rtvec (1, const0_rtx
), UNSPEC_LVE
);
7499 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
7501 gen_rtx_SET (reg
, mem
),
7504 /* Linear sequence. */
7505 mask
= gen_rtx_PARALLEL (V16QImode
, rtvec_alloc (16));
7506 for (i
= 0; i
< 16; ++i
)
7507 XVECEXP (mask
, 0, i
) = GEN_INT (i
);
7509 /* Set permute mask to insert element into target. */
7510 for (i
= 0; i
< width
; ++i
)
7511 XVECEXP (mask
, 0, INTVAL (elt_rtx
) * width
+ i
) = GEN_INT (i
+ 0x10);
7512 x
= gen_rtx_CONST_VECTOR (V16QImode
, XVEC (mask
, 0));
7514 if (BYTES_BIG_ENDIAN
)
7515 x
= gen_rtx_UNSPEC (mode
,
7516 gen_rtvec (3, target
, reg
,
7517 force_reg (V16QImode
, x
)),
7521 if (TARGET_P9_VECTOR
)
7522 x
= gen_rtx_UNSPEC (mode
,
7523 gen_rtvec (3, reg
, target
,
7524 force_reg (V16QImode
, x
)),
7528 /* Invert selector. We prefer to generate VNAND on P8 so
7529 that future fusion opportunities can kick in, but must
7530 generate VNOR elsewhere. */
7531 rtx notx
= gen_rtx_NOT (V16QImode
, force_reg (V16QImode
, x
));
7532 rtx iorx
= (TARGET_P8_VECTOR
7533 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
7534 : gen_rtx_AND (V16QImode
, notx
, notx
));
7535 rtx tmp
= gen_reg_rtx (V16QImode
);
7536 emit_insn (gen_rtx_SET (tmp
, iorx
));
7538 /* Permute with operands reversed and adjusted selector. */
7539 x
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, reg
, target
, tmp
),
7544 emit_insn (gen_rtx_SET (target
, x
));
7547 /* Extract field ELT from VEC into TARGET. */
7550 rs6000_expand_vector_extract (rtx target
, rtx vec
, rtx elt
)
7552 machine_mode mode
= GET_MODE (vec
);
7553 machine_mode inner_mode
= GET_MODE_INNER (mode
);
7556 if (VECTOR_MEM_VSX_P (mode
) && CONST_INT_P (elt
))
7563 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7566 emit_insn (gen_vsx_extract_v2df (target
, vec
, elt
));
7569 emit_insn (gen_vsx_extract_v2di (target
, vec
, elt
));
7572 emit_insn (gen_vsx_extract_v4sf (target
, vec
, elt
));
7575 if (TARGET_DIRECT_MOVE_64BIT
)
7577 emit_insn (gen_vsx_extract_v16qi (target
, vec
, elt
));
7583 if (TARGET_DIRECT_MOVE_64BIT
)
7585 emit_insn (gen_vsx_extract_v8hi (target
, vec
, elt
));
7591 if (TARGET_DIRECT_MOVE_64BIT
)
7593 emit_insn (gen_vsx_extract_v4si (target
, vec
, elt
));
7599 else if (VECTOR_MEM_VSX_P (mode
) && !CONST_INT_P (elt
)
7600 && TARGET_DIRECT_MOVE_64BIT
)
7602 if (GET_MODE (elt
) != DImode
)
7604 rtx tmp
= gen_reg_rtx (DImode
);
7605 convert_move (tmp
, elt
, 0);
7608 else if (!REG_P (elt
))
7609 elt
= force_reg (DImode
, elt
);
7614 emit_move_insn (target
, gen_lowpart (TImode
, vec
));
7618 emit_insn (gen_vsx_extract_v2df_var (target
, vec
, elt
));
7622 emit_insn (gen_vsx_extract_v2di_var (target
, vec
, elt
));
7626 emit_insn (gen_vsx_extract_v4sf_var (target
, vec
, elt
));
7630 emit_insn (gen_vsx_extract_v4si_var (target
, vec
, elt
));
7634 emit_insn (gen_vsx_extract_v8hi_var (target
, vec
, elt
));
7638 emit_insn (gen_vsx_extract_v16qi_var (target
, vec
, elt
));
7646 /* Allocate mode-sized buffer. */
7647 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7649 emit_move_insn (mem
, vec
);
7650 if (CONST_INT_P (elt
))
7652 int modulo_elt
= INTVAL (elt
) % GET_MODE_NUNITS (mode
);
7654 /* Add offset to field within buffer matching vector element. */
7655 mem
= adjust_address_nv (mem
, inner_mode
,
7656 modulo_elt
* GET_MODE_SIZE (inner_mode
));
7657 emit_move_insn (target
, adjust_address_nv (mem
, inner_mode
, 0));
7661 unsigned int ele_size
= GET_MODE_SIZE (inner_mode
);
7662 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (mode
) - 1);
7663 rtx new_addr
= gen_reg_rtx (Pmode
);
7665 elt
= gen_rtx_AND (Pmode
, elt
, num_ele_m1
);
7667 elt
= gen_rtx_MULT (Pmode
, elt
, GEN_INT (ele_size
));
7668 new_addr
= gen_rtx_PLUS (Pmode
, XEXP (mem
, 0), elt
);
7669 new_addr
= change_address (mem
, inner_mode
, new_addr
);
7670 emit_move_insn (target
, new_addr
);
7674 /* Return the offset within a memory object (MEM) of a vector type to a given
7675 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7676 the element is constant, we return a constant integer.
7678 Otherwise, we use a base register temporary to calculate the offset after
7679 masking it to fit within the bounds of the vector and scaling it. The
7680 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7681 built-in function. */
7684 get_vector_offset (rtx mem
, rtx element
, rtx base_tmp
, unsigned scalar_size
)
7686 if (CONST_INT_P (element
))
7687 return GEN_INT (INTVAL (element
) * scalar_size
);
7689 /* All insns should use the 'Q' constraint (address is a single register) if
7690 the element number is not a constant. */
7691 gcc_assert (satisfies_constraint_Q (mem
));
7693 /* Mask the element to make sure the element number is between 0 and the
7694 maximum number of elements - 1 so that we don't generate an address
7695 outside the vector. */
7696 rtx num_ele_m1
= GEN_INT (GET_MODE_NUNITS (GET_MODE (mem
)) - 1);
7697 rtx and_op
= gen_rtx_AND (Pmode
, element
, num_ele_m1
);
7698 emit_insn (gen_rtx_SET (base_tmp
, and_op
));
7700 /* Shift the element to get the byte offset from the element number. */
7701 int shift
= exact_log2 (scalar_size
);
7702 gcc_assert (shift
>= 0);
7706 rtx shift_op
= gen_rtx_ASHIFT (Pmode
, base_tmp
, GEN_INT (shift
));
7707 emit_insn (gen_rtx_SET (base_tmp
, shift_op
));
7713 /* Helper function update PC-relative addresses when we are adjusting a memory
7714 address (ADDR) to a vector to point to a scalar field within the vector with
7715 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
7716 use the base register temporary (BASE_TMP) to form the address. */
7719 adjust_vec_address_pcrel (rtx addr
, rtx element_offset
, rtx base_tmp
)
7721 rtx new_addr
= NULL
;
7723 gcc_assert (CONST_INT_P (element_offset
));
7725 if (GET_CODE (addr
) == CONST
)
7726 addr
= XEXP (addr
, 0);
7728 if (GET_CODE (addr
) == PLUS
)
7730 rtx op0
= XEXP (addr
, 0);
7731 rtx op1
= XEXP (addr
, 1);
7733 if (CONST_INT_P (op1
))
7735 HOST_WIDE_INT offset
7736 = INTVAL (XEXP (addr
, 1)) + INTVAL (element_offset
);
7743 rtx plus
= gen_rtx_PLUS (Pmode
, op0
, GEN_INT (offset
));
7744 new_addr
= gen_rtx_CONST (Pmode
, plus
);
7750 emit_move_insn (base_tmp
, addr
);
7751 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7755 else if (SYMBOL_REF_P (addr
) || LABEL_REF_P (addr
))
7757 rtx plus
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7758 new_addr
= gen_rtx_CONST (Pmode
, plus
);
7767 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7768 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7769 temporary (BASE_TMP) to fixup the address. Return the new memory address
7770 that is valid for reads or writes to a given register (SCALAR_REG).
7772 This function is expected to be called after reload is completed when we are
7773 splitting insns. The temporary BASE_TMP might be set multiple times with
7777 rs6000_adjust_vec_address (rtx scalar_reg
,
7781 machine_mode scalar_mode
)
7783 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7784 rtx addr
= XEXP (mem
, 0);
7787 gcc_assert (!reg_mentioned_p (base_tmp
, addr
));
7788 gcc_assert (!reg_mentioned_p (base_tmp
, element
));
7790 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7791 gcc_assert (GET_RTX_CLASS (GET_CODE (addr
)) != RTX_AUTOINC
);
7793 /* Calculate what we need to add to the address to get the element
7795 rtx element_offset
= get_vector_offset (mem
, element
, base_tmp
, scalar_size
);
7797 /* Create the new address pointing to the element within the vector. If we
7798 are adding 0, we don't have to change the address. */
7799 if (element_offset
== const0_rtx
)
7802 /* A simple indirect address can be converted into a reg + offset
7804 else if (REG_P (addr
) || SUBREG_P (addr
))
7805 new_addr
= gen_rtx_PLUS (Pmode
, addr
, element_offset
);
7807 /* For references to local static variables, fold a constant offset into the
7809 else if (pcrel_local_address (addr
, Pmode
) && CONST_INT_P (element_offset
))
7810 new_addr
= adjust_vec_address_pcrel (addr
, element_offset
, base_tmp
);
7812 /* Optimize D-FORM addresses with constant offset with a constant element, to
7813 include the element offset in the address directly. */
7814 else if (GET_CODE (addr
) == PLUS
)
7816 rtx op0
= XEXP (addr
, 0);
7817 rtx op1
= XEXP (addr
, 1);
7819 gcc_assert (REG_P (op0
) || SUBREG_P (op0
));
7820 if (CONST_INT_P (op1
) && CONST_INT_P (element_offset
))
7822 /* op0 should never be r0, because r0+offset is not valid. But it
7823 doesn't hurt to make sure it is not r0. */
7824 gcc_assert (reg_or_subregno (op0
) != 0);
7826 /* D-FORM address with constant element number. */
7827 HOST_WIDE_INT offset
= INTVAL (op1
) + INTVAL (element_offset
);
7828 rtx offset_rtx
= GEN_INT (offset
);
7829 new_addr
= gen_rtx_PLUS (Pmode
, op0
, offset_rtx
);
7833 /* If we don't have a D-FORM address with a constant element number,
7834 add the two elements in the current address. Then add the offset.
7836 Previously, we tried to add the offset to OP1 and change the
7837 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7838 complicated because we had to verify that op1 was not GPR0 and we
7839 had a constant element offset (due to the way ADDI is defined).
7840 By doing the add of OP0 and OP1 first, and then adding in the
7841 offset, it has the benefit that if D-FORM instructions are
7842 allowed, the offset is part of the memory access to the vector
7844 emit_insn (gen_rtx_SET (base_tmp
, gen_rtx_PLUS (Pmode
, op0
, op1
)));
7845 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7851 emit_move_insn (base_tmp
, addr
);
7852 new_addr
= gen_rtx_PLUS (Pmode
, base_tmp
, element_offset
);
7855 /* If the address isn't valid, move the address into the temporary base
7856 register. Some reasons it could not be valid include:
7858 The address offset overflowed the 16 or 34 bit offset size;
7859 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7860 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7861 Only X_FORM loads can be done, and the address is D_FORM. */
7863 enum insn_form iform
7864 = address_to_insn_form (new_addr
, scalar_mode
,
7865 reg_to_non_prefixed (scalar_reg
, scalar_mode
));
7867 if (iform
== INSN_FORM_BAD
)
7869 emit_move_insn (base_tmp
, new_addr
);
7870 new_addr
= base_tmp
;
7873 return change_address (mem
, scalar_mode
, new_addr
);
7876 /* Split a variable vec_extract operation into the component instructions. */
7879 rs6000_split_vec_extract_var (rtx dest
, rtx src
, rtx element
, rtx tmp_gpr
,
7882 machine_mode mode
= GET_MODE (src
);
7883 machine_mode scalar_mode
= GET_MODE_INNER (GET_MODE (src
));
7884 unsigned scalar_size
= GET_MODE_SIZE (scalar_mode
);
7885 int byte_shift
= exact_log2 (scalar_size
);
7887 gcc_assert (byte_shift
>= 0);
7889 /* If we are given a memory address, optimize to load just the element. We
7890 don't have to adjust the vector element number on little endian
7894 emit_move_insn (dest
,
7895 rs6000_adjust_vec_address (dest
, src
, element
, tmp_gpr
,
7900 else if (REG_P (src
) || SUBREG_P (src
))
7902 int num_elements
= GET_MODE_NUNITS (mode
);
7903 int bits_in_element
= mode_to_bits (GET_MODE_INNER (mode
));
7904 int bit_shift
= 7 - exact_log2 (num_elements
);
7906 unsigned int dest_regno
= reg_or_subregno (dest
);
7907 unsigned int src_regno
= reg_or_subregno (src
);
7908 unsigned int element_regno
= reg_or_subregno (element
);
7910 gcc_assert (REG_P (tmp_gpr
));
7912 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7913 a general purpose register. */
7914 if (TARGET_P9_VECTOR
7915 && (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
7916 && INT_REGNO_P (dest_regno
)
7917 && ALTIVEC_REGNO_P (src_regno
)
7918 && INT_REGNO_P (element_regno
))
7920 rtx dest_si
= gen_rtx_REG (SImode
, dest_regno
);
7921 rtx element_si
= gen_rtx_REG (SImode
, element_regno
);
7923 if (mode
== V16QImode
)
7924 emit_insn (BYTES_BIG_ENDIAN
7925 ? gen_vextublx (dest_si
, element_si
, src
)
7926 : gen_vextubrx (dest_si
, element_si
, src
));
7928 else if (mode
== V8HImode
)
7930 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
7931 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const1_rtx
));
7932 emit_insn (BYTES_BIG_ENDIAN
7933 ? gen_vextuhlx (dest_si
, tmp_gpr_si
, src
)
7934 : gen_vextuhrx (dest_si
, tmp_gpr_si
, src
));
7940 rtx tmp_gpr_si
= gen_rtx_REG (SImode
, REGNO (tmp_gpr
));
7941 emit_insn (gen_ashlsi3 (tmp_gpr_si
, element_si
, const2_rtx
));
7942 emit_insn (BYTES_BIG_ENDIAN
7943 ? gen_vextuwlx (dest_si
, tmp_gpr_si
, src
)
7944 : gen_vextuwrx (dest_si
, tmp_gpr_si
, src
));
7951 gcc_assert (REG_P (tmp_altivec
));
7953 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7954 an XOR, otherwise we need to subtract. The shift amount is so VSLO
7955 will shift the element into the upper position (adding 3 to convert a
7956 byte shift into a bit shift). */
7957 if (scalar_size
== 8)
7959 if (!BYTES_BIG_ENDIAN
)
7961 emit_insn (gen_xordi3 (tmp_gpr
, element
, const1_rtx
));
7967 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7969 emit_insn (gen_rtx_SET (tmp_gpr
,
7970 gen_rtx_AND (DImode
,
7971 gen_rtx_ASHIFT (DImode
,
7978 if (!BYTES_BIG_ENDIAN
)
7980 rtx num_ele_m1
= GEN_INT (num_elements
- 1);
7982 emit_insn (gen_anddi3 (tmp_gpr
, element
, num_ele_m1
));
7983 emit_insn (gen_subdi3 (tmp_gpr
, num_ele_m1
, tmp_gpr
));
7989 emit_insn (gen_ashldi3 (tmp_gpr
, element2
, GEN_INT (bit_shift
)));
7992 /* Get the value into the lower byte of the Altivec register where VSLO
7994 if (TARGET_P9_VECTOR
)
7995 emit_insn (gen_vsx_splat_v2di (tmp_altivec
, tmp_gpr
));
7996 else if (can_create_pseudo_p ())
7997 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_gpr
, tmp_gpr
));
8000 rtx tmp_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8001 emit_move_insn (tmp_di
, tmp_gpr
);
8002 emit_insn (gen_vsx_concat_v2di (tmp_altivec
, tmp_di
, tmp_di
));
8005 /* Do the VSLO to get the value into the final location. */
8009 emit_insn (gen_vsx_vslo_v2df (dest
, src
, tmp_altivec
));
8013 emit_insn (gen_vsx_vslo_v2di (dest
, src
, tmp_altivec
));
8018 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8019 rtx tmp_altivec_v4sf
= gen_rtx_REG (V4SFmode
, REGNO (tmp_altivec
));
8020 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
8021 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
8024 emit_insn (gen_vsx_xscvspdp_scalar2 (dest
, tmp_altivec_v4sf
));
8032 rtx tmp_altivec_di
= gen_rtx_REG (DImode
, REGNO (tmp_altivec
));
8033 rtx src_v2di
= gen_rtx_REG (V2DImode
, REGNO (src
));
8034 rtx tmp_gpr_di
= gen_rtx_REG (DImode
, REGNO (dest
));
8035 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di
, src_v2di
,
8037 emit_move_insn (tmp_gpr_di
, tmp_altivec_di
);
8038 emit_insn (gen_lshrdi3 (tmp_gpr_di
, tmp_gpr_di
,
8039 GEN_INT (64 - bits_in_element
)));
8053 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
8054 selects whether the alignment is abi mandated, optional, or
8055 both abi and optional alignment. */
8058 rs6000_data_alignment (tree type
, unsigned int align
, enum data_align how
)
8060 if (how
!= align_opt
)
8062 if (TREE_CODE (type
) == VECTOR_TYPE
&& align
< 128)
8066 if (how
!= align_abi
)
8068 if (TREE_CODE (type
) == ARRAY_TYPE
8069 && TYPE_MODE (TREE_TYPE (type
)) == QImode
)
8071 if (align
< BITS_PER_WORD
)
8072 align
= BITS_PER_WORD
;
8079 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8080 instructions simply ignore the low bits; VSX memory instructions
8081 are aligned to 4 or 8 bytes. */
8084 rs6000_slow_unaligned_access (machine_mode mode
, unsigned int align
)
8086 return (STRICT_ALIGNMENT
8087 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8088 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
) && align
< 32)
8089 || ((VECTOR_MODE_P (mode
) || VECTOR_ALIGNMENT_P (mode
))
8090 && (int) align
< VECTOR_ALIGN (mode
)))));
8093 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
8096 rs6000_special_adjust_field_align (tree type
, unsigned int computed
)
8098 if (computed
<= 32 || TYPE_PACKED (type
))
8101 /* Strip initial arrays. */
8102 while (TREE_CODE (type
) == ARRAY_TYPE
)
8103 type
= TREE_TYPE (type
);
8105 /* If RECORD or UNION, recursively find the first field. */
8106 while (AGGREGATE_TYPE_P (type
))
8108 tree field
= TYPE_FIELDS (type
);
8110 /* Skip all non field decls */
8111 while (field
!= NULL
8112 && (TREE_CODE (field
) != FIELD_DECL
8113 || DECL_FIELD_ABI_IGNORED (field
)))
8114 field
= DECL_CHAIN (field
);
8119 /* A packed field does not contribute any extra alignment. */
8120 if (DECL_PACKED (field
))
8123 type
= TREE_TYPE (field
);
8126 while (TREE_CODE (type
) == ARRAY_TYPE
)
8127 type
= TREE_TYPE (type
);
8130 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
8131 && (TYPE_MODE (type
) == DFmode
|| TYPE_MODE (type
) == DCmode
))
8132 computed
= MIN (computed
, 32);
8137 /* AIX increases natural record alignment to doubleword if the innermost first
8138 field is an FP double while the FP fields remain word aligned.
8139 Only called if TYPE initially is a RECORD or UNION. */
8142 rs6000_special_round_type_align (tree type
, unsigned int computed
,
8143 unsigned int specified
)
8145 unsigned int align
= MAX (computed
, specified
);
8147 if (TYPE_PACKED (type
) || align
>= 64)
8150 /* If RECORD or UNION, recursively find the first field. */
8153 tree field
= TYPE_FIELDS (type
);
8155 /* Skip all non field decls */
8156 while (field
!= NULL
8157 && (TREE_CODE (field
) != FIELD_DECL
8158 || DECL_FIELD_ABI_IGNORED (field
)))
8159 field
= DECL_CHAIN (field
);
8164 /* A packed field does not contribute any extra alignment. */
8165 if (DECL_PACKED (field
))
8168 type
= TREE_TYPE (field
);
8171 while (TREE_CODE (type
) == ARRAY_TYPE
)
8172 type
= TREE_TYPE (type
);
8173 } while (AGGREGATE_TYPE_P (type
));
8175 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
8176 && (TYPE_MODE (type
) == DFmode
|| TYPE_MODE (type
) == DCmode
))
8177 align
= MAX (align
, 64);
8182 /* Darwin increases record alignment to the natural alignment of
8186 darwin_rs6000_special_round_type_align (tree type
, unsigned int computed
,
8187 unsigned int specified
)
8189 unsigned int align
= MAX (computed
, specified
);
8191 if (TYPE_PACKED (type
))
8194 /* Find the first field, looking down into aggregates. */
8196 tree field
= TYPE_FIELDS (type
);
8197 /* Skip all non field decls */
8198 while (field
!= NULL
8199 && (TREE_CODE (field
) != FIELD_DECL
8200 || DECL_FIELD_ABI_IGNORED (field
)))
8201 field
= DECL_CHAIN (field
);
8204 /* A packed field does not contribute any extra alignment. */
8205 if (DECL_PACKED (field
))
8207 type
= TREE_TYPE (field
);
8208 while (TREE_CODE (type
) == ARRAY_TYPE
)
8209 type
= TREE_TYPE (type
);
8210 } while (AGGREGATE_TYPE_P (type
));
8212 if (! AGGREGATE_TYPE_P (type
) && type
!= error_mark_node
)
8213 align
= MAX (align
, TYPE_ALIGN (type
));
8218 /* Return 1 for an operand in small memory on V.4/eabi. */
8221 small_data_operand (rtx op ATTRIBUTE_UNUSED
,
8222 machine_mode mode ATTRIBUTE_UNUSED
)
8227 if (rs6000_sdata
== SDATA_NONE
|| rs6000_sdata
== SDATA_DATA
)
8230 if (DEFAULT_ABI
!= ABI_V4
)
8233 if (SYMBOL_REF_P (op
))
8236 else if (GET_CODE (op
) != CONST
8237 || GET_CODE (XEXP (op
, 0)) != PLUS
8238 || !SYMBOL_REF_P (XEXP (XEXP (op
, 0), 0))
8239 || !CONST_INT_P (XEXP (XEXP (op
, 0), 1)))
8244 rtx sum
= XEXP (op
, 0);
8245 HOST_WIDE_INT summand
;
8247 /* We have to be careful here, because it is the referenced address
8248 that must be 32k from _SDA_BASE_, not just the symbol. */
8249 summand
= INTVAL (XEXP (sum
, 1));
8250 if (summand
< 0 || summand
> g_switch_value
)
8253 sym_ref
= XEXP (sum
, 0);
8256 return SYMBOL_REF_SMALL_P (sym_ref
);
8262 /* Return true if either operand is a general purpose register. */
8265 gpr_or_gpr_p (rtx op0
, rtx op1
)
8267 return ((REG_P (op0
) && INT_REGNO_P (REGNO (op0
)))
8268 || (REG_P (op1
) && INT_REGNO_P (REGNO (op1
))));
8271 /* Return true if this is a move direct operation between GPR registers and
8272 floating point/VSX registers. */
8275 direct_move_p (rtx op0
, rtx op1
)
8277 if (!REG_P (op0
) || !REG_P (op1
))
8280 if (!TARGET_DIRECT_MOVE
)
8283 int regno0
= REGNO (op0
);
8284 int regno1
= REGNO (op1
);
8285 if (!HARD_REGISTER_NUM_P (regno0
) || !HARD_REGISTER_NUM_P (regno1
))
8288 if (INT_REGNO_P (regno0
) && VSX_REGNO_P (regno1
))
8291 if (VSX_REGNO_P (regno0
) && INT_REGNO_P (regno1
))
8297 /* Return true if the ADDR is an acceptable address for a quad memory
8298 operation of mode MODE (either LQ/STQ for general purpose registers, or
8299 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address
8300 is intended for LQ/STQ. If it is false, the address is intended for the ISA
8301 3.0 LXV/STXV instruction. */
8304 quad_address_p (rtx addr
, machine_mode mode
, bool strict
)
8308 if (GET_MODE_SIZE (mode
) < 16)
8311 if (legitimate_indirect_address_p (addr
, strict
))
8314 if (VECTOR_MODE_P (mode
) && !mode_supports_dq_form (mode
))
8317 /* Is this a valid prefixed address? If the bottom four bits of the offset
8318 are non-zero, we could use a prefixed instruction (which does not have the
8319 DQ-form constraint that the traditional instruction had) instead of
8320 forcing the unaligned offset to a GPR. */
8321 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DQ
))
8324 if (GET_CODE (addr
) != PLUS
)
8327 op0
= XEXP (addr
, 0);
8328 if (!REG_P (op0
) || !INT_REG_OK_FOR_BASE_P (op0
, strict
))
8331 op1
= XEXP (addr
, 1);
8332 if (!CONST_INT_P (op1
))
8335 return quad_address_offset_p (INTVAL (op1
));
8338 /* Return true if this is a load or store quad operation. This function does
8339 not handle the atomic quad memory instructions. */
8342 quad_load_store_p (rtx op0
, rtx op1
)
8346 if (!TARGET_QUAD_MEMORY
)
8349 else if (REG_P (op0
) && MEM_P (op1
))
8350 ret
= (quad_int_reg_operand (op0
, GET_MODE (op0
))
8351 && quad_memory_operand (op1
, GET_MODE (op1
))
8352 && !reg_overlap_mentioned_p (op0
, op1
));
8354 else if (MEM_P (op0
) && REG_P (op1
))
8355 ret
= (quad_memory_operand (op0
, GET_MODE (op0
))
8356 && quad_int_reg_operand (op1
, GET_MODE (op1
)));
8361 if (TARGET_DEBUG_ADDR
)
8363 fprintf (stderr
, "\n========== quad_load_store, return %s\n",
8364 ret
? "true" : "false");
8365 debug_rtx (gen_rtx_SET (op0
, op1
));
8371 /* Given an address, return a constant offset term if one exists. */
8374 address_offset (rtx op
)
8376 if (GET_CODE (op
) == PRE_INC
8377 || GET_CODE (op
) == PRE_DEC
)
8379 else if (GET_CODE (op
) == PRE_MODIFY
8380 || GET_CODE (op
) == LO_SUM
)
8383 if (GET_CODE (op
) == CONST
)
8386 if (GET_CODE (op
) == PLUS
)
8389 if (CONST_INT_P (op
))
8395 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8396 the mode. If we can't find (or don't know) the alignment of the symbol
8397 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8398 should be pessimistic]. Offsets are validated in the same way as for
8401 darwin_rs6000_legitimate_lo_sum_const_p (rtx x
, machine_mode mode
)
8403 /* We should not get here with this. */
8404 gcc_checking_assert (! mode_supports_dq_form (mode
));
8406 if (GET_CODE (x
) == CONST
)
8409 /* If we are building PIC code, then any symbol must be wrapped in an
8410 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */
8411 bool machopic_offs_p
= false;
8412 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_MACHOPIC_OFFSET
)
8414 x
= XVECEXP (x
, 0, 0);
8415 machopic_offs_p
= true;
8419 unsigned HOST_WIDE_INT offset
= 0;
8421 if (GET_CODE (x
) == PLUS
)
8424 if (! SYMBOL_REF_P (sym
))
8426 if (!CONST_INT_P (XEXP (x
, 1)))
8428 offset
= INTVAL (XEXP (x
, 1));
8430 else if (SYMBOL_REF_P (x
))
8432 else if (CONST_INT_P (x
))
8433 offset
= INTVAL (x
);
8434 else if (GET_CODE (x
) == LABEL_REF
)
8435 offset
= 0; // We assume code labels are Pmode aligned
8437 return false; // not sure what we have here.
8439 /* If we don't know the alignment of the thing to which the symbol refers,
8440 we assume optimistically it is "enough".
8441 ??? maybe we should be pessimistic instead. */
8446 tree decl
= SYMBOL_REF_DECL (sym
);
8447 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */
8448 if (TARGET_MACHO
&& flag_pic
&& !machopic_offs_p
)
8451 if (MACHO_SYMBOL_INDIRECTION_P (sym
))
8452 /* The decl in an indirection symbol is the original one, which might
8453 be less aligned than the indirection. Our indirections are always
8458 if (decl
&& DECL_ALIGN (decl
))
8459 align
= DECL_ALIGN_UNIT (decl
);
8462 unsigned int extra
= 0;
8468 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8470 if (VECTOR_MEM_VSX_P (mode
))
8473 if (!TARGET_POWERPC64
)
8475 else if ((offset
& 3) || (align
& 3))
8486 if (!TARGET_POWERPC64
)
8488 else if ((offset
& 3) || (align
& 3))
8496 /* We only care if the access(es) would cause a change to the high part. */
8497 offset
= sext_hwi (offset
, 16);
8498 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8501 /* Return true if the MEM operand is a memory operand suitable for use
8502 with a (full width, possibly multiple) gpr load/store. On
8503 powerpc64 this means the offset must be divisible by 4.
8504 Implements 'Y' constraint.
8506 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8507 a constraint function we know the operand has satisfied a suitable
8510 Offsetting a lo_sum should not be allowed, except where we know by
8511 alignment that a 32k boundary is not crossed. Note that by
8512 "offsetting" here we mean a further offset to access parts of the
8513 MEM. It's fine to have a lo_sum where the inner address is offset
8514 from a sym, since the same sym+offset will appear in the high part
8515 of the address calculation. */
8518 mem_operand_gpr (rtx op
, machine_mode mode
)
8520 unsigned HOST_WIDE_INT offset
;
8522 rtx addr
= XEXP (op
, 0);
8524 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8526 && (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
8527 && mode_supports_pre_incdec_p (mode
)
8528 && legitimate_indirect_address_p (XEXP (addr
, 0), false))
8531 /* Allow prefixed instructions if supported. If the bottom two bits of the
8532 offset are non-zero, we could use a prefixed instruction (which does not
8533 have the DS-form constraint that the traditional instruction had) instead
8534 of forcing the unaligned offset to a GPR. */
8535 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DS
))
8538 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8539 really OK. Doing this early avoids teaching all the other machinery
8541 if (TARGET_MACHO
&& GET_CODE (addr
) == LO_SUM
)
8542 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr
, 1), mode
);
8544 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8545 if (!rs6000_offsettable_memref_p (op
, mode
, false))
8548 op
= address_offset (addr
);
8552 offset
= INTVAL (op
);
8553 if (TARGET_POWERPC64
&& (offset
& 3) != 0)
8556 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8560 if (GET_CODE (addr
) == LO_SUM
)
8561 /* For lo_sum addresses, we must allow any offset except one that
8562 causes a wrap, so test only the low 16 bits. */
8563 offset
= sext_hwi (offset
, 16);
8565 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8568 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8569 enforce an offset divisible by 4 even for 32-bit. */
8572 mem_operand_ds_form (rtx op
, machine_mode mode
)
8574 unsigned HOST_WIDE_INT offset
;
8576 rtx addr
= XEXP (op
, 0);
8578 /* Allow prefixed instructions if supported. If the bottom two bits of the
8579 offset are non-zero, we could use a prefixed instruction (which does not
8580 have the DS-form constraint that the traditional instruction had) instead
8581 of forcing the unaligned offset to a GPR. */
8582 if (address_is_prefixed (addr
, mode
, NON_PREFIXED_DS
))
8585 if (!offsettable_address_p (false, mode
, addr
))
8588 op
= address_offset (addr
);
8592 offset
= INTVAL (op
);
8593 if ((offset
& 3) != 0)
8596 extra
= GET_MODE_SIZE (mode
) - UNITS_PER_WORD
;
8600 if (GET_CODE (addr
) == LO_SUM
)
8601 /* For lo_sum addresses, we must allow any offset except one that
8602 causes a wrap, so test only the low 16 bits. */
8603 offset
= sext_hwi (offset
, 16);
8605 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8608 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8611 reg_offset_addressing_ok_p (machine_mode mode
)
8625 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8626 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8627 a vector mode, if we want to use the VSX registers to move it around,
8628 we need to restrict ourselves to reg+reg addressing. Similarly for
8629 IEEE 128-bit floating point that is passed in a single vector
8631 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
))
8632 return mode_supports_dq_form (mode
);
8635 /* The vector pair/quad types support offset addressing if the
8636 underlying vectors support offset addressing. */
8642 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8643 addressing for the LFIWZX and STFIWX instructions. */
8644 if (TARGET_NO_SDMODE_STACK
)
8656 virtual_stack_registers_memory_p (rtx op
)
8661 regnum
= REGNO (op
);
8663 else if (GET_CODE (op
) == PLUS
8664 && REG_P (XEXP (op
, 0))
8665 && CONST_INT_P (XEXP (op
, 1)))
8666 regnum
= REGNO (XEXP (op
, 0));
8671 return (regnum
>= FIRST_VIRTUAL_REGISTER
8672 && regnum
<= LAST_VIRTUAL_POINTER_REGISTER
);
8675 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8676 is known to not straddle a 32k boundary. This function is used
8677 to determine whether -mcmodel=medium code can use TOC pointer
8678 relative addressing for OP. This means the alignment of the TOC
8679 pointer must also be taken into account, and unfortunately that is
8682 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8683 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8687 offsettable_ok_by_alignment (rtx op
, HOST_WIDE_INT offset
,
8691 unsigned HOST_WIDE_INT dsize
, dalign
, lsb
, mask
;
8693 if (!SYMBOL_REF_P (op
))
8696 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8698 if (mode_supports_dq_form (mode
))
8701 dsize
= GET_MODE_SIZE (mode
);
8702 decl
= SYMBOL_REF_DECL (op
);
8708 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8709 replacing memory addresses with an anchor plus offset. We
8710 could find the decl by rummaging around in the block->objects
8711 VEC for the given offset but that seems like too much work. */
8712 dalign
= BITS_PER_UNIT
;
8713 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op
)
8714 && SYMBOL_REF_ANCHOR_P (op
)
8715 && SYMBOL_REF_BLOCK (op
) != NULL
)
8717 struct object_block
*block
= SYMBOL_REF_BLOCK (op
);
8719 dalign
= block
->alignment
;
8720 offset
+= SYMBOL_REF_BLOCK_OFFSET (op
);
8722 else if (CONSTANT_POOL_ADDRESS_P (op
))
8724 /* It would be nice to have get_pool_align().. */
8725 machine_mode cmode
= get_pool_mode (op
);
8727 dalign
= GET_MODE_ALIGNMENT (cmode
);
8730 else if (DECL_P (decl
))
8732 dalign
= DECL_ALIGN (decl
);
8736 /* Allow BLKmode when the entire object is known to not
8737 cross a 32k boundary. */
8738 if (!DECL_SIZE_UNIT (decl
))
8741 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl
)))
8744 dsize
= tree_to_uhwi (DECL_SIZE_UNIT (decl
));
8748 dalign
/= BITS_PER_UNIT
;
8749 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8750 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8751 return dalign
>= dsize
;
8757 /* Find how many bits of the alignment we know for this access. */
8758 dalign
/= BITS_PER_UNIT
;
8759 if (dalign
> POWERPC64_TOC_POINTER_ALIGNMENT
)
8760 dalign
= POWERPC64_TOC_POINTER_ALIGNMENT
;
8762 lsb
= offset
& -offset
;
8766 return dalign
>= dsize
;
8770 constant_pool_expr_p (rtx op
)
8774 split_const (op
, &base
, &offset
);
8775 return (SYMBOL_REF_P (base
)
8776 && CONSTANT_POOL_ADDRESS_P (base
)
8777 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base
), Pmode
));
8780 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8781 use that as the register to put the HIGH value into if register allocation
8785 create_TOC_reference (rtx symbol
, rtx largetoc_reg
)
8787 rtx tocrel
, tocreg
, hi
;
8789 gcc_assert (TARGET_TOC
);
8791 if (TARGET_DEBUG_ADDR
)
8793 if (SYMBOL_REF_P (symbol
))
8794 fprintf (stderr
, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8798 fprintf (stderr
, "\ncreate_TOC_reference, code %s:\n",
8799 GET_RTX_NAME (GET_CODE (symbol
)));
8804 if (!can_create_pseudo_p ())
8805 df_set_regs_ever_live (TOC_REGISTER
, true);
8807 tocreg
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
8808 tocrel
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, symbol
, tocreg
), UNSPEC_TOCREL
);
8809 if (TARGET_CMODEL
== CMODEL_SMALL
|| can_create_pseudo_p ())
8812 hi
= gen_rtx_HIGH (Pmode
, copy_rtx (tocrel
));
8813 if (largetoc_reg
!= NULL
)
8815 emit_move_insn (largetoc_reg
, hi
);
8818 return gen_rtx_LO_SUM (Pmode
, hi
, tocrel
);
8821 /* These are only used to pass through from print_operand/print_operand_address
8822 to rs6000_output_addr_const_extra over the intervening function
8823 output_addr_const which is not target code. */
8824 static const_rtx tocrel_base_oac
, tocrel_offset_oac
;
8826 /* Return true if OP is a toc pointer relative address (the output
8827 of create_TOC_reference). If STRICT, do not match non-split
8828 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8829 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8830 TOCREL_OFFSET_RET respectively. */
8833 toc_relative_expr_p (const_rtx op
, bool strict
, const_rtx
*tocrel_base_ret
,
8834 const_rtx
*tocrel_offset_ret
)
8839 if (TARGET_CMODEL
!= CMODEL_SMALL
)
8841 /* When strict ensure we have everything tidy. */
8843 && !(GET_CODE (op
) == LO_SUM
8844 && REG_P (XEXP (op
, 0))
8845 && INT_REG_OK_FOR_BASE_P (XEXP (op
, 0), strict
)))
8848 /* When not strict, allow non-split TOC addresses and also allow
8849 (lo_sum (high ..)) TOC addresses created during reload. */
8850 if (GET_CODE (op
) == LO_SUM
)
8854 const_rtx tocrel_base
= op
;
8855 const_rtx tocrel_offset
= const0_rtx
;
8857 if (GET_CODE (op
) == PLUS
&& add_cint_operand (XEXP (op
, 1), GET_MODE (op
)))
8859 tocrel_base
= XEXP (op
, 0);
8860 tocrel_offset
= XEXP (op
, 1);
8863 if (tocrel_base_ret
)
8864 *tocrel_base_ret
= tocrel_base
;
8865 if (tocrel_offset_ret
)
8866 *tocrel_offset_ret
= tocrel_offset
;
8868 return (GET_CODE (tocrel_base
) == UNSPEC
8869 && XINT (tocrel_base
, 1) == UNSPEC_TOCREL
8870 && REG_P (XVECEXP (tocrel_base
, 0, 1))
8871 && REGNO (XVECEXP (tocrel_base
, 0, 1)) == TOC_REGISTER
);
8874 /* Return true if X is a constant pool address, and also for cmodel=medium
8875 if X is a toc-relative address known to be offsettable within MODE. */
8878 legitimate_constant_pool_address_p (const_rtx x
, machine_mode mode
,
8881 const_rtx tocrel_base
, tocrel_offset
;
8882 return (toc_relative_expr_p (x
, strict
, &tocrel_base
, &tocrel_offset
)
8883 && (TARGET_CMODEL
!= CMODEL_MEDIUM
8884 || constant_pool_expr_p (XVECEXP (tocrel_base
, 0, 0))
8886 || offsettable_ok_by_alignment (XVECEXP (tocrel_base
, 0, 0),
8887 INTVAL (tocrel_offset
), mode
)));
8891 legitimate_small_data_p (machine_mode mode
, rtx x
)
8893 return (DEFAULT_ABI
== ABI_V4
8894 && !flag_pic
&& !TARGET_TOC
8895 && (SYMBOL_REF_P (x
) || GET_CODE (x
) == CONST
)
8896 && small_data_operand (x
, mode
));
8900 rs6000_legitimate_offset_address_p (machine_mode mode
, rtx x
,
8901 bool strict
, bool worst_case
)
8903 unsigned HOST_WIDE_INT offset
;
8906 if (GET_CODE (x
) != PLUS
)
8908 if (!REG_P (XEXP (x
, 0)))
8910 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
8912 if (mode_supports_dq_form (mode
))
8913 return quad_address_p (x
, mode
, strict
);
8914 if (!reg_offset_addressing_ok_p (mode
))
8915 return virtual_stack_registers_memory_p (x
);
8916 if (legitimate_constant_pool_address_p (x
, mode
, strict
|| lra_in_progress
))
8918 if (!CONST_INT_P (XEXP (x
, 1)))
8921 offset
= INTVAL (XEXP (x
, 1));
8928 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8930 if (VECTOR_MEM_VSX_P (mode
))
8935 if (!TARGET_POWERPC64
)
8937 else if (offset
& 3)
8950 if (!TARGET_POWERPC64
)
8952 else if (offset
& 3)
8960 if (TARGET_PREFIXED
)
8961 return SIGNED_34BIT_OFFSET_EXTRA_P (offset
, extra
);
8963 return SIGNED_16BIT_OFFSET_EXTRA_P (offset
, extra
);
8967 legitimate_indexed_address_p (rtx x
, int strict
)
8971 if (GET_CODE (x
) != PLUS
)
8977 return (REG_P (op0
) && REG_P (op1
)
8978 && ((INT_REG_OK_FOR_BASE_P (op0
, strict
)
8979 && INT_REG_OK_FOR_INDEX_P (op1
, strict
))
8980 || (INT_REG_OK_FOR_BASE_P (op1
, strict
)
8981 && INT_REG_OK_FOR_INDEX_P (op0
, strict
))));
8985 avoiding_indexed_address_p (machine_mode mode
)
8987 unsigned int msize
= GET_MODE_SIZE (mode
);
8989 /* Avoid indexed addressing for modes that have non-indexed load/store
8990 instruction forms. On power10, vector pairs have an indexed
8991 form, but vector quads don't. */
8995 return (TARGET_AVOID_XFORM
&& VECTOR_MEM_NONE_P (mode
));
8999 legitimate_indirect_address_p (rtx x
, int strict
)
9001 return REG_P (x
) && INT_REG_OK_FOR_BASE_P (x
, strict
);
9005 macho_lo_sum_memory_operand (rtx x
, machine_mode mode
)
9007 if (!TARGET_MACHO
|| !flag_pic
9008 || mode
!= SImode
|| !MEM_P (x
))
9012 if (GET_CODE (x
) != LO_SUM
)
9014 if (!REG_P (XEXP (x
, 0)))
9016 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), 0))
9020 return CONSTANT_P (x
);
9024 legitimate_lo_sum_address_p (machine_mode mode
, rtx x
, int strict
)
9026 if (GET_CODE (x
) != LO_SUM
)
9028 if (!REG_P (XEXP (x
, 0)))
9030 if (!INT_REG_OK_FOR_BASE_P (XEXP (x
, 0), strict
))
9032 /* quad word addresses are restricted, and we can't use LO_SUM. */
9033 if (mode_supports_dq_form (mode
))
9041 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
)
9043 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
9044 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9045 recognizes some LO_SUM addresses as valid although this
9046 function says opposite. In most cases, LRA through different
9047 transformations can generate correct code for address reloads.
9048 It cannot manage only some LO_SUM cases. So we need to add
9049 code here saying that some addresses are still valid. */
9050 large_toc_ok
= (lra_in_progress
&& TARGET_CMODEL
!= CMODEL_SMALL
9051 && small_toc_ref (x
, VOIDmode
));
9052 if (TARGET_TOC
&& ! large_toc_ok
)
9054 if (GET_MODE_NUNITS (mode
) != 1)
9056 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
9057 && !(/* ??? Assume floating point reg based on mode? */
9058 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
9061 return CONSTANT_P (x
) || large_toc_ok
;
9063 else if (TARGET_MACHO
)
9065 if (GET_MODE_NUNITS (mode
) != 1)
9067 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
9068 && !(/* see above */
9069 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
9072 if (MACHO_DYNAMIC_NO_PIC_P
|| !flag_pic
)
9073 return CONSTANT_P (x
);
9075 /* Macho-O PIC code from here. */
9076 if (GET_CODE (x
) == CONST
)
9079 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */
9080 if (SYMBOL_REF_P (x
))
9083 /* So this is OK if the wrapped object is const. */
9084 if (GET_CODE (x
) == UNSPEC
9085 && XINT (x
, 1) == UNSPEC_MACHOPIC_OFFSET
)
9086 return CONSTANT_P (XVECEXP (x
, 0, 0));
9087 return CONSTANT_P (x
);
9093 /* Try machine-dependent ways of modifying an illegitimate address
9094 to be legitimate. If we find one, return the new, valid address.
9095 This is used from only one place: `memory_address' in explow.cc.
9097 OLDX is the address as it was before break_out_memory_refs was
9098 called. In some cases it is useful to look at this to decide what
9101 It is always safe for this function to do nothing. It exists to
9102 recognize opportunities to optimize the output.
9104 On RS/6000, first check for the sum of a register with a constant
9105 integer that is out of range. If so, generate code to add the
9106 constant with the low-order 16 bits masked to the register and force
9107 this result into another register (this can be done with `cau').
9108 Then generate an address of REG+(CONST&0xffff), allowing for the
9109 possibility of bit 16 being a one.
9111 Then check for the sum of a register and something not constant, try to
9112 load the other things into a register and return the sum. */
9115 rs6000_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
9120 if (!reg_offset_addressing_ok_p (mode
)
9121 || mode_supports_dq_form (mode
))
9123 if (virtual_stack_registers_memory_p (x
))
9126 /* In theory we should not be seeing addresses of the form reg+0,
9127 but just in case it is generated, optimize it away. */
9128 if (GET_CODE (x
) == PLUS
&& XEXP (x
, 1) == const0_rtx
)
9129 return force_reg (Pmode
, XEXP (x
, 0));
9131 /* For TImode with load/store quad, restrict addresses to just a single
9132 pointer, so it works with both GPRs and VSX registers. */
9133 /* Make sure both operands are registers. */
9134 else if (GET_CODE (x
) == PLUS
9135 && (mode
!= TImode
|| !TARGET_VSX
))
9136 return gen_rtx_PLUS (Pmode
,
9137 force_reg (Pmode
, XEXP (x
, 0)),
9138 force_reg (Pmode
, XEXP (x
, 1)));
9140 return force_reg (Pmode
, x
);
9142 if (SYMBOL_REF_P (x
) && !TARGET_MACHO
)
9144 enum tls_model model
= SYMBOL_REF_TLS_MODEL (x
);
9146 return rs6000_legitimize_tls_address (x
, model
);
9158 /* As in legitimate_offset_address_p we do not assume
9159 worst-case. The mode here is just a hint as to the registers
9160 used. A TImode is usually in gprs, but may actually be in
9161 fprs. Leave worst-case scenario for reload to handle via
9162 insn constraints. PTImode is only GPRs. */
9169 if (GET_CODE (x
) == PLUS
9170 && REG_P (XEXP (x
, 0))
9171 && CONST_INT_P (XEXP (x
, 1))
9172 && ((unsigned HOST_WIDE_INT
) (INTVAL (XEXP (x
, 1)) + 0x8000)
9173 >= 0x10000 - extra
))
9175 HOST_WIDE_INT high_int
, low_int
;
9177 low_int
= sext_hwi (INTVAL (XEXP (x
, 1)), 16);
9178 if (low_int
>= 0x8000 - extra
)
9180 high_int
= INTVAL (XEXP (x
, 1)) - low_int
;
9181 sum
= force_operand (gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9182 gen_int_mode (high_int
, Pmode
)), 0);
9183 return plus_constant (Pmode
, sum
, low_int
);
9185 else if (GET_CODE (x
) == PLUS
9186 && REG_P (XEXP (x
, 0))
9187 && !CONST_INT_P (XEXP (x
, 1))
9188 && GET_MODE_NUNITS (mode
) == 1
9189 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9190 || (/* ??? Assume floating point reg based on mode? */
9191 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
)))
9192 && !avoiding_indexed_address_p (mode
))
9194 return gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
9195 force_reg (Pmode
, force_operand (XEXP (x
, 1), 0)));
9197 else if ((TARGET_ELF
9199 || !MACHO_DYNAMIC_NO_PIC_P
9203 && TARGET_NO_TOC_OR_PCREL
9206 && !CONST_WIDE_INT_P (x
)
9207 && !CONST_DOUBLE_P (x
)
9209 && GET_MODE_NUNITS (mode
) == 1
9210 && (GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
9211 || (/* ??? Assume floating point reg based on mode? */
9212 TARGET_HARD_FLOAT
&& (mode
== DFmode
|| mode
== DDmode
))))
9214 rtx reg
= gen_reg_rtx (Pmode
);
9216 emit_insn (gen_elf_high (reg
, x
));
9218 emit_insn (gen_macho_high (Pmode
, reg
, x
));
9219 return gen_rtx_LO_SUM (Pmode
, reg
, x
);
9223 && constant_pool_expr_p (x
)
9224 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x
), Pmode
))
9225 return create_TOC_reference (x
, NULL_RTX
);
9230 /* Debug version of rs6000_legitimize_address. */
9232 rs6000_debug_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
)
9238 ret
= rs6000_legitimize_address (x
, oldx
, mode
);
9239 insns
= get_insns ();
9245 "\nrs6000_legitimize_address: mode %s, old code %s, "
9246 "new code %s, modified\n",
9247 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)),
9248 GET_RTX_NAME (GET_CODE (ret
)));
9250 fprintf (stderr
, "Original address:\n");
9253 fprintf (stderr
, "oldx:\n");
9256 fprintf (stderr
, "New address:\n");
9261 fprintf (stderr
, "Insns added:\n");
9262 debug_rtx_list (insns
, 20);
9268 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9269 GET_MODE_NAME (mode
), GET_RTX_NAME (GET_CODE (x
)));
9280 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9281 We need to emit DTP-relative relocations. */
9283 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
9285 rs6000_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
9290 fputs ("\t.long\t", file
);
9293 fputs (DOUBLE_INT_ASM_OP
, file
);
9298 output_addr_const (file
, x
);
9300 fputs ("@dtprel+0x8000", file
);
9303 /* Return true if X is a symbol that refers to real (rather than emulated)
9307 rs6000_real_tls_symbol_ref_p (rtx x
)
9309 return (SYMBOL_REF_P (x
)
9310 && SYMBOL_REF_TLS_MODEL (x
) >= TLS_MODEL_REAL
);
9313 /* In the name of slightly smaller debug output, and to cater to
9314 general assembler lossage, recognize various UNSPEC sequences
9315 and turn them back into a direct symbol reference. */
9318 rs6000_delegitimize_address (rtx orig_x
)
9322 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9323 encodes loading up the high part of the address of a TOC reference along
9324 with a load of a GPR using the same base register used for the load. We
9325 return the original SYMBOL_REF.
9327 (set (reg:INT1 <reg>
9328 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))
9330 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9331 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9332 We return the original SYMBOL_REF.
9334 (parallel [(set (reg:DI <base-reg>)
9335 (unspec:DI [(symbol_ref <symbol>)
9336 (const_int <marker>)]
9337 UNSPEC_PCREL_OPT_LD_ADDR))
9338 (set (reg:DI <load-reg>)
9339 (unspec:DI [(const_int 0)]
9340 UNSPEC_PCREL_OPT_LD_DATA))])
9342 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9343 GPR being loaded is the same as the GPR used to hold the external address.
9345 (set (reg:DI <base-reg>)
9346 (unspec:DI [(symbol_ref <symbol>)
9347 (const_int <marker>)]
9348 UNSPEC_PCREL_OPT_LD_SAME_REG))
9350 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9351 UNSPEC include the external SYMBOL_REF along with the value being loaded.
9352 We return the original SYMBOL_REF.
9354 (parallel [(set (reg:DI <base-reg>)
9355 (unspec:DI [(symbol_ref <symbol>)
9356 (const_int <marker>)]
9357 UNSPEC_PCREL_OPT_ST_ADDR))
9358 (use (reg <store-reg>))]) */
9360 if (GET_CODE (orig_x
) == UNSPEC
)
9361 switch (XINT (orig_x
, 1))
9363 case UNSPEC_FUSION_GPR
:
9364 case UNSPEC_PCREL_OPT_LD_ADDR
:
9365 case UNSPEC_PCREL_OPT_LD_SAME_REG
:
9366 case UNSPEC_PCREL_OPT_ST_ADDR
:
9367 orig_x
= XVECEXP (orig_x
, 0, 0);
9374 orig_x
= delegitimize_mem_from_attrs (orig_x
);
9381 if (TARGET_CMODEL
!= CMODEL_SMALL
&& GET_CODE (y
) == LO_SUM
)
9385 if (GET_CODE (y
) == PLUS
9386 && GET_MODE (y
) == Pmode
9387 && CONST_INT_P (XEXP (y
, 1)))
9389 offset
= XEXP (y
, 1);
9393 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_TOCREL
)
9395 y
= XVECEXP (y
, 0, 0);
9398 /* Do not associate thread-local symbols with the original
9399 constant pool symbol. */
9402 && CONSTANT_POOL_ADDRESS_P (y
)
9403 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y
)))
9407 if (offset
!= NULL_RTX
)
9408 y
= gen_rtx_PLUS (Pmode
, y
, offset
);
9409 if (!MEM_P (orig_x
))
9412 return replace_equiv_address_nv (orig_x
, y
);
9416 && GET_CODE (orig_x
) == LO_SUM
9417 && GET_CODE (XEXP (orig_x
, 1)) == CONST
)
9419 y
= XEXP (XEXP (orig_x
, 1), 0);
9420 if (GET_CODE (y
) == UNSPEC
&& XINT (y
, 1) == UNSPEC_MACHOPIC_OFFSET
)
9421 return XVECEXP (y
, 0, 0);
9427 /* Return true if X shouldn't be emitted into the debug info.
9428 The linker doesn't like .toc section references from
9429 .debug_* sections, so reject .toc section symbols. */
9432 rs6000_const_not_ok_for_debug_p (rtx x
)
9434 if (GET_CODE (x
) == UNSPEC
)
9436 if (SYMBOL_REF_P (x
)
9437 && CONSTANT_POOL_ADDRESS_P (x
))
9439 rtx c
= get_pool_constant (x
);
9440 machine_mode cmode
= get_pool_mode (x
);
9441 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c
, cmode
))
9448 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9451 rs6000_legitimate_combined_insn (rtx_insn
*insn
)
9453 int icode
= INSN_CODE (insn
);
9455 /* Reject creating doloop insns. Combine should not be allowed
9456 to create these for a number of reasons:
9457 1) In a nested loop, if combine creates one of these in an
9458 outer loop and the register allocator happens to allocate ctr
9459 to the outer loop insn, then the inner loop can't use ctr.
9460 Inner loops ought to be more highly optimized.
9461 2) Combine often wants to create one of these from what was
9462 originally a three insn sequence, first combining the three
9463 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9464 allocated ctr, the splitter takes use back to the three insn
9465 sequence. It's better to stop combine at the two insn
9467 3) Faced with not being able to allocate ctr for ctrsi/crtdi
9468 insns, the register allocator sometimes uses floating point
9469 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9470 jump insn and output reloads are not implemented for jumps,
9471 the ctrsi/ctrdi splitters need to handle all possible cases.
9472 That's a pain, and it gets to be seriously difficult when a
9473 splitter that runs after reload needs memory to transfer from
9474 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9475 for the difficult case. It's better to not create problems
9476 in the first place. */
9477 if (icode
!= CODE_FOR_nothing
9478 && (icode
== CODE_FOR_bdz_si
9479 || icode
== CODE_FOR_bdz_di
9480 || icode
== CODE_FOR_bdnz_si
9481 || icode
== CODE_FOR_bdnz_di
9482 || icode
== CODE_FOR_bdztf_si
9483 || icode
== CODE_FOR_bdztf_di
9484 || icode
== CODE_FOR_bdnztf_si
9485 || icode
== CODE_FOR_bdnztf_di
))
9491 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9493 static GTY(()) rtx rs6000_tls_symbol
;
9495 rs6000_tls_get_addr (void)
9497 if (!rs6000_tls_symbol
)
9498 rs6000_tls_symbol
= init_one_libfunc ("__tls_get_addr");
9500 return rs6000_tls_symbol
;
9503 /* Construct the SYMBOL_REF for TLS GOT references. */
9505 static GTY(()) rtx rs6000_got_symbol
;
9507 rs6000_got_sym (void)
9509 if (!rs6000_got_symbol
)
9511 rs6000_got_symbol
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
9512 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_LOCAL
;
9513 SYMBOL_REF_FLAGS (rs6000_got_symbol
) |= SYMBOL_FLAG_EXTERNAL
;
9516 return rs6000_got_symbol
;
9519 /* AIX Thread-Local Address support. */
9522 rs6000_legitimize_tls_address_aix (rtx addr
, enum tls_model model
)
9524 rtx sym
, mem
, tocref
, tlsreg
, tmpreg
, dest
;
9528 /* Place addr into TOC constant pool. */
9529 sym
= force_const_mem (GET_MODE (addr
), addr
);
9531 /* Output the TOC entry and create the MEM referencing the value. */
9532 if (constant_pool_expr_p (XEXP (sym
, 0))
9533 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym
, 0)), Pmode
))
9535 tocref
= create_TOC_reference (XEXP (sym
, 0), NULL_RTX
);
9536 mem
= gen_const_mem (Pmode
, tocref
);
9537 set_mem_alias_set (mem
, get_TOC_alias_set ());
9542 /* Use global-dynamic for local-dynamic. */
9543 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
9544 || model
== TLS_MODEL_LOCAL_DYNAMIC
)
9546 /* Create new TOC reference for @m symbol. */
9547 name
= XSTR (XVECEXP (XEXP (mem
, 0), 0, 0), 0);
9548 tlsname
= XALLOCAVEC (char, strlen (name
) + 1);
9549 strcpy (tlsname
, "*LCM");
9550 strcat (tlsname
, name
+ 3);
9551 rtx modaddr
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (tlsname
));
9552 SYMBOL_REF_FLAGS (modaddr
) |= SYMBOL_FLAG_LOCAL
;
9553 tocref
= create_TOC_reference (modaddr
, NULL_RTX
);
9554 rtx modmem
= gen_const_mem (Pmode
, tocref
);
9555 set_mem_alias_set (modmem
, get_TOC_alias_set ());
9557 rtx modreg
= gen_reg_rtx (Pmode
);
9558 emit_insn (gen_rtx_SET (modreg
, modmem
));
9560 tmpreg
= gen_reg_rtx (Pmode
);
9561 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9563 dest
= gen_reg_rtx (Pmode
);
9565 emit_insn (gen_tls_get_addrsi (dest
, modreg
, tmpreg
));
9567 emit_insn (gen_tls_get_addrdi (dest
, modreg
, tmpreg
));
9570 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9571 else if (TARGET_32BIT
)
9573 tlsreg
= gen_reg_rtx (SImode
);
9574 emit_insn (gen_tls_get_tpointer (tlsreg
));
9578 tlsreg
= gen_rtx_REG (DImode
, 13);
9579 xcoff_tls_exec_model_detected
= true;
9582 /* Load the TOC value into temporary register. */
9583 tmpreg
= gen_reg_rtx (Pmode
);
9584 emit_insn (gen_rtx_SET (tmpreg
, mem
));
9585 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
9586 gen_rtx_MINUS (Pmode
, addr
, tlsreg
));
9588 /* Add TOC symbol value to TLS pointer. */
9589 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tmpreg
, tlsreg
));
9594 /* Passes the tls arg value for global dynamic and local dynamic
9595 emit_library_call_value in rs6000_legitimize_tls_address to
9596 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9597 marker relocs put on __tls_get_addr calls. */
9598 static rtx global_tlsarg
;
9600 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9601 this (thread-local) address. */
9604 rs6000_legitimize_tls_address (rtx addr
, enum tls_model model
)
9609 return rs6000_legitimize_tls_address_aix (addr
, model
);
9611 dest
= gen_reg_rtx (Pmode
);
9612 if (model
== TLS_MODEL_LOCAL_EXEC
9613 && (rs6000_tls_size
== 16 || rs6000_pcrel_p ()))
9619 tlsreg
= gen_rtx_REG (Pmode
, 13);
9620 insn
= gen_tls_tprel_64 (dest
, tlsreg
, addr
);
9624 tlsreg
= gen_rtx_REG (Pmode
, 2);
9625 insn
= gen_tls_tprel_32 (dest
, tlsreg
, addr
);
9629 else if (model
== TLS_MODEL_LOCAL_EXEC
&& rs6000_tls_size
== 32)
9633 tmp
= gen_reg_rtx (Pmode
);
9636 tlsreg
= gen_rtx_REG (Pmode
, 13);
9637 insn
= gen_tls_tprel_ha_64 (tmp
, tlsreg
, addr
);
9641 tlsreg
= gen_rtx_REG (Pmode
, 2);
9642 insn
= gen_tls_tprel_ha_32 (tmp
, tlsreg
, addr
);
9646 insn
= gen_tls_tprel_lo_64 (dest
, tmp
, addr
);
9648 insn
= gen_tls_tprel_lo_32 (dest
, tmp
, addr
);
9653 rtx got
, tga
, tmp1
, tmp2
;
9655 /* We currently use relocations like @got@tlsgd for tls, which
9656 means the linker will handle allocation of tls entries, placing
9657 them in the .got section. So use a pointer to the .got section,
9658 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9659 or to secondary GOT sections used by 32-bit -fPIC. */
9660 if (rs6000_pcrel_p ())
9662 else if (TARGET_64BIT
)
9663 got
= gen_rtx_REG (Pmode
, 2);
9667 got
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
9670 rtx gsym
= rs6000_got_sym ();
9671 got
= gen_reg_rtx (Pmode
);
9673 rs6000_emit_move (got
, gsym
, Pmode
);
9678 tmp1
= gen_reg_rtx (Pmode
);
9679 tmp2
= gen_reg_rtx (Pmode
);
9680 mem
= gen_const_mem (Pmode
, tmp1
);
9681 lab
= gen_label_rtx ();
9682 emit_insn (gen_load_toc_v4_PIC_1b (gsym
, lab
));
9683 emit_move_insn (tmp1
, gen_rtx_REG (Pmode
, LR_REGNO
));
9684 if (TARGET_LINK_STACK
)
9685 emit_insn (gen_addsi3 (tmp1
, tmp1
, GEN_INT (4)));
9686 emit_move_insn (tmp2
, mem
);
9687 rtx_insn
*last
= emit_insn (gen_addsi3 (got
, tmp1
, tmp2
));
9688 set_unique_reg_note (last
, REG_EQUAL
, gsym
);
9693 if (model
== TLS_MODEL_GLOBAL_DYNAMIC
)
9695 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, addr
, got
),
9697 tga
= rs6000_tls_get_addr ();
9698 rtx argreg
= gen_rtx_REG (Pmode
, 3);
9699 emit_insn (gen_rtx_SET (argreg
, arg
));
9700 global_tlsarg
= arg
;
9701 emit_library_call_value (tga
, dest
, LCT_CONST
, Pmode
, argreg
, Pmode
);
9702 global_tlsarg
= NULL_RTX
;
9704 /* Make a note so that the result of this call can be CSEd. */
9705 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
9706 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
9707 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
9709 else if (model
== TLS_MODEL_LOCAL_DYNAMIC
)
9711 rtx arg
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, got
), UNSPEC_TLSLD
);
9712 tga
= rs6000_tls_get_addr ();
9713 tmp1
= gen_reg_rtx (Pmode
);
9714 rtx argreg
= gen_rtx_REG (Pmode
, 3);
9715 emit_insn (gen_rtx_SET (argreg
, arg
));
9716 global_tlsarg
= arg
;
9717 emit_library_call_value (tga
, tmp1
, LCT_CONST
, Pmode
, argreg
, Pmode
);
9718 global_tlsarg
= NULL_RTX
;
9720 /* Make a note so that the result of this call can be CSEd. */
9721 rtvec vec
= gen_rtvec (1, copy_rtx (arg
));
9722 rtx uns
= gen_rtx_UNSPEC (Pmode
, vec
, UNSPEC_TLS_GET_ADDR
);
9723 set_unique_reg_note (get_last_insn (), REG_EQUAL
, uns
);
9725 if (rs6000_tls_size
== 16 || rs6000_pcrel_p ())
9728 insn
= gen_tls_dtprel_64 (dest
, tmp1
, addr
);
9730 insn
= gen_tls_dtprel_32 (dest
, tmp1
, addr
);
9732 else if (rs6000_tls_size
== 32)
9734 tmp2
= gen_reg_rtx (Pmode
);
9736 insn
= gen_tls_dtprel_ha_64 (tmp2
, tmp1
, addr
);
9738 insn
= gen_tls_dtprel_ha_32 (tmp2
, tmp1
, addr
);
9741 insn
= gen_tls_dtprel_lo_64 (dest
, tmp2
, addr
);
9743 insn
= gen_tls_dtprel_lo_32 (dest
, tmp2
, addr
);
9747 tmp2
= gen_reg_rtx (Pmode
);
9749 insn
= gen_tls_got_dtprel_64 (tmp2
, got
, addr
);
9751 insn
= gen_tls_got_dtprel_32 (tmp2
, got
, addr
);
9753 insn
= gen_rtx_SET (dest
, gen_rtx_PLUS (Pmode
, tmp2
, tmp1
));
9759 /* IE, or 64-bit offset LE. */
9760 tmp2
= gen_reg_rtx (Pmode
);
9762 insn
= gen_tls_got_tprel_64 (tmp2
, got
, addr
);
9764 insn
= gen_tls_got_tprel_32 (tmp2
, got
, addr
);
9766 if (rs6000_pcrel_p ())
9769 insn
= gen_tls_tls_pcrel_64 (dest
, tmp2
, addr
);
9771 insn
= gen_tls_tls_pcrel_32 (dest
, tmp2
, addr
);
9773 else if (TARGET_64BIT
)
9774 insn
= gen_tls_tls_64 (dest
, tmp2
, addr
);
9776 insn
= gen_tls_tls_32 (dest
, tmp2
, addr
);
9784 /* Only create the global variable for the stack protect guard if we are using
9785 the global flavor of that guard. */
9787 rs6000_init_stack_protect_guard (void)
9789 if (rs6000_stack_protector_guard
== SSP_GLOBAL
)
9790 return default_stack_protect_guard ();
9795 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9798 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
9800 /* If GET_CODE (x) is HIGH, the 'X' represets the high part of a symbol_ref.
9801 It can not be put into a constant pool. e.g.
9802 (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..)
9803 (high:DI (symbol_ref:DI ("var")..)). */
9804 if (GET_CODE (x
) == HIGH
)
9807 /* A TLS symbol in the TOC cannot contain a sum. */
9808 if (GET_CODE (x
) == CONST
9809 && GET_CODE (XEXP (x
, 0)) == PLUS
9810 && SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
9811 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0)) != 0)
9814 /* Allow AIX TOC TLS symbols in the constant pool,
9815 but not ELF TLS symbols. */
9816 return TARGET_ELF
&& tls_referenced_p (x
);
9819 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9820 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9821 can be addressed relative to the toc pointer. */
9824 use_toc_relative_ref (rtx sym
, machine_mode mode
)
9826 return ((constant_pool_expr_p (sym
)
9827 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym
),
9828 get_pool_mode (sym
)))
9829 || (TARGET_CMODEL
== CMODEL_MEDIUM
9830 && SYMBOL_REF_LOCAL_P (sym
)
9831 && GET_MODE_SIZE (mode
) <= POWERPC64_TOC_POINTER_ALIGNMENT
));
9834 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9835 that is a valid memory address for an instruction.
9836 The MODE argument is the machine mode for the MEM expression
9837 that wants to use this address.
9839 On the RS/6000, there are four valid address: a SYMBOL_REF that
9840 refers to a constant pool entry of an address (or the sum of it
9841 plus a constant), a short (16-bit signed) constant plus a register,
9842 the sum of two registers, or a register indirect, possibly with an
9843 auto-increment. For DFmode, DDmode and DImode with a constant plus
9844 register, we must ensure that both words are addressable or PowerPC64
9845 with offset word aligned.
9847 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9848 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9849 because adjacent memory cells are accessed by adding word-sized offsets
9850 during assembly output. */
9852 rs6000_legitimate_address_p (machine_mode mode
, rtx x
, bool reg_ok_strict
)
9854 bool reg_offset_p
= reg_offset_addressing_ok_p (mode
);
9855 bool quad_offset_p
= mode_supports_dq_form (mode
);
9857 if (TARGET_ELF
&& RS6000_SYMBOL_REF_TLS_P (x
))
9860 /* Handle unaligned altivec lvx/stvx type addresses. */
9861 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
)
9862 && GET_CODE (x
) == AND
9863 && CONST_INT_P (XEXP (x
, 1))
9864 && INTVAL (XEXP (x
, 1)) == -16)
9867 return (legitimate_indirect_address_p (x
, reg_ok_strict
)
9868 || legitimate_indexed_address_p (x
, reg_ok_strict
)
9869 || virtual_stack_registers_memory_p (x
));
9872 if (legitimate_indirect_address_p (x
, reg_ok_strict
))
9875 && (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == PRE_DEC
)
9876 && mode_supports_pre_incdec_p (mode
)
9877 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
))
9880 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9881 if (address_is_prefixed (x
, mode
, NON_PREFIXED_DEFAULT
))
9884 /* Handle restricted vector d-form offsets in ISA 3.0. */
9887 if (quad_address_p (x
, mode
, reg_ok_strict
))
9890 else if (virtual_stack_registers_memory_p (x
))
9893 else if (reg_offset_p
)
9895 if (legitimate_small_data_p (mode
, x
))
9897 if (legitimate_constant_pool_address_p (x
, mode
,
9898 reg_ok_strict
|| lra_in_progress
))
9902 /* For TImode, if we have TImode in VSX registers, only allow register
9903 indirect addresses. This will allow the values to go in either GPRs
9904 or VSX registers without reloading. The vector types would tend to
9905 go into VSX registers, so we allow REG+REG, while TImode seems
9906 somewhat split, in that some uses are GPR based, and some VSX based. */
9907 /* FIXME: We could loosen this by changing the following to
9908 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9909 but currently we cannot allow REG+REG addressing for TImode. See
9910 PR72827 for complete details on how this ends up hoodwinking DSE. */
9911 if (mode
== TImode
&& TARGET_VSX
)
9913 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9916 && GET_CODE (x
) == PLUS
9917 && REG_P (XEXP (x
, 0))
9918 && (XEXP (x
, 0) == virtual_stack_vars_rtx
9919 || XEXP (x
, 0) == arg_pointer_rtx
)
9920 && CONST_INT_P (XEXP (x
, 1)))
9922 if (rs6000_legitimate_offset_address_p (mode
, x
, reg_ok_strict
, false))
9924 if (!FLOAT128_2REG_P (mode
)
9925 && (TARGET_HARD_FLOAT
9927 || (mode
!= DFmode
&& mode
!= DDmode
))
9928 && (TARGET_POWERPC64
|| mode
!= DImode
)
9929 && (mode
!= TImode
|| VECTOR_MEM_VSX_P (TImode
))
9931 && !avoiding_indexed_address_p (mode
)
9932 && legitimate_indexed_address_p (x
, reg_ok_strict
))
9934 if (TARGET_UPDATE
&& GET_CODE (x
) == PRE_MODIFY
9935 && mode_supports_pre_modify_p (mode
)
9936 && legitimate_indirect_address_p (XEXP (x
, 0), reg_ok_strict
)
9937 && (rs6000_legitimate_offset_address_p (mode
, XEXP (x
, 1),
9938 reg_ok_strict
, false)
9939 || (!avoiding_indexed_address_p (mode
)
9940 && legitimate_indexed_address_p (XEXP (x
, 1), reg_ok_strict
)))
9941 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
9943 /* There is no prefixed version of the load/store with update. */
9944 rtx addr
= XEXP (x
, 1);
9945 return !address_is_prefixed (addr
, mode
, NON_PREFIXED_DEFAULT
);
9947 if (reg_offset_p
&& !quad_offset_p
9948 && legitimate_lo_sum_address_p (mode
, x
, reg_ok_strict
))
9953 /* Debug version of rs6000_legitimate_address_p. */
9955 rs6000_debug_legitimate_address_p (machine_mode mode
, rtx x
,
9958 bool ret
= rs6000_legitimate_address_p (mode
, x
, reg_ok_strict
);
9960 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9961 "strict = %d, reload = %s, code = %s\n",
9962 ret
? "true" : "false",
9963 GET_MODE_NAME (mode
),
9965 (reload_completed
? "after" : "before"),
9966 GET_RTX_NAME (GET_CODE (x
)));
9972 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9975 rs6000_mode_dependent_address_p (const_rtx addr
,
9976 addr_space_t as ATTRIBUTE_UNUSED
)
9978 return rs6000_mode_dependent_address_ptr (addr
);
9981 /* Go to LABEL if ADDR (a legitimate address expression)
9982 has an effect that depends on the machine mode it is used for.
9984 On the RS/6000 this is true of all integral offsets (since AltiVec
9985 and VSX modes don't allow them) or is a pre-increment or decrement.
9987 ??? Except that due to conceptual problems in offsettable_address_p
9988 we can't really report the problems of integral offsets. So leave
9989 this assuming that the adjustable offset must be valid for the
9990 sub-words of a TFmode operand, which is what we had before. */
9993 rs6000_mode_dependent_address (const_rtx addr
)
9995 switch (GET_CODE (addr
))
9998 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9999 is considered a legitimate address before reload, so there
10000 are no offset restrictions in that case. Note that this
10001 condition is safe in strict mode because any address involving
10002 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10003 been rejected as illegitimate. */
10004 if (XEXP (addr
, 0) != virtual_stack_vars_rtx
10005 && XEXP (addr
, 0) != arg_pointer_rtx
10006 && CONST_INT_P (XEXP (addr
, 1)))
10008 HOST_WIDE_INT val
= INTVAL (XEXP (addr
, 1));
10009 HOST_WIDE_INT extra
= TARGET_POWERPC64
? 8 : 12;
10010 if (TARGET_PREFIXED
)
10011 return !SIGNED_34BIT_OFFSET_EXTRA_P (val
, extra
);
10013 return !SIGNED_16BIT_OFFSET_EXTRA_P (val
, extra
);
10018 /* Anything in the constant pool is sufficiently aligned that
10019 all bytes have the same high part address. */
10020 return !legitimate_constant_pool_address_p (addr
, QImode
, false);
10022 /* Auto-increment cases are now treated generically in recog.cc. */
10024 return TARGET_UPDATE
;
10026 /* AND is only allowed in Altivec loads. */
10037 /* Debug version of rs6000_mode_dependent_address. */
10039 rs6000_debug_mode_dependent_address (const_rtx addr
)
10041 bool ret
= rs6000_mode_dependent_address (addr
);
10043 fprintf (stderr
, "\nrs6000_mode_dependent_address: ret = %s\n",
10044 ret
? "true" : "false");
10050 /* Implement FIND_BASE_TERM. */
10053 rs6000_find_base_term (rtx op
)
10058 if (GET_CODE (base
) == CONST
)
10059 base
= XEXP (base
, 0);
10060 if (GET_CODE (base
) == PLUS
)
10061 base
= XEXP (base
, 0);
10062 if (GET_CODE (base
) == UNSPEC
)
10063 switch (XINT (base
, 1))
10065 case UNSPEC_TOCREL
:
10066 case UNSPEC_MACHOPIC_OFFSET
:
10067 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10068 for aliasing purposes. */
10069 return XVECEXP (base
, 0, 0);
10075 /* More elaborate version of recog's offsettable_memref_p predicate
10076 that works around the ??? note of rs6000_mode_dependent_address.
10077 In particular it accepts
10079 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10081 in 32-bit mode, that the recog predicate rejects. */
10084 rs6000_offsettable_memref_p (rtx op
, machine_mode reg_mode
, bool strict
)
10091 /* First mimic offsettable_memref_p. */
10092 if (offsettable_address_p (strict
, GET_MODE (op
), XEXP (op
, 0)))
10095 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10096 the latter predicate knows nothing about the mode of the memory
10097 reference and, therefore, assumes that it is the largest supported
10098 mode (TFmode). As a consequence, legitimate offsettable memory
10099 references are rejected. rs6000_legitimate_offset_address_p contains
10100 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10101 at least with a little bit of help here given that we know the
10102 actual registers used. */
10103 worst_case
= ((TARGET_POWERPC64
&& GET_MODE_CLASS (reg_mode
) == MODE_INT
)
10104 || GET_MODE_SIZE (reg_mode
) == 4);
10105 return rs6000_legitimate_offset_address_p (GET_MODE (op
), XEXP (op
, 0),
10106 strict
, worst_case
);
10109 /* Determine the reassociation width to be used in reassociate_bb.
10110 This takes into account how many parallel operations we
10111 can actually do of a given type, and also the latency.
10113 int add/sub 6/cycle
10115 vect add/sub/mul 2/cycle
10116 fp add/sub/mul 2/cycle
10121 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
10124 switch (rs6000_tune
)
10126 case PROCESSOR_POWER8
:
10127 case PROCESSOR_POWER9
:
10128 case PROCESSOR_POWER10
:
10129 if (DECIMAL_FLOAT_MODE_P (mode
))
10131 if (VECTOR_MODE_P (mode
))
10133 if (INTEGRAL_MODE_P (mode
))
10135 if (FLOAT_MODE_P (mode
))
10144 /* Change register usage conditional on target flags. */
10146 rs6000_conditional_register_usage (void)
10150 if (TARGET_DEBUG_TARGET
)
10151 fprintf (stderr
, "rs6000_conditional_register_usage called\n");
10153 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10155 fixed_regs
[13] = call_used_regs
[13] = 1;
10157 /* Conditionally disable FPRs. */
10158 if (TARGET_SOFT_FLOAT
)
10159 for (i
= 32; i
< 64; i
++)
10160 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10162 /* The TOC register is not killed across calls in a way that is
10163 visible to the compiler. */
10164 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
10165 call_used_regs
[2] = 0;
10167 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 2)
10168 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10170 if (DEFAULT_ABI
== ABI_V4
&& flag_pic
== 1)
10171 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10172 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10174 if (DEFAULT_ABI
== ABI_DARWIN
&& flag_pic
)
10175 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
]
10176 = call_used_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10178 if (TARGET_TOC
&& TARGET_MINIMAL_TOC
)
10179 fixed_regs
[RS6000_PIC_OFFSET_TABLE_REGNUM
] = 1;
10181 if (!TARGET_ALTIVEC
&& !TARGET_VSX
)
10183 for (i
= FIRST_ALTIVEC_REGNO
; i
<= LAST_ALTIVEC_REGNO
; ++i
)
10184 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10185 call_used_regs
[VRSAVE_REGNO
] = 1;
10188 if (TARGET_ALTIVEC
|| TARGET_VSX
)
10189 global_regs
[VSCR_REGNO
] = 1;
10191 if (TARGET_ALTIVEC_ABI
)
10193 for (i
= FIRST_ALTIVEC_REGNO
; i
< FIRST_ALTIVEC_REGNO
+ 20; ++i
)
10194 call_used_regs
[i
] = 1;
10196 /* AIX reserves VR20:31 in non-extended ABI mode. */
10197 if (TARGET_XCOFF
&& !rs6000_aix_extabi
)
10198 for (i
= FIRST_ALTIVEC_REGNO
+ 20; i
< FIRST_ALTIVEC_REGNO
+ 32; ++i
)
10199 fixed_regs
[i
] = call_used_regs
[i
] = 1;
10204 /* Output insns to set DEST equal to the constant SOURCE as a series of
10205 lis, ori and shl instructions and return TRUE. */
10208 rs6000_emit_set_const (rtx dest
, rtx source
)
10210 machine_mode mode
= GET_MODE (dest
);
10215 gcc_checking_assert (CONST_INT_P (source
));
10216 c
= INTVAL (source
);
10221 emit_insn (gen_rtx_SET (dest
, source
));
10225 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (SImode
);
10227 emit_insn (gen_rtx_SET (temp
, GEN_INT (c
& ~(HOST_WIDE_INT
) 0xffff)));
10228 emit_insn (gen_rtx_SET (dest
,
10229 gen_rtx_IOR (SImode
, temp
,
10230 GEN_INT (c
& 0xffff))));
10234 if (!TARGET_POWERPC64
)
10238 hi
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
== 0, DImode
);
10239 lo
= operand_subword_force (dest
, WORDS_BIG_ENDIAN
!= 0, DImode
);
10240 emit_move_insn (hi
, GEN_INT (c
>> 32));
10241 c
= sext_hwi (c
, 32);
10242 emit_move_insn (lo
, GEN_INT (c
));
10245 rs6000_emit_set_long_const (dest
, c
);
10249 gcc_unreachable ();
10252 insn
= get_last_insn ();
10253 set
= single_set (insn
);
10254 if (! CONSTANT_P (SET_SRC (set
)))
10255 set_unique_reg_note (insn
, REG_EQUAL
, GEN_INT (c
));
10260 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10261 Output insns to set DEST equal to the constant C as a series of
10262 lis, ori and shl instructions. */
10265 rs6000_emit_set_long_const (rtx dest
, HOST_WIDE_INT c
)
10268 HOST_WIDE_INT ud1
, ud2
, ud3
, ud4
;
10278 if ((ud4
== 0xffff && ud3
== 0xffff && ud2
== 0xffff && (ud1
& 0x8000))
10279 || (ud4
== 0 && ud3
== 0 && ud2
== 0 && ! (ud1
& 0x8000)))
10280 emit_move_insn (dest
, GEN_INT (sext_hwi (ud1
, 16)));
10282 else if ((ud4
== 0xffff && ud3
== 0xffff && (ud2
& 0x8000))
10283 || (ud4
== 0 && ud3
== 0 && ! (ud2
& 0x8000)))
10285 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10287 emit_move_insn (ud1
!= 0 ? temp
: dest
,
10288 GEN_INT (sext_hwi (ud2
<< 16, 32)));
10290 emit_move_insn (dest
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
10292 else if (ud4
== 0xffff && ud3
== 0xffff && (ud1
& 0x8000))
10295 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10296 emit_move_insn (temp
, GEN_INT (sext_hwi (ud1
, 16)));
10297 emit_move_insn (dest
, gen_rtx_XOR (DImode
, temp
,
10298 GEN_INT ((ud2
^ 0xffff) << 16)));
10300 else if (ud3
== 0 && ud4
== 0)
10302 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10304 gcc_assert (ud2
& 0x8000);
10309 emit_move_insn (temp
, GEN_INT (sext_hwi (ud2
<< 16, 32)));
10310 emit_move_insn (dest
,
10311 gen_rtx_AND (DImode
, temp
, GEN_INT (0xffffffff)));
10313 else if (!(ud1
& 0x8000))
10316 emit_move_insn (temp
, GEN_INT (ud1
));
10317 emit_move_insn (dest
,
10318 gen_rtx_IOR (DImode
, temp
, GEN_INT (ud2
<< 16)));
10322 /* lis; ori; rldicl */
10323 emit_move_insn (temp
, GEN_INT (sext_hwi (ud2
<< 16, 32)));
10324 emit_move_insn (temp
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
10325 emit_move_insn (dest
,
10326 gen_rtx_AND (DImode
, temp
, GEN_INT (0xffffffff)));
10329 else if (ud1
== ud3
&& ud2
== ud4
)
10331 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10332 HOST_WIDE_INT num
= (ud2
<< 16) | ud1
;
10333 rs6000_emit_set_long_const (temp
, sext_hwi (num
, 32));
10334 rtx one
= gen_rtx_AND (DImode
, temp
, GEN_INT (0xffffffff));
10335 rtx two
= gen_rtx_ASHIFT (DImode
, temp
, GEN_INT (32));
10336 emit_move_insn (dest
, gen_rtx_IOR (DImode
, one
, two
));
10338 else if ((ud4
== 0xffff && (ud3
& 0x8000))
10339 || (ud4
== 0 && ! (ud3
& 0x8000)))
10341 temp
= !can_create_pseudo_p () ? dest
: gen_reg_rtx (DImode
);
10343 emit_move_insn (temp
, GEN_INT (sext_hwi (ud3
<< 16, 32)));
10345 emit_move_insn (temp
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud2
)));
10346 emit_move_insn (ud1
!= 0 ? temp
: dest
,
10347 gen_rtx_ASHIFT (DImode
, temp
, GEN_INT (16)));
10349 emit_move_insn (dest
, gen_rtx_IOR (DImode
, temp
, GEN_INT (ud1
)));
10351 else if (TARGET_PREFIXED
)
10353 if (can_create_pseudo_p ())
10355 /* pli A,L + pli B,H + rldimi A,B,32,0. */
10356 temp
= gen_reg_rtx (DImode
);
10357 rtx temp1
= gen_reg_rtx (DImode
);
10358 emit_move_insn (temp
, GEN_INT ((ud4
<< 16) | ud3
));
10359 emit_move_insn (temp1
, GEN_INT ((ud2
<< 16) | ud1
));
10361 emit_insn (gen_rotldi3_insert_3 (dest
, temp
, GEN_INT (32), temp1
,
10362 GEN_INT (0xffffffff)));
10366 /* pli A,H + sldi A,32 + paddi A,A,L. */
10367 emit_move_insn (dest
, GEN_INT ((ud4
<< 16) | ud3
));
10369 emit_move_insn (dest
, gen_rtx_ASHIFT (DImode
, dest
, GEN_INT (32)));
10371 bool can_use_paddi
= REGNO (dest
) != FIRST_GPR_REGNO
;
10373 /* Use paddi for the low 32 bits. */
10374 if (ud2
!= 0 && ud1
!= 0 && can_use_paddi
)
10375 emit_move_insn (dest
, gen_rtx_PLUS (DImode
, dest
,
10376 GEN_INT ((ud2
<< 16) | ud1
)));
10378 /* Use oris, ori for low 32 bits. */
10379 if (ud2
!= 0 && (ud1
== 0 || !can_use_paddi
))
10380 emit_move_insn (dest
,
10381 gen_rtx_IOR (DImode
, dest
, GEN_INT (ud2
<< 16)));
10382 if (ud1
!= 0 && (ud2
== 0 || !can_use_paddi
))
10383 emit_move_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud1
)));
10388 if (can_create_pseudo_p ())
10390 /* lis HIGH,UD4 ; ori HIGH,UD3 ;
10391 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */
10392 rtx high
= gen_reg_rtx (DImode
);
10393 rtx low
= gen_reg_rtx (DImode
);
10394 HOST_WIDE_INT num
= (ud2
<< 16) | ud1
;
10395 rs6000_emit_set_long_const (low
, sext_hwi (num
, 32));
10396 num
= (ud4
<< 16) | ud3
;
10397 rs6000_emit_set_long_const (high
, sext_hwi (num
, 32));
10398 emit_insn (gen_rotldi3_insert_3 (dest
, high
, GEN_INT (32), low
,
10399 GEN_INT (0xffffffff)));
10403 /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
10404 oris DEST,UD2 ; ori DEST,UD1. */
10405 emit_move_insn (dest
, GEN_INT (sext_hwi (ud4
<< 16, 32)));
10407 emit_move_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud3
)));
10409 emit_move_insn (dest
, gen_rtx_ASHIFT (DImode
, dest
, GEN_INT (32)));
10411 emit_move_insn (dest
,
10412 gen_rtx_IOR (DImode
, dest
, GEN_INT (ud2
<< 16)));
10414 emit_move_insn (dest
, gen_rtx_IOR (DImode
, dest
, GEN_INT (ud1
)));
10419 /* Helper for the following. Get rid of [r+r] memory refs
10420 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10423 rs6000_eliminate_indexed_memrefs (rtx operands
[2])
10425 if (MEM_P (operands
[0])
10426 && !REG_P (XEXP (operands
[0], 0))
10427 && ! legitimate_constant_pool_address_p (XEXP (operands
[0], 0),
10428 GET_MODE (operands
[0]), false))
10430 = replace_equiv_address (operands
[0],
10431 copy_addr_to_reg (XEXP (operands
[0], 0)));
10433 if (MEM_P (operands
[1])
10434 && !REG_P (XEXP (operands
[1], 0))
10435 && ! legitimate_constant_pool_address_p (XEXP (operands
[1], 0),
10436 GET_MODE (operands
[1]), false))
10438 = replace_equiv_address (operands
[1],
10439 copy_addr_to_reg (XEXP (operands
[1], 0)));
10442 /* Generate a vector of constants to permute MODE for a little-endian
10443 storage operation by swapping the two halves of a vector. */
10445 rs6000_const_vec (machine_mode mode
)
10473 v
= rtvec_alloc (subparts
);
10475 for (i
= 0; i
< subparts
/ 2; ++i
)
10476 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
+ subparts
/ 2);
10477 for (i
= subparts
/ 2; i
< subparts
; ++i
)
10478 RTVEC_ELT (v
, i
) = gen_rtx_CONST_INT (DImode
, i
- subparts
/ 2);
10483 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10484 store operation. */
10486 rs6000_emit_le_vsx_permute (rtx dest
, rtx source
, machine_mode mode
)
10488 gcc_assert (!altivec_indexed_or_indirect_operand (dest
, mode
));
10489 gcc_assert (!altivec_indexed_or_indirect_operand (source
, mode
));
10491 /* Scalar permutations are easier to express in integer modes rather than
10492 floating-point modes, so cast them here. We use V1TImode instead
10493 of TImode to ensure that the values don't go through GPRs. */
10494 if (FLOAT128_VECTOR_P (mode
))
10496 dest
= gen_lowpart (V1TImode
, dest
);
10497 source
= gen_lowpart (V1TImode
, source
);
10501 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10503 if (mode
== TImode
|| mode
== V1TImode
)
10504 emit_insn (gen_rtx_SET (dest
, gen_rtx_ROTATE (mode
, source
,
10508 rtx par
= gen_rtx_PARALLEL (VOIDmode
, rs6000_const_vec (mode
));
10509 emit_insn (gen_rtx_SET (dest
, gen_rtx_VEC_SELECT (mode
, source
, par
)));
10513 /* Emit a little-endian load from vector memory location SOURCE to VSX
10514 register DEST in mode MODE. The load is done with two permuting
10515 insn's that represent an lxvd2x and xxpermdi. */
10517 rs6000_emit_le_vsx_load (rtx dest
, rtx source
, machine_mode mode
)
10519 /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
10521 if (mode
== TImode
|| mode
== V1TImode
)
10524 dest
= gen_lowpart (V2DImode
, dest
);
10525 source
= adjust_address (source
, V2DImode
, 0);
10528 rtx tmp
= can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest
) : dest
;
10529 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
10530 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
10533 /* Emit a little-endian store to vector memory location DEST from VSX
10534 register SOURCE in mode MODE. The store is done with two permuting
10535 insn's that represent an xxpermdi and an stxvd2x. */
10537 rs6000_emit_le_vsx_store (rtx dest
, rtx source
, machine_mode mode
)
10539 /* This should never be called after LRA. */
10540 gcc_assert (can_create_pseudo_p ());
10542 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10544 if (mode
== TImode
|| mode
== V1TImode
)
10547 dest
= adjust_address (dest
, V2DImode
, 0);
10548 source
= gen_lowpart (V2DImode
, source
);
10551 rtx tmp
= gen_reg_rtx_and_attrs (source
);
10552 rs6000_emit_le_vsx_permute (tmp
, source
, mode
);
10553 rs6000_emit_le_vsx_permute (dest
, tmp
, mode
);
10556 /* Emit a sequence representing a little-endian VSX load or store,
10557 moving data from SOURCE to DEST in mode MODE. This is done
10558 separately from rs6000_emit_move to ensure it is called only
10559 during expand. LE VSX loads and stores introduced later are
10560 handled with a split. The expand-time RTL generation allows
10561 us to optimize away redundant pairs of register-permutes. */
10563 rs6000_emit_le_vsx_move (rtx dest
, rtx source
, machine_mode mode
)
10565 gcc_assert (!BYTES_BIG_ENDIAN
10566 && VECTOR_MEM_VSX_P (mode
)
10567 && !TARGET_P9_VECTOR
10568 && !gpr_or_gpr_p (dest
, source
)
10569 && (MEM_P (source
) ^ MEM_P (dest
)));
10571 if (MEM_P (source
))
10573 gcc_assert (REG_P (dest
) || SUBREG_P (dest
));
10574 rs6000_emit_le_vsx_load (dest
, source
, mode
);
10578 if (!REG_P (source
))
10579 source
= force_reg (mode
, source
);
10580 rs6000_emit_le_vsx_store (dest
, source
, mode
);
10584 /* Return whether a SFmode or SImode move can be done without converting one
10585 mode to another. This arrises when we have:
10587 (SUBREG:SF (REG:SI ...))
10588 (SUBREG:SI (REG:SF ...))
10590 and one of the values is in a floating point/vector register, where SFmode
10591 scalars are stored in DFmode format. */
10594 valid_sf_si_move (rtx dest
, rtx src
, machine_mode mode
)
10596 if (TARGET_ALLOW_SF_SUBREG
)
10599 if (mode
!= SFmode
&& GET_MODE_CLASS (mode
) != MODE_INT
)
10602 if (!SUBREG_P (src
) || !sf_subreg_operand (src
, mode
))
10605 /*. Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10606 if (SUBREG_P (dest
))
10608 rtx dest_subreg
= SUBREG_REG (dest
);
10609 rtx src_subreg
= SUBREG_REG (src
);
10610 return GET_MODE (dest_subreg
) == GET_MODE (src_subreg
);
10617 /* Helper function to change moves with:
10619 (SUBREG:SF (REG:SI)) and
10620 (SUBREG:SI (REG:SF))
10622 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10623 values are stored as DFmode values in the VSX registers. We need to convert
10624 the bits before we can use a direct move or operate on the bits in the
10625 vector register as an integer type.
10627 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
10630 rs6000_emit_move_si_sf_subreg (rtx dest
, rtx source
, machine_mode mode
)
10632 if (TARGET_DIRECT_MOVE_64BIT
&& !reload_completed
10633 && (!SUBREG_P (dest
) || !sf_subreg_operand (dest
, mode
))
10634 && SUBREG_P (source
) && sf_subreg_operand (source
, mode
))
10636 rtx inner_source
= SUBREG_REG (source
);
10637 machine_mode inner_mode
= GET_MODE (inner_source
);
10639 if (mode
== SImode
&& inner_mode
== SFmode
)
10641 emit_insn (gen_movsi_from_sf (dest
, inner_source
));
10645 if (mode
== SFmode
&& inner_mode
== SImode
)
10647 emit_insn (gen_movsf_from_si (dest
, inner_source
));
10655 /* Emit a move from SOURCE to DEST in mode MODE. */
10657 rs6000_emit_move (rtx dest
, rtx source
, machine_mode mode
)
10660 operands
[0] = dest
;
10661 operands
[1] = source
;
10663 if (TARGET_DEBUG_ADDR
)
10666 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10667 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10668 GET_MODE_NAME (mode
),
10671 can_create_pseudo_p ());
10673 fprintf (stderr
, "source:\n");
10674 debug_rtx (source
);
10677 /* Check that we get CONST_WIDE_INT only when we should. */
10678 if (CONST_WIDE_INT_P (operands
[1])
10679 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
10680 gcc_unreachable ();
10682 #ifdef HAVE_AS_GNU_ATTRIBUTE
10683 /* If we use a long double type, set the flags in .gnu_attribute that say
10684 what the long double type is. This is to allow the linker's warning
10685 message for the wrong long double to be useful, even if the function does
10686 not do a call (for example, doing a 128-bit add on power9 if the long
10687 double type is IEEE 128-bit. Do not set this if __ibm128 or __floa128 are
10688 used if they aren't the default long dobule type. */
10689 if (rs6000_gnu_attr
&& (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
|| TARGET_64BIT
))
10691 if (TARGET_LONG_DOUBLE_128
&& (mode
== TFmode
|| mode
== TCmode
))
10692 rs6000_passes_float
= rs6000_passes_long_double
= true;
10694 else if (!TARGET_LONG_DOUBLE_128
&& (mode
== DFmode
|| mode
== DCmode
))
10695 rs6000_passes_float
= rs6000_passes_long_double
= true;
10699 /* See if we need to special case SImode/SFmode SUBREG moves. */
10700 if ((mode
== SImode
|| mode
== SFmode
) && SUBREG_P (source
)
10701 && rs6000_emit_move_si_sf_subreg (dest
, source
, mode
))
10704 /* Check if GCC is setting up a block move that will end up using FP
10705 registers as temporaries. We must make sure this is acceptable. */
10706 if (MEM_P (operands
[0])
10707 && MEM_P (operands
[1])
10709 && (rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[0]))
10710 || rs6000_slow_unaligned_access (DImode
, MEM_ALIGN (operands
[1])))
10711 && ! (rs6000_slow_unaligned_access (SImode
,
10712 (MEM_ALIGN (operands
[0]) > 32
10713 ? 32 : MEM_ALIGN (operands
[0])))
10714 || rs6000_slow_unaligned_access (SImode
,
10715 (MEM_ALIGN (operands
[1]) > 32
10716 ? 32 : MEM_ALIGN (operands
[1]))))
10717 && ! MEM_VOLATILE_P (operands
[0])
10718 && ! MEM_VOLATILE_P (operands
[1]))
10720 emit_move_insn (adjust_address (operands
[0], SImode
, 0),
10721 adjust_address (operands
[1], SImode
, 0));
10722 emit_move_insn (adjust_address (copy_rtx (operands
[0]), SImode
, 4),
10723 adjust_address (copy_rtx (operands
[1]), SImode
, 4));
10727 if (can_create_pseudo_p () && MEM_P (operands
[0])
10728 && !gpc_reg_operand (operands
[1], mode
))
10729 operands
[1] = force_reg (mode
, operands
[1]);
10731 /* Recognize the case where operand[1] is a reference to thread-local
10732 data and load its address to a register. */
10733 if (tls_referenced_p (operands
[1]))
10735 enum tls_model model
;
10736 rtx tmp
= operands
[1];
10739 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
10741 addend
= XEXP (XEXP (tmp
, 0), 1);
10742 tmp
= XEXP (XEXP (tmp
, 0), 0);
10745 gcc_assert (SYMBOL_REF_P (tmp
));
10746 model
= SYMBOL_REF_TLS_MODEL (tmp
);
10747 gcc_assert (model
!= 0);
10749 tmp
= rs6000_legitimize_tls_address (tmp
, model
);
10752 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
10753 tmp
= force_operand (tmp
, operands
[0]);
10758 /* 128-bit constant floating-point values on Darwin should really be loaded
10759 as two parts. However, this premature splitting is a problem when DFmode
10760 values can go into Altivec registers. */
10761 if (TARGET_MACHO
&& CONST_DOUBLE_P (operands
[1]) && FLOAT128_IBM_P (mode
)
10762 && !reg_addr
[DFmode
].scalar_in_vmx_p
)
10764 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
, 0),
10765 simplify_gen_subreg (DFmode
, operands
[1], mode
, 0),
10767 rs6000_emit_move (simplify_gen_subreg (DFmode
, operands
[0], mode
,
10768 GET_MODE_SIZE (DFmode
)),
10769 simplify_gen_subreg (DFmode
, operands
[1], mode
,
10770 GET_MODE_SIZE (DFmode
)),
10775 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10776 p1:SD) if p1 is not of floating point class and p0 is spilled as
10777 we can have no analogous movsd_store for this. */
10778 if (lra_in_progress
&& mode
== DDmode
10779 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
10780 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10781 && SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1]))
10782 && GET_MODE (SUBREG_REG (operands
[1])) == SDmode
)
10785 int regno
= REGNO (SUBREG_REG (operands
[1]));
10787 if (!HARD_REGISTER_NUM_P (regno
))
10789 cl
= reg_preferred_class (regno
);
10790 regno
= reg_renumber
[regno
];
10792 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][1];
10794 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10797 operands
[0] = gen_lowpart_SUBREG (SDmode
, operands
[0]);
10798 operands
[1] = SUBREG_REG (operands
[1]);
10801 if (lra_in_progress
10803 && REG_P (operands
[0]) && !HARD_REGISTER_P (operands
[0])
10804 && reg_preferred_class (REGNO (operands
[0])) == NO_REGS
10805 && (REG_P (operands
[1])
10806 || (SUBREG_P (operands
[1]) && REG_P (SUBREG_REG (operands
[1])))))
10808 int regno
= reg_or_subregno (operands
[1]);
10811 if (!HARD_REGISTER_NUM_P (regno
))
10813 cl
= reg_preferred_class (regno
);
10814 gcc_assert (cl
!= NO_REGS
);
10815 regno
= reg_renumber
[regno
];
10817 regno
= ira_class_hard_regs
[cl
][0];
10819 if (FP_REGNO_P (regno
))
10821 if (GET_MODE (operands
[0]) != DDmode
)
10822 operands
[0] = gen_rtx_SUBREG (DDmode
, operands
[0], 0);
10823 emit_insn (gen_movsd_store (operands
[0], operands
[1]));
10825 else if (INT_REGNO_P (regno
))
10826 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10831 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10832 p:DD)) if p0 is not of floating point class and p1 is spilled as
10833 we can have no analogous movsd_load for this. */
10834 if (lra_in_progress
&& mode
== DDmode
10835 && SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))
10836 && GET_MODE (SUBREG_REG (operands
[0])) == SDmode
10837 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
10838 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10841 int regno
= REGNO (SUBREG_REG (operands
[0]));
10843 if (!HARD_REGISTER_NUM_P (regno
))
10845 cl
= reg_preferred_class (regno
);
10846 regno
= reg_renumber
[regno
];
10848 regno
= cl
== NO_REGS
? -1 : ira_class_hard_regs
[cl
][0];
10850 if (regno
>= 0 && ! FP_REGNO_P (regno
))
10853 operands
[0] = SUBREG_REG (operands
[0]);
10854 operands
[1] = gen_lowpart_SUBREG (SDmode
, operands
[1]);
10857 if (lra_in_progress
10859 && (REG_P (operands
[0])
10860 || (SUBREG_P (operands
[0]) && REG_P (SUBREG_REG (operands
[0]))))
10861 && REG_P (operands
[1]) && !HARD_REGISTER_P (operands
[1])
10862 && reg_preferred_class (REGNO (operands
[1])) == NO_REGS
)
10864 int regno
= reg_or_subregno (operands
[0]);
10867 if (!HARD_REGISTER_NUM_P (regno
))
10869 cl
= reg_preferred_class (regno
);
10870 gcc_assert (cl
!= NO_REGS
);
10871 regno
= reg_renumber
[regno
];
10873 regno
= ira_class_hard_regs
[cl
][0];
10875 if (FP_REGNO_P (regno
))
10877 if (GET_MODE (operands
[1]) != DDmode
)
10878 operands
[1] = gen_rtx_SUBREG (DDmode
, operands
[1], 0);
10879 emit_insn (gen_movsd_load (operands
[0], operands
[1]));
10881 else if (INT_REGNO_P (regno
))
10882 emit_insn (gen_movsd_hardfloat (operands
[0], operands
[1]));
10888 /* FIXME: In the long term, this switch statement should go away
10889 and be replaced by a sequence of tests based on things like
10895 if (CONSTANT_P (operands
[1])
10896 && !CONST_INT_P (operands
[1]))
10897 operands
[1] = force_const_mem (mode
, operands
[1]);
10904 if (FLOAT128_2REG_P (mode
))
10905 rs6000_eliminate_indexed_memrefs (operands
);
10912 if (CONSTANT_P (operands
[1])
10913 && ! easy_fp_constant (operands
[1], mode
))
10914 operands
[1] = force_const_mem (mode
, operands
[1]);
10924 if (CONSTANT_P (operands
[1])
10925 && !easy_vector_constant (operands
[1], mode
))
10926 operands
[1] = force_const_mem (mode
, operands
[1]);
10931 if (CONST_INT_P (operands
[1]) && INTVAL (operands
[1]) != 0)
10932 error ("%qs is an opaque type, and you cannot set it to other values",
10933 (mode
== OOmode
) ? "__vector_pair" : "__vector_quad");
10938 /* Use default pattern for address of ELF small data */
10941 && DEFAULT_ABI
== ABI_V4
10942 && (SYMBOL_REF_P (operands
[1])
10943 || GET_CODE (operands
[1]) == CONST
)
10944 && small_data_operand (operands
[1], mode
))
10946 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10950 /* Use the default pattern for loading up PC-relative addresses. */
10951 if (TARGET_PCREL
&& mode
== Pmode
10952 && pcrel_local_or_external_address (operands
[1], Pmode
))
10954 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
10958 if (DEFAULT_ABI
== ABI_V4
10959 && mode
== Pmode
&& mode
== SImode
10960 && flag_pic
== 1 && got_operand (operands
[1], mode
))
10962 emit_insn (gen_movsi_got (operands
[0], operands
[1]));
10966 if ((TARGET_ELF
|| DEFAULT_ABI
== ABI_DARWIN
)
10967 && TARGET_NO_TOC_OR_PCREL
10970 && CONSTANT_P (operands
[1])
10971 && GET_CODE (operands
[1]) != HIGH
10972 && !CONST_INT_P (operands
[1]))
10974 rtx target
= (!can_create_pseudo_p ()
10976 : gen_reg_rtx (mode
));
10978 /* If this is a function address on -mcall-aixdesc,
10979 convert it to the address of the descriptor. */
10980 if (DEFAULT_ABI
== ABI_AIX
10981 && SYMBOL_REF_P (operands
[1])
10982 && XSTR (operands
[1], 0)[0] == '.')
10984 const char *name
= XSTR (operands
[1], 0);
10986 while (*name
== '.')
10988 new_ref
= gen_rtx_SYMBOL_REF (Pmode
, name
);
10989 CONSTANT_POOL_ADDRESS_P (new_ref
)
10990 = CONSTANT_POOL_ADDRESS_P (operands
[1]);
10991 SYMBOL_REF_FLAGS (new_ref
) = SYMBOL_REF_FLAGS (operands
[1]);
10992 SYMBOL_REF_USED (new_ref
) = SYMBOL_REF_USED (operands
[1]);
10993 SYMBOL_REF_DATA (new_ref
) = SYMBOL_REF_DATA (operands
[1]);
10994 operands
[1] = new_ref
;
10997 if (DEFAULT_ABI
== ABI_DARWIN
)
11000 /* This is not PIC code, but could require the subset of
11001 indirections used by mdynamic-no-pic. */
11002 if (MACHO_DYNAMIC_NO_PIC_P
)
11004 /* Take care of any required data indirection. */
11005 operands
[1] = rs6000_machopic_legitimize_pic_address (
11006 operands
[1], mode
, operands
[0]);
11007 if (operands
[0] != operands
[1])
11008 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11012 emit_insn (gen_macho_high (Pmode
, target
, operands
[1]));
11013 emit_insn (gen_macho_low (Pmode
, operands
[0],
11014 target
, operands
[1]));
11018 emit_insn (gen_elf_high (target
, operands
[1]));
11019 emit_insn (gen_elf_low (operands
[0], target
, operands
[1]));
11023 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11024 and we have put it in the TOC, we just need to make a TOC-relative
11025 reference to it. */
11027 && SYMBOL_REF_P (operands
[1])
11028 && use_toc_relative_ref (operands
[1], mode
))
11029 operands
[1] = create_TOC_reference (operands
[1], operands
[0]);
11030 else if (mode
== Pmode
11031 && CONSTANT_P (operands
[1])
11032 && GET_CODE (operands
[1]) != HIGH
11033 && ((REG_P (operands
[0])
11034 && FP_REGNO_P (REGNO (operands
[0])))
11035 || !CONST_INT_P (operands
[1])
11036 || (num_insns_constant (operands
[1], mode
)
11037 > (TARGET_CMODEL
!= CMODEL_SMALL
? 3 : 2)))
11038 && !toc_relative_expr_p (operands
[1], false, NULL
, NULL
)
11039 && (TARGET_CMODEL
== CMODEL_SMALL
11040 || can_create_pseudo_p ()
11041 || (REG_P (operands
[0])
11042 && INT_REG_OK_FOR_BASE_P (operands
[0], true))))
11046 /* Darwin uses a special PIC legitimizer. */
11047 if (DEFAULT_ABI
== ABI_DARWIN
&& MACHOPIC_INDIRECT
)
11050 rs6000_machopic_legitimize_pic_address (operands
[1], mode
,
11052 if (operands
[0] != operands
[1])
11053 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11058 /* If we are to limit the number of things we put in the TOC and
11059 this is a symbol plus a constant we can add in one insn,
11060 just put the symbol in the TOC and add the constant. */
11061 if (GET_CODE (operands
[1]) == CONST
11062 && TARGET_NO_SUM_IN_TOC
11063 && GET_CODE (XEXP (operands
[1], 0)) == PLUS
11064 && add_operand (XEXP (XEXP (operands
[1], 0), 1), mode
)
11065 && (GET_CODE (XEXP (XEXP (operands
[1], 0), 0)) == LABEL_REF
11066 || SYMBOL_REF_P (XEXP (XEXP (operands
[1], 0), 0)))
11067 && ! side_effects_p (operands
[0]))
11070 force_const_mem (mode
, XEXP (XEXP (operands
[1], 0), 0));
11071 rtx other
= XEXP (XEXP (operands
[1], 0), 1);
11073 sym
= force_reg (mode
, sym
);
11074 emit_insn (gen_add3_insn (operands
[0], sym
, other
));
11078 operands
[1] = force_const_mem (mode
, operands
[1]);
11081 && SYMBOL_REF_P (XEXP (operands
[1], 0))
11082 && use_toc_relative_ref (XEXP (operands
[1], 0), mode
))
11084 rtx tocref
= create_TOC_reference (XEXP (operands
[1], 0),
11086 operands
[1] = gen_const_mem (mode
, tocref
);
11087 set_mem_alias_set (operands
[1], get_TOC_alias_set ());
11093 if (!VECTOR_MEM_VSX_P (TImode
))
11094 rs6000_eliminate_indexed_memrefs (operands
);
11098 rs6000_eliminate_indexed_memrefs (operands
);
11102 fatal_insn ("bad move", gen_rtx_SET (dest
, source
));
11105 /* Above, we may have called force_const_mem which may have returned
11106 an invalid address. If we can, fix this up; otherwise, reload will
11107 have to deal with it. */
11108 if (MEM_P (operands
[1]))
11109 operands
[1] = validize_mem (operands
[1]);
11111 emit_insn (gen_rtx_SET (operands
[0], operands
[1]));
11115 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
11117 init_float128_ibm (machine_mode mode
)
11119 if (!TARGET_XL_COMPAT
)
11121 set_optab_libfunc (add_optab
, mode
, "__gcc_qadd");
11122 set_optab_libfunc (sub_optab
, mode
, "__gcc_qsub");
11123 set_optab_libfunc (smul_optab
, mode
, "__gcc_qmul");
11124 set_optab_libfunc (sdiv_optab
, mode
, "__gcc_qdiv");
11126 if (!TARGET_HARD_FLOAT
)
11128 set_optab_libfunc (neg_optab
, mode
, "__gcc_qneg");
11129 set_optab_libfunc (eq_optab
, mode
, "__gcc_qeq");
11130 set_optab_libfunc (ne_optab
, mode
, "__gcc_qne");
11131 set_optab_libfunc (gt_optab
, mode
, "__gcc_qgt");
11132 set_optab_libfunc (ge_optab
, mode
, "__gcc_qge");
11133 set_optab_libfunc (lt_optab
, mode
, "__gcc_qlt");
11134 set_optab_libfunc (le_optab
, mode
, "__gcc_qle");
11135 set_optab_libfunc (unord_optab
, mode
, "__gcc_qunord");
11137 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__gcc_stoq");
11138 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__gcc_dtoq");
11139 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__gcc_qtos");
11140 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__gcc_qtod");
11141 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__gcc_qtoi");
11142 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__gcc_qtou");
11143 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__gcc_itoq");
11144 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__gcc_utoq");
11149 set_optab_libfunc (add_optab
, mode
, "_xlqadd");
11150 set_optab_libfunc (sub_optab
, mode
, "_xlqsub");
11151 set_optab_libfunc (smul_optab
, mode
, "_xlqmul");
11152 set_optab_libfunc (sdiv_optab
, mode
, "_xlqdiv");
11155 /* Add various conversions for IFmode to use the traditional TFmode
11157 if (mode
== IFmode
)
11159 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdtf");
11160 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddtf");
11161 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdtf");
11162 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunctfsd");
11163 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunctfdd");
11164 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendtftd");
11166 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixtfdi");
11167 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunstfdi");
11169 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatditf");
11170 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatunditf");
11172 if (TARGET_POWERPC64
)
11174 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixtfti");
11175 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunstfti");
11176 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattitf");
11177 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntitf");
11182 /* Set up IEEE 128-bit floating point routines. Use different names if the
11183 arguments can be passed in a vector register. The historical PowerPC
11184 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11185 continue to use that if we aren't using vector registers to pass IEEE
11186 128-bit floating point. */
11189 init_float128_ieee (machine_mode mode
)
11191 if (FLOAT128_VECTOR_P (mode
))
11193 set_optab_libfunc (add_optab
, mode
, "__addkf3");
11194 set_optab_libfunc (sub_optab
, mode
, "__subkf3");
11195 set_optab_libfunc (neg_optab
, mode
, "__negkf2");
11196 set_optab_libfunc (smul_optab
, mode
, "__mulkf3");
11197 set_optab_libfunc (sdiv_optab
, mode
, "__divkf3");
11198 set_optab_libfunc (sqrt_optab
, mode
, "__sqrtkf2");
11199 set_optab_libfunc (abs_optab
, mode
, "__abskf2");
11200 set_optab_libfunc (powi_optab
, mode
, "__powikf2");
11202 set_optab_libfunc (eq_optab
, mode
, "__eqkf2");
11203 set_optab_libfunc (ne_optab
, mode
, "__nekf2");
11204 set_optab_libfunc (gt_optab
, mode
, "__gtkf2");
11205 set_optab_libfunc (ge_optab
, mode
, "__gekf2");
11206 set_optab_libfunc (lt_optab
, mode
, "__ltkf2");
11207 set_optab_libfunc (le_optab
, mode
, "__lekf2");
11208 set_optab_libfunc (unord_optab
, mode
, "__unordkf2");
11210 set_conv_libfunc (sext_optab
, mode
, SFmode
, "__extendsfkf2");
11211 set_conv_libfunc (sext_optab
, mode
, DFmode
, "__extenddfkf2");
11212 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "__trunckfsf2");
11213 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "__trunckfdf2");
11215 set_conv_libfunc (sext_optab
, mode
, IFmode
, "__trunctfkf2");
11216 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
11217 set_conv_libfunc (sext_optab
, mode
, TFmode
, "__trunctfkf2");
11219 set_conv_libfunc (trunc_optab
, IFmode
, mode
, "__extendkftf2");
11220 if (mode
!= TFmode
&& FLOAT128_IBM_P (TFmode
))
11221 set_conv_libfunc (trunc_optab
, TFmode
, mode
, "__extendkftf2");
11223 set_conv_libfunc (sext_optab
, mode
, SDmode
, "__dpd_extendsdkf");
11224 set_conv_libfunc (sext_optab
, mode
, DDmode
, "__dpd_extendddkf");
11225 set_conv_libfunc (trunc_optab
, mode
, TDmode
, "__dpd_trunctdkf");
11226 set_conv_libfunc (trunc_optab
, SDmode
, mode
, "__dpd_trunckfsd");
11227 set_conv_libfunc (trunc_optab
, DDmode
, mode
, "__dpd_trunckfdd");
11228 set_conv_libfunc (sext_optab
, TDmode
, mode
, "__dpd_extendkftd");
11230 set_conv_libfunc (sfix_optab
, SImode
, mode
, "__fixkfsi");
11231 set_conv_libfunc (ufix_optab
, SImode
, mode
, "__fixunskfsi");
11232 set_conv_libfunc (sfix_optab
, DImode
, mode
, "__fixkfdi");
11233 set_conv_libfunc (ufix_optab
, DImode
, mode
, "__fixunskfdi");
11235 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "__floatsikf");
11236 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "__floatunsikf");
11237 set_conv_libfunc (sfloat_optab
, mode
, DImode
, "__floatdikf");
11238 set_conv_libfunc (ufloat_optab
, mode
, DImode
, "__floatundikf");
11240 if (TARGET_POWERPC64
)
11242 set_conv_libfunc (sfix_optab
, TImode
, mode
, "__fixkfti_sw");
11243 set_conv_libfunc (ufix_optab
, TImode
, mode
, "__fixunskfti_sw");
11244 set_conv_libfunc (sfloat_optab
, mode
, TImode
, "__floattikf_sw");
11245 set_conv_libfunc (ufloat_optab
, mode
, TImode
, "__floatuntikf_sw");
11251 set_optab_libfunc (add_optab
, mode
, "_q_add");
11252 set_optab_libfunc (sub_optab
, mode
, "_q_sub");
11253 set_optab_libfunc (neg_optab
, mode
, "_q_neg");
11254 set_optab_libfunc (smul_optab
, mode
, "_q_mul");
11255 set_optab_libfunc (sdiv_optab
, mode
, "_q_div");
11256 if (TARGET_PPC_GPOPT
)
11257 set_optab_libfunc (sqrt_optab
, mode
, "_q_sqrt");
11259 set_optab_libfunc (eq_optab
, mode
, "_q_feq");
11260 set_optab_libfunc (ne_optab
, mode
, "_q_fne");
11261 set_optab_libfunc (gt_optab
, mode
, "_q_fgt");
11262 set_optab_libfunc (ge_optab
, mode
, "_q_fge");
11263 set_optab_libfunc (lt_optab
, mode
, "_q_flt");
11264 set_optab_libfunc (le_optab
, mode
, "_q_fle");
11266 set_conv_libfunc (sext_optab
, mode
, SFmode
, "_q_stoq");
11267 set_conv_libfunc (sext_optab
, mode
, DFmode
, "_q_dtoq");
11268 set_conv_libfunc (trunc_optab
, SFmode
, mode
, "_q_qtos");
11269 set_conv_libfunc (trunc_optab
, DFmode
, mode
, "_q_qtod");
11270 set_conv_libfunc (sfix_optab
, SImode
, mode
, "_q_qtoi");
11271 set_conv_libfunc (ufix_optab
, SImode
, mode
, "_q_qtou");
11272 set_conv_libfunc (sfloat_optab
, mode
, SImode
, "_q_itoq");
11273 set_conv_libfunc (ufloat_optab
, mode
, SImode
, "_q_utoq");
11278 rs6000_init_libfuncs (void)
11280 /* __float128 support. */
11281 if (TARGET_FLOAT128_TYPE
)
11283 init_float128_ibm (IFmode
);
11284 init_float128_ieee (KFmode
);
11287 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11288 if (TARGET_LONG_DOUBLE_128
)
11290 if (!TARGET_IEEEQUAD
)
11291 init_float128_ibm (TFmode
);
11293 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11295 init_float128_ieee (TFmode
);
11299 /* Emit a potentially record-form instruction, setting DST from SRC.
11300 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11301 signed comparison of DST with zero. If DOT is 1, the generated RTL
11302 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11303 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11304 a separate COMPARE. */
11307 rs6000_emit_dot_insn (rtx dst
, rtx src
, int dot
, rtx ccreg
)
11311 emit_move_insn (dst
, src
);
11315 if (cc_reg_not_cr0_operand (ccreg
, CCmode
))
11317 emit_move_insn (dst
, src
);
11318 emit_move_insn (ccreg
, gen_rtx_COMPARE (CCmode
, dst
, const0_rtx
));
11322 rtx ccset
= gen_rtx_SET (ccreg
, gen_rtx_COMPARE (CCmode
, src
, const0_rtx
));
11325 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, dst
);
11326 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, clobber
)));
11330 rtx set
= gen_rtx_SET (dst
, src
);
11331 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, ccset
, set
)));
11336 /* A validation routine: say whether CODE, a condition code, and MODE
11337 match. The other alternatives either don't make sense or should
11338 never be generated. */
11341 validate_condition_mode (enum rtx_code code
, machine_mode mode
)
11343 gcc_assert ((GET_RTX_CLASS (code
) == RTX_COMPARE
11344 || GET_RTX_CLASS (code
) == RTX_COMM_COMPARE
)
11345 && GET_MODE_CLASS (mode
) == MODE_CC
);
11347 /* These don't make sense. */
11348 gcc_assert ((code
!= GT
&& code
!= LT
&& code
!= GE
&& code
!= LE
)
11349 || mode
!= CCUNSmode
);
11351 gcc_assert ((code
!= GTU
&& code
!= LTU
&& code
!= GEU
&& code
!= LEU
)
11352 || mode
== CCUNSmode
);
11354 gcc_assert (mode
== CCFPmode
11355 || (code
!= ORDERED
&& code
!= UNORDERED
11356 && code
!= UNEQ
&& code
!= LTGT
11357 && code
!= UNGT
&& code
!= UNLT
11358 && code
!= UNGE
&& code
!= UNLE
));
11360 /* These are invalid; the information is not there. */
11361 gcc_assert (mode
!= CCEQmode
|| code
== EQ
|| code
== NE
);
11365 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11366 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11367 not zero, store there the bit offset (counted from the right) where
11368 the single stretch of 1 bits begins; and similarly for B, the bit
11369 offset where it ends. */
11372 rs6000_is_valid_mask (rtx mask
, int *b
, int *e
, machine_mode mode
)
11374 unsigned HOST_WIDE_INT val
= INTVAL (mask
);
11375 unsigned HOST_WIDE_INT bit
;
11377 int n
= GET_MODE_PRECISION (mode
);
11379 if (mode
!= DImode
&& mode
!= SImode
)
11382 if (INTVAL (mask
) >= 0)
11385 ne
= exact_log2 (bit
);
11386 nb
= exact_log2 (val
+ bit
);
11388 else if (val
+ 1 == 0)
11397 nb
= exact_log2 (bit
);
11398 ne
= exact_log2 (val
+ bit
);
11403 ne
= exact_log2 (bit
);
11404 if (val
+ bit
== 0)
11412 if (nb
< 0 || ne
< 0 || nb
>= n
|| ne
>= n
)
11424 rs6000_is_valid_rotate_dot_mask (rtx mask
, machine_mode mode
)
11427 if (rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
) && nb
>= ne
&& ne
> 0)
11431 /* *rotldi3_mask_dot requires for -m32 -mpowerpc64 that the mask is
11433 return (UINTVAL (mask
) << (63 - nb
)) <= 0x7fffffff;
11439 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11440 or rldicr instruction, to implement an AND with it in mode MODE. */
11443 rs6000_is_valid_and_mask (rtx mask
, machine_mode mode
)
11447 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11450 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11452 if (mode
== DImode
)
11453 return (ne
== 0 || nb
== 63 || (nb
< 32 && ne
<= nb
));
11455 /* For SImode, rlwinm can do everything. */
11456 if (mode
== SImode
)
11457 return (nb
< 32 && ne
< 32);
11462 /* Return the instruction template for an AND with mask in mode MODE, with
11463 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11466 rs6000_insn_for_and_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11470 if (!rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
))
11471 gcc_unreachable ();
11473 if (mode
== DImode
&& ne
== 0)
11475 operands
[3] = GEN_INT (63 - nb
);
11477 return "rldicl. %0,%1,0,%3";
11478 return "rldicl %0,%1,0,%3";
11481 if (mode
== DImode
&& nb
== 63)
11483 operands
[3] = GEN_INT (63 - ne
);
11485 return "rldicr. %0,%1,0,%3";
11486 return "rldicr %0,%1,0,%3";
11489 if (nb
< 32 && ne
< 32)
11491 operands
[3] = GEN_INT (31 - nb
);
11492 operands
[4] = GEN_INT (31 - ne
);
11494 return "rlwinm. %0,%1,0,%3,%4";
11495 return "rlwinm %0,%1,0,%3,%4";
11498 gcc_unreachable ();
11501 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11502 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11503 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
11506 rs6000_is_valid_shift_mask (rtx mask
, rtx shift
, machine_mode mode
)
11510 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11513 int n
= GET_MODE_PRECISION (mode
);
11516 if (CONST_INT_P (XEXP (shift
, 1)))
11518 sh
= INTVAL (XEXP (shift
, 1));
11519 if (sh
< 0 || sh
>= n
)
11523 rtx_code code
= GET_CODE (shift
);
11525 /* Convert any shift by 0 to a rotate, to simplify below code. */
11529 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11530 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11532 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11538 /* DImode rotates need rld*. */
11539 if (mode
== DImode
&& code
== ROTATE
)
11540 return (nb
== 63 || ne
== 0 || ne
== sh
);
11542 /* SImode rotates need rlw*. */
11543 if (mode
== SImode
&& code
== ROTATE
)
11544 return (nb
< 32 && ne
< 32 && sh
< 32);
11546 /* Wrap-around masks are only okay for rotates. */
11550 /* Variable shifts are only okay for rotates. */
11554 /* Don't allow ASHIFT if the mask is wrong for that. */
11555 if (code
== ASHIFT
&& ne
< sh
)
11558 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11559 if the mask is wrong for that. */
11560 if (nb
< 32 && ne
< 32 && sh
< 32
11561 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11564 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11565 if the mask is wrong for that. */
11566 if (code
== LSHIFTRT
)
11568 if (nb
== 63 || ne
== 0 || ne
== sh
)
11569 return !(code
== LSHIFTRT
&& nb
>= sh
);
11574 /* Return the instruction template for a shift with mask in mode MODE, with
11575 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11578 rs6000_insn_for_shift_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11582 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11583 gcc_unreachable ();
11585 if (mode
== DImode
&& ne
== 0)
11587 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11588 operands
[2] = GEN_INT (64 - INTVAL (operands
[2]));
11589 operands
[3] = GEN_INT (63 - nb
);
11591 return "rld%I2cl. %0,%1,%2,%3";
11592 return "rld%I2cl %0,%1,%2,%3";
11595 if (mode
== DImode
&& nb
== 63)
11597 operands
[3] = GEN_INT (63 - ne
);
11599 return "rld%I2cr. %0,%1,%2,%3";
11600 return "rld%I2cr %0,%1,%2,%3";
11604 && GET_CODE (operands
[4]) != LSHIFTRT
11605 && CONST_INT_P (operands
[2])
11606 && ne
== INTVAL (operands
[2]))
11608 operands
[3] = GEN_INT (63 - nb
);
11610 return "rld%I2c. %0,%1,%2,%3";
11611 return "rld%I2c %0,%1,%2,%3";
11614 if (nb
< 32 && ne
< 32)
11616 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11617 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
11618 operands
[3] = GEN_INT (31 - nb
);
11619 operands
[4] = GEN_INT (31 - ne
);
11620 /* This insn can also be a 64-bit rotate with mask that really makes
11621 it just a shift right (with mask); the %h below are to adjust for
11622 that situation (shift count is >= 32 in that case). */
11624 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11625 return "rlw%I2nm %0,%1,%h2,%3,%4";
11628 gcc_unreachable ();
11631 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11632 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11633 ASHIFT, or LSHIFTRT) in mode MODE. */
11636 rs6000_is_valid_insert_mask (rtx mask
, rtx shift
, machine_mode mode
)
11640 if (!rs6000_is_valid_mask (mask
, &nb
, &ne
, mode
))
11643 int n
= GET_MODE_PRECISION (mode
);
11645 int sh
= INTVAL (XEXP (shift
, 1));
11646 if (sh
< 0 || sh
>= n
)
11649 rtx_code code
= GET_CODE (shift
);
11651 /* Convert any shift by 0 to a rotate, to simplify below code. */
11655 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11656 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& ne
>= sh
)
11658 if (code
== ROTATE
&& sh
>= 0 && nb
>= ne
&& nb
< sh
)
11664 /* DImode rotates need rldimi. */
11665 if (mode
== DImode
&& code
== ROTATE
)
11668 /* SImode rotates need rlwimi. */
11669 if (mode
== SImode
&& code
== ROTATE
)
11670 return (nb
< 32 && ne
< 32 && sh
< 32);
11672 /* Wrap-around masks are only okay for rotates. */
11676 /* Don't allow ASHIFT if the mask is wrong for that. */
11677 if (code
== ASHIFT
&& ne
< sh
)
11680 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11681 if the mask is wrong for that. */
11682 if (nb
< 32 && ne
< 32 && sh
< 32
11683 && !(code
== LSHIFTRT
&& nb
>= 32 - sh
))
11686 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11687 if the mask is wrong for that. */
11688 if (code
== LSHIFTRT
)
11691 return !(code
== LSHIFTRT
&& nb
>= sh
);
11696 /* Return the instruction template for an insert with mask in mode MODE, with
11697 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11700 rs6000_insn_for_insert_mask (machine_mode mode
, rtx
*operands
, bool dot
)
11704 if (!rs6000_is_valid_mask (operands
[3], &nb
, &ne
, mode
))
11705 gcc_unreachable ();
11707 /* Prefer rldimi because rlwimi is cracked. */
11708 if (TARGET_POWERPC64
11709 && (!dot
|| mode
== DImode
)
11710 && GET_CODE (operands
[4]) != LSHIFTRT
11711 && ne
== INTVAL (operands
[2]))
11713 operands
[3] = GEN_INT (63 - nb
);
11715 return "rldimi. %0,%1,%2,%3";
11716 return "rldimi %0,%1,%2,%3";
11719 if (nb
< 32 && ne
< 32)
11721 if (GET_CODE (operands
[4]) == LSHIFTRT
&& INTVAL (operands
[2]))
11722 operands
[2] = GEN_INT (32 - INTVAL (operands
[2]));
11723 operands
[3] = GEN_INT (31 - nb
);
11724 operands
[4] = GEN_INT (31 - ne
);
11726 return "rlwimi. %0,%1,%2,%3,%4";
11727 return "rlwimi %0,%1,%2,%3,%4";
11730 gcc_unreachable ();
11733 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11734 using two machine instructions. */
11737 rs6000_is_valid_2insn_and (rtx c
, machine_mode mode
)
11739 /* There are two kinds of AND we can handle with two insns:
11740 1) those we can do with two rl* insn;
11743 We do not handle that last case yet. */
11745 /* If there is just one stretch of ones, we can do it. */
11746 if (rs6000_is_valid_mask (c
, NULL
, NULL
, mode
))
11749 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11750 one insn, we can do the whole thing with two. */
11751 unsigned HOST_WIDE_INT val
= INTVAL (c
);
11752 unsigned HOST_WIDE_INT bit1
= val
& -val
;
11753 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
11754 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
11755 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
11756 return rs6000_is_valid_and_mask (GEN_INT (val
+ bit3
- bit2
), mode
);
11759 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11760 If EXPAND is true, split rotate-and-mask instructions we generate to
11761 their constituent parts as well (this is used during expand); if DOT
11762 is 1, make the last insn a record-form instruction clobbering the
11763 destination GPR and setting the CC reg (from operands[3]); if 2, set
11764 that GPR as well as the CC reg. */
11767 rs6000_emit_2insn_and (machine_mode mode
, rtx
*operands
, bool expand
, int dot
)
11769 gcc_assert (!(expand
&& dot
));
11771 unsigned HOST_WIDE_INT val
= INTVAL (operands
[2]);
11773 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11774 shift right. This generates better code than doing the masks without
11775 shifts, or shifting first right and then left. */
11777 if (rs6000_is_valid_mask (operands
[2], &nb
, &ne
, mode
) && nb
>= ne
)
11779 gcc_assert (mode
== DImode
);
11781 int shift
= 63 - nb
;
11784 rtx tmp1
= gen_reg_rtx (DImode
);
11785 rtx tmp2
= gen_reg_rtx (DImode
);
11786 emit_insn (gen_ashldi3 (tmp1
, operands
[1], GEN_INT (shift
)));
11787 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (val
<< shift
)));
11788 emit_insn (gen_lshrdi3 (operands
[0], tmp2
, GEN_INT (shift
)));
11792 rtx tmp
= gen_rtx_ASHIFT (mode
, operands
[1], GEN_INT (shift
));
11793 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (val
<< shift
));
11794 emit_move_insn (operands
[0], tmp
);
11795 tmp
= gen_rtx_LSHIFTRT (mode
, operands
[0], GEN_INT (shift
));
11796 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11801 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11802 that does the rest. */
11803 unsigned HOST_WIDE_INT bit1
= val
& -val
;
11804 unsigned HOST_WIDE_INT bit2
= (val
+ bit1
) & ~val
;
11805 unsigned HOST_WIDE_INT val1
= (val
+ bit1
) & val
;
11806 unsigned HOST_WIDE_INT bit3
= val1
& -val1
;
11808 unsigned HOST_WIDE_INT mask1
= -bit3
+ bit2
- 1;
11809 unsigned HOST_WIDE_INT mask2
= val
+ bit3
- bit2
;
11811 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2
), mode
));
11813 /* Two "no-rotate"-and-mask instructions, for SImode. */
11814 if (rs6000_is_valid_and_mask (GEN_INT (mask1
), mode
))
11816 gcc_assert (mode
== SImode
);
11818 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
11819 rtx tmp
= gen_rtx_AND (mode
, operands
[1], GEN_INT (mask1
));
11820 emit_move_insn (reg
, tmp
);
11821 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
11822 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11826 gcc_assert (mode
== DImode
);
11828 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11829 insns; we have to do the first in SImode, because it wraps. */
11830 if (mask2
<= 0xffffffff
11831 && rs6000_is_valid_and_mask (GEN_INT (mask1
), SImode
))
11833 rtx reg
= expand
? gen_reg_rtx (mode
) : operands
[0];
11834 rtx tmp
= gen_rtx_AND (SImode
, gen_lowpart (SImode
, operands
[1]),
11836 rtx reg_low
= gen_lowpart (SImode
, reg
);
11837 emit_move_insn (reg_low
, tmp
);
11838 tmp
= gen_rtx_AND (mode
, reg
, GEN_INT (mask2
));
11839 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11843 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11844 at the top end), rotate back and clear the other hole. */
11845 int right
= exact_log2 (bit3
);
11846 int left
= 64 - right
;
11848 /* Rotate the mask too. */
11849 mask1
= (mask1
>> right
) | ((bit2
- 1) << left
);
11853 rtx tmp1
= gen_reg_rtx (DImode
);
11854 rtx tmp2
= gen_reg_rtx (DImode
);
11855 rtx tmp3
= gen_reg_rtx (DImode
);
11856 emit_insn (gen_rotldi3 (tmp1
, operands
[1], GEN_INT (left
)));
11857 emit_insn (gen_anddi3 (tmp2
, tmp1
, GEN_INT (mask1
)));
11858 emit_insn (gen_rotldi3 (tmp3
, tmp2
, GEN_INT (right
)));
11859 emit_insn (gen_anddi3 (operands
[0], tmp3
, GEN_INT (mask2
)));
11863 rtx tmp
= gen_rtx_ROTATE (mode
, operands
[1], GEN_INT (left
));
11864 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask1
));
11865 emit_move_insn (operands
[0], tmp
);
11866 tmp
= gen_rtx_ROTATE (mode
, operands
[0], GEN_INT (right
));
11867 tmp
= gen_rtx_AND (mode
, tmp
, GEN_INT (mask2
));
11868 rs6000_emit_dot_insn (operands
[0], tmp
, dot
, dot
? operands
[3] : 0);
11872 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
11873 for lfq and stfq insns iff the registers are hard registers. */
11876 registers_ok_for_quad_peep (rtx reg1
, rtx reg2
)
11878 /* We might have been passed a SUBREG. */
11879 if (!REG_P (reg1
) || !REG_P (reg2
))
11882 /* We might have been passed non floating point registers. */
11883 if (!FP_REGNO_P (REGNO (reg1
))
11884 || !FP_REGNO_P (REGNO (reg2
)))
11887 return (REGNO (reg1
) == REGNO (reg2
) - 1);
11890 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
11891 addr1 and addr2 must be in consecutive memory locations
11892 (addr2 == addr1 + 8). */
11895 mems_ok_for_quad_peep (rtx mem1
, rtx mem2
)
11898 unsigned int reg1
, reg2
;
11899 int offset1
, offset2
;
11901 /* The mems cannot be volatile. */
11902 if (MEM_VOLATILE_P (mem1
) || MEM_VOLATILE_P (mem2
))
11905 addr1
= XEXP (mem1
, 0);
11906 addr2
= XEXP (mem2
, 0);
11908 /* Extract an offset (if used) from the first addr. */
11909 if (GET_CODE (addr1
) == PLUS
)
11911 /* If not a REG, return zero. */
11912 if (!REG_P (XEXP (addr1
, 0)))
11916 reg1
= REGNO (XEXP (addr1
, 0));
11917 /* The offset must be constant! */
11918 if (!CONST_INT_P (XEXP (addr1
, 1)))
11920 offset1
= INTVAL (XEXP (addr1
, 1));
11923 else if (!REG_P (addr1
))
11927 reg1
= REGNO (addr1
);
11928 /* This was a simple (mem (reg)) expression. Offset is 0. */
11932 /* And now for the second addr. */
11933 if (GET_CODE (addr2
) == PLUS
)
11935 /* If not a REG, return zero. */
11936 if (!REG_P (XEXP (addr2
, 0)))
11940 reg2
= REGNO (XEXP (addr2
, 0));
11941 /* The offset must be constant. */
11942 if (!CONST_INT_P (XEXP (addr2
, 1)))
11944 offset2
= INTVAL (XEXP (addr2
, 1));
11947 else if (!REG_P (addr2
))
11951 reg2
= REGNO (addr2
);
11952 /* This was a simple (mem (reg)) expression. Offset is 0. */
11956 /* Both of these must have the same base register. */
11960 /* The offset for the second addr must be 8 more than the first addr. */
11961 if (offset2
!= offset1
+ 8)
11964 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
11969 /* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. For SDmode values we
11970 need to use DDmode, in all other cases we can use the same mode. */
11971 static machine_mode
11972 rs6000_secondary_memory_needed_mode (machine_mode mode
)
11974 if (lra_in_progress
&& mode
== SDmode
)
11979 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
11980 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
11981 only work on the traditional altivec registers, note if an altivec register
11984 static enum rs6000_reg_type
11985 register_to_reg_type (rtx reg
, bool *is_altivec
)
11987 HOST_WIDE_INT regno
;
11988 enum reg_class rclass
;
11990 if (SUBREG_P (reg
))
11991 reg
= SUBREG_REG (reg
);
11994 return NO_REG_TYPE
;
11996 regno
= REGNO (reg
);
11997 if (!HARD_REGISTER_NUM_P (regno
))
11999 if (!lra_in_progress
&& !reload_completed
)
12000 return PSEUDO_REG_TYPE
;
12002 regno
= true_regnum (reg
);
12003 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
))
12004 return PSEUDO_REG_TYPE
;
12007 gcc_assert (regno
>= 0);
12009 if (is_altivec
&& ALTIVEC_REGNO_P (regno
))
12010 *is_altivec
= true;
12012 rclass
= rs6000_regno_regclass
[regno
];
12013 return reg_class_to_reg_type
[(int)rclass
];
12016 /* Helper function to return the cost of adding a TOC entry address. */
12019 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask
)
12023 if (TARGET_CMODEL
!= CMODEL_SMALL
)
12024 ret
= ((addr_mask
& RELOAD_REG_OFFSET
) == 0) ? 1 : 2;
12027 ret
= (TARGET_MINIMAL_TOC
) ? 6 : 3;
12032 /* Helper function for rs6000_secondary_reload to determine whether the memory
12033 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
12034 needs reloading. Return negative if the memory is not handled by the memory
12035 helper functions and to try a different reload method, 0 if no additional
12036 instructions are need, and positive to give the extra cost for the
12040 rs6000_secondary_reload_memory (rtx addr
,
12041 enum reg_class rclass
,
12044 int extra_cost
= 0;
12045 rtx reg
, and_arg
, plus_arg0
, plus_arg1
;
12046 addr_mask_type addr_mask
;
12047 const char *type
= NULL
;
12048 const char *fail_msg
= NULL
;
12050 if (GPR_REG_CLASS_P (rclass
))
12051 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
12053 else if (rclass
== FLOAT_REGS
)
12054 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
12056 else if (rclass
== ALTIVEC_REGS
)
12057 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
12059 /* For the combined VSX_REGS, turn off Altivec AND -16. */
12060 else if (rclass
== VSX_REGS
)
12061 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
]
12062 & ~RELOAD_REG_AND_M16
);
12064 /* If the register allocator hasn't made up its mind yet on the register
12065 class to use, settle on defaults to use. */
12066 else if (rclass
== NO_REGS
)
12068 addr_mask
= (reg_addr
[mode
].addr_mask
[RELOAD_REG_ANY
]
12069 & ~RELOAD_REG_AND_M16
);
12071 if ((addr_mask
& RELOAD_REG_MULTIPLE
) != 0)
12072 addr_mask
&= ~(RELOAD_REG_INDEXED
12073 | RELOAD_REG_PRE_INCDEC
12074 | RELOAD_REG_PRE_MODIFY
);
12080 /* If the register isn't valid in this register class, just return now. */
12081 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
12083 if (TARGET_DEBUG_ADDR
)
12086 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12087 "not valid in class\n",
12088 GET_MODE_NAME (mode
), reg_class_names
[rclass
]);
12095 switch (GET_CODE (addr
))
12097 /* Does the register class supports auto update forms for this mode? We
12098 don't need a scratch register, since the powerpc only supports
12099 PRE_INC, PRE_DEC, and PRE_MODIFY. */
12102 reg
= XEXP (addr
, 0);
12103 if (!base_reg_operand (addr
, GET_MODE (reg
)))
12105 fail_msg
= "no base register #1";
12109 else if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
12117 reg
= XEXP (addr
, 0);
12118 plus_arg1
= XEXP (addr
, 1);
12119 if (!base_reg_operand (reg
, GET_MODE (reg
))
12120 || GET_CODE (plus_arg1
) != PLUS
12121 || !rtx_equal_p (reg
, XEXP (plus_arg1
, 0)))
12123 fail_msg
= "bad PRE_MODIFY";
12127 else if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
12134 /* Do we need to simulate AND -16 to clear the bottom address bits used
12135 in VMX load/stores? Only allow the AND for vector sizes. */
12137 and_arg
= XEXP (addr
, 0);
12138 if (GET_MODE_SIZE (mode
) != 16
12139 || !CONST_INT_P (XEXP (addr
, 1))
12140 || INTVAL (XEXP (addr
, 1)) != -16)
12142 fail_msg
= "bad Altivec AND #1";
12146 if (rclass
!= ALTIVEC_REGS
)
12148 if (legitimate_indirect_address_p (and_arg
, false))
12151 else if (legitimate_indexed_address_p (and_arg
, false))
12156 fail_msg
= "bad Altivec AND #2";
12164 /* If this is an indirect address, make sure it is a base register. */
12167 if (!legitimate_indirect_address_p (addr
, false))
12174 /* If this is an indexed address, make sure the register class can handle
12175 indexed addresses for this mode. */
12177 plus_arg0
= XEXP (addr
, 0);
12178 plus_arg1
= XEXP (addr
, 1);
12180 /* (plus (plus (reg) (constant)) (constant)) is generated during
12181 push_reload processing, so handle it now. */
12182 if (GET_CODE (plus_arg0
) == PLUS
&& CONST_INT_P (plus_arg1
))
12184 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12191 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12192 push_reload processing, so handle it now. */
12193 else if (GET_CODE (plus_arg0
) == PLUS
&& REG_P (plus_arg1
))
12195 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12198 type
= "indexed #2";
12202 else if (!base_reg_operand (plus_arg0
, GET_MODE (plus_arg0
)))
12204 fail_msg
= "no base register #2";
12208 else if (int_reg_operand (plus_arg1
, GET_MODE (plus_arg1
)))
12210 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0
12211 || !legitimate_indexed_address_p (addr
, false))
12218 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0
12219 && CONST_INT_P (plus_arg1
))
12221 if (!quad_address_offset_p (INTVAL (plus_arg1
)))
12224 type
= "vector d-form offset";
12228 /* Make sure the register class can handle offset addresses. */
12229 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
12231 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12234 type
= "offset #2";
12240 fail_msg
= "bad PLUS";
12247 /* Quad offsets are restricted and can't handle normal addresses. */
12248 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12251 type
= "vector d-form lo_sum";
12254 else if (!legitimate_lo_sum_address_p (mode
, addr
, false))
12256 fail_msg
= "bad LO_SUM";
12260 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12267 /* Static addresses need to create a TOC entry. */
12271 if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12274 type
= "vector d-form lo_sum #2";
12280 extra_cost
= rs6000_secondary_reload_toc_costs (addr_mask
);
12284 /* TOC references look like offsetable memory. */
12286 if (TARGET_CMODEL
== CMODEL_SMALL
|| XINT (addr
, 1) != UNSPEC_TOCREL
)
12288 fail_msg
= "bad UNSPEC";
12292 else if ((addr_mask
& RELOAD_REG_QUAD_OFFSET
) != 0)
12295 type
= "vector d-form lo_sum #3";
12298 else if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12301 type
= "toc reference";
12307 fail_msg
= "bad address";
12312 if (TARGET_DEBUG_ADDR
/* && extra_cost != 0 */)
12314 if (extra_cost
< 0)
12316 "rs6000_secondary_reload_memory error: mode = %s, "
12317 "class = %s, addr_mask = '%s', %s\n",
12318 GET_MODE_NAME (mode
),
12319 reg_class_names
[rclass
],
12320 rs6000_debug_addr_mask (addr_mask
, false),
12321 (fail_msg
!= NULL
) ? fail_msg
: "<bad address>");
12325 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12326 "addr_mask = '%s', extra cost = %d, %s\n",
12327 GET_MODE_NAME (mode
),
12328 reg_class_names
[rclass
],
12329 rs6000_debug_addr_mask (addr_mask
, false),
12331 (type
) ? type
: "<none>");
12339 /* Helper function for rs6000_secondary_reload to return true if a move to a
12340 different register classe is really a simple move. */
12343 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type
,
12344 enum rs6000_reg_type from_type
,
12347 int size
= GET_MODE_SIZE (mode
);
12349 /* Add support for various direct moves available. In this function, we only
12350 look at cases where we don't need any extra registers, and one or more
12351 simple move insns are issued. Originally small integers are not allowed
12352 in FPR/VSX registers. Single precision binary floating is not a simple
12353 move because we need to convert to the single precision memory layout.
12354 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12355 need special direct move handling, which we do not support yet. */
12356 if (TARGET_DIRECT_MOVE
12357 && ((to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12358 || (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
12360 if (TARGET_POWERPC64
)
12362 /* ISA 2.07: MTVSRD or MVFVSRD. */
12366 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12367 if (size
== 16 && TARGET_P9_VECTOR
&& mode
!= TDmode
)
12371 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12372 if (TARGET_P8_VECTOR
)
12374 if (mode
== SImode
)
12377 if (TARGET_P9_VECTOR
&& (mode
== HImode
|| mode
== QImode
))
12381 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12382 if (mode
== SDmode
)
12386 /* Move to/from SPR. */
12387 else if ((size
== 4 || (TARGET_POWERPC64
&& size
== 8))
12388 && ((to_type
== GPR_REG_TYPE
&& from_type
== SPR_REG_TYPE
)
12389 || (to_type
== SPR_REG_TYPE
&& from_type
== GPR_REG_TYPE
)))
12395 /* Direct move helper function for rs6000_secondary_reload, handle all of the
12396 special direct moves that involve allocating an extra register, return the
12397 insn code of the helper function if there is such a function or
12398 CODE_FOR_nothing if not. */
12401 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type
,
12402 enum rs6000_reg_type from_type
,
12404 secondary_reload_info
*sri
,
12408 enum insn_code icode
= CODE_FOR_nothing
;
12410 int size
= GET_MODE_SIZE (mode
);
12412 if (TARGET_POWERPC64
&& size
== 16)
12414 /* Handle moving 128-bit values from GPRs to VSX point registers on
12415 ISA 2.07 (power8, power9) when running in 64-bit mode using
12416 XXPERMDI to glue the two 64-bit values back together. */
12417 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
12419 cost
= 3; /* 2 mtvsrd's, 1 xxpermdi. */
12420 icode
= reg_addr
[mode
].reload_vsx_gpr
;
12423 /* Handle moving 128-bit values from VSX point registers to GPRs on
12424 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12425 bottom 64-bit value. */
12426 else if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12428 cost
= 3; /* 2 mfvsrd's, 1 xxpermdi. */
12429 icode
= reg_addr
[mode
].reload_gpr_vsx
;
12433 else if (TARGET_POWERPC64
&& mode
== SFmode
)
12435 if (to_type
== GPR_REG_TYPE
&& from_type
== VSX_REG_TYPE
)
12437 cost
= 3; /* xscvdpspn, mfvsrd, and. */
12438 icode
= reg_addr
[mode
].reload_gpr_vsx
;
12441 else if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
)
12443 cost
= 2; /* mtvsrz, xscvspdpn. */
12444 icode
= reg_addr
[mode
].reload_vsx_gpr
;
12448 else if (!TARGET_POWERPC64
&& size
== 8)
12450 /* Handle moving 64-bit values from GPRs to floating point registers on
12451 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12452 32-bit values back together. Altivec register classes must be handled
12453 specially since a different instruction is used, and the secondary
12454 reload support requires a single instruction class in the scratch
12455 register constraint. However, right now TFmode is not allowed in
12456 Altivec registers, so the pattern will never match. */
12457 if (to_type
== VSX_REG_TYPE
&& from_type
== GPR_REG_TYPE
&& !altivec_p
)
12459 cost
= 3; /* 2 mtvsrwz's, 1 fmrgow. */
12460 icode
= reg_addr
[mode
].reload_fpr_gpr
;
12464 if (icode
!= CODE_FOR_nothing
)
12469 sri
->icode
= icode
;
12470 sri
->extra_cost
= cost
;
12477 /* Return whether a move between two register classes can be done either
12478 directly (simple move) or via a pattern that uses a single extra temporary
12479 (using ISA 2.07's direct move in this case. */
12482 rs6000_secondary_reload_move (enum rs6000_reg_type to_type
,
12483 enum rs6000_reg_type from_type
,
12485 secondary_reload_info
*sri
,
12488 /* Fall back to load/store reloads if either type is not a register. */
12489 if (to_type
== NO_REG_TYPE
|| from_type
== NO_REG_TYPE
)
12492 /* If we haven't allocated registers yet, assume the move can be done for the
12493 standard register types. */
12494 if ((to_type
== PSEUDO_REG_TYPE
&& from_type
== PSEUDO_REG_TYPE
)
12495 || (to_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (from_type
))
12496 || (from_type
== PSEUDO_REG_TYPE
&& IS_STD_REG_TYPE (to_type
)))
12499 /* Moves to the same set of registers is a simple move for non-specialized
12501 if (to_type
== from_type
&& IS_STD_REG_TYPE (to_type
))
12504 /* Check whether a simple move can be done directly. */
12505 if (rs6000_secondary_reload_simple_move (to_type
, from_type
, mode
))
12509 sri
->icode
= CODE_FOR_nothing
;
12510 sri
->extra_cost
= 0;
12515 /* Now check if we can do it in a few steps. */
12516 return rs6000_secondary_reload_direct_move (to_type
, from_type
, mode
, sri
,
12520 /* Inform reload about cases where moving X with a mode MODE to a register in
12521 RCLASS requires an extra scratch or immediate register. Return the class
12522 needed for the immediate register.
12524 For VSX and Altivec, we may need a register to convert sp+offset into
12527 For misaligned 64-bit gpr loads and stores we need a register to
12528 convert an offset address to indirect. */
12531 rs6000_secondary_reload (bool in_p
,
12533 reg_class_t rclass_i
,
12535 secondary_reload_info
*sri
)
12537 enum reg_class rclass
= (enum reg_class
) rclass_i
;
12538 reg_class_t ret
= ALL_REGS
;
12539 enum insn_code icode
;
12540 bool default_p
= false;
12541 bool done_p
= false;
12543 /* Allow subreg of memory before/during reload. */
12544 bool memory_p
= (MEM_P (x
)
12545 || (!reload_completed
&& SUBREG_P (x
)
12546 && MEM_P (SUBREG_REG (x
))));
12548 sri
->icode
= CODE_FOR_nothing
;
12549 sri
->t_icode
= CODE_FOR_nothing
;
12550 sri
->extra_cost
= 0;
12552 ? reg_addr
[mode
].reload_load
12553 : reg_addr
[mode
].reload_store
);
12555 if (REG_P (x
) || register_operand (x
, mode
))
12557 enum rs6000_reg_type to_type
= reg_class_to_reg_type
[(int)rclass
];
12558 bool altivec_p
= (rclass
== ALTIVEC_REGS
);
12559 enum rs6000_reg_type from_type
= register_to_reg_type (x
, &altivec_p
);
12562 std::swap (to_type
, from_type
);
12564 /* Can we do a direct move of some sort? */
12565 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
, sri
,
12568 icode
= (enum insn_code
)sri
->icode
;
12575 /* Make sure 0.0 is not reloaded or forced into memory. */
12576 if (x
== CONST0_RTX (mode
) && VSX_REG_CLASS_P (rclass
))
12583 /* If this is a scalar floating point value and we want to load it into the
12584 traditional Altivec registers, do it via a move via a traditional floating
12585 point register, unless we have D-form addressing. Also make sure that
12586 non-zero constants use a FPR. */
12587 if (!done_p
&& reg_addr
[mode
].scalar_in_vmx_p
12588 && !mode_supports_vmx_dform (mode
)
12589 && (rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
12590 && (memory_p
|| CONST_DOUBLE_P (x
)))
12597 /* Handle reload of load/stores if we have reload helper functions. */
12598 if (!done_p
&& icode
!= CODE_FOR_nothing
&& memory_p
)
12600 int extra_cost
= rs6000_secondary_reload_memory (XEXP (x
, 0), rclass
,
12603 if (extra_cost
>= 0)
12607 if (extra_cost
> 0)
12609 sri
->extra_cost
= extra_cost
;
12610 sri
->icode
= icode
;
12615 /* Handle unaligned loads and stores of integer registers. */
12616 if (!done_p
&& TARGET_POWERPC64
12617 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12619 && GET_MODE_SIZE (GET_MODE (x
)) >= UNITS_PER_WORD
)
12621 rtx addr
= XEXP (x
, 0);
12622 rtx off
= address_offset (addr
);
12624 if (off
!= NULL_RTX
)
12626 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12627 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12629 /* We need a secondary reload when our legitimate_address_p
12630 says the address is good (as otherwise the entire address
12631 will be reloaded), and the offset is not a multiple of
12632 four or we have an address wrap. Address wrap will only
12633 occur for LO_SUMs since legitimate_offset_address_p
12634 rejects addresses for 16-byte mems that will wrap. */
12635 if (GET_CODE (addr
) == LO_SUM
12636 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12637 && ((offset
& 3) != 0
12638 || ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
))
12639 : (offset
+ 0x8000 < 0x10000 - extra
/* legitimate_address_p */
12640 && (offset
& 3) != 0))
12642 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12644 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_load
12645 : CODE_FOR_reload_di_load
);
12647 sri
->icode
= ((TARGET_32BIT
) ? CODE_FOR_reload_si_store
12648 : CODE_FOR_reload_di_store
);
12649 sri
->extra_cost
= 2;
12660 if (!done_p
&& !TARGET_POWERPC64
12661 && reg_class_to_reg_type
[(int)rclass
] == GPR_REG_TYPE
12663 && GET_MODE_SIZE (GET_MODE (x
)) > UNITS_PER_WORD
)
12665 rtx addr
= XEXP (x
, 0);
12666 rtx off
= address_offset (addr
);
12668 if (off
!= NULL_RTX
)
12670 unsigned int extra
= GET_MODE_SIZE (GET_MODE (x
)) - UNITS_PER_WORD
;
12671 unsigned HOST_WIDE_INT offset
= INTVAL (off
);
12673 /* We need a secondary reload when our legitimate_address_p
12674 says the address is good (as otherwise the entire address
12675 will be reloaded), and we have a wrap.
12677 legitimate_lo_sum_address_p allows LO_SUM addresses to
12678 have any offset so test for wrap in the low 16 bits.
12680 legitimate_offset_address_p checks for the range
12681 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12682 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12683 [0x7ff4,0x7fff] respectively, so test for the
12684 intersection of these ranges, [0x7ffc,0x7fff] and
12685 [0x7ff4,0x7ff7] respectively.
12687 Note that the address we see here may have been
12688 manipulated by legitimize_reload_address. */
12689 if (GET_CODE (addr
) == LO_SUM
12690 ? ((offset
& 0xffff) ^ 0x8000) >= 0x10000 - extra
12691 : offset
- (0x8000 - extra
) < UNITS_PER_WORD
)
12694 sri
->icode
= CODE_FOR_reload_si_load
;
12696 sri
->icode
= CODE_FOR_reload_si_store
;
12697 sri
->extra_cost
= 2;
12712 ret
= default_secondary_reload (in_p
, x
, rclass
, mode
, sri
);
12714 gcc_assert (ret
!= ALL_REGS
);
12716 if (TARGET_DEBUG_ADDR
)
12719 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12721 reg_class_names
[ret
],
12722 in_p
? "true" : "false",
12723 reg_class_names
[rclass
],
12724 GET_MODE_NAME (mode
));
12726 if (reload_completed
)
12727 fputs (", after reload", stderr
);
12730 fputs (", done_p not set", stderr
);
12733 fputs (", default secondary reload", stderr
);
12735 if (sri
->icode
!= CODE_FOR_nothing
)
12736 fprintf (stderr
, ", reload func = %s, extra cost = %d",
12737 insn_data
[sri
->icode
].name
, sri
->extra_cost
);
12739 else if (sri
->extra_cost
> 0)
12740 fprintf (stderr
, ", extra cost = %d", sri
->extra_cost
);
12742 fputs ("\n", stderr
);
12749 /* Better tracing for rs6000_secondary_reload_inner. */
12752 rs6000_secondary_reload_trace (int line
, rtx reg
, rtx mem
, rtx scratch
,
12757 gcc_assert (reg
!= NULL_RTX
&& mem
!= NULL_RTX
&& scratch
!= NULL_RTX
);
12759 fprintf (stderr
, "rs6000_secondary_reload_inner:%d, type = %s\n", line
,
12760 store_p
? "store" : "load");
12763 set
= gen_rtx_SET (mem
, reg
);
12765 set
= gen_rtx_SET (reg
, mem
);
12767 clobber
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
12768 debug_rtx (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
12771 static void rs6000_secondary_reload_fail (int, rtx
, rtx
, rtx
, bool)
12772 ATTRIBUTE_NORETURN
;
12775 rs6000_secondary_reload_fail (int line
, rtx reg
, rtx mem
, rtx scratch
,
12778 rs6000_secondary_reload_trace (line
, reg
, mem
, scratch
, store_p
);
12779 gcc_unreachable ();
12782 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12783 reload helper functions. These were identified in
12784 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12785 reload, it calls the insns:
12786 reload_<RELOAD:mode>_<P:mptrsize>_store
12787 reload_<RELOAD:mode>_<P:mptrsize>_load
12789 which in turn calls this function, to do whatever is necessary to create
12790 valid addresses. */
12793 rs6000_secondary_reload_inner (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
12795 int regno
= true_regnum (reg
);
12796 machine_mode mode
= GET_MODE (reg
);
12797 addr_mask_type addr_mask
;
12800 rtx op_reg
, op0
, op1
;
12805 if (regno
< 0 || !HARD_REGISTER_NUM_P (regno
) || !MEM_P (mem
)
12806 || !base_reg_operand (scratch
, GET_MODE (scratch
)))
12807 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12809 if (IN_RANGE (regno
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
))
12810 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_GPR
];
12812 else if (IN_RANGE (regno
, FIRST_FPR_REGNO
, LAST_FPR_REGNO
))
12813 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
];
12815 else if (IN_RANGE (regno
, FIRST_ALTIVEC_REGNO
, LAST_ALTIVEC_REGNO
))
12816 addr_mask
= reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
];
12819 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12821 /* Make sure the mode is valid in this register class. */
12822 if ((addr_mask
& RELOAD_REG_VALID
) == 0)
12823 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12825 if (TARGET_DEBUG_ADDR
)
12826 rs6000_secondary_reload_trace (__LINE__
, reg
, mem
, scratch
, store_p
);
12828 new_addr
= addr
= XEXP (mem
, 0);
12829 switch (GET_CODE (addr
))
12831 /* Does the register class support auto update forms for this mode? If
12832 not, do the update now. We don't need a scratch register, since the
12833 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
12836 op_reg
= XEXP (addr
, 0);
12837 if (!base_reg_operand (op_reg
, Pmode
))
12838 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12840 if ((addr_mask
& RELOAD_REG_PRE_INCDEC
) == 0)
12842 int delta
= GET_MODE_SIZE (mode
);
12843 if (GET_CODE (addr
) == PRE_DEC
)
12845 emit_insn (gen_add2_insn (op_reg
, GEN_INT (delta
)));
12851 op0
= XEXP (addr
, 0);
12852 op1
= XEXP (addr
, 1);
12853 if (!base_reg_operand (op0
, Pmode
)
12854 || GET_CODE (op1
) != PLUS
12855 || !rtx_equal_p (op0
, XEXP (op1
, 0)))
12856 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12858 if ((addr_mask
& RELOAD_REG_PRE_MODIFY
) == 0)
12860 emit_insn (gen_rtx_SET (op0
, op1
));
12865 /* Do we need to simulate AND -16 to clear the bottom address bits used
12866 in VMX load/stores? */
12868 op0
= XEXP (addr
, 0);
12869 op1
= XEXP (addr
, 1);
12870 if ((addr_mask
& RELOAD_REG_AND_M16
) == 0)
12872 if (REG_P (op0
) || SUBREG_P (op0
))
12875 else if (GET_CODE (op1
) == PLUS
)
12877 emit_insn (gen_rtx_SET (scratch
, op1
));
12882 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12884 and_op
= gen_rtx_AND (GET_MODE (scratch
), op_reg
, op1
);
12885 cc_clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (CCmode
));
12886 rv
= gen_rtvec (2, gen_rtx_SET (scratch
, and_op
), cc_clobber
);
12887 emit_insn (gen_rtx_PARALLEL (VOIDmode
, rv
));
12888 new_addr
= scratch
;
12892 /* If this is an indirect address, make sure it is a base register. */
12895 if (!base_reg_operand (addr
, GET_MODE (addr
)))
12897 emit_insn (gen_rtx_SET (scratch
, addr
));
12898 new_addr
= scratch
;
12902 /* If this is an indexed address, make sure the register class can handle
12903 indexed addresses for this mode. */
12905 op0
= XEXP (addr
, 0);
12906 op1
= XEXP (addr
, 1);
12907 if (!base_reg_operand (op0
, Pmode
))
12908 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12910 else if (int_reg_operand (op1
, Pmode
))
12912 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12914 emit_insn (gen_rtx_SET (scratch
, addr
));
12915 new_addr
= scratch
;
12919 else if (mode_supports_dq_form (mode
) && CONST_INT_P (op1
))
12921 if (((addr_mask
& RELOAD_REG_QUAD_OFFSET
) == 0)
12922 || !quad_address_p (addr
, mode
, false))
12924 emit_insn (gen_rtx_SET (scratch
, addr
));
12925 new_addr
= scratch
;
12929 /* Make sure the register class can handle offset addresses. */
12930 else if (rs6000_legitimate_offset_address_p (mode
, addr
, false, true))
12932 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12934 emit_insn (gen_rtx_SET (scratch
, addr
));
12935 new_addr
= scratch
;
12940 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12945 op0
= XEXP (addr
, 0);
12946 op1
= XEXP (addr
, 1);
12947 if (!base_reg_operand (op0
, Pmode
))
12948 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12950 else if (int_reg_operand (op1
, Pmode
))
12952 if ((addr_mask
& RELOAD_REG_INDEXED
) == 0)
12954 emit_insn (gen_rtx_SET (scratch
, addr
));
12955 new_addr
= scratch
;
12959 /* Quad offsets are restricted and can't handle normal addresses. */
12960 else if (mode_supports_dq_form (mode
))
12962 emit_insn (gen_rtx_SET (scratch
, addr
));
12963 new_addr
= scratch
;
12966 /* Make sure the register class can handle offset addresses. */
12967 else if (legitimate_lo_sum_address_p (mode
, addr
, false))
12969 if ((addr_mask
& RELOAD_REG_OFFSET
) == 0)
12971 emit_insn (gen_rtx_SET (scratch
, addr
));
12972 new_addr
= scratch
;
12977 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12984 rs6000_emit_move (scratch
, addr
, Pmode
);
12985 new_addr
= scratch
;
12989 rs6000_secondary_reload_fail (__LINE__
, reg
, mem
, scratch
, store_p
);
12992 /* Adjust the address if it changed. */
12993 if (addr
!= new_addr
)
12995 mem
= replace_equiv_address_nv (mem
, new_addr
);
12996 if (TARGET_DEBUG_ADDR
)
12997 fprintf (stderr
, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
13000 /* Now create the move. */
13002 emit_insn (gen_rtx_SET (mem
, reg
));
13004 emit_insn (gen_rtx_SET (reg
, mem
));
13009 /* Convert reloads involving 64-bit gprs and misaligned offset
13010 addressing, or multiple 32-bit gprs and offsets that are too large,
13011 to use indirect addressing. */
13014 rs6000_secondary_reload_gpr (rtx reg
, rtx mem
, rtx scratch
, bool store_p
)
13016 int regno
= true_regnum (reg
);
13017 enum reg_class rclass
;
13019 rtx scratch_or_premodify
= scratch
;
13021 if (TARGET_DEBUG_ADDR
)
13023 fprintf (stderr
, "\nrs6000_secondary_reload_gpr, type = %s\n",
13024 store_p
? "store" : "load");
13025 fprintf (stderr
, "reg:\n");
13027 fprintf (stderr
, "mem:\n");
13029 fprintf (stderr
, "scratch:\n");
13030 debug_rtx (scratch
);
13033 gcc_assert (regno
>= 0 && HARD_REGISTER_NUM_P (regno
));
13034 gcc_assert (MEM_P (mem
));
13035 rclass
= REGNO_REG_CLASS (regno
);
13036 gcc_assert (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
);
13037 addr
= XEXP (mem
, 0);
13039 if (GET_CODE (addr
) == PRE_MODIFY
)
13041 gcc_assert (REG_P (XEXP (addr
, 0))
13042 && GET_CODE (XEXP (addr
, 1)) == PLUS
13043 && XEXP (XEXP (addr
, 1), 0) == XEXP (addr
, 0));
13044 scratch_or_premodify
= XEXP (addr
, 0);
13045 addr
= XEXP (addr
, 1);
13047 gcc_assert (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
);
13049 rs6000_emit_move (scratch_or_premodify
, addr
, Pmode
);
13051 mem
= replace_equiv_address_nv (mem
, scratch_or_premodify
);
13053 /* Now create the move. */
13055 emit_insn (gen_rtx_SET (mem
, reg
));
13057 emit_insn (gen_rtx_SET (reg
, mem
));
13062 /* Given an rtx X being reloaded into a reg required to be
13063 in class CLASS, return the class of reg to actually use.
13064 In general this is just CLASS; but on some machines
13065 in some cases it is preferable to use a more restrictive class.
13067 On the RS/6000, we have to return NO_REGS when we want to reload a
13068 floating-point CONST_DOUBLE to force it to be copied to memory.
13070 We also don't want to reload integer values into floating-point
13071 registers if we can at all help it. In fact, this can
13072 cause reload to die, if it tries to generate a reload of CTR
13073 into a FP register and discovers it doesn't have the memory location
13076 ??? Would it be a good idea to have reload do the converse, that is
13077 try to reload floating modes into FP registers if possible?
13080 static enum reg_class
13081 rs6000_preferred_reload_class (rtx x
, enum reg_class rclass
)
13083 machine_mode mode
= GET_MODE (x
);
13084 bool is_constant
= CONSTANT_P (x
);
13086 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
13087 reload class for it. */
13088 if ((rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
13089 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_VMX
] & RELOAD_REG_VALID
) == 0)
13092 if ((rclass
== FLOAT_REGS
|| rclass
== VSX_REGS
)
13093 && (reg_addr
[mode
].addr_mask
[RELOAD_REG_FPR
] & RELOAD_REG_VALID
) == 0)
13096 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13097 the reloading of address expressions using PLUS into floating point
13099 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
) && GET_CODE (x
) != PLUS
)
13103 /* Zero is always allowed in all VSX registers. */
13104 if (x
== CONST0_RTX (mode
))
13107 /* If this is a vector constant that can be formed with a few Altivec
13108 instructions, we want altivec registers. */
13109 if (GET_CODE (x
) == CONST_VECTOR
&& easy_vector_constant (x
, mode
))
13110 return ALTIVEC_REGS
;
13112 /* If this is an integer constant that can easily be loaded into
13113 vector registers, allow it. */
13114 if (CONST_INT_P (x
))
13116 HOST_WIDE_INT value
= INTVAL (x
);
13118 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13119 2.06 can generate it in the Altivec registers with
13123 if (TARGET_P8_VECTOR
)
13125 else if (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
13126 return ALTIVEC_REGS
;
13131 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13132 a sign extend in the Altivec registers. */
13133 if (IN_RANGE (value
, -128, 127) && TARGET_P9_VECTOR
13134 && (rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
))
13135 return ALTIVEC_REGS
;
13138 /* Force constant to memory. */
13142 /* D-form addressing can easily reload the value. */
13143 if (mode_supports_vmx_dform (mode
)
13144 || mode_supports_dq_form (mode
))
13147 /* If this is a scalar floating point value and we don't have D-form
13148 addressing, prefer the traditional floating point registers so that we
13149 can use D-form (register+offset) addressing. */
13150 if (rclass
== VSX_REGS
13151 && (mode
== SFmode
|| GET_MODE_SIZE (mode
) == 8))
13154 /* Prefer the Altivec registers if Altivec is handling the vector
13155 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13157 if (VECTOR_UNIT_ALTIVEC_P (mode
) || VECTOR_MEM_ALTIVEC_P (mode
)
13158 || mode
== V1TImode
)
13159 return ALTIVEC_REGS
;
13164 if (is_constant
|| GET_CODE (x
) == PLUS
)
13166 if (reg_class_subset_p (GENERAL_REGS
, rclass
))
13167 return GENERAL_REGS
;
13168 if (reg_class_subset_p (BASE_REGS
, rclass
))
13173 /* For the vector pair and vector quad modes, prefer their natural register
13174 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13175 the GPR registers. */
13176 if (rclass
== GEN_OR_FLOAT_REGS
)
13178 if (mode
== OOmode
)
13181 if (mode
== XOmode
)
13184 if (GET_MODE_CLASS (mode
) == MODE_INT
)
13185 return GENERAL_REGS
;
13191 /* Debug version of rs6000_preferred_reload_class. */
13192 static enum reg_class
13193 rs6000_debug_preferred_reload_class (rtx x
, enum reg_class rclass
)
13195 enum reg_class ret
= rs6000_preferred_reload_class (x
, rclass
);
13198 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13200 reg_class_names
[ret
], reg_class_names
[rclass
],
13201 GET_MODE_NAME (GET_MODE (x
)));
13207 /* If we are copying between FP or AltiVec registers and anything else, we need
13208 a memory location. The exception is when we are targeting ppc64 and the
13209 move to/from fpr to gpr instructions are available. Also, under VSX, you
13210 can copy vector registers from the FP register set to the Altivec register
13211 set and vice versa. */
13214 rs6000_secondary_memory_needed (machine_mode mode
,
13215 reg_class_t from_class
,
13216 reg_class_t to_class
)
13218 enum rs6000_reg_type from_type
, to_type
;
13219 bool altivec_p
= ((from_class
== ALTIVEC_REGS
)
13220 || (to_class
== ALTIVEC_REGS
));
13222 /* If a simple/direct move is available, we don't need secondary memory */
13223 from_type
= reg_class_to_reg_type
[(int)from_class
];
13224 to_type
= reg_class_to_reg_type
[(int)to_class
];
13226 if (rs6000_secondary_reload_move (to_type
, from_type
, mode
,
13227 (secondary_reload_info
*)0, altivec_p
))
13230 /* If we have a floating point or vector register class, we need to use
13231 memory to transfer the data. */
13232 if (IS_FP_VECT_REG_TYPE (from_type
) || IS_FP_VECT_REG_TYPE (to_type
))
13238 /* Debug version of rs6000_secondary_memory_needed. */
13240 rs6000_debug_secondary_memory_needed (machine_mode mode
,
13241 reg_class_t from_class
,
13242 reg_class_t to_class
)
13244 bool ret
= rs6000_secondary_memory_needed (mode
, from_class
, to_class
);
13247 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13248 "to_class = %s, mode = %s\n",
13249 ret
? "true" : "false",
13250 reg_class_names
[from_class
],
13251 reg_class_names
[to_class
],
13252 GET_MODE_NAME (mode
));
13257 /* Return the register class of a scratch register needed to copy IN into
13258 or out of a register in RCLASS in MODE. If it can be done directly,
13259 NO_REGS is returned. */
13261 static enum reg_class
13262 rs6000_secondary_reload_class (enum reg_class rclass
, machine_mode mode
,
13267 if (TARGET_ELF
|| (DEFAULT_ABI
== ABI_DARWIN
13269 && MACHOPIC_INDIRECT
13273 /* We cannot copy a symbolic operand directly into anything
13274 other than BASE_REGS for TARGET_ELF. So indicate that a
13275 register from BASE_REGS is needed as an intermediate
13278 On Darwin, pic addresses require a load from memory, which
13279 needs a base register. */
13280 if (rclass
!= BASE_REGS
13281 && (SYMBOL_REF_P (in
)
13282 || GET_CODE (in
) == HIGH
13283 || GET_CODE (in
) == LABEL_REF
13284 || GET_CODE (in
) == CONST
))
13290 regno
= REGNO (in
);
13291 if (!HARD_REGISTER_NUM_P (regno
))
13293 regno
= true_regnum (in
);
13294 if (!HARD_REGISTER_NUM_P (regno
))
13298 else if (SUBREG_P (in
))
13300 regno
= true_regnum (in
);
13301 if (!HARD_REGISTER_NUM_P (regno
))
13307 /* If we have VSX register moves, prefer moving scalar values between
13308 Altivec registers and GPR by going via an FPR (and then via memory)
13309 instead of reloading the secondary memory address for Altivec moves. */
13311 && GET_MODE_SIZE (mode
) < 16
13312 && !mode_supports_vmx_dform (mode
)
13313 && (((rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
)
13314 && (regno
>= 0 && ALTIVEC_REGNO_P (regno
)))
13315 || ((rclass
== VSX_REGS
|| rclass
== ALTIVEC_REGS
)
13316 && (regno
>= 0 && INT_REGNO_P (regno
)))))
13319 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13321 if (rclass
== GENERAL_REGS
|| rclass
== BASE_REGS
13322 || (regno
>= 0 && INT_REGNO_P (regno
)))
13325 /* Constants, memory, and VSX registers can go into VSX registers (both the
13326 traditional floating point and the altivec registers). */
13327 if (rclass
== VSX_REGS
13328 && (regno
== -1 || VSX_REGNO_P (regno
)))
13331 /* Constants, memory, and FP registers can go into FP registers. */
13332 if ((regno
== -1 || FP_REGNO_P (regno
))
13333 && (rclass
== FLOAT_REGS
|| rclass
== GEN_OR_FLOAT_REGS
))
13334 return (mode
!= SDmode
|| lra_in_progress
) ? NO_REGS
: GENERAL_REGS
;
13336 /* Memory, and AltiVec registers can go into AltiVec registers. */
13337 if ((regno
== -1 || ALTIVEC_REGNO_P (regno
))
13338 && rclass
== ALTIVEC_REGS
)
13341 /* We can copy among the CR registers. */
13342 if ((rclass
== CR_REGS
|| rclass
== CR0_REGS
)
13343 && regno
>= 0 && CR_REGNO_P (regno
))
13346 /* Otherwise, we need GENERAL_REGS. */
13347 return GENERAL_REGS
;
13350 /* Debug version of rs6000_secondary_reload_class. */
13351 static enum reg_class
13352 rs6000_debug_secondary_reload_class (enum reg_class rclass
,
13353 machine_mode mode
, rtx in
)
13355 enum reg_class ret
= rs6000_secondary_reload_class (rclass
, mode
, in
);
13357 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13358 "mode = %s, input rtx:\n",
13359 reg_class_names
[ret
], reg_class_names
[rclass
],
13360 GET_MODE_NAME (mode
));
13366 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13369 rs6000_can_change_mode_class (machine_mode from
,
13371 reg_class_t rclass
)
13373 unsigned from_size
= GET_MODE_SIZE (from
);
13374 unsigned to_size
= GET_MODE_SIZE (to
);
13376 if (from_size
!= to_size
)
13378 enum reg_class xclass
= (TARGET_VSX
) ? VSX_REGS
: FLOAT_REGS
;
13380 if (reg_classes_intersect_p (xclass
, rclass
))
13382 unsigned to_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, to
);
13383 unsigned from_nregs
= hard_regno_nregs (FIRST_FPR_REGNO
, from
);
13384 bool to_float128_vector_p
= FLOAT128_VECTOR_P (to
);
13385 bool from_float128_vector_p
= FLOAT128_VECTOR_P (from
);
13387 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13388 single register under VSX because the scalar part of the register
13389 is in the upper 64-bits, and not the lower 64-bits. Types like
13390 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
13391 IEEE floating point can't overlap, and neither can small
13394 if (to_float128_vector_p
&& from_float128_vector_p
)
13397 else if (to_float128_vector_p
|| from_float128_vector_p
)
13400 /* TDmode in floating-mode registers must always go into a register
13401 pair with the most significant word in the even-numbered register
13402 to match ISA requirements. In little-endian mode, this does not
13403 match subreg numbering, so we cannot allow subregs. */
13404 if (!BYTES_BIG_ENDIAN
&& (to
== TDmode
|| from
== TDmode
))
13407 /* Allow SD<->DD changes, since SDmode values are stored in
13408 the low half of the DDmode, just like target-independent
13409 code expects. We need to allow at least SD->DD since
13410 rs6000_secondary_memory_needed_mode asks for that change
13411 to be made for SD reloads. */
13412 if ((to
== DDmode
&& from
== SDmode
)
13413 || (to
== SDmode
&& from
== DDmode
))
13416 if (from_size
< 8 || to_size
< 8)
13419 if (from_size
== 8 && (8 * to_nregs
) != to_size
)
13422 if (to_size
== 8 && (8 * from_nregs
) != from_size
)
13431 /* Since the VSX register set includes traditional floating point registers
13432 and altivec registers, just check for the size being different instead of
13433 trying to check whether the modes are vector modes. Otherwise it won't
13434 allow say DF and DI to change classes. For types like TFmode and TDmode
13435 that take 2 64-bit registers, rather than a single 128-bit register, don't
13436 allow subregs of those types to other 128 bit types. */
13437 if (TARGET_VSX
&& VSX_REG_CLASS_P (rclass
))
13439 unsigned num_regs
= (from_size
+ 15) / 16;
13440 if (hard_regno_nregs (FIRST_FPR_REGNO
, to
) > num_regs
13441 || hard_regno_nregs (FIRST_FPR_REGNO
, from
) > num_regs
)
13444 return (from_size
== 8 || from_size
== 16);
13447 if (TARGET_ALTIVEC
&& rclass
== ALTIVEC_REGS
13448 && (ALTIVEC_VECTOR_MODE (from
) + ALTIVEC_VECTOR_MODE (to
)) == 1)
13454 /* Debug version of rs6000_can_change_mode_class. */
13456 rs6000_debug_can_change_mode_class (machine_mode from
,
13458 reg_class_t rclass
)
13460 bool ret
= rs6000_can_change_mode_class (from
, to
, rclass
);
13463 "rs6000_can_change_mode_class, return %s, from = %s, "
13464 "to = %s, rclass = %s\n",
13465 ret
? "true" : "false",
13466 GET_MODE_NAME (from
), GET_MODE_NAME (to
),
13467 reg_class_names
[rclass
]);
13472 /* Return a string to do a move operation of 128 bits of data. */
13475 rs6000_output_move_128bit (rtx operands
[])
13477 rtx dest
= operands
[0];
13478 rtx src
= operands
[1];
13479 machine_mode mode
= GET_MODE (dest
);
13482 bool dest_gpr_p
, dest_fp_p
, dest_vmx_p
, dest_vsx_p
;
13483 bool src_gpr_p
, src_fp_p
, src_vmx_p
, src_vsx_p
;
13487 dest_regno
= REGNO (dest
);
13488 dest_gpr_p
= INT_REGNO_P (dest_regno
);
13489 dest_fp_p
= FP_REGNO_P (dest_regno
);
13490 dest_vmx_p
= ALTIVEC_REGNO_P (dest_regno
);
13491 dest_vsx_p
= dest_fp_p
| dest_vmx_p
;
13496 dest_gpr_p
= dest_fp_p
= dest_vmx_p
= dest_vsx_p
= false;
13501 src_regno
= REGNO (src
);
13502 src_gpr_p
= INT_REGNO_P (src_regno
);
13503 src_fp_p
= FP_REGNO_P (src_regno
);
13504 src_vmx_p
= ALTIVEC_REGNO_P (src_regno
);
13505 src_vsx_p
= src_fp_p
| src_vmx_p
;
13510 src_gpr_p
= src_fp_p
= src_vmx_p
= src_vsx_p
= false;
13513 /* Register moves. */
13514 if (dest_regno
>= 0 && src_regno
>= 0)
13521 if (TARGET_DIRECT_MOVE_128
&& src_vsx_p
)
13522 return (WORDS_BIG_ENDIAN
13523 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13524 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13526 else if (TARGET_VSX
&& TARGET_DIRECT_MOVE
&& src_vsx_p
)
13530 else if (TARGET_VSX
&& dest_vsx_p
)
13533 return "xxlor %x0,%x1,%x1";
13535 else if (TARGET_DIRECT_MOVE_128
&& src_gpr_p
)
13536 return (WORDS_BIG_ENDIAN
13537 ? "mtvsrdd %x0,%1,%L1"
13538 : "mtvsrdd %x0,%L1,%1");
13540 else if (TARGET_DIRECT_MOVE
&& src_gpr_p
)
13544 else if (TARGET_ALTIVEC
&& dest_vmx_p
&& src_vmx_p
)
13545 return "vor %0,%1,%1";
13547 else if (dest_fp_p
&& src_fp_p
)
13552 else if (dest_regno
>= 0 && MEM_P (src
))
13556 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13562 else if (TARGET_ALTIVEC
&& dest_vmx_p
13563 && altivec_indexed_or_indirect_operand (src
, mode
))
13564 return "lvx %0,%y1";
13566 else if (TARGET_VSX
&& dest_vsx_p
)
13568 if (mode_supports_dq_form (mode
)
13569 && quad_address_p (XEXP (src
, 0), mode
, true))
13570 return "lxv %x0,%1";
13572 else if (TARGET_P9_VECTOR
)
13573 return "lxvx %x0,%y1";
13575 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13576 return "lxvw4x %x0,%y1";
13579 return "lxvd2x %x0,%y1";
13582 else if (TARGET_ALTIVEC
&& dest_vmx_p
)
13583 return "lvx %0,%y1";
13585 else if (dest_fp_p
)
13590 else if (src_regno
>= 0 && MEM_P (dest
))
13594 if (TARGET_QUAD_MEMORY
&& quad_load_store_p (dest
, src
))
13595 return "stq %1,%0";
13600 else if (TARGET_ALTIVEC
&& src_vmx_p
13601 && altivec_indexed_or_indirect_operand (dest
, mode
))
13602 return "stvx %1,%y0";
13604 else if (TARGET_VSX
&& src_vsx_p
)
13606 if (mode_supports_dq_form (mode
)
13607 && quad_address_p (XEXP (dest
, 0), mode
, true))
13608 return "stxv %x1,%0";
13610 else if (TARGET_P9_VECTOR
)
13611 return "stxvx %x1,%y0";
13613 else if (mode
== V16QImode
|| mode
== V8HImode
|| mode
== V4SImode
)
13614 return "stxvw4x %x1,%y0";
13617 return "stxvd2x %x1,%y0";
13620 else if (TARGET_ALTIVEC
&& src_vmx_p
)
13621 return "stvx %1,%y0";
13628 else if (dest_regno
>= 0
13629 && (CONST_INT_P (src
)
13630 || CONST_WIDE_INT_P (src
)
13631 || CONST_DOUBLE_P (src
)
13632 || GET_CODE (src
) == CONST_VECTOR
))
13637 else if ((dest_vmx_p
&& TARGET_ALTIVEC
)
13638 || (dest_vsx_p
&& TARGET_VSX
))
13639 return output_vec_const_move (operands
);
13642 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest
, src
));
13645 /* Validate a 128-bit move. */
13647 rs6000_move_128bit_ok_p (rtx operands
[])
13649 machine_mode mode
= GET_MODE (operands
[0]);
13650 return (gpc_reg_operand (operands
[0], mode
)
13651 || gpc_reg_operand (operands
[1], mode
));
13654 /* Return true if a 128-bit move needs to be split. */
13656 rs6000_split_128bit_ok_p (rtx operands
[])
13658 if (!reload_completed
)
13661 if (!gpr_or_gpr_p (operands
[0], operands
[1]))
13664 if (quad_load_store_p (operands
[0], operands
[1]))
13671 /* Given a comparison operation, return the bit number in CCR to test. We
13672 know this is a valid comparison.
13674 SCC_P is 1 if this is for an scc. That means that %D will have been
13675 used instead of %C, so the bits will be in different places.
13677 Return -1 if OP isn't a valid comparison for some reason. */
13680 ccr_bit (rtx op
, int scc_p
)
13682 enum rtx_code code
= GET_CODE (op
);
13683 machine_mode cc_mode
;
13688 if (!COMPARISON_P (op
))
13691 reg
= XEXP (op
, 0);
13693 if (!REG_P (reg
) || !CR_REGNO_P (REGNO (reg
)))
13696 cc_mode
= GET_MODE (reg
);
13697 cc_regnum
= REGNO (reg
);
13698 base_bit
= 4 * (cc_regnum
- CR0_REGNO
);
13700 validate_condition_mode (code
, cc_mode
);
13702 /* When generating a sCOND operation, only positive conditions are
13721 return scc_p
? base_bit
+ 3 : base_bit
+ 2;
13723 return base_bit
+ 2;
13724 case GT
: case GTU
: case UNLE
:
13725 return base_bit
+ 1;
13726 case LT
: case LTU
: case UNGE
:
13728 case ORDERED
: case UNORDERED
:
13729 return base_bit
+ 3;
13732 /* If scc, we will have done a cror to put the bit in the
13733 unordered position. So test that bit. For integer, this is ! LT
13734 unless this is an scc insn. */
13735 return scc_p
? base_bit
+ 3 : base_bit
;
13738 return scc_p
? base_bit
+ 3 : base_bit
+ 1;
13745 /* Return the GOT register. */
13748 rs6000_got_register (rtx value ATTRIBUTE_UNUSED
)
13750 /* The second flow pass currently (June 1999) can't update
13751 regs_ever_live without disturbing other parts of the compiler, so
13752 update it here to make the prolog/epilogue code happy. */
13753 if (!can_create_pseudo_p ()
13754 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM
))
13755 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM
, true);
13757 crtl
->uses_pic_offset_table
= 1;
13759 return pic_offset_table_rtx
;
13762 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13764 /* Write out a function code label. */
13767 rs6000_output_function_entry (FILE *file
, const char *fname
)
13769 if (fname
[0] != '.')
13771 switch (DEFAULT_ABI
)
13774 gcc_unreachable ();
13780 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "L.");
13790 RS6000_OUTPUT_BASENAME (file
, fname
);
13793 /* Print an operand. Recognize special options, documented below. */
13796 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13797 only introduced by the linker, when applying the sda21
13799 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13800 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13802 #define SMALL_DATA_RELOC "sda21"
13803 #define SMALL_DATA_REG 0
13807 print_operand (FILE *file
, rtx x
, int code
)
13810 unsigned HOST_WIDE_INT uval
;
13814 /* %a is output_address. */
13816 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13820 /* Write the MMA accumulator number associated with VSX register X. */
13821 if (!REG_P (x
) || !FP_REGNO_P (REGNO (x
)) || (REGNO (x
) % 4) != 0)
13822 output_operand_lossage ("invalid %%A value");
13824 fprintf (file
, "%d", (REGNO (x
) - FIRST_FPR_REGNO
) / 4);
13828 /* Like 'J' but get to the GT bit only. */
13829 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13831 output_operand_lossage ("invalid %%D value");
13835 /* Bit 1 is GT bit. */
13836 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 1;
13838 /* Add one for shift count in rlinm for scc. */
13839 fprintf (file
, "%d", i
+ 1);
13843 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13846 output_operand_lossage ("invalid %%e value");
13851 if ((uval
& 0xffff) == 0 && uval
!= 0)
13856 /* X is a CR register. Print the number of the EQ bit of the CR */
13857 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13858 output_operand_lossage ("invalid %%E value");
13860 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
) + 2);
13864 /* X is a CR register. Print the shift count needed to move it
13865 to the high-order four bits. */
13866 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13867 output_operand_lossage ("invalid %%f value");
13869 fprintf (file
, "%d", 4 * (REGNO (x
) - CR0_REGNO
));
13873 /* Similar, but print the count for the rotate in the opposite
13875 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
13876 output_operand_lossage ("invalid %%F value");
13878 fprintf (file
, "%d", 32 - 4 * (REGNO (x
) - CR0_REGNO
));
13882 /* X is a constant integer. If it is negative, print "m",
13883 otherwise print "z". This is to make an aze or ame insn. */
13884 if (!CONST_INT_P (x
))
13885 output_operand_lossage ("invalid %%G value");
13886 else if (INTVAL (x
) >= 0)
13893 /* If constant, output low-order five bits. Otherwise, write
13896 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 31);
13898 print_operand (file
, x
, 0);
13902 /* If constant, output low-order six bits. Otherwise, write
13905 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 63);
13907 print_operand (file
, x
, 0);
13911 /* Print `i' if this is a constant, else nothing. */
13917 /* Write the bit number in CCR for jump. */
13918 i
= ccr_bit (x
, 0);
13920 output_operand_lossage ("invalid %%j code");
13922 fprintf (file
, "%d", i
);
13926 /* Similar, but add one for shift count in rlinm for scc and pass
13927 scc flag to `ccr_bit'. */
13928 i
= ccr_bit (x
, 1);
13930 output_operand_lossage ("invalid %%J code");
13932 /* If we want bit 31, write a shift count of zero, not 32. */
13933 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
13937 /* X must be a constant. Write the 1's complement of the
13940 output_operand_lossage ("invalid %%k value");
13942 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~ INTVAL (x
));
13946 /* X must be a symbolic constant on ELF. Write an
13947 expression suitable for an 'addi' that adds in the low 16
13948 bits of the MEM. */
13949 if (GET_CODE (x
) == CONST
)
13951 if (GET_CODE (XEXP (x
, 0)) != PLUS
13952 || (!SYMBOL_REF_P (XEXP (XEXP (x
, 0), 0))
13953 && GET_CODE (XEXP (XEXP (x
, 0), 0)) != LABEL_REF
)
13954 || !CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
13955 output_operand_lossage ("invalid %%K value");
13957 print_operand_address (file
, x
);
13958 fputs ("@l", file
);
13961 /* %l is output_asm_label. */
13964 /* Write second word of DImode or DFmode reference. Works on register
13965 or non-indexed memory only. */
13967 fputs (reg_names
[REGNO (x
) + 1], file
);
13968 else if (MEM_P (x
))
13970 machine_mode mode
= GET_MODE (x
);
13971 /* Handle possible auto-increment. Since it is pre-increment and
13972 we have already done it, we can just use an offset of word. */
13973 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
13974 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
13975 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
13977 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
13978 output_address (mode
, plus_constant (Pmode
, XEXP (XEXP (x
, 0), 0),
13981 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
,
13985 if (small_data_operand (x
, GET_MODE (x
)))
13986 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
13987 reg_names
[SMALL_DATA_REG
]);
13991 case 'N': /* Unused */
13992 /* Write the number of elements in the vector times 4. */
13993 if (GET_CODE (x
) != PARALLEL
)
13994 output_operand_lossage ("invalid %%N value");
13996 fprintf (file
, "%d", XVECLEN (x
, 0) * 4);
13999 case 'O': /* Unused */
14000 /* Similar, but subtract 1 first. */
14001 if (GET_CODE (x
) != PARALLEL
)
14002 output_operand_lossage ("invalid %%O value");
14004 fprintf (file
, "%d", (XVECLEN (x
, 0) - 1) * 4);
14008 /* X is a CONST_INT that is a power of two. Output the logarithm. */
14011 || (i
= exact_log2 (INTVAL (x
))) < 0)
14012 output_operand_lossage ("invalid %%p value");
14014 fprintf (file
, "%d", i
);
14018 /* The operand must be an indirect memory reference. The result
14019 is the register name. */
14020 if (!MEM_P (x
) || !REG_P (XEXP (x
, 0))
14021 || REGNO (XEXP (x
, 0)) >= 32)
14022 output_operand_lossage ("invalid %%P value");
14024 fputs (reg_names
[REGNO (XEXP (x
, 0))], file
);
14028 /* This outputs the logical code corresponding to a boolean
14029 expression. The expression may have one or both operands
14030 negated (if one, only the first one). For condition register
14031 logical operations, it will also treat the negated
14032 CR codes as NOTs, but not handle NOTs of them. */
14034 const char *const *t
= 0;
14036 enum rtx_code code
= GET_CODE (x
);
14037 static const char * const tbl
[3][3] = {
14038 { "and", "andc", "nor" },
14039 { "or", "orc", "nand" },
14040 { "xor", "eqv", "xor" } };
14044 else if (code
== IOR
)
14046 else if (code
== XOR
)
14049 output_operand_lossage ("invalid %%q value");
14051 if (GET_CODE (XEXP (x
, 0)) != NOT
)
14055 if (GET_CODE (XEXP (x
, 1)) == NOT
)
14066 if (! TARGET_MFCRF
)
14072 /* X is a CR register. Print the mask for `mtcrf'. */
14073 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14074 output_operand_lossage ("invalid %%R value");
14076 fprintf (file
, "%d", 128 >> (REGNO (x
) - CR0_REGNO
));
14080 /* Low 5 bits of 32 - value */
14082 output_operand_lossage ("invalid %%s value");
14084 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (32 - INTVAL (x
)) & 31);
14088 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
14089 if (!REG_P (x
) || !CR_REGNO_P (REGNO (x
)))
14091 output_operand_lossage ("invalid %%t value");
14095 /* Bit 3 is OV bit. */
14096 i
= 4 * (REGNO (x
) - CR0_REGNO
) + 3;
14098 /* If we want bit 31, write a shift count of zero, not 32. */
14099 fprintf (file
, "%d", i
== 31 ? 0 : i
+ 1);
14103 /* Print the symbolic name of a branch target register. */
14104 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14105 x
= XVECEXP (x
, 0, 0);
14106 if (!REG_P (x
) || (REGNO (x
) != LR_REGNO
14107 && REGNO (x
) != CTR_REGNO
))
14108 output_operand_lossage ("invalid %%T value");
14109 else if (REGNO (x
) == LR_REGNO
)
14110 fputs ("lr", file
);
14112 fputs ("ctr", file
);
14116 /* High-order or low-order 16 bits of constant, whichever is non-zero,
14117 for use in unsigned operand. */
14120 output_operand_lossage ("invalid %%u value");
14125 if ((uval
& 0xffff) == 0)
14128 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
, uval
& 0xffff);
14132 /* High-order 16 bits of constant for use in signed operand. */
14134 output_operand_lossage ("invalid %%v value");
14136 fprintf (file
, HOST_WIDE_INT_PRINT_HEX
,
14137 (INTVAL (x
) >> 16) & 0xffff);
14141 /* Print `u' if this has an auto-increment or auto-decrement. */
14143 && (GET_CODE (XEXP (x
, 0)) == PRE_INC
14144 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
14145 || GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
))
14150 /* Print the trap code for this operand. */
14151 switch (GET_CODE (x
))
14154 fputs ("eq", file
); /* 4 */
14157 fputs ("ne", file
); /* 24 */
14160 fputs ("lt", file
); /* 16 */
14163 fputs ("le", file
); /* 20 */
14166 fputs ("gt", file
); /* 8 */
14169 fputs ("ge", file
); /* 12 */
14172 fputs ("llt", file
); /* 2 */
14175 fputs ("lle", file
); /* 6 */
14178 fputs ("lgt", file
); /* 1 */
14181 fputs ("lge", file
); /* 5 */
14184 output_operand_lossage ("invalid %%V value");
14189 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14192 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, sext_hwi (INTVAL (x
), 16));
14194 print_operand (file
, x
, 0);
14198 /* X is a FPR or Altivec register used in a VSX context. */
14199 if (!REG_P (x
) || !VSX_REGNO_P (REGNO (x
)))
14200 output_operand_lossage ("invalid %%x value");
14203 int reg
= REGNO (x
);
14204 int vsx_reg
= (FP_REGNO_P (reg
)
14206 : reg
- FIRST_ALTIVEC_REGNO
+ 32);
14208 #ifdef TARGET_REGNAMES
14209 if (TARGET_REGNAMES
)
14210 fprintf (file
, "%%vs%d", vsx_reg
);
14213 fprintf (file
, "%d", vsx_reg
);
14219 && (legitimate_indexed_address_p (XEXP (x
, 0), 0)
14220 || (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
14221 && legitimate_indexed_address_p (XEXP (XEXP (x
, 0), 1), 0))))
14226 /* Like 'L', for third word of TImode/PTImode */
14228 fputs (reg_names
[REGNO (x
) + 2], file
);
14229 else if (MEM_P (x
))
14231 machine_mode mode
= GET_MODE (x
);
14232 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14233 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14234 output_address (mode
, plus_constant (Pmode
,
14235 XEXP (XEXP (x
, 0), 0), 8));
14236 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14237 output_address (mode
, plus_constant (Pmode
,
14238 XEXP (XEXP (x
, 0), 0), 8));
14240 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 8), 0));
14241 if (small_data_operand (x
, GET_MODE (x
)))
14242 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14243 reg_names
[SMALL_DATA_REG
]);
14248 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14249 x
= XVECEXP (x
, 0, 1);
14250 /* X is a SYMBOL_REF. Write out the name preceded by a
14251 period and without any trailing data in brackets. Used for function
14252 names. If we are configured for System V (or the embedded ABI) on
14253 the PowerPC, do not emit the period, since those systems do not use
14254 TOCs and the like. */
14255 if (!SYMBOL_REF_P (x
))
14257 output_operand_lossage ("invalid %%z value");
14261 /* For macho, check to see if we need a stub. */
14264 const char *name
= XSTR (x
, 0);
14266 if (darwin_symbol_stubs
14267 && MACHOPIC_INDIRECT
14268 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
14269 name
= machopic_indirection_name (x
, /*stub_p=*/true);
14271 assemble_name (file
, name
);
14273 else if (!DOT_SYMBOLS
)
14274 assemble_name (file
, XSTR (x
, 0));
14276 rs6000_output_function_entry (file
, XSTR (x
, 0));
14280 /* Like 'L', for last word of TImode/PTImode. */
14282 fputs (reg_names
[REGNO (x
) + 3], file
);
14283 else if (MEM_P (x
))
14285 machine_mode mode
= GET_MODE (x
);
14286 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
14287 || GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14288 output_address (mode
, plus_constant (Pmode
,
14289 XEXP (XEXP (x
, 0), 0), 12));
14290 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14291 output_address (mode
, plus_constant (Pmode
,
14292 XEXP (XEXP (x
, 0), 0), 12));
14294 output_address (mode
, XEXP (adjust_address_nv (x
, SImode
, 12), 0));
14295 if (small_data_operand (x
, GET_MODE (x
)))
14296 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14297 reg_names
[SMALL_DATA_REG
]);
14301 /* Print AltiVec memory operand. */
14306 gcc_assert (MEM_P (x
));
14310 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x
))
14311 && GET_CODE (tmp
) == AND
14312 && CONST_INT_P (XEXP (tmp
, 1))
14313 && INTVAL (XEXP (tmp
, 1)) == -16)
14314 tmp
= XEXP (tmp
, 0);
14315 else if (VECTOR_MEM_VSX_P (GET_MODE (x
))
14316 && GET_CODE (tmp
) == PRE_MODIFY
)
14317 tmp
= XEXP (tmp
, 1);
14319 fprintf (file
, "0,%s", reg_names
[REGNO (tmp
)]);
14322 if (GET_CODE (tmp
) != PLUS
14323 || !REG_P (XEXP (tmp
, 0))
14324 || !REG_P (XEXP (tmp
, 1)))
14326 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14330 if (REGNO (XEXP (tmp
, 0)) == 0)
14331 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 1)) ],
14332 reg_names
[ REGNO (XEXP (tmp
, 0)) ]);
14334 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (tmp
, 0)) ],
14335 reg_names
[ REGNO (XEXP (tmp
, 1)) ]);
14342 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
14343 else if (MEM_P (x
))
14345 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14346 know the width from the mode. */
14347 if (GET_CODE (XEXP (x
, 0)) == PRE_INC
)
14348 fprintf (file
, "%d(%s)", GET_MODE_SIZE (GET_MODE (x
)),
14349 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
14350 else if (GET_CODE (XEXP (x
, 0)) == PRE_DEC
)
14351 fprintf (file
, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x
)),
14352 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
14353 else if (GET_CODE (XEXP (x
, 0)) == PRE_MODIFY
)
14354 output_address (GET_MODE (x
), XEXP (XEXP (x
, 0), 1));
14356 output_address (GET_MODE (x
), XEXP (x
, 0));
14358 else if (toc_relative_expr_p (x
, false,
14359 &tocrel_base_oac
, &tocrel_offset_oac
))
14360 /* This hack along with a corresponding hack in
14361 rs6000_output_addr_const_extra arranges to output addends
14362 where the assembler expects to find them. eg.
14363 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14364 without this hack would be output as "x@toc+4". We
14366 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
14367 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLSGD
)
14368 output_addr_const (file
, XVECEXP (x
, 0, 0));
14369 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PLTSEQ
)
14370 output_addr_const (file
, XVECEXP (x
, 0, 1));
14372 output_addr_const (file
, x
);
14376 if (const char *name
= get_some_local_dynamic_name ())
14377 assemble_name (file
, name
);
14379 output_operand_lossage ("'%%&' used without any "
14380 "local dynamic TLS references");
14384 output_operand_lossage ("invalid %%xn code");
14388 /* Print the address of an operand. */
14391 print_operand_address (FILE *file
, rtx x
)
14394 fprintf (file
, "0(%s)", reg_names
[ REGNO (x
) ]);
14396 /* Is it a PC-relative address? */
14397 else if (TARGET_PCREL
&& pcrel_local_or_external_address (x
, VOIDmode
))
14399 HOST_WIDE_INT offset
;
14401 if (GET_CODE (x
) == CONST
)
14404 if (GET_CODE (x
) == PLUS
)
14406 offset
= INTVAL (XEXP (x
, 1));
14412 output_addr_const (file
, x
);
14415 fprintf (file
, "%+" PRId64
, offset
);
14417 if (SYMBOL_REF_P (x
) && !SYMBOL_REF_LOCAL_P (x
))
14418 fprintf (file
, "@got");
14420 fprintf (file
, "@pcrel");
14422 else if (SYMBOL_REF_P (x
) || GET_CODE (x
) == CONST
14423 || GET_CODE (x
) == LABEL_REF
)
14425 output_addr_const (file
, x
);
14426 if (small_data_operand (x
, GET_MODE (x
)))
14427 fprintf (file
, "@%s(%s)", SMALL_DATA_RELOC
,
14428 reg_names
[SMALL_DATA_REG
]);
14430 gcc_assert (!TARGET_TOC
);
14432 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
14433 && REG_P (XEXP (x
, 1)))
14435 if (REGNO (XEXP (x
, 0)) == 0)
14436 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 1)) ],
14437 reg_names
[ REGNO (XEXP (x
, 0)) ]);
14439 fprintf (file
, "%s,%s", reg_names
[ REGNO (XEXP (x
, 0)) ],
14440 reg_names
[ REGNO (XEXP (x
, 1)) ]);
14442 else if (GET_CODE (x
) == PLUS
&& REG_P (XEXP (x
, 0))
14443 && CONST_INT_P (XEXP (x
, 1)))
14444 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
14445 INTVAL (XEXP (x
, 1)), reg_names
[ REGNO (XEXP (x
, 0)) ]);
14447 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
14448 && CONSTANT_P (XEXP (x
, 1)))
14450 fprintf (file
, "lo16(");
14451 output_addr_const (file
, XEXP (x
, 1));
14452 fprintf (file
, ")(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
14456 else if (GET_CODE (x
) == LO_SUM
&& REG_P (XEXP (x
, 0))
14457 && CONSTANT_P (XEXP (x
, 1)))
14459 output_addr_const (file
, XEXP (x
, 1));
14460 fprintf (file
, "@l(%s)", reg_names
[ REGNO (XEXP (x
, 0)) ]);
14463 else if (toc_relative_expr_p (x
, false, &tocrel_base_oac
, &tocrel_offset_oac
))
14465 /* This hack along with a corresponding hack in
14466 rs6000_output_addr_const_extra arranges to output addends
14467 where the assembler expects to find them. eg.
14469 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14470 without this hack would be output as "x@toc+8@l(9)". We
14471 want "x+8@toc@l(9)". */
14472 output_addr_const (file
, CONST_CAST_RTX (tocrel_base_oac
));
14473 if (GET_CODE (x
) == LO_SUM
)
14474 fprintf (file
, "@l(%s)", reg_names
[REGNO (XEXP (x
, 0))]);
14476 fprintf (file
, "(%s)", reg_names
[REGNO (XVECEXP (tocrel_base_oac
, 0, 1))]);
14479 output_addr_const (file
, x
);
14482 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14485 rs6000_output_addr_const_extra (FILE *file
, rtx x
)
14487 if (GET_CODE (x
) == UNSPEC
)
14488 switch (XINT (x
, 1))
14490 case UNSPEC_TOCREL
:
14491 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x
, 0, 0))
14492 && REG_P (XVECEXP (x
, 0, 1))
14493 && REGNO (XVECEXP (x
, 0, 1)) == TOC_REGISTER
);
14494 output_addr_const (file
, XVECEXP (x
, 0, 0));
14495 if (x
== tocrel_base_oac
&& tocrel_offset_oac
!= const0_rtx
)
14497 if (INTVAL (tocrel_offset_oac
) >= 0)
14498 fprintf (file
, "+");
14499 output_addr_const (file
, CONST_CAST_RTX (tocrel_offset_oac
));
14501 if (!TARGET_AIX
|| (TARGET_ELF
&& TARGET_MINIMAL_TOC
))
14504 assemble_name (file
, toc_label_name
);
14507 else if (TARGET_ELF
)
14508 fputs ("@toc", file
);
14512 case UNSPEC_MACHOPIC_OFFSET
:
14513 output_addr_const (file
, XVECEXP (x
, 0, 0));
14515 machopic_output_function_base_name (file
);
14522 /* Target hook for assembling integer objects. The PowerPC version has
14523 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14524 is defined. It also needs to handle DI-mode objects on 64-bit
14528 rs6000_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
14530 #ifdef RELOCATABLE_NEEDS_FIXUP
14531 /* Special handling for SI values. */
14532 if (RELOCATABLE_NEEDS_FIXUP
&& size
== 4 && aligned_p
)
14534 static int recurse
= 0;
14536 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14537 the .fixup section. Since the TOC section is already relocated, we
14538 don't need to mark it here. We used to skip the text section, but it
14539 should never be valid for relocated addresses to be placed in the text
14541 if (DEFAULT_ABI
== ABI_V4
14542 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
14543 && in_section
!= toc_section
14545 && !CONST_SCALAR_INT_P (x
)
14551 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCP", fixuplabelno
);
14553 ASM_OUTPUT_LABEL (asm_out_file
, buf
);
14554 fprintf (asm_out_file
, "\t.long\t(");
14555 output_addr_const (asm_out_file
, x
);
14556 fprintf (asm_out_file
, ")@fixup\n");
14557 fprintf (asm_out_file
, "\t.section\t\".fixup\",\"aw\"\n");
14558 ASM_OUTPUT_ALIGN (asm_out_file
, 2);
14559 fprintf (asm_out_file
, "\t.long\t");
14560 assemble_name (asm_out_file
, buf
);
14561 fprintf (asm_out_file
, "\n\t.previous\n");
14565 /* Remove initial .'s to turn a -mcall-aixdesc function
14566 address into the address of the descriptor, not the function
14568 else if (SYMBOL_REF_P (x
)
14569 && XSTR (x
, 0)[0] == '.'
14570 && DEFAULT_ABI
== ABI_AIX
)
14572 const char *name
= XSTR (x
, 0);
14573 while (*name
== '.')
14576 fprintf (asm_out_file
, "\t.long\t%s\n", name
);
14580 #endif /* RELOCATABLE_NEEDS_FIXUP */
14581 return default_assemble_integer (x
, size
, aligned_p
);
14584 /* Return a template string for assembly to emit when making an
14585 external call. FUNOP is the call mem argument operand number. */
14587 static const char *
14588 rs6000_call_template_1 (rtx
*operands
, unsigned int funop
, bool sibcall
)
14590 /* -Wformat-overflow workaround, without which gcc thinks that %u
14591 might produce 10 digits. */
14592 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
14596 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
14598 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
14599 sprintf (arg
, "(%%%u@tlsgd)", funop
+ 1);
14600 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
14601 sprintf (arg
, "(%%&@tlsld)");
14604 /* The magic 32768 offset here corresponds to the offset of
14605 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14607 sprintf (z
, "%%z%u%s", funop
,
14608 (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
== 2
14611 static char str
[32]; /* 1 spare */
14612 if (rs6000_pcrel_p ())
14613 sprintf (str
, "b%s %s@notoc%s", sibcall
? "" : "l", z
, arg
);
14614 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
14615 sprintf (str
, "b%s %s%s%s", sibcall
? "" : "l", z
, arg
,
14616 sibcall
? "" : "\n\tnop");
14617 else if (DEFAULT_ABI
== ABI_V4
)
14618 sprintf (str
, "b%s %s%s%s", sibcall
? "" : "l", z
, arg
,
14619 flag_pic
? "@plt" : "");
14621 /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */
14622 else if (DEFAULT_ABI
== ABI_DARWIN
)
14624 /* The cookie is in operand func+2. */
14625 gcc_checking_assert (GET_CODE (operands
[funop
+ 2]) == CONST_INT
);
14626 int cookie
= INTVAL (operands
[funop
+ 2]);
14627 if (cookie
& CALL_LONG
)
14629 tree funname
= get_identifier (XSTR (operands
[funop
], 0));
14630 tree labelname
= get_prev_label (funname
);
14631 gcc_checking_assert (labelname
&& !sibcall
);
14633 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14634 instruction will reach 'foo', otherwise link as 'bl L42'".
14635 "L42" should be a 'branch island', that will do a far jump to
14636 'foo'. Branch islands are generated in
14637 macho_branch_islands(). */
14638 sprintf (str
, "jbsr %%z%u,%.10s", funop
,
14639 IDENTIFIER_POINTER (labelname
));
14642 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14644 sprintf (str
, "b%s %s%s", sibcall
? "" : "l", z
, arg
);
14648 gcc_unreachable ();
14653 rs6000_call_template (rtx
*operands
, unsigned int funop
)
14655 return rs6000_call_template_1 (operands
, funop
, false);
14659 rs6000_sibcall_template (rtx
*operands
, unsigned int funop
)
14661 return rs6000_call_template_1 (operands
, funop
, true);
14664 /* As above, for indirect calls. */
14666 static const char *
14667 rs6000_indirect_call_template_1 (rtx
*operands
, unsigned int funop
,
14670 /* -Wformat-overflow workaround, without which gcc thinks that %u
14671 might produce 10 digits. Note that -Wformat-overflow will not
14672 currently warn here for str[], so do not rely on a warning to
14673 ensure str[] is correctly sized. */
14674 gcc_assert (funop
<= MAX_RECOG_OPERANDS
);
14676 /* Currently, funop is either 0 or 1. The maximum string is always
14677 a !speculate 64-bit __tls_get_addr call.
14680 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14681 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14683 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14684 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14691 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14692 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14694 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14695 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14702 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14703 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14705 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14706 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14713 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14714 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14716 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14717 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14721 static char str
[160]; /* 8 spare */
14723 const char *ptrload
= TARGET_64BIT
? "d" : "wz";
14725 if (DEFAULT_ABI
== ABI_AIX
)
14728 ptrload
, funop
+ 3);
14730 /* We don't need the extra code to stop indirect call speculation if
14732 bool speculate
= (TARGET_MACHO
14733 || rs6000_speculate_indirect_jumps
14734 || (REG_P (operands
[funop
])
14735 && REGNO (operands
[funop
]) == LR_REGNO
));
14737 if (TARGET_PLTSEQ
&& GET_CODE (operands
[funop
]) == UNSPEC
)
14739 const char *rel64
= TARGET_64BIT
? "64" : "";
14742 if (GET_CODE (operands
[funop
+ 1]) == UNSPEC
)
14744 if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSGD
)
14745 sprintf (tls
, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14747 else if (XINT (operands
[funop
+ 1], 1) == UNSPEC_TLSLD
)
14748 sprintf (tls
, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14752 const char *notoc
= rs6000_pcrel_p () ? "_NOTOC" : "";
14753 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
14754 && flag_pic
== 2 ? "+32768" : "");
14758 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14759 tls
, rel64
, notoc
, funop
, addend
);
14760 s
+= sprintf (s
, "crset 2\n\t");
14763 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14764 tls
, rel64
, notoc
, funop
, addend
);
14766 else if (!speculate
)
14767 s
+= sprintf (s
, "crset 2\n\t");
14769 if (rs6000_pcrel_p ())
14772 sprintf (s
, "b%%T%ul", funop
);
14774 sprintf (s
, "beq%%T%ul-", funop
);
14776 else if (DEFAULT_ABI
== ABI_AIX
)
14782 funop
, ptrload
, funop
+ 4);
14787 funop
, ptrload
, funop
+ 4);
14789 else if (DEFAULT_ABI
== ABI_ELFv2
)
14795 funop
, ptrload
, funop
+ 3);
14800 funop
, ptrload
, funop
+ 3);
14807 funop
, sibcall
? "" : "l");
14811 funop
, sibcall
? "" : "l", sibcall
? "\n\tb $" : "");
14817 rs6000_indirect_call_template (rtx
*operands
, unsigned int funop
)
14819 return rs6000_indirect_call_template_1 (operands
, funop
, false);
14823 rs6000_indirect_sibcall_template (rtx
*operands
, unsigned int funop
)
14825 return rs6000_indirect_call_template_1 (operands
, funop
, true);
14829 /* Output indirect call insns. WHICH identifies the type of sequence. */
14831 rs6000_pltseq_template (rtx
*operands
, int which
)
14833 const char *rel64
= TARGET_64BIT
? "64" : "";
14836 if (GET_CODE (operands
[3]) == UNSPEC
)
14838 char off
= which
== RS6000_PLTSEQ_PLT_PCREL34
? '8' : '4';
14839 if (XINT (operands
[3], 1) == UNSPEC_TLSGD
)
14840 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14842 else if (XINT (operands
[3], 1) == UNSPEC_TLSLD
)
14843 sprintf (tls
, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14847 gcc_assert (DEFAULT_ABI
== ABI_ELFv2
|| DEFAULT_ABI
== ABI_V4
);
14848 static char str
[96]; /* 10 spare */
14849 char off
= WORDS_BIG_ENDIAN
? '2' : '4';
14850 const char *addend
= (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
14851 && flag_pic
== 2 ? "+32768" : "");
14854 case RS6000_PLTSEQ_TOCSAVE
:
14857 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14858 TARGET_64BIT
? "d 2,24(1)" : "w 2,12(1)",
14861 case RS6000_PLTSEQ_PLT16_HA
:
14862 if (DEFAULT_ABI
== ABI_V4
&& !flag_pic
)
14865 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14869 "addis %%0,%%1,0\n\t"
14870 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14871 tls
, off
, rel64
, addend
);
14873 case RS6000_PLTSEQ_PLT16_LO
:
14875 "l%s %%0,0(%%1)\n\t"
14876 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14877 TARGET_64BIT
? "d" : "wz",
14878 tls
, off
, rel64
, TARGET_64BIT
? "_DS" : "", addend
);
14880 case RS6000_PLTSEQ_MTCTR
:
14883 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14884 tls
, rel64
, addend
);
14886 case RS6000_PLTSEQ_PLT_PCREL34
:
14888 "pl%s %%0,0(0),1\n\t"
14889 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14890 TARGET_64BIT
? "d" : "wz",
14894 gcc_unreachable ();
14900 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14901 /* Emit an assembler directive to set symbol visibility for DECL to
14902 VISIBILITY_TYPE. */
14905 rs6000_assemble_visibility (tree decl
, int vis
)
14910 /* Functions need to have their entry point symbol visibility set as
14911 well as their descriptor symbol visibility. */
14912 if (DEFAULT_ABI
== ABI_AIX
14914 && TREE_CODE (decl
) == FUNCTION_DECL
)
14916 static const char * const visibility_types
[] = {
14917 NULL
, "protected", "hidden", "internal"
14920 const char *name
, *type
;
14922 name
= ((* targetm
.strip_name_encoding
)
14923 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
))));
14924 type
= visibility_types
[vis
];
14926 fprintf (asm_out_file
, "\t.%s\t%s\n", type
, name
);
14927 fprintf (asm_out_file
, "\t.%s\t.%s\n", type
, name
);
14930 default_assemble_visibility (decl
, vis
);
14934 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
14935 entry. If RECORD_P is true and the target supports named sections,
14936 the location of the NOPs will be recorded in a special object section
14937 called "__patchable_function_entries". This routine may be called
14938 twice per function to put NOPs before and after the function
/* Emit PATCH_AREA_SIZE NOPs for -fpatchable-function-entry.  For functions
   that need a global entry point, emission is deferred to
   rs6000_output_function_prologue (tracked via
   cfun->machine->global_entry_emitted) so the NOPs land around the local
   entry point.  NOTE(review): extraction dropped interior lines here
   (e.g. 14944-14945, the RECORD_P parameter line); code kept byte-identical.  */
14942 rs6000_print_patchable_function_entry (FILE *file
,
14943 unsigned HOST_WIDE_INT patch_area_size
,
14946 bool global_entry_needed_p
= rs6000_global_entry_point_prologue_needed_p ();
14947 /* For a function which needs global entry point, we will emit the
14948 patchable area before and after local entry point under the control of
14949 cfun->machine->global_entry_emitted, see the handling in function
14950 rs6000_output_function_prologue. */
14951 if (!global_entry_needed_p
|| cfun
->machine
->global_entry_emitted
)
14952 default_print_patchable_function_entry (file
, patch_area_size
, record_p
);
/* Reverse comparison CODE for condition-register mode MODE.  For CCFPmode
   comparisons, use the ordered/unordered-aware reversal unless
   -ffinite-math-only makes plain reversal safe (and even then, the
   explicitly unordered codes UNLT..LTGT still need the careful path).  */
14956 rs6000_reverse_condition (machine_mode mode
, enum rtx_code code
)
14958 /* Reversal of FP compares takes care -- an ordered compare
14959 becomes an unordered compare and vice versa. */
14960 if (mode
== CCFPmode
14961 && (!flag_finite_math_only
14962 || code
== UNLT
|| code
== UNLE
|| code
== UNGT
|| code
== UNGE
14963 || code
== UNEQ
|| code
== LTGT
))
14964 return reverse_condition_maybe_unordered (code
);
/* Non-FP (or finite-math FP with an ordered code): plain reversal.  */
14966 return reverse_condition (code
);
14969 /* Check if C (as 64bit integer) can be rotated to a constant which constains
14970 nonzero bits at the LOWBITS low bits only.
14972 Return true if C can be rotated to such constant. If so, *ROT is written
14973 to the number by which C is rotated.
14974 Return false otherwise. */
/* Return true if the 64-bit constant C can be rotated so that all nonzero
   bits land in the low LOWBITS bits; on success *ROT receives the left
   rotation count.  Three shapes are handled: (a) already low, (b) a single
   run of bits with trailing zeros, (c) a run that wraps around the word
   boundary.  NOTE(review): extraction dropped interior lines here
   (e.g. 14983-14988, the case-a early return, and the `return` statements);
   code kept byte-identical.  */
14977 can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c
, int lowbits
, int *rot
)
14979 int clz
= HOST_BITS_PER_WIDE_INT
- lowbits
;
14981 /* case a. 0..0xxx: already at least clz zeros. */
14982 int lz
= clz_hwi (c
);
14989 /* case b. 0..0xxx0..0: at least clz zeros. */
14990 int tz
= ctz_hwi (c
);
14991 if (lz
+ tz
>= clz
)
14993 *rot
= HOST_BITS_PER_WIDE_INT
- tz
;
14997 /* case c. xx10.....0xx: rotate 'clz - 1' bits first, then check case b.
14998 ^bit -> Vbit, , then zeros are at head or tail.
14999 00...00xxx100, 'clz - 1' >= 'bits of xxxx'. */
15000 const int rot_bits
= lowbits
+ 1;
15001 unsigned HOST_WIDE_INT rc
= (c
>> rot_bits
) | (c
<< (clz
- 1));
15003 if (clz_hwi (rc
) + tz
>= clz
)
/* Undo the pre-rotation when reporting the total rotate count.  */
15005 *rot
= HOST_BITS_PER_WIDE_INT
- (tz
+ rot_bits
);
15012 /* Check if C (as 64bit integer) can be rotated to a positive 16bits constant
15013 which contains 48bits leading zeros and 16bits of any value. */
/* Return true if C can be rotated (by a nonzero amount) into a constant
   with 48 leading zero bits and arbitrary low 16 bits.  The rot > 0 check
   excludes values that already fit, which other patterns handle directly.
   NOTE(review): the `int rot;` declaration line was dropped by the
   extraction; code kept byte-identical.  */
15016 can_be_rotated_to_positive_16bits (HOST_WIDE_INT c
)
15019 bool res
= can_be_rotated_to_lowbits (c
, 16, &rot
);
15020 return res
&& rot
> 0;
15023 /* Check if C (as 64bit integer) can be rotated to a negative 15bits constant
15024 which contains 49bits leading ones and 15bits of any value. */
/* Return true if C can be rotated (by a nonzero amount) into a constant
   with 49 leading one bits and arbitrary low 15 bits.  Implemented by
   rotating the complement into the low 15 bits.  */
15027 can_be_rotated_to_negative_15bits (HOST_WIDE_INT c
)
15030 bool res
= can_be_rotated_to_lowbits (~c
, 15, &rot
);
15031 return res
&& rot
> 0;
15034 /* Generate a compare for CODE. Return a brand-new rtx that
15035 represents the result of the compare. */
15038 rs6000_generate_compare (rtx cmp
, machine_mode mode
)
15040 machine_mode comp_mode
;
15041 rtx compare_result
;
15042 enum rtx_code code
= GET_CODE (cmp
);
15043 rtx op0
= XEXP (cmp
, 0);
15044 rtx op1
= XEXP (cmp
, 1);
15046 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
15047 comp_mode
= CCmode
;
15048 else if (FLOAT_MODE_P (mode
))
15049 comp_mode
= CCFPmode
;
15050 else if (code
== GTU
|| code
== LTU
15051 || code
== GEU
|| code
== LEU
)
15052 comp_mode
= CCUNSmode
;
15053 else if ((code
== EQ
|| code
== NE
)
15054 && unsigned_reg_p (op0
)
15055 && (unsigned_reg_p (op1
)
15056 || (CONST_INT_P (op1
) && INTVAL (op1
) != 0)))
15057 /* These are unsigned values, perhaps there will be a later
15058 ordering compare that can be shared with this one. */
15059 comp_mode
= CCUNSmode
;
15061 comp_mode
= CCmode
;
15063 /* If we have an unsigned compare, make sure we don't have a signed value as
15065 if (comp_mode
== CCUNSmode
&& CONST_INT_P (op1
)
15066 && INTVAL (op1
) < 0)
15068 op0
= copy_rtx_if_shared (op0
);
15069 op1
= force_reg (GET_MODE (op0
), op1
);
15070 cmp
= gen_rtx_fmt_ee (code
, GET_MODE (cmp
), op0
, op1
);
15073 /* First, the compare. */
15074 compare_result
= gen_reg_rtx (comp_mode
);
15076 /* IEEE 128-bit support in VSX registers when we do not have hardware
15078 if (!TARGET_FLOAT128_HW
&& FLOAT128_VECTOR_P (mode
))
15080 rtx libfunc
= NULL_RTX
;
15081 bool check_nan
= false;
15088 libfunc
= optab_libfunc (eq_optab
, mode
);
15093 libfunc
= optab_libfunc (ge_optab
, mode
);
15098 libfunc
= optab_libfunc (le_optab
, mode
);
15103 libfunc
= optab_libfunc (unord_optab
, mode
);
15104 code
= (code
== UNORDERED
) ? NE
: EQ
;
15110 libfunc
= optab_libfunc (ge_optab
, mode
);
15111 code
= (code
== UNGE
) ? GE
: GT
;
15117 libfunc
= optab_libfunc (le_optab
, mode
);
15118 code
= (code
== UNLE
) ? LE
: LT
;
15124 libfunc
= optab_libfunc (eq_optab
, mode
);
15125 code
= (code
= UNEQ
) ? EQ
: NE
;
15129 gcc_unreachable ();
15132 gcc_assert (libfunc
);
15135 dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
15136 SImode
, op0
, mode
, op1
, mode
);
15138 /* The library signals an exception for signalling NaNs, so we need to
15139 handle isgreater, etc. by first checking isordered. */
15142 rtx ne_rtx
, normal_dest
, unord_dest
;
15143 rtx unord_func
= optab_libfunc (unord_optab
, mode
);
15144 rtx join_label
= gen_label_rtx ();
15145 rtx join_ref
= gen_rtx_LABEL_REF (VOIDmode
, join_label
);
15146 rtx unord_cmp
= gen_reg_rtx (comp_mode
);
15149 /* Test for either value being a NaN. */
15150 gcc_assert (unord_func
);
15151 unord_dest
= emit_library_call_value (unord_func
, NULL_RTX
, LCT_CONST
,
15152 SImode
, op0
, mode
, op1
, mode
);
15154 /* Set value (0) if either value is a NaN, and jump to the join
15156 dest
= gen_reg_rtx (SImode
);
15157 emit_move_insn (dest
, const1_rtx
);
15158 emit_insn (gen_rtx_SET (unord_cmp
,
15159 gen_rtx_COMPARE (comp_mode
, unord_dest
,
15162 ne_rtx
= gen_rtx_NE (comp_mode
, unord_cmp
, const0_rtx
);
15163 emit_jump_insn (gen_rtx_SET (pc_rtx
,
15164 gen_rtx_IF_THEN_ELSE (VOIDmode
, ne_rtx
,
15168 /* Do the normal comparison, knowing that the values are not
15170 normal_dest
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_CONST
,
15171 SImode
, op0
, mode
, op1
, mode
);
15173 emit_insn (gen_cstoresi4 (dest
,
15174 gen_rtx_fmt_ee (code
, SImode
, normal_dest
,
15176 normal_dest
, const0_rtx
));
15178 /* Join NaN and non-Nan paths. Compare dest against 0. */
15179 emit_label (join_label
);
15183 emit_insn (gen_rtx_SET (compare_result
,
15184 gen_rtx_COMPARE (comp_mode
, dest
, const0_rtx
)));
15189 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
15190 CLOBBERs to match cmptf_internal2 pattern. */
15191 if (comp_mode
== CCFPmode
&& TARGET_XL_COMPAT
15192 && FLOAT128_IBM_P (GET_MODE (op0
))
15193 && TARGET_HARD_FLOAT
)
15194 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
15196 gen_rtx_SET (compare_result
,
15197 gen_rtx_COMPARE (comp_mode
, op0
, op1
)),
15198 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15199 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15200 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15201 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15202 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15203 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15204 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15205 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (DFmode
)),
15206 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (Pmode
)))));
15207 else if (GET_CODE (op1
) == UNSPEC
15208 && XINT (op1
, 1) == UNSPEC_SP_TEST
)
15210 rtx op1b
= XVECEXP (op1
, 0, 0);
15211 comp_mode
= CCEQmode
;
15212 compare_result
= gen_reg_rtx (CCEQmode
);
15214 emit_insn (gen_stack_protect_testdi (compare_result
, op0
, op1b
));
15216 emit_insn (gen_stack_protect_testsi (compare_result
, op0
, op1b
));
15219 emit_insn (gen_rtx_SET (compare_result
,
15220 gen_rtx_COMPARE (comp_mode
, op0
, op1
)));
15223 validate_condition_mode (code
, GET_MODE (compare_result
));
15225 return gen_rtx_fmt_ee (code
, VOIDmode
, compare_result
, const0_rtx
);
15229 /* Return the diagnostic message string if the binary operation OP is
15230 not permitted on TYPE1 and TYPE2, NULL otherwise. */
/* Target hook: return a diagnostic string if a binary operation on TYPE1
   and TYPE2 is not permitted, NULL if it is.  Rejects mixing IEEE 128-bit
   and IBM double-double operands unless -mfloat128-convert.  NOTE(review):
   extraction dropped interior lines (e.g. 15234-15236 with the TYPE1/TYPE2
   parameters, and the NULL returns); code kept byte-identical.  */
15233 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED
,
15237 machine_mode mode1
= TYPE_MODE (type1
);
15238 machine_mode mode2
= TYPE_MODE (type2
);
15240 /* For complex modes, use the inner type. */
15241 if (COMPLEX_MODE_P (mode1
))
15242 mode1
= GET_MODE_INNER (mode1
);
15244 if (COMPLEX_MODE_P (mode2
))
15245 mode2
= GET_MODE_INNER (mode2
);
15247 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15248 double to intermix unless -mfloat128-convert. */
15249 if (mode1
== mode2
)
15252 if (!TARGET_FLOAT128_CVT
)
15254 if ((FLOAT128_IEEE_P (mode1
) && FLOAT128_IBM_P (mode2
))
15255 || (FLOAT128_IBM_P (mode1
) && FLOAT128_IEEE_P (mode2
)))
15256 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15264 /* Expand floating point conversion to/from __float128 and __ibm128. */
15267 rs6000_expand_float128_convert (rtx dest
, rtx src
, bool unsigned_p
)
15269 machine_mode dest_mode
= GET_MODE (dest
);
15270 machine_mode src_mode
= GET_MODE (src
);
15271 convert_optab cvt
= unknown_optab
;
15272 bool do_move
= false;
15273 rtx libfunc
= NULL_RTX
;
15275 typedef rtx (*rtx_2func_t
) (rtx
, rtx
);
15276 rtx_2func_t hw_convert
= (rtx_2func_t
)0;
15280 rtx_2func_t from_df
;
15281 rtx_2func_t from_sf
;
15282 rtx_2func_t from_si_sign
;
15283 rtx_2func_t from_si_uns
;
15284 rtx_2func_t from_di_sign
;
15285 rtx_2func_t from_di_uns
;
15288 rtx_2func_t to_si_sign
;
15289 rtx_2func_t to_si_uns
;
15290 rtx_2func_t to_di_sign
;
15291 rtx_2func_t to_di_uns
;
15292 } hw_conversions
[2] = {
15293 /* convertions to/from KFmode */
15295 gen_extenddfkf2_hw
, /* KFmode <- DFmode. */
15296 gen_extendsfkf2_hw
, /* KFmode <- SFmode. */
15297 gen_float_kfsi2_hw
, /* KFmode <- SImode (signed). */
15298 gen_floatuns_kfsi2_hw
, /* KFmode <- SImode (unsigned). */
15299 gen_float_kfdi2_hw
, /* KFmode <- DImode (signed). */
15300 gen_floatuns_kfdi2_hw
, /* KFmode <- DImode (unsigned). */
15301 gen_trunckfdf2_hw
, /* DFmode <- KFmode. */
15302 gen_trunckfsf2_hw
, /* SFmode <- KFmode. */
15303 gen_fix_kfsi2_hw
, /* SImode <- KFmode (signed). */
15304 gen_fixuns_kfsi2_hw
, /* SImode <- KFmode (unsigned). */
15305 gen_fix_kfdi2_hw
, /* DImode <- KFmode (signed). */
15306 gen_fixuns_kfdi2_hw
, /* DImode <- KFmode (unsigned). */
15309 /* convertions to/from TFmode */
15311 gen_extenddftf2_hw
, /* TFmode <- DFmode. */
15312 gen_extendsftf2_hw
, /* TFmode <- SFmode. */
15313 gen_float_tfsi2_hw
, /* TFmode <- SImode (signed). */
15314 gen_floatuns_tfsi2_hw
, /* TFmode <- SImode (unsigned). */
15315 gen_float_tfdi2_hw
, /* TFmode <- DImode (signed). */
15316 gen_floatuns_tfdi2_hw
, /* TFmode <- DImode (unsigned). */
15317 gen_trunctfdf2_hw
, /* DFmode <- TFmode. */
15318 gen_trunctfsf2_hw
, /* SFmode <- TFmode. */
15319 gen_fix_tfsi2_hw
, /* SImode <- TFmode (signed). */
15320 gen_fixuns_tfsi2_hw
, /* SImode <- TFmode (unsigned). */
15321 gen_fix_tfdi2_hw
, /* DImode <- TFmode (signed). */
15322 gen_fixuns_tfdi2_hw
, /* DImode <- TFmode (unsigned). */
15326 if (dest_mode
== src_mode
)
15327 gcc_unreachable ();
15329 /* Eliminate memory operations. */
15331 src
= force_reg (src_mode
, src
);
15335 rtx tmp
= gen_reg_rtx (dest_mode
);
15336 rs6000_expand_float128_convert (tmp
, src
, unsigned_p
);
15337 rs6000_emit_move (dest
, tmp
, dest_mode
);
15341 /* Convert to IEEE 128-bit floating point. */
15342 if (FLOAT128_IEEE_P (dest_mode
))
15344 if (dest_mode
== KFmode
)
15346 else if (dest_mode
== TFmode
)
15349 gcc_unreachable ();
15355 hw_convert
= hw_conversions
[kf_or_tf
].from_df
;
15360 hw_convert
= hw_conversions
[kf_or_tf
].from_sf
;
15366 if (FLOAT128_IBM_P (src_mode
))
15375 cvt
= ufloat_optab
;
15376 hw_convert
= hw_conversions
[kf_or_tf
].from_si_uns
;
15380 cvt
= sfloat_optab
;
15381 hw_convert
= hw_conversions
[kf_or_tf
].from_si_sign
;
15388 cvt
= ufloat_optab
;
15389 hw_convert
= hw_conversions
[kf_or_tf
].from_di_uns
;
15393 cvt
= sfloat_optab
;
15394 hw_convert
= hw_conversions
[kf_or_tf
].from_di_sign
;
15399 gcc_unreachable ();
15403 /* Convert from IEEE 128-bit floating point. */
15404 else if (FLOAT128_IEEE_P (src_mode
))
15406 if (src_mode
== KFmode
)
15408 else if (src_mode
== TFmode
)
15411 gcc_unreachable ();
15417 hw_convert
= hw_conversions
[kf_or_tf
].to_df
;
15422 hw_convert
= hw_conversions
[kf_or_tf
].to_sf
;
15428 if (FLOAT128_IBM_P (dest_mode
))
15438 hw_convert
= hw_conversions
[kf_or_tf
].to_si_uns
;
15443 hw_convert
= hw_conversions
[kf_or_tf
].to_si_sign
;
15451 hw_convert
= hw_conversions
[kf_or_tf
].to_di_uns
;
15456 hw_convert
= hw_conversions
[kf_or_tf
].to_di_sign
;
15461 gcc_unreachable ();
15465 /* Both IBM format. */
15466 else if (FLOAT128_IBM_P (dest_mode
) && FLOAT128_IBM_P (src_mode
))
15470 gcc_unreachable ();
15472 /* Handle conversion between TFmode/KFmode/IFmode. */
15474 emit_insn (gen_rtx_SET (dest
, gen_rtx_FLOAT_EXTEND (dest_mode
, src
)));
15476 /* Handle conversion if we have hardware support. */
15477 else if (TARGET_FLOAT128_HW
&& hw_convert
)
15478 emit_insn ((hw_convert
) (dest
, src
));
15480 /* Call an external function to do the conversion. */
15481 else if (cvt
!= unknown_optab
)
15483 libfunc
= convert_optab_libfunc (cvt
, dest_mode
, src_mode
);
15484 gcc_assert (libfunc
!= NULL_RTX
);
15486 dest2
= emit_library_call_value (libfunc
, dest
, LCT_CONST
, dest_mode
,
15489 gcc_assert (dest2
!= NULL_RTX
);
15490 if (!rtx_equal_p (dest
, dest2
))
15491 emit_move_insn (dest
, dest2
);
15495 gcc_unreachable ();
15501 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15502 can be used as that dest register. Return the dest register. */
/* Emit RTL that computes a register that is zero iff OP1 == OP2, using
   SCRATCH as the destination when it is a real register.  XOR is used when
   OP2 is a logical_operand (cheap immediate form); otherwise OP1 + (-OP2).
   NOTE(review): extraction dropped interior lines (e.g. 15508-15509, the
   early return for OP2 == 0, and the `else`); code kept byte-identical.  */
15505 rs6000_emit_eqne (machine_mode mode
, rtx op1
, rtx op2
, rtx scratch
)
15507 if (op2
== const0_rtx
)
/* A bare (scratch) can't be a SET destination; materialize a pseudo.  */
15510 if (GET_CODE (scratch
) == SCRATCH
)
15511 scratch
= gen_reg_rtx (mode
);
15513 if (logical_operand (op2
, mode
))
15514 emit_insn (gen_rtx_SET (scratch
, gen_rtx_XOR (mode
, op1
, op2
)));
15516 emit_insn (gen_rtx_SET (scratch
,
15517 gen_rtx_PLUS (mode
, op1
, negate_rtx (mode
, op2
))));
15522 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15523 requires this. The result is mode MODE. */
15525 rs6000_emit_fp_cror (rtx_code code
, machine_mode mode
, rtx x
)
15529 if (code
== LTGT
|| code
== LE
|| code
== UNLT
)
15530 cond
[n
++] = gen_rtx_fmt_ee (LT
, mode
, x
, const0_rtx
);
15531 if (code
== LTGT
|| code
== GE
|| code
== UNGT
)
15532 cond
[n
++] = gen_rtx_fmt_ee (GT
, mode
, x
, const0_rtx
);
15533 if (code
== LE
|| code
== GE
|| code
== UNEQ
)
15534 cond
[n
++] = gen_rtx_fmt_ee (EQ
, mode
, x
, const0_rtx
);
15535 if (code
== UNLT
|| code
== UNGT
|| code
== UNEQ
)
15536 cond
[n
++] = gen_rtx_fmt_ee (UNORDERED
, mode
, x
, const0_rtx
);
15538 gcc_assert (n
== 2);
15540 rtx cc
= gen_reg_rtx (CCEQmode
);
15541 rtx logical
= gen_rtx_IOR (mode
, cond
[0], cond
[1]);
15542 emit_insn (gen_cceq_ior_compare (mode
, cc
, logical
, cond
[0], x
, cond
[1], x
));
15548 rs6000_emit_sCOND (machine_mode mode
, rtx operands
[])
15550 rtx condition_rtx
= rs6000_generate_compare (operands
[1], mode
);
15551 rtx_code cond_code
= GET_CODE (condition_rtx
);
15553 if (FLOAT_MODE_P (mode
) && HONOR_NANS (mode
)
15554 && !(FLOAT128_VECTOR_P (mode
) && !TARGET_FLOAT128_HW
))
15556 else if (cond_code
== NE
15557 || cond_code
== GE
|| cond_code
== LE
15558 || cond_code
== GEU
|| cond_code
== LEU
15559 || cond_code
== ORDERED
|| cond_code
== UNGE
|| cond_code
== UNLE
)
15561 rtx not_result
= gen_reg_rtx (CCEQmode
);
15562 rtx not_op
, rev_cond_rtx
;
15563 machine_mode cc_mode
;
15565 cc_mode
= GET_MODE (XEXP (condition_rtx
, 0));
15567 rev_cond_rtx
= gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode
, cond_code
),
15568 SImode
, XEXP (condition_rtx
, 0), const0_rtx
);
15569 not_op
= gen_rtx_COMPARE (CCEQmode
, rev_cond_rtx
, const0_rtx
);
15570 emit_insn (gen_rtx_SET (not_result
, not_op
));
15571 condition_rtx
= gen_rtx_EQ (VOIDmode
, not_result
, const0_rtx
);
15574 machine_mode op_mode
= GET_MODE (XEXP (operands
[1], 0));
15575 if (op_mode
== VOIDmode
)
15576 op_mode
= GET_MODE (XEXP (operands
[1], 1));
15578 if (TARGET_POWERPC64
&& (op_mode
== DImode
|| FLOAT_MODE_P (mode
)))
15580 PUT_MODE (condition_rtx
, DImode
);
15581 convert_move (operands
[0], condition_rtx
, 0);
15585 PUT_MODE (condition_rtx
, SImode
);
15586 emit_insn (gen_rtx_SET (operands
[0], condition_rtx
));
15590 /* Emit a branch of kind CODE to location LOC. */
/* Expand a conditional branch: generate the compare for OPERANDS[0] in
   mode MODE, then emit (set pc (if_then_else cond (label_ref OPERANDS[3])
   pc)).  */
15593 rs6000_emit_cbranch (machine_mode mode
, rtx operands
[])
15595 rtx condition_rtx
= rs6000_generate_compare (operands
[0], mode
);
15596 rtx loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
15597 rtx ite
= gen_rtx_IF_THEN_ELSE (VOIDmode
, condition_rtx
, loc_ref
, pc_rtx
);
15598 emit_jump_insn (gen_rtx_SET (pc_rtx
, ite
));
15601 /* Return the string to output a conditional branch to LABEL, which is
15602 the operand template of the label, or NULL if the branch is really a
15603 conditional return.
15605 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15606 condition code register and its mode specifies what kind of
15607 comparison we made.
15609 REVERSED is nonzero if we should reverse the sense of the comparison.
15611 INSN is the insn. */
15614 output_cbranch (rtx op
, const char *label
, int reversed
, rtx_insn
*insn
)
15616 static char string
[64];
15617 enum rtx_code code
= GET_CODE (op
);
15618 rtx cc_reg
= XEXP (op
, 0);
15619 machine_mode mode
= GET_MODE (cc_reg
);
15620 int cc_regno
= REGNO (cc_reg
) - CR0_REGNO
;
15621 int need_longbranch
= label
!= NULL
&& get_attr_length (insn
) == 8;
15622 int really_reversed
= reversed
^ need_longbranch
;
15628 validate_condition_mode (code
, mode
);
15630 /* Work out which way this really branches. We could use
15631 reverse_condition_maybe_unordered here always but this
15632 makes the resulting assembler clearer. */
15633 if (really_reversed
)
15635 /* Reversal of FP compares takes care -- an ordered compare
15636 becomes an unordered compare and vice versa. */
15637 if (mode
== CCFPmode
)
15638 code
= reverse_condition_maybe_unordered (code
);
15640 code
= reverse_condition (code
);
15645 /* Not all of these are actually distinct opcodes, but
15646 we distinguish them for clarity of the resulting assembler. */
15647 case NE
: case LTGT
:
15648 ccode
= "ne"; break;
15649 case EQ
: case UNEQ
:
15650 ccode
= "eq"; break;
15652 ccode
= "ge"; break;
15653 case GT
: case GTU
: case UNGT
:
15654 ccode
= "gt"; break;
15656 ccode
= "le"; break;
15657 case LT
: case LTU
: case UNLT
:
15658 ccode
= "lt"; break;
15659 case UNORDERED
: ccode
= "un"; break;
15660 case ORDERED
: ccode
= "nu"; break;
15661 case UNGE
: ccode
= "nl"; break;
15662 case UNLE
: ccode
= "ng"; break;
15664 gcc_unreachable ();
15667 /* Maybe we have a guess as to how likely the branch is. */
15669 note
= find_reg_note (insn
, REG_BR_PROB
, NULL_RTX
);
15670 if (note
!= NULL_RTX
)
15672 /* PROB is the difference from 50%. */
15673 int prob
= profile_probability::from_reg_br_prob_note (XINT (note
, 0))
15674 .to_reg_br_prob_base () - REG_BR_PROB_BASE
/ 2;
15676 /* Only hint for highly probable/improbable branches on newer cpus when
15677 we have real profile data, as static prediction overrides processor
15678 dynamic prediction. For older cpus we may as well always hint, but
15679 assume not taken for branches that are very close to 50% as a
15680 mispredicted taken branch is more expensive than a
15681 mispredicted not-taken branch. */
15682 if (rs6000_always_hint
15683 || (abs (prob
) > REG_BR_PROB_BASE
/ 100 * 48
15684 && (profile_status_for_fn (cfun
) != PROFILE_GUESSED
)
15685 && br_prob_note_reliable_p (note
)))
15687 if (abs (prob
) > REG_BR_PROB_BASE
/ 20
15688 && ((prob
> 0) ^ need_longbranch
))
15696 s
+= sprintf (s
, "b%slr%s ", ccode
, pred
);
15698 s
+= sprintf (s
, "b%s%s ", ccode
, pred
);
15700 /* We need to escape any '%' characters in the reg_names string.
15701 Assume they'd only be the first character.... */
15702 if (reg_names
[cc_regno
+ CR0_REGNO
][0] == '%')
15704 s
+= sprintf (s
, "%s", reg_names
[cc_regno
+ CR0_REGNO
]);
15708 /* If the branch distance was too far, we may have to use an
15709 unconditional branch to go the distance. */
15710 if (need_longbranch
)
15711 s
+= sprintf (s
, ",$+8\n\tb %s", label
);
15713 s
+= sprintf (s
, ",%s", label
);
15719 /* Return insn for VSX or Altivec comparisons. */
/* Emit a single VSX/Altivec vector compare of OP0 and OP1 with CODE when
   the hardware supports that code directly; the mask register is returned.
   NOTE(review): extraction dropped interior lines (15726-15743, the
   per-code support checks, and the surrounding returns); code kept
   byte-identical.  */
15722 rs6000_emit_vector_compare_inner (enum rtx_code code
, rtx op0
, rtx op1
)
15725 machine_mode mode
= GET_MODE (op0
);
15733 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
15744 mask
= gen_reg_rtx (mode
);
15745 emit_insn (gen_rtx_SET (mask
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
15752 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15753 DMODE is expected destination mode. This is a recursive function. */
15756 rs6000_emit_vector_compare (enum rtx_code rcode
,
15758 machine_mode dmode
)
15761 bool swap_operands
= false;
15762 bool try_again
= false;
15764 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode
));
15765 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
15767 /* See if the comparison works as is. */
15768 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
15776 swap_operands
= true;
15781 swap_operands
= true;
15789 /* Invert condition and try again.
15790 e.g., A != B becomes ~(A==B). */
15792 enum rtx_code rev_code
;
15793 enum insn_code nor_code
;
15796 rev_code
= reverse_condition_maybe_unordered (rcode
);
15797 if (rev_code
== UNKNOWN
)
15800 nor_code
= optab_handler (one_cmpl_optab
, dmode
);
15801 if (nor_code
== CODE_FOR_nothing
)
15804 mask2
= rs6000_emit_vector_compare (rev_code
, op0
, op1
, dmode
);
15808 mask
= gen_reg_rtx (dmode
);
15809 emit_insn (GEN_FCN (nor_code
) (mask
, mask2
));
15817 /* Try GT/GTU/LT/LTU OR EQ */
15820 enum insn_code ior_code
;
15821 enum rtx_code new_code
;
15842 gcc_unreachable ();
15845 ior_code
= optab_handler (ior_optab
, dmode
);
15846 if (ior_code
== CODE_FOR_nothing
)
15849 c_rtx
= rs6000_emit_vector_compare (new_code
, op0
, op1
, dmode
);
15853 eq_rtx
= rs6000_emit_vector_compare (EQ
, op0
, op1
, dmode
);
15857 mask
= gen_reg_rtx (dmode
);
15858 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
15869 std::swap (op0
, op1
);
15871 mask
= rs6000_emit_vector_compare_inner (rcode
, op0
, op1
);
15876 /* You only get two chances. */
15880 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15881 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15882 operands for the relation operation COND. */
15885 rs6000_emit_vector_cond_expr (rtx dest
, rtx op_true
, rtx op_false
,
15886 rtx cond
, rtx cc_op0
, rtx cc_op1
)
15888 machine_mode dest_mode
= GET_MODE (dest
);
15889 machine_mode mask_mode
= GET_MODE (cc_op0
);
15890 enum rtx_code rcode
= GET_CODE (cond
);
15892 bool invert_move
= false;
15894 if (VECTOR_UNIT_NONE_P (dest_mode
))
15897 gcc_assert (GET_MODE_SIZE (dest_mode
) == GET_MODE_SIZE (mask_mode
)
15898 && GET_MODE_NUNITS (dest_mode
) == GET_MODE_NUNITS (mask_mode
));
15902 /* Swap operands if we can, and fall back to doing the operation as
15903 specified, and doing a NOR to invert the test. */
15909 /* Invert condition and try again.
15910 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15911 invert_move
= true;
15912 rcode
= reverse_condition_maybe_unordered (rcode
);
15913 if (rcode
== UNKNOWN
)
15919 if (GET_MODE_CLASS (mask_mode
) == MODE_VECTOR_INT
)
15921 /* Invert condition to avoid compound test. */
15922 invert_move
= true;
15923 rcode
= reverse_condition (rcode
);
15932 /* Invert condition to avoid compound test if necessary. */
15933 if (rcode
== GEU
|| rcode
== LEU
)
15935 invert_move
= true;
15936 rcode
= reverse_condition (rcode
);
15944 /* Get the vector mask for the given relational operations. */
15945 mask
= rs6000_emit_vector_compare (rcode
, cc_op0
, cc_op1
, mask_mode
);
15950 if (mask_mode
!= dest_mode
)
15951 mask
= simplify_gen_subreg (dest_mode
, mask
, mask_mode
, 0);
15954 std::swap (op_true
, op_false
);
15956 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
15957 if (GET_MODE_CLASS (dest_mode
) == MODE_VECTOR_INT
15958 && (GET_CODE (op_true
) == CONST_VECTOR
15959 || GET_CODE (op_false
) == CONST_VECTOR
))
15961 rtx constant_0
= CONST0_RTX (dest_mode
);
15962 rtx constant_m1
= CONSTM1_RTX (dest_mode
);
15964 if (op_true
== constant_m1
&& op_false
== constant_0
)
15966 emit_move_insn (dest
, mask
);
15970 else if (op_true
== constant_0
&& op_false
== constant_m1
)
15972 emit_insn (gen_rtx_SET (dest
, gen_rtx_NOT (dest_mode
, mask
)));
15976 /* If we can't use the vector comparison directly, perhaps we can use
15977 the mask for the true or false fields, instead of loading up a
15979 if (op_true
== constant_m1
)
15982 if (op_false
== constant_0
)
15986 if (!REG_P (op_true
) && !SUBREG_P (op_true
))
15987 op_true
= force_reg (dest_mode
, op_true
);
15989 if (!REG_P (op_false
) && !SUBREG_P (op_false
))
15990 op_false
= force_reg (dest_mode
, op_false
);
15992 rtx tmp
= gen_rtx_IOR (dest_mode
,
15993 gen_rtx_AND (dest_mode
, gen_rtx_NOT (dest_mode
, mask
),
15995 gen_rtx_AND (dest_mode
, mask
, op_true
));
15996 emit_insn (gen_rtx_SET (dest
, tmp
));
16000 /* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
16001 maximum or minimum with "C" semantics.
16003 Unless you use -ffast-math, you can't use these instructions to replace
16004 conditions that implicitly reverse the condition because the comparison
16005 might generate a NaN or signed zer0.
16007 I.e. the following can be replaced all of the time
16008 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
16009 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
16010 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
16011 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
16013 The following can be replaced only if -ffast-math is used:
16014 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
16015 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
16016 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
16017 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
16019 Move TRUE_COND to DEST if OP of the operands of the last comparison is
16020 nonzero/true, FALSE_COND if it is zero/false.
16022 Return false if we can't generate the appropriate minimum or maximum, and
16023 true if we can did the minimum or maximum. */
16026 rs6000_maybe_emit_maxc_minc (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16028 enum rtx_code code
= GET_CODE (op
);
16029 rtx op0
= XEXP (op
, 0);
16030 rtx op1
= XEXP (op
, 1);
16031 machine_mode compare_mode
= GET_MODE (op0
);
16032 machine_mode result_mode
= GET_MODE (dest
);
16034 if (result_mode
!= compare_mode
)
16037 /* See the comments of this function, it simply expects GE/GT/LE/LT in
16038 the checks, but for the reversible equivalent UNLT/UNLE/UNGT/UNGE,
16039 we need to do the reversions first to make the following checks
16040 support fewer cases, like:
16042 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
16043 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1;
16044 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
16045 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1;
16047 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
16048 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
16049 have to check for fast-math or the like. */
16050 if (code
== UNGE
|| code
== UNGT
|| code
== UNLE
|| code
== UNLT
)
16052 code
= reverse_condition_maybe_unordered (code
);
16053 std::swap (true_cond
, false_cond
);
16057 if (code
== GE
|| code
== GT
)
16059 else if (code
== LE
|| code
== LT
)
16064 if (rtx_equal_p (op0
, true_cond
) && rtx_equal_p (op1
, false_cond
))
16067 /* Only when NaNs and signed-zeros are not in effect, smax could be
16068 used for `op0 < op1 ? op1 : op0`, and smin could be used for
16069 `op0 > op1 ? op1 : op0`. */
16070 else if (rtx_equal_p (op1
, true_cond
) && rtx_equal_p (op0
, false_cond
)
16071 && !HONOR_NANS (compare_mode
) && !HONOR_SIGNED_ZEROS (compare_mode
))
16077 rs6000_emit_minmax (dest
, max_p
? SMAX
: SMIN
, op0
, op1
);
16081 /* Possibly emit a floating point conditional move by generating a compare that
16082 sets a mask instruction and a XXSEL select instruction.
16084 Move TRUE_COND to DEST if OP of the operands of the last comparison is
16085 nonzero/true, FALSE_COND if it is zero/false.
16087 Return false if the operation cannot be generated, and true if we could
16088 generate the instruction. */
16091 rs6000_maybe_emit_fp_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16093 enum rtx_code code
= GET_CODE (op
);
16094 rtx op0
= XEXP (op
, 0);
16095 rtx op1
= XEXP (op
, 1);
16096 machine_mode compare_mode
= GET_MODE (op0
);
16097 machine_mode result_mode
= GET_MODE (dest
);
16102 if (!can_create_pseudo_p ())
16105 /* We allow the comparison to be either SFmode/DFmode and the true/false
16106 condition to be either SFmode/DFmode. I.e. we allow:
16111 r = (a == b) ? c : d;
16118 r = (a == b) ? c : d;
16120 but we don't allow intermixing the IEEE 128-bit floating point types with
16121 the 32/64-bit scalar types. */
16123 if (!(compare_mode
== result_mode
16124 || (compare_mode
== SFmode
&& result_mode
== DFmode
)
16125 || (compare_mode
== DFmode
&& result_mode
== SFmode
)))
16138 code
= swap_condition (code
);
16139 std::swap (op0
, op1
);
16146 /* Generate: [(parallel [(set (dest)
16147 (if_then_else (op (cmp1) (cmp2))
16150 (clobber (scratch))])]. */
16152 compare_rtx
= gen_rtx_fmt_ee (code
, CCFPmode
, op0
, op1
);
16153 cmove_rtx
= gen_rtx_SET (dest
,
16154 gen_rtx_IF_THEN_ELSE (result_mode
,
16159 clobber_rtx
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (V2DImode
));
16160 emit_insn (gen_rtx_PARALLEL (VOIDmode
,
16161 gen_rtvec (2, cmove_rtx
, clobber_rtx
)));
16166 /* Helper function to return true if the target has instructions to do a
16167 compare and set mask instruction that can be used with XXSEL to implement a
16168 conditional move. It is also assumed that such a target also supports the
16169 "C" minimum and maximum instructions. */
/* Return true if the target can do a compare-and-set-mask usable with
   XXSEL for MODE: Power9 min/max for the scalar float modes, Power10 with
   hardware float128 for the IEEE 128-bit modes.  NOTE(review): extraction
   dropped the enclosing switch/case lines (16173-16177, 16179-16181);
   code kept byte-identical.  */
16172 have_compare_and_set_mask (machine_mode mode
)
16178 return TARGET_P9_MINMAX
;
16182 return TARGET_POWER10
&& TARGET_FLOAT128_HW
&& FLOAT128_IEEE_P (mode
);
/* NOTE(review): line-mangled extraction -- statements split across lines;
   brace, case-label, and several condition/return lines are missing
   (embedded numbering gaps, e.g. 16303->16309, 16322->16329).  Code kept
   byte-identical; comments only added.  */
16191 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
16192 operands of the last comparison is nonzero/true, FALSE_COND if it
16193 is zero/false.  Return 0 if the hardware has no such operation. */
16196 rs6000_emit_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
/* Decompose the comparison rtx OP into code / operands / modes.  */
16198 enum rtx_code code
= GET_CODE (op
);
16199 rtx op0
= XEXP (op
, 0);
16200 rtx op1
= XEXP (op
, 1);
16201 machine_mode compare_mode
= GET_MODE (op0
);
16202 machine_mode result_mode
= GET_MODE (dest
);
16204 bool is_against_zero
;
16206 /* These modes should always match. */
16207 if (GET_MODE (op1
) != compare_mode
16208 /* In the isel case however, we can use a compare immediate, so
16209 op1 may be a small constant. */
16210 && (!TARGET_ISEL
|| !short_cint_operand (op1
, VOIDmode
)))
/* Mode sanity checks; the early-return bodies of these ifs were elided
   by the extraction.  */
16212 if (GET_MODE (true_cond
) != result_mode
)
16214 if (GET_MODE (false_cond
) != result_mode
)
16217 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16219 if (have_compare_and_set_mask (compare_mode
)
16220 && have_compare_and_set_mask (result_mode
))
/* Try the power9/power10 min/max and mask-based cmove expanders first.  */
16222 if (rs6000_maybe_emit_maxc_minc (dest
, op
, true_cond
, false_cond
))
16225 if (rs6000_maybe_emit_fp_cmove (dest
, op
, true_cond
, false_cond
))
16229 /* Don't allow using floating point comparisons for integer results for
16231 if (FLOAT_MODE_P (compare_mode
) && !FLOAT_MODE_P (result_mode
))
16234 /* First, work out if the hardware can do this at all, or
16235 if it's too slow.... */
16236 if (!FLOAT_MODE_P (compare_mode
))
/* Integer comparison: delegate to the isel-based expander.  */
16239 return rs6000_emit_int_cmove (dest
, op
, true_cond
, false_cond
);
16243 is_against_zero
= op1
== CONST0_RTX (compare_mode
);
16245 /* A floating-point subtract might overflow, underflow, or produce
16246 an inexact result, thus changing the floating-point flags, so it
16247 can't be generated if we care about that.  It's safe if one side
16248 of the construct is zero, since then no subtract will be
16250 if (SCALAR_FLOAT_MODE_P (compare_mode
)
16251 && flag_trapping_math
&& ! is_against_zero
)
16254 /* Eliminate half of the comparisons by switching operands, this
16255 makes the remaining code simpler. */
16256 if (code
== UNLT
|| code
== UNGT
|| code
== UNORDERED
|| code
== NE
16257 || code
== LTGT
|| code
== LT
|| code
== UNLE
)
/* Canonicalize by reversing the condition and swapping the two arms
   (the swap statement itself was partially elided).  */
16259 code
= reverse_condition_maybe_unordered (code
);
16261 true_cond
= false_cond
;
16265 /* UNEQ and LTGT take four instructions for a comparison with zero,
16266 it'll probably be faster to use a branch here too. */
16267 if (code
== UNEQ
&& HONOR_NANS (compare_mode
))
16270 /* We're going to try to implement comparisons by performing
16271 a subtract, then comparing against zero.  Unfortunately,
16272 Inf - Inf is NaN which is not zero, and so if we don't
16273 know that the operand is finite and the comparison
16274 would treat EQ different to UNORDERED, we can't do it. */
16275 if (HONOR_INFINITIES (compare_mode
)
16276 && code
!= GT
&& code
!= UNGE
16277 && (!CONST_DOUBLE_P (op1
)
16278 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1
)))
16279 /* Constructs of the form (a OP b ? a : b) are safe. */
16280 && ((! rtx_equal_p (op0
, false_cond
) && ! rtx_equal_p (op1
, false_cond
))
16281 || (! rtx_equal_p (op0
, true_cond
)
16282 && ! rtx_equal_p (op1
, true_cond
))))
16285 /* At this point we know we can use fsel. */
16287 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16288 is no fsel instruction. */
16289 if (compare_mode
!= SFmode
&& compare_mode
!= DFmode
)
16292 /* Reduce the comparison to a comparison against zero. */
16293 if (! is_against_zero
)
/* temp = op0 - op1, then compare temp against zero.  The declaration of
   `temp` was elided by the extraction.  */
16295 temp
= gen_reg_rtx (compare_mode
)
;
16296 emit_insn (gen_rtx_SET (temp
, gen_rtx_MINUS (compare_mode
, op0
, op1
)));
16298 op1
= CONST0_RTX (compare_mode
);
16301 /* If we don't care about NaNs we can reduce some of the comparisons
16302 down to faster ones. */
16303 if (! HONOR_NANS (compare_mode
))
/* The switch over `code` that follows lost its labels (original lines
   16304-16328 and later elided); each surviving fragment builds a negated
   and/or absolute-value temporary so everything reduces to a GE test.  */
16309 true_cond
= false_cond
;
16322 /* Now, reduce everything down to a GE. */
16329 temp
= gen_reg_rtx (compare_mode
);
16330 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16335 temp
= gen_reg_rtx (compare_mode
);
16336 emit_insn (gen_rtx_SET (temp
, gen_rtx_ABS (compare_mode
, op0
)));
16341 temp
= gen_reg_rtx (compare_mode
);
16342 emit_insn (gen_rtx_SET (temp
,
16343 gen_rtx_NEG (compare_mode
,
16344 gen_rtx_ABS (compare_mode
, op0
))));
16349 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16350 temp
= gen_reg_rtx (result_mode
);
16351 emit_insn (gen_rtx_SET (temp
,
16352 gen_rtx_IF_THEN_ELSE (result_mode
,
16353 gen_rtx_GE (VOIDmode
,
16355 true_cond
, false_cond
)));
16356 false_cond
= true_cond
;
16359 temp
= gen_reg_rtx (compare_mode
);
16360 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16365 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16366 temp
= gen_reg_rtx (result_mode
);
16367 emit_insn (gen_rtx_SET (temp
,
16368 gen_rtx_IF_THEN_ELSE (result_mode
,
16369 gen_rtx_GE (VOIDmode
,
16371 true_cond
, false_cond
)));
16372 true_cond
= false_cond
;
16375 temp
= gen_reg_rtx (compare_mode
);
16376 emit_insn (gen_rtx_SET (temp
, gen_rtx_NEG (compare_mode
, op0
)));
16381 gcc_unreachable ();
/* Final fsel-style select: DEST = (GE ? true_cond : false_cond).  */
16384 emit_insn (gen_rtx_SET (dest
,
16385 gen_rtx_IF_THEN_ELSE (result_mode
,
16386 gen_rtx_GE (VOIDmode
,
16388 true_cond
, false_cond
)));
/* NOTE(review): line-mangled extraction -- statements split across lines;
   braces, switch/case labels and some declarations (e.g. of `signedp`)
   were dropped.  Code kept byte-identical; comments only added.  */
16392 /* Same as above, but for ints (isel). */
16395 rs6000_emit_int_cmove (rtx dest
, rtx op
, rtx true_cond
, rtx false_cond
)
16397 rtx condition_rtx
, cr
;
16398 machine_mode mode
= GET_MODE (dest
);
16399 enum rtx_code cond_code
;
/* Pointer to the isel gen function picked below by mode/signedness.  */
16400 rtx (*isel_func
) (rtx
, rtx
, rtx
, rtx
, rtx
);
/* isel only handles SImode, and DImode on 64-bit targets.  */
16403 if (mode
!= SImode
&& (!TARGET_POWERPC64
|| mode
!= DImode
))
16406 /* PR104335: We now need to expect CC-mode "comparisons"
16407 coming from ifcvt.  The following code expects proper
16408 comparisons so better abort here. */
16409 if (GET_MODE_CLASS (GET_MODE (XEXP (op
, 0))) == MODE_CC
)
16412 /* We still have to do the compare, because isel doesn't do a
16413 compare, it just looks at the CRx bits set by a previous compare
16415 condition_rtx
= rs6000_generate_compare (op
, mode
);
16416 cond_code
= GET_CODE (condition_rtx
);
16417 cr
= XEXP (condition_rtx
, 0);
/* CCmode = signed compare; other CC modes treated as unsigned.
   (`signedp` declaration line was elided.)  */
16418 signedp
= GET_MODE (cr
) == CCmode
;
16420 isel_func
= (mode
== SImode
16421 ? (signedp
? gen_isel_cc_si
: gen_isel_ccuns_si
)
16422 : (signedp
? gen_isel_cc_di
: gen_isel_ccuns_di
));
/* Switch over cond_code (the `switch` line itself was elided).  */
16426 case LT
: case GT
: case LTU
: case GTU
: case EQ
:
16427 /* isel handles these directly. */
16431 /* We need to swap the sense of the comparison. */
16433 std::swap (false_cond
, true_cond
);
16434 PUT_CODE (condition_rtx
, reverse_condition (cond_code
));
/* isel operands must be registers (or const0 for the true arm).  */
16439 false_cond
= force_reg (mode
, false_cond
);
16440 if (true_cond
!= const0_rtx
)
16441 true_cond
= force_reg (mode
, true_cond
);
16443 emit_insn (isel_func (dest
, condition_rtx
, true_cond
, false_cond
, cr
));
/* NOTE(review): line-mangled extraction -- statements split across lines;
   the header comment, return type, braces and the declaration of `c`
   were elided.  Code kept byte-identical; comments only added.
   Emits DEST = min/max (CODE) of OP0 and OP1.  */
16449 rs6000_emit_minmax (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
16451 machine_mode mode
= GET_MODE (op0
);
16455 /* VSX/altivec have direct min/max insns. */
16456 if ((code
== SMAX
|| code
== SMIN
)
16457 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode
)
16458 || (mode
== SFmode
&& VECTOR_UNIT_VSX_P (DFmode
))
16459 || (TARGET_POWER10
&& TARGET_FLOAT128_HW
&& FLOAT128_IEEE_P (mode
))))
16461 emit_insn (gen_rtx_SET (dest
, gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
/* Fallback: synthesize via a conditional move.  `c` is presumably the
   comparison code built between these fragments (elided lines) --
   TODO confirm against upstream.  */
16465 if (code
== SMAX
|| code
== SMIN
)
16470 if (code
== SMAX
|| code
== UMAX
)
16471 target
= emit_conditional_move (dest
, { c
, op0
, op1
, mode
},
16472 op0
, op1
, mode
, 0);
16474 target
= emit_conditional_move (dest
, { c
, op0
, op1
, mode
},
16475 op1
, op0
, mode
, 0);
16476 gcc_assert (target
);
16477 if (target
!= dest
)
16478 emit_move_insn (dest
, target
);
/* NOTE(review): line-mangled extraction -- the return-type line and braces
   were elided.  Code kept byte-identical; comments only added.  */
16481 /* A subroutine of the atomic operation splitters.  Jump to LABEL if
16482 COND is true.  Mark the jump as unlikely to be taken. */
16485 emit_unlikely_jump (rtx cond
, rtx label
)
/* Build (set pc (if_then_else COND LABEL pc)) and attach a
   very-unlikely branch probability note.  */
16487 rtx x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, cond
, label
, pc_rtx
);
16488 rtx_insn
*insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, x
));
16489 add_reg_br_prob_note (insn
, profile_probability::very_unlikely ());
/* NOTE(review): line-mangled extraction -- braces and the switch/case
   labels over MODE were elided; the surviving assignments are the arms
   of a mode dispatch.  Code kept byte-identical; comments only added.  */
16492 /* A subroutine of the atomic operation splitters.  Emit a load-locked
16493 instruction in MODE.  For QI/HImode, possibly use a pattern than includes
16494 the zero_extend operation. */
16497 emit_load_locked (machine_mode mode
, rtx reg
, rtx mem
)
16499 rtx (*fn
) (rtx
, rtx
) = NULL
;
/* QImode arm (power8+ lbarx).  */
16504 fn
= gen_load_lockedqi
;
/* HImode arm (power8+ lharx).  */
16507 fn
= gen_load_lockedhi
;
/* SImode arm: on pre-power8 a sub-word atomic is done through an SImode
   lwarx pattern that also zero-extends.  */
16510 if (GET_MODE (mem
) == QImode
)
16511 fn
= gen_load_lockedqi_si
;
16512 else if (GET_MODE (mem
) == HImode
)
16513 fn
= gen_load_lockedhi_si
;
16515 fn
= gen_load_lockedsi
;
/* DImode arm (ldarx).  */
16518 fn
= gen_load_lockeddi
;
/* TImode arm (lqarx).  */
16521 fn
= gen_load_lockedti
;
16524 gcc_unreachable ();
16526 emit_insn (fn (reg
, mem
));
/* NOTE(review): line-mangled extraction -- braces and the switch/case
   labels over MODE were elided; the surviving assignments are the arms
   of a mode dispatch.  Code kept byte-identical; comments only added.  */
16529 /* A subroutine of the atomic operation splitters.  Emit a store-conditional
16530 instruction in MODE. */
16533 emit_store_conditional (machine_mode mode
, rtx res
, rtx mem
, rtx val
)
16535 rtx (*fn
) (rtx
, rtx
, rtx
) = NULL
;
/* QI/HI/SI/DI/TI arms (stbcx./sthcx./stwcx./stdcx./stqcx.).  */
16540 fn
= gen_store_conditionalqi
;
16543 fn
= gen_store_conditionalhi
;
16546 fn
= gen_store_conditionalsi
;
16549 fn
= gen_store_conditionaldi
;
16552 fn
= gen_store_conditionalti
;
16555 gcc_unreachable ();
16558 /* Emit sync before stwcx. to address PPC405 Erratum. */
16559 if (PPC405_ERRATUM77
)
16560 emit_insn (gen_hwsync ());
16562 emit_insn (fn (res
, mem
, val
));
/* NOTE(review): line-mangled extraction -- braces, the `switch (model)`
   line, and `break`/`return` lines were elided.  Code kept byte-identical;
   comments only added.  */
16565 /* Expand barriers before and after a load_locked/store_cond sequence. */
/* Returns MEM, possibly rewritten so its address is valid for the
   load-locked/store-conditional patterns; emits the leading fence
   required by MODEL.  */
16568 rs6000_pre_atomic_barrier (rtx mem
, enum memmodel model
)
16570 rtx addr
= XEXP (mem
, 0);
/* larx/stcx. only accept indirect or indexed addresses; force anything
   else into a register.  */
16572 if (!legitimate_indirect_address_p (addr
, reload_completed
)
16573 && !legitimate_indexed_address_p (addr
, reload_completed
))
16575 addr
= force_reg (Pmode
, addr
);
16576 mem
= replace_equiv_address_nv (mem
, addr
);
/* Dispatch on MODEL: relaxed/consume/acquire need no leading barrier,
   release/acq_rel get lwsync, seq_cst gets hwsync.  */
16581 case MEMMODEL_RELAXED
:
16582 case MEMMODEL_CONSUME
:
16583 case MEMMODEL_ACQUIRE
:
16585 case MEMMODEL_RELEASE
:
16586 case MEMMODEL_ACQ_REL
:
16587 emit_insn (gen_lwsync ());
16589 case MEMMODEL_SEQ_CST
:
16590 emit_insn (gen_hwsync ());
16593 gcc_unreachable ();
/* NOTE(review): line-mangled extraction -- header comment, return type,
   braces, `switch` line and `break`s were elided.  Code kept
   byte-identical; comments only added.
   Emits the trailing fence required by MODEL after a larx/stcx. loop:
   nothing for relaxed/consume/release, isync for the acquire-class
   models.  */
16599 rs6000_post_atomic_barrier (enum memmodel model
)
16603 case MEMMODEL_RELAXED
:
16604 case MEMMODEL_CONSUME
:
16605 case MEMMODEL_RELEASE
:
16607 case MEMMODEL_ACQUIRE
:
16608 case MEMMODEL_ACQ_REL
:
16609 case MEMMODEL_SEQ_CST
:
16610 emit_insn (gen_isync ());
16613 gcc_unreachable ();
/* NOTE(review): line-mangled extraction -- braces, the return type, and
   the trailing stores through *pshift/*pmask and the `return mem;` were
   elided.  Code kept byte-identical; comments only added.  */
16617 /* A subroutine of the various atomic expanders.  For sub-word operations,
16618 we must adjust things to operate on SImode.  Given the original MEM,
16619 return a new aligned memory.  Also build and return the quantities by
16620 which to shift and mask. */
16623 rs6000_adjust_atomic_subword (rtx orig_mem
, rtx
*pshift
, rtx
*pmask
)
16625 rtx addr
, align
, shift
, mask
, mem
;
16626 HOST_WIDE_INT shift_mask
;
16627 machine_mode mode
= GET_MODE (orig_mem
);
16629 /* For smaller modes, we have to implement this via SImode. */
/* Max bit-shift within the word: 24 for bytes, 16 for halfwords.  */
16630 shift_mask
= (mode
== QImode
? 0x18 : 0x10);
16632 addr
= XEXP (orig_mem
, 0);
16633 addr
= force_reg (GET_MODE (addr
), addr
);
16635 /* Aligned memory containing subword.  Generate a new memory.  We
16636 do not want any of the existing MEM_ATTR data, as we're now
16637 accessing memory outside the original object. */
/* align = addr & -4: word-align the address.  */
16638 align
= expand_simple_binop (Pmode
, AND
, addr
, GEN_INT (-4),
16639 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16640 mem
= gen_rtx_MEM (SImode
, align
);
16641 MEM_VOLATILE_P (mem
) = MEM_VOLATILE_P (orig_mem
);
16642 if (MEM_ALIAS_SET (orig_mem
) == ALIAS_SET_MEMORY_BARRIER
)
16643 set_mem_alias_set (mem
, ALIAS_SET_MEMORY_BARRIER
);
16645 /* Shift amount for subword relative to aligned word. */
16646 shift
= gen_reg_rtx (SImode
);
16647 addr
= gen_lowpart (SImode
, addr
);
16648 rtx tmp
= gen_reg_rtx (SImode
);
/* shift = (addr << 3) & shift_mask: byte offset converted to bits.  */
16649 emit_insn (gen_ashlsi3 (tmp
, addr
, GEN_INT (3)));
16650 emit_insn (gen_andsi3 (shift
, tmp
, GEN_INT (shift_mask
)));
/* On big-endian, the subword sits at the opposite end of the word.  */
16651 if (BYTES_BIG_ENDIAN
)
16652 shift
= expand_simple_binop (SImode
, XOR
, shift
, GEN_INT (shift_mask
),
16653 shift
, 1, OPTAB_LIB_WIDEN
);
16656 /* Mask for insertion. */
16657 mask
= expand_simple_binop (SImode
, ASHIFT
, GEN_INT (GET_MODE_MASK (mode
)),
16658 shift
, NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
/* NOTE(review): line-mangled extraction -- return type, braces, the
   `oldval` operand of the AND, and the trailing `return x;` were elided.
   Code kept byte-identical; comments only added.  */
16664 /* A subroutine of the various atomic expanders.  For sub-word operands,
16665 combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo. */
16668 rs6000_mask_atomic_subword (rtx oldval
, rtx newval
, rtx mask
)
/* x = oldval & ~mask, i.e. clear the subword field in the old word.  */
16672 x
= gen_reg_rtx (SImode
);
16673 emit_insn (gen_rtx_SET (x
, gen_rtx_AND (SImode
,
16674 gen_rtx_NOT (SImode
, mask
),
/* x |= newval: insert the (already shifted) new field.  */
16677 x
= expand_simple_binop (SImode
, IOR
, newval
, x
, x
, 1, OPTAB_LIB_WIDEN
);
/* NOTE(review): line-mangled extraction -- return type and braces were
   elided.  Code kept byte-identical; comments only added.  */
16682 /* A subroutine of the various atomic expanders.  For sub-word operands,
16683 extract WIDE to NARROW via SHIFT. */
16686 rs6000_finish_atomic_subword (rtx narrow
, rtx wide
, rtx shift
)
/* Shift the field down to bit 0, then take the low part in NARROW's
   mode.  */
16688 wide
= expand_simple_binop (SImode
, LSHIFTRT
, wide
, shift
,
16689 wide
, 1, OPTAB_LIB_WIDEN
);
16690 emit_move_insn (narrow
, gen_lowpart (GET_MODE (narrow
), wide
));
/* NOTE(review): line-mangled extraction -- braces, some declarations
   (e.g. `is_weak`), assignments (e.g. `mem = operands[2]`), and several
   guard conditions around the fragments below were elided (embedded
   numbering gaps).  Code kept byte-identical; comments only added.  */
16693 /* Expand an atomic compare and swap operation. */
16696 rs6000_expand_atomic_compare_and_swap (rtx operands
[])
16698 rtx boolval
, retval
, mem
, oldval
, newval
, cond
;
16699 rtx label1
, label2
, x
, mask
, shift
;
16700 machine_mode mode
, orig_mode
;
16701 enum memmodel mod_s
, mod_f
;
/* Unpack the expander operands: bool result, old-value result,
   (mem at [2], elided), expected, desired, weak flag, success/fail
   memory models.  */
16704 boolval
= operands
[0];
16705 retval
= operands
[1];
16707 oldval
= operands
[3];
16708 newval
= operands
[4];
16709 is_weak
= (INTVAL (operands
[5]) != 0);
16710 mod_s
= memmodel_base (INTVAL (operands
[6]));
16711 mod_f
= memmodel_base (INTVAL (operands
[7]));
16712 orig_mode
= mode
= GET_MODE (mem
);
16714 mask
= shift
= NULL_RTX
;
16715 if (mode
== QImode
|| mode
== HImode
)
16717 /* Before power8, we didn't have access to lbarx/lharx, so generate a
16718 lwarx and shift/mask operations.  With power8, we need to do the
16719 comparison in SImode, but the store is still done in QI/HImode. */
16720 oldval
= convert_modes (SImode
, mode
, oldval
, 1);
16722 if (!TARGET_SYNC_HI_QI
)
16724 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
16726 /* Shift and mask OLDVAL into position with the word. */
16727 oldval
= expand_simple_binop (SImode
, ASHIFT
, oldval
, shift
,
16728 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16730 /* Shift and mask NEWVAL into position within the word. */
16731 newval
= convert_modes (SImode
, mode
, newval
, 1);
16732 newval
= expand_simple_binop (SImode
, ASHIFT
, newval
, shift
,
16733 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16736 /* Prepare to adjust the return value. */
16737 retval
= gen_reg_rtx (SImode
);
16740 else if (reg_overlap_mentioned_p (retval
, oldval
))
16741 oldval
= copy_to_reg (oldval
);
16743 if (mode
!= TImode
&& !reg_or_short_operand (oldval
, mode
))
16744 oldval
= copy_to_mode_reg (mode
, oldval
);
16746 if (reg_overlap_mentioned_p (retval
, newval
))
16747 newval
= copy_to_reg (newval
);
16749 mem
= rs6000_pre_atomic_barrier (mem
, mod_s
);
/* label1: retry target for the strong CAS loop (elided guard:
   presumably only when !is_weak -- TODO confirm).  */
16754 label1
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
16755 emit_label (XEXP (label1
, 0));
16757 label2
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
16759 emit_load_locked (mode
, retval
, mem
);
/* Sub-word path: isolate the field before comparing.  */
16763 x
= expand_simple_binop (SImode
, AND
, retval
, mask
,
16764 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16766 cond
= gen_reg_rtx (CCmode
);
16767 /* If we have TImode, synthesize a comparison. */
16768 if (mode
!= TImode
)
16769 x
= gen_rtx_COMPARE (CCmode
, x
, oldval
);
/* TImode: compare the two 64-bit halves via xor/xor/or against zero.  */
16772 rtx xor1_result
= gen_reg_rtx (DImode
);
16773 rtx xor2_result
= gen_reg_rtx (DImode
);
16774 rtx or_result
= gen_reg_rtx (DImode
);
16775 rtx new_word0
= simplify_gen_subreg (DImode
, x
, TImode
, 0);
16776 rtx new_word1
= simplify_gen_subreg (DImode
, x
, TImode
, 8);
16777 rtx old_word0
= simplify_gen_subreg (DImode
, oldval
, TImode
, 0);
16778 rtx old_word1
= simplify_gen_subreg (DImode
, oldval
, TImode
, 8);
16780 emit_insn (gen_xordi3 (xor1_result
, new_word0
, old_word0
));
16781 emit_insn (gen_xordi3 (xor2_result
, new_word1
, old_word1
));
16782 emit_insn (gen_iordi3 (or_result
, xor1_result
, xor2_result
));
16783 x
= gen_rtx_COMPARE (CCmode
, or_result
, const0_rtx
);
16786 emit_insn (gen_rtx_SET (cond
, x
));
/* Mismatch: branch to the exit label.  */
16788 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
16789 emit_unlikely_jump (x
, label2
);
/* Sub-word path: splice NEWVAL into the loaded word before storing.  */
16793 x
= rs6000_mask_atomic_subword (retval
, newval
, mask
);
16795 emit_store_conditional (orig_mode
, cond
, mem
, x
);
/* Store-conditional failed: retry (strong CAS only -- elided guard).  */
16799 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
16800 emit_unlikely_jump (x
, label1
);
16803 if (!is_mm_relaxed (mod_f
))
16804 emit_label (XEXP (label2
, 0));
16806 rs6000_post_atomic_barrier (mod_s
);
16808 if (is_mm_relaxed (mod_f
))
16809 emit_label (XEXP (label2
, 0));
/* Extract/convert the loaded old value into operands[1].  */
16812 rs6000_finish_atomic_subword (operands
[1], retval
, shift
);
16813 else if (mode
!= GET_MODE (operands
[1]))
16814 convert_move (operands
[1], retval
, 1);
16816 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16817 x
= gen_rtx_EQ (SImode
, cond
, const0_rtx
);
16818 emit_insn (gen_rtx_SET (boolval
, x
));
/* NOTE(review): line-mangled extraction -- braces, the declaration of
   `mode`, and assignments such as `mem = operands[1]` / `val =
   operands[2]` were elided.  Code kept byte-identical; comments only
   added.  */
16821 /* Expand an atomic exchange operation. */
16824 rs6000_expand_atomic_exchange (rtx operands
[])
16826 rtx retval
, mem
, val
, cond
;
16828 enum memmodel model
;
16829 rtx label
, x
, mask
, shift
;
16831 retval
= operands
[0];
16834 model
= memmodel_base (INTVAL (operands
[3]));
16835 mode
= GET_MODE (mem
);
16837 mask
= shift
= NULL_RTX
;
/* Pre-power8 sub-word exchange: go through an SImode word.  */
16838 if (!TARGET_SYNC_HI_QI
&& (mode
== QImode
|| mode
== HImode
))
16840 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
16842 /* Shift and mask VAL into position with the word. */
16843 val
= convert_modes (SImode
, mode
, val
, 1);
16844 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
16845 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16847 /* Prepare to adjust the return value. */
16848 retval
= gen_reg_rtx (SImode
);
16852 mem
= rs6000_pre_atomic_barrier (mem
, model
);
/* Retry loop: load-locked, splice value, store-conditional.  */
16854 label
= gen_rtx_LABEL_REF (VOIDmode
, gen_label_rtx ());
16855 emit_label (XEXP (label
, 0));
16857 emit_load_locked (mode
, retval
, mem
);
16861 x
= rs6000_mask_atomic_subword (retval
, val
, mask
);
16863 cond
= gen_reg_rtx (CCmode
);
16864 emit_store_conditional (mode
, cond
, mem
, x
);
16866 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
16867 emit_unlikely_jump (x
, label
);
16869 rs6000_post_atomic_barrier (model
);
/* Sub-word path: extract the old field into operands[0].  */
16872 rs6000_finish_atomic_subword (operands
[0], retval
, shift
);
/* NOTE(review): line-mangled extraction -- braces, switch/case labels
   (dispatch over CODE), and several guard conditions around the
   fragments were elided.  Code kept byte-identical; comments only
   added.  */
16875 /* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
16876 to perform.  MEM is the memory on which to operate.  VAL is the second
16877 operand of the binary operator.  BEFORE and AFTER are optional locations to
16878 return the value of MEM either before of after the operation.  MODEL_RTX
16879 is a CONST_INT containing the memory model to use. */
16882 rs6000_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
16883 rtx orig_before
, rtx orig_after
, rtx model_rtx
)
16885 enum memmodel model
= memmodel_base (INTVAL (model_rtx
));
16886 machine_mode mode
= GET_MODE (mem
);
16887 machine_mode store_mode
= mode
;
16888 rtx label
, x
, cond
, mask
, shift
;
16889 rtx before
= orig_before
, after
= orig_after
;
16891 mask
= shift
= NULL_RTX
;
16892 /* On power8, we want to use SImode for the operation.  On previous systems,
16893 use the operation in a subword and shift/mask to get the proper byte or
16895 if (mode
== QImode
|| mode
== HImode
)
16897 if (TARGET_SYNC_HI_QI
)
/* power8+ path: lbarx/lharx exist, just widen VAL to SImode.  */
16899 val
= convert_modes (SImode
, mode
, val
, 1);
16901 /* Prepare to adjust the return value. */
16902 before
= gen_reg_rtx (SImode
);
16904 after
= gen_reg_rtx (SImode
);
/* Pre-power8 path: operate on the containing aligned word.  */
16909 mem
= rs6000_adjust_atomic_subword (mem
, &shift
, &mask
);
16911 /* Shift and mask VAL into position with the word. */
16912 val
= convert_modes (SImode
, mode
, val
, 1);
16913 val
= expand_simple_binop (SImode
, ASHIFT
, val
, shift
,
16914 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
/* Dispatch on CODE (switch labels elided): decide how to keep the
   operation from disturbing bits outside the subword field.  */
16920 /* We've already zero-extended VAL.  That is sufficient to
16921 make certain that it does not affect other bits. */
16926 /* If we make certain that all of the other bits in VAL are
16927 set, that will be sufficient to not affect other bits. */
16928 x
= gen_rtx_NOT (SImode
, mask
);
16929 x
= gen_rtx_IOR (SImode
, x
, val
);
16930 emit_insn (gen_rtx_SET (val
, x
));
16937 /* These will all affect bits outside the field and need
16938 adjustment via MASK within the loop. */
16942 gcc_unreachable ();
16945 /* Prepare to adjust the return value. */
16946 before
= gen_reg_rtx (SImode
);
16948 after
= gen_reg_rtx (SImode
);
16949 store_mode
= mode
= SImode
;
16953 mem
= rs6000_pre_atomic_barrier (mem
, model
);
/* Retry loop head.  */
16955 label
= gen_label_rtx ();
16956 emit_label (label
);
16957 label
= gen_rtx_LABEL_REF (VOIDmode
, label
);
16959 if (before
== NULL_RTX
)
16960 before
= gen_reg_rtx (mode
);
16962 emit_load_locked (mode
, before
, mem
);
/* NOT (NAND-style) arm: after = ~(before & val); guard elided.  */
16966 x
= expand_simple_binop (mode
, AND
, before
, val
,
16967 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16968 after
= expand_simple_unop (mode
, NOT
, x
, after
, 1);
/* Generic arm: after = before CODE val.  */
16972 after
= expand_simple_binop (mode
, code
, before
, val
,
16973 after
, 1, OPTAB_LIB_WIDEN
);
/* Sub-word path: splice the new field back into the loaded word.  */
16979 x
= expand_simple_binop (SImode
, AND
, after
, mask
,
16980 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
16981 x
= rs6000_mask_atomic_subword (before
, x
, mask
);
16983 else if (store_mode
!= mode
)
16984 x
= convert_modes (store_mode
, mode
, x
, 1);
16986 cond
= gen_reg_rtx (CCmode
);
16987 emit_store_conditional (store_mode
, cond
, mem
, x
);
16989 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
16990 emit_unlikely_jump (x
, label
);
16992 rs6000_post_atomic_barrier (model
);
16996 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
16997 then do the calculations in a SImode register. */
16999 rs6000_finish_atomic_subword (orig_before
, before
, shift
);
17001 rs6000_finish_atomic_subword (orig_after
, after
, shift
);
17003 else if (store_mode
!= mode
)
17005 /* QImode/HImode on machines with lbarx/lharx where we do the native
17006 operation and then do the calculations in a SImode register. */
17008 convert_move (orig_before
, before
, 1);
17010 convert_move (orig_after
, after
, 1);
17012 else if (orig_after
&& after
!= orig_after
)
17013 emit_move_insn (orig_after
, after
);
/* NOTE(review): line-mangled extraction -- the function's return-type line
   and braces were elided.  Code kept byte-identical; comments only added.
   Lazily-created alias set used for all TOC references; -1 means "not yet
   allocated".  */
16 static GTY(()) alias_set_type TOC_alias_set
= -1;
/* Return the (lazily allocated) TOC alias set.  */
17019 get_TOC_alias_set (void)
17021 if (TOC_alias_set
== -1)
17022 TOC_alias_set
= new_alias_set ();
17023 return TOC_alias_set
;
/* NOTE(review): line-mangled extraction -- braces elided.  Code kept
   byte-identical; comments only added.  */
17026 /* The mode the ABI uses for a word.  This is not the same as word_mode
17027 for -m32 -mpowerpc64.  This is used to implement various target hooks. */
17029 static scalar_int_mode
17030 rs6000_abi_word_mode (void)
17032 return TARGET_32BIT
? SImode
: DImode
;
/* NOTE(review): line-mangled extraction -- return type, braces and the
   TARGET_64BIT-style condition selecting between the two returns were
   elided.  Code kept byte-identical; comments only added.
   Returns a freshly xstrdup'ed option string telling the offload
   compiler which pointer ABI the host uses.  */
17035 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
17037 rs6000_offload_options (void)
17040 return xstrdup ("-foffload-abi=lp64");
17042 return xstrdup ("-foffload-abi=ilp32");
17046 /* A quick summary of the various types of 'constant-pool tables'
17049 Target Flags Name One table per
17050 AIX (none) AIX TOC object file
17051 AIX -mfull-toc AIX TOC object file
17052 AIX -mminimal-toc AIX minimal TOC translation unit
17053 SVR4/EABI (none) SVR4 SDATA object file
17054 SVR4/EABI -fpic SVR4 pic object file
17055 SVR4/EABI -fPIC SVR4 PIC translation unit
17056 SVR4/EABI -mrelocatable EABI TOC function
17057 SVR4/EABI -maix AIX TOC object file
17058 SVR4/EABI -maix -mminimal-toc
17059 AIX minimal TOC translation unit
17061 Name Reg. Set by entries contains:
17062 made by addrs? fp? sum?
17064 AIX TOC 2 crt0 as Y option option
17065 AIX minimal TOC 30 prolog gcc Y Y option
17066 SVR4 SDATA 13 crt0 gcc N Y N
17067 SVR4 pic 30 prolog ld Y not yet N
17068 SVR4 PIC 30 prolog gcc Y option option
17069 EABI TOC 30 prolog gcc Y option option
/* NOTE(review): line-mangled extraction -- braces, several declarations
   (`flen`, `fidx`, `i`, `len`), switch labels over CODE and over the rtx
   format characters, and the final `return result;` were elided.  Code
   kept byte-identical; comments only added.  */
17073 /* Hash functions for the hash table. */
/* Hash an rtx constant K for the TOC hash table, mixing in the rtx code,
   mode, and every operand per GET_RTX_FORMAT.  */
17076 rs6000_hash_constant (rtx k
)
17078 enum rtx_code code
= GET_CODE (k
);
17079 machine_mode mode
= GET_MODE (k
);
17080 unsigned result
= (code
<< 3) ^ mode
;
17081 const char *format
;
17084 format
= GET_RTX_FORMAT (code
);
17085 flen
= strlen (format
);
/* LABEL_REF-style arm (label elided): hash the referenced insn's UID.  */
17091 return result
* 1231 + (unsigned) INSN_UID (XEXP (k
, 0));
17093 case CONST_WIDE_INT
:
17096 flen
= CONST_WIDE_INT_NUNITS (k
);
17097 for (i
= 0; i
< flen
; i
++)
17098 result
= result
* 613 + CONST_WIDE_INT_ELT (k
, i
);
/* CONST_DOUBLE-style arm (label elided): hash the real value.  */
17103 return real_hash (CONST_DOUBLE_REAL_VALUE (k
)) * result
;
/* Generic case: fold in each operand according to its format letter.  */
17113 for (; fidx
< flen
; fidx
++)
17114 switch (format
[fidx
])
/* 'S'/'s' string operand: hash length and bytes.  */
17119 const char *str
= XSTR (k
, fidx
);
17120 len
= strlen (str
);
17121 result
= result
* 613 + len
;
17122 for (i
= 0; i
< len
; i
++)
17123 result
= result
* 613 + (unsigned) str
[i
];
/* 'e' sub-expression: recurse.  */
17128 result
= result
* 1231 + rs6000_hash_constant (XEXP (k
, fidx
));
/* 'i'/'n' integer operand.  */
17132 result
= result
* 613 + (unsigned) XINT (k
, fidx
);
/* 'w' HOST_WIDE_INT operand, possibly wider than unsigned.  */
17135 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT
))
17136 result
= result
* 613 + (unsigned) XWINT (k
, fidx
);
17140 for (i
= 0; i
< sizeof (HOST_WIDE_INT
) / sizeof (unsigned); i
++)
17141 result
= result
* 613 + (unsigned) (XWINT (k
, fidx
)
17148 gcc_unreachable ();
/* NOTE(review): line-mangled extraction -- the header comment, return
   type (presumably hashval_t -- TODO confirm) and braces were elided.
   Code kept byte-identical; comments only added.
   Hash a TOC table entry: constant hash xor'ed with its machine mode.  */
17155 toc_hasher::hash (toc_hash_struct
*thc
)
17157 return rs6000_hash_constant (thc
->key
) ^ thc
->key_mode
;
/* NOTE(review): line-mangled extraction -- return type, braces, and the
   declarations/initializations of r1/r2 (presumably h1->key / h2->key --
   TODO confirm) were elided.  Code kept byte-identical; comments only
   added.  */
17160 /* Compare H1 and H2 for equivalence. */
17163 toc_hasher::equal (toc_hash_struct
*h1
, toc_hash_struct
*h2
)
/* Entries in different modes are never equal.  */
17168 if (h1
->key_mode
!= h2
->key_mode
)
17171 return rtx_equal_p (r1
, r2
);
/* NOTE(review): line-mangled extraction; the macro's backslash
   continuations are preserved byte-identically (no comments may be
   inserted between continued lines).  Matches the C++ front-end's
   mangled prefixes: _vt. (old ABI) and _ZTV/_ZTT/_ZTI/_ZTC (Itanium ABI
   vtable, VTT, typeinfo, construction vtable).  Note it tests the local
   variable `name`, not its NAME parameter.  */
17174 /* These are the names given by the C++ front-end to vtables, and
17175 vtable-like objects.  Ideally, this logic should not be here;
17176 instead, there should be some programmatic way of inquiring as
17177 to whether or not an object is a vtable. */
17179 #define VTABLE_NAME_P(NAME) \
17180 (startswith (name, "_vt.") \
17181 || startswith (name, "_ZTV") \
17182 || startswith (name, "_ZTT") \
17183 || startswith (name, "_ZTI") \
17184 || startswith (name, "_ZTC"))
/* NOTE(review): line-mangled extraction -- return type, braces,
   declarations of p/q/strip/len, the early `return name;`, the loop
   construct around the '$'-replacement, and the closing #endif were
   elided.  Code kept byte-identical; comments only added.  */
17186 #ifdef NO_DOLLAR_IN_LABEL
17187 /* Return a GGC-allocated character string translating dollar signs in
17188 input NAME to underscores.  Used by XCOFF ASM_OUTPUT_LABELREF. */
17191 rs6000_xcoff_strip_dollar (const char *name
)
17197 q
= (const char *) strchr (name
, '$');
/* No '$', or '$' at position 0: return NAME unchanged (elided return).  */
17199 if (q
== 0 || q
== name
)
/* Copy NAME into a stack buffer and rewrite each '$' in place.  */
17202 len
= strlen (name
);
17203 strip
= XALLOCAVEC (char, len
+ 1);
17204 strcpy (strip
, name
);
17205 p
= strip
+ (q
- name
);
17209 p
= strchr (p
+ 1, '$');
/* Return a GC-managed copy of the stripped string.  */
17212 return ggc_alloc_string (strip
, len
);
/* NOTE(review): line-mangled extraction -- the header comment, return
   type, braces and the else-branch structure were elided.  Code kept
   byte-identical; comments only added.
   Output the assembler name for SYMBOL_REF X to FILE; vtable symbols go
   through RS6000_OUTPUT_BASENAME, everything else through
   assemble_name.  */
17217 rs6000_output_symbol_ref (FILE *file
, rtx x
)
17219 const char *name
= XSTR (x
, 0);
17221 /* Currently C++ toc references to vtables can be emitted before it
17222 is decided whether the vtable is public or private.  If this is
17223 the case, then the linker will eventually complain that there is
17224 a reference to an unknown section.  Thus, for vtables only,
17225 we emit the TOC reference to reference the identifier and not the
17227 if (VTABLE_NAME_P (name
))
17229 RS6000_OUTPUT_BASENAME (file
, name
);
17232 assemble_name (file
, name
);
17235 /* Output a TOC entry. We derive the entry name from what is being
17239 output_toc (FILE *file
, rtx x
, int labelno
, machine_mode mode
)
17242 const char *name
= buf
;
17244 HOST_WIDE_INT offset
= 0;
17246 gcc_assert (!TARGET_NO_TOC_OR_PCREL
);
17248 /* When the linker won't eliminate them, don't output duplicate
17249 TOC entries (this happens on AIX if there is any kind of TOC,
17250 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17252 if (TARGET_TOC
&& GET_CODE (x
) != LABEL_REF
)
17254 struct toc_hash_struct
*h
;
17256 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17257 time because GGC is not initialized at that point. */
17258 if (toc_hash_table
== NULL
)
17259 toc_hash_table
= hash_table
<toc_hasher
>::create_ggc (1021);
17261 h
= ggc_alloc
<toc_hash_struct
> ();
17263 h
->key_mode
= mode
;
17264 h
->labelno
= labelno
;
17266 toc_hash_struct
**found
= toc_hash_table
->find_slot (h
, INSERT
);
17267 if (*found
== NULL
)
17269 else /* This is indeed a duplicate.
17270 Set this label equal to that label. */
17272 fputs ("\t.set ", file
);
17273 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
17274 fprintf (file
, "%d,", labelno
);
17275 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LC");
17276 fprintf (file
, "%d\n", ((*found
)->labelno
));
17279 if (TARGET_XCOFF
&& SYMBOL_REF_P (x
)
17280 && (SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_GLOBAL_DYNAMIC
17281 || SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
))
17283 fputs ("\t.set ", file
);
17284 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
17285 fprintf (file
, "%d,", labelno
);
17286 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file
, "LCM");
17287 fprintf (file
, "%d\n", ((*found
)->labelno
));
17294 /* If we're going to put a double constant in the TOC, make sure it's
17295 aligned properly when strict alignment is on. */
17296 if ((CONST_DOUBLE_P (x
) || CONST_WIDE_INT_P (x
))
17297 && STRICT_ALIGNMENT
17298 && GET_MODE_BITSIZE (mode
) >= 64
17299 && ! (TARGET_NO_FP_IN_TOC
&& ! TARGET_MINIMAL_TOC
)) {
17300 ASM_OUTPUT_ALIGN (file
, 3);
17303 (*targetm
.asm_out
.internal_label
) (file
, "LC", labelno
);
17305 /* Handle FP constants specially. Note that if we have a minimal
17306 TOC, things we put here aren't actually in the TOC, so we can allow
17308 if (CONST_DOUBLE_P (x
)
17309 && (GET_MODE (x
) == TFmode
|| GET_MODE (x
) == TDmode
17310 || GET_MODE (x
) == IFmode
|| GET_MODE (x
) == KFmode
))
17314 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17315 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17317 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17321 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17322 fputs (DOUBLE_INT_ASM_OP
, file
);
17324 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17325 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17326 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17327 fprintf (file
, "0x%lx%08lx,0x%lx%08lx\n",
17328 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17329 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff,
17330 k
[WORDS_BIG_ENDIAN
? 2 : 3] & 0xffffffff,
17331 k
[WORDS_BIG_ENDIAN
? 3 : 2] & 0xffffffff);
17336 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17337 fputs ("\t.long ", file
);
17339 fprintf (file
, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17340 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17341 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17342 fprintf (file
, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17343 k
[0] & 0xffffffff, k
[1] & 0xffffffff,
17344 k
[2] & 0xffffffff, k
[3] & 0xffffffff);
17348 else if (CONST_DOUBLE_P (x
)
17349 && (GET_MODE (x
) == DFmode
|| GET_MODE (x
) == DDmode
))
17353 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17354 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17356 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), k
);
17360 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17361 fputs (DOUBLE_INT_ASM_OP
, file
);
17363 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17364 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17365 fprintf (file
, "0x%lx%08lx\n",
17366 k
[WORDS_BIG_ENDIAN
? 0 : 1] & 0xffffffff,
17367 k
[WORDS_BIG_ENDIAN
? 1 : 0] & 0xffffffff);
17372 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17373 fputs ("\t.long ", file
);
17375 fprintf (file
, "\t.tc FD_%lx_%lx[TC],",
17376 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17377 fprintf (file
, "0x%lx,0x%lx\n",
17378 k
[0] & 0xffffffff, k
[1] & 0xffffffff);
17382 else if (CONST_DOUBLE_P (x
)
17383 && (GET_MODE (x
) == SFmode
|| GET_MODE (x
) == SDmode
))
17387 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x
)))
17388 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17390 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
17394 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17395 fputs (DOUBLE_INT_ASM_OP
, file
);
17397 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17398 if (WORDS_BIG_ENDIAN
)
17399 fprintf (file
, "0x%lx00000000\n", l
& 0xffffffff);
17401 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17406 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17407 fputs ("\t.long ", file
);
17409 fprintf (file
, "\t.tc FS_%lx[TC],", l
& 0xffffffff);
17410 fprintf (file
, "0x%lx\n", l
& 0xffffffff);
17414 else if (GET_MODE (x
) == VOIDmode
&& CONST_INT_P (x
))
17416 unsigned HOST_WIDE_INT low
;
17417 HOST_WIDE_INT high
;
17419 low
= INTVAL (x
) & 0xffffffff;
17420 high
= (HOST_WIDE_INT
) INTVAL (x
) >> 32;
17422 /* TOC entries are always Pmode-sized, so when big-endian
17423 smaller integer constants in the TOC need to be padded.
17424 (This is still a win over putting the constants in
17425 a separate constant pool, because then we'd have
17426 to have both a TOC entry _and_ the actual constant.)
17428 For a 32-bit target, CONST_INT values are loaded and shifted
17429 entirely within `low' and can be stored in one TOC entry. */
17431 /* It would be easy to make this work, but it doesn't now. */
17432 gcc_assert (!TARGET_64BIT
|| POINTER_SIZE
>= GET_MODE_BITSIZE (mode
));
17434 if (WORDS_BIG_ENDIAN
&& POINTER_SIZE
> GET_MODE_BITSIZE (mode
))
17437 low
<<= POINTER_SIZE
- GET_MODE_BITSIZE (mode
);
17438 high
= (HOST_WIDE_INT
) low
>> 32;
17444 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17445 fputs (DOUBLE_INT_ASM_OP
, file
);
17447 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17448 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17449 fprintf (file
, "0x%lx%08lx\n",
17450 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17455 if (POINTER_SIZE
< GET_MODE_BITSIZE (mode
))
17457 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17458 fputs ("\t.long ", file
);
17460 fprintf (file
, "\t.tc ID_%lx_%lx[TC],",
17461 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17462 fprintf (file
, "0x%lx,0x%lx\n",
17463 (long) high
& 0xffffffff, (long) low
& 0xffffffff);
17467 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17468 fputs ("\t.long ", file
);
17470 fprintf (file
, "\t.tc IS_%lx[TC],", (long) low
& 0xffffffff);
17471 fprintf (file
, "0x%lx\n", (long) low
& 0xffffffff);
17477 if (GET_CODE (x
) == CONST
)
17479 gcc_assert (GET_CODE (XEXP (x
, 0)) == PLUS
17480 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)));
17482 base
= XEXP (XEXP (x
, 0), 0);
17483 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
17486 switch (GET_CODE (base
))
17489 name
= XSTR (base
, 0);
17493 ASM_GENERATE_INTERNAL_LABEL (buf
, "L",
17494 CODE_LABEL_NUMBER (XEXP (base
, 0)));
17498 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (base
));
17502 gcc_unreachable ();
17505 if (TARGET_ELF
|| TARGET_MINIMAL_TOC
)
17506 fputs (TARGET_32BIT
? "\t.long " : DOUBLE_INT_ASM_OP
, file
);
17509 fputs ("\t.tc ", file
);
17510 RS6000_OUTPUT_BASENAME (file
, name
);
17513 fprintf (file
, ".N" HOST_WIDE_INT_PRINT_UNSIGNED
, - offset
);
17515 fprintf (file
, ".P" HOST_WIDE_INT_PRINT_UNSIGNED
, offset
);
17517 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17518 after other TOC symbols, reducing overflow of small TOC access
17519 to [TC] symbols. */
17520 fputs (TARGET_XCOFF
&& TARGET_CMODEL
!= CMODEL_SMALL
17521 ? "[TE]," : "[TC],", file
);
17524 /* Currently C++ toc references to vtables can be emitted before it
17525 is decided whether the vtable is public or private. If this is
17526 the case, then the linker will eventually complain that there is
17527 a TOC reference to an unknown section. Thus, for vtables only,
17528 we emit the TOC reference to reference the symbol and not the
17530 if (VTABLE_NAME_P (name
))
17532 RS6000_OUTPUT_BASENAME (file
, name
);
17534 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, offset
);
17535 else if (offset
> 0)
17536 fprintf (file
, "+" HOST_WIDE_INT_PRINT_DEC
, offset
);
17539 output_addr_const (file
, x
);
17542 if (TARGET_XCOFF
&& SYMBOL_REF_P (base
))
17544 switch (SYMBOL_REF_TLS_MODEL (base
))
17548 case TLS_MODEL_LOCAL_EXEC
:
17549 fputs ("@le", file
);
17551 case TLS_MODEL_INITIAL_EXEC
:
17552 fputs ("@ie", file
);
17554 /* Use global-dynamic for local-dynamic. */
17555 case TLS_MODEL_GLOBAL_DYNAMIC
:
17556 case TLS_MODEL_LOCAL_DYNAMIC
:
17558 (*targetm
.asm_out
.internal_label
) (file
, "LCM", labelno
);
17559 fputs ("\t.tc .", file
);
17560 RS6000_OUTPUT_BASENAME (file
, name
);
17561 fputs ("[TC],", file
);
17562 output_addr_const (file
, x
);
17563 fputs ("@m", file
);
17566 gcc_unreachable ();
/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.  */

void
output_ascii (FILE *file, const char *p, int n)
{
  char c;
  int i, count_string;
  const char *for_string = "\t.byte \"";
  const char *for_decimal = "\t.byte ";
  const char *to_close = NULL;

  count_string = 0;
  for (i = 0; i < n; i++)
    {
      c = *p++;
      if (c >= ' ' && c < 0177)
	{
	  /* Printable character: emit it inside a quoted string,
	     opening one if necessary.  */
	  if (for_string)
	    fputs (for_string, file);
	  putc (c, file);

	  /* Write two quotes to get one.  */
	  if (c == '"')
	    {
	      putc (c, file);
	      ++count_string;
	    }

	  for_string = NULL;
	  for_decimal = "\"\n\t.byte ";
	  to_close = "\"\n";
	  ++count_string;

	  /* Break very long strings to work around assembler limits.  */
	  if (count_string >= 512)
	    {
	      fputs (to_close, file);

	      for_string = "\t.byte \"";
	      for_decimal = "\t.byte ";
	      to_close = NULL;
	      count_string = 0;
	    }
	}
      else
	{
	  /* Non-printable character: emit it as a decimal .byte value.  */
	  if (for_decimal)
	    fputs (for_decimal, file);
	  fprintf (file, "%d", c);

	  for_string = "\n\t.byte \"";
	  for_decimal = ", ";
	  to_close = "\n";
	  count_string = 0;
	}
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (to_close)
    fputs (to_close, file);
}
/* Generate a unique section name for FILENAME for a section type
   represented by SECTION_DESC.  Output goes into BUF.

   SECTION_DESC can be any string, as long as it is different for each
   possible section type.

   We name the section in the same manner as xlc.  The name begins with an
   underscore followed by the filename (after stripping any leading directory
   names) with the last period replaced by the string SECTION_DESC.  If
   FILENAME does not contain a period, SECTION_DESC is appended to the end of
   the name.  */

void
rs6000_gen_section_name (char **buf, const char *filename,
			 const char *section_desc)
{
  const char *q, *after_last_slash, *last_period = 0;
  char *p;
  int len;

  /* Find the basename and the position of its last period, if any.  */
  after_last_slash = filename;
  for (q = filename; *q; q++)
    {
      if (*q == '/')
	after_last_slash = q + 1;
      else if (*q == '.')
	last_period = q;
    }

  len = strlen (after_last_slash) + strlen (section_desc) + 2;
  *buf = (char *) xmalloc (len);

  p = *buf;
  *p++ = '_';

  /* Copy the basename, substituting SECTION_DESC at the last period
     and dropping any non-alphanumeric characters.  */
  for (q = after_last_slash; *q; q++)
    {
      if (q == last_period)
	{
	  strcpy (p, section_desc);
	  p += strlen (section_desc);
	  break;
	}

      else if (ISALNUM (*q))
	*p++ = *q;
    }

  /* No period in the basename: append SECTION_DESC instead.  */
  if (last_period == 0)
    strcpy (p, section_desc);
  else
    *p = '\0';
}
17695 /* Emit profile function. */
17698 output_profile_hook (int labelno ATTRIBUTE_UNUSED
)
17700 /* Non-standard profiling for kernels, which just saves LR then calls
17701 _mcount without worrying about arg saves. The idea is to change
17702 the function prologue as little as possible as it isn't easy to
17703 account for arg save/restore code added just for _mcount. */
17704 if (TARGET_PROFILE_KERNEL
)
17707 if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
17709 #ifndef NO_PROFILE_COUNTERS
17710 # define NO_PROFILE_COUNTERS 0
17712 if (NO_PROFILE_COUNTERS
)
17713 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
17714 LCT_NORMAL
, VOIDmode
);
17718 const char *label_name
;
17721 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
17722 label_name
= ggc_strdup ((*targetm
.strip_name_encoding
) (buf
));
17723 fun
= gen_rtx_SYMBOL_REF (Pmode
, label_name
);
17725 emit_library_call (init_one_libfunc (RS6000_MCOUNT
),
17726 LCT_NORMAL
, VOIDmode
, fun
, Pmode
);
17729 else if (DEFAULT_ABI
== ABI_DARWIN
)
17731 const char *mcount_name
= RS6000_MCOUNT
;
17732 int caller_addr_regno
= LR_REGNO
;
17734 /* Be conservative and always set this, at least for now. */
17735 crtl
->uses_pic_offset_table
= 1;
17738 /* For PIC code, set up a stub and collect the caller's address
17739 from r0, which is where the prologue puts it. */
17740 if (MACHOPIC_INDIRECT
17741 && crtl
->uses_pic_offset_table
)
17742 caller_addr_regno
= 0;
17744 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, mcount_name
),
17745 LCT_NORMAL
, VOIDmode
,
17746 gen_rtx_REG (Pmode
, caller_addr_regno
), Pmode
);
17750 /* Write function profiler code. */
17753 output_function_profiler (FILE *file
, int labelno
)
17757 switch (DEFAULT_ABI
)
17760 gcc_unreachable ();
17765 warning (0, "no profiling of 64-bit code for this ABI");
17768 ASM_GENERATE_INTERNAL_LABEL (buf
, "LP", labelno
);
17769 fprintf (file
, "\tmflr %s\n", reg_names
[0]);
17770 if (NO_PROFILE_COUNTERS
)
17772 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17773 reg_names
[0], reg_names
[1]);
17775 else if (TARGET_SECURE_PLT
&& flag_pic
)
17777 if (TARGET_LINK_STACK
)
17780 get_ppc476_thunk_name (name
);
17781 asm_fprintf (file
, "\tbl %s\n", name
);
17784 asm_fprintf (file
, "\tbcl 20,31,1f\n1:\n");
17785 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17786 reg_names
[0], reg_names
[1]);
17787 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
17788 asm_fprintf (file
, "\taddis %s,%s,",
17789 reg_names
[12], reg_names
[12]);
17790 assemble_name (file
, buf
);
17791 asm_fprintf (file
, "-1b@ha\n\tla %s,", reg_names
[0]);
17792 assemble_name (file
, buf
);
17793 asm_fprintf (file
, "-1b@l(%s)\n", reg_names
[12]);
17795 else if (flag_pic
== 1)
17797 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file
);
17798 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17799 reg_names
[0], reg_names
[1]);
17800 asm_fprintf (file
, "\tmflr %s\n", reg_names
[12]);
17801 asm_fprintf (file
, "\tlwz %s,", reg_names
[0]);
17802 assemble_name (file
, buf
);
17803 asm_fprintf (file
, "@got(%s)\n", reg_names
[12]);
17805 else if (flag_pic
> 1)
17807 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17808 reg_names
[0], reg_names
[1]);
17809 /* Now, we need to get the address of the label. */
17810 if (TARGET_LINK_STACK
)
17813 get_ppc476_thunk_name (name
);
17814 asm_fprintf (file
, "\tbl %s\n\tb 1f\n\t.long ", name
);
17815 assemble_name (file
, buf
);
17816 fputs ("-.\n1:", file
);
17817 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
17818 asm_fprintf (file
, "\taddi %s,%s,4\n",
17819 reg_names
[11], reg_names
[11]);
17823 fputs ("\tbcl 20,31,1f\n\t.long ", file
);
17824 assemble_name (file
, buf
);
17825 fputs ("-.\n1:", file
);
17826 asm_fprintf (file
, "\tmflr %s\n", reg_names
[11]);
17828 asm_fprintf (file
, "\tlwz %s,0(%s)\n",
17829 reg_names
[0], reg_names
[11]);
17830 asm_fprintf (file
, "\tadd %s,%s,%s\n",
17831 reg_names
[0], reg_names
[0], reg_names
[11]);
17835 asm_fprintf (file
, "\tlis %s,", reg_names
[12]);
17836 assemble_name (file
, buf
);
17837 fputs ("@ha\n", file
);
17838 asm_fprintf (file
, "\tstw %s,4(%s)\n",
17839 reg_names
[0], reg_names
[1]);
17840 asm_fprintf (file
, "\tla %s,", reg_names
[0]);
17841 assemble_name (file
, buf
);
17842 asm_fprintf (file
, "@l(%s)\n", reg_names
[12]);
17845 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17846 fprintf (file
, "\tbl %s%s\n",
17847 RS6000_MCOUNT
, flag_pic
? "@plt" : "");
17853 /* Don't do anything, done in output_profile_hook (). */
17860 /* The following variable value is the last issued insn. */
17862 static rtx_insn
*last_scheduled_insn
;
17864 /* The following variable helps to balance issuing of load and
17865 store instructions */
17867 static int load_store_pendulum
;
17869 /* The following variable helps pair divide insns during scheduling. */
17870 static int divide_cnt
;
17871 /* The following variable helps pair and alternate vector and vector load
17872 insns during scheduling. */
17873 static int vec_pairing
;
17876 /* Power4 load update and store update instructions are cracked into a
17877 load or store and an integer insn which are executed in the same cycle.
17878 Branches have their own dispatch slot which does not count against the
17879 GCC issue rate, but it changes the program flow so there are no other
17880 instructions to issue in this cycle. */
17883 rs6000_variable_issue_1 (rtx_insn
*insn
, int more
)
17885 last_scheduled_insn
= insn
;
17886 if (GET_CODE (PATTERN (insn
)) == USE
17887 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
17889 cached_can_issue_more
= more
;
17890 return cached_can_issue_more
;
17893 if (insn_terminates_group_p (insn
, current_group
))
17895 cached_can_issue_more
= 0;
17896 return cached_can_issue_more
;
17899 /* If no reservation, but reach here */
17900 if (recog_memoized (insn
) < 0)
17903 if (rs6000_sched_groups
)
17905 if (is_microcoded_insn (insn
))
17906 cached_can_issue_more
= 0;
17907 else if (is_cracked_insn (insn
))
17908 cached_can_issue_more
= more
> 2 ? more
- 2 : 0;
17910 cached_can_issue_more
= more
- 1;
17912 return cached_can_issue_more
;
17915 if (rs6000_tune
== PROCESSOR_CELL
&& is_nonpipeline_insn (insn
))
17918 cached_can_issue_more
= more
- 1;
17919 return cached_can_issue_more
;
17923 rs6000_variable_issue (FILE *stream
, int verbose
, rtx_insn
*insn
, int more
)
17925 int r
= rs6000_variable_issue_1 (insn
, more
);
17927 fprintf (stream
, "// rs6000_variable_issue (more = %d) = %d\n", more
, r
);
17931 /* Adjust the cost of a scheduling dependency. Return the new cost of
17932 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
17935 rs6000_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
17938 enum attr_type attr_type
;
17940 if (recog_memoized (insn
) < 0 || recog_memoized (dep_insn
) < 0)
17947 /* Data dependency; DEP_INSN writes a register that INSN reads
17948 some cycles later. */
17950 /* Separate a load from a narrower, dependent store. */
17951 if ((rs6000_sched_groups
|| rs6000_tune
== PROCESSOR_POWER9
17952 || rs6000_tune
== PROCESSOR_POWER10
)
17953 && GET_CODE (PATTERN (insn
)) == SET
17954 && GET_CODE (PATTERN (dep_insn
)) == SET
17955 && MEM_P (XEXP (PATTERN (insn
), 1))
17956 && MEM_P (XEXP (PATTERN (dep_insn
), 0))
17957 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn
), 1)))
17958 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn
), 0)))))
17961 attr_type
= get_attr_type (insn
);
17966 /* Tell the first scheduling pass about the latency between
17967 a mtctr and bctr (and mtlr and br/blr). The first
17968 scheduling pass will not know about this latency since
17969 the mtctr instruction, which has the latency associated
17970 to it, will be generated by reload. */
17973 /* Leave some extra cycles between a compare and its
17974 dependent branch, to inhibit expensive mispredicts. */
17975 if ((rs6000_tune
== PROCESSOR_PPC603
17976 || rs6000_tune
== PROCESSOR_PPC604
17977 || rs6000_tune
== PROCESSOR_PPC604e
17978 || rs6000_tune
== PROCESSOR_PPC620
17979 || rs6000_tune
== PROCESSOR_PPC630
17980 || rs6000_tune
== PROCESSOR_PPC750
17981 || rs6000_tune
== PROCESSOR_PPC7400
17982 || rs6000_tune
== PROCESSOR_PPC7450
17983 || rs6000_tune
== PROCESSOR_PPCE5500
17984 || rs6000_tune
== PROCESSOR_PPCE6500
17985 || rs6000_tune
== PROCESSOR_POWER4
17986 || rs6000_tune
== PROCESSOR_POWER5
17987 || rs6000_tune
== PROCESSOR_POWER7
17988 || rs6000_tune
== PROCESSOR_POWER8
17989 || rs6000_tune
== PROCESSOR_POWER9
17990 || rs6000_tune
== PROCESSOR_POWER10
17991 || rs6000_tune
== PROCESSOR_CELL
)
17992 && recog_memoized (dep_insn
)
17993 && (INSN_CODE (dep_insn
) >= 0))
17995 switch (get_attr_type (dep_insn
))
17998 case TYPE_FPCOMPARE
:
17999 case TYPE_CR_LOGICAL
:
18003 if (get_attr_dot (dep_insn
) == DOT_YES
)
18008 if (get_attr_dot (dep_insn
) == DOT_YES
18009 && get_attr_var_shift (dep_insn
) == VAR_SHIFT_NO
)
18020 if ((rs6000_tune
== PROCESSOR_POWER6
)
18021 && recog_memoized (dep_insn
)
18022 && (INSN_CODE (dep_insn
) >= 0))
18025 if (GET_CODE (PATTERN (insn
)) != SET
)
18026 /* If this happens, we have to extend this to schedule
18027 optimally. Return default for now. */
18030 /* Adjust the cost for the case where the value written
18031 by a fixed point operation is used as the address
18032 gen value on a store. */
18033 switch (get_attr_type (dep_insn
))
18038 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18039 return get_attr_sign_extend (dep_insn
)
18040 == SIGN_EXTEND_YES
? 6 : 4;
18045 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18046 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
18056 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18064 if (get_attr_update (dep_insn
) == UPDATE_YES
18065 && ! rs6000_store_data_bypass_p (dep_insn
, insn
))
18071 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18077 if (! rs6000_store_data_bypass_p (dep_insn
, insn
))
18078 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
18088 if ((rs6000_tune
== PROCESSOR_POWER6
)
18089 && recog_memoized (dep_insn
)
18090 && (INSN_CODE (dep_insn
) >= 0))
18093 /* Adjust the cost for the case where the value written
18094 by a fixed point instruction is used within the address
18095 gen portion of a subsequent load(u)(x) */
18096 switch (get_attr_type (dep_insn
))
18101 if (set_to_load_agen (dep_insn
, insn
))
18102 return get_attr_sign_extend (dep_insn
)
18103 == SIGN_EXTEND_YES
? 6 : 4;
18108 if (set_to_load_agen (dep_insn
, insn
))
18109 return get_attr_var_shift (dep_insn
) == VAR_SHIFT_YES
?
18119 if (set_to_load_agen (dep_insn
, insn
))
18127 if (get_attr_update (dep_insn
) == UPDATE_YES
18128 && set_to_load_agen (dep_insn
, insn
))
18134 if (set_to_load_agen (dep_insn
, insn
))
18140 if (set_to_load_agen (dep_insn
, insn
))
18141 return get_attr_size (dep_insn
) == SIZE_32
? 45 : 57;
18154 /* Fall out to return default cost. */
18158 case REG_DEP_OUTPUT
:
18159 /* Output dependency; DEP_INSN writes a register that INSN writes some
18161 if ((rs6000_tune
== PROCESSOR_POWER6
)
18162 && recog_memoized (dep_insn
)
18163 && (INSN_CODE (dep_insn
) >= 0))
18165 attr_type
= get_attr_type (insn
);
18170 case TYPE_FPSIMPLE
:
18171 if (get_attr_type (dep_insn
) == TYPE_FP
18172 || get_attr_type (dep_insn
) == TYPE_FPSIMPLE
)
18179 /* Fall through, no cost for output dependency. */
18183 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18188 gcc_unreachable ();
18194 /* Debug version of rs6000_adjust_cost. */
18197 rs6000_debug_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
,
18198 int cost
, unsigned int dw
)
18200 int ret
= rs6000_adjust_cost (insn
, dep_type
, dep_insn
, cost
, dw
);
18208 default: dep
= "unknown depencency"; break;
18209 case REG_DEP_TRUE
: dep
= "data dependency"; break;
18210 case REG_DEP_OUTPUT
: dep
= "output dependency"; break;
18211 case REG_DEP_ANTI
: dep
= "anti depencency"; break;
18215 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18216 "%s, insn:\n", ret
, cost
, dep
);
18224 /* The function returns a true if INSN is microcoded.
18225 Return false otherwise. */
18228 is_microcoded_insn (rtx_insn
*insn
)
18230 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18231 || GET_CODE (PATTERN (insn
)) == USE
18232 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18235 if (rs6000_tune
== PROCESSOR_CELL
)
18236 return get_attr_cell_micro (insn
) == CELL_MICRO_ALWAYS
;
18238 if (rs6000_sched_groups
18239 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
18241 enum attr_type type
= get_attr_type (insn
);
18242 if ((type
== TYPE_LOAD
18243 && get_attr_update (insn
) == UPDATE_YES
18244 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
18245 || ((type
== TYPE_LOAD
|| type
== TYPE_STORE
)
18246 && get_attr_update (insn
) == UPDATE_YES
18247 && get_attr_indexed (insn
) == INDEXED_YES
)
18248 || type
== TYPE_MFCR
)
18255 /* The function returns true if INSN is cracked into 2 instructions
18256 by the processor (and therefore occupies 2 issue slots). */
18259 is_cracked_insn (rtx_insn
*insn
)
18261 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18262 || GET_CODE (PATTERN (insn
)) == USE
18263 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18266 if (rs6000_sched_groups
18267 && (rs6000_tune
== PROCESSOR_POWER4
|| rs6000_tune
== PROCESSOR_POWER5
))
18269 enum attr_type type
= get_attr_type (insn
);
18270 if ((type
== TYPE_LOAD
18271 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
18272 && get_attr_update (insn
) == UPDATE_NO
)
18273 || (type
== TYPE_LOAD
18274 && get_attr_sign_extend (insn
) == SIGN_EXTEND_NO
18275 && get_attr_update (insn
) == UPDATE_YES
18276 && get_attr_indexed (insn
) == INDEXED_NO
)
18277 || (type
== TYPE_STORE
18278 && get_attr_update (insn
) == UPDATE_YES
18279 && get_attr_indexed (insn
) == INDEXED_NO
)
18280 || ((type
== TYPE_FPLOAD
|| type
== TYPE_FPSTORE
)
18281 && get_attr_update (insn
) == UPDATE_YES
)
18282 || (type
== TYPE_CR_LOGICAL
18283 && get_attr_cr_logical_3op (insn
) == CR_LOGICAL_3OP_YES
)
18284 || (type
== TYPE_EXTS
18285 && get_attr_dot (insn
) == DOT_YES
)
18286 || (type
== TYPE_SHIFT
18287 && get_attr_dot (insn
) == DOT_YES
18288 && get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
18289 || (type
== TYPE_MUL
18290 && get_attr_dot (insn
) == DOT_YES
)
18291 || type
== TYPE_DIV
18292 || (type
== TYPE_INSERT
18293 && get_attr_size (insn
) == SIZE_32
))
18300 /* The function returns true if INSN can be issued only from
18301 the branch slot. */
18304 is_branch_slot_insn (rtx_insn
*insn
)
18306 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18307 || GET_CODE (PATTERN (insn
)) == USE
18308 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18311 if (rs6000_sched_groups
)
18313 enum attr_type type
= get_attr_type (insn
);
18314 if (type
== TYPE_BRANCH
|| type
== TYPE_JMPREG
)
18322 /* The function returns true if out_inst sets a value that is
18323 used in the address generation computation of in_insn */
18325 set_to_load_agen (rtx_insn
*out_insn
, rtx_insn
*in_insn
)
18327 rtx out_set
, in_set
;
18329 /* For performance reasons, only handle the simple case where
18330 both loads are a single_set. */
18331 out_set
= single_set (out_insn
);
18334 in_set
= single_set (in_insn
);
18336 return reg_mentioned_p (SET_DEST (out_set
), SET_SRC (in_set
));
18342 /* Try to determine base/offset/size parts of the given MEM.
18343 Return true if successful, false if all the values couldn't
18346 This function only looks for REG or REG+CONST address forms.
18347 REG+REG address form will return false. */
18350 get_memref_parts (rtx mem
, rtx
*base
, HOST_WIDE_INT
*offset
,
18351 HOST_WIDE_INT
*size
)
18354 if (MEM_SIZE_KNOWN_P (mem
))
18355 *size
= MEM_SIZE (mem
);
18359 addr_rtx
= (XEXP (mem
, 0));
18360 if (GET_CODE (addr_rtx
) == PRE_MODIFY
)
18361 addr_rtx
= XEXP (addr_rtx
, 1);
18364 while (GET_CODE (addr_rtx
) == PLUS
18365 && CONST_INT_P (XEXP (addr_rtx
, 1)))
18367 *offset
+= INTVAL (XEXP (addr_rtx
, 1));
18368 addr_rtx
= XEXP (addr_rtx
, 0);
18370 if (!REG_P (addr_rtx
))
18377 /* If the target storage locations of arguments MEM1 and MEM2 are
18378 adjacent, then return the argument that has the lower address.
18379 Otherwise, return NULL_RTX. */
18382 adjacent_mem_locations (rtx mem1
, rtx mem2
)
18385 HOST_WIDE_INT off1
, size1
, off2
, size2
;
18389 && get_memref_parts (mem1
, ®1
, &off1
, &size1
)
18390 && get_memref_parts (mem2
, ®2
, &off2
, &size2
)
18391 && REGNO (reg1
) == REGNO (reg2
))
18393 if (off1
+ size1
== off2
)
18395 else if (off2
+ size2
== off1
)
18402 /* This function returns true if it can be determined that the two MEM
18403 locations overlap by at least 1 byte based on base reg/offset/size. */
18406 mem_locations_overlap (rtx mem1
, rtx mem2
)
18409 HOST_WIDE_INT off1
, size1
, off2
, size2
;
18411 if (get_memref_parts (mem1
, ®1
, &off1
, &size1
)
18412 && get_memref_parts (mem2
, ®2
, &off2
, &size2
))
18413 return ((REGNO (reg1
) == REGNO (reg2
))
18414 && (((off1
<= off2
) && (off1
+ size1
> off2
))
18415 || ((off2
<= off1
) && (off2
+ size2
> off1
))));
18420 /* A C statement (sans semicolon) to update the integer scheduling
18421 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18422 INSN earlier, reduce the priority to execute INSN later. Do not
18423 define this macro if you do not need to adjust the scheduling
18424 priorities of insns. */
18427 rs6000_adjust_priority (rtx_insn
*insn ATTRIBUTE_UNUSED
, int priority
)
18429 rtx load_mem
, str_mem
;
18430 /* On machines (like the 750) which have asymmetric integer units,
18431 where one integer unit can do multiply and divides and the other
18432 can't, reduce the priority of multiply/divide so it is scheduled
18433 before other integer operations. */
18436 if (! INSN_P (insn
))
18439 if (GET_CODE (PATTERN (insn
)) == USE
)
18442 switch (rs6000_tune
) {
18443 case PROCESSOR_PPC750
:
18444 switch (get_attr_type (insn
))
18451 fprintf (stderr
, "priority was %#x (%d) before adjustment\n",
18452 priority
, priority
);
18453 if (priority
>= 0 && priority
< 0x01000000)
18460 if (insn_must_be_first_in_group (insn
)
18461 && reload_completed
18462 && current_sched_info
->sched_max_insns_priority
18463 && rs6000_sched_restricted_insns_priority
)
18466 /* Prioritize insns that can be dispatched only in the first
18468 if (rs6000_sched_restricted_insns_priority
== 1)
18469 /* Attach highest priority to insn. This means that in
18470 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18471 precede 'priority' (critical path) considerations. */
18472 return current_sched_info
->sched_max_insns_priority
;
18473 else if (rs6000_sched_restricted_insns_priority
== 2)
18474 /* Increase priority of insn by a minimal amount. This means that in
18475 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18476 considerations precede dispatch-slot restriction considerations. */
18477 return (priority
+ 1);
18480 if (rs6000_tune
== PROCESSOR_POWER6
18481 && ((load_store_pendulum
== -2 && is_load_insn (insn
, &load_mem
))
18482 || (load_store_pendulum
== 2 && is_store_insn (insn
, &str_mem
))))
18483 /* Attach highest priority to insn if the scheduler has just issued two
18484 stores and this instruction is a load, or two loads and this instruction
18485 is a store. Power6 wants loads and stores scheduled alternately
18487 return current_sched_info
->sched_max_insns_priority
;
18492 /* Return true if the instruction is nonpipelined on the Cell. */
18494 is_nonpipeline_insn (rtx_insn
*insn
)
18496 enum attr_type type
;
18497 if (!insn
|| !NONDEBUG_INSN_P (insn
)
18498 || GET_CODE (PATTERN (insn
)) == USE
18499 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
18502 type
= get_attr_type (insn
);
18503 if (type
== TYPE_MUL
18504 || type
== TYPE_DIV
18505 || type
== TYPE_SDIV
18506 || type
== TYPE_DDIV
18507 || type
== TYPE_SSQRT
18508 || type
== TYPE_DSQRT
18509 || type
== TYPE_MFCR
18510 || type
== TYPE_MFCRF
18511 || type
== TYPE_MFJMPR
)
18519 /* Return how many instructions the machine can issue per cycle. */
18522 rs6000_issue_rate (void)
18524 /* Unless scheduling for register pressure, use issue rate of 1 for
18525 first scheduling pass to decrease degradation. */
18526 if (!reload_completed
&& !flag_sched_pressure
)
18529 switch (rs6000_tune
) {
18530 case PROCESSOR_RS64A
:
18531 case PROCESSOR_PPC601
: /* ? */
18532 case PROCESSOR_PPC7450
:
18534 case PROCESSOR_PPC440
:
18535 case PROCESSOR_PPC603
:
18536 case PROCESSOR_PPC750
:
18537 case PROCESSOR_PPC7400
:
18538 case PROCESSOR_PPC8540
:
18539 case PROCESSOR_PPC8548
:
18540 case PROCESSOR_CELL
:
18541 case PROCESSOR_PPCE300C2
:
18542 case PROCESSOR_PPCE300C3
:
18543 case PROCESSOR_PPCE500MC
:
18544 case PROCESSOR_PPCE500MC64
:
18545 case PROCESSOR_PPCE5500
:
18546 case PROCESSOR_PPCE6500
:
18547 case PROCESSOR_TITAN
:
18549 case PROCESSOR_PPC476
:
18550 case PROCESSOR_PPC604
:
18551 case PROCESSOR_PPC604e
:
18552 case PROCESSOR_PPC620
:
18553 case PROCESSOR_PPC630
:
18555 case PROCESSOR_POWER4
:
18556 case PROCESSOR_POWER5
:
18557 case PROCESSOR_POWER6
:
18558 case PROCESSOR_POWER7
:
18560 case PROCESSOR_POWER8
:
18562 case PROCESSOR_POWER9
:
18564 case PROCESSOR_POWER10
:
18571 /* Return how many instructions to look ahead for better insn
18575 rs6000_use_sched_lookahead (void)
18577 switch (rs6000_tune
)
18579 case PROCESSOR_PPC8540
:
18580 case PROCESSOR_PPC8548
:
18583 case PROCESSOR_CELL
:
18584 return (reload_completed
? 8 : 0);
18591 /* We are choosing insn from the ready queue. Return zero if INSN can be
18594 rs6000_use_sched_lookahead_guard (rtx_insn
*insn
, int ready_index
)
18596 if (ready_index
== 0)
18599 if (rs6000_tune
!= PROCESSOR_CELL
)
18602 gcc_assert (insn
!= NULL_RTX
&& INSN_P (insn
));
18604 if (!reload_completed
18605 || is_nonpipeline_insn (insn
)
18606 || is_microcoded_insn (insn
))
18612 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18613 and return true. */
18616 find_mem_ref (rtx pat
, rtx
*mem_ref
)
18621 /* stack_tie does not produce any real memory traffic. */
18622 if (tie_operand (pat
, VOIDmode
))
18631 /* Recursively process the pattern. */
18632 fmt
= GET_RTX_FORMAT (GET_CODE (pat
));
18634 for (i
= GET_RTX_LENGTH (GET_CODE (pat
)) - 1; i
>= 0; i
--)
18638 if (find_mem_ref (XEXP (pat
, i
), mem_ref
))
18641 else if (fmt
[i
] == 'E')
18642 for (j
= XVECLEN (pat
, i
) - 1; j
>= 0; j
--)
18644 if (find_mem_ref (XVECEXP (pat
, i
, j
), mem_ref
))
18652 /* Determine if PAT is a PATTERN of a load insn. */
18655 is_load_insn1 (rtx pat
, rtx
*load_mem
)
18657 if (!pat
|| pat
== NULL_RTX
)
18660 if (GET_CODE (pat
) == SET
)
18662 if (REG_P (SET_DEST (pat
)))
18663 return find_mem_ref (SET_SRC (pat
), load_mem
);
18668 if (GET_CODE (pat
) == PARALLEL
)
18672 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
18673 if (is_load_insn1 (XVECEXP (pat
, 0, i
), load_mem
))
18680 /* Determine if INSN loads from memory. */
18683 is_load_insn (rtx insn
, rtx
*load_mem
)
18685 if (!insn
|| !INSN_P (insn
))
18691 return is_load_insn1 (PATTERN (insn
), load_mem
);
18694 /* Determine if PAT is a PATTERN of a store insn. */
18697 is_store_insn1 (rtx pat
, rtx
*str_mem
)
18699 if (!pat
|| pat
== NULL_RTX
)
18702 if (GET_CODE (pat
) == SET
)
18704 if (REG_P (SET_SRC (pat
)) || SUBREG_P (SET_SRC (pat
)))
18705 return find_mem_ref (SET_DEST (pat
), str_mem
);
18710 if (GET_CODE (pat
) == PARALLEL
)
18714 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
18715 if (is_store_insn1 (XVECEXP (pat
, 0, i
), str_mem
))
18722 /* Determine if INSN stores to memory. */
18725 is_store_insn (rtx insn
, rtx
*str_mem
)
18727 if (!insn
|| !INSN_P (insn
))
18730 return is_store_insn1 (PATTERN (insn
), str_mem
);
18733 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18736 is_power9_pairable_vec_type (enum attr_type type
)
18740 case TYPE_VECSIMPLE
:
18741 case TYPE_VECCOMPLEX
:
18745 case TYPE_VECFLOAT
:
18747 case TYPE_VECDOUBLE
:
18755 /* Returns whether the dependence between INSN and NEXT is considered
18756 costly by the given target. */
18759 rs6000_is_costly_dependence (dep_t dep
, int cost
, int distance
)
18763 rtx load_mem
, str_mem
;
18765 /* If the flag is not enabled - no dependence is considered costly;
18766 allow all dependent insns in the same group.
18767 This is the most aggressive option. */
18768 if (rs6000_sched_costly_dep
== no_dep_costly
)
18771 /* If the flag is set to 1 - a dependence is always considered costly;
18772 do not allow dependent instructions in the same group.
18773 This is the most conservative option. */
18774 if (rs6000_sched_costly_dep
== all_deps_costly
)
18777 insn
= DEP_PRO (dep
);
18778 next
= DEP_CON (dep
);
18780 if (rs6000_sched_costly_dep
== store_to_load_dep_costly
18781 && is_load_insn (next
, &load_mem
)
18782 && is_store_insn (insn
, &str_mem
))
18783 /* Prevent load after store in the same group. */
18786 if (rs6000_sched_costly_dep
== true_store_to_load_dep_costly
18787 && is_load_insn (next
, &load_mem
)
18788 && is_store_insn (insn
, &str_mem
)
18789 && DEP_TYPE (dep
) == REG_DEP_TRUE
18790 && mem_locations_overlap(str_mem
, load_mem
))
18791 /* Prevent load after store in the same group if it is a true
18795 /* The flag is set to X; dependences with latency >= X are considered costly,
18796 and will not be scheduled in the same group. */
18797 if (rs6000_sched_costly_dep
<= max_dep_latency
18798 && ((cost
- distance
) >= (int)rs6000_sched_costly_dep
))
18804 /* Return the next insn after INSN that is found before TAIL is reached,
18805 skipping any "non-active" insns - insns that will not actually occupy
18806 an issue slot. Return NULL_RTX if such an insn is not found. */
18809 get_next_active_insn (rtx_insn
*insn
, rtx_insn
*tail
)
18811 if (insn
== NULL_RTX
|| insn
== tail
)
18816 insn
= NEXT_INSN (insn
);
18817 if (insn
== NULL_RTX
|| insn
== tail
)
18821 || JUMP_P (insn
) || JUMP_TABLE_DATA_P (insn
)
18822 || (NONJUMP_INSN_P (insn
)
18823 && GET_CODE (PATTERN (insn
)) != USE
18824 && GET_CODE (PATTERN (insn
)) != CLOBBER
18825 && INSN_CODE (insn
) != CODE_FOR_stack_tie
))
18831 /* Move instruction at POS to the end of the READY list. */
18834 move_to_end_of_ready (rtx_insn
**ready
, int pos
, int lastpos
)
18840 for (i
= pos
; i
< lastpos
; i
++)
18841 ready
[i
] = ready
[i
+ 1];
18842 ready
[lastpos
] = tmp
;
18845 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18848 power6_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
18850 /* For Power6, we need to handle some special cases to try and keep the
18851 store queue from overflowing and triggering expensive flushes.
18853 This code monitors how load and store instructions are being issued
18854 and skews the ready list one way or the other to increase the likelihood
18855 that a desired instruction is issued at the proper time.
18857 A couple of things are done. First, we maintain a "load_store_pendulum"
18858 to track the current state of load/store issue.
18860 - If the pendulum is at zero, then no loads or stores have been
18861 issued in the current cycle so we do nothing.
18863 - If the pendulum is 1, then a single load has been issued in this
18864 cycle and we attempt to locate another load in the ready list to
18867 - If the pendulum is -2, then two stores have already been
18868 issued in this cycle, so we increase the priority of the first load
18869 in the ready list to increase it's likelihood of being chosen first
18872 - If the pendulum is -1, then a single store has been issued in this
18873 cycle and we attempt to locate another store in the ready list to
18874 issue with it, preferring a store to an adjacent memory location to
18875 facilitate store pairing in the store queue.
18877 - If the pendulum is 2, then two loads have already been
18878 issued in this cycle, so we increase the priority of the first store
18879 in the ready list to increase it's likelihood of being chosen first
18882 - If the pendulum < -2 or > 2, then do nothing.
18884 Note: This code covers the most common scenarios. There exist non
18885 load/store instructions which make use of the LSU and which
18886 would need to be accounted for to strictly model the behavior
18887 of the machine. Those instructions are currently unaccounted
18888 for to help minimize compile time overhead of this code.
18891 rtx load_mem
, str_mem
;
18893 if (is_store_insn (last_scheduled_insn
, &str_mem
))
18894 /* Issuing a store, swing the load_store_pendulum to the left */
18895 load_store_pendulum
--;
18896 else if (is_load_insn (last_scheduled_insn
, &load_mem
))
18897 /* Issuing a load, swing the load_store_pendulum to the right */
18898 load_store_pendulum
++;
18900 return cached_can_issue_more
;
18902 /* If the pendulum is balanced, or there is only one instruction on
18903 the ready list, then all is well, so return. */
18904 if ((load_store_pendulum
== 0) || (lastpos
<= 0))
18905 return cached_can_issue_more
;
18907 if (load_store_pendulum
== 1)
18909 /* A load has been issued in this cycle. Scan the ready list
18910 for another load to issue with it */
18915 if (is_load_insn (ready
[pos
], &load_mem
))
18917 /* Found a load. Move it to the head of the ready list,
18918 and adjust it's priority so that it is more likely to
18920 move_to_end_of_ready (ready
, pos
, lastpos
);
18922 if (!sel_sched_p ()
18923 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
18924 INSN_PRIORITY (ready
[lastpos
])++;
18930 else if (load_store_pendulum
== -2)
18932 /* Two stores have been issued in this cycle. Increase the
18933 priority of the first load in the ready list to favor it for
18934 issuing in the next cycle. */
18939 if (is_load_insn (ready
[pos
], &load_mem
)
18941 && INSN_PRIORITY_KNOWN (ready
[pos
]))
18943 INSN_PRIORITY (ready
[pos
])++;
18945 /* Adjust the pendulum to account for the fact that a load
18946 was found and increased in priority. This is to prevent
18947 increasing the priority of multiple loads */
18948 load_store_pendulum
--;
18955 else if (load_store_pendulum
== -1)
18957 /* A store has been issued in this cycle. Scan the ready list for
18958 another store to issue with it, preferring a store to an adjacent
18960 int first_store_pos
= -1;
18966 if (is_store_insn (ready
[pos
], &str_mem
))
18969 /* Maintain the index of the first store found on the
18971 if (first_store_pos
== -1)
18972 first_store_pos
= pos
;
18974 if (is_store_insn (last_scheduled_insn
, &str_mem2
)
18975 && adjacent_mem_locations (str_mem
, str_mem2
))
18977 /* Found an adjacent store. Move it to the head of the
18978 ready list, and adjust it's priority so that it is
18979 more likely to stay there */
18980 move_to_end_of_ready (ready
, pos
, lastpos
);
18982 if (!sel_sched_p ()
18983 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
18984 INSN_PRIORITY (ready
[lastpos
])++;
18986 first_store_pos
= -1;
18994 if (first_store_pos
>= 0)
18996 /* An adjacent store wasn't found, but a non-adjacent store was,
18997 so move the non-adjacent store to the front of the ready
18998 list, and adjust its priority so that it is more likely to
19000 move_to_end_of_ready (ready
, first_store_pos
, lastpos
);
19001 if (!sel_sched_p ()
19002 && INSN_PRIORITY_KNOWN (ready
[lastpos
]))
19003 INSN_PRIORITY (ready
[lastpos
])++;
19006 else if (load_store_pendulum
== 2)
19008 /* Two loads have been issued in this cycle. Increase the priority
19009 of the first store in the ready list to favor it for issuing in
19015 if (is_store_insn (ready
[pos
], &str_mem
)
19017 && INSN_PRIORITY_KNOWN (ready
[pos
]))
19019 INSN_PRIORITY (ready
[pos
])++;
19021 /* Adjust the pendulum to account for the fact that a store
19022 was found and increased in priority. This is to prevent
19023 increasing the priority of multiple stores */
19024 load_store_pendulum
++;
19032 return cached_can_issue_more
;
19035 /* Do Power9 specific sched_reorder2 reordering of ready list. */
19038 power9_sched_reorder2 (rtx_insn
**ready
, int lastpos
)
19041 enum attr_type type
, type2
;
19043 type
= get_attr_type (last_scheduled_insn
);
19045 /* Try to issue fixed point divides back-to-back in pairs so they will be
19046 routed to separate execution units and execute in parallel. */
19047 if (type
== TYPE_DIV
&& divide_cnt
== 0)
19049 /* First divide has been scheduled. */
19052 /* Scan the ready list looking for another divide, if found move it
19053 to the end of the list so it is chosen next. */
19057 if (recog_memoized (ready
[pos
]) >= 0
19058 && get_attr_type (ready
[pos
]) == TYPE_DIV
)
19060 move_to_end_of_ready (ready
, pos
, lastpos
);
19068 /* Last insn was the 2nd divide or not a divide, reset the counter. */
19071 /* The best dispatch throughput for vector and vector load insns can be
19072 achieved by interleaving a vector and vector load such that they'll
19073 dispatch to the same superslice. If this pairing cannot be achieved
19074 then it is best to pair vector insns together and vector load insns
19077 To aid in this pairing, vec_pairing maintains the current state with
19078 the following values:
19080 0 : Initial state, no vecload/vector pairing has been started.
19082 1 : A vecload or vector insn has been issued and a candidate for
19083 pairing has been found and moved to the end of the ready
19085 if (type
== TYPE_VECLOAD
)
19087 /* Issued a vecload. */
19088 if (vec_pairing
== 0)
19090 int vecload_pos
= -1;
19091 /* We issued a single vecload, look for a vector insn to pair it
19092 with. If one isn't found, try to pair another vecload. */
19096 if (recog_memoized (ready
[pos
]) >= 0)
19098 type2
= get_attr_type (ready
[pos
]);
19099 if (is_power9_pairable_vec_type (type2
))
19101 /* Found a vector insn to pair with, move it to the
19102 end of the ready list so it is scheduled next. */
19103 move_to_end_of_ready (ready
, pos
, lastpos
);
19105 return cached_can_issue_more
;
19107 else if (type2
== TYPE_VECLOAD
&& vecload_pos
== -1)
19108 /* Remember position of first vecload seen. */
19113 if (vecload_pos
>= 0)
19115 /* Didn't find a vector to pair with but did find a vecload,
19116 move it to the end of the ready list. */
19117 move_to_end_of_ready (ready
, vecload_pos
, lastpos
);
19119 return cached_can_issue_more
;
19123 else if (is_power9_pairable_vec_type (type
))
19125 /* Issued a vector operation. */
19126 if (vec_pairing
== 0)
19129 /* We issued a single vector insn, look for a vecload to pair it
19130 with. If one isn't found, try to pair another vector. */
19134 if (recog_memoized (ready
[pos
]) >= 0)
19136 type2
= get_attr_type (ready
[pos
]);
19137 if (type2
== TYPE_VECLOAD
)
19139 /* Found a vecload insn to pair with, move it to the
19140 end of the ready list so it is scheduled next. */
19141 move_to_end_of_ready (ready
, pos
, lastpos
);
19143 return cached_can_issue_more
;
19145 else if (is_power9_pairable_vec_type (type2
)
19147 /* Remember position of first vector insn seen. */
19154 /* Didn't find a vecload to pair with but did find a vector
19155 insn, move it to the end of the ready list. */
19156 move_to_end_of_ready (ready
, vec_pos
, lastpos
);
19158 return cached_can_issue_more
;
19163 /* We've either finished a vec/vecload pair, couldn't find an insn to
19164 continue the current pair, or the last insn had nothing to do with
19165 with pairing. In any case, reset the state. */
19169 return cached_can_issue_more
;
19172 /* Determine if INSN is a store to memory that can be fused with a similar
19176 is_fusable_store (rtx_insn
*insn
, rtx
*str_mem
)
19178 /* Insn must be a non-prefixed base+disp form store. */
19179 if (is_store_insn (insn
, str_mem
)
19180 && get_attr_prefixed (insn
) == PREFIXED_NO
19181 && get_attr_update (insn
) == UPDATE_NO
19182 && get_attr_indexed (insn
) == INDEXED_NO
)
19184 /* Further restrictions by mode and size. */
19185 if (!MEM_SIZE_KNOWN_P (*str_mem
))
19188 machine_mode mode
= GET_MODE (*str_mem
);
19189 HOST_WIDE_INT size
= MEM_SIZE (*str_mem
);
19191 if (INTEGRAL_MODE_P (mode
))
19192 /* Must be word or dword size. */
19193 return (size
== 4 || size
== 8);
19194 else if (FLOAT_MODE_P (mode
))
19195 /* Must be dword size. */
19196 return (size
== 8);
19202 /* Do Power10 specific reordering of the ready list. */
19205 power10_sched_reorder (rtx_insn
**ready
, int lastpos
)
19209 /* Do store fusion during sched2 only. */
19210 if (!reload_completed
)
19211 return cached_can_issue_more
;
19213 /* If the prior insn finished off a store fusion pair then simply
19214 reset the counter and return, nothing more to do. */
19215 if (load_store_pendulum
!= 0)
19217 load_store_pendulum
= 0;
19218 return cached_can_issue_more
;
19221 /* Try to pair certain store insns to adjacent memory locations
19222 so that the hardware will fuse them to a single operation. */
19223 if (TARGET_P10_FUSION
&& is_fusable_store (last_scheduled_insn
, &mem1
))
19226 /* A fusable store was just scheduled. Scan the ready list for another
19227 store that it can fuse with. */
19232 /* GPR stores can be ascending or descending offsets, FPR/VSR stores
19233 must be ascending only. */
19234 if (is_fusable_store (ready
[pos
], &mem2
)
19235 && ((INTEGRAL_MODE_P (GET_MODE (mem1
))
19236 && adjacent_mem_locations (mem1
, mem2
))
19237 || (FLOAT_MODE_P (GET_MODE (mem1
))
19238 && (adjacent_mem_locations (mem1
, mem2
) == mem1
))))
19240 /* Found a fusable store. Move it to the end of the ready list
19241 so it is scheduled next. */
19242 move_to_end_of_ready (ready
, pos
, lastpos
);
19244 load_store_pendulum
= -1;
19251 return cached_can_issue_more
;
19254 /* We are about to begin issuing insns for this clock cycle. */
19257 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED
, int sched_verbose
,
19258 rtx_insn
**ready ATTRIBUTE_UNUSED
,
19259 int *pn_ready ATTRIBUTE_UNUSED
,
19260 int clock_var ATTRIBUTE_UNUSED
)
19262 int n_ready
= *pn_ready
;
19265 fprintf (dump
, "// rs6000_sched_reorder :\n");
19267 /* Reorder the ready list, if the second to last ready insn
19268 is a nonepipeline insn. */
19269 if (rs6000_tune
== PROCESSOR_CELL
&& n_ready
> 1)
19271 if (is_nonpipeline_insn (ready
[n_ready
- 1])
19272 && (recog_memoized (ready
[n_ready
- 2]) > 0))
19273 /* Simply swap first two insns. */
19274 std::swap (ready
[n_ready
- 1], ready
[n_ready
- 2]);
19277 if (rs6000_tune
== PROCESSOR_POWER6
)
19278 load_store_pendulum
= 0;
19280 /* Do Power10 dependent reordering. */
19281 if (rs6000_tune
== PROCESSOR_POWER10
&& last_scheduled_insn
)
19282 power10_sched_reorder (ready
, n_ready
- 1);
19284 return rs6000_issue_rate ();
19287 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19290 rs6000_sched_reorder2 (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
19291 int *pn_ready
, int clock_var ATTRIBUTE_UNUSED
)
19294 fprintf (dump
, "// rs6000_sched_reorder2 :\n");
19296 /* Do Power6 dependent reordering if necessary. */
19297 if (rs6000_tune
== PROCESSOR_POWER6
&& last_scheduled_insn
)
19298 return power6_sched_reorder2 (ready
, *pn_ready
- 1);
19300 /* Do Power9 dependent reordering if necessary. */
19301 if (rs6000_tune
== PROCESSOR_POWER9
&& last_scheduled_insn
19302 && recog_memoized (last_scheduled_insn
) >= 0)
19303 return power9_sched_reorder2 (ready
, *pn_ready
- 1);
19305 /* Do Power10 dependent reordering. */
19306 if (rs6000_tune
== PROCESSOR_POWER10
&& last_scheduled_insn
)
19307 return power10_sched_reorder (ready
, *pn_ready
- 1);
19309 return cached_can_issue_more
;
19312 /* Return whether the presence of INSN causes a dispatch group termination
19313 of group WHICH_GROUP.
19315 If WHICH_GROUP == current_group, this function will return true if INSN
19316 causes the termination of the current group (i.e, the dispatch group to
19317 which INSN belongs). This means that INSN will be the last insn in the
19318 group it belongs to.
19320 If WHICH_GROUP == previous_group, this function will return true if INSN
19321 causes the termination of the previous group (i.e, the dispatch group that
19322 precedes the group to which INSN belongs). This means that INSN will be
19323 the first insn in the group it belongs to). */
19326 insn_terminates_group_p (rtx_insn
*insn
, enum group_termination which_group
)
19333 first
= insn_must_be_first_in_group (insn
);
19334 last
= insn_must_be_last_in_group (insn
);
19339 if (which_group
== current_group
)
19341 else if (which_group
== previous_group
)
19349 insn_must_be_first_in_group (rtx_insn
*insn
)
19351 enum attr_type type
;
19355 || DEBUG_INSN_P (insn
)
19356 || GET_CODE (PATTERN (insn
)) == USE
19357 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
19360 switch (rs6000_tune
)
19362 case PROCESSOR_POWER5
:
19363 if (is_cracked_insn (insn
))
19366 case PROCESSOR_POWER4
:
19367 if (is_microcoded_insn (insn
))
19370 if (!rs6000_sched_groups
)
19373 type
= get_attr_type (insn
);
19380 case TYPE_CR_LOGICAL
:
19393 case PROCESSOR_POWER6
:
19394 type
= get_attr_type (insn
);
19403 case TYPE_FPCOMPARE
:
19414 if (get_attr_dot (insn
) == DOT_NO
19415 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19420 if (get_attr_size (insn
) == SIZE_32
)
19428 if (get_attr_update (insn
) == UPDATE_YES
)
19436 case PROCESSOR_POWER7
:
19437 type
= get_attr_type (insn
);
19441 case TYPE_CR_LOGICAL
:
19455 if (get_attr_dot (insn
) == DOT_YES
)
19460 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19461 || get_attr_update (insn
) == UPDATE_YES
)
19468 if (get_attr_update (insn
) == UPDATE_YES
)
19476 case PROCESSOR_POWER8
:
19477 type
= get_attr_type (insn
);
19481 case TYPE_CR_LOGICAL
:
19489 case TYPE_VECSTORE
:
19496 if (get_attr_dot (insn
) == DOT_YES
)
19501 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19502 || get_attr_update (insn
) == UPDATE_YES
)
19507 if (get_attr_update (insn
) == UPDATE_YES
19508 && get_attr_indexed (insn
) == INDEXED_YES
)
19524 insn_must_be_last_in_group (rtx_insn
*insn
)
19526 enum attr_type type
;
19530 || DEBUG_INSN_P (insn
)
19531 || GET_CODE (PATTERN (insn
)) == USE
19532 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
19535 switch (rs6000_tune
) {
19536 case PROCESSOR_POWER4
:
19537 case PROCESSOR_POWER5
:
19538 if (is_microcoded_insn (insn
))
19541 if (is_branch_slot_insn (insn
))
19545 case PROCESSOR_POWER6
:
19546 type
= get_attr_type (insn
);
19554 case TYPE_FPCOMPARE
:
19565 if (get_attr_dot (insn
) == DOT_NO
19566 || get_attr_var_shift (insn
) == VAR_SHIFT_NO
)
19571 if (get_attr_size (insn
) == SIZE_32
)
19579 case PROCESSOR_POWER7
:
19580 type
= get_attr_type (insn
);
19590 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19591 && get_attr_update (insn
) == UPDATE_YES
)
19596 if (get_attr_update (insn
) == UPDATE_YES
19597 && get_attr_indexed (insn
) == INDEXED_YES
)
19605 case PROCESSOR_POWER8
:
19606 type
= get_attr_type (insn
);
19618 if (get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
19619 && get_attr_update (insn
) == UPDATE_YES
)
19624 if (get_attr_update (insn
) == UPDATE_YES
19625 && get_attr_indexed (insn
) == INDEXED_YES
)
19640 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19641 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19644 is_costly_group (rtx
*group_insns
, rtx next_insn
)
19647 int issue_rate
= rs6000_issue_rate ();
19649 for (i
= 0; i
< issue_rate
; i
++)
19651 sd_iterator_def sd_it
;
19653 rtx insn
= group_insns
[i
];
19658 FOR_EACH_DEP (insn
, SD_LIST_RES_FORW
, sd_it
, dep
)
19660 rtx next
= DEP_CON (dep
);
19662 if (next
== next_insn
19663 && rs6000_is_costly_dependence (dep
, dep_cost (dep
), 0))
19671 /* Utility of the function redefine_groups.
19672 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19673 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19674 to keep it "far" (in a separate group) from GROUP_INSNS, following
19675 one of the following schemes, depending on the value of the flag
19676 -minsert_sched_nops = X:
19677 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19678 in order to force NEXT_INSN into a separate group.
19679 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19680 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19681 insertion (has a group just ended, how many vacant issue slots remain in the
19682 last group, and how many dispatch groups were encountered so far). */
19685 force_new_group (int sched_verbose
, FILE *dump
, rtx
*group_insns
,
19686 rtx_insn
*next_insn
, bool *group_end
, int can_issue_more
,
19691 int issue_rate
= rs6000_issue_rate ();
19692 bool end
= *group_end
;
19695 if (next_insn
== NULL_RTX
|| DEBUG_INSN_P (next_insn
))
19696 return can_issue_more
;
19698 if (rs6000_sched_insert_nops
> sched_finish_regroup_exact
)
19699 return can_issue_more
;
19701 force
= is_costly_group (group_insns
, next_insn
);
19703 return can_issue_more
;
19705 if (sched_verbose
> 6)
19706 fprintf (dump
,"force: group count = %d, can_issue_more = %d\n",
19707 *group_count
,can_issue_more
);
19709 if (rs6000_sched_insert_nops
== sched_finish_regroup_exact
)
19712 can_issue_more
= 0;
19714 /* Since only a branch can be issued in the last issue_slot, it is
19715 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19716 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19717 in this case the last nop will start a new group and the branch
19718 will be forced to the new group. */
19719 if (can_issue_more
&& !is_branch_slot_insn (next_insn
))
19722 /* Do we have a special group ending nop? */
19723 if (rs6000_tune
== PROCESSOR_POWER6
|| rs6000_tune
== PROCESSOR_POWER7
19724 || rs6000_tune
== PROCESSOR_POWER8
)
19726 nop
= gen_group_ending_nop ();
19727 emit_insn_before (nop
, next_insn
);
19728 can_issue_more
= 0;
19731 while (can_issue_more
> 0)
19734 emit_insn_before (nop
, next_insn
);
19742 if (rs6000_sched_insert_nops
< sched_finish_regroup_exact
)
19744 int n_nops
= rs6000_sched_insert_nops
;
19746 /* Nops can't be issued from the branch slot, so the effective
19747 issue_rate for nops is 'issue_rate - 1'. */
19748 if (can_issue_more
== 0)
19749 can_issue_more
= issue_rate
;
19751 if (can_issue_more
== 0)
19753 can_issue_more
= issue_rate
- 1;
19756 for (i
= 0; i
< issue_rate
; i
++)
19758 group_insns
[i
] = 0;
19765 emit_insn_before (nop
, next_insn
);
19766 if (can_issue_more
== issue_rate
- 1) /* new group begins */
19769 if (can_issue_more
== 0)
19771 can_issue_more
= issue_rate
- 1;
19774 for (i
= 0; i
< issue_rate
; i
++)
19776 group_insns
[i
] = 0;
19782 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19785 /* Is next_insn going to start a new group? */
19788 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
19789 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
19790 || (can_issue_more
< issue_rate
&&
19791 insn_terminates_group_p (next_insn
, previous_group
)));
19792 if (*group_end
&& end
)
19795 if (sched_verbose
> 6)
19796 fprintf (dump
, "done force: group count = %d, can_issue_more = %d\n",
19797 *group_count
, can_issue_more
);
19798 return can_issue_more
;
19801 return can_issue_more
;
19804 /* This function tries to synch the dispatch groups that the compiler "sees"
19805 with the dispatch groups that the processor dispatcher is expected to
19806 form in practice. It tries to achieve this synchronization by forcing the
19807 estimated processor grouping on the compiler (as opposed to the function
19808 'pad_goups' which tries to force the scheduler's grouping on the processor).
19810 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19811 examines the (estimated) dispatch groups that will be formed by the processor
19812 dispatcher. It marks these group boundaries to reflect the estimated
19813 processor grouping, overriding the grouping that the scheduler had marked.
19814 Depending on the value of the flag '-minsert-sched-nops' this function can
19815 force certain insns into separate groups or force a certain distance between
19816 them by inserting nops, for example, if there exists a "costly dependence"
19819 The function estimates the group boundaries that the processor will form as
19820 follows: It keeps track of how many vacant issue slots are available after
19821 each insn. A subsequent insn will start a new group if one of the following
19823 - no more vacant issue slots remain in the current dispatch group.
19824 - only the last issue slot, which is the branch slot, is vacant, but the next
19825 insn is not a branch.
19826 - only the last 2 or less issue slots, including the branch slot, are vacant,
19827 which means that a cracked insn (which occupies two issue slots) can't be
19828 issued in this group.
19829 - less than 'issue_rate' slots are vacant, and the next insn always needs to
19830 start a new group. */
19833 redefine_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
19836 rtx_insn
*insn
, *next_insn
;
19838 int can_issue_more
;
19841 int group_count
= 0;
19845 issue_rate
= rs6000_issue_rate ();
19846 group_insns
= XALLOCAVEC (rtx
, issue_rate
);
19847 for (i
= 0; i
< issue_rate
; i
++)
19849 group_insns
[i
] = 0;
19851 can_issue_more
= issue_rate
;
19853 insn
= get_next_active_insn (prev_head_insn
, tail
);
19856 while (insn
!= NULL_RTX
)
19858 slot
= (issue_rate
- can_issue_more
);
19859 group_insns
[slot
] = insn
;
19861 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
19862 if (insn_terminates_group_p (insn
, current_group
))
19863 can_issue_more
= 0;
19865 next_insn
= get_next_active_insn (insn
, tail
);
19866 if (next_insn
== NULL_RTX
)
19867 return group_count
+ 1;
19869 /* Is next_insn going to start a new group? */
19871 = (can_issue_more
== 0
19872 || (can_issue_more
== 1 && !is_branch_slot_insn (next_insn
))
19873 || (can_issue_more
<= 2 && is_cracked_insn (next_insn
))
19874 || (can_issue_more
< issue_rate
&&
19875 insn_terminates_group_p (next_insn
, previous_group
)));
19877 can_issue_more
= force_new_group (sched_verbose
, dump
, group_insns
,
19878 next_insn
, &group_end
, can_issue_more
,
19884 can_issue_more
= 0;
19885 for (i
= 0; i
< issue_rate
; i
++)
19887 group_insns
[i
] = 0;
19891 if (GET_MODE (next_insn
) == TImode
&& can_issue_more
)
19892 PUT_MODE (next_insn
, VOIDmode
);
19893 else if (!can_issue_more
&& GET_MODE (next_insn
) != TImode
)
19894 PUT_MODE (next_insn
, TImode
);
19897 if (can_issue_more
== 0)
19898 can_issue_more
= issue_rate
;
19901 return group_count
;
19904 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19905 dispatch group boundaries that the scheduler had marked. Pad with nops
19906 any dispatch groups which have vacant issue slots, in order to force the
19907 scheduler's grouping on the processor dispatcher. The function
19908 returns the number of dispatch groups found. */
19911 pad_groups (FILE *dump
, int sched_verbose
, rtx_insn
*prev_head_insn
,
19914 rtx_insn
*insn
, *next_insn
;
19917 int can_issue_more
;
19919 int group_count
= 0;
19921 /* Initialize issue_rate. */
19922 issue_rate
= rs6000_issue_rate ();
19923 can_issue_more
= issue_rate
;
19925 insn
= get_next_active_insn (prev_head_insn
, tail
);
19926 next_insn
= get_next_active_insn (insn
, tail
);
19928 while (insn
!= NULL_RTX
)
19931 rs6000_variable_issue (dump
, sched_verbose
, insn
, can_issue_more
);
19933 group_end
= (next_insn
== NULL_RTX
|| GET_MODE (next_insn
) == TImode
);
19935 if (next_insn
== NULL_RTX
)
19940 /* If the scheduler had marked group termination at this location
19941 (between insn and next_insn), and neither insn nor next_insn will
19942 force group termination, pad the group with nops to force group
19945 && (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
19946 && !insn_terminates_group_p (insn
, current_group
)
19947 && !insn_terminates_group_p (next_insn
, previous_group
))
19949 if (!is_branch_slot_insn (next_insn
))
19952 while (can_issue_more
)
19955 emit_insn_before (nop
, next_insn
);
19960 can_issue_more
= issue_rate
;
19965 next_insn
= get_next_active_insn (insn
, tail
);
19968 return group_count
;
19971 /* We're beginning a new block. Initialize data structures as necessary. */
19974 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED
,
19975 int sched_verbose ATTRIBUTE_UNUSED
,
19976 int max_ready ATTRIBUTE_UNUSED
)
19978 last_scheduled_insn
= NULL
;
19979 load_store_pendulum
= 0;
19984 /* The following function is called at the end of scheduling BB.
19985 After reload, it inserts nops at insn group bundling. */
19988 rs6000_sched_finish (FILE *dump
, int sched_verbose
)
19993 fprintf (dump
, "=== Finishing schedule.\n");
19995 if (reload_completed
&& rs6000_sched_groups
)
19997 /* Do not run sched_finish hook when selective scheduling enabled. */
19998 if (sel_sched_p ())
20001 if (rs6000_sched_insert_nops
== sched_finish_none
)
20004 if (rs6000_sched_insert_nops
== sched_finish_pad_groups
)
20005 n_groups
= pad_groups (dump
, sched_verbose
,
20006 current_sched_info
->prev_head
,
20007 current_sched_info
->next_tail
);
20009 n_groups
= redefine_groups (dump
, sched_verbose
,
20010 current_sched_info
->prev_head
,
20011 current_sched_info
->next_tail
);
20013 if (sched_verbose
>= 6)
20015 fprintf (dump
, "ngroups = %d\n", n_groups
);
20016 print_rtl (dump
, current_sched_info
->prev_head
);
20017 fprintf (dump
, "Done finish_sched\n");
20022 struct rs6000_sched_context
20024 short cached_can_issue_more
;
20025 rtx_insn
*last_scheduled_insn
;
20026 int load_store_pendulum
;
20031 typedef struct rs6000_sched_context rs6000_sched_context_def
;
20032 typedef rs6000_sched_context_def
*rs6000_sched_context_t
;
20034 /* Allocate store for new scheduling context. */
20036 rs6000_alloc_sched_context (void)
20038 return xmalloc (sizeof (rs6000_sched_context_def
));
20041 /* If CLEAN_P is true then initializes _SC with clean data,
20042 and from the global context otherwise. */
20044 rs6000_init_sched_context (void *_sc
, bool clean_p
)
20046 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
20050 sc
->cached_can_issue_more
= 0;
20051 sc
->last_scheduled_insn
= NULL
;
20052 sc
->load_store_pendulum
= 0;
20053 sc
->divide_cnt
= 0;
20054 sc
->vec_pairing
= 0;
20058 sc
->cached_can_issue_more
= cached_can_issue_more
;
20059 sc
->last_scheduled_insn
= last_scheduled_insn
;
20060 sc
->load_store_pendulum
= load_store_pendulum
;
20061 sc
->divide_cnt
= divide_cnt
;
20062 sc
->vec_pairing
= vec_pairing
;
20066 /* Sets the global scheduling context to the one pointed to by _SC. */
20068 rs6000_set_sched_context (void *_sc
)
20070 rs6000_sched_context_t sc
= (rs6000_sched_context_t
) _sc
;
20072 gcc_assert (sc
!= NULL
);
20074 cached_can_issue_more
= sc
->cached_can_issue_more
;
20075 last_scheduled_insn
= sc
->last_scheduled_insn
;
20076 load_store_pendulum
= sc
->load_store_pendulum
;
20077 divide_cnt
= sc
->divide_cnt
;
20078 vec_pairing
= sc
->vec_pairing
;
20083 rs6000_free_sched_context (void *_sc
)
20085 gcc_assert (_sc
!= NULL
);
20091 rs6000_sched_can_speculate_insn (rtx_insn
*insn
)
20093 switch (get_attr_type (insn
))
20108 /* Length in units of the trampoline for entering a nested function. */
20111 rs6000_trampoline_size (void)
20115 switch (DEFAULT_ABI
)
20118 gcc_unreachable ();
20121 ret
= (TARGET_32BIT
) ? 12 : 24;
20125 gcc_assert (!TARGET_32BIT
);
20131 ret
= (TARGET_32BIT
) ? 40 : 48;
20138 /* Emit RTL insns to initialize the variable parts of a trampoline.
20139 FNADDR is an RTX for the address of the function's pure code.
20140 CXT is an RTX for the static chain value for the function. */
20143 rs6000_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
20145 int regsize
= (TARGET_32BIT
) ? 4 : 8;
20146 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
20147 rtx ctx_reg
= force_reg (Pmode
, cxt
);
20148 rtx addr
= force_reg (Pmode
, XEXP (m_tramp
, 0));
20150 switch (DEFAULT_ABI
)
20153 gcc_unreachable ();
20155 /* Under AIX, just build the 3 word function descriptor */
20158 rtx fnmem
, fn_reg
, toc_reg
;
20160 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS
)
20161 error ("you cannot take the address of a nested function if you use "
20162 "the %qs option", "-mno-pointers-to-nested-functions");
20164 fnmem
= gen_const_mem (Pmode
, force_reg (Pmode
, fnaddr
));
20165 fn_reg
= gen_reg_rtx (Pmode
);
20166 toc_reg
= gen_reg_rtx (Pmode
);
20168 /* Macro to shorten the code expansions below. */
20169 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20171 m_tramp
= replace_equiv_address (m_tramp
, addr
);
20173 emit_move_insn (fn_reg
, MEM_PLUS (fnmem
, 0));
20174 emit_move_insn (toc_reg
, MEM_PLUS (fnmem
, regsize
));
20175 emit_move_insn (MEM_PLUS (m_tramp
, 0), fn_reg
);
20176 emit_move_insn (MEM_PLUS (m_tramp
, regsize
), toc_reg
);
20177 emit_move_insn (MEM_PLUS (m_tramp
, 2*regsize
), ctx_reg
);
20183 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
20187 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__trampoline_setup"),
20188 LCT_NORMAL
, VOIDmode
,
20190 GEN_INT (rs6000_trampoline_size ()), SImode
,
20198 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20199 identifier as an argument, so the front end shouldn't look it up. */
20202 rs6000_attribute_takes_identifier_p (const_tree attr_id
)
20204 return is_attribute_p ("altivec", attr_id
);
20207 /* Handle the "altivec" attribute. The attribute may have
20208 arguments as follows:
20210 __attribute__((altivec(vector__)))
20211 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20212 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20214 and may appear more than once (e.g., 'vector bool char') in a
20215 given declaration. */
20218 rs6000_handle_altivec_attribute (tree
*node
,
20219 tree name ATTRIBUTE_UNUSED
,
20221 int flags ATTRIBUTE_UNUSED
,
20222 bool *no_add_attrs
)
20224 tree type
= *node
, result
= NULL_TREE
;
20228 = ((args
&& TREE_CODE (args
) == TREE_LIST
&& TREE_VALUE (args
)
20229 && TREE_CODE (TREE_VALUE (args
)) == IDENTIFIER_NODE
)
20230 ? *IDENTIFIER_POINTER (TREE_VALUE (args
))
20233 while (POINTER_TYPE_P (type
)
20234 || TREE_CODE (type
) == FUNCTION_TYPE
20235 || TREE_CODE (type
) == METHOD_TYPE
20236 || TREE_CODE (type
) == ARRAY_TYPE
)
20237 type
= TREE_TYPE (type
);
20239 mode
= TYPE_MODE (type
);
20241 /* Check for invalid AltiVec type qualifiers. */
20242 if (type
== long_double_type_node
)
20243 error ("use of %<long double%> in AltiVec types is invalid");
20244 else if (type
== boolean_type_node
)
20245 error ("use of boolean types in AltiVec types is invalid");
20246 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
20247 error ("use of %<complex%> in AltiVec types is invalid");
20248 else if (DECIMAL_FLOAT_MODE_P (mode
))
20249 error ("use of decimal floating-point types in AltiVec types is invalid");
20250 else if (!TARGET_VSX
)
20252 if (type
== long_unsigned_type_node
|| type
== long_integer_type_node
)
20255 error ("use of %<long%> in AltiVec types is invalid for "
20256 "64-bit code without %qs", "-mvsx");
20257 else if (rs6000_warn_altivec_long
)
20258 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20261 else if (type
== long_long_unsigned_type_node
20262 || type
== long_long_integer_type_node
)
20263 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20265 else if (type
== double_type_node
)
20266 error ("use of %<double%> in AltiVec types is invalid without %qs",
20270 switch (altivec_type
)
20273 unsigned_p
= TYPE_UNSIGNED (type
);
20277 result
= (unsigned_p
? unsigned_V1TI_type_node
: V1TI_type_node
);
20280 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
20283 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
20286 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
20289 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
20291 case E_SFmode
: result
= V4SF_type_node
; break;
20292 case E_DFmode
: result
= V2DF_type_node
; break;
20293 /* If the user says 'vector int bool', we may be handed the 'bool'
20294 attribute _before_ the 'vector' attribute, and so select the
20295 proper type in the 'b' case below. */
20296 case E_V4SImode
: case E_V8HImode
: case E_V16QImode
: case E_V4SFmode
:
20297 case E_V2DImode
: case E_V2DFmode
:
20305 case E_TImode
: case E_V1TImode
: result
= bool_V1TI_type_node
; break;
20306 case E_DImode
: case E_V2DImode
: result
= bool_V2DI_type_node
; break;
20307 case E_SImode
: case E_V4SImode
: result
= bool_V4SI_type_node
; break;
20308 case E_HImode
: case E_V8HImode
: result
= bool_V8HI_type_node
; break;
20309 case E_QImode
: case E_V16QImode
: result
= bool_V16QI_type_node
;
20316 case E_V8HImode
: result
= pixel_V8HI_type_node
;
20322 /* Propagate qualifiers attached to the element type
20323 onto the vector type. */
20324 if (result
&& result
!= type
&& TYPE_QUALS (type
))
20325 result
= build_qualified_type (result
, TYPE_QUALS (type
));
20327 *no_add_attrs
= true; /* No need to hang on to the attribute. */
20330 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
20335 /* AltiVec defines five built-in scalar types that serve as vector
20336 elements; we must teach the compiler how to mangle them. The 128-bit
20337 floating point mangling is target-specific as well. MMA defines
20338 two built-in types to be used as opaque vector types. */
20340 static const char *
20341 rs6000_mangle_type (const_tree type
)
20343 type
= TYPE_MAIN_VARIANT (type
);
20345 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
20346 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
20347 && TREE_CODE (type
) != OPAQUE_TYPE
)
20350 if (type
== bool_char_type_node
) return "U6__boolc";
20351 if (type
== bool_short_type_node
) return "U6__bools";
20352 if (type
== pixel_type_node
) return "u7__pixel";
20353 if (type
== bool_int_type_node
) return "U6__booli";
20354 if (type
== bool_long_long_type_node
) return "U6__boolx";
20356 if (type
== float128_type_node
|| type
== float64x_type_node
)
20359 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IBM_P (TYPE_MODE (type
)))
20361 if (SCALAR_FLOAT_TYPE_P (type
) && FLOAT128_IEEE_P (TYPE_MODE (type
)))
20362 return "u9__ieee128";
20364 if (type
== vector_pair_type_node
)
20365 return "u13__vector_pair";
20366 if (type
== vector_quad_type_node
)
20367 return "u13__vector_quad";
20369 /* For all other types, use the default mangling. */
20373 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20374 struct attribute_spec.handler. */
20377 rs6000_handle_longcall_attribute (tree
*node
, tree name
,
20378 tree args ATTRIBUTE_UNUSED
,
20379 int flags ATTRIBUTE_UNUSED
,
20380 bool *no_add_attrs
)
20382 if (TREE_CODE (*node
) != FUNCTION_TYPE
20383 && TREE_CODE (*node
) != FIELD_DECL
20384 && TREE_CODE (*node
) != TYPE_DECL
)
20386 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
20388 *no_add_attrs
= true;
20394 /* Set longcall attributes on all functions declared when
20395 rs6000_default_long_calls is true. */
20397 rs6000_set_default_type_attributes (tree type
)
20399 if (rs6000_default_long_calls
20400 && (TREE_CODE (type
) == FUNCTION_TYPE
20401 || TREE_CODE (type
) == METHOD_TYPE
))
20402 TYPE_ATTRIBUTES (type
) = tree_cons (get_identifier ("longcall"),
20404 TYPE_ATTRIBUTES (type
));
20407 darwin_set_default_type_attributes (type
);
20411 /* Return a reference suitable for calling a function with the
20412 longcall attribute. */
20415 rs6000_longcall_ref (rtx call_ref
, rtx arg
)
20417 /* System V adds '.' to the internal name, so skip them. */
20418 const char *call_name
= XSTR (call_ref
, 0);
20419 if (*call_name
== '.')
20421 while (*call_name
== '.')
20424 tree node
= get_identifier (call_name
);
20425 call_ref
= gen_rtx_SYMBOL_REF (VOIDmode
, IDENTIFIER_POINTER (node
));
20430 rtx base
= const0_rtx
;
20432 if (rs6000_pcrel_p ())
20434 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20435 rtx u
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20436 gen_rtvec (3, base
, call_ref
, arg
),
20437 UNSPECV_PLT_PCREL
);
20438 emit_insn (gen_rtx_SET (reg
, u
));
20442 if (DEFAULT_ABI
== ABI_ELFv2
)
20443 base
= gen_rtx_REG (Pmode
, TOC_REGISTER
);
20447 base
= gen_rtx_REG (Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
20450 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20451 may be used by a function global entry point. For SysV4, r11
20452 is used by __glink_PLTresolve lazy resolver entry. */
20453 rtx reg
= gen_rtx_REG (Pmode
, regno
);
20454 rtx hi
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (3, base
, call_ref
, arg
),
20456 rtx lo
= gen_rtx_UNSPEC_VOLATILE (Pmode
,
20457 gen_rtvec (3, reg
, call_ref
, arg
),
20459 emit_insn (gen_rtx_SET (reg
, hi
));
20460 emit_insn (gen_rtx_SET (reg
, lo
));
20464 return force_reg (Pmode
, call_ref
);
20467 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20468 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20471 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20472 struct attribute_spec.handler. */
20474 rs6000_handle_struct_attribute (tree
*node
, tree name
,
20475 tree args ATTRIBUTE_UNUSED
,
20476 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
20479 if (DECL_P (*node
))
20481 if (TREE_CODE (*node
) == TYPE_DECL
)
20482 type
= &TREE_TYPE (*node
);
20487 if (!(type
&& (TREE_CODE (*type
) == RECORD_TYPE
20488 || TREE_CODE (*type
) == UNION_TYPE
)))
20490 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
20491 *no_add_attrs
= true;
20494 else if ((is_attribute_p ("ms_struct", name
)
20495 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
20496 || ((is_attribute_p ("gcc_struct", name
)
20497 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
20499 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
20501 *no_add_attrs
= true;
20508 rs6000_ms_bitfield_layout_p (const_tree record_type
)
20510 return (TARGET_USE_MS_BITFIELD_LAYOUT
&&
20511 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
20512 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
));
20515 #ifdef USING_ELFOS_H
20517 /* A get_unnamed_section callback, used for switching to toc_section. */
20520 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
20522 if ((DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20523 && TARGET_MINIMAL_TOC
)
20525 if (!toc_initialized
)
20527 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20528 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20529 (*targetm
.asm_out
.internal_label
) (asm_out_file
, "LCTOC", 0);
20530 fprintf (asm_out_file
, "\t.tc ");
20531 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1[TC],");
20532 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20533 fprintf (asm_out_file
, "\n");
20535 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20536 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20537 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20538 fprintf (asm_out_file
, " = .+32768\n");
20539 toc_initialized
= 1;
20542 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20544 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20546 fprintf (asm_out_file
, "%s\n", TOC_SECTION_ASM_OP
);
20547 if (!toc_initialized
)
20549 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20550 toc_initialized
= 1;
20555 fprintf (asm_out_file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
20556 if (!toc_initialized
)
20558 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
20559 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file
, "LCTOC1");
20560 fprintf (asm_out_file
, " = .+32768\n");
20561 toc_initialized
= 1;
20566 /* Implement TARGET_ASM_INIT_SECTIONS. */
20569 rs6000_elf_asm_init_sections (void)
20572 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op
, NULL
);
20575 = get_unnamed_section (SECTION_WRITE
, output_section_asm_op
,
20576 SDATA2_SECTION_ASM_OP
);
20579 /* Implement TARGET_SELECT_RTX_SECTION. */
20582 rs6000_elf_select_rtx_section (machine_mode mode
, rtx x
,
20583 unsigned HOST_WIDE_INT align
)
20585 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
20586 return toc_section
;
20588 return default_elf_select_rtx_section (mode
, x
, align
);
20591 /* For a SYMBOL_REF, set generic flags and then perform some
20592 target-specific processing.
20594 When the AIX ABI is requested on a non-AIX system, replace the
20595 function name with the real name (with a leading .) rather than the
20596 function descriptor name. This saves a lot of overriding code to
20597 read the prefixes. */
20599 static void rs6000_elf_encode_section_info (tree
, rtx
, int) ATTRIBUTE_UNUSED
;
20601 rs6000_elf_encode_section_info (tree decl
, rtx rtl
, int first
)
20603 default_encode_section_info (decl
, rtl
, first
);
20606 && TREE_CODE (decl
) == FUNCTION_DECL
20608 && DEFAULT_ABI
== ABI_AIX
)
20610 rtx sym_ref
= XEXP (rtl
, 0);
20611 size_t len
= strlen (XSTR (sym_ref
, 0));
20612 char *str
= XALLOCAVEC (char, len
+ 2);
20614 memcpy (str
+ 1, XSTR (sym_ref
, 0), len
+ 1);
20615 XSTR (sym_ref
, 0) = ggc_alloc_string (str
, len
+ 1);
20620 compare_section_name (const char *section
, const char *templ
)
20624 len
= strlen (templ
);
20625 return (strncmp (section
, templ
, len
) == 0
20626 && (section
[len
] == 0 || section
[len
] == '.'));
20630 rs6000_elf_in_small_data_p (const_tree decl
)
20632 if (rs6000_sdata
== SDATA_NONE
)
20635 /* We want to merge strings, so we never consider them small data. */
20636 if (TREE_CODE (decl
) == STRING_CST
)
20639 /* Functions are never in the small data area. */
20640 if (TREE_CODE (decl
) == FUNCTION_DECL
)
20643 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_SECTION_NAME (decl
))
20645 const char *section
= DECL_SECTION_NAME (decl
);
20646 if (compare_section_name (section
, ".sdata")
20647 || compare_section_name (section
, ".sdata2")
20648 || compare_section_name (section
, ".gnu.linkonce.s")
20649 || compare_section_name (section
, ".sbss")
20650 || compare_section_name (section
, ".sbss2")
20651 || compare_section_name (section
, ".gnu.linkonce.sb")
20652 || strcmp (section
, ".PPC.EMB.sdata0") == 0
20653 || strcmp (section
, ".PPC.EMB.sbss0") == 0)
20658 /* If we are told not to put readonly data in sdata, then don't. */
20659 if (TREE_READONLY (decl
) && rs6000_sdata
!= SDATA_EABI
20660 && !rs6000_readonly_in_sdata
)
20663 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
20666 && size
<= g_switch_value
20667 /* If it's not public, and we're not going to reference it there,
20668 there's no need to put it in the small data section. */
20669 && (rs6000_sdata
!= SDATA_DATA
|| TREE_PUBLIC (decl
)))
20676 #endif /* USING_ELFOS_H */
20678 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20681 rs6000_use_blocks_for_constant_p (machine_mode mode
, const_rtx x
)
20683 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
);
20686 /* Do not place thread-local symbols refs in the object blocks. */
20689 rs6000_use_blocks_for_decl_p (const_tree decl
)
20691 return !DECL_THREAD_LOCAL_P (decl
);
20694 /* Return a REG that occurs in ADDR with coefficient 1.
20695 ADDR can be effectively incremented by incrementing REG.
20697 r0 is special and we must not select it as an address
20698 register by this routine since our caller will try to
20699 increment the returned register via an "la" instruction. */
20702 find_addr_reg (rtx addr
)
20704 while (GET_CODE (addr
) == PLUS
)
20706 if (REG_P (XEXP (addr
, 0))
20707 && REGNO (XEXP (addr
, 0)) != 0)
20708 addr
= XEXP (addr
, 0);
20709 else if (REG_P (XEXP (addr
, 1))
20710 && REGNO (XEXP (addr
, 1)) != 0)
20711 addr
= XEXP (addr
, 1);
20712 else if (CONSTANT_P (XEXP (addr
, 0)))
20713 addr
= XEXP (addr
, 1);
20714 else if (CONSTANT_P (XEXP (addr
, 1)))
20715 addr
= XEXP (addr
, 0);
20717 gcc_unreachable ();
20719 gcc_assert (REG_P (addr
) && REGNO (addr
) != 0);
20724 rs6000_fatal_bad_address (rtx op
)
20726 fatal_insn ("bad address", op
);
20731 vec
<branch_island
, va_gc
> *branch_islands
;
20733 /* Remember to generate a branch island for far calls to the given
20737 add_compiler_branch_island (tree label_name
, tree function_name
,
20740 branch_island bi
= {function_name
, label_name
, line_number
};
20741 vec_safe_push (branch_islands
, bi
);
20744 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
20745 already there or not. */
20748 no_previous_def (tree function_name
)
20753 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
20754 if (function_name
== bi
->function_name
)
20759 /* GET_PREV_LABEL gets the label name from the previous definition of
20763 get_prev_label (tree function_name
)
20768 FOR_EACH_VEC_SAFE_ELT (branch_islands
, ix
, bi
)
20769 if (function_name
== bi
->function_name
)
20770 return bi
->label_name
;
20774 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20777 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
20779 unsigned int length
;
20780 char *symbol_name
, *lazy_ptr_name
;
20781 char *local_label_0
;
20782 static unsigned label
= 0;
20784 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20785 symb
= (*targetm
.strip_name_encoding
) (symb
);
20787 length
= strlen (symb
);
20788 symbol_name
= XALLOCAVEC (char, length
+ 32);
20789 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
20791 lazy_ptr_name
= XALLOCAVEC (char, length
+ 32);
20792 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name
, symb
, length
);
20796 switch_to_section (darwin_sections
[machopic_picsymbol_stub1_section
]);
20797 fprintf (file
, "\t.align 5\n");
20799 fprintf (file
, "%s:\n", stub
);
20800 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20803 local_label_0
= XALLOCAVEC (char, 16);
20804 sprintf (local_label_0
, "L%u$spb", label
);
20806 fprintf (file
, "\tmflr r0\n");
20807 fprintf (file
, "\tbcl 20,31,%s\n", local_label_0
);
20808 fprintf (file
, "%s:\n\tmflr r11\n", local_label_0
);
20809 fprintf (file
, "\taddis r11,r11,ha16(%s-%s)\n",
20810 lazy_ptr_name
, local_label_0
);
20811 fprintf (file
, "\tmtlr r0\n");
20812 fprintf (file
, "\t%s r12,lo16(%s-%s)(r11)\n",
20813 (TARGET_64BIT
? "ldu" : "lwzu"),
20814 lazy_ptr_name
, local_label_0
);
20815 fprintf (file
, "\tmtctr r12\n");
20816 fprintf (file
, "\tbctr\n");
20818 else /* mdynamic-no-pic or mkernel. */
20820 switch_to_section (darwin_sections
[machopic_symbol_stub1_section
]);
20821 fprintf (file
, "\t.align 4\n");
20823 fprintf (file
, "%s:\n", stub
);
20824 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20826 fprintf (file
, "\tlis r11,ha16(%s)\n", lazy_ptr_name
);
20827 fprintf (file
, "\t%s r12,lo16(%s)(r11)\n",
20828 (TARGET_64BIT
? "ldu" : "lwzu"),
20830 fprintf (file
, "\tmtctr r12\n");
20831 fprintf (file
, "\tbctr\n");
20834 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr_section
]);
20835 fprintf (file
, "%s:\n", lazy_ptr_name
);
20836 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
20837 fprintf (file
, "%sdyld_stub_binding_helper\n",
20838 (TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t"));
20841 /* Legitimize PIC addresses. If the address is already
20842 position-independent, we return ORIG. Newly generated
20843 position-independent addresses go into a reg. This is REG if non
20844 zero, otherwise we allocate register(s) as necessary. */
20846 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
20849 rs6000_machopic_legitimize_pic_address (rtx orig
, machine_mode mode
,
20854 if (reg
== NULL
&& !reload_completed
)
20855 reg
= gen_reg_rtx (Pmode
);
20857 if (GET_CODE (orig
) == CONST
)
20861 if (GET_CODE (XEXP (orig
, 0)) == PLUS
20862 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
20865 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
20867 /* Use a different reg for the intermediate value, as
20868 it will be marked UNCHANGING. */
20869 reg_temp
= !can_create_pseudo_p () ? reg
: gen_reg_rtx (Pmode
);
20870 base
= rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 0),
20873 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig
, 0), 1),
20876 if (CONST_INT_P (offset
))
20878 if (SMALL_INT (offset
))
20879 return plus_constant (Pmode
, base
, INTVAL (offset
));
20880 else if (!reload_completed
)
20881 offset
= force_reg (Pmode
, offset
);
20884 rtx mem
= force_const_mem (Pmode
, orig
);
20885 return machopic_legitimize_pic_address (mem
, Pmode
, reg
);
20888 return gen_rtx_PLUS (Pmode
, base
, offset
);
20891 /* Fall back on generic machopic code. */
20892 return machopic_legitimize_pic_address (orig
, mode
, reg
);
20895 /* Output a .machine directive for the Darwin assembler, and call
20896 the generic start_file routine. */
20899 rs6000_darwin_file_start (void)
20901 static const struct
20905 HOST_WIDE_INT if_set
;
20907 { "ppc64", "ppc64", MASK_64BIT
},
20908 { "970", "ppc970", OPTION_MASK_PPC_GPOPT
| OPTION_MASK_MFCRF \
20909 | MASK_POWERPC64
},
20910 { "power4", "ppc970", 0 },
20911 { "G5", "ppc970", 0 },
20912 { "7450", "ppc7450", 0 },
20913 { "7400", "ppc7400", OPTION_MASK_ALTIVEC
},
20914 { "G4", "ppc7400", 0 },
20915 { "750", "ppc750", 0 },
20916 { "740", "ppc750", 0 },
20917 { "G3", "ppc750", 0 },
20918 { "604e", "ppc604e", 0 },
20919 { "604", "ppc604", 0 },
20920 { "603e", "ppc603", 0 },
20921 { "603", "ppc603", 0 },
20922 { "601", "ppc601", 0 },
20923 { NULL
, "ppc", 0 } };
20924 const char *cpu_id
= "";
20927 rs6000_file_start ();
20928 darwin_file_start ();
20930 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
20932 if (rs6000_default_cpu
!= 0 && rs6000_default_cpu
[0] != '\0')
20933 cpu_id
= rs6000_default_cpu
;
20935 if (OPTION_SET_P (rs6000_cpu_index
))
20936 cpu_id
= processor_target_table
[rs6000_cpu_index
].name
;
20938 /* Look through the mapping array. Pick the first name that either
20939 matches the argument, has a bit set in IF_SET that is also set
20940 in the target flags, or has a NULL name. */
20943 while (mapping
[i
].arg
!= NULL
20944 && strcmp (mapping
[i
].arg
, cpu_id
) != 0
20945 && (mapping
[i
].if_set
& rs6000_isa_flags
) == 0)
20948 fprintf (asm_out_file
, "\t.machine %s\n", mapping
[i
].name
);
20951 #endif /* TARGET_MACHO */
20955 rs6000_elf_reloc_rw_mask (void)
20959 else if (DEFAULT_ABI
== ABI_AIX
|| DEFAULT_ABI
== ABI_ELFv2
)
20965 /* Record an element in the table of global constructors. SYMBOL is
20966 a SYMBOL_REF of the function to be called; PRIORITY is a number
20967 between 0 and MAX_INIT_PRIORITY.
20969 This differs from default_named_section_asm_out_constructor in
20970 that we have special handling for -mrelocatable. */
20972 static void rs6000_elf_asm_out_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
20974 rs6000_elf_asm_out_constructor (rtx symbol
, int priority
)
20976 const char *section
= ".ctors";
20979 if (priority
!= DEFAULT_INIT_PRIORITY
)
20981 sprintf (buf
, ".ctors.%.5u",
20982 /* Invert the numbering so the linker puts us in the proper
20983 order; constructors are run from right to left, and the
20984 linker sorts in increasing order. */
20985 MAX_INIT_PRIORITY
- priority
);
20989 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
20990 assemble_align (POINTER_SIZE
);
20992 if (DEFAULT_ABI
== ABI_V4
20993 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
20995 fputs ("\t.long (", asm_out_file
);
20996 output_addr_const (asm_out_file
, symbol
);
20997 fputs (")@fixup\n", asm_out_file
);
21000 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
21003 static void rs6000_elf_asm_out_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
21005 rs6000_elf_asm_out_destructor (rtx symbol
, int priority
)
21007 const char *section
= ".dtors";
21010 if (priority
!= DEFAULT_INIT_PRIORITY
)
21012 sprintf (buf
, ".dtors.%.5u",
21013 /* Invert the numbering so the linker puts us in the proper
21014 order; constructors are run from right to left, and the
21015 linker sorts in increasing order. */
21016 MAX_INIT_PRIORITY
- priority
);
21020 switch_to_section (get_section (section
, SECTION_WRITE
, NULL
));
21021 assemble_align (POINTER_SIZE
);
21023 if (DEFAULT_ABI
== ABI_V4
21024 && (TARGET_RELOCATABLE
|| flag_pic
> 1))
21026 fputs ("\t.long (", asm_out_file
);
21027 output_addr_const (asm_out_file
, symbol
);
21028 fputs (")@fixup\n", asm_out_file
);
21031 assemble_integer (symbol
, POINTER_SIZE
/ BITS_PER_UNIT
, POINTER_SIZE
, 1);
21035 rs6000_elf_declare_function_name (FILE *file
, const char *name
, tree decl
)
21037 if (TARGET_64BIT
&& DEFAULT_ABI
!= ABI_ELFv2
)
21039 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file
);
21040 ASM_OUTPUT_LABEL (file
, name
);
21041 fputs (DOUBLE_INT_ASM_OP
, file
);
21042 rs6000_output_function_entry (file
, name
);
21043 fputs (",.TOC.@tocbase,0\n\t.previous\n", file
);
21046 fputs ("\t.size\t", file
);
21047 assemble_name (file
, name
);
21048 fputs (",24\n\t.type\t.", file
);
21049 assemble_name (file
, name
);
21050 fputs (",@function\n", file
);
21051 if (TREE_PUBLIC (decl
) && ! DECL_WEAK (decl
))
21053 fputs ("\t.globl\t.", file
);
21054 assemble_name (file
, name
);
21059 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
21060 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
21061 rs6000_output_function_entry (file
, name
);
21062 fputs (":\n", file
);
21067 if (DEFAULT_ABI
== ABI_V4
21068 && (TARGET_RELOCATABLE
|| flag_pic
> 1)
21069 && !TARGET_SECURE_PLT
21070 && (!constant_pool_empty_p () || crtl
->profile
)
21071 && (uses_toc
= uses_TOC ()))
21076 switch_to_other_text_partition ();
21077 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
21079 fprintf (file
, "\t.long ");
21080 assemble_name (file
, toc_label_name
);
21083 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
21084 assemble_name (file
, buf
);
21087 switch_to_other_text_partition ();
21090 ASM_OUTPUT_TYPE_DIRECTIVE (file
, name
, "function");
21091 ASM_DECLARE_RESULT (file
, DECL_RESULT (decl
));
21093 if (TARGET_CMODEL
== CMODEL_LARGE
21094 && rs6000_global_entry_point_prologue_needed_p ())
21098 (*targetm
.asm_out
.internal_label
) (file
, "LCL", rs6000_pic_labelno
);
21100 fprintf (file
, "\t.quad .TOC.-");
21101 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCF", rs6000_pic_labelno
);
21102 assemble_name (file
, buf
);
21106 if (DEFAULT_ABI
== ABI_AIX
)
21108 const char *desc_name
, *orig_name
;
21110 orig_name
= (*targetm
.strip_name_encoding
) (name
);
21111 desc_name
= orig_name
;
21112 while (*desc_name
== '.')
21115 if (TREE_PUBLIC (decl
))
21116 fprintf (file
, "\t.globl %s\n", desc_name
);
21118 fprintf (file
, "%s\n", MINIMAL_TOC_SECTION_ASM_OP
);
21119 fprintf (file
, "%s:\n", desc_name
);
21120 fprintf (file
, "\t.long %s\n", orig_name
);
21121 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file
);
21122 fputs ("\t.long 0\n", file
);
21123 fprintf (file
, "\t.previous\n");
21125 ASM_OUTPUT_LABEL (file
, name
);
21128 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED
;
21130 rs6000_elf_file_end (void)
21132 #ifdef HAVE_AS_GNU_ATTRIBUTE
21133 /* ??? The value emitted depends on options active at file end.
21134 Assume anyone using #pragma or attributes that might change
21135 options knows what they are doing. */
21136 if ((TARGET_64BIT
|| DEFAULT_ABI
== ABI_V4
)
21137 && rs6000_passes_float
)
21141 if (TARGET_HARD_FLOAT
)
21145 if (rs6000_passes_long_double
)
21147 if (!TARGET_LONG_DOUBLE_128
)
21149 else if (TARGET_IEEEQUAD
)
21154 fprintf (asm_out_file
, "\t.gnu_attribute 4, %d\n", fp
);
21156 if (TARGET_32BIT
&& DEFAULT_ABI
== ABI_V4
)
21158 if (rs6000_passes_vector
)
21159 fprintf (asm_out_file
, "\t.gnu_attribute 8, %d\n",
21160 (TARGET_ALTIVEC_ABI
? 2 : 1));
21161 if (rs6000_returns_struct
)
21162 fprintf (asm_out_file
, "\t.gnu_attribute 12, %d\n",
21163 aix_struct_return
? 2 : 1);
21166 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21167 if (TARGET_32BIT
|| DEFAULT_ABI
== ABI_ELFv2
)
21168 file_end_indicate_exec_stack ();
21171 if (flag_split_stack
)
21172 file_end_indicate_split_stack ();
21176 /* We have expanded a CPU builtin, so we need to emit a reference to
21177 the special symbol that LIBC uses to declare it supports the
21178 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
21179 switch_to_section (data_section
);
21180 fprintf (asm_out_file
, "\t.align %u\n", TARGET_32BIT
? 2 : 3);
21181 fprintf (asm_out_file
, "\t%s %s\n",
21182 TARGET_32BIT
? ".long" : ".quad", tcb_verification_symbol
);
21189 #ifndef HAVE_XCOFF_DWARF_EXTRAS
21190 #define HAVE_XCOFF_DWARF_EXTRAS 0
21194 /* Names of bss and data sections. These should be unique names for each
21195 compilation unit. */
21197 char *xcoff_bss_section_name
;
21198 char *xcoff_private_data_section_name
;
21199 char *xcoff_private_rodata_section_name
;
21200 char *xcoff_tls_data_section_name
;
21201 char *xcoff_read_only_section_name
;
21203 static enum unwind_info_type
21204 rs6000_xcoff_debug_unwind_info (void)
21210 rs6000_xcoff_asm_output_anchor (rtx symbol
)
21214 sprintf (buffer
, "$ + " HOST_WIDE_INT_PRINT_DEC
,
21215 SYMBOL_REF_BLOCK_OFFSET (symbol
));
21216 fprintf (asm_out_file
, "%s", SET_ASM_OP
);
21217 RS6000_OUTPUT_BASENAME (asm_out_file
, XSTR (symbol
, 0));
21218 fprintf (asm_out_file
, ",");
21219 RS6000_OUTPUT_BASENAME (asm_out_file
, buffer
);
21220 fprintf (asm_out_file
, "\n");
21224 rs6000_xcoff_asm_globalize_label (FILE *stream
, const char *name
)
21226 fputs (GLOBAL_ASM_OP
, stream
);
21227 RS6000_OUTPUT_BASENAME (stream
, name
);
21228 putc ('\n', stream
);
21231 /* A get_unnamed_decl callback, used for read-only sections. PTR
21232 points to the section string variable. */
21235 rs6000_xcoff_output_readonly_section_asm_op (const char *directive
)
21237 fprintf (asm_out_file
, "\t.csect %s[RO],%s\n",
21239 ? xcoff_private_rodata_section_name
21240 : xcoff_read_only_section_name
,
21241 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21244 /* Likewise for read-write sections. */
21247 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21249 fprintf (asm_out_file
, "\t.csect %s[RW],%s\n",
21250 xcoff_private_data_section_name
,
21251 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21255 rs6000_xcoff_output_tls_section_asm_op (const char *directive
)
21257 fprintf (asm_out_file
, "\t.csect %s[TL],%s\n",
21259 ? xcoff_private_data_section_name
21260 : xcoff_tls_data_section_name
,
21261 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR
);
21264 /* A get_unnamed_section callback, used for switching to toc_section. */
21267 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED
)
21269 if (TARGET_MINIMAL_TOC
)
21271 /* toc_section is always selected at least once from
21272 rs6000_xcoff_file_start, so this is guaranteed to
21273 always be defined once and only once in each file. */
21274 if (!toc_initialized
)
21276 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file
);
21277 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file
);
21278 toc_initialized
= 1;
21280 fprintf (asm_out_file
, "\t.csect toc_table[RW]%s\n",
21281 (TARGET_32BIT
? "" : ",3"));
21284 fputs ("\t.toc\n", asm_out_file
);
21287 /* Implement TARGET_ASM_INIT_SECTIONS. */
21290 rs6000_xcoff_asm_init_sections (void)
21292 read_only_data_section
21293 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
21296 private_data_section
21297 = get_unnamed_section (SECTION_WRITE
,
21298 rs6000_xcoff_output_readwrite_section_asm_op
,
21301 read_only_private_data_section
21302 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op
,
21306 = get_unnamed_section (SECTION_TLS
,
21307 rs6000_xcoff_output_tls_section_asm_op
,
21310 tls_private_data_section
21311 = get_unnamed_section (SECTION_TLS
,
21312 rs6000_xcoff_output_tls_section_asm_op
,
21316 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op
, NULL
);
21318 readonly_data_section
= read_only_data_section
;
21322 rs6000_xcoff_reloc_rw_mask (void)
21328 rs6000_xcoff_asm_named_section (const char *name
, unsigned int flags
,
21329 tree decl ATTRIBUTE_UNUSED
)
21332 static const char * const suffix
[7]
21333 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21335 if (flags
& SECTION_EXCLUDE
)
21337 else if (flags
& SECTION_DEBUG
)
21339 fprintf (asm_out_file
, "\t.dwsect %s\n", name
);
21342 else if (flags
& SECTION_CODE
)
21344 else if (flags
& SECTION_TLS
)
21346 if (flags
& SECTION_BSS
)
21351 else if (flags
& SECTION_WRITE
)
21353 if (flags
& SECTION_BSS
)
21361 fprintf (asm_out_file
, "\t.csect %s%s[%s],%u\n",
21362 (flags
& SECTION_CODE
) ? "." : "",
21363 name
, suffix
[smclass
], flags
& SECTION_ENTSIZE
);
21366 #define IN_NAMED_SECTION(DECL) \
21367 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
21368 && DECL_SECTION_NAME (DECL) != NULL)
21371 rs6000_xcoff_select_section (tree decl
, int reloc
,
21372 unsigned HOST_WIDE_INT align
)
21374 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21376 if (align
> BIGGEST_ALIGNMENT
&& VAR_OR_FUNCTION_DECL_P (decl
))
21378 resolve_unique_section (decl
, reloc
, true);
21379 if (IN_NAMED_SECTION (decl
))
21380 return get_named_section (decl
, NULL
, reloc
);
21383 if (decl_readonly_section (decl
, reloc
))
21385 if (TREE_PUBLIC (decl
))
21386 return read_only_data_section
;
21388 return read_only_private_data_section
;
21393 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
21395 if (bss_initializer_p (decl
))
21396 return tls_comm_section
;
21397 else if (TREE_PUBLIC (decl
))
21398 return tls_data_section
;
21400 return tls_private_data_section
;
21404 if (TREE_PUBLIC (decl
))
21405 return data_section
;
21407 return private_data_section
;
21412 rs6000_xcoff_unique_section (tree decl
, int reloc ATTRIBUTE_UNUSED
)
21416 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
21417 name
= (*targetm
.strip_name_encoding
) (name
);
21418 set_decl_section_name (decl
, name
);
21421 /* Select section for constant in constant pool.
21423 On RS/6000, all constants are in the private read-only data area.
21424 However, if this is being placed in the TOC it must be output as a
21428 rs6000_xcoff_select_rtx_section (machine_mode mode
, rtx x
,
21429 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
21431 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x
, mode
))
21432 return toc_section
;
21434 return read_only_private_data_section
;
21437 /* Remove any trailing [DS] or the like from the symbol name. */
21439 static const char *
21440 rs6000_xcoff_strip_name_encoding (const char *name
)
21445 len
= strlen (name
);
21446 if (name
[len
- 1] == ']')
21447 return ggc_alloc_string (name
, len
- 4);
21452 /* Section attributes. AIX is always PIC. */
21454 static unsigned int
21455 rs6000_xcoff_section_type_flags (tree decl
, const char *name
, int reloc
)
21457 unsigned int align
;
21458 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
21460 if (decl
&& DECL_P (decl
) && VAR_P (decl
) && bss_initializer_p (decl
))
21461 flags
|= SECTION_BSS
;
21463 /* Align to at least UNIT size. */
21464 if (!decl
|| !DECL_P (decl
))
21465 align
= MIN_UNITS_PER_WORD
;
21466 /* Align code CSECT to at least 32 bytes. */
21467 else if ((flags
& SECTION_CODE
) != 0)
21468 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
), 32);
21470 /* Increase alignment of large objects if not already stricter. */
21471 align
= MAX ((DECL_ALIGN (decl
) / BITS_PER_UNIT
),
21472 int_size_in_bytes (TREE_TYPE (decl
)) > MIN_UNITS_PER_WORD
21473 ? UNITS_PER_FP_WORD
: MIN_UNITS_PER_WORD
);
21475 return flags
| (exact_log2 (align
) & SECTION_ENTSIZE
);
21478 /* Output at beginning of assembler file.
21480 Initialize the section names for the RS/6000 at this point.
21482 Specify filename, including full path, to assembler.
21484 We want to go into the TOC section so at least one .toc will be emitted.
21485 Also, in order to output proper .bs/.es pairs, we need at least one static
21486 [RW] section emitted.
21488 Finally, declare mcount when profiling to make the assembler happy. */
21491 rs6000_xcoff_file_start (void)
21493 rs6000_gen_section_name (&xcoff_bss_section_name
,
21494 main_input_filename
, ".bss_");
21495 rs6000_gen_section_name (&xcoff_private_data_section_name
,
21496 main_input_filename
, ".rw_");
21497 rs6000_gen_section_name (&xcoff_private_rodata_section_name
,
21498 main_input_filename
, ".rop_");
21499 rs6000_gen_section_name (&xcoff_read_only_section_name
,
21500 main_input_filename
, ".ro_");
21501 rs6000_gen_section_name (&xcoff_tls_data_section_name
,
21502 main_input_filename
, ".tls_");
21504 fputs ("\t.file\t", asm_out_file
);
21505 output_quoted_string (asm_out_file
, main_input_filename
);
21506 fputc ('\n', asm_out_file
);
21507 if (write_symbols
!= NO_DEBUG
)
21508 switch_to_section (private_data_section
);
21509 switch_to_section (toc_section
);
21510 switch_to_section (text_section
);
21512 fprintf (asm_out_file
, "\t.extern %s\n", RS6000_MCOUNT
);
21513 rs6000_file_start ();
21516 /* Output at end of assembler file.
21517 On the RS/6000, referencing data should automatically pull in text. */
21520 rs6000_xcoff_file_end (void)
21522 switch_to_section (text_section
);
21523 if (xcoff_tls_exec_model_detected
)
21525 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21526 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file
);
21528 fputs ("_section_.text:\n", asm_out_file
);
21529 switch_to_section (data_section
);
21530 fputs (TARGET_32BIT
21531 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
/* State threaded through rs6000_declare_alias: the output stream and
   whether we are emitting the '.'-prefixed function-descriptor names.  */
struct declare_alias_data
{
  FILE *file;
  bool function_descriptor;
};
21542 /* Declare alias N. A helper function for for_node_and_aliases. */
21545 rs6000_declare_alias (struct symtab_node
*n
, void *d
)
21547 struct declare_alias_data
*data
= (struct declare_alias_data
*)d
;
21548 /* Main symbol is output specially, because varasm machinery does part of
21549 the job for us - we do not need to declare .globl/lglobs and such. */
21550 if (!n
->alias
|| n
->weakref
)
21553 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n
->decl
)))
21556 /* Prevent assemble_alias from trying to use .set pseudo operation
21557 that does not behave as expected by the middle-end. */
21558 TREE_ASM_WRITTEN (n
->decl
) = true;
21560 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n
->decl
));
21561 char *buffer
= (char *) alloca (strlen (name
) + 2);
21563 int dollar_inside
= 0;
21565 strcpy (buffer
, name
);
21566 p
= strchr (buffer
, '$');
21570 p
= strchr (p
+ 1, '$');
21572 if (TREE_PUBLIC (n
->decl
))
21574 if (!RS6000_WEAK
|| !DECL_WEAK (n
->decl
))
21576 if (dollar_inside
) {
21577 if (data
->function_descriptor
)
21578 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21579 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21581 if (data
->function_descriptor
)
21583 fputs ("\t.globl .", data
->file
);
21584 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21585 putc ('\n', data
->file
);
21587 fputs ("\t.globl ", data
->file
);
21588 assemble_name (data
->file
, buffer
);
21589 putc ('\n', data
->file
);
21591 #ifdef ASM_WEAKEN_DECL
21592 else if (DECL_WEAK (n
->decl
) && !data
->function_descriptor
)
21593 ASM_WEAKEN_DECL (data
->file
, n
->decl
, name
, NULL
);
21600 if (data
->function_descriptor
)
21601 fprintf(data
->file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21602 fprintf(data
->file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21604 if (data
->function_descriptor
)
21606 fputs ("\t.lglobl .", data
->file
);
21607 RS6000_OUTPUT_BASENAME (data
->file
, buffer
);
21608 putc ('\n', data
->file
);
21610 fputs ("\t.lglobl ", data
->file
);
21611 assemble_name (data
->file
, buffer
);
21612 putc ('\n', data
->file
);
21614 if (data
->function_descriptor
)
21615 putc ('.', data
->file
);
21616 ASM_OUTPUT_LABEL (data
->file
, buffer
);
#ifdef HAVE_GAS_HIDDEN
/* Helper function to calculate visibility of a DECL
   and return the value as a const string.  */

static const char *
rs6000_xcoff_visibility (tree decl)
{
  /* Indexed by enum symbol_visibility (DEFAULT, PROTECTED, HIDDEN,
     INTERNAL); the string is appended directly to an assembler pseudo-op.  */
  static const char * const visibility_types[] = {
    "", ",protected", ",hidden", ",internal"
  };

  enum symbol_visibility vis = DECL_VISIBILITY (decl);
  return visibility_types[vis];
}
#endif
21638 /* This macro produces the initial definition of a function name.
21639 On the RS/6000, we need to place an extra '.' in the function name and
21640 output the function descriptor.
21641 Dollar signs are converted to underscores.
21643 The csect for the function will have already been created when
21644 text_section was selected. We do have to go back to that csect, however.
21646 The third and fourth parameters to the .function pseudo-op (16 and 044)
21647 are placeholders which no longer have any use.
21649 Because AIX assembler's .set command has unexpected semantics, we output
21650 all aliases as alternative labels in front of the definition. */
21653 rs6000_xcoff_declare_function_name (FILE *file
, const char *name
, tree decl
)
21655 char *buffer
= (char *) alloca (strlen (name
) + 1);
21657 int dollar_inside
= 0;
21658 struct declare_alias_data data
= {file
, false};
21660 strcpy (buffer
, name
);
21661 p
= strchr (buffer
, '$');
21665 p
= strchr (p
+ 1, '$');
21667 if (TREE_PUBLIC (decl
))
21669 if (!RS6000_WEAK
|| !DECL_WEAK (decl
))
21671 if (dollar_inside
) {
21672 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21673 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21675 fputs ("\t.globl .", file
);
21676 RS6000_OUTPUT_BASENAME (file
, buffer
);
21677 #ifdef HAVE_GAS_HIDDEN
21678 fputs (rs6000_xcoff_visibility (decl
), file
);
21685 if (dollar_inside
) {
21686 fprintf(file
, "\t.rename .%s,\".%s\"\n", buffer
, name
);
21687 fprintf(file
, "\t.rename %s,\"%s\"\n", buffer
, name
);
21689 fputs ("\t.lglobl .", file
);
21690 RS6000_OUTPUT_BASENAME (file
, buffer
);
21694 fputs ("\t.csect ", file
);
21695 assemble_name (file
, buffer
);
21696 fputs (TARGET_32BIT
? "\n" : ",3\n", file
);
21698 ASM_OUTPUT_LABEL (file
, buffer
);
21700 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21702 fputs (TARGET_32BIT
? "\t.long ." : "\t.llong .", file
);
21703 RS6000_OUTPUT_BASENAME (file
, buffer
);
21704 fputs (", TOC[tc0], 0\n", file
);
21707 switch_to_section (function_section (decl
));
21709 ASM_OUTPUT_LABEL (file
, buffer
);
21711 data
.function_descriptor
= true;
21712 symtab_node::get (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21714 if (!DECL_IGNORED_P (decl
))
21716 if (dwarf_debuginfo_p ())
21718 name
= (*targetm
.strip_name_encoding
) (name
);
21719 fprintf (file
, "\t.function .%s,.%s,2,0\n", name
, name
);
21726 /* Output assembly language to globalize a symbol from a DECL,
21727 possibly with visibility. */
21730 rs6000_xcoff_asm_globalize_decl_name (FILE *stream
, tree decl
)
21732 const char *name
= XSTR (XEXP (DECL_RTL (decl
), 0), 0);
21733 fputs (GLOBAL_ASM_OP
, stream
);
21734 assemble_name (stream
, name
);
21735 #ifdef HAVE_GAS_HIDDEN
21736 fputs (rs6000_xcoff_visibility (decl
), stream
);
21738 putc ('\n', stream
);
21741 /* Output assembly language to define a symbol as COMMON from a DECL,
21742 possibly with visibility. */
21745 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream
,
21746 tree decl ATTRIBUTE_UNUSED
,
21748 unsigned HOST_WIDE_INT size
,
21749 unsigned int align
)
21751 unsigned int align2
= 2;
21754 align
= DATA_ABI_ALIGNMENT (TREE_TYPE (decl
), DECL_ALIGN (decl
));
21757 align2
= floor_log2 (align
/ BITS_PER_UNIT
);
21761 if (! DECL_COMMON (decl
))
21763 /* Forget section. */
21766 /* Globalize TLS BSS. */
21767 if (TREE_PUBLIC (decl
) && DECL_THREAD_LOCAL_P (decl
))
21769 fputs (GLOBAL_ASM_OP
, stream
);
21770 assemble_name (stream
, name
);
21771 fputc ('\n', stream
);
21774 /* Switch to section and skip space. */
21775 fputs ("\t.csect ", stream
);
21776 assemble_name (stream
, name
);
21777 fprintf (stream
, ",%u\n", align2
);
21778 ASM_DECLARE_OBJECT_NAME (stream
, name
, decl
);
21779 ASM_OUTPUT_SKIP (stream
, size
? size
: 1);
21783 if (TREE_PUBLIC (decl
))
21786 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED
",%u" ,
21787 name
, size
, align2
);
21789 #ifdef HAVE_GAS_HIDDEN
21791 fputs (rs6000_xcoff_visibility (decl
), stream
);
21793 putc ('\n', stream
);
21797 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED
",%s,%u\n",
21798 (*targetm
.strip_name_encoding
) (name
), size
, name
, align2
);
21801 /* This macro produces the initial definition of a object (variable) name.
21802 Because AIX assembler's .set command has unexpected semantics, we output
21803 all aliases as alternative labels in front of the definition. */
21806 rs6000_xcoff_declare_object_name (FILE *file
, const char *name
, tree decl
)
21808 struct declare_alias_data data
= {file
, false};
21809 ASM_OUTPUT_LABEL (file
, name
);
21810 symtab_node::get_create (decl
)->call_for_symbol_and_aliases (rs6000_declare_alias
,
21814 /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
21817 rs6000_asm_output_dwarf_pcrel (FILE *file
, int size
, const char *label
)
21819 fputs (integer_asm_op (size
, FALSE
), file
);
21820 assemble_name (file
, label
);
21821 fputs ("-$", file
);
21824 /* Output a symbol offset relative to the dbase for the current object.
21825 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21828 __gcc_unwind_dbase is embedded in all executables/libraries through
21829 libgcc/config/rs6000/crtdbase.S. */
21832 rs6000_asm_output_dwarf_datarel (FILE *file
, int size
, const char *label
)
21834 fputs (integer_asm_op (size
, FALSE
), file
);
21835 assemble_name (file
, label
);
21836 fputs("-__gcc_unwind_dbase", file
);
21841 rs6000_xcoff_encode_section_info (tree decl
, rtx rtl
, int first
)
21845 const char *symname
;
21847 default_encode_section_info (decl
, rtl
, first
);
21849 /* Careful not to prod global register variables. */
21852 symbol
= XEXP (rtl
, 0);
21853 if (!SYMBOL_REF_P (symbol
))
21856 flags
= SYMBOL_REF_FLAGS (symbol
);
21858 if (TREE_CODE (decl
) == VAR_DECL
&& DECL_THREAD_LOCAL_P (decl
))
21859 flags
&= ~SYMBOL_FLAG_HAS_BLOCK_INFO
;
21861 SYMBOL_REF_FLAGS (symbol
) = flags
;
21863 symname
= XSTR (symbol
, 0);
21865 /* Append CSECT mapping class, unless the symbol already is qualified.
21866 Aliases are implemented as labels, so the symbol name should not add
21867 a mapping class. */
21870 && VAR_OR_FUNCTION_DECL_P (decl
)
21871 && (symtab_node::get (decl
) == NULL
21872 || symtab_node::get (decl
)->alias
== 0)
21873 && symname
[strlen (symname
) - 1] != ']')
21875 const char *smclass
= NULL
;
21877 if (TREE_CODE (decl
) == FUNCTION_DECL
)
21879 else if (DECL_THREAD_LOCAL_P (decl
))
21881 if (bss_initializer_p (decl
))
21883 else if (flag_data_sections
)
21886 else if (DECL_EXTERNAL (decl
))
21888 else if (bss_initializer_p (decl
))
21890 else if (flag_data_sections
)
21892 /* This must exactly match the logic of select section. */
21893 if (decl_readonly_section (decl
, compute_reloc_for_var (decl
)))
21899 if (smclass
!= NULL
)
21901 char *newname
= XALLOCAVEC (char, strlen (symname
) + 5);
21903 strcpy (newname
, symname
);
21904 strcat (newname
, smclass
);
21905 XSTR (symbol
, 0) = ggc_strdup (newname
);
21909 #endif /* HAVE_AS_TLS */
21910 #endif /* TARGET_XCOFF */
21913 rs6000_asm_weaken_decl (FILE *stream
, tree decl
,
21914 const char *name
, const char *val
)
21916 fputs ("\t.weak\t", stream
);
21917 assemble_name (stream
, name
);
21918 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
21919 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
21921 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21923 fputs (rs6000_xcoff_visibility (decl
), stream
);
21925 fputs ("\n\t.weak\t.", stream
);
21926 RS6000_OUTPUT_BASENAME (stream
, name
);
21928 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
21930 fputs (rs6000_xcoff_visibility (decl
), stream
);
21932 fputc ('\n', stream
);
21936 #ifdef ASM_OUTPUT_DEF
21937 ASM_OUTPUT_DEF (stream
, name
, val
);
21939 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
21940 && DEFAULT_ABI
== ABI_AIX
&& DOT_SYMBOLS
)
21942 fputs ("\t.set\t.", stream
);
21943 RS6000_OUTPUT_BASENAME (stream
, name
);
21944 fputs (",.", stream
);
21945 RS6000_OUTPUT_BASENAME (stream
, val
);
21946 fputc ('\n', stream
);
21952 /* Return true if INSN should not be copied. */
21955 rs6000_cannot_copy_insn_p (rtx_insn
*insn
)
21957 return recog_memoized (insn
) >= 0
21958 && get_attr_cannot_copy (insn
);
21961 /* Compute a (partial) cost for rtx X. Return true if the complete
21962 cost has been computed, and false if subexpressions should be
21963 scanned. In either case, *TOTAL contains the cost result. */
21966 rs6000_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
21967 int opno ATTRIBUTE_UNUSED
, int *total
, bool speed
)
21969 int code
= GET_CODE (x
);
21973 /* On the RS/6000, if it is valid in the insn, it is free. */
21975 if (((outer_code
== SET
21976 || outer_code
== PLUS
21977 || outer_code
== MINUS
)
21978 && (satisfies_constraint_I (x
)
21979 || satisfies_constraint_L (x
)))
21980 || (outer_code
== AND
21981 && (satisfies_constraint_K (x
)
21983 ? satisfies_constraint_L (x
)
21984 : satisfies_constraint_J (x
))))
21985 || ((outer_code
== IOR
|| outer_code
== XOR
)
21986 && (satisfies_constraint_K (x
)
21988 ? satisfies_constraint_L (x
)
21989 : satisfies_constraint_J (x
))))
21990 || outer_code
== ASHIFT
21991 || outer_code
== ASHIFTRT
21992 || outer_code
== LSHIFTRT
21993 || outer_code
== ROTATE
21994 || outer_code
== ROTATERT
21995 || outer_code
== ZERO_EXTRACT
21996 || (outer_code
== MULT
21997 && satisfies_constraint_I (x
))
21998 || ((outer_code
== DIV
|| outer_code
== UDIV
21999 || outer_code
== MOD
|| outer_code
== UMOD
)
22000 && exact_log2 (INTVAL (x
)) >= 0)
22001 || (outer_code
== COMPARE
22002 && (satisfies_constraint_I (x
)
22003 || satisfies_constraint_K (x
)))
22004 || ((outer_code
== EQ
|| outer_code
== NE
)
22005 && (satisfies_constraint_I (x
)
22006 || satisfies_constraint_K (x
)
22008 ? satisfies_constraint_L (x
)
22009 : satisfies_constraint_J (x
))))
22010 || (outer_code
== GTU
22011 && satisfies_constraint_I (x
))
22012 || (outer_code
== LTU
22013 && satisfies_constraint_P (x
)))
22018 else if ((outer_code
== PLUS
22019 && reg_or_add_cint_operand (x
, mode
))
22020 || (outer_code
== MINUS
22021 && reg_or_sub_cint_operand (x
, mode
))
22022 || ((outer_code
== SET
22023 || outer_code
== IOR
22024 || outer_code
== XOR
)
22026 & ~ (unsigned HOST_WIDE_INT
) 0xffffffff) == 0))
22028 *total
= COSTS_N_INSNS (1);
22034 case CONST_WIDE_INT
:
22038 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22042 /* When optimizing for size, MEM should be slightly more expensive
22043 than generating address, e.g., (plus (reg) (const)).
22044 L1 cache latency is about two instructions. */
22045 *total
= !speed
? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22046 if (rs6000_slow_unaligned_access (mode
, MEM_ALIGN (x
)))
22047 *total
+= COSTS_N_INSNS (100);
22056 if (FLOAT_MODE_P (mode
))
22057 *total
= rs6000_cost
->fp
;
22059 *total
= COSTS_N_INSNS (1);
22063 if (CONST_INT_P (XEXP (x
, 1))
22064 && satisfies_constraint_I (XEXP (x
, 1)))
22066 if (INTVAL (XEXP (x
, 1)) >= -256
22067 && INTVAL (XEXP (x
, 1)) <= 255)
22068 *total
= rs6000_cost
->mulsi_const9
;
22070 *total
= rs6000_cost
->mulsi_const
;
22072 else if (mode
== SFmode
)
22073 *total
= rs6000_cost
->fp
;
22074 else if (FLOAT_MODE_P (mode
))
22075 *total
= rs6000_cost
->dmul
;
22076 else if (mode
== DImode
)
22077 *total
= rs6000_cost
->muldi
;
22079 *total
= rs6000_cost
->mulsi
;
22083 if (mode
== SFmode
)
22084 *total
= rs6000_cost
->fp
;
22086 *total
= rs6000_cost
->dmul
;
22091 if (FLOAT_MODE_P (mode
))
22093 *total
= mode
== DFmode
? rs6000_cost
->ddiv
22094 : rs6000_cost
->sdiv
;
22101 if (CONST_INT_P (XEXP (x
, 1))
22102 && exact_log2 (INTVAL (XEXP (x
, 1))) >= 0)
22104 if (code
== DIV
|| code
== MOD
)
22106 *total
= COSTS_N_INSNS (2);
22109 *total
= COSTS_N_INSNS (1);
22113 if (GET_MODE (XEXP (x
, 1)) == DImode
)
22114 *total
= rs6000_cost
->divdi
;
22116 *total
= rs6000_cost
->divsi
;
22118 /* Add in shift and subtract for MOD unless we have a mod instruction. */
22119 if (!TARGET_MODULO
&& (code
== MOD
|| code
== UMOD
))
22120 *total
+= COSTS_N_INSNS (2);
22124 *total
= COSTS_N_INSNS (TARGET_CTZ
? 1 : 4);
22128 *total
= COSTS_N_INSNS (4);
22132 *total
= COSTS_N_INSNS (TARGET_POPCNTD
? 1 : 6);
22136 *total
= COSTS_N_INSNS (TARGET_CMPB
? 2 : 6);
22140 if (outer_code
== AND
|| outer_code
== IOR
|| outer_code
== XOR
)
22143 *total
= COSTS_N_INSNS (1);
22147 if (CONST_INT_P (XEXP (x
, 1)))
22149 rtx left
= XEXP (x
, 0);
22150 rtx_code left_code
= GET_CODE (left
);
22152 /* rotate-and-mask: 1 insn. */
22153 if ((left_code
== ROTATE
22154 || left_code
== ASHIFT
22155 || left_code
== LSHIFTRT
)
22156 && rs6000_is_valid_shift_mask (XEXP (x
, 1), left
, mode
))
22158 *total
= rtx_cost (XEXP (left
, 0), mode
, left_code
, 0, speed
);
22159 if (!CONST_INT_P (XEXP (left
, 1)))
22160 *total
+= rtx_cost (XEXP (left
, 1), SImode
, left_code
, 1, speed
);
22161 *total
+= COSTS_N_INSNS (1);
22165 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22166 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
22167 if (rs6000_is_valid_and_mask (XEXP (x
, 1), mode
)
22168 || (val
& 0xffff) == val
22169 || (val
& 0xffff0000) == val
22170 || ((val
& 0xffff) == 0 && mode
== SImode
))
22172 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
22173 *total
+= COSTS_N_INSNS (1);
22178 if (rs6000_is_valid_2insn_and (XEXP (x
, 1), mode
))
22180 *total
= rtx_cost (left
, mode
, AND
, 0, speed
);
22181 *total
+= COSTS_N_INSNS (2);
22186 *total
= COSTS_N_INSNS (1);
22191 *total
= COSTS_N_INSNS (1);
22197 *total
= COSTS_N_INSNS (1);
22201 /* The EXTSWSLI instruction is a combined instruction. Don't count both
22202 the sign extend and shift separately within the insn. */
22203 if (TARGET_EXTSWSLI
&& mode
== DImode
22204 && GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
22205 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
)
22216 /* Handle mul_highpart. */
22217 if (outer_code
== TRUNCATE
22218 && GET_CODE (XEXP (x
, 0)) == MULT
)
22220 if (mode
== DImode
)
22221 *total
= rs6000_cost
->muldi
;
22223 *total
= rs6000_cost
->mulsi
;
22226 else if (outer_code
== AND
)
22229 *total
= COSTS_N_INSNS (1);
22234 if (MEM_P (XEXP (x
, 0)))
22237 *total
= COSTS_N_INSNS (1);
22243 if (!FLOAT_MODE_P (mode
))
22245 *total
= COSTS_N_INSNS (1);
22251 case UNSIGNED_FLOAT
:
22254 case FLOAT_TRUNCATE
:
22255 *total
= rs6000_cost
->fp
;
22259 if (mode
== DFmode
)
22260 *total
= rs6000_cost
->sfdf_convert
;
22262 *total
= rs6000_cost
->fp
;
22269 *total
= COSTS_N_INSNS (1);
22272 else if (FLOAT_MODE_P (mode
) && TARGET_PPC_GFXOPT
&& TARGET_HARD_FLOAT
)
22274 *total
= rs6000_cost
->fp
;
22283 /* Carry bit requires mode == Pmode.
22284 NEG or PLUS already counted so only add one. */
22286 && (outer_code
== NEG
|| outer_code
== PLUS
))
22288 *total
= COSTS_N_INSNS (1);
22296 if (outer_code
== SET
)
22298 if (XEXP (x
, 1) == const0_rtx
)
22300 *total
= COSTS_N_INSNS (2);
22305 *total
= COSTS_N_INSNS (3);
22310 if (outer_code
== COMPARE
)
22318 if (XINT (x
, 1) == UNSPECV_MMA_XXSETACCZ
)
22332 /* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */
22335 rs6000_debug_rtx_costs (rtx x
, machine_mode mode
, int outer_code
,
22336 int opno
, int *total
, bool speed
)
22338 bool ret
= rs6000_rtx_costs (x
, mode
, outer_code
, opno
, total
, speed
);
22341 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22342 "opno = %d, total = %d, speed = %s, x:\n",
22343 ret
? "complete" : "scan inner",
22344 GET_MODE_NAME (mode
),
22345 GET_RTX_NAME (outer_code
),
22348 speed
? "true" : "false");
22356 rs6000_insn_cost (rtx_insn
*insn
, bool speed
)
22358 if (recog_memoized (insn
) < 0)
22361 /* If we are optimizing for size, just use the length. */
22363 return get_attr_length (insn
);
22365 /* Use the cost if provided. */
22366 int cost
= get_attr_cost (insn
);
22370 /* If the insn tells us how many insns there are, use that. Otherwise use
22371 the length/4. Adjust the insn length to remove the extra size that
22372 prefixed instructions take. */
22373 int n
= get_attr_num_insns (insn
);
22376 int length
= get_attr_length (insn
);
22377 if (get_attr_prefixed (insn
) == PREFIXED_YES
)
22380 ADJUST_INSN_LENGTH (insn
, adjust
);
22387 enum attr_type type
= get_attr_type (insn
);
22394 cost
= COSTS_N_INSNS (n
+ 1);
22398 switch (get_attr_size (insn
))
22401 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const9
;
22404 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi_const
;
22407 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->mulsi
;
22410 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->muldi
;
22413 gcc_unreachable ();
22417 switch (get_attr_size (insn
))
22420 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divsi
;
22423 cost
= COSTS_N_INSNS (n
- 1) + rs6000_cost
->divdi
;
22426 gcc_unreachable ();
22431 cost
= n
* rs6000_cost
->fp
;
22434 cost
= n
* rs6000_cost
->dmul
;
22437 cost
= n
* rs6000_cost
->sdiv
;
22440 cost
= n
* rs6000_cost
->ddiv
;
22447 cost
= COSTS_N_INSNS (n
+ 2);
22451 cost
= COSTS_N_INSNS (n
);
22457 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22460 rs6000_debug_address_cost (rtx x
, machine_mode mode
,
22461 addr_space_t as
, bool speed
)
22463 int ret
= TARGET_ADDRESS_COST (x
, mode
, as
, speed
);
22465 fprintf (stderr
, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22466 ret
, speed
? "true" : "false");
22473 /* A C expression returning the cost of moving data from a register of class
22474 CLASS1 to one of CLASS2. */
22477 rs6000_register_move_cost (machine_mode mode
,
22478 reg_class_t from
, reg_class_t to
)
22481 reg_class_t rclass
;
22483 if (TARGET_DEBUG_COST
)
22486 /* If we have VSX, we can easily move between FPR or Altivec registers,
22487 otherwise we can only easily move within classes.
22488 Do this first so we give best-case answers for union classes
22489 containing both gprs and vsx regs. */
22490 HARD_REG_SET to_vsx
, from_vsx
;
22491 to_vsx
= reg_class_contents
[to
] & reg_class_contents
[VSX_REGS
];
22492 from_vsx
= reg_class_contents
[from
] & reg_class_contents
[VSX_REGS
];
22493 if (!hard_reg_set_empty_p (to_vsx
)
22494 && !hard_reg_set_empty_p (from_vsx
)
22496 || hard_reg_set_intersect_p (to_vsx
, from_vsx
)))
22498 int reg
= FIRST_FPR_REGNO
;
22500 || (TEST_HARD_REG_BIT (to_vsx
, FIRST_ALTIVEC_REGNO
)
22501 && TEST_HARD_REG_BIT (from_vsx
, FIRST_ALTIVEC_REGNO
)))
22502 reg
= FIRST_ALTIVEC_REGNO
;
22503 ret
= 2 * hard_regno_nregs (reg
, mode
);
22506 /* Moves from/to GENERAL_REGS. */
22507 else if ((rclass
= from
, reg_classes_intersect_p (to
, GENERAL_REGS
))
22508 || (rclass
= to
, reg_classes_intersect_p (from
, GENERAL_REGS
)))
22510 if (rclass
== FLOAT_REGS
|| rclass
== ALTIVEC_REGS
|| rclass
== VSX_REGS
)
22512 if (TARGET_DIRECT_MOVE
)
22514 /* Keep the cost for direct moves above that for within
22515 a register class even if the actual processor cost is
22516 comparable. We do this because a direct move insn
22517 can't be a nop, whereas with ideal register
22518 allocation a move within the same class might turn
22519 out to be a nop. */
22520 if (rs6000_tune
== PROCESSOR_POWER9
22521 || rs6000_tune
== PROCESSOR_POWER10
)
22522 ret
= 3 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22524 ret
= 4 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22525 /* SFmode requires a conversion when moving between gprs
22527 if (mode
== SFmode
)
22531 ret
= (rs6000_memory_move_cost (mode
, rclass
, false)
22532 + rs6000_memory_move_cost (mode
, GENERAL_REGS
, false));
22535 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22537 else if (rclass
== CR_REGS
)
22540 /* For those processors that have slow LR/CTR moves, make them more
22541 expensive than memory in order to bias spills to memory .*/
22542 else if ((rs6000_tune
== PROCESSOR_POWER6
22543 || rs6000_tune
== PROCESSOR_POWER7
22544 || rs6000_tune
== PROCESSOR_POWER8
22545 || rs6000_tune
== PROCESSOR_POWER9
)
22546 && reg_class_subset_p (rclass
, SPECIAL_REGS
))
22547 ret
= 6 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22550 /* A move will cost one instruction per GPR moved. */
22551 ret
= 2 * hard_regno_nregs (FIRST_GPR_REGNO
, mode
);
22554 /* Everything else has to go through GENERAL_REGS. */
22556 ret
= (rs6000_register_move_cost (mode
, GENERAL_REGS
, to
)
22557 + rs6000_register_move_cost (mode
, from
, GENERAL_REGS
));
22559 if (TARGET_DEBUG_COST
)
22561 if (dbg_cost_ctrl
== 1)
22563 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22564 ret
, GET_MODE_NAME (mode
), reg_class_names
[from
],
22565 reg_class_names
[to
]);
22572 /* A C expressions returning the cost of moving data of MODE from a register to
22576 rs6000_memory_move_cost (machine_mode mode
, reg_class_t rclass
,
22577 bool in ATTRIBUTE_UNUSED
)
22581 if (TARGET_DEBUG_COST
)
22584 if (reg_classes_intersect_p (rclass
, GENERAL_REGS
))
22585 ret
= 4 * hard_regno_nregs (0, mode
);
22586 else if ((reg_classes_intersect_p (rclass
, FLOAT_REGS
)
22587 || reg_classes_intersect_p (rclass
, VSX_REGS
)))
22588 ret
= 4 * hard_regno_nregs (32, mode
);
22589 else if (reg_classes_intersect_p (rclass
, ALTIVEC_REGS
))
22590 ret
= 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO
, mode
);
22592 ret
= 4 + rs6000_register_move_cost (mode
, rclass
, GENERAL_REGS
);
22594 if (TARGET_DEBUG_COST
)
22596 if (dbg_cost_ctrl
== 1)
22598 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22599 ret
, GET_MODE_NAME (mode
), reg_class_names
[rclass
], in
);
22606 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22608 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22609 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22610 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22611 move cost between GENERAL_REGS and VSX_REGS low.
22613 It might seem reasonable to use a union class. After all, if usage
22614 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22615 rather than memory. However, in cases where register pressure of
22616 both is high, like the cactus_adm spec test, allowing
22617 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22618 the first scheduling pass. This is partly due to an allocno of
22619 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22620 class, which gives too high a pressure for GENERAL_REGS and too low
22621 for VSX_REGS. So, force a choice of the subclass here.
22623 The best class is also the union if GENERAL_REGS and VSX_REGS have
22624 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22625 allocno class, since trying to narrow down the class by regno mode
22626 is prone to error. For example, SImode is allowed in VSX regs and
22627 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22628 it would be wrong to choose an allocno of GENERAL_REGS based on
22632 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED
,
22633 reg_class_t allocno_class
,
22634 reg_class_t best_class
)
22636 switch (allocno_class
)
22638 case GEN_OR_VSX_REGS
:
22639 /* best_class must be a subset of allocno_class. */
22640 gcc_checking_assert (best_class
== GEN_OR_VSX_REGS
22641 || best_class
== GEN_OR_FLOAT_REGS
22642 || best_class
== VSX_REGS
22643 || best_class
== ALTIVEC_REGS
22644 || best_class
== FLOAT_REGS
22645 || best_class
== GENERAL_REGS
22646 || best_class
== BASE_REGS
);
22647 /* Use best_class but choose wider classes when copying from the
22648 wider class to best_class is cheap. This mimics IRA choice
22649 of allocno class. */
22650 if (best_class
== BASE_REGS
)
22651 return GENERAL_REGS
;
22652 if (TARGET_VSX
&& best_class
== FLOAT_REGS
)
22657 if (best_class
== ALTIVEC_REGS
)
22658 return ALTIVEC_REGS
;
22664 return allocno_class
;
22667 /* Load up a constant. If the mode is a vector mode, splat the value across
22668 all of the vector elements. */
22671 rs6000_load_constant_and_splat (machine_mode mode
, REAL_VALUE_TYPE dconst
)
22675 if (mode
== SFmode
|| mode
== DFmode
)
22677 rtx d
= const_double_from_real_value (dconst
, mode
);
22678 reg
= force_reg (mode
, d
);
22680 else if (mode
== V4SFmode
)
22682 rtx d
= const_double_from_real_value (dconst
, SFmode
);
22683 rtvec v
= gen_rtvec (4, d
, d
, d
, d
);
22684 reg
= gen_reg_rtx (mode
);
22685 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
22687 else if (mode
== V2DFmode
)
22689 rtx d
= const_double_from_real_value (dconst
, DFmode
);
22690 rtvec v
= gen_rtvec (2, d
, d
);
22691 reg
= gen_reg_rtx (mode
);
22692 rs6000_expand_vector_init (reg
, gen_rtx_PARALLEL (mode
, v
));
22695 gcc_unreachable ();
22700 /* Generate an FMA instruction. */
22703 rs6000_emit_madd (rtx target
, rtx m1
, rtx m2
, rtx a
)
22705 machine_mode mode
= GET_MODE (target
);
22708 dst
= expand_ternary_op (mode
, fma_optab
, m1
, m2
, a
, target
, 0);
22709 gcc_assert (dst
!= NULL
);
22712 emit_move_insn (target
, dst
);
22715 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22718 rs6000_emit_nmsub (rtx dst
, rtx m1
, rtx m2
, rtx a
)
22720 machine_mode mode
= GET_MODE (dst
);
22723 /* This is a tad more complicated, since the fnma_optab is for
22724 a different expression: fma(-m1, m2, a), which is the same
22725 thing except in the case of signed zeros.
22727 Fortunately we know that if FMA is supported that FNMSUB is
22728 also supported in the ISA. Just expand it directly. */
22730 gcc_assert (optab_handler (fma_optab
, mode
) != CODE_FOR_nothing
);
22732 r
= gen_rtx_NEG (mode
, a
);
22733 r
= gen_rtx_FMA (mode
, m1
, m2
, r
);
22734 r
= gen_rtx_NEG (mode
, r
);
22735 emit_insn (gen_rtx_SET (dst
, r
));
22738 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22739 add a reg_note saying that this was a division. Support both scalar and
22740 vector divide. Assumes no trapping math and finite arguments. */
22743 rs6000_emit_swdiv (rtx dst
, rtx n
, rtx d
, bool note_p
)
22745 machine_mode mode
= GET_MODE (dst
);
22746 rtx one
, x0
, e0
, x1
, xprev
, eprev
, xnext
, enext
, u
, v
;
22749 /* Low precision estimates guarantee 5 bits of accuracy. High
22750 precision estimates guarantee 14 bits of accuracy. SFmode
22751 requires 23 bits of accuracy. DFmode requires 52 bits of
22752 accuracy. Each pass at least doubles the accuracy, leading
22753 to the following. */
22754 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
22755 if (mode
== DFmode
|| mode
== V2DFmode
)
22758 enum insn_code code
= optab_handler (smul_optab
, mode
);
22759 insn_gen_fn gen_mul
= GEN_FCN (code
);
22761 gcc_assert (code
!= CODE_FOR_nothing
);
22763 one
= rs6000_load_constant_and_splat (mode
, dconst1
);
22765 /* x0 = 1./d estimate */
22766 x0
= gen_reg_rtx (mode
);
22767 emit_insn (gen_rtx_SET (x0
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, d
),
22770 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
22773 /* e0 = 1. - d * x0 */
22774 e0
= gen_reg_rtx (mode
);
22775 rs6000_emit_nmsub (e0
, d
, x0
, one
);
22777 /* x1 = x0 + e0 * x0 */
22778 x1
= gen_reg_rtx (mode
);
22779 rs6000_emit_madd (x1
, e0
, x0
, x0
);
22781 for (i
= 0, xprev
= x1
, eprev
= e0
; i
< passes
- 2;
22782 ++i
, xprev
= xnext
, eprev
= enext
) {
22784 /* enext = eprev * eprev */
22785 enext
= gen_reg_rtx (mode
);
22786 emit_insn (gen_mul (enext
, eprev
, eprev
));
22788 /* xnext = xprev + enext * xprev */
22789 xnext
= gen_reg_rtx (mode
);
22790 rs6000_emit_madd (xnext
, enext
, xprev
, xprev
);
22796 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
22798 /* u = n * xprev */
22799 u
= gen_reg_rtx (mode
);
22800 emit_insn (gen_mul (u
, n
, xprev
));
22802 /* v = n - (d * u) */
22803 v
= gen_reg_rtx (mode
);
22804 rs6000_emit_nmsub (v
, d
, u
, n
);
22806 /* dst = (v * xprev) + u */
22807 rs6000_emit_madd (dst
, v
, xprev
, u
);
22810 add_reg_note (get_last_insn (), REG_EQUAL
, gen_rtx_DIV (mode
, n
, d
));
/* NOTE(review): this chunk is a line-mangled extraction of rs6000_emit_swsqrt
   from GCC's rs6000.cc.  The extraction dropped physical lines (return type,
   braces, the UNSPEC_RSQRT argument, the `if (recip)' / `else' skeleton around
   the refinement loop, loop-variable declarations, and the `g = g1; h = h1;'
   copies).  Only comments were added below; verify the control flow against
   upstream before compiling.  */
22813 /* Goldschmidt's Algorithm for single/double-precision floating point
22814 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
22817 rs6000_emit_swsqrt (rtx dst
, rtx src
, bool recip
)
22819 machine_mode mode
= GET_MODE (src
)
;
22820 rtx e
= gen_reg_rtx (mode
);
22821 rtx g
= gen_reg_rtx (mode
);
22822 rtx h
= gen_reg_rtx (mode
);
22824 /* Low precision estimates guarantee 5 bits of accuracy. High
22825 precision estimates guarantee 14 bits of accuracy. SFmode
22826 requires 23 bits of accuracy. DFmode requires 52 bits of
22827 accuracy. Each pass at least doubles the accuracy, leading
22828 to the following. */
22829 int passes
= (TARGET_RECIP_PRECISION
) ? 1 : 3;
/* DFmode/V2DFmode need one extra pass (presumably passes++; the
   incremented-statement line was dropped) -- TODO confirm.  */
22830 if (mode
== DFmode
|| mode
== V2DFmode
)
22835 enum insn_code code
= optab_handler (smul_optab
, mode
);
22836 insn_gen_fn gen_mul
= GEN_FCN (code
);
22838 gcc_assert (code
!= CODE_FOR_nothing
);
22840 mhalf
= rs6000_load_constant_and_splat (mode
, dconsthalf
);
22842 /* e = rsqrt estimate */
22843 emit_insn (gen_rtx_SET (e
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, src
),
22846 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
22849 rtx zero
= force_reg (mode
, CONST0_RTX (mode
));
22851 if (mode
== SFmode
)
22853 rtx target
= emit_conditional_move (e
, { GT
, src
, zero
, mode
},
22856 emit_move_insn (e
, target
);
/* Vector modes use a vector select rather than a conditional move.  */
22860 rtx cond
= gen_rtx_GT (VOIDmode
, e
, zero
);
22861 rs6000_emit_vector_cond_expr (e
, e
, zero
, cond
, src
, zero
);
22865 /* g = sqrt estimate. */
22866 emit_insn (gen_mul (g
, e
, src
));
22867 /* h = 1/(2*sqrt) estimate. */
22868 emit_insn (gen_mul (h
, e
, mhalf
));
/* Single-pass reciprocal-sqrt case: one Goldschmidt correction applied
   directly to the rsqrt estimate E.  */
22874 rtx t
= gen_reg_rtx (mode
);
22875 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
22876 /* Apply correction directly to 1/rsqrt estimate. */
22877 rs6000_emit_madd (dst
, e
, t
, e
);
/* Goldschmidt refinement loop: t1 = 0.5 - h*g, g1 = g + g*t1,
   h1 = h + h*t1 (the `g = g1; h = h1;' rollover lines were dropped).  */
22881 for (i
= 0; i
< passes
; i
++)
22883 rtx t1
= gen_reg_rtx (mode
);
22884 rtx g1
= gen_reg_rtx (mode
);
22885 rtx h1
= gen_reg_rtx (mode
);
22887 rs6000_emit_nmsub (t1
, g
, h
, mhalf
);
22888 rs6000_emit_madd (g1
, g
, t1
, g
);
22889 rs6000_emit_madd (h1
, h
, t1
, h
);
22894 /* Multiply by 2 for 1/rsqrt. */
22895 emit_insn (gen_add3_insn (dst
, h
, h
));
/* Plain sqrt: final correction DST = g + g * (0.5 - h*g).  */
22900 rtx t
= gen_reg_rtx (mode
);
22901 rs6000_emit_nmsub (t
, g
, h
, mhalf
);
22902 rs6000_emit_madd (dst
, g
, t
, g
);
22908 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22909 (Power7) targets. DST is the target, and SRC is the argument operand. */
22912 rs6000_emit_popcount (rtx dst
, rtx src
)
22914 machine_mode mode
= GET_MODE (dst
);
22917 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
22918 if (TARGET_POPCNTD
)
22920 if (mode
== SImode
)
22921 emit_insn (gen_popcntdsi2 (dst
, src
));
22923 emit_insn (gen_popcntddi2 (dst
, src
));
22927 tmp1
= gen_reg_rtx (mode
);
22929 if (mode
== SImode
)
22931 emit_insn (gen_popcntbsi2 (tmp1
, src
));
22932 tmp2
= expand_mult (SImode
, tmp1
, GEN_INT (0x01010101),
22934 tmp2
= force_reg (SImode
, tmp2
);
22935 emit_insn (gen_lshrsi3 (dst
, tmp2
, GEN_INT (24)));
22939 emit_insn (gen_popcntbdi2 (tmp1
, src
));
22940 tmp2
= expand_mult (DImode
, tmp1
,
22941 GEN_INT ((HOST_WIDE_INT
)
22942 0x01010101 << 32 | 0x01010101),
22944 tmp2
= force_reg (DImode
, tmp2
);
22945 emit_insn (gen_lshrdi3 (dst
, tmp2
, GEN_INT (56)));
22950 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
22951 target, and SRC is the argument operand. */
22954 rs6000_emit_parity (rtx dst
, rtx src
)
22956 machine_mode mode
= GET_MODE (dst
);
22959 tmp
= gen_reg_rtx (mode
);
22961 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
22964 if (mode
== SImode
)
22966 emit_insn (gen_popcntbsi2 (tmp
, src
));
22967 emit_insn (gen_paritysi2_cmpb (dst
, tmp
));
22971 emit_insn (gen_popcntbdi2 (tmp
, src
));
22972 emit_insn (gen_paritydi2_cmpb (dst
, tmp
));
22977 if (mode
== SImode
)
22979 /* Is mult+shift >= shift+xor+shift+xor? */
22980 if (rs6000_cost
->mulsi_const
>= COSTS_N_INSNS (3))
22982 rtx tmp1
, tmp2
, tmp3
, tmp4
;
22984 tmp1
= gen_reg_rtx (SImode
);
22985 emit_insn (gen_popcntbsi2 (tmp1
, src
));
22987 tmp2
= gen_reg_rtx (SImode
);
22988 emit_insn (gen_lshrsi3 (tmp2
, tmp1
, GEN_INT (16)));
22989 tmp3
= gen_reg_rtx (SImode
);
22990 emit_insn (gen_xorsi3 (tmp3
, tmp1
, tmp2
));
22992 tmp4
= gen_reg_rtx (SImode
);
22993 emit_insn (gen_lshrsi3 (tmp4
, tmp3
, GEN_INT (8)));
22994 emit_insn (gen_xorsi3 (tmp
, tmp3
, tmp4
));
22997 rs6000_emit_popcount (tmp
, src
);
22998 emit_insn (gen_andsi3 (dst
, tmp
, const1_rtx
));
23002 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
23003 if (rs6000_cost
->muldi
>= COSTS_N_INSNS (5))
23005 rtx tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
;
23007 tmp1
= gen_reg_rtx (DImode
);
23008 emit_insn (gen_popcntbdi2 (tmp1
, src
));
23010 tmp2
= gen_reg_rtx (DImode
);
23011 emit_insn (gen_lshrdi3 (tmp2
, tmp1
, GEN_INT (32)));
23012 tmp3
= gen_reg_rtx (DImode
);
23013 emit_insn (gen_xordi3 (tmp3
, tmp1
, tmp2
));
23015 tmp4
= gen_reg_rtx (DImode
);
23016 emit_insn (gen_lshrdi3 (tmp4
, tmp3
, GEN_INT (16)));
23017 tmp5
= gen_reg_rtx (DImode
);
23018 emit_insn (gen_xordi3 (tmp5
, tmp3
, tmp4
));
23020 tmp6
= gen_reg_rtx (DImode
);
23021 emit_insn (gen_lshrdi3 (tmp6
, tmp5
, GEN_INT (8)));
23022 emit_insn (gen_xordi3 (tmp
, tmp5
, tmp6
));
23025 rs6000_emit_popcount (tmp
, src
);
23026 emit_insn (gen_anddi3 (dst
, tmp
, const1_rtx
));
23030 /* Expand an Altivec constant permutation for little endian mode.
23031 OP0 and OP1 are the input vectors and TARGET is the output vector.
23032 SEL specifies the constant permutation vector.
23034 There are two issues: First, the two input operands must be
23035 swapped so that together they form a double-wide array in LE
23036 order. Second, the vperm instruction has surprising behavior
23037 in LE mode: it interprets the elements of the source vectors
23038 in BE mode ("left to right") and interprets the elements of
23039 the destination vector in LE mode ("right to left"). To
23040 correct for this, we must subtract each element of the permute
23041 control vector from 31.
23043 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
23044 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
23045 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
23046 serve as the permute control vector. Then, in BE mode,
23050 places the desired result in vr9. However, in LE mode the
23051 vector contents will be
23053 vr10 = 00000003 00000002 00000001 00000000
23054 vr11 = 00000007 00000006 00000005 00000004
23056 The result of the vperm using the same permute control vector is
23058 vr9 = 05000000 07000000 01000000 03000000
23060 That is, the leftmost 4 bytes of vr10 are interpreted as the
23061 source for the rightmost 4 bytes of vr9, and so on.
23063 If we change the permute control vector to
23065 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
23073 vr9 = 00000006 00000004 00000002 00000000. */
23076 altivec_expand_vec_perm_const_le (rtx target
, rtx op0
, rtx op1
,
23077 const vec_perm_indices
&sel
)
23081 rtx constv
, unspec
;
23083 /* Unpack and adjust the constant selector. */
23084 for (i
= 0; i
< 16; ++i
)
23086 unsigned int elt
= 31 - (sel
[i
] & 31);
23087 perm
[i
] = GEN_INT (elt
);
23090 /* Expand to a permute, swapping the inputs and using the
23091 adjusted selector. */
23093 op0
= force_reg (V16QImode
, op0
);
23095 op1
= force_reg (V16QImode
, op1
);
23097 constv
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, perm
));
23098 constv
= force_reg (V16QImode
, constv
);
23099 unspec
= gen_rtx_UNSPEC (V16QImode
, gen_rtvec (3, op1
, op0
, constv
),
23101 if (!REG_P (target
))
23103 rtx tmp
= gen_reg_rtx (V16QImode
);
23104 emit_move_insn (tmp
, unspec
);
23108 emit_move_insn (target
, unspec
);
23111 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
23112 permute control vector. But here it's not a constant, so we must
23113 generate a vector NAND or NOR to do the adjustment. */
23116 altivec_expand_vec_perm_le (rtx operands
[4])
23118 rtx notx
, iorx
, unspec
;
23119 rtx target
= operands
[0];
23120 rtx op0
= operands
[1];
23121 rtx op1
= operands
[2];
23122 rtx sel
= operands
[3];
23124 rtx norreg
= gen_reg_rtx (V16QImode
);
23125 machine_mode mode
= GET_MODE (target
);
23127 /* Get everything in regs so the pattern matches. */
23129 op0
= force_reg (mode
, op0
);
23131 op1
= force_reg (mode
, op1
);
23133 sel
= force_reg (V16QImode
, sel
);
23134 if (!REG_P (target
))
23135 tmp
= gen_reg_rtx (mode
);
23137 if (TARGET_P9_VECTOR
)
23139 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, sel
),
23144 /* Invert the selector with a VNAND if available, else a VNOR.
23145 The VNAND is preferred for future fusion opportunities. */
23146 notx
= gen_rtx_NOT (V16QImode
, sel
);
23147 iorx
= (TARGET_P8_VECTOR
23148 ? gen_rtx_IOR (V16QImode
, notx
, notx
)
23149 : gen_rtx_AND (V16QImode
, notx
, notx
));
23150 emit_insn (gen_rtx_SET (norreg
, iorx
));
23152 /* Permute with operands reversed and adjusted selector. */
23153 unspec
= gen_rtx_UNSPEC (mode
, gen_rtvec (3, op1
, op0
, norreg
),
23157 /* Copy into target, possibly by way of a register. */
23158 if (!REG_P (target
))
23160 emit_move_insn (tmp
, unspec
);
23164 emit_move_insn (target
, unspec
);
/* NOTE(review): line-mangled extraction of altivec_expand_vec_perm_const
   from GCC's rs6000.cc.  The extraction dropped the return type, braces,
   several loop/condition bodies (selector simplification switch, splat
   early-exits, the xxpermdi perm_val computation at original lines
   23353-23368, and the final `return' statements).  Only comments were
   added; verify against upstream before compiling.  */
23167 /* Expand an Altivec constant permutation. Return true if we match
23168 an efficient implementation; false to fall back to VPERM.
23170 OP0 and OP1 are the input vectors and TARGET is the output vector.
23171 SEL specifies the constant permutation vector. */
23174 altivec_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
,
23175 const vec_perm_indices
&sel
)
/* Table of single-instruction merge/pack/permute patterns: each entry
   pairs an ISA mask and insn code with the 16-byte selector it
   implements.  */
23177 struct altivec_perm_insn
{
23178 HOST_WIDE_INT mask
;
23179 enum insn_code impl
;
23180 unsigned char perm
[16];
23182 static const struct altivec_perm_insn patterns
[] = {
23183 {OPTION_MASK_ALTIVEC
,
23184 CODE_FOR_altivec_vpkuhum_direct
,
23185 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
23186 {OPTION_MASK_ALTIVEC
,
23187 CODE_FOR_altivec_vpkuwum_direct
,
23188 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
23189 {OPTION_MASK_ALTIVEC
,
23190 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghb_direct
23191 : CODE_FOR_altivec_vmrglb_direct
,
23192 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
23193 {OPTION_MASK_ALTIVEC
,
23194 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghh_direct
23195 : CODE_FOR_altivec_vmrglh_direct
,
23196 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
23197 {OPTION_MASK_ALTIVEC
,
23198 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrghw_direct_v4si
23199 : CODE_FOR_altivec_vmrglw_direct_v4si
,
23200 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
23201 {OPTION_MASK_ALTIVEC
,
23202 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglb_direct
23203 : CODE_FOR_altivec_vmrghb_direct
,
23204 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
23205 {OPTION_MASK_ALTIVEC
,
23206 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglh_direct
23207 : CODE_FOR_altivec_vmrghh_direct
,
23208 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23209 {OPTION_MASK_ALTIVEC
,
23210 BYTES_BIG_ENDIAN
? CODE_FOR_altivec_vmrglw_direct_v4si
23211 : CODE_FOR_altivec_vmrghw_direct_v4si
,
23212 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23213 {OPTION_MASK_P8_VECTOR
,
23214 BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgew_v4sf_direct
23215 : CODE_FOR_p8_vmrgow_v4sf_direct
,
23216 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23217 {OPTION_MASK_P8_VECTOR
,
23218 BYTES_BIG_ENDIAN
? CODE_FOR_p8_vmrgow_v4sf_direct
23219 : CODE_FOR_p8_vmrgew_v4sf_direct
,
23220 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23221 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23222 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23223 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23224 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23225 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23226 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23227 {OPTION_MASK_VSX
, CODE_FOR_vsx_xxpermdi_v16qi
,
23228 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23230 unsigned int i
, j
, elt
, which
;
23231 unsigned char perm
[16];
23235 /* Unpack the constant selector. */
/* WHICH accumulates bit 1 if any element selects op0, bit 2 if any
   selects op1.  */
23236 for (i
= which
= 0; i
< 16; ++i
)
23239 which
|= (elt
< 16 ? 1 : 2);
23243 /* Simplify the constant selector based on operands. */
23247 gcc_unreachable ();
23251 if (!rtx_equal_p (op0
, op1
))
23256 for (i
= 0; i
< 16; ++i
)
23268 /* Look for splat patterns. */
/* Byte splat: all 16 selector entries equal -> vspltb.  */
23273 for (i
= 0; i
< 16; ++i
)
23274 if (perm
[i
] != elt
)
23278 if (!BYTES_BIG_ENDIAN
)
23280 emit_insn (gen_altivec_vspltb_direct (target
, op0
, GEN_INT (elt
)));
/* Halfword splat: pairs (elt, elt+1) repeated -> vsplth.  */
23286 for (i
= 0; i
< 16; i
+= 2)
23287 if (perm
[i
] != elt
|| perm
[i
+ 1] != elt
+ 1)
23291 int field
= BYTES_BIG_ENDIAN
? elt
/ 2 : 7 - elt
/ 2;
23292 x
= gen_reg_rtx (V8HImode
);
23293 emit_insn (gen_altivec_vsplth_direct (x
, gen_lowpart (V8HImode
, op0
),
23295 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
/* Word splat: quadruples (elt..elt+3) repeated -> vspltw.  */
23302 for (i
= 0; i
< 16; i
+= 4)
23304 || perm
[i
+ 1] != elt
+ 1
23305 || perm
[i
+ 2] != elt
+ 2
23306 || perm
[i
+ 3] != elt
+ 3)
23310 int field
= BYTES_BIG_ENDIAN
? elt
/ 4 : 3 - elt
/ 4;
23311 x
= gen_reg_rtx (V4SImode
);
23312 emit_insn (gen_altivec_vspltw_direct (x
, gen_lowpart (V4SImode
, op0
),
23314 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
23320 /* Look for merge and pack patterns. */
23321 for (j
= 0; j
< ARRAY_SIZE (patterns
); ++j
)
23325 if ((patterns
[j
].mask
& rs6000_isa_flags
) == 0)
23328 elt
= patterns
[j
].perm
[0];
23329 if (perm
[0] == elt
)
23331 else if (perm
[0] == elt
+ 16)
23335 for (i
= 1; i
< 16; ++i
)
23337 elt
= patterns
[j
].perm
[i
];
23339 elt
= (elt
>= 16 ? elt
- 16 : elt
+ 16);
23340 else if (one_vec
&& elt
>= 16)
23342 if (perm
[i
] != elt
)
23347 enum insn_code icode
= patterns
[j
].impl
;
23348 machine_mode omode
= insn_data
[icode
].operand
[0].mode
;
23349 machine_mode imode
= insn_data
[icode
].operand
[1].mode
;
/* xxpermdi needs an immediate selecting which doublewords to take;
   the perm_val computation (orig. lines 23353-23368) was dropped by
   the extraction.  */
23351 rtx perm_idx
= GEN_INT (0);
23352 if (icode
== CODE_FOR_vsx_xxpermdi_v16qi
)
23369 perm_idx
= GEN_INT (perm_val
);
23372 /* For little-endian, don't use vpkuwum and vpkuhum if the
23373 underlying vector type is not V4SI and V8HI, respectively.
23374 For example, using vpkuwum with a V8HI picks up the even
23375 halfwords (BE numbering) when the even halfwords (LE
23376 numbering) are what we need. */
23377 if (!BYTES_BIG_ENDIAN
23378 && icode
== CODE_FOR_altivec_vpkuwum_direct
23380 && GET_MODE (op0
) != V4SImode
)
23382 && GET_MODE (XEXP (op0
, 0)) != V4SImode
)))
23384 if (!BYTES_BIG_ENDIAN
23385 && icode
== CODE_FOR_altivec_vpkuhum_direct
23387 && GET_MODE (op0
) != V8HImode
)
23389 && GET_MODE (XEXP (op0
, 0)) != V8HImode
)))
23392 /* For little-endian, the two input operands must be swapped
23393 (or swapped back) to ensure proper right-to-left numbering
23395 if (swapped
^ !BYTES_BIG_ENDIAN
23396 && icode
!= CODE_FOR_vsx_xxpermdi_v16qi
)
23397 std::swap (op0
, op1
);
23398 if (imode
!= V16QImode
)
23400 op0
= gen_lowpart (imode
, op0
);
23401 op1
= gen_lowpart (imode
, op1
);
23403 if (omode
== V16QImode
)
23406 x
= gen_reg_rtx (omode
);
23407 if (icode
== CODE_FOR_vsx_xxpermdi_v16qi
)
23408 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
, perm_idx
));
23410 emit_insn (GEN_FCN (icode
) (x
, op0
, op1
));
23411 if (omode
!= V16QImode
)
23412 emit_move_insn (target
, gen_lowpart (V16QImode
, x
));
/* No single-instruction match: fall back to a (LE-adjusted) vperm.  */
23417 if (!BYTES_BIG_ENDIAN
)
23419 altivec_expand_vec_perm_const_le (target
, op0
, op1
, sel
);
23426 /* Expand a VSX Permute Doubleword constant permutation.
23427 Return true if we match an efficient implementation. */
23430 rs6000_expand_vec_perm_const_1 (rtx target
, rtx op0
, rtx op1
,
23431 unsigned char perm0
, unsigned char perm1
)
23435 /* If both selectors come from the same operand, fold to single op. */
23436 if ((perm0
& 2) == (perm1
& 2))
23443 /* If both operands are equal, fold to simpler permutation. */
23444 if (rtx_equal_p (op0
, op1
))
23447 perm1
= (perm1
& 1) + 2;
23449 /* If the first selector comes from the second operand, swap. */
23450 else if (perm0
& 2)
23456 std::swap (op0
, op1
);
23458 /* If the second selector does not come from the second operand, fail. */
23459 else if ((perm1
& 2) == 0)
23463 if (target
!= NULL
)
23465 machine_mode vmode
, dmode
;
23468 vmode
= GET_MODE (target
);
23469 gcc_assert (GET_MODE_NUNITS (vmode
) == 2);
23470 dmode
= mode_for_vector (GET_MODE_INNER (vmode
), 4).require ();
23471 x
= gen_rtx_VEC_CONCAT (dmode
, op0
, op1
);
23472 v
= gen_rtvec (2, GEN_INT (perm0
), GEN_INT (perm1
));
23473 x
= gen_rtx_VEC_SELECT (vmode
, x
, gen_rtx_PARALLEL (VOIDmode
, v
));
23474 emit_insn (gen_rtx_SET (target
, x
));
23479 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23482 rs6000_vectorize_vec_perm_const (machine_mode vmode
, machine_mode op_mode
,
23483 rtx target
, rtx op0
, rtx op1
,
23484 const vec_perm_indices
&sel
)
23486 if (vmode
!= op_mode
)
23489 bool testing_p
= !target
;
23491 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23492 if (TARGET_ALTIVEC
&& testing_p
)
23497 rtx nop0
= force_reg (vmode
, op0
);
23503 op1
= force_reg (vmode
, op1
);
23505 /* Check for ps_merge* or xxpermdi insns. */
23506 if ((vmode
== V2DFmode
|| vmode
== V2DImode
) && VECTOR_MEM_VSX_P (vmode
))
23510 op0
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 1);
23511 op1
= gen_raw_REG (vmode
, LAST_VIRTUAL_REGISTER
+ 2);
23513 if (rs6000_expand_vec_perm_const_1 (target
, op0
, op1
, sel
[0], sel
[1]))
23517 if (TARGET_ALTIVEC
)
23519 /* Force the target-independent code to lower to V16QImode. */
23520 if (vmode
!= V16QImode
)
23522 if (altivec_expand_vec_perm_const (target
, op0
, op1
, sel
))
23529 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23530 OP0 and OP1 are the input vectors and TARGET is the output vector.
23531 PERM specifies the constant permutation vector. */
23534 rs6000_do_expand_vec_perm (rtx target
, rtx op0
, rtx op1
,
23535 machine_mode vmode
, const vec_perm_builder
&perm
)
23537 rtx x
= expand_vec_perm_const (vmode
, op0
, op1
, perm
, BLKmode
, target
);
23539 emit_move_insn (target
, x
);
23542 /* Expand an extract even operation. */
23545 rs6000_expand_extract_even (rtx target
, rtx op0
, rtx op1
)
23547 machine_mode vmode
= GET_MODE (target
);
23548 unsigned i
, nelt
= GET_MODE_NUNITS (vmode
);
23549 vec_perm_builder
perm (nelt
, nelt
, 1);
23551 for (i
= 0; i
< nelt
; i
++)
23552 perm
.quick_push (i
* 2);
23554 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, perm
);
23557 /* Expand a vector interleave operation. */
23560 rs6000_expand_interleave (rtx target
, rtx op0
, rtx op1
, bool highp
)
23562 machine_mode vmode
= GET_MODE (target
);
23563 unsigned i
, high
, nelt
= GET_MODE_NUNITS (vmode
);
23564 vec_perm_builder
perm (nelt
, nelt
, 1);
23566 high
= (highp
? 0 : nelt
/ 2);
23567 for (i
= 0; i
< nelt
/ 2; i
++)
23569 perm
.quick_push (i
+ high
);
23570 perm
.quick_push (i
+ nelt
+ high
);
23573 rs6000_do_expand_vec_perm (target
, op0
, op1
, vmode
, perm
);
23576 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
23578 rs6000_scale_v2df (rtx tgt
, rtx src
, int scale
)
23580 HOST_WIDE_INT
hwi_scale (scale
);
23581 REAL_VALUE_TYPE r_pow
;
23582 rtvec v
= rtvec_alloc (2);
23584 rtx scale_vec
= gen_reg_rtx (V2DFmode
);
23585 (void)real_powi (&r_pow
, DFmode
, &dconst2
, hwi_scale
);
23586 elt
= const_double_from_real_value (r_pow
, DFmode
);
23587 RTVEC_ELT (v
, 0) = elt
;
23588 RTVEC_ELT (v
, 1) = elt
;
23589 rs6000_expand_vector_init (scale_vec
, gen_rtx_PARALLEL (V2DFmode
, v
));
23590 emit_insn (gen_mulv2df3 (tgt
, src
, scale_vec
));
23593 /* Return an RTX representing where to find the function value of a
23594 function returning MODE. */
23596 rs6000_complex_function_value (machine_mode mode
)
23598 unsigned int regno
;
23600 machine_mode inner
= GET_MODE_INNER (mode
);
23601 unsigned int inner_bytes
= GET_MODE_UNIT_SIZE (mode
);
23603 if (TARGET_FLOAT128_TYPE
23605 || (mode
== TCmode
&& TARGET_IEEEQUAD
)))
23606 regno
= ALTIVEC_ARG_RETURN
;
23608 else if (FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23609 regno
= FP_ARG_RETURN
;
23613 regno
= GP_ARG_RETURN
;
23615 /* 32-bit is OK since it'll go in r3/r4. */
23616 if (TARGET_32BIT
&& inner_bytes
>= 4)
23617 return gen_rtx_REG (mode
, regno
);
23620 if (inner_bytes
>= 8)
23621 return gen_rtx_REG (mode
, regno
);
23623 r1
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
),
23625 r2
= gen_rtx_EXPR_LIST (inner
, gen_rtx_REG (inner
, regno
+ 1),
23626 GEN_INT (inner_bytes
));
23627 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, r1
, r2
));
23630 /* Return an rtx describing a return value of MODE as a PARALLEL
23631 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23632 stride REG_STRIDE. */
23635 rs6000_parallel_return (machine_mode mode
,
23636 int n_elts
, machine_mode elt_mode
,
23637 unsigned int regno
, unsigned int reg_stride
)
23639 rtx par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (n_elts
));
23642 for (i
= 0; i
< n_elts
; i
++)
23644 rtx r
= gen_rtx_REG (elt_mode
, regno
);
23645 rtx off
= GEN_INT (i
* GET_MODE_SIZE (elt_mode
));
23646 XVECEXP (par
, 0, i
) = gen_rtx_EXPR_LIST (VOIDmode
, r
, off
);
23647 regno
+= reg_stride
;
23653 /* Target hook for TARGET_FUNCTION_VALUE.
23655 An integer value is in r3 and a floating-point value is in fp1,
23656 unless -msoft-float. */
23659 rs6000_function_value (const_tree valtype
,
23660 const_tree fn_decl_or_type ATTRIBUTE_UNUSED
,
23661 bool outgoing ATTRIBUTE_UNUSED
)
23664 unsigned int regno
;
23665 machine_mode elt_mode
;
23668 /* Special handling for structs in darwin64. */
23670 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype
), valtype
))
23672 CUMULATIVE_ARGS valcum
;
23676 valcum
.fregno
= FP_ARG_MIN_REG
;
23677 valcum
.vregno
= ALTIVEC_ARG_MIN_REG
;
23678 /* Do a trial code generation as if this were going to be passed as
23679 an argument; if any part goes in memory, we return NULL. */
23680 valret
= rs6000_darwin64_record_arg (&valcum
, valtype
, true, /* retval= */ true);
23683 /* Otherwise fall through to standard ABI rules. */
23686 mode
= TYPE_MODE (valtype
);
23688 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23689 if (rs6000_discover_homogeneous_aggregate (mode
, valtype
, &elt_mode
, &n_elts
))
23691 int first_reg
, n_regs
;
23693 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode
))
23695 /* _Decimal128 must use even/odd register pairs. */
23696 first_reg
= (elt_mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
23697 n_regs
= (GET_MODE_SIZE (elt_mode
) + 7) >> 3;
23701 first_reg
= ALTIVEC_ARG_RETURN
;
23705 return rs6000_parallel_return (mode
, n_elts
, elt_mode
, first_reg
, n_regs
);
23708 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */
23709 if (TARGET_32BIT
&& TARGET_POWERPC64
)
23718 int count
= GET_MODE_SIZE (mode
) / 4;
23719 return rs6000_parallel_return (mode
, count
, SImode
, GP_ARG_RETURN
, 1);
23722 if ((INTEGRAL_TYPE_P (valtype
)
23723 && GET_MODE_BITSIZE (mode
) < (TARGET_32BIT
? 32 : 64))
23724 || POINTER_TYPE_P (valtype
))
23725 mode
= TARGET_32BIT
? SImode
: DImode
;
23727 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23728 /* _Decimal128 must use an even/odd register pair. */
23729 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
23730 else if (SCALAR_FLOAT_TYPE_P (valtype
) && TARGET_HARD_FLOAT
23731 && !FLOAT128_VECTOR_P (mode
))
23732 regno
= FP_ARG_RETURN
;
23733 else if (TREE_CODE (valtype
) == COMPLEX_TYPE
23734 && targetm
.calls
.split_complex_arg
)
23735 return rs6000_complex_function_value (mode
);
23736 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23737 return register is used in both cases, and we won't see V2DImode/V2DFmode
23738 for pure altivec, combine the two cases. */
23739 else if ((TREE_CODE (valtype
) == VECTOR_TYPE
|| VECTOR_ALIGNMENT_P (mode
))
23740 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
23741 && ALTIVEC_OR_VSX_VECTOR_MODE (mode
))
23742 regno
= ALTIVEC_ARG_RETURN
;
23744 regno
= GP_ARG_RETURN
;
23746 return gen_rtx_REG (mode
, regno
);
23749 /* Define how to find the value returned by a library function
23750 assuming the value has mode MODE. */
23752 rs6000_libcall_value (machine_mode mode
)
23754 unsigned int regno
;
23756 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */
23757 if (TARGET_32BIT
&& TARGET_POWERPC64
&& mode
== DImode
)
23758 return rs6000_parallel_return (mode
, 2, SImode
, GP_ARG_RETURN
, 1);
23760 if (DECIMAL_FLOAT_MODE_P (mode
) && TARGET_HARD_FLOAT
)
23761 /* _Decimal128 must use an even/odd register pair. */
23762 regno
= (mode
== TDmode
) ? FP_ARG_RETURN
+ 1 : FP_ARG_RETURN
;
23763 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode
) && TARGET_HARD_FLOAT
)
23764 regno
= FP_ARG_RETURN
;
23765 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23766 return register is used in both cases, and we won't see V2DImode/V2DFmode
23767 for pure altivec, combine the two cases. */
23768 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode
)
23769 && TARGET_ALTIVEC
&& TARGET_ALTIVEC_ABI
)
23770 regno
= ALTIVEC_ARG_RETURN
;
23771 else if (COMPLEX_MODE_P (mode
) && targetm
.calls
.split_complex_arg
)
23772 return rs6000_complex_function_value (mode
);
23774 regno
= GP_ARG_RETURN
;
23776 return gen_rtx_REG (mode
, regno
);
23779 /* Compute register pressure classes. We implement the target hook to avoid
23780 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23781 lead to incorrect estimates of number of available registers and therefor
23782 increased register pressure/spill. */
23784 rs6000_compute_pressure_classes (enum reg_class
*pressure_classes
)
23789 pressure_classes
[n
++] = GENERAL_REGS
;
23790 if (TARGET_ALTIVEC
)
23791 pressure_classes
[n
++] = ALTIVEC_REGS
;
23793 pressure_classes
[n
++] = VSX_REGS
;
23796 if (TARGET_HARD_FLOAT
)
23797 pressure_classes
[n
++] = FLOAT_REGS
;
23799 pressure_classes
[n
++] = CR_REGS
;
23800 pressure_classes
[n
++] = SPECIAL_REGS
;
23805 /* Given FROM and TO register numbers, say whether this elimination is allowed.
23806 Frame pointer elimination is automatically handled.
23808 For the RS/6000, if frame pointer elimination is being done, we would like
23809 to convert ap into fp, not sp.
23811 We need r30 if -mminimal-toc was specified, and there are constant pool
23815 rs6000_can_eliminate (const int from
, const int to
)
23817 return (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
23818 ? ! frame_pointer_needed
23819 : from
== RS6000_PIC_OFFSET_TABLE_REGNUM
23820 ? ! TARGET_MINIMAL_TOC
|| TARGET_NO_TOC_OR_PCREL
23821 || constant_pool_empty_p ()
23825 /* Define the offset between two registers, FROM to be eliminated and its
23826 replacement TO, at the start of a routine. */
23828 rs6000_initial_elimination_offset (int from
, int to
)
23830 rs6000_stack_t
*info
= rs6000_stack_info ();
23831 HOST_WIDE_INT offset
;
23833 if (from
== HARD_FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
23834 offset
= info
->push_p
? 0 : -info
->total_size
;
23835 else if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
23837 offset
= info
->push_p
? 0 : -info
->total_size
;
23838 if (FRAME_GROWS_DOWNWARD
)
23839 offset
+= info
->fixed_size
+ info
->vars_size
+ info
->parm_size
;
23841 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
23842 offset
= FRAME_GROWS_DOWNWARD
23843 ? info
->fixed_size
+ info
->vars_size
+ info
->parm_size
23845 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
23846 offset
= info
->total_size
;
23847 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
23848 offset
= info
->push_p
? info
->total_size
: 0;
23849 else if (from
== RS6000_PIC_OFFSET_TABLE_REGNUM
)
23852 gcc_unreachable ();
23857 /* Fill in sizes of registers used by unwinder. */
23860 rs6000_init_dwarf_reg_sizes_extra (tree address
)
23862 if (TARGET_MACHO
&& ! TARGET_ALTIVEC
)
23865 machine_mode mode
= TYPE_MODE (char_type_node
);
23866 rtx addr
= expand_expr (address
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
23867 rtx mem
= gen_rtx_MEM (BLKmode
, addr
);
23868 rtx value
= gen_int_mode (16, mode
);
23870 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23871 The unwinder still needs to know the size of Altivec registers. */
23873 for (i
= FIRST_ALTIVEC_REGNO
; i
< LAST_ALTIVEC_REGNO
+1; i
++)
23875 int column
= DWARF_REG_TO_UNWIND_COLUMN
23876 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i
), true));
23877 HOST_WIDE_INT offset
= column
* GET_MODE_SIZE (mode
);
23879 emit_move_insn (adjust_address (mem
, mode
, offset
), value
);
/* NOTE(review): line-mangled extraction of rs6000_debugger_regno from GCC's
   rs6000.cc.  The extraction dropped the return type, braces, the `#endif'
   matching the `#ifdef', the `if (regno <= ...) return regno;' fast paths,
   and most literal return values (LR/CTR/VRSAVE/VSCR/FP/AP columns).  Only
   comments were added; the dropped constants cannot be recovered from this
   text -- verify against upstream.  */
23884 /* Map internal gcc register numbers to debug format register numbers.
23885 FORMAT specifies the type of debug register number to use:
23886 0 -- debug information, except for frame-related sections
23887 1 -- DWARF .debug_frame section
23888 2 -- DWARF .eh_frame section */
23891 rs6000_debugger_regno (unsigned int regno
, unsigned int format
)
23893 /* On some platforms, we use the standard DWARF register
23894 numbering for .debug_info and .debug_frame. */
23895 if ((format
== 0 && dwarf_debuginfo_p ()) || format
== 1)
23897 #ifdef RS6000_USE_DWARF_NUMBERING
23900 if (FP_REGNO_P (regno
))
23901 return regno
- FIRST_FPR_REGNO
+ 32;
23902 if (ALTIVEC_REGNO_P (regno
))
23903 return regno
- FIRST_ALTIVEC_REGNO
+ 1124;
/* Return values for LR/CTR below were dropped by the extraction.  */
23904 if (regno
== LR_REGNO
)
23906 if (regno
== CTR_REGNO
)
23908 if (regno
== CA_REGNO
)
23909 return 101; /* XER */
23910 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23911 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23912 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23913 to the DWARF reg for CR. */
23914 if (format
== 1 && regno
== CR2_REGNO
)
23916 if (CR_REGNO_P (regno
))
23917 return regno
- CR0_REGNO
+ 86;
23918 if (regno
== VRSAVE_REGNO
)
23920 if (regno
== VSCR_REGNO
)
23923 /* These do not make much sense. */
23924 if (regno
== FRAME_POINTER_REGNUM
)
23926 if (regno
== ARG_POINTER_REGNUM
)
23931 gcc_unreachable ();
23935 /* We use the GCC 7 (and before) internal number for non-DWARF debug
23936 information, and also for .eh_frame. */
23937 /* Translate the regnos to their numbers in GCC 7 (and before). */
23940 if (FP_REGNO_P (regno
))
23941 return regno
- FIRST_FPR_REGNO
+ 32;
23942 if (ALTIVEC_REGNO_P (regno
))
23943 return regno
- FIRST_ALTIVEC_REGNO
+ 77;
23944 if (regno
== LR_REGNO
)
23946 if (regno
== CTR_REGNO
)
23948 if (regno
== CA_REGNO
)
23949 return 76; /* XER */
23950 if (CR_REGNO_P (regno
))
23951 return regno
- CR0_REGNO
+ 68;
23952 if (regno
== VRSAVE_REGNO
)
23954 if (regno
== VSCR_REGNO
)
23957 if (regno
== FRAME_POINTER_REGNUM
)
23959 if (regno
== ARG_POINTER_REGNUM
)
23964 gcc_unreachable ();
23967 /* target hook eh_return_filter_mode */
23968 static scalar_int_mode
23969 rs6000_eh_return_filter_mode (void)
23971 return TARGET_32BIT
? SImode
: word_mode
;
23974 /* Target hook for translate_mode_attribute. */
23975 static machine_mode
23976 rs6000_translate_mode_attribute (machine_mode mode
)
23978 if ((FLOAT128_IEEE_P (mode
)
23979 && ieee128_float_type_node
== long_double_type_node
)
23980 || (FLOAT128_IBM_P (mode
)
23981 && ibm128_float_type_node
== long_double_type_node
))
23982 return COMPLEX_MODE_P (mode
) ? E_TCmode
: E_TFmode
;
23986 /* Target hook for scalar_mode_supported_p. */
23988 rs6000_scalar_mode_supported_p (scalar_mode mode
)
23990 /* -m32 does not support TImode. This is the default, from
23991 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
23992 same ABI as for -m32. But default_scalar_mode_supported_p allows
23993 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
23994 for -mpowerpc64. */
23995 if (TARGET_32BIT
&& mode
== TImode
)
23998 if (DECIMAL_FLOAT_MODE_P (mode
))
23999 return default_decimal_float_supported_p ();
24000 else if (TARGET_FLOAT128_TYPE
&& (mode
== KFmode
|| mode
== IFmode
))
24003 return default_scalar_mode_supported_p (mode
);
24006 /* Target hook for libgcc_floating_mode_supported_p. */
24009 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode
)
24018 /* We only return true for KFmode if IEEE 128-bit types are supported, and
24019 if long double does not use the IEEE 128-bit format. If long double
24020 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
24021 Because the code will not use KFmode in that case, there will be aborts
24022 because it can't find KFmode in the Floatn types. */
24024 return TARGET_FLOAT128_TYPE
&& !TARGET_IEEEQUAD
;
24031 /* Target hook for vector_mode_supported_p. */
24033 rs6000_vector_mode_supported_p (machine_mode mode
)
24035 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
24036 128-bit, the compiler might try to widen IEEE 128-bit to IBM
24038 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode
) && !FLOAT128_IEEE_P (mode
))
24045 /* Target hook for floatn_mode. */
24046 static opt_scalar_float_mode
24047 rs6000_floatn_mode (int n
, bool extended
)
24057 if (TARGET_FLOAT128_TYPE
)
24058 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
24060 return opt_scalar_float_mode ();
24063 return opt_scalar_float_mode ();
24066 /* Those are the only valid _FloatNx types. */
24067 gcc_unreachable ();
24081 if (TARGET_FLOAT128_TYPE
)
24082 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
24084 return opt_scalar_float_mode ();
24087 return opt_scalar_float_mode ();
24093 /* Target hook for c_mode_for_suffix. */
24094 static machine_mode
24095 rs6000_c_mode_for_suffix (char suffix
)
24097 if (TARGET_FLOAT128_TYPE
)
24099 if (suffix
== 'q' || suffix
== 'Q')
24100 return (FLOAT128_IEEE_P (TFmode
)) ? TFmode
: KFmode
;
24102 /* At the moment, we are not defining a suffix for IBM extended double.
24103 If/when the default for -mabi=ieeelongdouble is changed, and we want
24104 to support __ibm128 constants in legacy library code, we may need to
24105 re-evalaute this decision. Currently, c-lex.cc only supports 'w' and
24106 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
24107 __float80 constants. */
24113 /* Target hook for invalid_arg_for_unprototyped_fn. */
24114 static const char *
24115 invalid_arg_for_unprototyped_fn (const_tree typelist
, const_tree funcdecl
, const_tree val
)
24117 return (!rs6000_darwin64_abi
24119 && TREE_CODE (TREE_TYPE (val
)) == VECTOR_TYPE
24120 && (funcdecl
== NULL_TREE
24121 || (TREE_CODE (funcdecl
) == FUNCTION_DECL
24122 && DECL_BUILT_IN_CLASS (funcdecl
) != BUILT_IN_MD
)))
24123 ? N_("AltiVec argument passed to unprototyped function")
24127 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
24128 setup by using __stack_chk_fail_local hidden function instead of
24129 calling __stack_chk_fail directly. Otherwise it is better to call
24130 __stack_chk_fail directly. */
24132 static tree ATTRIBUTE_UNUSED
24133 rs6000_stack_protect_fail (void)
24135 return (DEFAULT_ABI
== ABI_V4
&& TARGET_SECURE_PLT
&& flag_pic
)
24136 ? default_hidden_stack_protect_fail ()
24137 : default_external_stack_protect_fail ();
24140 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
24143 static unsigned HOST_WIDE_INT
24144 rs6000_asan_shadow_offset (void)
24146 return (unsigned HOST_WIDE_INT
) 1 << (TARGET_64BIT
? 41 : 29);
24150 /* Mask options that we want to support inside of attribute((target)) and
24151 #pragma GCC target operations. Note, we do not include things like
24152 64/32-bit, endianness, hard/soft floating point, etc. that would have
24153 different calling sequences. */
24155 struct rs6000_opt_mask
{
24156 const char *name
; /* option name */
24157 HOST_WIDE_INT mask
; /* mask to set */
24158 bool invert
; /* invert sense of mask */
24159 bool valid_target
; /* option is a target option */
24162 static struct rs6000_opt_mask
const rs6000_opt_masks
[] =
24164 { "altivec", OPTION_MASK_ALTIVEC
, false, true },
24165 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX
,
24167 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR
,
24169 { "cmpb", OPTION_MASK_CMPB
, false, true },
24170 { "crypto", OPTION_MASK_CRYPTO
, false, true },
24171 { "direct-move", OPTION_MASK_DIRECT_MOVE
, false, true },
24172 { "dlmzb", OPTION_MASK_DLMZB
, false, true },
24173 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX
,
24175 { "float128", OPTION_MASK_FLOAT128_KEYWORD
, false, true },
24176 { "float128-hardware", OPTION_MASK_FLOAT128_HW
, false, true },
24177 { "fprnd", OPTION_MASK_FPRND
, false, true },
24178 { "power10", OPTION_MASK_POWER10
, false, true },
24179 { "hard-dfp", OPTION_MASK_DFP
, false, true },
24180 { "htm", OPTION_MASK_HTM
, false, true },
24181 { "isel", OPTION_MASK_ISEL
, false, true },
24182 { "mfcrf", OPTION_MASK_MFCRF
, false, true },
24183 { "mfpgpr", 0, false, true },
24184 { "mma", OPTION_MASK_MMA
, false, true },
24185 { "modulo", OPTION_MASK_MODULO
, false, true },
24186 { "mulhw", OPTION_MASK_MULHW
, false, true },
24187 { "multiple", OPTION_MASK_MULTIPLE
, false, true },
24188 { "pcrel", OPTION_MASK_PCREL
, false, true },
24189 { "pcrel-opt", OPTION_MASK_PCREL_OPT
, false, true },
24190 { "popcntb", OPTION_MASK_POPCNTB
, false, true },
24191 { "popcntd", OPTION_MASK_POPCNTD
, false, true },
24192 { "power8-fusion", OPTION_MASK_P8_FUSION
, false, true },
24193 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN
, false, true },
24194 { "power8-vector", OPTION_MASK_P8_VECTOR
, false, true },
24195 { "power9-minmax", OPTION_MASK_P9_MINMAX
, false, true },
24196 { "power9-misc", OPTION_MASK_P9_MISC
, false, true },
24197 { "power9-vector", OPTION_MASK_P9_VECTOR
, false, true },
24198 { "power10-fusion", OPTION_MASK_P10_FUSION
, false, true },
24199 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT
, false, true },
24200 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT
, false, true },
24201 { "prefixed", OPTION_MASK_PREFIXED
, false, true },
24202 { "quad-memory", OPTION_MASK_QUAD_MEMORY
, false, true },
24203 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC
, false, true },
24204 { "recip-precision", OPTION_MASK_RECIP_PRECISION
, false, true },
24205 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT
, false, true },
24206 { "string", 0, false, true },
24207 { "update", OPTION_MASK_NO_UPDATE
, true , true },
24208 { "vsx", OPTION_MASK_VSX
, false, true },
24209 #ifdef OPTION_MASK_64BIT
24211 { "aix64", OPTION_MASK_64BIT
, false, false },
24212 { "aix32", OPTION_MASK_64BIT
, true, false },
24214 { "64", OPTION_MASK_64BIT
, false, false },
24215 { "32", OPTION_MASK_64BIT
, true, false },
24218 #ifdef OPTION_MASK_EABI
24219 { "eabi", OPTION_MASK_EABI
, false, false },
24221 #ifdef OPTION_MASK_LITTLE_ENDIAN
24222 { "little", OPTION_MASK_LITTLE_ENDIAN
, false, false },
24223 { "big", OPTION_MASK_LITTLE_ENDIAN
, true, false },
24225 #ifdef OPTION_MASK_RELOCATABLE
24226 { "relocatable", OPTION_MASK_RELOCATABLE
, false, false },
24228 #ifdef OPTION_MASK_STRICT_ALIGN
24229 { "strict-align", OPTION_MASK_STRICT_ALIGN
, false, false },
24231 { "soft-float", OPTION_MASK_SOFT_FLOAT
, false, false },
24232 { "string", 0, false, false },
24235 /* Option variables that we want to support inside attribute((target)) and
24236 #pragma GCC target operations. */
24238 struct rs6000_opt_var
{
24239 const char *name
; /* option name */
24240 size_t global_offset
; /* offset of the option in global_options. */
24241 size_t target_offset
; /* offset of the option in target options. */
24244 static struct rs6000_opt_var
const rs6000_opt_vars
[] =
24247 offsetof (struct gcc_options
, x_TARGET_FRIZ
),
24248 offsetof (struct cl_target_option
, x_TARGET_FRIZ
), },
24249 { "avoid-indexed-addresses",
24250 offsetof (struct gcc_options
, x_TARGET_AVOID_XFORM
),
24251 offsetof (struct cl_target_option
, x_TARGET_AVOID_XFORM
) },
24253 offsetof (struct gcc_options
, x_rs6000_default_long_calls
),
24254 offsetof (struct cl_target_option
, x_rs6000_default_long_calls
), },
24255 { "optimize-swaps",
24256 offsetof (struct gcc_options
, x_rs6000_optimize_swaps
),
24257 offsetof (struct cl_target_option
, x_rs6000_optimize_swaps
), },
24258 { "allow-movmisalign",
24259 offsetof (struct gcc_options
, x_TARGET_ALLOW_MOVMISALIGN
),
24260 offsetof (struct cl_target_option
, x_TARGET_ALLOW_MOVMISALIGN
), },
24262 offsetof (struct gcc_options
, x_TARGET_SCHED_GROUPS
),
24263 offsetof (struct cl_target_option
, x_TARGET_SCHED_GROUPS
), },
24265 offsetof (struct gcc_options
, x_TARGET_ALWAYS_HINT
),
24266 offsetof (struct cl_target_option
, x_TARGET_ALWAYS_HINT
), },
24267 { "align-branch-targets",
24268 offsetof (struct gcc_options
, x_TARGET_ALIGN_BRANCH_TARGETS
),
24269 offsetof (struct cl_target_option
, x_TARGET_ALIGN_BRANCH_TARGETS
), },
24271 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
24272 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
24274 offsetof (struct gcc_options
, x_TARGET_SCHED_PROLOG
),
24275 offsetof (struct cl_target_option
, x_TARGET_SCHED_PROLOG
), },
24276 { "speculate-indirect-jumps",
24277 offsetof (struct gcc_options
, x_rs6000_speculate_indirect_jumps
),
24278 offsetof (struct cl_target_option
, x_rs6000_speculate_indirect_jumps
), },
24281 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24282 parsing. Return true if there were no errors. */
24285 rs6000_inner_target_options (tree args
, bool attr_p
)
24289 if (args
== NULL_TREE
)
24292 else if (TREE_CODE (args
) == STRING_CST
)
24294 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
24297 while ((q
= strtok (p
, ",")) != NULL
)
24299 bool error_p
= false;
24300 bool not_valid_p
= false;
24301 const char *cpu_opt
= NULL
;
24304 if (startswith (q
, "cpu="))
24306 int cpu_index
= rs6000_cpu_name_lookup (q
+4);
24307 if (cpu_index
>= 0)
24308 rs6000_cpu_index
= cpu_index
;
24315 else if (startswith (q
, "tune="))
24317 int tune_index
= rs6000_cpu_name_lookup (q
+5);
24318 if (tune_index
>= 0)
24319 rs6000_tune_index
= tune_index
;
24329 bool invert
= false;
24333 if (startswith (r
, "no-"))
24339 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_masks
); i
++)
24340 if (strcmp (r
, rs6000_opt_masks
[i
].name
) == 0)
24342 HOST_WIDE_INT mask
= rs6000_opt_masks
[i
].mask
;
24344 if (!rs6000_opt_masks
[i
].valid_target
)
24345 not_valid_p
= true;
24349 rs6000_isa_flags_explicit
|= mask
;
24351 /* VSX needs altivec, so -mvsx automagically sets
24352 altivec and disables -mavoid-indexed-addresses. */
24355 if (mask
== OPTION_MASK_VSX
)
24357 mask
|= OPTION_MASK_ALTIVEC
;
24358 TARGET_AVOID_XFORM
= 0;
24362 if (rs6000_opt_masks
[i
].invert
)
24366 rs6000_isa_flags
&= ~mask
;
24368 rs6000_isa_flags
|= mask
;
24373 if (error_p
&& !not_valid_p
)
24375 for (i
= 0; i
< ARRAY_SIZE (rs6000_opt_vars
); i
++)
24376 if (strcmp (r
, rs6000_opt_vars
[i
].name
) == 0)
24378 size_t j
= rs6000_opt_vars
[i
].global_offset
;
24379 *((int *) ((char *)&global_options
+ j
)) = !invert
;
24381 not_valid_p
= false;
24389 const char *eprefix
, *esuffix
;
24394 eprefix
= "__attribute__((__target__(";
24399 eprefix
= "#pragma GCC target ";
24404 error ("invalid cpu %qs for %s%qs%s", cpu_opt
, eprefix
,
24406 else if (not_valid_p
)
24407 error ("%s%qs%s is not allowed", eprefix
, q
, esuffix
);
24409 error ("%s%qs%s is invalid", eprefix
, q
, esuffix
);
24414 else if (TREE_CODE (args
) == TREE_LIST
)
24418 tree value
= TREE_VALUE (args
);
24421 bool ret2
= rs6000_inner_target_options (value
, attr_p
);
24425 args
= TREE_CHAIN (args
);
24427 while (args
!= NULL_TREE
);
24432 error ("attribute %<target%> argument not a string");
24439 /* Print out the target options as a list for -mdebug=target. */
24442 rs6000_debug_target_options (tree args
, const char *prefix
)
24444 if (args
== NULL_TREE
)
24445 fprintf (stderr
, "%s<NULL>", prefix
);
24447 else if (TREE_CODE (args
) == STRING_CST
)
24449 char *p
= ASTRDUP (TREE_STRING_POINTER (args
));
24452 while ((q
= strtok (p
, ",")) != NULL
)
24455 fprintf (stderr
, "%s\"%s\"", prefix
, q
);
24460 else if (TREE_CODE (args
) == TREE_LIST
)
24464 tree value
= TREE_VALUE (args
);
24467 rs6000_debug_target_options (value
, prefix
);
24470 args
= TREE_CHAIN (args
);
24472 while (args
!= NULL_TREE
);
24476 gcc_unreachable ();
24482 /* Hook to validate attribute((target("..."))). */
24485 rs6000_valid_attribute_p (tree fndecl
,
24486 tree
ARG_UNUSED (name
),
24490 struct cl_target_option cur_target
;
24493 tree new_target
, new_optimize
;
24494 tree func_optimize
;
24496 gcc_assert ((fndecl
!= NULL_TREE
) && (args
!= NULL_TREE
));
24498 if (TARGET_DEBUG_TARGET
)
24500 tree tname
= DECL_NAME (fndecl
);
24501 fprintf (stderr
, "\n==================== rs6000_valid_attribute_p:\n");
24503 fprintf (stderr
, "function: %.*s\n",
24504 (int) IDENTIFIER_LENGTH (tname
),
24505 IDENTIFIER_POINTER (tname
));
24507 fprintf (stderr
, "function: unknown\n");
24509 fprintf (stderr
, "args:");
24510 rs6000_debug_target_options (args
, " ");
24511 fprintf (stderr
, "\n");
24514 fprintf (stderr
, "flags: 0x%x\n", flags
);
24516 fprintf (stderr
, "--------------------\n");
24519 /* attribute((target("default"))) does nothing, beyond
24520 affecting multi-versioning. */
24521 if (TREE_VALUE (args
)
24522 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
24523 && TREE_CHAIN (args
) == NULL_TREE
24524 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
24527 old_optimize
= build_optimization_node (&global_options
,
24528 &global_options_set
);
24529 func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
24531 /* If the function changed the optimization levels as well as setting target
24532 options, start with the optimizations specified. */
24533 if (func_optimize
&& func_optimize
!= old_optimize
)
24534 cl_optimization_restore (&global_options
, &global_options_set
,
24535 TREE_OPTIMIZATION (func_optimize
));
24537 /* The target attributes may also change some optimization flags, so update
24538 the optimization options if necessary. */
24539 cl_target_option_save (&cur_target
, &global_options
, &global_options_set
);
24540 rs6000_cpu_index
= rs6000_tune_index
= -1;
24541 ret
= rs6000_inner_target_options (args
, true);
24543 /* Set up any additional state. */
24546 ret
= rs6000_option_override_internal (false);
24547 new_target
= build_target_option_node (&global_options
,
24548 &global_options_set
);
24553 new_optimize
= build_optimization_node (&global_options
,
24554 &global_options_set
);
24561 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
24563 if (old_optimize
!= new_optimize
)
24564 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
24567 cl_target_option_restore (&global_options
, &global_options_set
, &cur_target
);
24569 if (old_optimize
!= new_optimize
)
24570 cl_optimization_restore (&global_options
, &global_options_set
,
24571 TREE_OPTIMIZATION (old_optimize
));
24577 /* Hook to validate the current #pragma GCC target and set the state, and
24578 update the macros based on what was changed. If ARGS is NULL, then
24579 POP_TARGET is used to reset the options. */
24582 rs6000_pragma_target_parse (tree args
, tree pop_target
)
24584 tree prev_tree
= build_target_option_node (&global_options
,
24585 &global_options_set
);
24587 struct cl_target_option
*prev_opt
, *cur_opt
;
24588 HOST_WIDE_INT prev_flags
, cur_flags
, diff_flags
;
24590 if (TARGET_DEBUG_TARGET
)
24592 fprintf (stderr
, "\n==================== rs6000_pragma_target_parse\n");
24593 fprintf (stderr
, "args:");
24594 rs6000_debug_target_options (args
, " ");
24595 fprintf (stderr
, "\n");
24599 fprintf (stderr
, "pop_target:\n");
24600 debug_tree (pop_target
);
24603 fprintf (stderr
, "pop_target: <NULL>\n");
24605 fprintf (stderr
, "--------------------\n");
24610 cur_tree
= ((pop_target
)
24612 : target_option_default_node
);
24613 cl_target_option_restore (&global_options
, &global_options_set
,
24614 TREE_TARGET_OPTION (cur_tree
));
24618 rs6000_cpu_index
= rs6000_tune_index
= -1;
24619 if (!rs6000_inner_target_options (args
, false)
24620 || !rs6000_option_override_internal (false)
24621 || (cur_tree
= build_target_option_node (&global_options
,
24622 &global_options_set
))
24625 if (TARGET_DEBUG_BUILTIN
|| TARGET_DEBUG_TARGET
)
24626 fprintf (stderr
, "invalid pragma\n");
24632 target_option_current_node
= cur_tree
;
24633 rs6000_activate_target_options (target_option_current_node
);
24635 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24636 change the macros that are defined. */
24637 if (rs6000_target_modify_macros_ptr
)
24639 prev_opt
= TREE_TARGET_OPTION (prev_tree
);
24640 prev_flags
= prev_opt
->x_rs6000_isa_flags
;
24642 cur_opt
= TREE_TARGET_OPTION (cur_tree
);
24643 cur_flags
= cur_opt
->x_rs6000_isa_flags
;
24645 diff_flags
= (prev_flags
^ cur_flags
);
24647 if (diff_flags
!= 0)
24649 /* Delete old macros. */
24650 rs6000_target_modify_macros_ptr (false,
24651 prev_flags
& diff_flags
);
24653 /* Define new macros. */
24654 rs6000_target_modify_macros_ptr (true,
24655 cur_flags
& diff_flags
);
24663 /* Remember the last target of rs6000_set_current_function. */
24664 static GTY(()) tree rs6000_previous_fndecl
;
24666 /* Restore target's globals from NEW_TREE and invalidate the
24667 rs6000_previous_fndecl cache. */
24670 rs6000_activate_target_options (tree new_tree
)
24672 cl_target_option_restore (&global_options
, &global_options_set
,
24673 TREE_TARGET_OPTION (new_tree
));
24674 if (TREE_TARGET_GLOBALS (new_tree
))
24675 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
24676 else if (new_tree
== target_option_default_node
)
24677 restore_target_globals (&default_target_globals
);
24679 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
24680 rs6000_previous_fndecl
= NULL_TREE
;
24683 /* Establish appropriate back-end context for processing the function
24684 FNDECL. The argument might be NULL to indicate processing at top
24685 level, outside of any function scope. */
24687 rs6000_set_current_function (tree fndecl
)
24689 if (TARGET_DEBUG_TARGET
)
24691 fprintf (stderr
, "\n==================== rs6000_set_current_function");
24694 fprintf (stderr
, ", fndecl %s (%p)",
24695 (DECL_NAME (fndecl
)
24696 ? IDENTIFIER_POINTER (DECL_NAME (fndecl
))
24697 : "<unknown>"), (void *)fndecl
);
24699 if (rs6000_previous_fndecl
)
24700 fprintf (stderr
, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl
);
24702 fprintf (stderr
, "\n");
24705 /* Only change the context if the function changes. This hook is called
24706 several times in the course of compiling a function, and we don't want to
24707 slow things down too much or call target_reinit when it isn't safe. */
24708 if (fndecl
== rs6000_previous_fndecl
)
24712 if (rs6000_previous_fndecl
== NULL_TREE
)
24713 old_tree
= target_option_current_node
;
24714 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
))
24715 old_tree
= DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl
);
24717 old_tree
= target_option_default_node
;
24720 if (fndecl
== NULL_TREE
)
24722 if (old_tree
!= target_option_current_node
)
24723 new_tree
= target_option_current_node
;
24725 new_tree
= NULL_TREE
;
24729 new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
24730 if (new_tree
== NULL_TREE
)
24731 new_tree
= target_option_default_node
;
24734 if (TARGET_DEBUG_TARGET
)
24738 fprintf (stderr
, "\nnew fndecl target specific options:\n");
24739 debug_tree (new_tree
);
24744 fprintf (stderr
, "\nold fndecl target specific options:\n");
24745 debug_tree (old_tree
);
24748 if (old_tree
!= NULL_TREE
|| new_tree
!= NULL_TREE
)
24749 fprintf (stderr
, "--------------------\n");
24752 if (new_tree
&& old_tree
!= new_tree
)
24753 rs6000_activate_target_options (new_tree
);
24756 rs6000_previous_fndecl
= fndecl
;
24760 /* Save the current options */
24763 rs6000_function_specific_save (struct cl_target_option
*ptr
,
24764 struct gcc_options
*opts
,
24765 struct gcc_options */
* opts_set */
)
24767 ptr
->x_rs6000_isa_flags
= opts
->x_rs6000_isa_flags
;
24768 ptr
->x_rs6000_isa_flags_explicit
= opts
->x_rs6000_isa_flags_explicit
;
24771 /* Restore the current options */
24774 rs6000_function_specific_restore (struct gcc_options
*opts
,
24775 struct gcc_options */
* opts_set */
,
24776 struct cl_target_option
*ptr
)
24779 opts
->x_rs6000_isa_flags
= ptr
->x_rs6000_isa_flags
;
24780 opts
->x_rs6000_isa_flags_explicit
= ptr
->x_rs6000_isa_flags_explicit
;
24781 (void) rs6000_option_override_internal (false);
24784 /* Print the current options */
24787 rs6000_function_specific_print (FILE *file
, int indent
,
24788 struct cl_target_option
*ptr
)
24790 rs6000_print_isa_options (file
, indent
, "Isa options set",
24791 ptr
->x_rs6000_isa_flags
);
24793 rs6000_print_isa_options (file
, indent
, "Isa options explicit",
24794 ptr
->x_rs6000_isa_flags_explicit
);
24797 /* Helper function to print the current isa or misc options on a line. */
24800 rs6000_print_options_internal (FILE *file
,
24802 const char *string
,
24803 HOST_WIDE_INT flags
,
24804 const char *prefix
,
24805 const struct rs6000_opt_mask
*opts
,
24806 size_t num_elements
)
24809 size_t start_column
= 0;
24811 size_t max_column
= 120;
24812 size_t prefix_len
= strlen (prefix
);
24813 size_t comma_len
= 0;
24814 const char *comma
= "";
24817 start_column
+= fprintf (file
, "%*s", indent
, "");
24821 fprintf (stderr
, DEBUG_FMT_S
, string
, "<none>");
24825 start_column
+= fprintf (stderr
, DEBUG_FMT_WX
, string
, flags
);
24827 /* Print the various mask options. */
24828 cur_column
= start_column
;
24829 for (i
= 0; i
< num_elements
; i
++)
24831 bool invert
= opts
[i
].invert
;
24832 const char *name
= opts
[i
].name
;
24833 const char *no_str
= "";
24834 HOST_WIDE_INT mask
= opts
[i
].mask
;
24835 size_t len
= comma_len
+ prefix_len
+ strlen (name
);
24839 if ((flags
& mask
) == 0)
24842 len
+= strlen ("no-");
24850 if ((flags
& mask
) != 0)
24853 len
+= strlen ("no-");
24860 if (cur_column
> max_column
)
24862 fprintf (stderr
, ", \\\n%*s", (int)start_column
, "");
24863 cur_column
= start_column
+ len
;
24867 fprintf (file
, "%s%s%s%s", comma
, prefix
, no_str
, name
);
24869 comma_len
= strlen (", ");
24872 fputs ("\n", file
);
24875 /* Helper function to print the current isa options on a line. */
24878 rs6000_print_isa_options (FILE *file
, int indent
, const char *string
,
24879 HOST_WIDE_INT flags
)
24881 rs6000_print_options_internal (file
, indent
, string
, flags
, "-m",
24882 &rs6000_opt_masks
[0],
24883 ARRAY_SIZE (rs6000_opt_masks
));
24886 /* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
24887 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24888 -mupper-regs-df, etc.).
24890 If the user used -mno-power8-vector, we need to turn off all of the implicit
24891 ISA 2.07 and 3.0 options that relate to the vector unit.
24893 If the user used -mno-power9-vector, we need to turn off all of the implicit
24894 ISA 3.0 options that relate to the vector unit.
24896 This function does not handle explicit options such as the user specifying
24897 -mdirect-move. These are handled in rs6000_option_override_internal, and
24898 the appropriate error is given if needed.
24900 We return a mask of all of the implicit options that should not be enabled
24903 static HOST_WIDE_INT
24904 rs6000_disable_incompatible_switches (void)
24906 HOST_WIDE_INT ignore_masks
= rs6000_isa_flags_explicit
;
24909 static const struct {
24910 const HOST_WIDE_INT no_flag
; /* flag explicitly turned off. */
24911 const HOST_WIDE_INT dep_flags
; /* flags that depend on this option. */
24912 const char *const name
; /* name of the switch. */
24914 { OPTION_MASK_P9_VECTOR
, OTHER_P9_VECTOR_MASKS
, "power9-vector" },
24915 { OPTION_MASK_P8_VECTOR
, OTHER_P8_VECTOR_MASKS
, "power8-vector" },
24916 { OPTION_MASK_VSX
, OTHER_VSX_VECTOR_MASKS
, "vsx" },
24917 { OPTION_MASK_ALTIVEC
, OTHER_ALTIVEC_MASKS
, "altivec" },
24920 for (i
= 0; i
< ARRAY_SIZE (flags
); i
++)
24922 HOST_WIDE_INT no_flag
= flags
[i
].no_flag
;
24924 if ((rs6000_isa_flags
& no_flag
) == 0
24925 && (rs6000_isa_flags_explicit
& no_flag
) != 0)
24927 HOST_WIDE_INT dep_flags
= flags
[i
].dep_flags
;
24928 HOST_WIDE_INT set_flags
= (rs6000_isa_flags_explicit
24934 for (j
= 0; j
< ARRAY_SIZE (rs6000_opt_masks
); j
++)
24935 if ((set_flags
& rs6000_opt_masks
[j
].mask
) != 0)
24937 set_flags
&= ~rs6000_opt_masks
[j
].mask
;
24938 error ("%<-mno-%s%> turns off %<-m%s%>",
24940 rs6000_opt_masks
[j
].name
);
24943 gcc_assert (!set_flags
);
24946 rs6000_isa_flags
&= ~dep_flags
;
24947 ignore_masks
|= no_flag
| dep_flags
;
24951 return ignore_masks
;
24955 /* Helper function for printing the function name when debugging. */
24957 static const char *
24958 get_decl_name (tree fn
)
24965 name
= DECL_NAME (fn
);
24967 return "<no-name>";
24969 return IDENTIFIER_POINTER (name
);
24972 /* Return the clone id of the target we are compiling code for in a target
24973 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
24974 the priority list for the target clones (ordered from lowest to
24978 rs6000_clone_priority (tree fndecl
)
24980 tree fn_opts
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
24981 HOST_WIDE_INT isa_masks
;
24982 int ret
= CLONE_DEFAULT
;
24983 tree attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (fndecl
));
24984 const char *attrs_str
= NULL
;
24986 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
24987 attrs_str
= TREE_STRING_POINTER (attrs
);
24989 /* Return priority zero for default function. Return the ISA needed for the
24990 function if it is not the default. */
24991 if (strcmp (attrs_str
, "default") != 0)
24993 if (fn_opts
== NULL_TREE
)
24994 fn_opts
= target_option_default_node
;
24996 if (!fn_opts
|| !TREE_TARGET_OPTION (fn_opts
))
24997 isa_masks
= rs6000_isa_flags
;
24999 isa_masks
= TREE_TARGET_OPTION (fn_opts
)->x_rs6000_isa_flags
;
25001 for (ret
= CLONE_MAX
- 1; ret
!= 0; ret
--)
25002 if ((rs6000_clone_map
[ret
].isa_mask
& isa_masks
) != 0)
25006 if (TARGET_DEBUG_TARGET
)
25007 fprintf (stderr
, "rs6000_get_function_version_priority (%s) => %d\n",
25008 get_decl_name (fndecl
), ret
);
25013 /* This compares the priority of target features in function DECL1 and DECL2.
25014 It returns positive value if DECL1 is higher priority, negative value if
25015 DECL2 is higher priority and 0 if they are the same. Note, priorities are
25016 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
25019 rs6000_compare_version_priority (tree decl1
, tree decl2
)
25021 int priority1
= rs6000_clone_priority (decl1
);
25022 int priority2
= rs6000_clone_priority (decl2
);
25023 int ret
= priority1
- priority2
;
25025 if (TARGET_DEBUG_TARGET
)
25026 fprintf (stderr
, "rs6000_compare_version_priority (%s, %s) => %d\n",
25027 get_decl_name (decl1
), get_decl_name (decl2
), ret
);
25032 /* Make a dispatcher declaration for the multi-versioned function DECL.
25033 Calls to DECL function will be replaced with calls to the dispatcher
25034 by the front-end. Returns the decl of the dispatcher function. */
25037 rs6000_get_function_versions_dispatcher (void *decl
)
25039 tree fn
= (tree
) decl
;
25040 struct cgraph_node
*node
= NULL
;
25041 struct cgraph_node
*default_node
= NULL
;
25042 struct cgraph_function_version_info
*node_v
= NULL
;
25043 struct cgraph_function_version_info
*first_v
= NULL
;
25045 tree dispatch_decl
= NULL
;
25047 struct cgraph_function_version_info
*default_version_info
= NULL
;
25048 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
25050 if (TARGET_DEBUG_TARGET
)
25051 fprintf (stderr
, "rs6000_get_function_versions_dispatcher (%s)\n",
25052 get_decl_name (fn
));
25054 node
= cgraph_node::get (fn
);
25055 gcc_assert (node
!= NULL
);
25057 node_v
= node
->function_version ();
25058 gcc_assert (node_v
!= NULL
);
25060 if (node_v
->dispatcher_resolver
!= NULL
)
25061 return node_v
->dispatcher_resolver
;
25063 /* Find the default version and make it the first node. */
25065 /* Go to the beginning of the chain. */
25066 while (first_v
->prev
!= NULL
)
25067 first_v
= first_v
->prev
;
25069 default_version_info
= first_v
;
25070 while (default_version_info
!= NULL
)
25072 const tree decl2
= default_version_info
->this_node
->decl
;
25073 if (is_function_default_version (decl2
))
25075 default_version_info
= default_version_info
->next
;
25078 /* If there is no default node, just return NULL. */
25079 if (default_version_info
== NULL
)
25082 /* Make default info the first node. */
25083 if (first_v
!= default_version_info
)
25085 default_version_info
->prev
->next
= default_version_info
->next
;
25086 if (default_version_info
->next
)
25087 default_version_info
->next
->prev
= default_version_info
->prev
;
25088 first_v
->prev
= default_version_info
;
25089 default_version_info
->next
= first_v
;
25090 default_version_info
->prev
= NULL
;
25093 default_node
= default_version_info
->this_node
;
25095 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
25096 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
25097 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
25098 "exports hardware capability bits");
25101 if (targetm
.has_ifunc_p ())
25103 struct cgraph_function_version_info
*it_v
= NULL
;
25104 struct cgraph_node
*dispatcher_node
= NULL
;
25105 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
25107 /* Right now, the dispatching is done via ifunc. */
25108 dispatch_decl
= make_dispatcher_decl (default_node
->decl
);
25109 TREE_NOTHROW (dispatch_decl
) = TREE_NOTHROW (fn
);
25111 dispatcher_node
= cgraph_node::get_create (dispatch_decl
);
25112 gcc_assert (dispatcher_node
!= NULL
);
25113 dispatcher_node
->dispatcher_function
= 1;
25114 dispatcher_version_info
25115 = dispatcher_node
->insert_new_function_version ();
25116 dispatcher_version_info
->next
= default_version_info
;
25117 dispatcher_node
->definition
= 1;
25119 /* Set the dispatcher for all the versions. */
25120 it_v
= default_version_info
;
25121 while (it_v
!= NULL
)
25123 it_v
->dispatcher_resolver
= dispatch_decl
;
25129 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
25130 "multiversioning needs %<ifunc%> which is not supported "
25135 return dispatch_decl
;
25138 /* Make the resolver function decl to dispatch the versions of a multi-
25139 versioned function, DEFAULT_DECL. Create an empty basic block in the
25140 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
25144 make_resolver_func (const tree default_decl
,
25145 const tree dispatch_decl
,
25146 basic_block
*empty_bb
)
25148 /* Make the resolver function static. The resolver function returns
25150 tree decl_name
= clone_function_name (default_decl
, "resolver");
25151 const char *resolver_name
= IDENTIFIER_POINTER (decl_name
);
25152 tree type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
25153 tree decl
= build_fn_decl (resolver_name
, type
);
25154 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
25156 DECL_NAME (decl
) = decl_name
;
25157 TREE_USED (decl
) = 1;
25158 DECL_ARTIFICIAL (decl
) = 1;
25159 DECL_IGNORED_P (decl
) = 0;
25160 TREE_PUBLIC (decl
) = 0;
25161 DECL_UNINLINABLE (decl
) = 1;
25163 /* Resolver is not external, body is generated. */
25164 DECL_EXTERNAL (decl
) = 0;
25165 DECL_EXTERNAL (dispatch_decl
) = 0;
25167 DECL_CONTEXT (decl
) = NULL_TREE
;
25168 DECL_INITIAL (decl
) = make_node (BLOCK
);
25169 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
25171 if (DECL_COMDAT_GROUP (default_decl
)
25172 || TREE_PUBLIC (default_decl
))
25174 /* In this case, each translation unit with a call to this
25175 versioned function will put out a resolver. Ensure it
25176 is comdat to keep just one copy. */
25177 DECL_COMDAT (decl
) = 1;
25178 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
25181 TREE_PUBLIC (dispatch_decl
) = 0;
25183 /* Build result decl and add to function_decl. */
25184 tree t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
25185 DECL_CONTEXT (t
) = decl
;
25186 DECL_ARTIFICIAL (t
) = 1;
25187 DECL_IGNORED_P (t
) = 1;
25188 DECL_RESULT (decl
) = t
;
25190 gimplify_function_tree (decl
);
25191 push_cfun (DECL_STRUCT_FUNCTION (decl
));
25192 *empty_bb
= init_lowered_empty_function (decl
, false,
25193 profile_count::uninitialized ());
25195 cgraph_node::add_new_function (decl
, true);
25196 symtab
->call_cgraph_insertion_hooks (cgraph_node::get_create (decl
));
25200 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
25201 DECL_ATTRIBUTES (dispatch_decl
)
25202 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
25204 cgraph_node::create_same_body_alias (dispatch_decl
, decl
);
25209 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25210 return a pointer to VERSION_DECL if we are running on a machine that
25211 supports the index CLONE_ISA hardware architecture bits. This function will
25212 be called during version dispatch to decide which function version to
25213 execute. It returns the basic block at the end, to which more conditions
25217 add_condition_to_bb (tree function_decl
, tree version_decl
,
25218 int clone_isa
, basic_block new_bb
)
25220 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
25222 gcc_assert (new_bb
!= NULL
);
25223 gimple_seq gseq
= bb_seq (new_bb
);
25226 tree convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
25227 build_fold_addr_expr (version_decl
));
25228 tree result_var
= create_tmp_var (ptr_type_node
);
25229 gimple
*convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
25230 gimple
*return_stmt
= gimple_build_return (result_var
);
25232 if (clone_isa
== CLONE_DEFAULT
)
25234 gimple_seq_add_stmt (&gseq
, convert_stmt
);
25235 gimple_seq_add_stmt (&gseq
, return_stmt
);
25236 set_bb_seq (new_bb
, gseq
);
25237 gimple_set_bb (convert_stmt
, new_bb
);
25238 gimple_set_bb (return_stmt
, new_bb
);
25243 tree bool_zero
= build_int_cst (bool_int_type_node
, 0);
25244 tree cond_var
= create_tmp_var (bool_int_type_node
);
25245 tree predicate_decl
= rs6000_builtin_decls
[(int) RS6000_BIF_CPU_SUPPORTS
];
25246 const char *arg_str
= rs6000_clone_map
[clone_isa
].name
;
25247 tree predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
25248 gimple
*call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
25249 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
25251 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
25252 gimple_set_bb (call_cond_stmt
, new_bb
);
25253 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
25255 gimple
*if_else_stmt
= gimple_build_cond (NE_EXPR
, cond_var
, bool_zero
,
25256 NULL_TREE
, NULL_TREE
);
25257 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
25258 gimple_set_bb (if_else_stmt
, new_bb
);
25259 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
25261 gimple_seq_add_stmt (&gseq
, convert_stmt
);
25262 gimple_seq_add_stmt (&gseq
, return_stmt
);
25263 set_bb_seq (new_bb
, gseq
);
25265 basic_block bb1
= new_bb
;
25266 edge e12
= split_block (bb1
, if_else_stmt
);
25267 basic_block bb2
= e12
->dest
;
25268 e12
->flags
&= ~EDGE_FALLTHRU
;
25269 e12
->flags
|= EDGE_TRUE_VALUE
;
25271 edge e23
= split_block (bb2
, return_stmt
);
25272 gimple_set_bb (convert_stmt
, bb2
);
25273 gimple_set_bb (return_stmt
, bb2
);
25275 basic_block bb3
= e23
->dest
;
25276 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
25279 make_edge (bb2
, EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
25285 /* This function generates the dispatch function for multi-versioned functions.
25286 DISPATCH_DECL is the function which will contain the dispatch logic.
25287 FNDECLS are the function choices for dispatch, and is a tree chain.
25288 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25289 code is generated. */
25292 dispatch_function_versions (tree dispatch_decl
,
25294 basic_block
*empty_bb
)
25298 vec
<tree
> *fndecls
;
25299 tree clones
[CLONE_MAX
];
25301 if (TARGET_DEBUG_TARGET
)
25302 fputs ("dispatch_function_versions, top\n", stderr
);
25304 gcc_assert (dispatch_decl
!= NULL
25305 && fndecls_p
!= NULL
25306 && empty_bb
!= NULL
);
25308 /* fndecls_p is actually a vector. */
25309 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
25311 /* At least one more version other than the default. */
25312 gcc_assert (fndecls
->length () >= 2);
25314 /* The first version in the vector is the default decl. */
25315 memset ((void *) clones
, '\0', sizeof (clones
));
25316 clones
[CLONE_DEFAULT
] = (*fndecls
)[0];
25318 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25319 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
25320 __builtin_cpu_support ensures that the TOC fields are setup by requiring a
25321 recent glibc. If we ever need to call __builtin_cpu_init, we would need
25322 to insert the code here to do the call. */
25324 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
25326 int priority
= rs6000_clone_priority (ele
);
25327 if (!clones
[priority
])
25328 clones
[priority
] = ele
;
25331 for (ix
= CLONE_MAX
- 1; ix
>= 0; ix
--)
25334 if (TARGET_DEBUG_TARGET
)
25335 fprintf (stderr
, "dispatch_function_versions, clone %d, %s\n",
25336 ix
, get_decl_name (clones
[ix
]));
25338 *empty_bb
= add_condition_to_bb (dispatch_decl
, clones
[ix
], ix
,
25345 /* Generate the dispatching code body to dispatch multi-versioned function
25346 DECL. The target hook is called to process the "target" attributes and
25347 provide the code to dispatch the right function at run-time. NODE points
25348 to the dispatcher decl whose body will be created. */
25351 rs6000_generate_version_dispatcher_body (void *node_p
)
25354 basic_block empty_bb
;
25355 struct cgraph_node
*node
= (cgraph_node
*) node_p
;
25356 struct cgraph_function_version_info
*ninfo
= node
->function_version ();
25358 if (ninfo
->dispatcher_resolver
)
25359 return ninfo
->dispatcher_resolver
;
25361 /* node is going to be an alias, so remove the finalized bit. */
25362 node
->definition
= false;
25364 /* The first version in the chain corresponds to the default version. */
25365 ninfo
->dispatcher_resolver
= resolver
25366 = make_resolver_func (ninfo
->next
->this_node
->decl
, node
->decl
, &empty_bb
);
25368 if (TARGET_DEBUG_TARGET
)
25369 fprintf (stderr
, "rs6000_get_function_versions_dispatcher, %s\n",
25370 get_decl_name (resolver
));
25372 push_cfun (DECL_STRUCT_FUNCTION (resolver
));
25373 auto_vec
<tree
, 2> fn_ver_vec
;
25375 for (struct cgraph_function_version_info
*vinfo
= ninfo
->next
;
25377 vinfo
= vinfo
->next
)
25379 struct cgraph_node
*version
= vinfo
->this_node
;
25380 /* Check for virtual functions here again, as by this time it should
25381 have been determined if this function needs a vtable index or
25382 not. This happens for methods in derived classes that override
25383 virtual methods in base classes but are not explicitly marked as
25385 if (DECL_VINDEX (version
->decl
))
25386 sorry ("Virtual function multiversioning not supported");
25388 fn_ver_vec
.safe_push (version
->decl
);
25391 dispatch_function_versions (resolver
, &fn_ver_vec
, &empty_bb
);
25392 cgraph_edge::rebuild_edges ();
25397 /* Hook to decide if we need to scan function gimple statements to
25398 collect target specific information for inlining, and update the
25399 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25400 to predict which ISA feature is used at this time. Return true
25401 if we need to scan, otherwise return false. */
25404 rs6000_need_ipa_fn_target_info (const_tree decl
,
25405 unsigned int &info ATTRIBUTE_UNUSED
)
25407 tree target
= DECL_FUNCTION_SPECIFIC_TARGET (decl
);
25409 target
= target_option_default_node
;
25410 struct cl_target_option
*opts
= TREE_TARGET_OPTION (target
);
25412 /* See PR102059, we only handle HTM for now, so will only do
25413 the consequent scannings when HTM feature enabled. */
25414 if (opts
->x_rs6000_isa_flags
& OPTION_MASK_HTM
)
25420 /* Hook to update target specific information INFO for inlining by
25421 checking the given STMT. Return false if we don't need to scan
25422 any more, otherwise return true. */
25425 rs6000_update_ipa_fn_target_info (unsigned int &info
, const gimple
*stmt
)
25427 /* Assume inline asm can use any instruction features. */
25428 if (gimple_code (stmt
) == GIMPLE_ASM
)
25430 /* Should set any bits we concerned, for now OPTION_MASK_HTM is
25431 the only bit we care about. */
25432 info
|= RS6000_FN_TARGET_INFO_HTM
;
25435 else if (gimple_code (stmt
) == GIMPLE_CALL
)
25437 tree fndecl
= gimple_call_fndecl (stmt
);
25438 if (fndecl
&& fndecl_built_in_p (fndecl
, BUILT_IN_MD
))
25440 enum rs6000_gen_builtins fcode
25441 = (enum rs6000_gen_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
25442 /* HTM bifs definitely exploit HTM insns. */
25443 if (bif_is_htm (rs6000_builtin_info
[fcode
]))
25445 info
|= RS6000_FN_TARGET_INFO_HTM
;
25454 /* Hook to determine if one function can safely inline another. */
25457 rs6000_can_inline_p (tree caller
, tree callee
)
25460 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
25461 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
25463 /* If the callee has no option attributes, then it is ok to inline. */
25469 HOST_WIDE_INT caller_isa
;
25470 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
25471 HOST_WIDE_INT callee_isa
= callee_opts
->x_rs6000_isa_flags
;
25472 HOST_WIDE_INT explicit_isa
= callee_opts
->x_rs6000_isa_flags_explicit
;
25474 /* If the caller has option attributes, then use them.
25475 Otherwise, use the command line options. */
25477 caller_isa
= TREE_TARGET_OPTION (caller_tree
)->x_rs6000_isa_flags
;
25479 caller_isa
= rs6000_isa_flags
;
25481 cgraph_node
*callee_node
= cgraph_node::get (callee
);
25482 if (ipa_fn_summaries
&& ipa_fn_summaries
->get (callee_node
) != NULL
)
25484 unsigned int info
= ipa_fn_summaries
->get (callee_node
)->target_info
;
25485 if ((info
& RS6000_FN_TARGET_INFO_HTM
) == 0)
25487 callee_isa
&= ~OPTION_MASK_HTM
;
25488 explicit_isa
&= ~OPTION_MASK_HTM
;
25492 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25494 callee_isa
&= ~(OPTION_MASK_P8_FUSION
| OPTION_MASK_P10_FUSION
);
25495 explicit_isa
&= ~(OPTION_MASK_P8_FUSION
| OPTION_MASK_P10_FUSION
);
25497 /* The callee's options must be a subset of the caller's options, i.e.
25498 a vsx function may inline an altivec function, but a no-vsx function
25499 must not inline a vsx function. However, for those options that the
25500 callee has explicitly enabled or disabled, then we must enforce that
25501 the callee's and caller's options match exactly; see PR70010. */
25502 if (((caller_isa
& callee_isa
) == callee_isa
)
25503 && (caller_isa
& explicit_isa
) == (callee_isa
& explicit_isa
))
25507 if (TARGET_DEBUG_TARGET
)
25508 fprintf (stderr
, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
25509 get_decl_name (caller
), get_decl_name (callee
),
25510 (ret
? "can" : "cannot"));
25515 /* Allocate a stack temp and fixup the address so it meets the particular
25516 memory requirements (either offetable or REG+REG addressing). */
25519 rs6000_allocate_stack_temp (machine_mode mode
,
25520 bool offsettable_p
,
25523 rtx stack
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
25524 rtx addr
= XEXP (stack
, 0);
25525 int strict_p
= reload_completed
;
25527 if (!legitimate_indirect_address_p (addr
, strict_p
))
25530 && !rs6000_legitimate_offset_address_p (mode
, addr
, strict_p
, true))
25531 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
25533 else if (reg_reg_p
&& !legitimate_indexed_address_p (addr
, strict_p
))
25534 stack
= replace_equiv_address (stack
, copy_addr_to_reg (addr
));
25540 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25541 convert to such a form to deal with memory reference instructions
25542 like STFIWX and LDBRX that only take reg+reg addressing. */
25545 rs6000_force_indexed_or_indirect_mem (rtx x
)
25547 machine_mode mode
= GET_MODE (x
);
25549 gcc_assert (MEM_P (x
));
25550 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x
, mode
))
25552 rtx addr
= XEXP (x
, 0);
25553 if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
25555 rtx reg
= XEXP (addr
, 0);
25556 HOST_WIDE_INT size
= GET_MODE_SIZE (GET_MODE (x
));
25557 rtx size_rtx
= GEN_INT ((GET_CODE (addr
) == PRE_DEC
) ? -size
: size
);
25558 gcc_assert (REG_P (reg
));
25559 emit_insn (gen_add3_insn (reg
, reg
, size_rtx
));
25562 else if (GET_CODE (addr
) == PRE_MODIFY
)
25564 rtx reg
= XEXP (addr
, 0);
25565 rtx expr
= XEXP (addr
, 1);
25566 gcc_assert (REG_P (reg
));
25567 gcc_assert (GET_CODE (expr
) == PLUS
);
25568 emit_insn (gen_add3_insn (reg
, XEXP (expr
, 0), XEXP (expr
, 1)));
25572 if (GET_CODE (addr
) == PLUS
)
25574 rtx op0
= XEXP (addr
, 0);
25575 rtx op1
= XEXP (addr
, 1);
25576 op0
= force_reg (Pmode
, op0
);
25577 op1
= force_reg (Pmode
, op1
);
25578 x
= replace_equiv_address (x
, gen_rtx_PLUS (Pmode
, op0
, op1
));
25581 x
= replace_equiv_address (x
, force_reg (Pmode
, addr
));
25587 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25589 On the RS/6000, all integer constants are acceptable, most won't be valid
25590 for particular insns, though. Only easy FP constants are acceptable. */
25593 rs6000_legitimate_constant_p (machine_mode mode
, rtx x
)
25595 if (TARGET_ELF
&& tls_referenced_p (x
))
25598 if (CONST_DOUBLE_P (x
))
25599 return easy_fp_constant (x
, mode
);
25601 if (GET_CODE (x
) == CONST_VECTOR
)
25602 return easy_vector_constant (x
, mode
);
25608 /* Implement TARGET_PRECOMPUTE_TLS_P.
25610 On the AIX, TLS symbols are in the TOC, which is maintained in the
25611 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25612 must be considered legitimate constants. */
25615 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
25617 return tls_referenced_p (x
);
25622 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25625 chain_already_loaded (rtx_insn
*last
)
25627 for (; last
!= NULL
; last
= PREV_INSN (last
))
25629 if (NONJUMP_INSN_P (last
))
25631 rtx patt
= PATTERN (last
);
25633 if (GET_CODE (patt
) == SET
)
25635 rtx lhs
= XEXP (patt
, 0);
25637 if (REG_P (lhs
) && REGNO (lhs
) == STATIC_CHAIN_REGNUM
)
25645 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25648 rs6000_call_aix (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25650 rtx func
= func_desc
;
25651 rtx toc_reg
= gen_rtx_REG (Pmode
, TOC_REGNUM
);
25652 rtx toc_load
= NULL_RTX
;
25653 rtx toc_restore
= NULL_RTX
;
25655 rtx abi_reg
= NULL_RTX
;
25659 bool is_pltseq_longcall
;
25662 tlsarg
= global_tlsarg
;
25664 /* Handle longcall attributes. */
25665 is_pltseq_longcall
= false;
25666 if ((INTVAL (cookie
) & CALL_LONG
) != 0
25667 && GET_CODE (func_desc
) == SYMBOL_REF
)
25669 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
25671 is_pltseq_longcall
= true;
25674 /* Handle indirect calls. */
25675 if (!SYMBOL_REF_P (func
)
25676 || (DEFAULT_ABI
== ABI_AIX
&& !SYMBOL_REF_FUNCTION_P (func
)))
25678 if (!rs6000_pcrel_p ())
25680 /* Save the TOC into its reserved slot before the call,
25681 and prepare to restore it after the call. */
25682 rtx stack_toc_offset
= GEN_INT (RS6000_TOC_SAVE_SLOT
);
25683 rtx stack_toc_unspec
= gen_rtx_UNSPEC (Pmode
,
25684 gen_rtvec (1, stack_toc_offset
),
25686 toc_restore
= gen_rtx_SET (toc_reg
, stack_toc_unspec
);
25688 /* Can we optimize saving the TOC in the prologue or
25689 do we need to do it at every call? */
25690 if (TARGET_SAVE_TOC_INDIRECT
&& !cfun
->calls_alloca
)
25691 cfun
->machine
->save_toc_in_prologue
= true;
25694 rtx stack_ptr
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
25695 rtx stack_toc_mem
= gen_frame_mem (Pmode
,
25696 gen_rtx_PLUS (Pmode
, stack_ptr
,
25697 stack_toc_offset
));
25698 MEM_VOLATILE_P (stack_toc_mem
) = 1;
25699 if (is_pltseq_longcall
)
25701 rtvec v
= gen_rtvec (3, toc_reg
, func_desc
, tlsarg
);
25702 rtx mark_toc_reg
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25703 emit_insn (gen_rtx_SET (stack_toc_mem
, mark_toc_reg
));
25706 emit_move_insn (stack_toc_mem
, toc_reg
);
25710 if (DEFAULT_ABI
== ABI_ELFv2
)
25712 /* A function pointer in the ELFv2 ABI is just a plain address, but
25713 the ABI requires it to be loaded into r12 before the call. */
25714 func_addr
= gen_rtx_REG (Pmode
, 12);
25715 emit_move_insn (func_addr
, func
);
25716 abi_reg
= func_addr
;
25717 /* Indirect calls via CTR are strongly preferred over indirect
25718 calls via LR, so move the address there. Needed to mark
25719 this insn for linker plt sequence editing too. */
25720 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25721 if (is_pltseq_longcall
)
25723 rtvec v
= gen_rtvec (3, abi_reg
, func_desc
, tlsarg
);
25724 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25725 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
25726 v
= gen_rtvec (2, func_addr
, func_desc
);
25727 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25730 emit_move_insn (func_addr
, abi_reg
);
25734 /* A function pointer under AIX is a pointer to a data area whose
25735 first word contains the actual address of the function, whose
25736 second word contains a pointer to its TOC, and whose third word
25737 contains a value to place in the static chain register (r11).
25738 Note that if we load the static chain, our "trampoline" need
25739 not have any executable code. */
25741 /* Load up address of the actual function. */
25742 func
= force_reg (Pmode
, func
);
25743 func_addr
= gen_reg_rtx (Pmode
);
25744 emit_move_insn (func_addr
, gen_rtx_MEM (Pmode
, func
));
25746 /* Indirect calls via CTR are strongly preferred over indirect
25747 calls via LR, so move the address there. */
25748 rtx ctr_reg
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25749 emit_move_insn (ctr_reg
, func_addr
);
25750 func_addr
= ctr_reg
;
25752 /* Prepare to load the TOC of the called function. Note that the
25753 TOC load must happen immediately before the actual call so
25754 that unwinding the TOC registers works correctly. See the
25755 comment in frob_update_context. */
25756 rtx func_toc_offset
= GEN_INT (GET_MODE_SIZE (Pmode
));
25757 rtx func_toc_mem
= gen_rtx_MEM (Pmode
,
25758 gen_rtx_PLUS (Pmode
, func
,
25760 toc_load
= gen_rtx_USE (VOIDmode
, func_toc_mem
);
25762 /* If we have a static chain, load it up. But, if the call was
25763 originally direct, the 3rd word has not been written since no
25764 trampoline has been built, so we ought not to load it, lest we
25765 override a static chain value. */
25766 if (!(GET_CODE (func_desc
) == SYMBOL_REF
25767 && SYMBOL_REF_FUNCTION_P (func_desc
))
25768 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25769 && !chain_already_loaded (get_current_sequence ()->next
->last
))
25771 rtx sc_reg
= gen_rtx_REG (Pmode
, STATIC_CHAIN_REGNUM
);
25772 rtx func_sc_offset
= GEN_INT (2 * GET_MODE_SIZE (Pmode
));
25773 rtx func_sc_mem
= gen_rtx_MEM (Pmode
,
25774 gen_rtx_PLUS (Pmode
, func
,
25776 emit_move_insn (sc_reg
, func_sc_mem
);
25783 /* No TOC register needed for calls from PC-relative callers. */
25784 if (!rs6000_pcrel_p ())
25785 /* Direct calls use the TOC: for local calls, the callee will
25786 assume the TOC register is set; for non-local calls, the
25787 PLT stub needs the TOC register. */
25792 /* Create the call. */
25793 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25794 if (value
!= NULL_RTX
)
25795 call
[0] = gen_rtx_SET (value
, call
[0]);
25796 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
25800 call
[n_call
++] = toc_load
;
25802 call
[n_call
++] = toc_restore
;
25804 call
[n_call
++] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
25806 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n_call
, call
));
25807 insn
= emit_call_insn (insn
);
25809 /* Mention all registers defined by the ABI to hold information
25810 as uses in CALL_INSN_FUNCTION_USAGE. */
25812 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
25815 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
25818 rs6000_sibcall_aix (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25822 rtx r12
= NULL_RTX
;
25823 rtx func_addr
= func_desc
;
25826 tlsarg
= global_tlsarg
;
25828 /* Handle longcall attributes. */
25829 if (INTVAL (cookie
) & CALL_LONG
&& SYMBOL_REF_P (func_desc
))
25831 /* PCREL can do a sibling call to a longcall function
25832 because we don't need to restore the TOC register. */
25833 gcc_assert (rs6000_pcrel_p ());
25834 func_desc
= rs6000_longcall_ref (func_desc
, tlsarg
);
25837 gcc_assert (INTVAL (cookie
) == 0);
25839 /* For ELFv2, r12 and CTR need to hold the function address
25840 for an indirect call. */
25841 if (GET_CODE (func_desc
) != SYMBOL_REF
&& DEFAULT_ABI
== ABI_ELFv2
)
25843 r12
= gen_rtx_REG (Pmode
, 12);
25844 emit_move_insn (r12
, func_desc
);
25845 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25846 emit_move_insn (func_addr
, r12
);
25849 /* Create the call. */
25850 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25851 if (value
!= NULL_RTX
)
25852 call
[0] = gen_rtx_SET (value
, call
[0]);
25854 call
[1] = simple_return_rtx
;
25856 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (2, call
));
25857 insn
= emit_call_insn (insn
);
25859 /* Note use of the TOC register. */
25860 if (!rs6000_pcrel_p ())
25861 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
),
25862 gen_rtx_REG (Pmode
, TOC_REGNUM
));
25864 /* Note use of r12. */
25866 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), r12
);
25869 /* Expand code to perform a call under the SYSV4 ABI. */
25872 rs6000_call_sysv (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25874 rtx func
= func_desc
;
25878 rtx abi_reg
= NULL_RTX
;
25882 tlsarg
= global_tlsarg
;
25884 /* Handle longcall attributes. */
25885 if ((INTVAL (cookie
) & CALL_LONG
) != 0
25886 && GET_CODE (func_desc
) == SYMBOL_REF
)
25888 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
25889 /* If the longcall was implemented as an inline PLT call using
25890 PLT unspecs then func will be REG:r11. If not, func will be
25891 a pseudo reg. The inline PLT call sequence supports lazy
25892 linking (and longcalls to functions in dlopen'd libraries).
25893 The other style of longcalls don't. The lazy linking entry
25894 to the dynamic symbol resolver requires r11 be the function
25895 address (as it is for linker generated PLT stubs). Ensure
25896 r11 stays valid to the bctrl by marking r11 used by the call. */
25901 /* Handle indirect calls. */
25902 if (GET_CODE (func
) != SYMBOL_REF
)
25904 func
= force_reg (Pmode
, func
);
25906 /* Indirect calls via CTR are strongly preferred over indirect
25907 calls via LR, so move the address there. That can't be left
25908 to reload because we want to mark every instruction in an
25909 inline PLT call sequence with a reloc, enabling the linker to
25910 edit the sequence back to a direct call when that makes sense. */
25911 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25914 rtvec v
= gen_rtvec (3, func
, func_desc
, tlsarg
);
25915 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25916 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
25917 v
= gen_rtvec (2, func_addr
, func_desc
);
25918 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25921 emit_move_insn (func_addr
, func
);
25926 /* Create the call. */
25927 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
25928 if (value
!= NULL_RTX
)
25929 call
[0] = gen_rtx_SET (value
, call
[0]);
25931 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
25933 if (TARGET_SECURE_PLT
25935 && GET_CODE (func_addr
) == SYMBOL_REF
25936 && !SYMBOL_REF_LOCAL_P (func_addr
))
25937 call
[n
++] = gen_rtx_USE (VOIDmode
, pic_offset_table_rtx
);
25939 call
[n
++] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
25941 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (n
, call
));
25942 insn
= emit_call_insn (insn
);
25944 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
25947 /* Expand code to perform a sibling call under the SysV4 ABI. */
25950 rs6000_sibcall_sysv (rtx value
, rtx func_desc
, rtx tlsarg
, rtx cookie
)
25952 rtx func
= func_desc
;
25956 rtx abi_reg
= NULL_RTX
;
25959 tlsarg
= global_tlsarg
;
25961 /* Handle longcall attributes. */
25962 if ((INTVAL (cookie
) & CALL_LONG
) != 0
25963 && GET_CODE (func_desc
) == SYMBOL_REF
)
25965 func
= rs6000_longcall_ref (func_desc
, tlsarg
);
25966 /* If the longcall was implemented as an inline PLT call using
25967 PLT unspecs then func will be REG:r11. If not, func will be
25968 a pseudo reg. The inline PLT call sequence supports lazy
25969 linking (and longcalls to functions in dlopen'd libraries).
25970 The other style of longcalls don't. The lazy linking entry
25971 to the dynamic symbol resolver requires r11 be the function
25972 address (as it is for linker generated PLT stubs). Ensure
25973 r11 stays valid to the bctr by marking r11 used by the call. */
25978 /* Handle indirect calls. */
25979 if (GET_CODE (func
) != SYMBOL_REF
)
25981 func
= force_reg (Pmode
, func
);
25983 /* Indirect sibcalls must go via CTR. That can't be left to
25984 reload because we want to mark every instruction in an inline
25985 PLT call sequence with a reloc, enabling the linker to edit
25986 the sequence back to a direct call when that makes sense. */
25987 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
25990 rtvec v
= gen_rtvec (3, func
, func_desc
, tlsarg
);
25991 rtx mark_func
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25992 emit_insn (gen_rtx_SET (func_addr
, mark_func
));
25993 v
= gen_rtvec (2, func_addr
, func_desc
);
25994 func_addr
= gen_rtx_UNSPEC (Pmode
, v
, UNSPEC_PLTSEQ
);
25997 emit_move_insn (func_addr
, func
);
26002 /* Create the call. */
26003 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26004 if (value
!= NULL_RTX
)
26005 call
[0] = gen_rtx_SET (value
, call
[0]);
26007 call
[1] = gen_rtx_USE (VOIDmode
, cookie
);
26008 call
[2] = simple_return_rtx
;
26010 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
26011 insn
= emit_call_insn (insn
);
26013 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), abi_reg
);
26018 /* Expand code to perform a call under the Darwin ABI.
26019 Modulo handling of mlongcall, this is much the same as sysv.
26020 if/when the longcall optimisation is removed, we could drop this
26021 code and use the sysv case (taking care to avoid the tls stuff).
26023 We can use this for sibcalls too, if needed. */
26026 rs6000_call_darwin_1 (rtx value
, rtx func_desc
, rtx tlsarg
,
26027 rtx cookie
, bool sibcall
)
26029 rtx func
= func_desc
;
26033 int cookie_val
= INTVAL (cookie
);
26034 bool make_island
= false;
26036 /* Handle longcall attributes, there are two cases for Darwin:
26037 1) Newer linkers are capable of synthesising any branch islands needed.
26038 2) We need a helper branch island synthesised by the compiler.
26039 The second case has mostly been retired and we don't use it for m64.
26040 In fact, it's is an optimisation, we could just indirect as sysv does..
26041 ... however, backwards compatibility for now.
26042 If we're going to use this, then we need to keep the CALL_LONG bit set,
26043 so that we can pick up the special insn form later. */
26044 if ((cookie_val
& CALL_LONG
) != 0
26045 && GET_CODE (func_desc
) == SYMBOL_REF
)
26047 /* FIXME: the longcall opt should not hang off this flag, it is most
26048 likely incorrect for kernel-mode code-generation. */
26049 if (darwin_symbol_stubs
&& TARGET_32BIT
)
26050 make_island
= true; /* Do nothing yet, retain the CALL_LONG flag. */
26053 /* The linker is capable of doing this, but the user explicitly
26054 asked for -mlongcall, so we'll do the 'normal' version. */
26055 func
= rs6000_longcall_ref (func_desc
, NULL_RTX
);
26056 cookie_val
&= ~CALL_LONG
; /* Handled, zap it. */
26060 /* Handle indirect calls. */
26061 if (GET_CODE (func
) != SYMBOL_REF
)
26063 func
= force_reg (Pmode
, func
);
26065 /* Indirect calls via CTR are strongly preferred over indirect
26066 calls via LR, and are required for indirect sibcalls, so move
26067 the address there. */
26068 func_addr
= gen_rtx_REG (Pmode
, CTR_REGNO
);
26069 emit_move_insn (func_addr
, func
);
26074 /* Create the call. */
26075 call
[0] = gen_rtx_CALL (VOIDmode
, gen_rtx_MEM (SImode
, func_addr
), tlsarg
);
26076 if (value
!= NULL_RTX
)
26077 call
[0] = gen_rtx_SET (value
, call
[0]);
26079 call
[1] = gen_rtx_USE (VOIDmode
, GEN_INT (cookie_val
));
26082 call
[2] = simple_return_rtx
;
26084 call
[2] = gen_hard_reg_clobber (Pmode
, LR_REGNO
);
26086 insn
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (3, call
));
26087 insn
= emit_call_insn (insn
);
26088 /* Now we have the debug info in the insn, we can set up the branch island
26089 if we're using one. */
26092 tree funname
= get_identifier (XSTR (func_desc
, 0));
26094 if (no_previous_def (funname
))
26096 rtx label_rtx
= gen_label_rtx ();
26097 char *label_buf
, temp_buf
[256];
26098 ASM_GENERATE_INTERNAL_LABEL (temp_buf
, "L",
26099 CODE_LABEL_NUMBER (label_rtx
));
26100 label_buf
= temp_buf
[0] == '*' ? temp_buf
+ 1 : temp_buf
;
26101 tree labelname
= get_identifier (label_buf
);
26102 add_compiler_branch_island (labelname
, funname
,
26103 insn_line ((const rtx_insn
*)insn
));
26110 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
26111 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
26114 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, false);
26122 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED
, rtx func_desc ATTRIBUTE_UNUSED
,
26123 rtx tlsarg ATTRIBUTE_UNUSED
, rtx cookie ATTRIBUTE_UNUSED
)
26126 rs6000_call_darwin_1 (value
, func_desc
, tlsarg
, cookie
, true);
26132 /* Return whether we should generate PC-relative code for FNDECL. */
26134 rs6000_fndecl_pcrel_p (const_tree fndecl
)
26136 if (DEFAULT_ABI
!= ABI_ELFv2
)
26139 struct cl_target_option
*opts
= target_opts_for_fn (fndecl
);
26141 return ((opts
->x_rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
26142 && TARGET_CMODEL
== CMODEL_MEDIUM
);
26145 /* Return whether we should generate PC-relative code for *FN. */
26147 rs6000_function_pcrel_p (struct function
*fn
)
26149 if (DEFAULT_ABI
!= ABI_ELFv2
)
26152 /* Optimize usual case. */
26154 return ((rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
26155 && TARGET_CMODEL
== CMODEL_MEDIUM
);
26157 return rs6000_fndecl_pcrel_p (fn
->decl
);
26160 /* Return whether we should generate PC-relative code for the current
26165 return (DEFAULT_ABI
== ABI_ELFv2
26166 && (rs6000_isa_flags
& OPTION_MASK_PCREL
) != 0
26167 && TARGET_CMODEL
== CMODEL_MEDIUM
);
26171 /* Given an address (ADDR), a mode (MODE), and what the format of the
26172 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26173 for the address. */
26176 address_to_insn_form (rtx addr
,
26178 enum non_prefixed_form non_prefixed_format
)
26180 /* Single register is easy. */
26181 if (REG_P (addr
) || SUBREG_P (addr
))
26182 return INSN_FORM_BASE_REG
;
26184 /* If the non prefixed instruction format doesn't support offset addressing,
26185 make sure only indexed addressing is allowed.
26187 We special case SDmode so that the register allocator does not try to move
26188 SDmode through GPR registers, but instead uses the 32-bit integer load and
26189 store instructions for the floating point registers. */
26190 if (non_prefixed_format
== NON_PREFIXED_X
|| (mode
== SDmode
&& TARGET_DFP
))
26192 if (GET_CODE (addr
) != PLUS
)
26193 return INSN_FORM_BAD
;
26195 rtx op0
= XEXP (addr
, 0);
26196 rtx op1
= XEXP (addr
, 1);
26197 if (!REG_P (op0
) && !SUBREG_P (op0
))
26198 return INSN_FORM_BAD
;
26200 if (!REG_P (op1
) && !SUBREG_P (op1
))
26201 return INSN_FORM_BAD
;
26203 return INSN_FORM_X
;
26206 /* Deal with update forms. */
26207 if (GET_RTX_CLASS (GET_CODE (addr
)) == RTX_AUTOINC
)
26208 return INSN_FORM_UPDATE
;
26210 /* Handle PC-relative symbols and labels. Check for both local and
26211 external symbols. Assume labels are always local. TLS symbols
26212 are not PC-relative for rs6000. */
26215 if (LABEL_REF_P (addr
))
26216 return INSN_FORM_PCREL_LOCAL
;
26218 if (SYMBOL_REF_P (addr
) && !SYMBOL_REF_TLS_MODEL (addr
))
26220 if (!SYMBOL_REF_LOCAL_P (addr
))
26221 return INSN_FORM_PCREL_EXTERNAL
;
26223 return INSN_FORM_PCREL_LOCAL
;
26227 if (GET_CODE (addr
) == CONST
)
26228 addr
= XEXP (addr
, 0);
26230 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26231 if (GET_CODE (addr
) == LO_SUM
)
26232 return INSN_FORM_LO_SUM
;
26234 /* Everything below must be an offset address of some form. */
26235 if (GET_CODE (addr
) != PLUS
)
26236 return INSN_FORM_BAD
;
26238 rtx op0
= XEXP (addr
, 0);
26239 rtx op1
= XEXP (addr
, 1);
26241 /* Check for indexed addresses. */
26242 if (REG_P (op1
) || SUBREG_P (op1
))
26244 if (REG_P (op0
) || SUBREG_P (op0
))
26245 return INSN_FORM_X
;
26247 return INSN_FORM_BAD
;
26250 if (!CONST_INT_P (op1
))
26251 return INSN_FORM_BAD
;
26253 HOST_WIDE_INT offset
= INTVAL (op1
);
26254 if (!SIGNED_INTEGER_34BIT_P (offset
))
26255 return INSN_FORM_BAD
;
26257 /* Check for local and external PC-relative addresses. Labels are always
26258 local. TLS symbols are not PC-relative for rs6000. */
26261 if (LABEL_REF_P (op0
))
26262 return INSN_FORM_PCREL_LOCAL
;
26264 if (SYMBOL_REF_P (op0
) && !SYMBOL_REF_TLS_MODEL (op0
))
26266 if (!SYMBOL_REF_LOCAL_P (op0
))
26267 return INSN_FORM_PCREL_EXTERNAL
;
26269 return INSN_FORM_PCREL_LOCAL
;
26273 /* If it isn't PC-relative, the address must use a base register. */
26274 if (!REG_P (op0
) && !SUBREG_P (op0
))
26275 return INSN_FORM_BAD
;
26277 /* Large offsets must be prefixed. */
26278 if (!SIGNED_INTEGER_16BIT_P (offset
))
26280 if (TARGET_PREFIXED
)
26281 return INSN_FORM_PREFIXED_NUMERIC
;
26283 return INSN_FORM_BAD
;
26286 /* We have a 16-bit offset, see what default instruction format to use. */
26287 if (non_prefixed_format
== NON_PREFIXED_DEFAULT
)
26289 unsigned size
= GET_MODE_SIZE (mode
);
26291 /* On 64-bit systems, assume 64-bit integers need to use DS form
26292 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26293 (for LXV and STXV). TImode is problematical in that its normal usage
26294 is expected to be GPRs where it wants a DS instruction format, but if
26295 it goes into the vector registers, it wants a DQ instruction
26297 if (TARGET_POWERPC64
&& size
>= 8 && GET_MODE_CLASS (mode
) == MODE_INT
)
26298 non_prefixed_format
= NON_PREFIXED_DS
;
26300 else if (TARGET_VSX
&& size
>= 16
26301 && (VECTOR_MODE_P (mode
) || VECTOR_ALIGNMENT_P (mode
)))
26302 non_prefixed_format
= NON_PREFIXED_DQ
;
26305 non_prefixed_format
= NON_PREFIXED_D
;
26308 /* Classify the D/DS/DQ-form addresses. */
26309 switch (non_prefixed_format
)
26311 /* Instruction format D, all 16 bits are valid. */
26312 case NON_PREFIXED_D
:
26313 return INSN_FORM_D
;
26315 /* Instruction format DS, bottom 2 bits must be 0. */
26316 case NON_PREFIXED_DS
:
26317 if ((offset
& 3) == 0)
26318 return INSN_FORM_DS
;
26320 else if (TARGET_PREFIXED
)
26321 return INSN_FORM_PREFIXED_NUMERIC
;
26324 return INSN_FORM_BAD
;
26326 /* Instruction format DQ, bottom 4 bits must be 0. */
26327 case NON_PREFIXED_DQ
:
26328 if ((offset
& 15) == 0)
26329 return INSN_FORM_DQ
;
26331 else if (TARGET_PREFIXED
)
26332 return INSN_FORM_PREFIXED_NUMERIC
;
26335 return INSN_FORM_BAD
;
26341 return INSN_FORM_BAD
;
26344 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26345 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26346 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26347 a D-form or DS-form instruction. X-form and base_reg are always
26350 address_is_non_pfx_d_or_x (rtx addr
, machine_mode mode
,
26351 enum non_prefixed_form non_prefixed_format
)
26353 enum insn_form result_form
;
26355 result_form
= address_to_insn_form (addr
, mode
, non_prefixed_format
);
26357 switch (non_prefixed_format
)
26359 case NON_PREFIXED_D
:
26360 switch (result_form
)
26365 case INSN_FORM_BASE_REG
:
26371 case NON_PREFIXED_DS
:
26372 switch (result_form
)
26376 case INSN_FORM_BASE_REG
:
26388 /* Return true if an REG with a given MODE is loaded from or stored into a MEM
26389 location uses a non-prefixed D/DS/DQ-form address. This is used to validate
26390 the load or store with the PCREL_OPT optimization to make sure it is an
26391 instruction that can be optimized.
26393 We need to specify the MODE separately from the REG to allow for loads that
26394 include zero/sign/float extension. */
26397 pcrel_opt_valid_mem_p (rtx reg
, machine_mode mode
, rtx mem
)
26399 /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
26400 PCREL_OPT optimization. */
26401 enum non_prefixed_form non_prefixed
= reg_to_non_prefixed (reg
, mode
);
26402 if (non_prefixed
== NON_PREFIXED_X
)
26405 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26406 rtx addr
= XEXP (mem
, 0);
26407 enum insn_form iform
= address_to_insn_form (addr
, mode
, non_prefixed
);
26408 return (iform
== INSN_FORM_BASE_REG
26409 || iform
== INSN_FORM_D
26410 || iform
== INSN_FORM_DS
26411 || iform
== INSN_FORM_DQ
);
26414 /* Helper function to see if we're potentially looking at lfs/stfs.
26415 - PARALLEL containing a SET and a CLOBBER
26417 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26418 - CLOBBER is a V4SF
26420 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26425 is_lfs_stfs_insn (rtx_insn
*insn
)
26427 rtx pattern
= PATTERN (insn
);
26428 if (GET_CODE (pattern
) != PARALLEL
)
26431 /* This should be a parallel with exactly one set and one clobber. */
26432 if (XVECLEN (pattern
, 0) != 2)
26435 rtx set
= XVECEXP (pattern
, 0, 0);
26436 if (GET_CODE (set
) != SET
)
26439 rtx clobber
= XVECEXP (pattern
, 0, 1);
26440 if (GET_CODE (clobber
) != CLOBBER
)
26443 /* All we care is that the destination of the SET is a mem:SI,
26444 the source should be an UNSPEC_SI_FROM_SF, and the clobber
26445 should be a scratch:V4SF. */
26447 rtx dest
= SET_DEST (set
);
26448 rtx src
= SET_SRC (set
);
26449 rtx scratch
= SET_DEST (clobber
);
26451 if (GET_CODE (src
) != UNSPEC
)
26455 if (XINT (src
, 1) == UNSPEC_SI_FROM_SF
26456 && GET_CODE (dest
) == MEM
&& GET_MODE (dest
) == SImode
26457 && GET_CODE (scratch
) == SCRATCH
&& GET_MODE (scratch
) == V4SFmode
)
26461 if (XINT (src
, 1) == UNSPEC_SF_FROM_SI
26462 && GET_CODE (dest
) == REG
&& GET_MODE (dest
) == SFmode
26463 && GET_CODE (scratch
) == SCRATCH
&& GET_MODE (scratch
) == DImode
)
26469 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26470 instruction format (D/DS/DQ) used for offset memory. */
26472 enum non_prefixed_form
26473 reg_to_non_prefixed (rtx reg
, machine_mode mode
)
26475 /* If it isn't a register, use the defaults. */
26476 if (!REG_P (reg
) && !SUBREG_P (reg
))
26477 return NON_PREFIXED_DEFAULT
;
26479 unsigned int r
= reg_or_subregno (reg
);
26481 /* If we have a pseudo, use the default instruction format. */
26482 if (!HARD_REGISTER_NUM_P (r
))
26483 return NON_PREFIXED_DEFAULT
;
26485 unsigned size
= GET_MODE_SIZE (mode
);
26487 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE
26488 128-bit floating point, and 128-bit integers. Before power9, only indexed
26489 addressing was available for vectors. */
26490 if (FP_REGNO_P (r
))
26492 if (mode
== SFmode
|| size
== 8 || FLOAT128_2REG_P (mode
))
26493 return NON_PREFIXED_D
;
26496 return NON_PREFIXED_X
;
26498 else if (TARGET_VSX
&& size
>= 16
26499 && (VECTOR_MODE_P (mode
)
26500 || VECTOR_ALIGNMENT_P (mode
)
26501 || mode
== TImode
|| mode
== CTImode
))
26502 return (TARGET_P9_VECTOR
) ? NON_PREFIXED_DQ
: NON_PREFIXED_X
;
26505 return NON_PREFIXED_DEFAULT
;
26508 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE
26509 128-bit floating point, and 128-bit integers. Before power9, only indexed
26510 addressing was available. */
26511 else if (ALTIVEC_REGNO_P (r
))
26513 if (!TARGET_P9_VECTOR
)
26514 return NON_PREFIXED_X
;
26516 if (mode
== SFmode
|| size
== 8 || FLOAT128_2REG_P (mode
))
26517 return NON_PREFIXED_DS
;
26520 return NON_PREFIXED_X
;
26522 else if (TARGET_VSX
&& size
>= 16
26523 && (VECTOR_MODE_P (mode
)
26524 || VECTOR_ALIGNMENT_P (mode
)
26525 || mode
== TImode
|| mode
== CTImode
))
26526 return NON_PREFIXED_DQ
;
26529 return NON_PREFIXED_DEFAULT
;
26532 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
26533 otherwise. Assume that any other register, such as LR, CRs, etc. will go
26534 through the GPR registers for memory operations. */
26535 else if (TARGET_POWERPC64
&& size
>= 8)
26536 return NON_PREFIXED_DS
;
26538 return NON_PREFIXED_D
;
26542 /* Whether a load instruction is a prefixed instruction. This is called from
26543 the prefixed attribute processing. */
26546 prefixed_load_p (rtx_insn
*insn
)
26548 /* Validate the insn to make sure it is a normal load insn. */
26549 extract_insn_cached (insn
);
26550 if (recog_data
.n_operands
< 2)
26553 rtx reg
= recog_data
.operand
[0];
26554 rtx mem
= recog_data
.operand
[1];
26556 if (!REG_P (reg
) && !SUBREG_P (reg
))
26562 /* Prefixed load instructions do not support update or indexed forms. */
26563 if (get_attr_indexed (insn
) == INDEXED_YES
26564 || get_attr_update (insn
) == UPDATE_YES
)
26567 /* LWA uses the DS format instead of the D format that LWZ uses. */
26568 enum non_prefixed_form non_prefixed
;
26569 machine_mode reg_mode
= GET_MODE (reg
);
26570 machine_mode mem_mode
= GET_MODE (mem
);
26572 if (mem_mode
== SImode
&& reg_mode
== DImode
26573 && get_attr_sign_extend (insn
) == SIGN_EXTEND_YES
)
26574 non_prefixed
= NON_PREFIXED_DS
;
26577 non_prefixed
= reg_to_non_prefixed (reg
, mem_mode
);
26579 if (non_prefixed
== NON_PREFIXED_X
&& is_lfs_stfs_insn (insn
))
26580 return address_is_prefixed (XEXP (mem
, 0), mem_mode
, NON_PREFIXED_DEFAULT
);
26582 return address_is_prefixed (XEXP (mem
, 0), mem_mode
, non_prefixed
);
26585 /* Whether a store instruction is a prefixed instruction. This is called from
26586 the prefixed attribute processing. */
26589 prefixed_store_p (rtx_insn
*insn
)
26591 /* Validate the insn to make sure it is a normal store insn. */
26592 extract_insn_cached (insn
);
26593 if (recog_data
.n_operands
< 2)
26596 rtx mem
= recog_data
.operand
[0];
26597 rtx reg
= recog_data
.operand
[1];
26599 if (!REG_P (reg
) && !SUBREG_P (reg
))
26605 /* Prefixed store instructions do not support update or indexed forms. */
26606 if (get_attr_indexed (insn
) == INDEXED_YES
26607 || get_attr_update (insn
) == UPDATE_YES
)
26610 machine_mode mem_mode
= GET_MODE (mem
);
26611 rtx addr
= XEXP (mem
, 0);
26612 enum non_prefixed_form non_prefixed
= reg_to_non_prefixed (reg
, mem_mode
);
26614 /* Need to make sure we aren't looking at a stfs which doesn't look
26615 like the other things reg_to_non_prefixed/address_is_prefixed
26617 if (non_prefixed
== NON_PREFIXED_X
&& is_lfs_stfs_insn (insn
))
26618 return address_is_prefixed (addr
, mem_mode
, NON_PREFIXED_DEFAULT
);
26620 return address_is_prefixed (addr
, mem_mode
, non_prefixed
);
26623 /* Whether a load immediate or add instruction is a prefixed instruction. This
26624 is called from the prefixed attribute processing. */
26627 prefixed_paddi_p (rtx_insn
*insn
)
26629 rtx set
= single_set (insn
);
26633 rtx dest
= SET_DEST (set
);
26634 rtx src
= SET_SRC (set
);
26636 if (!REG_P (dest
) && !SUBREG_P (dest
))
26639 /* Is this a load immediate that can't be done with a simple ADDI or
26641 if (CONST_INT_P (src
))
26642 return (satisfies_constraint_eI (src
)
26643 && !satisfies_constraint_I (src
)
26644 && !satisfies_constraint_L (src
));
26646 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26648 if (GET_CODE (src
) == PLUS
)
26650 rtx op1
= XEXP (src
, 1);
26652 return (CONST_INT_P (op1
)
26653 && satisfies_constraint_eI (op1
)
26654 && !satisfies_constraint_I (op1
)
26655 && !satisfies_constraint_L (op1
));
26658 /* If not, is it a load of a PC-relative address? */
26659 if (!TARGET_PCREL
|| GET_MODE (dest
) != Pmode
)
26662 if (!SYMBOL_REF_P (src
) && !LABEL_REF_P (src
) && GET_CODE (src
) != CONST
)
26665 enum insn_form iform
= address_to_insn_form (src
, Pmode
,
26666 NON_PREFIXED_DEFAULT
);
26668 return (iform
== INSN_FORM_PCREL_EXTERNAL
|| iform
== INSN_FORM_PCREL_LOCAL
);
/* Whether the next instruction needs a 'p' prefix issued before the
   instruction is printed out.  Set by rs6000_final_prescan_insn and
   consumed (and cleared) by rs6000_asm_output_opcode.  */
static bool prepend_p_to_next_insn;
26675 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26676 outputting the assembler code. On the PowerPC, we remember if the current
26677 insn is a prefixed insn where we need to emit a 'p' before the insn.
26679 In addition, if the insn is part of a PC-relative reference to an external
26680 label optimization, this is recorded also. */
26682 rs6000_final_prescan_insn (rtx_insn
*insn
, rtx
[], int)
26684 prepend_p_to_next_insn
= (get_attr_maybe_prefixed (insn
)
26685 == MAYBE_PREFIXED_YES
26686 && get_attr_prefixed (insn
) == PREFIXED_YES
);
26690 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
26691 We use it to emit a 'p' for prefixed insns that is set in
26692 FINAL_PRESCAN_INSN. */
26694 rs6000_asm_output_opcode (FILE *stream
)
26696 if (prepend_p_to_next_insn
)
26698 fprintf (stream
, "p");
26700 /* Reset the flag in the case where there are separate insn lines in the
26701 sequence, so the 'p' is only emitted for the first line. This shows up
26702 when we are doing the PCREL_OPT optimization, in that the label created
26703 with %r<n> would have a leading 'p' printed. */
26704 prepend_p_to_next_insn
= false;
26710 /* Emit the relocation to tie the next instruction to a previous instruction
26711 that loads up an external address. This is used to do the PCREL_OPT
26712 optimization. Note, the label is generated after the PLD of the got
26713 pc-relative address to allow for the assembler to insert NOPs before the PLD
26714 instruction. The operand is a constant integer that is the label
26718 output_pcrel_opt_reloc (rtx label_num
)
26720 rtx operands
[1] = { label_num
};
26721 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
26725 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26726 should be adjusted to reflect any required changes. This macro is used when
26727 there is some systematic length adjustment required that would be difficult
26728 to express in the length attribute.
26730 In the PowerPC, we use this to adjust the length of an instruction if one or
26731 more prefixed instructions are generated, using the attribute
26732 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
26733 hardware requires that a prefied instruciton does not cross a 64-byte
26734 boundary. This means the compiler has to assume the length of the first
26735 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
26736 already set for the non-prefixed instruction, we just need to udpate for the
26740 rs6000_adjust_insn_length (rtx_insn
*insn
, int length
)
26742 if (TARGET_PREFIXED
&& NONJUMP_INSN_P (insn
))
26744 rtx pattern
= PATTERN (insn
);
26745 if (GET_CODE (pattern
) != USE
&& GET_CODE (pattern
) != CLOBBER
26746 && get_attr_prefixed (insn
) == PREFIXED_YES
)
26748 int num_prefixed
= get_attr_max_prefixed_insns (insn
);
26749 length
+= 4 * (num_prefixed
+ 1);
26757 #ifdef HAVE_GAS_HIDDEN
26758 # define USE_HIDDEN_LINKONCE 1
26760 # define USE_HIDDEN_LINKONCE 0
26763 /* Fills in the label name that should be used for a 476 link stack thunk. */
26766 get_ppc476_thunk_name (char name
[32])
26768 gcc_assert (TARGET_LINK_STACK
);
26770 if (USE_HIDDEN_LINKONCE
)
26771 sprintf (name
, "__ppc476.get_thunk");
26773 ASM_GENERATE_INTERNAL_LABEL (name
, "LPPC476_", 0);
26776 /* This function emits the simple thunk routine that is used to preserve
26777 the link stack on the 476 cpu. */
26779 static void rs6000_code_end (void) ATTRIBUTE_UNUSED
;
26781 rs6000_code_end (void)
26786 if (!TARGET_LINK_STACK
)
26789 get_ppc476_thunk_name (name
);
26791 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
, get_identifier (name
),
26792 build_function_type_list (void_type_node
, NULL_TREE
));
26793 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
26794 NULL_TREE
, void_type_node
);
26795 TREE_PUBLIC (decl
) = 1;
26796 TREE_STATIC (decl
) = 1;
26799 if (USE_HIDDEN_LINKONCE
&& !TARGET_XCOFF
)
26801 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
26802 targetm
.asm_out
.unique_section (decl
, 0);
26803 switch_to_section (get_named_section (decl
, NULL
, 0));
26804 DECL_WEAK (decl
) = 1;
26805 ASM_WEAKEN_DECL (asm_out_file
, decl
, name
, 0);
26806 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
26807 targetm
.asm_out
.assemble_visibility (decl
, VISIBILITY_HIDDEN
);
26808 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
26813 switch_to_section (text_section
);
26814 ASM_OUTPUT_LABEL (asm_out_file
, name
);
26817 DECL_INITIAL (decl
) = make_node (BLOCK
);
26818 current_function_decl
= decl
;
26819 allocate_struct_function (decl
, false);
26820 init_function_start (decl
);
26821 first_function_block_is_cold
= false;
26822 /* Make sure unwind info is emitted for the thunk if needed. */
26823 final_start_function (emit_barrier (), asm_out_file
, 1);
26825 fputs ("\tblr\n", asm_out_file
);
26827 final_end_function ();
26828 init_insn_lengths ();
26829 free_after_compilation (cfun
);
26831 current_function_decl
= NULL
;
26834 /* Add r30 to hard reg set if the prologue sets it up and it is not
26835 pic_offset_table_rtx. */
26838 rs6000_set_up_by_prologue (struct hard_reg_set_container
*set
)
26840 if (!TARGET_SINGLE_PIC_BASE
26842 && TARGET_MINIMAL_TOC
26843 && !constant_pool_empty_p ())
26844 add_to_hard_reg_set (&set
->set
, Pmode
, RS6000_PIC_OFFSET_TABLE_REGNUM
);
26845 if (cfun
->machine
->split_stack_argp_used
)
26846 add_to_hard_reg_set (&set
->set
, Pmode
, 12);
26848 /* Make sure the hard reg set doesn't include r2, which was possibly added
26849 via PIC_OFFSET_TABLE_REGNUM. */
26851 remove_from_hard_reg_set (&set
->set
, Pmode
, TOC_REGNUM
);
26855 /* Helper function for rs6000_split_logical to emit a logical instruction after
26856 spliting the operation to single GPR registers.
26858 DEST is the destination register.
26859 OP1 and OP2 are the input source registers.
26860 CODE is the base operation (AND, IOR, XOR, NOT).
26861 MODE is the machine mode.
26862 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26863 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26864 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26867 rs6000_split_logical_inner (rtx dest
,
26870 enum rtx_code code
,
26872 bool complement_final_p
,
26873 bool complement_op1_p
,
26874 bool complement_op2_p
)
26878 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
26879 if (op2
&& CONST_INT_P (op2
)
26880 && (mode
== SImode
|| (mode
== DImode
&& TARGET_POWERPC64
))
26881 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
26883 HOST_WIDE_INT mask
= GET_MODE_MASK (mode
);
26884 HOST_WIDE_INT value
= INTVAL (op2
) & mask
;
26886 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26891 emit_insn (gen_rtx_SET (dest
, const0_rtx
));
26895 else if (value
== mask
)
26897 if (!rtx_equal_p (dest
, op1
))
26898 emit_insn (gen_rtx_SET (dest
, op1
));
26903 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
26904 into separate ORI/ORIS or XORI/XORIS instrucitons. */
26905 else if (code
== IOR
|| code
== XOR
)
26909 if (!rtx_equal_p (dest
, op1
))
26910 emit_insn (gen_rtx_SET (dest
, op1
));
26916 if (code
== AND
&& mode
== SImode
26917 && !complement_final_p
&& !complement_op1_p
&& !complement_op2_p
)
26919 emit_insn (gen_andsi3 (dest
, op1
, op2
));
26923 if (complement_op1_p
)
26924 op1
= gen_rtx_NOT (mode
, op1
);
26926 if (complement_op2_p
)
26927 op2
= gen_rtx_NOT (mode
, op2
);
26929 /* For canonical RTL, if only one arm is inverted it is the first. */
26930 if (!complement_op1_p
&& complement_op2_p
)
26931 std::swap (op1
, op2
);
26933 bool_rtx
= ((code
== NOT
)
26934 ? gen_rtx_NOT (mode
, op1
)
26935 : gen_rtx_fmt_ee (code
, mode
, op1
, op2
));
26937 if (complement_final_p
)
26938 bool_rtx
= gen_rtx_NOT (mode
, bool_rtx
);
26940 emit_insn (gen_rtx_SET (dest
, bool_rtx
));
26943 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
26944 operations are split immediately during RTL generation to allow for more
26945 optimizations of the AND/IOR/XOR.
26947 OPERANDS is an array containing the destination and two input operands.
26948 CODE is the base operation (AND, IOR, XOR, NOT).
26949 MODE is the machine mode.
26950 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26951 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26952 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
26953 CLOBBER_REG is either NULL or a scratch register of type CC to allow
26954 formation of the AND instructions. */
26957 rs6000_split_logical_di (rtx operands
[3],
26958 enum rtx_code code
,
26959 bool complement_final_p
,
26960 bool complement_op1_p
,
26961 bool complement_op2_p
)
26963 const HOST_WIDE_INT lower_32bits
= HOST_WIDE_INT_C(0xffffffff);
26964 const HOST_WIDE_INT upper_32bits
= ~ lower_32bits
;
26965 const HOST_WIDE_INT sign_bit
= HOST_WIDE_INT_C(0x80000000);
26966 enum hi_lo
{ hi
= 0, lo
= 1 };
26967 rtx op0_hi_lo
[2], op1_hi_lo
[2], op2_hi_lo
[2];
26970 op0_hi_lo
[hi
] = gen_highpart (SImode
, operands
[0]);
26971 op1_hi_lo
[hi
] = gen_highpart (SImode
, operands
[1]);
26972 op0_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[0]);
26973 op1_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[1]);
26976 op2_hi_lo
[hi
] = op2_hi_lo
[lo
] = NULL_RTX
;
26979 if (!CONST_INT_P (operands
[2]))
26981 op2_hi_lo
[hi
] = gen_highpart_mode (SImode
, DImode
, operands
[2]);
26982 op2_hi_lo
[lo
] = gen_lowpart (SImode
, operands
[2]);
26986 HOST_WIDE_INT value
= INTVAL (operands
[2]);
26987 HOST_WIDE_INT value_hi_lo
[2];
26989 gcc_assert (!complement_final_p
);
26990 gcc_assert (!complement_op1_p
);
26991 gcc_assert (!complement_op2_p
);
26993 value_hi_lo
[hi
] = value
>> 32;
26994 value_hi_lo
[lo
] = value
& lower_32bits
;
26996 for (i
= 0; i
< 2; i
++)
26998 HOST_WIDE_INT sub_value
= value_hi_lo
[i
];
27000 if (sub_value
& sign_bit
)
27001 sub_value
|= upper_32bits
;
27003 op2_hi_lo
[i
] = GEN_INT (sub_value
);
27005 /* If this is an AND instruction, check to see if we need to load
27006 the value in a register. */
27007 if (code
== AND
&& sub_value
!= -1 && sub_value
!= 0
27008 && !and_operand (op2_hi_lo
[i
], SImode
))
27009 op2_hi_lo
[i
] = force_reg (SImode
, op2_hi_lo
[i
]);
27014 for (i
= 0; i
< 2; i
++)
27016 /* Split large IOR/XOR operations. */
27017 if ((code
== IOR
|| code
== XOR
)
27018 && CONST_INT_P (op2_hi_lo
[i
])
27019 && !complement_final_p
27020 && !complement_op1_p
27021 && !complement_op2_p
27022 && !logical_const_operand (op2_hi_lo
[i
], SImode
))
27024 HOST_WIDE_INT value
= INTVAL (op2_hi_lo
[i
]);
27025 HOST_WIDE_INT hi_16bits
= value
& HOST_WIDE_INT_C(0xffff0000);
27026 HOST_WIDE_INT lo_16bits
= value
& HOST_WIDE_INT_C(0x0000ffff);
27027 rtx tmp
= gen_reg_rtx (SImode
);
27029 /* Make sure the constant is sign extended. */
27030 if ((hi_16bits
& sign_bit
) != 0)
27031 hi_16bits
|= upper_32bits
;
27033 rs6000_split_logical_inner (tmp
, op1_hi_lo
[i
], GEN_INT (hi_16bits
),
27034 code
, SImode
, false, false, false);
27036 rs6000_split_logical_inner (op0_hi_lo
[i
], tmp
, GEN_INT (lo_16bits
),
27037 code
, SImode
, false, false, false);
27040 rs6000_split_logical_inner (op0_hi_lo
[i
], op1_hi_lo
[i
], op2_hi_lo
[i
],
27041 code
, SImode
, complement_final_p
,
27042 complement_op1_p
, complement_op2_p
);
27048 /* Split the insns that make up boolean operations operating on multiple GPR
27049 registers. The boolean MD patterns ensure that the inputs either are
27050 exactly the same as the output registers, or there is no overlap.
27052 OPERANDS is an array containing the destination and two input operands.
27053 CODE is the base operation (AND, IOR, XOR, NOT).
27054 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27055 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27056 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27059 rs6000_split_logical (rtx operands
[3],
27060 enum rtx_code code
,
27061 bool complement_final_p
,
27062 bool complement_op1_p
,
27063 bool complement_op2_p
)
27065 machine_mode mode
= GET_MODE (operands
[0]);
27066 machine_mode sub_mode
;
27068 int sub_size
, regno0
, regno1
, nregs
, i
;
27070 /* If this is DImode, use the specialized version that can run before
27071 register allocation. */
27072 if (mode
== DImode
&& !TARGET_POWERPC64
)
27074 rs6000_split_logical_di (operands
, code
, complement_final_p
,
27075 complement_op1_p
, complement_op2_p
);
27081 op2
= (code
== NOT
) ? NULL_RTX
: operands
[2];
27082 sub_mode
= (TARGET_POWERPC64
) ? DImode
: SImode
;
27083 sub_size
= GET_MODE_SIZE (sub_mode
);
27084 regno0
= REGNO (op0
);
27085 regno1
= REGNO (op1
);
27087 gcc_assert (reload_completed
);
27088 gcc_assert (IN_RANGE (regno0
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
27089 gcc_assert (IN_RANGE (regno1
, FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
27091 nregs
= rs6000_hard_regno_nregs
[(int)mode
][regno0
];
27092 gcc_assert (nregs
> 1);
27094 if (op2
&& REG_P (op2
))
27095 gcc_assert (IN_RANGE (REGNO (op2
), FIRST_GPR_REGNO
, LAST_GPR_REGNO
));
27097 for (i
= 0; i
< nregs
; i
++)
27099 int offset
= i
* sub_size
;
27100 rtx sub_op0
= simplify_subreg (sub_mode
, op0
, mode
, offset
);
27101 rtx sub_op1
= simplify_subreg (sub_mode
, op1
, mode
, offset
);
27102 rtx sub_op2
= ((code
== NOT
)
27104 : simplify_subreg (sub_mode
, op2
, mode
, offset
));
27106 rs6000_split_logical_inner (sub_op0
, sub_op1
, sub_op2
, code
, sub_mode
,
27107 complement_final_p
, complement_op1_p
,
27114 /* Emit instructions to move SRC to DST. Called by splitters for
27115 multi-register moves. It will emit at most one instruction for
27116 each register that is accessed; that is, it won't emit li/lis pairs
27117 (or equivalent for 64-bit code). One of SRC or DST must be a hard
27121 rs6000_split_multireg_move (rtx dst
, rtx src
)
27123 /* The register number of the first register being moved. */
27125 /* The mode that is to be moved. */
27127 /* The mode that the move is being done in, and its size. */
27128 machine_mode reg_mode
;
27130 /* The number of registers that will be moved. */
27133 reg
= REG_P (dst
) ? REGNO (dst
) : REGNO (src
);
27134 mode
= GET_MODE (dst
);
27135 nregs
= hard_regno_nregs (reg
, mode
);
27137 /* If we have a vector quad register for MMA, and this is a load or store,
27138 see if we can use vector paired load/stores. */
27139 if (mode
== XOmode
&& TARGET_MMA
27140 && (MEM_P (dst
) || MEM_P (src
)))
27145 /* If we have a vector pair/quad mode, split it into two/four separate
27147 else if (mode
== OOmode
|| mode
== XOmode
)
27148 reg_mode
= V1TImode
;
27149 else if (FP_REGNO_P (reg
))
27150 reg_mode
= DECIMAL_FLOAT_MODE_P (mode
) ? DDmode
:
27151 (TARGET_HARD_FLOAT
? DFmode
: SFmode
);
27152 else if (ALTIVEC_REGNO_P (reg
))
27153 reg_mode
= V16QImode
;
27155 reg_mode
= word_mode
;
27156 reg_mode_size
= GET_MODE_SIZE (reg_mode
);
27158 gcc_assert (reg_mode_size
* nregs
== GET_MODE_SIZE (mode
));
27160 /* TDmode residing in FP registers is special, since the ISA requires that
27161 the lower-numbered word of a register pair is always the most significant
27162 word, even in little-endian mode. This does not match the usual subreg
27163 semantics, so we cannnot use simplify_gen_subreg in those cases. Access
27164 the appropriate constituent registers "by hand" in little-endian mode.
27166 Note we do not need to check for destructive overlap here since TDmode
27167 can only reside in even/odd register pairs. */
27168 if (FP_REGNO_P (reg
) && DECIMAL_FLOAT_MODE_P (mode
) && !BYTES_BIG_ENDIAN
)
27173 for (i
= 0; i
< nregs
; i
++)
27175 if (REG_P (src
) && FP_REGNO_P (REGNO (src
)))
27176 p_src
= gen_rtx_REG (reg_mode
, REGNO (src
) + nregs
- 1 - i
);
27178 p_src
= simplify_gen_subreg (reg_mode
, src
, mode
,
27179 i
* reg_mode_size
);
27181 if (REG_P (dst
) && FP_REGNO_P (REGNO (dst
)))
27182 p_dst
= gen_rtx_REG (reg_mode
, REGNO (dst
) + nregs
- 1 - i
);
27184 p_dst
= simplify_gen_subreg (reg_mode
, dst
, mode
,
27185 i
* reg_mode_size
);
27187 emit_insn (gen_rtx_SET (p_dst
, p_src
));
27193 /* The __vector_pair and __vector_quad modes are multi-register
27194 modes, so if we have to load or store the registers, we have to be
27195 careful to properly swap them if we're in little endian mode
27196 below. This means the last register gets the first memory
27197 location. We also need to be careful of using the right register
27198 numbers if we are splitting XO to OO. */
27199 if (mode
== OOmode
|| mode
== XOmode
)
27201 nregs
= hard_regno_nregs (reg
, mode
);
27202 int reg_mode_nregs
= hard_regno_nregs (reg
, reg_mode
);
27205 unsigned offset
= 0;
27206 unsigned size
= GET_MODE_SIZE (reg_mode
);
27208 /* If we are reading an accumulator register, we have to
27209 deprime it before we can access it. */
27211 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27212 emit_insn (gen_mma_xxmfacc (src
, src
));
27214 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
27217 = WORDS_BIG_ENDIAN
? i
: (nregs
- reg_mode_nregs
- i
);
27218 rtx dst2
= adjust_address (dst
, reg_mode
, offset
);
27219 rtx src2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
27221 emit_insn (gen_rtx_SET (dst2
, src2
));
27229 unsigned offset
= 0;
27230 unsigned size
= GET_MODE_SIZE (reg_mode
);
27232 for (int i
= 0; i
< nregs
; i
+= reg_mode_nregs
)
27235 = WORDS_BIG_ENDIAN
? i
: (nregs
- reg_mode_nregs
- i
);
27236 rtx dst2
= gen_rtx_REG (reg_mode
, reg
+ subreg
);
27237 rtx src2
= adjust_address (src
, reg_mode
, offset
);
27239 emit_insn (gen_rtx_SET (dst2
, src2
));
27242 /* If we are writing an accumulator register, we have to
27243 prime it after we've written it. */
27245 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27246 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27251 if (GET_CODE (src
) == UNSPEC
27252 || GET_CODE (src
) == UNSPEC_VOLATILE
)
27254 gcc_assert (XINT (src
, 1) == UNSPEC_VSX_ASSEMBLE
27255 || XINT (src
, 1) == UNSPECV_MMA_ASSEMBLE
);
27256 gcc_assert (REG_P (dst
));
27257 if (GET_MODE (src
) == XOmode
)
27258 gcc_assert (FP_REGNO_P (REGNO (dst
)));
27259 if (GET_MODE (src
) == OOmode
)
27260 gcc_assert (VSX_REGNO_P (REGNO (dst
)));
27262 int nvecs
= XVECLEN (src
, 0);
27263 for (int i
= 0; i
< nvecs
; i
++)
27266 int regno
= reg
+ i
;
27268 if (WORDS_BIG_ENDIAN
)
27270 op
= XVECEXP (src
, 0, i
);
27272 /* If we are loading an even VSX register and the memory location
27273 is adjacent to the next register's memory location (if any),
27274 then we can load them both with one LXVP instruction. */
27275 if ((regno
& 1) == 0)
27277 rtx op2
= XVECEXP (src
, 0, i
+ 1);
27278 if (adjacent_mem_locations (op
, op2
) == op
)
27280 op
= adjust_address (op
, OOmode
, 0);
27281 /* Skip the next register, since we're going to
27282 load it together with this register. */
27289 op
= XVECEXP (src
, 0, nvecs
- i
- 1);
27291 /* If we are loading an even VSX register and the memory location
27292 is adjacent to the next register's memory location (if any),
27293 then we can load them both with one LXVP instruction. */
27294 if ((regno
& 1) == 0)
27296 rtx op2
= XVECEXP (src
, 0, nvecs
- i
- 2);
27297 if (adjacent_mem_locations (op2
, op
) == op2
)
27299 op
= adjust_address (op2
, OOmode
, 0);
27300 /* Skip the next register, since we're going to
27301 load it together with this register. */
27307 rtx dst_i
= gen_rtx_REG (GET_MODE (op
), regno
);
27308 emit_insn (gen_rtx_SET (dst_i
, op
));
27311 /* We are writing an accumulator register, so we have to
27312 prime it after we've written it. */
27313 if (GET_MODE (src
) == XOmode
)
27314 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27319 /* Register -> register moves can use common code. */
27322 if (REG_P (src
) && REG_P (dst
) && (REGNO (src
) < REGNO (dst
)))
27324 /* If we are reading an accumulator register, we have to
27325 deprime it before we can access it. */
27327 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27328 emit_insn (gen_mma_xxmfacc (src
, src
));
27330 /* Move register range backwards, if we might have destructive
27333 /* XO/OO are opaque so cannot use subregs. */
27334 if (mode
== OOmode
|| mode
== XOmode
)
27336 for (i
= nregs
- 1; i
>= 0; i
--)
27338 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + i
);
27339 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + i
);
27340 emit_insn (gen_rtx_SET (dst_i
, src_i
));
27345 for (i
= nregs
- 1; i
>= 0; i
--)
27346 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
27347 i
* reg_mode_size
),
27348 simplify_gen_subreg (reg_mode
, src
, mode
,
27349 i
* reg_mode_size
)));
27352 /* If we are writing an accumulator register, we have to
27353 prime it after we've written it. */
27355 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27356 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27362 bool used_update
= false;
27363 rtx restore_basereg
= NULL_RTX
;
27365 if (MEM_P (src
) && INT_REGNO_P (reg
))
27369 if (GET_CODE (XEXP (src
, 0)) == PRE_INC
27370 || GET_CODE (XEXP (src
, 0)) == PRE_DEC
)
27373 breg
= XEXP (XEXP (src
, 0), 0);
27374 delta_rtx
= (GET_CODE (XEXP (src
, 0)) == PRE_INC
27375 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src
)))
27376 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src
))));
27377 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
27378 src
= replace_equiv_address (src
, breg
);
27380 else if (! rs6000_offsettable_memref_p (src
, reg_mode
, true))
27382 if (GET_CODE (XEXP (src
, 0)) == PRE_MODIFY
)
27384 rtx basereg
= XEXP (XEXP (src
, 0), 0);
27387 rtx ndst
= simplify_gen_subreg (reg_mode
, dst
, mode
, 0);
27388 emit_insn (gen_rtx_SET (ndst
,
27389 gen_rtx_MEM (reg_mode
,
27391 used_update
= true;
27394 emit_insn (gen_rtx_SET (basereg
,
27395 XEXP (XEXP (src
, 0), 1)));
27396 src
= replace_equiv_address (src
, basereg
);
27400 rtx basereg
= gen_rtx_REG (Pmode
, reg
);
27401 emit_insn (gen_rtx_SET (basereg
, XEXP (src
, 0)));
27402 src
= replace_equiv_address (src
, basereg
);
27406 breg
= XEXP (src
, 0);
27407 if (GET_CODE (breg
) == PLUS
|| GET_CODE (breg
) == LO_SUM
)
27408 breg
= XEXP (breg
, 0);
27410 /* If the base register we are using to address memory is
27411 also a destination reg, then change that register last. */
27413 && REGNO (breg
) >= REGNO (dst
)
27414 && REGNO (breg
) < REGNO (dst
) + nregs
)
27415 j
= REGNO (breg
) - REGNO (dst
);
27417 else if (MEM_P (dst
) && INT_REGNO_P (reg
))
27421 if (GET_CODE (XEXP (dst
, 0)) == PRE_INC
27422 || GET_CODE (XEXP (dst
, 0)) == PRE_DEC
)
27425 breg
= XEXP (XEXP (dst
, 0), 0);
27426 delta_rtx
= (GET_CODE (XEXP (dst
, 0)) == PRE_INC
27427 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst
)))
27428 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst
))));
27430 /* We have to update the breg before doing the store.
27431 Use store with update, if available. */
27435 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
27436 emit_insn (TARGET_32BIT
27437 ? (TARGET_POWERPC64
27438 ? gen_movdi_si_update (breg
, breg
, delta_rtx
, nsrc
)
27439 : gen_movsi_si_update (breg
, breg
, delta_rtx
, nsrc
))
27440 : gen_movdi_di_update (breg
, breg
, delta_rtx
, nsrc
));
27441 used_update
= true;
27444 emit_insn (gen_add3_insn (breg
, breg
, delta_rtx
));
27445 dst
= replace_equiv_address (dst
, breg
);
27447 else if (!rs6000_offsettable_memref_p (dst
, reg_mode
, true)
27448 && GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
27450 if (GET_CODE (XEXP (dst
, 0)) == PRE_MODIFY
)
27452 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
27455 rtx nsrc
= simplify_gen_subreg (reg_mode
, src
, mode
, 0);
27456 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode
,
27459 used_update
= true;
27462 emit_insn (gen_rtx_SET (basereg
,
27463 XEXP (XEXP (dst
, 0), 1)));
27464 dst
= replace_equiv_address (dst
, basereg
);
27468 rtx basereg
= XEXP (XEXP (dst
, 0), 0);
27469 rtx offsetreg
= XEXP (XEXP (dst
, 0), 1);
27470 gcc_assert (GET_CODE (XEXP (dst
, 0)) == PLUS
27472 && REG_P (offsetreg
)
27473 && REGNO (basereg
) != REGNO (offsetreg
));
27474 if (REGNO (basereg
) == 0)
27476 rtx tmp
= offsetreg
;
27477 offsetreg
= basereg
;
27480 emit_insn (gen_add3_insn (basereg
, basereg
, offsetreg
));
27481 restore_basereg
= gen_sub3_insn (basereg
, basereg
, offsetreg
);
27482 dst
= replace_equiv_address (dst
, basereg
);
27485 else if (GET_CODE (XEXP (dst
, 0)) != LO_SUM
)
27486 gcc_assert (rs6000_offsettable_memref_p (dst
, reg_mode
, true));
27489 /* If we are reading an accumulator register, we have to
27490 deprime it before we can access it. */
27491 if (TARGET_MMA
&& REG_P (src
)
27492 && GET_MODE (src
) == XOmode
&& FP_REGNO_P (REGNO (src
)))
27493 emit_insn (gen_mma_xxmfacc (src
, src
));
27495 for (i
= 0; i
< nregs
; i
++)
27497 /* Calculate index to next subword. */
27502 /* If compiler already emitted move of first word by
27503 store with update, no need to do anything. */
27504 if (j
== 0 && used_update
)
27507 /* XO/OO are opaque so cannot use subregs. */
27508 if (mode
== OOmode
|| mode
== XOmode
)
27510 rtx dst_i
= gen_rtx_REG (reg_mode
, REGNO (dst
) + j
);
27511 rtx src_i
= gen_rtx_REG (reg_mode
, REGNO (src
) + j
);
27512 emit_insn (gen_rtx_SET (dst_i
, src_i
));
27515 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode
, dst
, mode
,
27516 j
* reg_mode_size
),
27517 simplify_gen_subreg (reg_mode
, src
, mode
,
27518 j
* reg_mode_size
)));
27521 /* If we are writing an accumulator register, we have to
27522 prime it after we've written it. */
27523 if (TARGET_MMA
&& REG_P (dst
)
27524 && GET_MODE (dst
) == XOmode
&& FP_REGNO_P (REGNO (dst
)))
27525 emit_insn (gen_mma_xxmtacc (dst
, dst
));
27527 if (restore_basereg
!= NULL_RTX
)
27528 emit_insn (restore_basereg
);
27532 /* Return true if the peephole2 can combine a load involving a combination of
27533 an addis instruction and a load with an offset that can be fused together on
27537 fusion_gpr_load_p (rtx addis_reg
, /* register set via addis. */
27538 rtx addis_value
, /* addis value. */
27539 rtx target
, /* target register that is loaded. */
27540 rtx mem
) /* bottom part of the memory addr. */
27545 /* Validate arguments. */
27546 if (!base_reg_operand (addis_reg
, GET_MODE (addis_reg
)))
27549 if (!base_reg_operand (target
, GET_MODE (target
)))
27552 if (!fusion_gpr_addis (addis_value
, GET_MODE (addis_value
)))
27555 /* Allow sign/zero extension. */
27556 if (GET_CODE (mem
) == ZERO_EXTEND
27557 || (GET_CODE (mem
) == SIGN_EXTEND
&& TARGET_P8_FUSION_SIGN
))
27558 mem
= XEXP (mem
, 0);
27563 if (!fusion_gpr_mem_load (mem
, GET_MODE (mem
)))
27566 addr
= XEXP (mem
, 0); /* either PLUS or LO_SUM. */
27567 if (GET_CODE (addr
) != PLUS
&& GET_CODE (addr
) != LO_SUM
)
27570 /* Validate that the register used to load the high value is either the
27571 register being loaded, or we can safely replace its use.
27573 This function is only called from the peephole2 pass and we assume that
27574 there are 2 instructions in the peephole (addis and load), so we want to
27575 check if the target register was not used in the memory address and the
27576 register to hold the addis result is dead after the peephole. */
27577 if (REGNO (addis_reg
) != REGNO (target
))
27579 if (reg_mentioned_p (target
, mem
))
27582 if (!peep2_reg_dead_p (2, addis_reg
))
27585 /* If the target register being loaded is the stack pointer, we must
27586 avoid loading any other value into it, even temporarily. */
27587 if (REG_P (target
) && REGNO (target
) == STACK_POINTER_REGNUM
)
27591 base_reg
= XEXP (addr
, 0);
27592 return REGNO (addis_reg
) == REGNO (base_reg
);
27595 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27596 sequence. We adjust the addis register to use the target register. If the
27597 load sign extends, we adjust the code to do the zero extending load, and an
27598 explicit sign extension later since the fusion only covers zero extending
27602 operands[0] register set with addis (to be replaced with target)
27603 operands[1] value set via addis
27604 operands[2] target register being loaded
27605 operands[3] D-form memory reference using operands[0]. */
27608 expand_fusion_gpr_load (rtx
*operands
)
27610 rtx addis_value
= operands
[1];
27611 rtx target
= operands
[2];
27612 rtx orig_mem
= operands
[3];
27613 rtx new_addr
, new_mem
, orig_addr
, offset
;
27614 enum rtx_code plus_or_lo_sum
;
27615 machine_mode target_mode
= GET_MODE (target
);
27616 machine_mode extend_mode
= target_mode
;
27617 machine_mode ptr_mode
= Pmode
;
27618 enum rtx_code extend
= UNKNOWN
;
27620 if (GET_CODE (orig_mem
) == ZERO_EXTEND
27621 || (TARGET_P8_FUSION_SIGN
&& GET_CODE (orig_mem
) == SIGN_EXTEND
))
27623 extend
= GET_CODE (orig_mem
);
27624 orig_mem
= XEXP (orig_mem
, 0);
27625 target_mode
= GET_MODE (orig_mem
);
27628 gcc_assert (MEM_P (orig_mem
));
27630 orig_addr
= XEXP (orig_mem
, 0);
27631 plus_or_lo_sum
= GET_CODE (orig_addr
);
27632 gcc_assert (plus_or_lo_sum
== PLUS
|| plus_or_lo_sum
== LO_SUM
);
27634 offset
= XEXP (orig_addr
, 1);
27635 new_addr
= gen_rtx_fmt_ee (plus_or_lo_sum
, ptr_mode
, addis_value
, offset
);
27636 new_mem
= replace_equiv_address_nv (orig_mem
, new_addr
, false);
27638 if (extend
!= UNKNOWN
)
27639 new_mem
= gen_rtx_fmt_e (ZERO_EXTEND
, extend_mode
, new_mem
);
27641 new_mem
= gen_rtx_UNSPEC (extend_mode
, gen_rtvec (1, new_mem
),
27642 UNSPEC_FUSION_GPR
);
27643 emit_insn (gen_rtx_SET (target
, new_mem
));
27645 if (extend
== SIGN_EXTEND
)
27647 int sub_off
= ((BYTES_BIG_ENDIAN
)
27648 ? GET_MODE_SIZE (extend_mode
) - GET_MODE_SIZE (target_mode
)
27651 = simplify_subreg (target_mode
, target
, extend_mode
, sub_off
);
27653 emit_insn (gen_rtx_SET (target
,
27654 gen_rtx_SIGN_EXTEND (extend_mode
, sign_reg
)));
27660 /* Emit the addis instruction that will be part of a fused instruction
27664 emit_fusion_addis (rtx target
, rtx addis_value
)
27667 const char *addis_str
= NULL
;
27669 /* Emit the addis instruction. */
27670 fuse_ops
[0] = target
;
27671 if (satisfies_constraint_L (addis_value
))
27673 fuse_ops
[1] = addis_value
;
27674 addis_str
= "lis %0,%v1";
27677 else if (GET_CODE (addis_value
) == PLUS
)
27679 rtx op0
= XEXP (addis_value
, 0);
27680 rtx op1
= XEXP (addis_value
, 1);
27682 if (REG_P (op0
) && CONST_INT_P (op1
)
27683 && satisfies_constraint_L (op1
))
27687 addis_str
= "addis %0,%1,%v2";
27691 else if (GET_CODE (addis_value
) == HIGH
)
27693 rtx value
= XEXP (addis_value
, 0);
27694 if (GET_CODE (value
) == UNSPEC
&& XINT (value
, 1) == UNSPEC_TOCREL
)
27696 fuse_ops
[1] = XVECEXP (value
, 0, 0); /* symbol ref. */
27697 fuse_ops
[2] = XVECEXP (value
, 0, 1); /* TOC register. */
27699 addis_str
= "addis %0,%2,%1@toc@ha";
27701 else if (TARGET_XCOFF
)
27702 addis_str
= "addis %0,%1@u(%2)";
27705 gcc_unreachable ();
27708 else if (GET_CODE (value
) == PLUS
)
27710 rtx op0
= XEXP (value
, 0);
27711 rtx op1
= XEXP (value
, 1);
27713 if (GET_CODE (op0
) == UNSPEC
27714 && XINT (op0
, 1) == UNSPEC_TOCREL
27715 && CONST_INT_P (op1
))
27717 fuse_ops
[1] = XVECEXP (op0
, 0, 0); /* symbol ref. */
27718 fuse_ops
[2] = XVECEXP (op0
, 0, 1); /* TOC register. */
27721 addis_str
= "addis %0,%2,%1+%3@toc@ha";
27723 else if (TARGET_XCOFF
)
27724 addis_str
= "addis %0,%1+%3@u(%2)";
27727 gcc_unreachable ();
27731 else if (satisfies_constraint_L (value
))
27733 fuse_ops
[1] = value
;
27734 addis_str
= "lis %0,%v1";
27737 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (value
))
27739 fuse_ops
[1] = value
;
27740 addis_str
= "lis %0,%1@ha";
27745 fatal_insn ("Could not generate addis value for fusion", addis_value
);
27747 output_asm_insn (addis_str
, fuse_ops
);
27750 /* Emit a D-form load or store instruction that is the second instruction
27751 of a fusion sequence. */
27754 emit_fusion_load (rtx load_reg
, rtx addis_reg
, rtx offset
, const char *insn_str
)
27757 char insn_template
[80];
27759 fuse_ops
[0] = load_reg
;
27760 fuse_ops
[1] = addis_reg
;
27762 if (CONST_INT_P (offset
) && satisfies_constraint_I (offset
))
27764 sprintf (insn_template
, "%s %%0,%%2(%%1)", insn_str
);
27765 fuse_ops
[2] = offset
;
27766 output_asm_insn (insn_template
, fuse_ops
);
27769 else if (GET_CODE (offset
) == UNSPEC
27770 && XINT (offset
, 1) == UNSPEC_TOCREL
)
27773 sprintf (insn_template
, "%s %%0,%%2@toc@l(%%1)", insn_str
);
27775 else if (TARGET_XCOFF
)
27776 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
27779 gcc_unreachable ();
27781 fuse_ops
[2] = XVECEXP (offset
, 0, 0);
27782 output_asm_insn (insn_template
, fuse_ops
);
27785 else if (GET_CODE (offset
) == PLUS
27786 && GET_CODE (XEXP (offset
, 0)) == UNSPEC
27787 && XINT (XEXP (offset
, 0), 1) == UNSPEC_TOCREL
27788 && CONST_INT_P (XEXP (offset
, 1)))
27790 rtx tocrel_unspec
= XEXP (offset
, 0);
27792 sprintf (insn_template
, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str
);
27794 else if (TARGET_XCOFF
)
27795 sprintf (insn_template
, "%s %%0,%%2+%%3@l(%%1)", insn_str
);
27798 gcc_unreachable ();
27800 fuse_ops
[2] = XVECEXP (tocrel_unspec
, 0, 0);
27801 fuse_ops
[3] = XEXP (offset
, 1);
27802 output_asm_insn (insn_template
, fuse_ops
);
27805 else if (TARGET_ELF
&& !TARGET_POWERPC64
&& CONSTANT_P (offset
))
27807 sprintf (insn_template
, "%s %%0,%%2@l(%%1)", insn_str
);
27809 fuse_ops
[2] = offset
;
27810 output_asm_insn (insn_template
, fuse_ops
);
27814 fatal_insn ("Unable to generate load/store offset for fusion", offset
);
27819 /* Given an address, convert it into the addis and load offset parts. Addresses
27820 created during the peephole2 process look like:
27821 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
27822 (unspec [(...)] UNSPEC_TOCREL)) */
27825 fusion_split_address (rtx addr
, rtx
*p_hi
, rtx
*p_lo
)
27829 if (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == LO_SUM
)
27831 hi
= XEXP (addr
, 0);
27832 lo
= XEXP (addr
, 1);
27835 gcc_unreachable ();
27841 /* Return a string to fuse an addis instruction with a gpr load to the same
27842 register that we loaded up the addis instruction. The address that is used
27843 is the logical address that was formed during peephole2:
27844 (lo_sum (high) (low-part))
27846 The code is complicated, so we call output_asm_insn directly, and just
27850 emit_fusion_gpr_load (rtx target
, rtx mem
)
27855 const char *load_str
= NULL
;
27858 if (GET_CODE (mem
) == ZERO_EXTEND
)
27859 mem
= XEXP (mem
, 0);
27861 gcc_assert (REG_P (target
) && MEM_P (mem
));
27863 addr
= XEXP (mem
, 0);
27864 fusion_split_address (addr
, &addis_value
, &load_offset
);
27866 /* Now emit the load instruction to the same register. */
27867 mode
= GET_MODE (mem
);
27885 gcc_assert (TARGET_POWERPC64
);
27890 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target
, mem
));
27893 /* Emit the addis instruction. */
27894 emit_fusion_addis (target
, addis_value
);
27896 /* Emit the D-form load instruction. */
27897 emit_fusion_load (target
, target
, load_offset
, load_str
);
27902 /* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
27903 ignores it then. */
27904 static GTY(()) tree atomic_hold_decl
;
27905 static GTY(()) tree atomic_clear_decl
;
27906 static GTY(()) tree atomic_update_decl
;
27908 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
27910 rs6000_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
27912 if (!TARGET_HARD_FLOAT
)
27914 #ifdef RS6000_GLIBC_ATOMIC_FENV
27915 if (atomic_hold_decl
== NULL_TREE
)
27918 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
27919 get_identifier ("__atomic_feholdexcept"),
27920 build_function_type_list (void_type_node
,
27921 double_ptr_type_node
,
27923 TREE_PUBLIC (atomic_hold_decl
) = 1;
27924 DECL_EXTERNAL (atomic_hold_decl
) = 1;
27927 if (atomic_clear_decl
== NULL_TREE
)
27930 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
27931 get_identifier ("__atomic_feclearexcept"),
27932 build_function_type_list (void_type_node
,
27934 TREE_PUBLIC (atomic_clear_decl
) = 1;
27935 DECL_EXTERNAL (atomic_clear_decl
) = 1;
27938 tree const_double
= build_qualified_type (double_type_node
,
27940 tree const_double_ptr
= build_pointer_type (const_double
);
27941 if (atomic_update_decl
== NULL_TREE
)
27944 = build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
27945 get_identifier ("__atomic_feupdateenv"),
27946 build_function_type_list (void_type_node
,
27949 TREE_PUBLIC (atomic_update_decl
) = 1;
27950 DECL_EXTERNAL (atomic_update_decl
) = 1;
27953 tree fenv_var
= create_tmp_var_raw (double_type_node
);
27954 TREE_ADDRESSABLE (fenv_var
) = 1;
27955 tree fenv_addr
= build1 (ADDR_EXPR
, double_ptr_type_node
,
27956 build4 (TARGET_EXPR
, double_type_node
, fenv_var
,
27957 void_node
, NULL_TREE
, NULL_TREE
));
27959 *hold
= build_call_expr (atomic_hold_decl
, 1, fenv_addr
);
27960 *clear
= build_call_expr (atomic_clear_decl
, 0);
27961 *update
= build_call_expr (atomic_update_decl
, 1,
27962 fold_convert (const_double_ptr
, fenv_addr
));
27967 tree mffs
= rs6000_builtin_decls
[RS6000_BIF_MFFS
];
27968 tree mtfsf
= rs6000_builtin_decls
[RS6000_BIF_MTFSF
];
27969 tree call_mffs
= build_call_expr (mffs
, 0);
27971 /* Generates the equivalent of feholdexcept (&fenv_var)
27973 *fenv_var = __builtin_mffs ();
27975 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
27976 __builtin_mtfsf (0xff, fenv_hold); */
27978 /* Mask to clear everything except for the rounding modes and non-IEEE
27979 arithmetic flag. */
27980 const unsigned HOST_WIDE_INT hold_exception_mask
27981 = HOST_WIDE_INT_C (0xffffffff00000007);
27983 tree fenv_var
= create_tmp_var_raw (double_type_node
);
27985 tree hold_mffs
= build4 (TARGET_EXPR
, double_type_node
, fenv_var
, call_mffs
,
27986 NULL_TREE
, NULL_TREE
);
27988 tree fenv_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_var
);
27989 tree fenv_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
27990 build_int_cst (uint64_type_node
,
27991 hold_exception_mask
));
27993 tree fenv_hold_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
27996 tree hold_mtfsf
= build_call_expr (mtfsf
, 2,
27997 build_int_cst (unsigned_type_node
, 0xff),
28000 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, hold_mffs
, hold_mtfsf
);
28002 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
28004 double fenv_clear = __builtin_mffs ();
28005 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
28006 __builtin_mtfsf (0xff, fenv_clear); */
28008 /* Mask to clear everything except for the rounding modes and non-IEEE
28009 arithmetic flag. */
28010 const unsigned HOST_WIDE_INT clear_exception_mask
28011 = HOST_WIDE_INT_C (0xffffffff00000000);
28013 tree fenv_clear
= create_tmp_var_raw (double_type_node
);
28015 tree clear_mffs
= build4 (TARGET_EXPR
, double_type_node
, fenv_clear
,
28016 call_mffs
, NULL_TREE
, NULL_TREE
);
28018 tree fenv_clean_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, fenv_clear
);
28019 tree fenv_clear_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
,
28021 build_int_cst (uint64_type_node
,
28022 clear_exception_mask
));
28024 tree fenv_clear_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
28025 fenv_clear_llu_and
);
28027 tree clear_mtfsf
= build_call_expr (mtfsf
, 2,
28028 build_int_cst (unsigned_type_node
, 0xff),
28031 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, clear_mffs
, clear_mtfsf
);
28033 /* Generates the equivalent of feupdateenv (&fenv_var)
28035 double old_fenv = __builtin_mffs ();
28036 double fenv_update;
28037 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) |
28038 (*(uint64_t*)fenv_var 0x1ff80fff);
28039 __builtin_mtfsf (0xff, fenv_update); */
28041 const unsigned HOST_WIDE_INT update_exception_mask
28042 = HOST_WIDE_INT_C (0xffffffff1fffff00);
28043 const unsigned HOST_WIDE_INT new_exception_mask
28044 = HOST_WIDE_INT_C (0x1ff80fff);
28046 tree old_fenv
= create_tmp_var_raw (double_type_node
);
28047 tree update_mffs
= build4 (TARGET_EXPR
, double_type_node
, old_fenv
,
28048 call_mffs
, NULL_TREE
, NULL_TREE
);
28050 tree old_llu
= build1 (VIEW_CONVERT_EXPR
, uint64_type_node
, old_fenv
);
28051 tree old_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, old_llu
,
28052 build_int_cst (uint64_type_node
,
28053 update_exception_mask
));
28055 tree new_llu_and
= build2 (BIT_AND_EXPR
, uint64_type_node
, fenv_llu
,
28056 build_int_cst (uint64_type_node
,
28057 new_exception_mask
));
28059 tree new_llu_mask
= build2 (BIT_IOR_EXPR
, uint64_type_node
,
28060 old_llu_and
, new_llu_and
);
28062 tree fenv_update_mtfsf
= build1 (VIEW_CONVERT_EXPR
, double_type_node
,
28065 tree update_mtfsf
= build_call_expr (mtfsf
, 2,
28066 build_int_cst (unsigned_type_node
, 0xff),
28067 fenv_update_mtfsf
);
28069 *update
= build2 (COMPOUND_EXPR
, void_type_node
, update_mffs
, update_mtfsf
);
28073 rs6000_generate_float2_double_code (rtx dst
, rtx src1
, rtx src2
)
28075 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28077 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
28078 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
28080 /* The destination of the vmrgew instruction layout is:
28081 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
28082 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28083 vmrgew instruction will be correct. */
28084 if (BYTES_BIG_ENDIAN
)
28086 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0
, src1
, src2
,
28088 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1
, src1
, src2
,
28093 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
28094 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
28097 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
28098 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
28100 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2
, rtx_tmp0
));
28101 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3
, rtx_tmp1
));
28103 if (BYTES_BIG_ENDIAN
)
28104 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
28106 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
28110 rs6000_generate_float2_code (bool signed_convert
, rtx dst
, rtx src1
, rtx src2
)
28112 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28114 rtx_tmp0
= gen_reg_rtx (V2DImode
);
28115 rtx_tmp1
= gen_reg_rtx (V2DImode
);
28117 /* The destination of the vmrgew instruction layout is:
28118 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0].
28119 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28120 vmrgew instruction will be correct. */
28121 if (BYTES_BIG_ENDIAN
)
28123 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
28124 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
28128 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0
, src1
, src2
, GEN_INT (3)));
28129 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1
, src1
, src2
, GEN_INT (0)));
28132 rtx_tmp2
= gen_reg_rtx (V4SFmode
);
28133 rtx_tmp3
= gen_reg_rtx (V4SFmode
);
28135 if (signed_convert
)
28137 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2
, rtx_tmp0
));
28138 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3
, rtx_tmp1
));
28142 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2
, rtx_tmp0
));
28143 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3
, rtx_tmp1
));
28146 if (BYTES_BIG_ENDIAN
)
28147 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp2
, rtx_tmp3
));
28149 emit_insn (gen_p8_vmrgew_v4sf (dst
, rtx_tmp3
, rtx_tmp2
));
28153 rs6000_generate_vsigned2_code (bool signed_convert
, rtx dst
, rtx src1
,
28156 rtx rtx_tmp0
, rtx_tmp1
, rtx_tmp2
, rtx_tmp3
;
28158 rtx_tmp0
= gen_reg_rtx (V2DFmode
);
28159 rtx_tmp1
= gen_reg_rtx (V2DFmode
);
28161 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0
, src1
, src2
, GEN_INT (0)));
28162 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1
, src1
, src2
, GEN_INT (3)));
28164 rtx_tmp2
= gen_reg_rtx (V4SImode
);
28165 rtx_tmp3
= gen_reg_rtx (V4SImode
);
28167 if (signed_convert
)
28169 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2
, rtx_tmp0
));
28170 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3
, rtx_tmp1
));
28174 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2
, rtx_tmp0
));
28175 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3
, rtx_tmp1
));
28178 emit_insn (gen_p8_vmrgew_v4si (dst
, rtx_tmp2
, rtx_tmp3
));
28181 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
28184 rs6000_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
28185 optimization_type opt_type
)
28190 return (opt_type
== OPTIMIZE_FOR_SPEED
28191 && RS6000_RECIP_AUTO_RSQRTE_P (mode1
));
28198 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28200 static HOST_WIDE_INT
28201 rs6000_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
28203 if (TREE_CODE (exp
) == STRING_CST
28204 && (STRICT_ALIGNMENT
|| !optimize_size
))
28205 return MAX (align
, BITS_PER_WORD
);
28209 /* Implement TARGET_STARTING_FRAME_OFFSET. */
28211 static HOST_WIDE_INT
28212 rs6000_starting_frame_offset (void)
28214 if (FRAME_GROWS_DOWNWARD
)
28216 return RS6000_STARTING_FRAME_OFFSET
;
28219 /* Internal function to return the built-in function id for the complex
28220 multiply operation for a given mode. */
28222 static inline built_in_function
28223 complex_multiply_builtin_code (machine_mode mode
)
28225 gcc_assert (IN_RANGE (mode
, MIN_MODE_COMPLEX_FLOAT
, MAX_MODE_COMPLEX_FLOAT
));
28226 int func
= BUILT_IN_COMPLEX_MUL_MIN
+ mode
- MIN_MODE_COMPLEX_FLOAT
;
28227 return (built_in_function
) func
;
28230 /* Internal function to return the built-in function id for the complex divide
28231 operation for a given mode. */
28233 static inline built_in_function
28234 complex_divide_builtin_code (machine_mode mode
)
28236 gcc_assert (IN_RANGE (mode
, MIN_MODE_COMPLEX_FLOAT
, MAX_MODE_COMPLEX_FLOAT
));
28237 int func
= BUILT_IN_COMPLEX_DIV_MIN
+ mode
- MIN_MODE_COMPLEX_FLOAT
;
28238 return (built_in_function
) func
;
28241 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
28242 function names from <foo>l to <foo>f128 if the default long double type is
28243 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
28244 include file switches the names on systems that support long double as IEEE
28245 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28246 In the future, glibc will export names like __ieee128_sinf128 and we can
28247 switch to using those instead of using sinf128, which pollutes the user's
28250 This will switch the names for Fortran math functions as well (which doesn't
28251 use math.h). However, Fortran needs other changes to the compiler and
28252 library before you can switch the real*16 type at compile time.
28254 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28255 only do this transformation if the __float128 type is enabled. This
28256 prevents us from doing the transformation on older 32-bit ports that might
28257 have enabled using IEEE 128-bit floating point as the default long double
28260 We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
28261 function names used for complex multiply and divide to the appropriate
28265 rs6000_mangle_decl_assembler_name (tree decl
, tree id
)
28267 /* Handle complex multiply/divide. For IEEE 128-bit, use __mulkc3 or
28268 __divkc3 and for IBM 128-bit use __multc3 and __divtc3. */
28269 if (TARGET_FLOAT128_TYPE
28270 && TREE_CODE (decl
) == FUNCTION_DECL
28271 && DECL_IS_UNDECLARED_BUILTIN (decl
)
28272 && DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
28274 built_in_function id
= DECL_FUNCTION_CODE (decl
);
28275 const char *newname
= NULL
;
28277 if (id
== complex_multiply_builtin_code (KCmode
))
28278 newname
= "__mulkc3";
28280 else if (id
== complex_multiply_builtin_code (ICmode
))
28281 newname
= "__multc3";
28283 else if (id
== complex_multiply_builtin_code (TCmode
))
28284 newname
= (TARGET_IEEEQUAD
) ? "__mulkc3" : "__multc3";
28286 else if (id
== complex_divide_builtin_code (KCmode
))
28287 newname
= "__divkc3";
28289 else if (id
== complex_divide_builtin_code (ICmode
))
28290 newname
= "__divtc3";
28292 else if (id
== complex_divide_builtin_code (TCmode
))
28293 newname
= (TARGET_IEEEQUAD
) ? "__divkc3" : "__divtc3";
28297 if (TARGET_DEBUG_BUILTIN
)
28298 fprintf (stderr
, "Map complex mul/div => %s\n", newname
);
28300 return get_identifier (newname
);
28304 /* Map long double built-in functions if long double is IEEE 128-bit. */
28305 if (TARGET_FLOAT128_TYPE
&& TARGET_IEEEQUAD
&& TARGET_LONG_DOUBLE_128
28306 && TREE_CODE (decl
) == FUNCTION_DECL
28307 && DECL_IS_UNDECLARED_BUILTIN (decl
)
28308 && DECL_BUILT_IN_CLASS (decl
) == BUILT_IN_NORMAL
)
28310 size_t len
= IDENTIFIER_LENGTH (id
);
28311 const char *name
= IDENTIFIER_POINTER (id
);
28312 char *newname
= NULL
;
28314 /* See if it is one of the built-in functions with an unusual name. */
28315 switch (DECL_FUNCTION_CODE (decl
))
28317 case BUILT_IN_DREML
:
28318 newname
= xstrdup ("__remainderieee128");
28321 case BUILT_IN_GAMMAL
:
28322 newname
= xstrdup ("__lgammaieee128");
28325 case BUILT_IN_GAMMAL_R
:
28326 case BUILT_IN_LGAMMAL_R
:
28327 newname
= xstrdup ("__lgammaieee128_r");
28330 case BUILT_IN_NEXTTOWARD
:
28331 newname
= xstrdup ("__nexttoward_to_ieee128");
28334 case BUILT_IN_NEXTTOWARDF
:
28335 newname
= xstrdup ("__nexttowardf_to_ieee128");
28338 case BUILT_IN_NEXTTOWARDL
:
28339 newname
= xstrdup ("__nexttowardieee128");
28342 case BUILT_IN_POW10L
:
28343 newname
= xstrdup ("__exp10ieee128");
28346 case BUILT_IN_SCALBL
:
28347 newname
= xstrdup ("__scalbieee128");
28350 case BUILT_IN_SIGNIFICANDL
:
28351 newname
= xstrdup ("__significandieee128");
28354 case BUILT_IN_SINCOSL
:
28355 newname
= xstrdup ("__sincosieee128");
28362 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28365 size_t printf_len
= strlen ("printf");
28366 size_t scanf_len
= strlen ("scanf");
28367 size_t printf_chk_len
= strlen ("printf_chk");
28369 if (len
>= printf_len
28370 && strcmp (name
+ len
- printf_len
, "printf") == 0)
28371 newname
= xasprintf ("__%sieee128", name
);
28373 else if (len
>= scanf_len
28374 && strcmp (name
+ len
- scanf_len
, "scanf") == 0)
28375 newname
= xasprintf ("__isoc99_%sieee128", name
);
28377 else if (len
>= printf_chk_len
28378 && strcmp (name
+ len
- printf_chk_len
, "printf_chk") == 0)
28379 newname
= xasprintf ("%sieee128", name
);
28381 else if (name
[len
- 1] == 'l')
28383 bool uses_ieee128_p
= false;
28384 tree type
= TREE_TYPE (decl
);
28385 machine_mode ret_mode
= TYPE_MODE (type
);
28387 /* See if the function returns a IEEE 128-bit floating point type or
28389 if (ret_mode
== TFmode
|| ret_mode
== TCmode
)
28390 uses_ieee128_p
= true;
28393 function_args_iterator args_iter
;
28396 /* See if the function passes a IEEE 128-bit floating point type
28397 or complex type. */
28398 FOREACH_FUNCTION_ARGS (type
, arg
, args_iter
)
28400 machine_mode arg_mode
= TYPE_MODE (arg
);
28401 if (arg_mode
== TFmode
|| arg_mode
== TCmode
)
28403 uses_ieee128_p
= true;
28409 /* If we passed or returned an IEEE 128-bit floating point type,
28410 change the name. Use __<name>ieee128, instead of <name>l. */
28411 if (uses_ieee128_p
)
28412 newname
= xasprintf ("__%.*sieee128", (int)(len
- 1), name
);
28418 if (TARGET_DEBUG_BUILTIN
)
28419 fprintf (stderr
, "Map %s => %s\n", name
, newname
);
28421 id
= get_identifier (newname
);
28429 /* Predict whether the given loop in gimple will be transformed in the RTL
28430 doloop_optimize pass. */
28433 rs6000_predict_doloop_p (struct loop
*loop
)
28437 /* On rs6000, targetm.can_use_doloop_p is actually
28438 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28439 if (loop
->inner
!= NULL
)
28441 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
28442 fprintf (dump_file
, "Predict doloop failure due to"
28443 " loop nesting.\n");
28450 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28452 static machine_mode
28453 rs6000_preferred_doloop_mode (machine_mode
)
28458 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28461 rs6000_cannot_substitute_mem_equiv_p (rtx mem
)
28463 gcc_assert (MEM_P (mem
));
28465 /* curr_insn_transform()'s handling of subregs cannot handle altivec AND:
28466 type addresses, so don't allow MEMs with those address types to be
28467 substituted as an equivalent expression. See PR93974 for details. */
28468 if (GET_CODE (XEXP (mem
, 0)) == AND
)
28474 /* Implement TARGET_INVALID_CONVERSION. */
28476 static const char *
28477 rs6000_invalid_conversion (const_tree fromtype
, const_tree totype
)
28479 /* Make sure we're working with the canonical types. */
28480 if (TYPE_CANONICAL (fromtype
) != NULL_TREE
)
28481 fromtype
= TYPE_CANONICAL (fromtype
);
28482 if (TYPE_CANONICAL (totype
) != NULL_TREE
)
28483 totype
= TYPE_CANONICAL (totype
);
28485 machine_mode frommode
= TYPE_MODE (fromtype
);
28486 machine_mode tomode
= TYPE_MODE (totype
);
28488 if (frommode
!= tomode
)
28490 /* Do not allow conversions to/from XOmode and OOmode types. */
28491 if (frommode
== XOmode
)
28492 return N_("invalid conversion from type %<__vector_quad%>");
28493 if (tomode
== XOmode
)
28494 return N_("invalid conversion to type %<__vector_quad%>");
28495 if (frommode
== OOmode
)
28496 return N_("invalid conversion from type %<__vector_pair%>");
28497 if (tomode
== OOmode
)
28498 return N_("invalid conversion to type %<__vector_pair%>");
28501 /* Conversion allowed. */
28505 /* Convert a SFmode constant to the integer bit pattern. */
28508 rs6000_const_f32_to_i32 (rtx operand
)
28511 const struct real_value
*rv
= CONST_DOUBLE_REAL_VALUE (operand
);
28513 gcc_assert (GET_MODE (operand
) == SFmode
);
28514 REAL_VALUE_TO_TARGET_SINGLE (*rv
, value
);
28519 rs6000_emit_xxspltidp_v2df (rtx dst
, long value
)
28521 if (((value
& 0x7F800000) == 0) && ((value
& 0x7FFFFF) != 0))
28522 inform (input_location
,
28523 "the result for the xxspltidp instruction "
28524 "is undefined for subnormal input values");
28525 emit_insn( gen_xxspltidp_v2df_inst (dst
, GEN_INT (value
)));
28528 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28531 rs6000_gen_pic_addr_diff_vec (void)
28533 return rs6000_relative_jumptables
;
28537 rs6000_output_addr_vec_elt (FILE *file
, int value
)
28539 const char *directive
= TARGET_64BIT
? DOUBLE_INT_ASM_OP
: "\t.long\t";
28542 fprintf (file
, "%s", directive
);
28543 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", value
);
28544 assemble_name (file
, buf
);
28545 fprintf (file
, "\n");
28549 /* Copy an integer constant to the vector constant structure. */
28552 constant_int_to_128bit_vector (rtx op
,
28555 vec_const_128bit_type
*info
)
28557 unsigned HOST_WIDE_INT uvalue
= UINTVAL (op
);
28558 unsigned bitsize
= GET_MODE_BITSIZE (mode
);
28560 for (int shift
= bitsize
- 8; shift
>= 0; shift
-= 8)
28561 info
->bytes
[byte_num
++] = (uvalue
>> shift
) & 0xff;
28564 /* Copy a floating point constant to the vector constant structure. */
28567 constant_fp_to_128bit_vector (rtx op
,
28570 vec_const_128bit_type
*info
)
28572 unsigned bitsize
= GET_MODE_BITSIZE (mode
);
28573 unsigned num_words
= bitsize
/ 32;
28574 const REAL_VALUE_TYPE
*rtype
= CONST_DOUBLE_REAL_VALUE (op
);
28575 long real_words
[VECTOR_128BIT_WORDS
];
28577 /* Make sure we don't overflow the real_words array and that it is
28578 filled completely. */
28579 gcc_assert (num_words
<= VECTOR_128BIT_WORDS
&& (bitsize
% 32) == 0);
28581 real_to_target (real_words
, rtype
, mode
);
28583 /* Iterate over each 32-bit word in the floating point constant. The
28584 real_to_target function puts out words in target endian fashion. We need
28585 to arrange the order so that the bytes are written in big endian order. */
28586 for (unsigned num
= 0; num
< num_words
; num
++)
28588 unsigned endian_num
= (BYTES_BIG_ENDIAN
28590 : num_words
- 1 - num
);
28592 unsigned uvalue
= real_words
[endian_num
];
28593 for (int shift
= 32 - 8; shift
>= 0; shift
-= 8)
28594 info
->bytes
[byte_num
++] = (uvalue
>> shift
) & 0xff;
28597 /* Mark that this constant involves floating point. */
28598 info
->fp_constant_p
= true;
28601 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28604 Break out the constant out to bytes, half words, words, and double words.
28605 Return true if we have successfully converted the constant.
28607 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28608 constants. Integer and floating point scalar constants are splatted to fill
28612 vec_const_128bit_to_bytes (rtx op
,
28614 vec_const_128bit_type
*info
)
28616 /* Initialize the constant structure. */
28617 memset ((void *)info
, 0, sizeof (vec_const_128bit_type
));
28619 /* Assume CONST_INTs are DImode. */
28620 if (mode
== VOIDmode
)
28621 mode
= CONST_INT_P (op
) ? DImode
: GET_MODE (op
);
28623 if (mode
== VOIDmode
)
28626 unsigned size
= GET_MODE_SIZE (mode
);
28627 bool splat_p
= false;
28629 if (size
> VECTOR_128BIT_BYTES
)
28632 /* Set up the bits. */
28633 switch (GET_CODE (op
))
28635 /* Integer constants, default to double word. */
28638 constant_int_to_128bit_vector (op
, mode
, 0, info
);
28643 /* Floating point constants. */
28646 /* Fail if the floating point constant is the wrong mode. */
28647 if (GET_MODE (op
) != mode
)
28650 /* SFmode stored as scalars are stored in DFmode format. */
28651 if (mode
== SFmode
)
28654 size
= GET_MODE_SIZE (DFmode
);
28657 constant_fp_to_128bit_vector (op
, mode
, 0, info
);
28662 /* Vector constants, iterate over each element. On little endian
28663 systems, we have to reverse the element numbers. */
28666 /* Fail if the vector constant is the wrong mode or size. */
28667 if (GET_MODE (op
) != mode
28668 || GET_MODE_SIZE (mode
) != VECTOR_128BIT_BYTES
)
28671 machine_mode ele_mode
= GET_MODE_INNER (mode
);
28672 size_t ele_size
= GET_MODE_SIZE (ele_mode
);
28673 size_t nunits
= GET_MODE_NUNITS (mode
);
28675 for (size_t num
= 0; num
< nunits
; num
++)
28677 rtx ele
= CONST_VECTOR_ELT (op
, num
);
28678 size_t byte_num
= (BYTES_BIG_ENDIAN
28680 : nunits
- 1 - num
) * ele_size
;
28682 if (CONST_INT_P (ele
))
28683 constant_int_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
28684 else if (CONST_DOUBLE_P (ele
))
28685 constant_fp_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
28693 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
28694 Since we are duplicating the element, we don't have to worry about
28696 case VEC_DUPLICATE
:
28698 /* Fail if the vector duplicate is the wrong mode or size. */
28699 if (GET_MODE (op
) != mode
28700 || GET_MODE_SIZE (mode
) != VECTOR_128BIT_BYTES
)
28703 machine_mode ele_mode
= GET_MODE_INNER (mode
);
28704 size_t ele_size
= GET_MODE_SIZE (ele_mode
);
28705 rtx ele
= XEXP (op
, 0);
28706 size_t nunits
= GET_MODE_NUNITS (mode
);
28708 if (!CONST_INT_P (ele
) && !CONST_DOUBLE_P (ele
))
28711 for (size_t num
= 0; num
< nunits
; num
++)
28713 size_t byte_num
= num
* ele_size
;
28715 if (CONST_INT_P (ele
))
28716 constant_int_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
28718 constant_fp_to_128bit_vector (ele
, ele_mode
, byte_num
, info
);
28724 /* Any thing else, just return failure. */
28729 /* Splat the constant to fill 128 bits if desired. */
28730 if (splat_p
&& size
< VECTOR_128BIT_BYTES
)
28732 if ((VECTOR_128BIT_BYTES
% size
) != 0)
28735 for (size_t offset
= size
;
28736 offset
< VECTOR_128BIT_BYTES
;
28738 memcpy ((void *) &info
->bytes
[offset
],
28739 (void *) &info
->bytes
[0],
28743 /* Remember original size. */
28744 info
->original_size
= size
;
28746 /* Determine if the bytes are all the same. */
28747 unsigned char first_byte
= info
->bytes
[0];
28748 info
->all_bytes_same
= true;
28749 for (size_t i
= 1; i
< VECTOR_128BIT_BYTES
; i
++)
28750 if (first_byte
!= info
->bytes
[i
])
28752 info
->all_bytes_same
= false;
28756 /* Pack half words together & determine if all of the half words are the
28758 for (size_t i
= 0; i
< VECTOR_128BIT_HALF_WORDS
; i
++)
28759 info
->half_words
[i
] = ((info
->bytes
[i
* 2] << 8)
28760 | info
->bytes
[(i
* 2) + 1]);
28762 unsigned short first_hword
= info
->half_words
[0];
28763 info
->all_half_words_same
= true;
28764 for (size_t i
= 1; i
< VECTOR_128BIT_HALF_WORDS
; i
++)
28765 if (first_hword
!= info
->half_words
[i
])
28767 info
->all_half_words_same
= false;
28771 /* Pack words together & determine if all of the words are the same. */
28772 for (size_t i
= 0; i
< VECTOR_128BIT_WORDS
; i
++)
28773 info
->words
[i
] = ((info
->bytes
[i
* 4] << 24)
28774 | (info
->bytes
[(i
* 4) + 1] << 16)
28775 | (info
->bytes
[(i
* 4) + 2] << 8)
28776 | info
->bytes
[(i
* 4) + 3]);
28778 info
->all_words_same
28779 = (info
->words
[0] == info
->words
[1]
28780 && info
->words
[0] == info
->words
[1]
28781 && info
->words
[0] == info
->words
[2]
28782 && info
->words
[0] == info
->words
[3]);
28784 /* Pack double words together & determine if all of the double words are the
28786 for (size_t i
= 0; i
< VECTOR_128BIT_DOUBLE_WORDS
; i
++)
28788 unsigned HOST_WIDE_INT d_word
= 0;
28789 for (size_t j
= 0; j
< 8; j
++)
28790 d_word
= (d_word
<< 8) | info
->bytes
[(i
* 8) + j
];
28792 info
->double_words
[i
] = d_word
;
28795 info
->all_double_words_same
28796 = (info
->double_words
[0] == info
->double_words
[1]);
28801 /* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
28802 if the LXVKQ instruction cannot be used. Otherwise return the immediate
28803 value to be used with the LXVKQ instruction. */
28806 constant_generates_lxvkq (vec_const_128bit_type
*vsx_const
)
28808 /* Is the instruction supported with power10 code generation, IEEE 128-bit
28809 floating point hardware and VSX registers are available. */
28810 if (!TARGET_IEEE128_CONSTANT
|| !TARGET_FLOAT128_HW
|| !TARGET_POWER10
28814 /* All of the constants that are generated by LXVKQ have the bottom 3 words
28816 if (vsx_const
->words
[1] != 0
28817 || vsx_const
->words
[2] != 0
28818 || vsx_const
->words
[3] != 0)
28821 /* See if we have a match for the first word. */
28822 switch (vsx_const
->words
[0])
28824 case 0x3FFF0000U
: return 1; /* IEEE 128-bit +1.0. */
28825 case 0x40000000U
: return 2; /* IEEE 128-bit +2.0. */
28826 case 0x40008000U
: return 3; /* IEEE 128-bit +3.0. */
28827 case 0x40010000U
: return 4; /* IEEE 128-bit +4.0. */
28828 case 0x40014000U
: return 5; /* IEEE 128-bit +5.0. */
28829 case 0x40018000U
: return 6; /* IEEE 128-bit +6.0. */
28830 case 0x4001C000U
: return 7; /* IEEE 128-bit +7.0. */
28831 case 0x7FFF0000U
: return 8; /* IEEE 128-bit +Infinity. */
28832 case 0x7FFF8000U
: return 9; /* IEEE 128-bit quiet NaN. */
28833 case 0x80000000U
: return 16; /* IEEE 128-bit -0.0. */
28834 case 0xBFFF0000U
: return 17; /* IEEE 128-bit -1.0. */
28835 case 0xC0000000U
: return 18; /* IEEE 128-bit -2.0. */
28836 case 0xC0008000U
: return 19; /* IEEE 128-bit -3.0. */
28837 case 0xC0010000U
: return 20; /* IEEE 128-bit -4.0. */
28838 case 0xC0014000U
: return 21; /* IEEE 128-bit -5.0. */
28839 case 0xC0018000U
: return 22; /* IEEE 128-bit -6.0. */
28840 case 0xC001C000U
: return 23; /* IEEE 128-bit -7.0. */
28841 case 0xFFFF0000U
: return 24; /* IEEE 128-bit -Infinity. */
28843 /* anything else cannot be loaded. */
28851 /* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if
28852 the XXSPLTIW instruction cannot be used. Otherwise return the immediate
28853 value to be used with the XXSPLTIW instruction. */
28856 constant_generates_xxspltiw (vec_const_128bit_type
*vsx_const
)
28858 if (!TARGET_SPLAT_WORD_CONSTANT
|| !TARGET_PREFIXED
|| !TARGET_VSX
)
28861 if (!vsx_const
->all_words_same
)
28864 /* If we can use XXSPLTIB, don't generate XXSPLTIW. */
28865 if (vsx_const
->all_bytes_same
)
28868 /* See if we can use VSPLTISH or VSPLTISW. */
28869 if (vsx_const
->all_half_words_same
)
28871 short sign_h_word
= vsx_const
->half_words
[0];
28872 if (EASY_VECTOR_15 (sign_h_word
))
28876 int sign_word
= vsx_const
->words
[0];
28877 if (EASY_VECTOR_15 (sign_word
))
28880 return vsx_const
->words
[0];
28883 /* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if
28884 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate
28885 value to be used with the XXSPLTIDP instruction. */
28888 constant_generates_xxspltidp (vec_const_128bit_type
*vsx_const
)
28890 if (!TARGET_SPLAT_FLOAT_CONSTANT
|| !TARGET_PREFIXED
|| !TARGET_VSX
)
28893 /* Reject if the two 64-bit segments are not the same. */
28894 if (!vsx_const
->all_double_words_same
)
28897 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
28898 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */
28899 if (vsx_const
->all_bytes_same
28900 || vsx_const
->all_half_words_same
28901 || vsx_const
->all_words_same
)
28904 unsigned HOST_WIDE_INT value
= vsx_const
->double_words
[0];
28906 /* Avoid values that look like DFmode NaN's, except for the normal NaN bit
28907 pattern and the signalling NaN bit pattern. Recognize infinity and
28908 negative infinity. */
28910 /* Bit representation of DFmode normal quiet NaN. */
28911 #define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000)
28913 /* Bit representation of DFmode normal signaling NaN. */
28914 #define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000)
28916 /* Bit representation of DFmode positive infinity. */
28917 #define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000)
28919 /* Bit representation of DFmode negative infinity. */
28920 #define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000)
28922 if (value
!= RS6000_CONST_DF_NAN
28923 && value
!= RS6000_CONST_DF_NANS
28924 && value
!= RS6000_CONST_DF_INF
28925 && value
!= RS6000_CONST_DF_NEG_INF
)
28927 /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
28928 the exponent, and 52 bits for the mantissa (not counting the hidden
28929 bit used for normal numbers). NaN values have the exponent set to all
28930 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */
28932 int df_exponent
= (value
>> 52) & 0x7ff;
28933 unsigned HOST_WIDE_INT
28934 df_mantissa
= value
& ((HOST_WIDE_INT_1U
<< 52) - HOST_WIDE_INT_1U
);
28936 if (df_exponent
== 0x7ff && df_mantissa
!= 0) /* other NaNs. */
28939 /* Avoid values that are DFmode subnormal values. Subnormal numbers have
28940 the exponent all 0 bits, and the mantissa non-zero. If the value is
28941 subnormal, then the hidden bit in the mantissa is not set. */
28942 if (df_exponent
== 0 && df_mantissa
!= 0) /* subnormal. */
28946 /* Change the representation to DFmode constant. */
28947 long df_words
[2] = { vsx_const
->words
[0], vsx_const
->words
[1] };
28949 /* real_from_target takes the target words in target order. */
28950 if (!BYTES_BIG_ENDIAN
)
28951 std::swap (df_words
[0], df_words
[1]);
28953 REAL_VALUE_TYPE rv_type
;
28954 real_from_target (&rv_type
, df_words
, DFmode
);
28956 const REAL_VALUE_TYPE
*rv
= &rv_type
;
28958 /* Validate that the number can be stored as a SFmode value. */
28959 if (!exact_real_truncate (SFmode
, rv
))
28962 /* Validate that the number is not a SFmode subnormal value (exponent is 0,
28963 mantissa field is non-zero) which is undefined for the XXSPLTIDP
28966 real_to_target (&sf_value
, rv
, SFmode
);
28968 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
28969 and 23 bits for the mantissa. Subnormal numbers have the exponent all
28970 0 bits, and the mantissa non-zero. */
28971 long sf_exponent
= (sf_value
>> 23) & 0xFF;
28972 long sf_mantissa
= sf_value
& 0x7FFFFF;
28974 if (sf_exponent
== 0 && sf_mantissa
!= 0)
28977 /* Return the immediate to be used. */
28981 /* Now we have only two opaque types, they are __vector_quad and
28982 __vector_pair built-in types. They are target specific and
28983 only available when MMA is supported. With MMA supported, it
28984 simply returns true, otherwise it checks if the given gimple
28985 STMT is an assignment, asm or call stmt and uses either of
28986 these two opaque types unexpectedly, if yes, it would raise
28987 an error message and returns true, otherwise it returns false. */
28990 rs6000_opaque_type_invalid_use_p (gimple
*stmt
)
28995 /* If the given TYPE is one MMA opaque type, emit the corresponding
28996 error messages and return true, otherwise return false. */
28997 auto check_and_error_invalid_use
= [](tree type
)
28999 tree mv
= TYPE_MAIN_VARIANT (type
);
29000 if (mv
== vector_quad_type_node
)
29002 error ("type %<__vector_quad%> requires the %qs option", "-mmma");
29005 else if (mv
== vector_pair_type_node
)
29007 error ("type %<__vector_pair%> requires the %qs option", "-mmma");
29015 /* The usage of MMA opaque types is very limited for now,
29016 to check with gassign, gasm and gcall is enough so far. */
29017 if (gassign
*ga
= dyn_cast
<gassign
*> (stmt
))
29019 tree lhs
= gimple_assign_lhs (ga
);
29020 tree type
= TREE_TYPE (lhs
);
29021 if (check_and_error_invalid_use (type
))
29024 else if (gasm
*gs
= dyn_cast
<gasm
*> (stmt
))
29026 unsigned ninputs
= gimple_asm_ninputs (gs
);
29027 for (unsigned i
= 0; i
< ninputs
; i
++)
29029 tree op
= gimple_asm_input_op (gs
, i
);
29030 tree val
= TREE_VALUE (op
);
29031 tree type
= TREE_TYPE (val
);
29032 if (check_and_error_invalid_use (type
))
29035 unsigned noutputs
= gimple_asm_noutputs (gs
);
29036 for (unsigned i
= 0; i
< noutputs
; i
++)
29038 tree op
= gimple_asm_output_op (gs
, i
);
29039 tree val
= TREE_VALUE (op
);
29040 tree type
= TREE_TYPE (val
);
29041 if (check_and_error_invalid_use (type
))
29045 else if (gcall
*gc
= dyn_cast
<gcall
*> (stmt
))
29047 unsigned nargs
= gimple_call_num_args (gc
);
29048 for (unsigned i
= 0; i
< nargs
; i
++)
29050 tree arg
= gimple_call_arg (gc
, i
);
29051 tree type
= TREE_TYPE (arg
);
29052 if (check_and_error_invalid_use (type
))
/* The rs6000 target hook vector; TARGET_INITIALIZER collects every
   TARGET_* hook macro defined in this file into one structure.  */
struct gcc_target targetm = TARGET_INITIALIZER;
29063 #include "gt-rs6000.h"